| OLD | NEW | 
|     1 /* |     1 /* | 
|     2 ** 2006 Oct 10 |     2 ** 2006 Oct 10 | 
|     3 ** |     3 ** | 
|     4 ** The author disclaims copyright to this source code.  In place of |     4 ** The author disclaims copyright to this source code.  In place of | 
|     5 ** a legal notice, here is a blessing: |     5 ** a legal notice, here is a blessing: | 
|     6 ** |     6 ** | 
|     7 **    May you do good and not evil. |     7 **    May you do good and not evil. | 
|     8 **    May you find forgiveness for yourself and forgive others. |     8 **    May you find forgiveness for yourself and forgive others. | 
|     9 **    May you share freely, never taking more than you give. |     9 **    May you share freely, never taking more than you give. | 
|    10 ** |    10 ** | 
|    11 ****************************************************************************** |    11 ****************************************************************************** | 
|    12 ** |    12 ** | 
|    13 ** This is an SQLite module implementing full-text search. |    13 ** This is an SQLite module implementing full-text search. | 
|    14 */ |    14 */ | 
|    15  |    15  | 
|    16 /* |    16 /* | 
|    17 ** The code in this file is only compiled if: |    17 ** The code in this file is only compiled if: | 
|    18 ** |    18 ** | 
|    19 **     * The FTS3 module is being built as an extension |    19 **     * The FTS3 module is being built as an extension | 
|    20 **       (in which case SQLITE_CORE is not defined), or |    20 **       (in which case SQLITE_CORE is not defined), or | 
|    21 ** |    21 ** | 
|    22 **     * The FTS3 module is being built into the core of |    22 **     * The FTS3 module is being built into the core of | 
|    23 **       SQLite (in which case SQLITE_ENABLE_FTS3 is defined). |    23 **       SQLite (in which case SQLITE_ENABLE_FTS3 is defined). | 
|    24 */ |    24 */ | 
|    25  |    25  | 
|    26 /* TODO(shess) Consider exporting this comment to an HTML file or the |  | 
|    27 ** wiki. |  | 
|    28 */ |  | 
|    29 /* The full-text index is stored in a series of b+tree (-like) |    26 /* The full-text index is stored in a series of b+tree (-like) | 
|    30 ** structures called segments which map terms to doclists.  The |    27 ** structures called segments which map terms to doclists.  The | 
|    31 ** structures are like b+trees in layout, but are constructed from the |    28 ** structures are like b+trees in layout, but are constructed from the | 
|    32 ** bottom up in optimal fashion and are not updatable.  Since trees |    29 ** bottom up in optimal fashion and are not updatable.  Since trees | 
|    33 ** are built from the bottom up, things will be described from the |    30 ** are built from the bottom up, things will be described from the | 
|    34 ** bottom up. |    31 ** bottom up. | 
|    35 ** |    32 ** | 
|    36 ** |    33 ** | 
|    37 **** Varints **** |    34 **** Varints **** | 
|    38 ** The basic unit of encoding is a variable-length integer called a |    35 ** The basic unit of encoding is a variable-length integer called a | 
|    39 ** varint.  We encode variable-length integers in little-endian order |    36 ** varint.  We encode variable-length integers in little-endian order | 
|    40 ** using seven bits per byte as follows: |    37 ** using seven bits per byte as follows: | 
|    41 ** |    38 ** | 
|    42 ** KEY: |    39 ** KEY: | 
|    43 **         A = 0xxxxxxx    7 bits of data and one flag bit |    40 **         A = 0xxxxxxx    7 bits of data and one flag bit | 
|    44 **         B = 1xxxxxxx    7 bits of data and one flag bit |    41 **         B = 1xxxxxxx    7 bits of data and one flag bit | 
|    45 ** |    42 ** | 
|    46 **  7 bits - A |    43 **  7 bits - A | 
|    47 ** 14 bits - BA |    44 ** 14 bits - BA | 
|    48 ** 21 bits - BBA |    45 ** 21 bits - BBA | 
|    49 ** and so on. |    46 ** and so on. | 
|    50 ** |    47 ** | 
|    51 ** This is identical to how sqlite encodes varints (see util.c). |    48 ** This is similar in concept to how sqlite encodes "varints" but | 
 |    49 ** the encoding is not the same.  SQLite varints are big-endian | 
 |    50 ** and are limited to 9 bytes in length whereas FTS3 varints are | 
 |    51 ** little-endian and can be up to 10 bytes in length (in theory). | 
 |    52 ** | 
 |    53 ** Example encodings: | 
 |    54 ** | 
 |    55 **     1:    0x01 | 
 |    56 **   127:    0x7f | 
 |    57 **   128:    0x80 0x01 | 
|    52 ** |    58 ** | 
|    53 ** |    59 ** | 
|    54 **** Document lists **** |    60 **** Document lists **** | 
|    55 ** A doclist (document list) holds a docid-sorted list of hits for a |    61 ** A doclist (document list) holds a docid-sorted list of hits for a | 
|    56 ** given term.  Doclists hold docids, and can optionally associate |    62 ** given term.  Doclists hold docids and associated token positions. | 
|    57 ** token positions and offsets with docids. |    63 ** A docid is the unique integer identifier for a single document. | 
 |    64 ** A position is the index of a word within the document.  The first  | 
 |    65 ** word of the document has a position of 0. | 
|    58 ** |    66 ** | 
|    59 ** A DL_POSITIONS_OFFSETS doclist is stored like this: |    67 ** FTS3 used to optionally store character offsets using a compile-time | 
 |    68 ** option.  But that functionality is no longer supported. | 
 |    69 ** | 
 |    70 ** A doclist is stored like this: | 
|    60 ** |    71 ** | 
|    61 ** array { |    72 ** array { | 
|    62 **   varint docid; |    73 **   varint docid; | 
|    63 **   array {                (position list for column 0) |    74 **   array {                (position list for column 0) | 
|    64 **     varint position;     (delta from previous position plus POS_BASE) |    75 **     varint position;     (2 more than the delta from previous position) | 
|    65 **     varint startOffset;  (delta from previous startOffset) |  | 
|    66 **     varint endOffset;    (delta from startOffset) |  | 
|    67 **   } |    76 **   } | 
|    68 **   array { |    77 **   array { | 
|    69 **     varint POS_COLUMN;   (marks start of position list for new column) |    78 **     varint POS_COLUMN;   (marks start of position list for new column) | 
|    70 **     varint column;       (index of new column) |    79 **     varint column;       (index of new column) | 
|    71 **     array { |    80 **     array { | 
|    72 **       varint position;   (delta from previous position plus POS_BASE) |    81 **       varint position;   (2 more than the delta from previous position) | 
|    73 **       varint startOffset;(delta from previous startOffset) |  | 
|    74 **       varint endOffset;  (delta from startOffset) |  | 
|    75 **     } |    82 **     } | 
|    76 **   } |    83 **   } | 
|    77 **   varint POS_END;        (marks end of positions for this document) |    84 **   varint POS_END;        (marks end of positions for this document) | 
|    78 ** } |    85 ** } | 
|    79 ** |    86 ** | 
|    80 ** Here, array { X } means zero or more occurrences of X, adjacent in |    87 ** Here, array { X } means zero or more occurrences of X, adjacent in | 
|    81 ** memory.  A "position" is an index of a token in the token stream |    88 ** memory.  A "position" is an index of a token in the token stream | 
|    82 ** generated by the tokenizer, while an "offset" is a byte offset, |    89 ** generated by the tokenizer. Note that POS_END and POS_COLUMN occur  | 
|    83 ** both based at 0.  Note that POS_END and POS_COLUMN occur in the |    90 ** in the same logical place as the position element, and act as sentinels | 
|    84 ** same logical place as the position element, and act as sentinels |    91 ** ending a position list array.  POS_END is 0.  POS_COLUMN is 1. | 
|    85 ** ending a position list array. |    92 ** The position numbers are not stored literally but rather as two more | 
 |    93 ** than the difference from the prior position, or just the position plus | 
 |    94 ** 2 for the first position.  Example: | 
|    86 ** |    95 ** | 
|    87 ** A DL_POSITIONS doclist omits the startOffset and endOffset |    96 **   label:       A B C D E  F  G H   I  J K | 
|    88 ** information.  A DL_DOCIDS doclist omits both the position and |    97 **   value:     123 5 9 1 1 14 35 0 234 72 0 | 
|    89 ** offset information, becoming an array of varint-encoded docids. |  | 
|    90 ** |    98 ** | 
|    91 ** On-disk data is stored as type DL_DEFAULT, so we don't serialize |    99 ** The 123 value is the first docid.  For column zero in this document | 
|    92 ** the type.  Due to how deletion is implemented in the segmentation |   100 ** there are two matches at positions 3 and 10 (5-2 and 9-2+3).  The 1 | 
|    93 ** system, on-disk doclists MUST store at least positions. |   101 ** at D signals the start of a new column; the 1 at E indicates that the | 
 |   102 ** new column is column number 1.  There are two positions at 12 and 45 | 
 |   103 ** (14-2 and 35-2+12).  The 0 at H indicates the end-of-document.  The | 
 |   104 ** 234 at I is the next docid.  It has one position 70 (72-2) and then | 
 |   105 ** terminates with the 0 at K. | 
|    94 ** |   106 ** | 
 |   107 ** A "position-list" is the list of positions for multiple columns for | 
 |   108 ** a single docid.  A "column-list" is the set of positions for a single | 
 |   109 ** column.  Hence, a position-list consists of one or more column-lists, | 
 |   110 ** a document record consists of a docid followed by a position-list and | 
 |   111 ** a doclist consists of one or more document records. | 
 |   112 ** | 
 |   113 ** A bare doclist omits the position information, becoming an  | 
 |   114 ** array of varint-encoded docids. | 
|    95 ** |   115 ** | 
|    96 **** Segment leaf nodes **** |   116 **** Segment leaf nodes **** | 
|    97 ** Segment leaf nodes store terms and doclists, ordered by term.  Leaf |   117 ** Segment leaf nodes store terms and doclists, ordered by term.  Leaf | 
|    98 ** nodes are written using LeafWriter, and read using LeafReader (to |   118 ** nodes are written using LeafWriter, and read using LeafReader (to | 
|    99 ** iterate through a single leaf node's data) and LeavesReader (to |   119 ** iterate through a single leaf node's data) and LeavesReader (to | 
|   100 ** iterate through a segment's entire leaf layer).  Leaf nodes have |   120 ** iterate through a segment's entire leaf layer).  Leaf nodes have | 
|   101 ** the format: |   121 ** the format: | 
|   102 ** |   122 ** | 
|   103 ** varint iHeight;             (height from leaf level, always 0) |   123 ** varint iHeight;             (height from leaf level, always 0) | 
|   104 ** varint nTerm;               (length of first term) |   124 ** varint nTerm;               (length of first term) | 
| (...skipping 167 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
|   272 ** into a single segment. |   292 ** into a single segment. | 
|   273 */ |   293 */ | 
|   274 #define CHROMIUM_FTS3_CHANGES 1 |   294 #define CHROMIUM_FTS3_CHANGES 1 | 
|   275  |   295  | 
|   276 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) |   296 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) | 
|   277  |   297  | 
|   278 #if defined(SQLITE_ENABLE_FTS3) && !defined(SQLITE_CORE) |   298 #if defined(SQLITE_ENABLE_FTS3) && !defined(SQLITE_CORE) | 
|   279 # define SQLITE_CORE 1 |   299 # define SQLITE_CORE 1 | 
|   280 #endif |   300 #endif | 
|   281  |   301  | 
 |   302 #include "fts3Int.h" | 
 |   303  | 
|   282 #include <assert.h> |   304 #include <assert.h> | 
|   283 #include <stdlib.h> |   305 #include <stdlib.h> | 
 |   306 #include <stddef.h> | 
|   284 #include <stdio.h> |   307 #include <stdio.h> | 
|   285 #include <string.h> |   308 #include <string.h> | 
|   286 #include <ctype.h> |   309 #include <stdarg.h> | 
|   287  |   310  | 
|   288 #include "fts3.h" |   311 #include "fts3.h" | 
|   289 #include "fts3_expr.h" |  | 
|   290 #include "fts3_hash.h" |  | 
|   291 #include "fts3_tokenizer.h" |  | 
|   292 #ifndef SQLITE_CORE  |   312 #ifndef SQLITE_CORE  | 
|   293 # include "sqlite3ext.h" |   313 # include "sqlite3ext.h" | 
|   294   SQLITE_EXTENSION_INIT1 |   314   SQLITE_EXTENSION_INIT1 | 
|   295 #endif |   315 #endif | 
|   296  |   316  | 
|   297  |   317 /*  | 
|   298 /* TODO(shess) MAN, this thing needs some refactoring.  At minimum, it |   318 ** Write a 64-bit variable-length integer to memory starting at p[0]. | 
|   299 ** would be nice to order the file better, perhaps something along the |   319 ** The length of data written will be between 1 and FTS3_VARINT_MAX bytes. | 
|   300 ** lines of: |   320 ** The number of bytes written is returned. | 
|   301 ** |  | 
|   302 **  - utility functions |  | 
|   303 **  - table setup functions |  | 
|   304 **  - table update functions |  | 
|   305 **  - table query functions |  | 
|   306 ** |  | 
|   307 ** Put the query functions last because they're likely to reference |  | 
|   308 ** typedefs or functions from the table update section. |  | 
|   309 */ |   321 */ | 
|   310  |   322 int sqlite3Fts3PutVarint(char *p, sqlite_int64 v){ | 
|   311 #if 0 |  | 
|   312 # define FTSTRACE(A)  printf A; fflush(stdout) |  | 
|   313 #else |  | 
|   314 # define FTSTRACE(A) |  | 
|   315 #endif |  | 
|   316  |  | 
|   317 #if 0 |  | 
|   318 /* Useful to set breakpoints. See main.c sqlite3Corrupt(). */ |  | 
|   319 static int fts3Corrupt(void){ |  | 
|   320   return SQLITE_CORRUPT; |  | 
|   321 } |  | 
|   322 # define SQLITE_CORRUPT_BKPT fts3Corrupt() |  | 
|   323 #else |  | 
|   324 # define SQLITE_CORRUPT_BKPT SQLITE_CORRUPT |  | 
|   325 #endif |  | 
|   326  |  | 
|   327 /* It is not safe to call isspace(), tolower(), or isalnum() on |  | 
|   328 ** hi-bit-set characters.  This is the same solution used in the |  | 
|   329 ** tokenizer. |  | 
|   330 */ |  | 
|   331 /* TODO(shess) The snippet-generation code should be using the |  | 
|   332 ** tokenizer-generated tokens rather than doing its own local |  | 
|   333 ** tokenization. |  | 
|   334 */ |  | 
|   335 /* TODO(shess) Is __isascii() a portable version of (c&0x80)==0? */ |  | 
|   336 static int safe_isspace(char c){ |  | 
|   337   return (c&0x80)==0 ? isspace(c) : 0; |  | 
|   338 } |  | 
|   339 static int safe_tolower(char c){ |  | 
|   340   return (c>='A' && c<='Z') ? (c-'A'+'a') : c; |  | 
|   341 } |  | 
|   342 static int safe_isalnum(char c){ |  | 
|   343   return (c&0x80)==0 ? isalnum(c) : 0; |  | 
|   344 } |  | 
|   345  |  | 
|   346 typedef enum DocListType { |  | 
|   347   DL_DOCIDS,              /* docids only */ |  | 
|   348   DL_POSITIONS,           /* docids + positions */ |  | 
|   349   DL_POSITIONS_OFFSETS    /* docids + positions + offsets */ |  | 
|   350 } DocListType; |  | 
|   351  |  | 
|   352 /* |  | 
|   353 ** By default, only positions and not offsets are stored in the doclists. |  | 
|   354 ** To change this so that offsets are stored too, compile with |  | 
|   355 ** |  | 
|   356 **          -DDL_DEFAULT=DL_POSITIONS_OFFSETS |  | 
|   357 ** |  | 
|   358 ** If DL_DEFAULT is set to DL_DOCIDS, your table can only be inserted |  | 
|   359 ** into (no deletes or updates). |  | 
|   360 */ |  | 
|   361 #ifndef DL_DEFAULT |  | 
|   362 # define DL_DEFAULT DL_POSITIONS |  | 
|   363 #endif |  | 
|   364  |  | 
|   365 enum { |  | 
|   366   POS_END = 0,        /* end of this position list */ |  | 
|   367   POS_COLUMN,         /* followed by new column number */ |  | 
|   368   POS_BASE |  | 
|   369 }; |  | 
|   370  |  | 
|   371 /* MERGE_COUNT controls how often we merge segments (see comment at |  | 
|   372 ** top of file). |  | 
|   373 */ |  | 
|   374 #define MERGE_COUNT 16 |  | 
|   375  |  | 
|   376 /* utility functions */ |  | 
|   377  |  | 
|   378 /* CLEAR() and SCRAMBLE() abstract memset() on a pointer to a single |  | 
|   379 ** record to prevent errors of the form: |  | 
|   380 ** |  | 
|   381 ** my_function(SomeType *b){ |  | 
|   382 **   memset(b, '\0', sizeof(b));  // sizeof(b)!=sizeof(*b) |  | 
|   383 ** } |  | 
|   384 */ |  | 
|   385 /* TODO(shess) Obvious candidates for a header file. */ |  | 
|   386 #define CLEAR(b) memset(b, '\0', sizeof(*(b))) |  | 
|   387  |  | 
|   388 #ifndef NDEBUG |  | 
|   389 #  define SCRAMBLE(b) memset(b, 0x55, sizeof(*(b))) |  | 
|   390 #else |  | 
|   391 #  define SCRAMBLE(b) |  | 
|   392 #endif |  | 
|   393  |  | 
|   394 /* We may need up to VARINT_MAX bytes to store an encoded 64-bit integer. */ |  | 
|   395 #define VARINT_MAX 10 |  | 
|   396  |  | 
|   397 /* Write a 64-bit variable-length integer to memory starting at p[0]. |  | 
|   398  * The length of data written will be between 1 and VARINT_MAX bytes. |  | 
|   399  * The number of bytes written is returned. */ |  | 
|   400 static int fts3PutVarint(char *p, sqlite_int64 v){ |  | 
|   401   unsigned char *q = (unsigned char *) p; |   323   unsigned char *q = (unsigned char *) p; | 
|   402   sqlite_uint64 vu = v; |   324   sqlite_uint64 vu = v; | 
|   403   do{ |   325   do{ | 
|   404     *q++ = (unsigned char) ((vu & 0x7f) | 0x80); |   326     *q++ = (unsigned char) ((vu & 0x7f) | 0x80); | 
|   405     vu >>= 7; |   327     vu >>= 7; | 
|   406   }while( vu!=0 ); |   328   }while( vu!=0 ); | 
|   407   q[-1] &= 0x7f;  /* turn off high bit in final byte */ |   329   q[-1] &= 0x7f;  /* turn off high bit in final byte */ | 
|   408   assert( q - (unsigned char *)p <= VARINT_MAX ); |   330   assert( q - (unsigned char *)p <= FTS3_VARINT_MAX ); | 
|   409   return (int) (q - (unsigned char *)p); |   331   return (int) (q - (unsigned char *)p); | 
|   410 } |   332 } | 
|   411  |   333  | 
|   412 /* Read a 64-bit variable-length integer from memory starting at p[0]. |   334 /*  | 
|   413  * Return the number of bytes read, or 0 on error. |   335 ** Read a 64-bit variable-length integer from memory starting at p[0]. | 
|   414  * The value is stored in *v. */ |   336 ** Return the number of bytes read. | 
|   415 static int fts3GetVarintSafe(const char *p, sqlite_int64 *v, int max){ |   337 ** The value is stored in *v. | 
 |   338 */ | 
 |   339 int sqlite3Fts3GetVarint(const char *p, sqlite_int64 *v){ | 
|   416   const unsigned char *q = (const unsigned char *) p; |   340   const unsigned char *q = (const unsigned char *) p; | 
|   417   sqlite_uint64 x = 0, y = 1; |   341   sqlite_uint64 x = 0, y = 1; | 
|   418   if( max>VARINT_MAX ) max = VARINT_MAX; |   342   while( (*q&0x80)==0x80 && q-(unsigned char *)p<FTS3_VARINT_MAX ){ | 
|   419   while( max && (*q & 0x80) == 0x80 ){ |  | 
|   420     max--; |  | 
|   421     x += y * (*q++ & 0x7f); |   343     x += y * (*q++ & 0x7f); | 
|   422     y <<= 7; |   344     y <<= 7; | 
|   423   } |   345   } | 
|   424   if( !max ){ |  | 
|   425     assert( 0 ); |  | 
|   426     return 0;  /* tried to read too much; bad data */ |  | 
|   427   } |  | 
|   428   x += y * (*q++); |   346   x += y * (*q++); | 
|   429   *v = (sqlite_int64) x; |   347   *v = (sqlite_int64) x; | 
|   430   return (int) (q - (unsigned char *)p); |   348   return (int) (q - (unsigned char *)p); | 
|   431 } |   349 } | 
|   432  |   350  | 
|   433 static int fts3GetVarint(const char *p, sqlite_int64 *v){ |   351 /* | 
|   434   return fts3GetVarintSafe(p, v, VARINT_MAX); |   352 ** Similar to sqlite3Fts3GetVarint(), except that the output is truncated to a | 
|   435 } |   353 ** 32-bit integer before it is returned. | 
|   436  |   354 */ | 
|   437 static int fts3GetVarint32Safe(const char *p, int *pi, int max){ |   355 int sqlite3Fts3GetVarint32(const char *p, int *pi){ | 
|   438  sqlite_int64 i; |   356  sqlite_int64 i; | 
|   439  int ret = fts3GetVarintSafe(p, &i, max); |   357  int ret = sqlite3Fts3GetVarint(p, &i); | 
|   440  if( !ret ) return ret; |  | 
|   441  *pi = (int) i; |   358  *pi = (int) i; | 
|   442  assert( *pi==i ); |  | 
|   443  return ret; |   359  return ret; | 
|   444 } |   360 } | 
|   445  |   361  | 
|   446 static int fts3GetVarint32(const char* p, int *pi){ |   362 /* | 
|   447   return fts3GetVarint32Safe(p, pi, VARINT_MAX); |   363 ** Return the number of bytes required to encode v as a varint | 
|   448 } |   364 */ | 
|   449  |   365 int sqlite3Fts3VarintLen(sqlite3_uint64 v){ | 
|   450 /*******************************************************************/ |   366   int i = 0; | 
|   451 /* DataBuffer is used to collect data into a buffer in piecemeal |   367   do{ | 
|   452 ** fashion.  It implements the usual distinction between amount of |   368     i++; | 
|   453 ** data currently stored (nData) and buffer capacity (nCapacity). |   369     v >>= 7; | 
|   454 ** |   370   }while( v!=0 ); | 
|   455 ** dataBufferInit - create a buffer with given initial capacity. |   371   return i; | 
|   456 ** dataBufferReset - forget buffer's data, retaining capacity. |  | 
|   457 ** dataBufferDestroy - free buffer's data. |  | 
|   458 ** dataBufferSwap - swap contents of two buffers. |  | 
|   459 ** dataBufferExpand - expand capacity without adding data. |  | 
|   460 ** dataBufferAppend - append data. |  | 
|   461 ** dataBufferAppend2 - append two pieces of data at once. |  | 
|   462 ** dataBufferReplace - replace buffer's data. |  | 
|   463 */ |  | 
|   464 typedef struct DataBuffer { |  | 
|   465   char *pData;          /* Pointer to malloc'ed buffer. */ |  | 
|   466   int nCapacity;        /* Size of pData buffer. */ |  | 
|   467   int nData;            /* End of data loaded into pData. */ |  | 
|   468 } DataBuffer; |  | 
|   469  |  | 
|   470 static void dataBufferInit(DataBuffer *pBuffer, int nCapacity){ |  | 
|   471   assert( nCapacity>=0 ); |  | 
|   472   pBuffer->nData = 0; |  | 
|   473   pBuffer->nCapacity = nCapacity; |  | 
|   474   pBuffer->pData = nCapacity==0 ? NULL : sqlite3_malloc(nCapacity); |  | 
|   475 } |  | 
|   476 static void dataBufferReset(DataBuffer *pBuffer){ |  | 
|   477   pBuffer->nData = 0; |  | 
|   478 } |  | 
|   479 static void dataBufferDestroy(DataBuffer *pBuffer){ |  | 
|   480   if( pBuffer->pData!=NULL ) sqlite3_free(pBuffer->pData); |  | 
|   481   SCRAMBLE(pBuffer); |  | 
|   482 } |  | 
|   483 static void dataBufferSwap(DataBuffer *pBuffer1, DataBuffer *pBuffer2){ |  | 
|   484   DataBuffer tmp = *pBuffer1; |  | 
|   485   *pBuffer1 = *pBuffer2; |  | 
|   486   *pBuffer2 = tmp; |  | 
|   487 } |  | 
|   488 static void dataBufferExpand(DataBuffer *pBuffer, int nAddCapacity){ |  | 
|   489   assert( nAddCapacity>0 ); |  | 
|   490   /* TODO(shess) Consider expanding more aggressively.  Note that the |  | 
|   491   ** underlying malloc implementation may take care of such things for |  | 
|   492   ** us already. |  | 
|   493   */ |  | 
|   494   if( pBuffer->nData+nAddCapacity>pBuffer->nCapacity ){ |  | 
|   495     pBuffer->nCapacity = pBuffer->nData+nAddCapacity; |  | 
|   496     pBuffer->pData = sqlite3_realloc(pBuffer->pData, pBuffer->nCapacity); |  | 
|   497   } |  | 
|   498 } |  | 
|   499 static void dataBufferAppend(DataBuffer *pBuffer, |  | 
|   500                              const char *pSource, int nSource){ |  | 
|   501   assert( nSource>0 && pSource!=NULL ); |  | 
|   502   dataBufferExpand(pBuffer, nSource); |  | 
|   503   memcpy(pBuffer->pData+pBuffer->nData, pSource, nSource); |  | 
|   504   pBuffer->nData += nSource; |  | 
|   505 } |  | 
|   506 static void dataBufferAppend2(DataBuffer *pBuffer, |  | 
|   507                               const char *pSource1, int nSource1, |  | 
|   508                               const char *pSource2, int nSource2){ |  | 
|   509   assert( nSource1>0 && pSource1!=NULL ); |  | 
|   510   assert( nSource2>0 && pSource2!=NULL ); |  | 
|   511   dataBufferExpand(pBuffer, nSource1+nSource2); |  | 
|   512   memcpy(pBuffer->pData+pBuffer->nData, pSource1, nSource1); |  | 
|   513   memcpy(pBuffer->pData+pBuffer->nData+nSource1, pSource2, nSource2); |  | 
|   514   pBuffer->nData += nSource1+nSource2; |  | 
|   515 } |  | 
|   516 static void dataBufferReplace(DataBuffer *pBuffer, |  | 
|   517                               const char *pSource, int nSource){ |  | 
|   518   dataBufferReset(pBuffer); |  | 
|   519   dataBufferAppend(pBuffer, pSource, nSource); |  | 
|   520 } |  | 
|   521  |  | 
|   522 /* StringBuffer is a null-terminated version of DataBuffer. */ |  | 
|   523 typedef struct StringBuffer { |  | 
|   524   DataBuffer b;            /* Includes null terminator. */ |  | 
|   525 } StringBuffer; |  | 
|   526  |  | 
|   527 static void initStringBuffer(StringBuffer *sb){ |  | 
|   528   dataBufferInit(&sb->b, 100); |  | 
|   529   dataBufferReplace(&sb->b, "", 1); |  | 
|   530 } |  | 
|   531 static int stringBufferLength(StringBuffer *sb){ |  | 
|   532   return sb->b.nData-1; |  | 
|   533 } |  | 
|   534 static char *stringBufferData(StringBuffer *sb){ |  | 
|   535   return sb->b.pData; |  | 
|   536 } |  | 
|   537 static void stringBufferDestroy(StringBuffer *sb){ |  | 
|   538   dataBufferDestroy(&sb->b); |  | 
|   539 } |  | 
|   540  |  | 
|   541 static void nappend(StringBuffer *sb, const char *zFrom, int nFrom){ |  | 
|   542   assert( sb->b.nData>0 ); |  | 
|   543   if( nFrom>0 ){ |  | 
|   544     sb->b.nData--; |  | 
|   545     dataBufferAppend2(&sb->b, zFrom, nFrom, "", 1); |  | 
|   546   } |  | 
|   547 } |  | 
|   548 static void append(StringBuffer *sb, const char *zFrom){ |  | 
|   549   nappend(sb, zFrom, strlen(zFrom)); |  | 
|   550 } |  | 
|   551  |  | 
|   552 /* Append a list of strings separated by commas. */ |  | 
|   553 static void appendList(StringBuffer *sb, int nString, char **azString){ |  | 
|   554   int i; |  | 
|   555   for(i=0; i<nString; ++i){ |  | 
|   556     if( i>0 ) append(sb, ", "); |  | 
|   557     append(sb, azString[i]); |  | 
|   558   } |  | 
|   559 } |  | 
|   560  |  | 
|   561 static int endsInWhiteSpace(StringBuffer *p){ |  | 
|   562   return stringBufferLength(p)>0 && |  | 
|   563     safe_isspace(stringBufferData(p)[stringBufferLength(p)-1]); |  | 
|   564 } |  | 
|   565  |  | 
|   566 /* If the StringBuffer ends in something other than white space, add a |  | 
|   567 ** single space character to the end. |  | 
|   568 */ |  | 
|   569 static void appendWhiteSpace(StringBuffer *p){ |  | 
|   570   if( stringBufferLength(p)==0 ) return; |  | 
|   571   if( !endsInWhiteSpace(p) ) append(p, " "); |  | 
|   572 } |  | 
|   573  |  | 
|   574 /* Remove white space from the end of the StringBuffer */ |  | 
|   575 static void trimWhiteSpace(StringBuffer *p){ |  | 
|   576   while( endsInWhiteSpace(p) ){ |  | 
|   577     p->b.pData[--p->b.nData-1] = '\0'; |  | 
|   578   } |  | 
|   579 } |  | 
|   580  |  | 
|   581 /*******************************************************************/ |  | 
|   582 /* DLReader is used to read document elements from a doclist.  The |  | 
|   583 ** current docid is cached, so dlrDocid() is fast.  DLReader does not |  | 
|   584 ** own the doclist buffer. |  | 
|   585 ** |  | 
|   586 ** dlrAtEnd - true if there's no more data to read. |  | 
|   587 ** dlrDocid - docid of current document. |  | 
|   588 ** dlrDocData - doclist data for current document (including docid). |  | 
|   589 ** dlrDocDataBytes - length of same. |  | 
|   590 ** dlrAllDataBytes - length of all remaining data. |  | 
|   591 ** dlrPosData - position data for current document. |  | 
|   592 ** dlrPosDataLen - length of pos data for current document (incl POS_END). |  | 
|   593 ** dlrStep - step to current document. |  | 
|   594 ** dlrInit - initial for doclist of given type against given data. |  | 
|   595 ** dlrDestroy - clean up. |  | 
|   596 ** |  | 
|   597 ** Expected usage is something like: |  | 
|   598 ** |  | 
|   599 **   DLReader reader; |  | 
|   600 **   dlrInit(&reader, pData, nData); |  | 
|   601 **   while( !dlrAtEnd(&reader) ){ |  | 
|   602 **     // calls to dlrDocid() and kin. |  | 
|   603 **     dlrStep(&reader); |  | 
|   604 **   } |  | 
|   605 **   dlrDestroy(&reader); |  | 
|   606 */ |  | 
|   607 typedef struct DLReader { |  | 
|   608   DocListType iType; |  | 
|   609   const char *pData; |  | 
|   610   int nData; |  | 
|   611  |  | 
|   612   sqlite_int64 iDocid; |  | 
|   613   int nElement; |  | 
|   614 } DLReader; |  | 
|   615  |  | 
|   616 static int dlrAtEnd(DLReader *pReader){ |  | 
|   617   assert( pReader->nData>=0 ); |  | 
|   618   return pReader->nData<=0; |  | 
|   619 } |  | 
|   620 static sqlite_int64 dlrDocid(DLReader *pReader){ |  | 
|   621   assert( !dlrAtEnd(pReader) ); |  | 
|   622   return pReader->iDocid; |  | 
|   623 } |  | 
|   624 static const char *dlrDocData(DLReader *pReader){ |  | 
|   625   assert( !dlrAtEnd(pReader) ); |  | 
|   626   return pReader->pData; |  | 
|   627 } |  | 
|   628 static int dlrDocDataBytes(DLReader *pReader){ |  | 
|   629   assert( !dlrAtEnd(pReader) ); |  | 
|   630   return pReader->nElement; |  | 
|   631 } |  | 
|   632 static int dlrAllDataBytes(DLReader *pReader){ |  | 
|   633   assert( !dlrAtEnd(pReader) ); |  | 
|   634   return pReader->nData; |  | 
|   635 } |  | 
|   636 /* TODO(shess) Consider adding a field to track iDocid varint length |  | 
|   637 ** to make these two functions faster.  This might matter (a tiny bit) |  | 
|   638 ** for queries. |  | 
|   639 */ |  | 
|   640 static const char *dlrPosData(DLReader *pReader){ |  | 
|   641   sqlite_int64 iDummy; |  | 
|   642   int n = fts3GetVarintSafe(pReader->pData, &iDummy, pReader->nElement); |  | 
|   643   if( !n ) return NULL; |  | 
|   644   assert( !dlrAtEnd(pReader) ); |  | 
|   645   return pReader->pData+n; |  | 
|   646 } |  | 
|   647 static int dlrPosDataLen(DLReader *pReader){ |  | 
|   648   sqlite_int64 iDummy; |  | 
|   649   int n = fts3GetVarint(pReader->pData, &iDummy); |  | 
|   650   assert( !dlrAtEnd(pReader) ); |  | 
|   651   return pReader->nElement-n; |  | 
|   652 } |  | 
|   653 static int dlrStep(DLReader *pReader){ |  | 
|   654   assert( !dlrAtEnd(pReader) ); |  | 
|   655  |  | 
|   656   /* Skip past current doclist element. */ |  | 
|   657   assert( pReader->nElement<=pReader->nData ); |  | 
|   658   pReader->pData += pReader->nElement; |  | 
|   659   pReader->nData -= pReader->nElement; |  | 
|   660  |  | 
|   661   /* If there is more data, read the next doclist element. */ |  | 
|   662   if( pReader->nData>0 ){ |  | 
|   663     sqlite_int64 iDocidDelta; |  | 
|   664     int nTotal = 0; |  | 
|   665     int iDummy, n = fts3GetVarintSafe(pReader->pData, &iDocidDelta, pReader->nDa
      ta); |  | 
|   666     if( !n ) return SQLITE_CORRUPT_BKPT; |  | 
|   667     nTotal += n; |  | 
|   668     pReader->iDocid += iDocidDelta; |  | 
|   669     if( pReader->iType>=DL_POSITIONS ){ |  | 
|   670       while( 1 ){ |  | 
|   671         n = fts3GetVarint32Safe(pReader->pData+nTotal, &iDummy, pReader->nData-n
      Total); |  | 
|   672         if( !n ) return SQLITE_CORRUPT_BKPT; |  | 
|   673         nTotal += n; |  | 
|   674         if( iDummy==POS_END ) break; |  | 
|   675         if( iDummy==POS_COLUMN ){ |  | 
|   676           n = fts3GetVarint32Safe(pReader->pData+nTotal, &iDummy, pReader->nData
      -nTotal); |  | 
|   677           if( !n ) return SQLITE_CORRUPT_BKPT; |  | 
|   678           nTotal += n; |  | 
|   679         }else if( pReader->iType==DL_POSITIONS_OFFSETS ){ |  | 
|   680           n = fts3GetVarint32Safe(pReader->pData+nTotal, &iDummy, pReader->nData
      -nTotal); |  | 
|   681           if( !n ) return SQLITE_CORRUPT_BKPT; |  | 
|   682           nTotal += n; |  | 
|   683           n = fts3GetVarint32Safe(pReader->pData+nTotal, &iDummy, pReader->nData
      -nTotal); |  | 
|   684           if( !n ) return SQLITE_CORRUPT_BKPT; |  | 
|   685           nTotal += n; |  | 
|   686         } |  | 
|   687       } |  | 
|   688     } |  | 
|   689     pReader->nElement = nTotal; |  | 
|   690     assert( pReader->nElement<=pReader->nData ); |  | 
|   691   } |  | 
|   692   return SQLITE_OK; |  | 
|   693 } |  | 
/* Tear down pReader.  The only action is SCRAMBLE(), which is defined
** elsewhere in this file (presumably it poisons the structure in debug
** builds -- confirm).  The doclist data itself is not touched.
*/
static void dlrDestroy(DLReader *pReader){
  SCRAMBLE(pReader);
}
|   697 static int dlrInit(DLReader *pReader, DocListType iType, |  | 
|   698                    const char *pData, int nData){ |  | 
|   699   int rc; |  | 
|   700   assert( pData!=NULL && nData!=0 ); |  | 
|   701   pReader->iType = iType; |  | 
|   702   pReader->pData = pData; |  | 
|   703   pReader->nData = nData; |  | 
|   704   pReader->nElement = 0; |  | 
|   705   pReader->iDocid = 0; |  | 
|   706  |  | 
|   707   /* Load the first element's data.  There must be a first element. */ |  | 
|   708   rc = dlrStep(pReader); |  | 
|   709   if( rc!=SQLITE_OK ) dlrDestroy(pReader); |  | 
|   710   return rc; |  | 
|   711 } |  | 
|   712  |  | 
|   713 #ifndef NDEBUG |  | 
|   714 /* Verify that the doclist can be validly decoded.  Also returns the |  | 
|   715 ** last docid found because it is convenient in other assertions for |  | 
|   716 ** DLWriter. |  | 
|   717 */ |  | 
|   718 static void docListValidate(DocListType iType, const char *pData, int nData, |  | 
|   719                             sqlite_int64 *pLastDocid){ |  | 
|   720   sqlite_int64 iPrevDocid = 0; |  | 
|   721   assert( nData>0 ); |  | 
|   722   assert( pData!=0 ); |  | 
|   723   assert( pData+nData>pData ); |  | 
|   724   while( nData!=0 ){ |  | 
|   725     sqlite_int64 iDocidDelta; |  | 
|   726     int n = fts3GetVarint(pData, &iDocidDelta); |  | 
|   727     iPrevDocid += iDocidDelta; |  | 
|   728     if( iType>DL_DOCIDS ){ |  | 
|   729       int iDummy; |  | 
|   730       while( 1 ){ |  | 
|   731         n += fts3GetVarint32(pData+n, &iDummy); |  | 
|   732         if( iDummy==POS_END ) break; |  | 
|   733         if( iDummy==POS_COLUMN ){ |  | 
|   734           n += fts3GetVarint32(pData+n, &iDummy); |  | 
|   735         }else if( iType>DL_POSITIONS ){ |  | 
|   736           n += fts3GetVarint32(pData+n, &iDummy); |  | 
|   737           n += fts3GetVarint32(pData+n, &iDummy); |  | 
|   738         } |  | 
|   739         assert( n<=nData ); |  | 
|   740       } |  | 
|   741     } |  | 
|   742     assert( n<=nData ); |  | 
|   743     pData += n; |  | 
|   744     nData -= n; |  | 
|   745   } |  | 
|   746   if( pLastDocid ) *pLastDocid = iPrevDocid; |  | 
|   747 } |  | 
|   748 #define ASSERT_VALID_DOCLIST(i, p, n, o) docListValidate(i, p, n, o) |  | 
|   749 #else |  | 
|   750 #define ASSERT_VALID_DOCLIST(i, p, n, o) assert( 1 ) |  | 
|   751 #endif |  | 
|   752  |  | 
/*******************************************************************/
/* DLWriter is used to write doclist data to a DataBuffer.  DLWriter
** always appends to the buffer and does not own it.
**
** dlwInit - initialize to write a given type doclist to a buffer.
** dlwDestroy - clear the writer's memory.  Does not free buffer.
** dlwAppend - append raw doclist data to buffer.
** dlwCopy - copy next doclist from reader to writer.
** dlwAdd - construct doclist element and append to buffer.
**    Only apply dlwAdd() to DL_DOCIDS doclists (else use PLWriter).
*/
typedef struct DLWriter {
  DocListType iType;         /* type of doclist being written */
  DataBuffer *b;             /* output buffer; appended to, not owned */
  sqlite_int64 iPrevDocid;   /* last docid written; base for the next delta */
#ifndef NDEBUG
  int has_iPrevDocid;        /* set once a docid has been written (asserts) */
#endif
} DLWriter;
|   772  |  | 
|   773 static void dlwInit(DLWriter *pWriter, DocListType iType, DataBuffer *b){ |  | 
|   774   pWriter->b = b; |  | 
|   775   pWriter->iType = iType; |  | 
|   776   pWriter->iPrevDocid = 0; |  | 
|   777 #ifndef NDEBUG |  | 
|   778   pWriter->has_iPrevDocid = 0; |  | 
|   779 #endif |  | 
|   780 } |  | 
/* Tear down pWriter.  Only SCRAMBLE() (defined elsewhere in this file)
** is applied to the writer; the DataBuffer it points at is left alone.
*/
static void dlwDestroy(DLWriter *pWriter){
  SCRAMBLE(pWriter);
}
|   784 /* iFirstDocid is the first docid in the doclist in pData.  It is |  | 
|   785 ** needed because pData may point within a larger doclist, in which |  | 
|   786 ** case the first item would be delta-encoded. |  | 
|   787 ** |  | 
|   788 ** iLastDocid is the final docid in the doclist in pData.  It is |  | 
|   789 ** needed to create the new iPrevDocid for future delta-encoding.  The |  | 
|   790 ** code could decode the passed doclist to recreate iLastDocid, but |  | 
|   791 ** the only current user (docListMerge) already has decoded this |  | 
|   792 ** information. |  | 
|   793 */ |  | 
|   794 /* TODO(shess) This has become just a helper for docListMerge. |  | 
|   795 ** Consider a refactor to make this cleaner. |  | 
|   796 */ |  | 
|   797 static int dlwAppend(DLWriter *pWriter, |  | 
|   798                      const char *pData, int nData, |  | 
|   799                      sqlite_int64 iFirstDocid, sqlite_int64 iLastDocid){ |  | 
|   800   sqlite_int64 iDocid = 0; |  | 
|   801   char c[VARINT_MAX]; |  | 
|   802   int nFirstOld, nFirstNew;     /* Old and new varint len of first docid. */ |  | 
|   803 #ifndef NDEBUG |  | 
|   804   sqlite_int64 iLastDocidDelta; |  | 
|   805 #endif |  | 
|   806  |  | 
|   807   /* Recode the initial docid as delta from iPrevDocid. */ |  | 
|   808   nFirstOld = fts3GetVarintSafe(pData, &iDocid, nData); |  | 
|   809   if( !nFirstOld ) return SQLITE_CORRUPT_BKPT; |  | 
|   810   assert( nFirstOld<nData || (nFirstOld==nData && pWriter->iType==DL_DOCIDS) ); |  | 
|   811   nFirstNew = fts3PutVarint(c, iFirstDocid-pWriter->iPrevDocid); |  | 
|   812  |  | 
|   813   /* Verify that the incoming doclist is valid AND that it ends with |  | 
|   814   ** the expected docid.  This is essential because we'll trust this |  | 
|   815   ** docid in future delta-encoding. |  | 
|   816   */ |  | 
|   817   ASSERT_VALID_DOCLIST(pWriter->iType, pData, nData, &iLastDocidDelta); |  | 
|   818   assert( iLastDocid==iFirstDocid-iDocid+iLastDocidDelta ); |  | 
|   819  |  | 
|   820   /* Append recoded initial docid and everything else.  Rest of docids |  | 
|   821   ** should have been delta-encoded from previous initial docid. |  | 
|   822   */ |  | 
|   823   if( nFirstOld<nData ){ |  | 
|   824     dataBufferAppend2(pWriter->b, c, nFirstNew, |  | 
|   825                       pData+nFirstOld, nData-nFirstOld); |  | 
|   826   }else{ |  | 
|   827     dataBufferAppend(pWriter->b, c, nFirstNew); |  | 
|   828   } |  | 
|   829   pWriter->iPrevDocid = iLastDocid; |  | 
|   830   return SQLITE_OK; |  | 
|   831 } |  | 
|   832 static int dlwCopy(DLWriter *pWriter, DLReader *pReader){ |  | 
|   833   return dlwAppend(pWriter, dlrDocData(pReader), dlrDocDataBytes(pReader), |  | 
|   834                    dlrDocid(pReader), dlrDocid(pReader)); |  | 
|   835 } |  | 
|   836 static void dlwAdd(DLWriter *pWriter, sqlite_int64 iDocid){ |  | 
|   837   char c[VARINT_MAX]; |  | 
|   838   int n = fts3PutVarint(c, iDocid-pWriter->iPrevDocid); |  | 
|   839  |  | 
|   840   /* Docids must ascend. */ |  | 
|   841   assert( !pWriter->has_iPrevDocid || iDocid>pWriter->iPrevDocid ); |  | 
|   842   assert( pWriter->iType==DL_DOCIDS ); |  | 
|   843  |  | 
|   844   dataBufferAppend(pWriter->b, c, n); |  | 
|   845   pWriter->iPrevDocid = iDocid; |  | 
|   846 #ifndef NDEBUG |  | 
|   847   pWriter->has_iPrevDocid = 1; |  | 
|   848 #endif |  | 
|   849 } |  | 
|   850  |  | 
|   851 /*******************************************************************/ |  | 
|   852 /* PLReader is used to read data from a document's position list.  As |  | 
|   853 ** the caller steps through the list, data is cached so that varints |  | 
|   854 ** only need to be decoded once. |  | 
|   855 ** |  | 
|   856 ** plrInit, plrDestroy - create/destroy a reader. |  | 
|   857 ** plrColumn, plrPosition, plrStartOffset, plrEndOffset - accessors |  | 
|   858 ** plrAtEnd - at end of stream, only call plrDestroy once true. |  | 
|   859 ** plrStep - step to the next element. |  | 
|   860 */ |  | 
/* Cursor over one document's position list.  plrStep() decodes the
** next entry into the i* fields below.
*/
typedef struct PLReader {
  /* These refer to the next position's data.  nData will reach 0 when
  ** reading the last position, so plrStep() signals EOF by setting
  ** pData to NULL.
  */
  const char *pData;
  int nData;

  DocListType iType;   /* doclist type, copied from the DLReader in plrInit() */
  int iColumn;         /* the last column read */
  int iPosition;       /* the last position read */
  int iStartOffset;    /* the last start offset read */
  int iEndOffset;      /* the last end offset read */
} PLReader;
|   875  |  | 
/* Return non-zero once the reader has run off the end of its position
** list, which plrStep() records by setting pData to NULL.
*/
static int plrAtEnd(PLReader *pReader){
  return pReader->pData==NULL;
}
/* Column of the position most recently decoded by plrStep().  Must not
** be called once plrAtEnd() is true.
*/
static int plrColumn(PLReader *pReader){
  assert( !plrAtEnd(pReader) );
  return pReader->iColumn;
}
/* Position value most recently decoded by plrStep().  Must not be
** called once plrAtEnd() is true.
*/
static int plrPosition(PLReader *pReader){
  assert( !plrAtEnd(pReader) );
  return pReader->iPosition;
}
/* Start offset most recently decoded by plrStep().  plrStep() only
** updates it for DL_POSITIONS_OFFSETS lists.  Must not be called once
** plrAtEnd() is true.
*/
static int plrStartOffset(PLReader *pReader){
  assert( !plrAtEnd(pReader) );
  return pReader->iStartOffset;
}
/* End offset most recently decoded by plrStep().  plrStep() only
** updates it for DL_POSITIONS_OFFSETS lists.  Must not be called once
** plrAtEnd() is true.
*/
static int plrEndOffset(PLReader *pReader){
  assert( !plrAtEnd(pReader) );
  return pReader->iEndOffset;
}
|   895 static int plrStep(PLReader *pReader){ |  | 
|   896   int i, n, nTotal = 0; |  | 
|   897  |  | 
|   898   assert( !plrAtEnd(pReader) ); |  | 
|   899  |  | 
|   900   if( pReader->nData<=0 ){ |  | 
|   901     pReader->pData = NULL; |  | 
|   902     return SQLITE_OK; |  | 
|   903   } |  | 
|   904  |  | 
|   905   n = fts3GetVarint32Safe(pReader->pData, &i, pReader->nData); |  | 
|   906   if( !n ) return SQLITE_CORRUPT_BKPT; |  | 
|   907   nTotal += n; |  | 
|   908   if( i==POS_COLUMN ){ |  | 
|   909     n = fts3GetVarint32Safe(pReader->pData+nTotal, &pReader->iColumn, pReader->n
      Data-nTotal); |  | 
|   910     if( !n ) return SQLITE_CORRUPT_BKPT; |  | 
|   911     nTotal += n; |  | 
|   912     pReader->iPosition = 0; |  | 
|   913     pReader->iStartOffset = 0; |  | 
|   914     n = fts3GetVarint32Safe(pReader->pData+nTotal, &i, pReader->nData-nTotal); |  | 
|   915     if( !n ) return SQLITE_CORRUPT_BKPT; |  | 
|   916     nTotal += n; |  | 
|   917   } |  | 
|   918   /* Should never see adjacent column changes. */ |  | 
|   919   assert( i!=POS_COLUMN ); |  | 
|   920  |  | 
|   921   if( i==POS_END ){ |  | 
|   922     assert( nTotal<=pReader->nData ); |  | 
|   923     pReader->nData = 0; |  | 
|   924     pReader->pData = NULL; |  | 
|   925     return SQLITE_OK; |  | 
|   926   } |  | 
|   927  |  | 
|   928   pReader->iPosition += i-POS_BASE; |  | 
|   929   if( pReader->iType==DL_POSITIONS_OFFSETS ){ |  | 
|   930     n = fts3GetVarint32Safe(pReader->pData+nTotal, &i, pReader->nData-nTotal); |  | 
|   931     if( !n ) return SQLITE_CORRUPT_BKPT; |  | 
|   932     nTotal += n; |  | 
|   933     pReader->iStartOffset += i; |  | 
|   934     n = fts3GetVarint32Safe(pReader->pData+nTotal, &i, pReader->nData-nTotal); |  | 
|   935     if( !n ) return SQLITE_CORRUPT_BKPT; |  | 
|   936     nTotal += n; |  | 
|   937     pReader->iEndOffset = pReader->iStartOffset+i; |  | 
|   938   } |  | 
|   939   assert( nTotal<=pReader->nData ); |  | 
|   940   pReader->pData += nTotal; |  | 
|   941   pReader->nData -= nTotal; |  | 
|   942   return SQLITE_OK; |  | 
|   943 } |  | 
|   944  |  | 
/* Tear down pReader.  Only SCRAMBLE() (defined elsewhere in this file)
** is applied; the position data it referenced is not touched.
*/
static void plrDestroy(PLReader *pReader){
  SCRAMBLE(pReader);
}
|   948 static int plrInit(PLReader *pReader, DLReader *pDLReader){ |  | 
|   949   int rc; |  | 
|   950   pReader->pData = dlrPosData(pDLReader); |  | 
|   951   pReader->nData = dlrPosDataLen(pDLReader); |  | 
|   952   pReader->iType = pDLReader->iType; |  | 
|   953   pReader->iColumn = 0; |  | 
|   954   pReader->iPosition = 0; |  | 
|   955   pReader->iStartOffset = 0; |  | 
|   956   pReader->iEndOffset = 0; |  | 
|   957   rc = plrStep(pReader); |  | 
|   958   if( rc!=SQLITE_OK ) plrDestroy(pReader); |  | 
|   959   return rc; |  | 
|   960 } |  | 
|   961  |  | 
|   962 /*******************************************************************/ |  | 
|   963 /* PLWriter is used in constructing a document's position list.  As a |  | 
|   964 ** convenience, if iType is DL_DOCIDS, PLWriter becomes a no-op. |  | 
|   965 ** PLWriter writes to the associated DLWriter's buffer. |  | 
|   966 ** |  | 
|   967 ** plwInit - init for writing a document's poslist. |  | 
|   968 ** plwDestroy - clear a writer. |  | 
|   969 ** plwAdd - append position and offset information. |  | 
|   970 ** plwCopy - copy next position's data from reader to writer. |  | 
|   971 ** plwTerminate - add any necessary doclist terminator. |  | 
|   972 ** |  | 
|   973 ** Calling plwAdd() after plwTerminate() may result in a corrupt |  | 
|   974 ** doclist. |  | 
|   975 */ |  | 
|   976 /* TODO(shess) Until we've written the second item, we can cache the |  | 
|   977 ** first item's information.  Then we'd have three states: |  | 
|   978 ** |  | 
|   979 ** - initialized with docid, no positions. |  | 
|   980 ** - docid and one position. |  | 
|   981 ** - docid and multiple positions. |  | 
|   982 ** |  | 
|   983 ** Only the last state needs to actually write to dlw->b, which would |  | 
|   984 ** be an improvement in the DLCollector case. |  | 
|   985 */ |  | 
/* State for writing one document's position list.  The i* fields hold
** the previous values so that plwAdd() can delta-encode the next entry.
*/
typedef struct PLWriter {
  DLWriter *dlw;  /* doclist writer whose buffer receives the output */

  int iColumn;    /* the last column written */
  int iPos;       /* the last position written (plwTerminate() stores -1
                  ** here in debug builds) */
  int iOffset;    /* the last start offset written */
} PLWriter;
|   993  |  | 
|   994 /* TODO(shess) In the case where the parent is reading these values |  | 
|   995 ** from a PLReader, we could optimize to a copy if that PLReader has |  | 
|   996 ** the same type as pWriter. |  | 
|   997 */ |  | 
|   998 static void plwAdd(PLWriter *pWriter, int iColumn, int iPos, |  | 
|   999                    int iStartOffset, int iEndOffset){ |  | 
|  1000   /* Worst-case space for POS_COLUMN, iColumn, iPosDelta, |  | 
|  1001   ** iStartOffsetDelta, and iEndOffsetDelta. |  | 
|  1002   */ |  | 
|  1003   char c[5*VARINT_MAX]; |  | 
|  1004   int n = 0; |  | 
|  1005  |  | 
|  1006   /* Ban plwAdd() after plwTerminate(). */ |  | 
|  1007   assert( pWriter->iPos!=-1 ); |  | 
|  1008  |  | 
|  1009   if( pWriter->dlw->iType==DL_DOCIDS ) return; |  | 
|  1010  |  | 
|  1011   if( iColumn!=pWriter->iColumn ){ |  | 
|  1012     n += fts3PutVarint(c+n, POS_COLUMN); |  | 
|  1013     n += fts3PutVarint(c+n, iColumn); |  | 
|  1014     pWriter->iColumn = iColumn; |  | 
|  1015     pWriter->iPos = 0; |  | 
|  1016     pWriter->iOffset = 0; |  | 
|  1017   } |  | 
|  1018   assert( iPos>=pWriter->iPos ); |  | 
|  1019   n += fts3PutVarint(c+n, POS_BASE+(iPos-pWriter->iPos)); |  | 
|  1020   pWriter->iPos = iPos; |  | 
|  1021   if( pWriter->dlw->iType==DL_POSITIONS_OFFSETS ){ |  | 
|  1022     assert( iStartOffset>=pWriter->iOffset ); |  | 
|  1023     n += fts3PutVarint(c+n, iStartOffset-pWriter->iOffset); |  | 
|  1024     pWriter->iOffset = iStartOffset; |  | 
|  1025     assert( iEndOffset>=iStartOffset ); |  | 
|  1026     n += fts3PutVarint(c+n, iEndOffset-iStartOffset); |  | 
|  1027   } |  | 
|  1028   dataBufferAppend(pWriter->dlw->b, c, n); |  | 
|  1029 } |  | 
|  1030 static void plwCopy(PLWriter *pWriter, PLReader *pReader){ |  | 
|  1031   plwAdd(pWriter, plrColumn(pReader), plrPosition(pReader), |  | 
|  1032          plrStartOffset(pReader), plrEndOffset(pReader)); |  | 
|  1033 } |  | 
|  1034 static void plwInit(PLWriter *pWriter, DLWriter *dlw, sqlite_int64 iDocid){ |  | 
|  1035   char c[VARINT_MAX]; |  | 
|  1036   int n; |  | 
|  1037  |  | 
|  1038   pWriter->dlw = dlw; |  | 
|  1039  |  | 
|  1040   /* Docids must ascend. */ |  | 
|  1041   assert( !pWriter->dlw->has_iPrevDocid || iDocid>pWriter->dlw->iPrevDocid ); |  | 
|  1042   n = fts3PutVarint(c, iDocid-pWriter->dlw->iPrevDocid); |  | 
|  1043   dataBufferAppend(pWriter->dlw->b, c, n); |  | 
|  1044   pWriter->dlw->iPrevDocid = iDocid; |  | 
|  1045 #ifndef NDEBUG |  | 
|  1046   pWriter->dlw->has_iPrevDocid = 1; |  | 
|  1047 #endif |  | 
|  1048  |  | 
|  1049   pWriter->iColumn = 0; |  | 
|  1050   pWriter->iPos = 0; |  | 
|  1051   pWriter->iOffset = 0; |  | 
|  1052 } |  | 
|  1053 /* TODO(shess) Should plwDestroy() also terminate the doclist?  But |  | 
|  1054 ** then plwDestroy() would no longer be just a destructor, it would |  | 
|  1055 ** also be doing work, which isn't consistent with the overall idiom. |  | 
|  1056 ** Another option would be for plwAdd() to always append any necessary |  | 
|  1057 ** terminator, so that the output is always correct.  But that would |  | 
|  1058 ** add incremental work to the common case with the only benefit being |  | 
|  1059 ** API elegance.  Punt for now. |  | 
|  1060 */ |  | 
|  1061 static void plwTerminate(PLWriter *pWriter){ |  | 
|  1062   if( pWriter->dlw->iType>DL_DOCIDS ){ |  | 
|  1063     char c[VARINT_MAX]; |  | 
|  1064     int n = fts3PutVarint(c, POS_END); |  | 
|  1065     dataBufferAppend(pWriter->dlw->b, c, n); |  | 
|  1066   } |  | 
|  1067 #ifndef NDEBUG |  | 
|  1068   /* Mark as terminated for assert in plwAdd(). */ |  | 
|  1069   pWriter->iPos = -1; |  | 
|  1070 #endif |  | 
|  1071 } |  | 
/* Tear down pWriter.  Only SCRAMBLE() (defined elsewhere in this file)
** is applied; nothing is written to the doclist, so call
** plwTerminate() first when a terminator is required.
*/
static void plwDestroy(PLWriter *pWriter){
  SCRAMBLE(pWriter);
}
|  1075  |  | 
|  1076 /*******************************************************************/ |  | 
|  1077 /* DLCollector wraps PLWriter and DLWriter to provide a |  | 
|  1078 ** dynamically-allocated doclist area to use during tokenization. |  | 
|  1079 ** |  | 
|  1080 ** dlcNew - malloc up and initialize a collector. |  | 
|  1081 ** dlcDelete - destroy a collector and all contained items. |  | 
|  1082 ** dlcAddPos - append position and offset information. |  | 
|  1083 ** dlcAddDoclist - add the collected doclist to the given buffer. |  | 
|  1084 ** dlcNext - terminate the current document and open another. |  | 
|  1085 */ |  | 
/* Heap-allocated bundle of a buffer and the two writers that fill it;
** created by dlcNew() and released by dlcDelete().
*/
typedef struct DLCollector {
  DataBuffer b;    /* holds the doclist being collected */
  DLWriter dlw;    /* doclist writer appending to b */
  PLWriter plw;    /* position-list writer for the current document */
} DLCollector;
|  1091  |  | 
|  1092 /* TODO(shess) This could also be done by calling plwTerminate() and |  | 
|  1093 ** dataBufferAppend().  I tried that, expecting nominal performance |  | 
|  1094 ** differences, but it seemed to pretty reliably be worth 1% to code |  | 
|  1095 ** it this way.  I suspect it is the incremental malloc overhead (some |  | 
|  1096 ** percentage of the plwTerminate() calls will cause a realloc), so |  | 
|  1097 ** this might be worth revisiting if the DataBuffer implementation |  | 
|  1098 ** changes. |  | 
|  1099 */ |  | 
|  1100 static void dlcAddDoclist(DLCollector *pCollector, DataBuffer *b){ |  | 
|  1101   if( pCollector->dlw.iType>DL_DOCIDS ){ |  | 
|  1102     char c[VARINT_MAX]; |  | 
|  1103     int n = fts3PutVarint(c, POS_END); |  | 
|  1104     dataBufferAppend2(b, pCollector->b.pData, pCollector->b.nData, c, n); |  | 
|  1105   }else{ |  | 
|  1106     dataBufferAppend(b, pCollector->b.pData, pCollector->b.nData); |  | 
|  1107   } |  | 
|  1108 } |  | 
|  1109 static void dlcNext(DLCollector *pCollector, sqlite_int64 iDocid){ |  | 
|  1110   plwTerminate(&pCollector->plw); |  | 
|  1111   plwDestroy(&pCollector->plw); |  | 
|  1112   plwInit(&pCollector->plw, &pCollector->dlw, iDocid); |  | 
|  1113 } |  | 
/* Record one position for the collector's current document by
** forwarding all arguments to plwAdd() on the embedded PLWriter.
*/
static void dlcAddPos(DLCollector *pCollector, int iColumn, int iPos,
                      int iStartOffset, int iEndOffset){
  plwAdd(&pCollector->plw, iColumn, iPos, iStartOffset, iEndOffset);
}
|  1118  |  | 
|  1119 static DLCollector *dlcNew(sqlite_int64 iDocid, DocListType iType){ |  | 
|  1120   DLCollector *pCollector = sqlite3_malloc(sizeof(DLCollector)); |  | 
|  1121   dataBufferInit(&pCollector->b, 0); |  | 
|  1122   dlwInit(&pCollector->dlw, iType, &pCollector->b); |  | 
|  1123   plwInit(&pCollector->plw, &pCollector->dlw, iDocid); |  | 
|  1124   return pCollector; |  | 
|  1125 } |  | 
|  1126 static void dlcDelete(DLCollector *pCollector){ |  | 
|  1127   plwDestroy(&pCollector->plw); |  | 
|  1128   dlwDestroy(&pCollector->dlw); |  | 
|  1129   dataBufferDestroy(&pCollector->b); |  | 
|  1130   SCRAMBLE(pCollector); |  | 
|  1131   sqlite3_free(pCollector); |  | 
|  1132 } |  | 
|  1133  |  | 
|  1134  |  | 
|  1135 /* Copy the doclist data of iType in pData/nData into *out, trimming |  | 
|  1136 ** unnecessary data as we go.  Only columns matching iColumn are |  | 
|  1137 ** copied, all columns copied if iColumn is -1.  Elements with no |  | 
|  1138 ** matching columns are dropped.  The output is an iOutType doclist. |  | 
|  1139 */ |  | 
|  1140 /* NOTE(shess) This code is only valid after all doclists are merged. |  | 
|  1141 ** If this is run before merges, then doclist items which represent |  | 
|  1142 ** deletion will be trimmed, and will thus not effect a deletion |  | 
|  1143 ** during the merge. |  | 
|  1144 */ |  | 
|  1145 static int docListTrim(DocListType iType, const char *pData, int nData, |  | 
|  1146                        int iColumn, DocListType iOutType, DataBuffer *out){ |  | 
|  1147   DLReader dlReader; |  | 
|  1148   DLWriter dlWriter; |  | 
|  1149   int rc; |  | 
|  1150  |  | 
|  1151   assert( iOutType<=iType ); |  | 
|  1152  |  | 
|  1153   rc = dlrInit(&dlReader, iType, pData, nData); |  | 
|  1154   if( rc!=SQLITE_OK ) return rc; |  | 
|  1155   dlwInit(&dlWriter, iOutType, out); |  | 
|  1156  |  | 
|  1157   while( !dlrAtEnd(&dlReader) ){ |  | 
|  1158     PLReader plReader; |  | 
|  1159     PLWriter plWriter; |  | 
|  1160     int match = 0; |  | 
|  1161  |  | 
|  1162     rc = plrInit(&plReader, &dlReader); |  | 
|  1163     if( rc!=SQLITE_OK ) break; |  | 
|  1164  |  | 
|  1165     while( !plrAtEnd(&plReader) ){ |  | 
|  1166       if( iColumn==-1 || plrColumn(&plReader)==iColumn ){ |  | 
|  1167         if( !match ){ |  | 
|  1168           plwInit(&plWriter, &dlWriter, dlrDocid(&dlReader)); |  | 
|  1169           match = 1; |  | 
|  1170         } |  | 
|  1171         plwAdd(&plWriter, plrColumn(&plReader), plrPosition(&plReader), |  | 
|  1172                plrStartOffset(&plReader), plrEndOffset(&plReader)); |  | 
|  1173       } |  | 
|  1174       rc = plrStep(&plReader); |  | 
|  1175       if( rc!=SQLITE_OK ){ |  | 
|  1176         plrDestroy(&plReader); |  | 
|  1177         goto err; |  | 
|  1178       } |  | 
|  1179     } |  | 
|  1180     if( match ){ |  | 
|  1181       plwTerminate(&plWriter); |  | 
|  1182       plwDestroy(&plWriter); |  | 
|  1183     } |  | 
|  1184  |  | 
|  1185     plrDestroy(&plReader); |  | 
|  1186     rc = dlrStep(&dlReader); |  | 
|  1187     if( rc!=SQLITE_OK ) break; |  | 
|  1188   } |  | 
|  1189 err: |  | 
|  1190   dlwDestroy(&dlWriter); |  | 
|  1191   dlrDestroy(&dlReader); |  | 
|  1192   return rc; |  | 
|  1193 } |  | 
|  1194  |  | 
/* Used by docListMerge() to keep doclists in the ascending order by
** docid, then ascending order by age (so the newest comes first).
*/
typedef struct OrderedDLReader {
  DLReader *pReader;   /* the wrapped reader; not owned by this struct */

  /* TODO(shess) If we assume that docListMerge pReaders is ordered by
  ** age (which we do), then we could use pReader comparisons to break
  ** ties.
  */
  int idx;             /* index of pReader in docListMerge()'s input array;
                       ** on equal docids the larger idx sorts first */
} OrderedDLReader;
|  1207  |  | 
|  1208 /* Order eof to end, then by docid asc, idx desc. */ |  | 
|  1209 static int orderedDLReaderCmp(OrderedDLReader *r1, OrderedDLReader *r2){ |  | 
|  1210   if( dlrAtEnd(r1->pReader) ){ |  | 
|  1211     if( dlrAtEnd(r2->pReader) ) return 0;  /* Both atEnd(). */ |  | 
|  1212     return 1;                              /* Only r1 atEnd(). */ |  | 
|  1213   } |  | 
|  1214   if( dlrAtEnd(r2->pReader) ) return -1;   /* Only r2 atEnd(). */ |  | 
|  1215  |  | 
|  1216   if( dlrDocid(r1->pReader)<dlrDocid(r2->pReader) ) return -1; |  | 
|  1217   if( dlrDocid(r1->pReader)>dlrDocid(r2->pReader) ) return 1; |  | 
|  1218  |  | 
|  1219   /* Descending on idx. */ |  | 
|  1220   return r2->idx-r1->idx; |  | 
|  1221 } |  | 
|  1222  |  | 
|  1223 /* Bubble p[0] to appropriate place in p[1..n-1].  Assumes that |  | 
|  1224 ** p[1..n-1] is already sorted. |  | 
|  1225 */ |  | 
|  1226 /* TODO(shess) Is this frequent enough to warrant a binary search? |  | 
|  1227 ** Before implementing that, instrument the code to check.  In most |  | 
|  1228 ** current usage, I expect that p[0] will be less than p[1] a very |  | 
|  1229 ** high proportion of the time. |  | 
|  1230 */ |  | 
|  1231 static void orderedDLReaderReorder(OrderedDLReader *p, int n){ |  | 
|  1232   while( n>1 && orderedDLReaderCmp(p, p+1)>0 ){ |  | 
|  1233     OrderedDLReader tmp = p[0]; |  | 
|  1234     p[0] = p[1]; |  | 
|  1235     p[1] = tmp; |  | 
|  1236     n--; |  | 
|  1237     p++; |  | 
|  1238   } |  | 
|  1239 } |  | 
|  1240  |  | 
/* Given an array of doclist readers, merge their doclist elements
** into out in sorted order (by docid), dropping elements from older
** readers when there is a duplicate docid.  pReaders is assumed to be
** ordered by age, oldest first.
**
** Returns SQLITE_OK, or the first error reported by dlrStep() or
** dlwAppend().  The readers are advanced as a side effect.
*/
/* TODO(shess) nReaders must be <= MERGE_COUNT.  This should probably
** be fixed.
*/
static int docListMerge(DataBuffer *out,
                        DLReader *pReaders, int nReaders){
  OrderedDLReader readers[MERGE_COUNT];
  DLWriter writer;
  int i, n;
  /* pStart/nStart describe a pending run of contiguous input bytes that
  ** has been selected for output but not yet appended; iFirstDocid and
  ** iLastDocid are the docids of the first and last elements in it.
  */
  const char *pStart = 0;
  int nStart = 0;
  sqlite_int64 iFirstDocid = 0, iLastDocid = 0;
  int rc = SQLITE_OK;

  assert( nReaders>0 );
  /* A single input needs no merging: copy its remaining bytes as-is. */
  if( nReaders==1 ){
    dataBufferAppend(out, dlrDocData(pReaders), dlrAllDataBytes(pReaders));
    return SQLITE_OK;
  }

  assert( nReaders<=MERGE_COUNT );
  n = 0;
  for(i=0; i<nReaders; i++){
    assert( pReaders[i].iType==pReaders[0].iType );
    readers[i].pReader = pReaders+i;
    readers[i].idx = i;
    n += dlrAllDataBytes(&pReaders[i]);
  }
  /* Conservatively size output to sum of inputs.  Output should end
  ** up strictly smaller than input.
  */
  dataBufferExpand(out, n);

  /* Get the readers into sorted order. */
  while( i-->0 ){
    orderedDLReaderReorder(readers+i, nReaders-i);
  }

  dlwInit(&writer, pReaders[0].iType, out);
  /* readers[0] always holds the smallest docid (newest reader first on
  ** ties), so the merge is done once it reaches EOF.
  */
  while( !dlrAtEnd(readers[0].pReader) ){
    sqlite_int64 iDocid = dlrDocid(readers[0].pReader);

    /* If this is a continuation of the current buffer to copy, extend
    ** that buffer.  memcpy() seems to be more efficient if it has
    ** lots of data to copy.
    */
    if( dlrDocData(readers[0].pReader)==pStart+nStart ){
      nStart += dlrDocDataBytes(readers[0].pReader);
    }else{
      /* Not contiguous: flush the pending run, then start a new one. */
      if( pStart!=0 ){
        rc = dlwAppend(&writer, pStart, nStart, iFirstDocid, iLastDocid);
        if( rc!=SQLITE_OK ) goto err;
      }
      pStart = dlrDocData(readers[0].pReader);
      nStart = dlrDocDataBytes(readers[0].pReader);
      iFirstDocid = iDocid;
    }
    iLastDocid = iDocid;
    rc = dlrStep(readers[0].pReader);
    if( rc!= SQLITE_OK ) goto err;

    /* Drop all of the older elements with the same docid. */
    for(i=1; i<nReaders &&
             !dlrAtEnd(readers[i].pReader) &&
             dlrDocid(readers[i].pReader)==iDocid; i++){
      rc = dlrStep(readers[i].pReader);
      if( rc!=SQLITE_OK ) goto err;
    }

    /* Get the readers back into order.  Only readers[0..i-1] were
    ** stepped, so re-insert those, last one first.
    */
    while( i-->0 ){
      orderedDLReaderReorder(readers+i, nReaders-i);
    }
  }

  /* Copy over any remaining elements. */
  if( nStart>0 ) rc = dlwAppend(&writer, pStart, nStart, iFirstDocid, iLastDocid);
err:
  dlwDestroy(&writer);
  return rc;
}
|  1326  |  | 
|  1327 /* Helper function for posListUnion().  Compares the current position |  | 
|  1328 ** between left and right, returning as standard C idiom of <0 if |  | 
|  1329 ** left<right, >0 if left>right, and 0 if left==right.  "End" always |  | 
|  1330 ** compares greater. |  | 
|  1331 */ |  | 
|  1332 static int posListCmp(PLReader *pLeft, PLReader *pRight){ |  | 
|  1333   assert( pLeft->iType==pRight->iType ); |  | 
|  1334   if( pLeft->iType==DL_DOCIDS ) return 0; |  | 
|  1335  |  | 
|  1336   if( plrAtEnd(pLeft) ) return plrAtEnd(pRight) ? 0 : 1; |  | 
|  1337   if( plrAtEnd(pRight) ) return -1; |  | 
|  1338  |  | 
|  1339   if( plrColumn(pLeft)<plrColumn(pRight) ) return -1; |  | 
|  1340   if( plrColumn(pLeft)>plrColumn(pRight) ) return 1; |  | 
|  1341  |  | 
|  1342   if( plrPosition(pLeft)<plrPosition(pRight) ) return -1; |  | 
|  1343   if( plrPosition(pLeft)>plrPosition(pRight) ) return 1; |  | 
|  1344   if( pLeft->iType==DL_POSITIONS ) return 0; |  | 
|  1345  |  | 
|  1346   if( plrStartOffset(pLeft)<plrStartOffset(pRight) ) return -1; |  | 
|  1347   if( plrStartOffset(pLeft)>plrStartOffset(pRight) ) return 1; |  | 
|  1348  |  | 
|  1349   if( plrEndOffset(pLeft)<plrEndOffset(pRight) ) return -1; |  | 
|  1350   if( plrEndOffset(pLeft)>plrEndOffset(pRight) ) return 1; |  | 
|  1351  |  | 
|  1352   return 0; |  | 
|  1353 } |  | 
|  1354  |  | 
|  1355 /* Write the union of position lists in pLeft and pRight to pOut. |  | 
|  1356 ** "Union" in this case meaning "All unique position tuples".  Should |  | 
|  1357 ** work with any doclist type, though both inputs and the output |  | 
|  1358 ** should be the same type. |  | 
|  1359 */ |  | 
|  1360 static int posListUnion(DLReader *pLeft, DLReader *pRight, DLWriter *pOut){ |  | 
|  1361   PLReader left, right; |  | 
|  1362   PLWriter writer; |  | 
|  1363   int rc; |  | 
|  1364  |  | 
|  1365   assert( dlrDocid(pLeft)==dlrDocid(pRight) ); |  | 
|  1366   assert( pLeft->iType==pRight->iType ); |  | 
|  1367   assert( pLeft->iType==pOut->iType ); |  | 
|  1368  |  | 
|  1369   rc = plrInit(&left, pLeft); |  | 
|  1370   if( rc!=SQLITE_OK ) return rc; |  | 
|  1371   rc = plrInit(&right, pRight); |  | 
|  1372   if( rc!=SQLITE_OK ){ |  | 
|  1373     plrDestroy(&left); |  | 
|  1374     return rc; |  | 
|  1375   } |  | 
|  1376   plwInit(&writer, pOut, dlrDocid(pLeft)); |  | 
|  1377  |  | 
|  1378   while( !plrAtEnd(&left) || !plrAtEnd(&right) ){ |  | 
|  1379     int c = posListCmp(&left, &right); |  | 
|  1380     if( c<0 ){ |  | 
|  1381       plwCopy(&writer, &left); |  | 
|  1382       rc = plrStep(&left); |  | 
|  1383       if( rc!=SQLITE_OK ) break; |  | 
|  1384     }else if( c>0 ){ |  | 
|  1385       plwCopy(&writer, &right); |  | 
|  1386       rc = plrStep(&right); |  | 
|  1387       if( rc!=SQLITE_OK ) break; |  | 
|  1388     }else{ |  | 
|  1389       plwCopy(&writer, &left); |  | 
|  1390       rc = plrStep(&left); |  | 
|  1391       if( rc!=SQLITE_OK ) break; |  | 
|  1392       rc = plrStep(&right); |  | 
|  1393       if( rc!=SQLITE_OK ) break; |  | 
|  1394     } |  | 
|  1395   } |  | 
|  1396  |  | 
|  1397   plwTerminate(&writer); |  | 
|  1398   plwDestroy(&writer); |  | 
|  1399   plrDestroy(&left); |  | 
|  1400   plrDestroy(&right); |  | 
|  1401   return rc; |  | 
|  1402 } |  | 
|  1403  |  | 
|  1404 /* Write the union of doclists in pLeft and pRight to pOut.  For |  | 
|  1405 ** docids in common between the inputs, the union of the position |  | 
|  1406 ** lists is written.  Inputs and outputs are always type DL_DEFAULT. |  | 
|  1407 */ |  | 
|  1408 static int docListUnion( |  | 
|  1409   const char *pLeft, int nLeft, |  | 
|  1410   const char *pRight, int nRight, |  | 
|  1411   DataBuffer *pOut      /* Write the combined doclist here */ |  | 
|  1412 ){ |  | 
|  1413   DLReader left, right; |  | 
|  1414   DLWriter writer; |  | 
|  1415   int rc; |  | 
|  1416  |  | 
|  1417   if( nLeft==0 ){ |  | 
|  1418     if( nRight!=0) dataBufferAppend(pOut, pRight, nRight); |  | 
|  1419     return SQLITE_OK; |  | 
|  1420   } |  | 
|  1421   if( nRight==0 ){ |  | 
|  1422     dataBufferAppend(pOut, pLeft, nLeft); |  | 
|  1423     return SQLITE_OK; |  | 
|  1424   } |  | 
|  1425  |  | 
|  1426   rc = dlrInit(&left, DL_DEFAULT, pLeft, nLeft); |  | 
|  1427   if( rc!=SQLITE_OK ) return rc; |  | 
|  1428   rc = dlrInit(&right, DL_DEFAULT, pRight, nRight); |  | 
|  1429   if( rc!=SQLITE_OK){ |  | 
|  1430     dlrDestroy(&left); |  | 
|  1431     return rc; |  | 
|  1432   } |  | 
|  1433   dlwInit(&writer, DL_DEFAULT, pOut); |  | 
|  1434  |  | 
|  1435   while( !dlrAtEnd(&left) || !dlrAtEnd(&right) ){ |  | 
|  1436     if( dlrAtEnd(&right) ){ |  | 
|  1437       rc = dlwCopy(&writer, &left); |  | 
|  1438       if( rc!=SQLITE_OK) break; |  | 
|  1439       rc = dlrStep(&left); |  | 
|  1440       if( rc!=SQLITE_OK) break; |  | 
|  1441     }else if( dlrAtEnd(&left) ){ |  | 
|  1442       rc = dlwCopy(&writer, &right); |  | 
|  1443       if( rc!=SQLITE_OK ) break; |  | 
|  1444       rc = dlrStep(&right); |  | 
|  1445       if( rc!=SQLITE_OK ) break; |  | 
|  1446     }else if( dlrDocid(&left)<dlrDocid(&right) ){ |  | 
|  1447       rc = dlwCopy(&writer, &left); |  | 
|  1448       if( rc!=SQLITE_OK ) break; |  | 
|  1449       rc = dlrStep(&left); |  | 
|  1450       if( rc!=SQLITE_OK ) break; |  | 
|  1451     }else if( dlrDocid(&left)>dlrDocid(&right) ){ |  | 
|  1452       rc = dlwCopy(&writer, &right); |  | 
|  1453       if( rc!=SQLITE_OK ) break; |  | 
|  1454       rc = dlrStep(&right); |  | 
|  1455       if( rc!=SQLITE_OK ) break; |  | 
|  1456     }else{ |  | 
|  1457       rc = posListUnion(&left, &right, &writer); |  | 
|  1458       if( rc!=SQLITE_OK ) break; |  | 
|  1459       rc = dlrStep(&left); |  | 
|  1460       if( rc!=SQLITE_OK ) break; |  | 
|  1461       rc = dlrStep(&right); |  | 
|  1462       if( rc!=SQLITE_OK ) break; |  | 
|  1463     } |  | 
|  1464   } |  | 
|  1465  |  | 
|  1466   dlrDestroy(&left); |  | 
|  1467   dlrDestroy(&right); |  | 
|  1468   dlwDestroy(&writer); |  | 
|  1469   return rc; |  | 
|  1470 } |  | 
|  1471  |  | 
|  1472 /*  |  | 
|  1473 ** This function is used as part of the implementation of phrase and |  | 
|  1474 ** NEAR matching. |  | 
|  1475 ** |  | 
|  1476 ** pLeft and pRight are DLReaders positioned to the same docid in |  | 
|  1477 ** lists of type DL_POSITION. This function writes an entry to the |  | 
|  1478 ** DLWriter pOut for each position in pRight that is less than |  | 
|  1479 ** (nNear+1) greater (but not equal to or smaller) than a position  |  | 
|  1480 ** in pLeft. For example, if nNear is 0, and the positions contained |  | 
|  1481 ** by pLeft and pRight are: |  | 
|  1482 ** |  | 
|  1483 **    pLeft:  5 10 15 20 |  | 
|  1484 **    pRight: 6  9 17 21 |  | 
|  1485 ** |  | 
|  1486 ** then the docid is added to pOut. If pOut is of type DL_POSITIONS, |  | 
|  1487 ** then a positionids "6" and "21" are also added to pOut. |  | 
|  1488 ** |  | 
|  1489 ** If boolean argument isSaveLeft is true, then positionids are copied |  | 
|  1490 ** from pLeft instead of pRight. In the example above, the positions "5" |  | 
|  1491 ** and "20" would be added instead of "6" and "21". |  | 
|  1492 */ |  | 
|  1493 static int posListPhraseMerge( |  | 
|  1494   DLReader *pLeft,  |  | 
|  1495   DLReader *pRight, |  | 
|  1496   int nNear, |  | 
|  1497   int isSaveLeft, |  | 
|  1498   DLWriter *pOut |  | 
|  1499 ){ |  | 
|  1500   PLReader left, right; |  | 
|  1501   PLWriter writer; |  | 
|  1502   int match = 0; |  | 
|  1503   int rc; |  | 
|  1504  |  | 
|  1505   assert( dlrDocid(pLeft)==dlrDocid(pRight) ); |  | 
|  1506   assert( pOut->iType!=DL_POSITIONS_OFFSETS ); |  | 
|  1507  |  | 
|  1508   rc = plrInit(&left, pLeft); |  | 
|  1509   if( rc!=SQLITE_OK ) return rc; |  | 
|  1510   rc = plrInit(&right, pRight); |  | 
|  1511   if( rc!=SQLITE_OK ){ |  | 
|  1512     plrDestroy(&left); |  | 
|  1513     return rc; |  | 
|  1514   } |  | 
|  1515  |  | 
|  1516   while( !plrAtEnd(&left) && !plrAtEnd(&right) ){ |  | 
|  1517     if( plrColumn(&left)<plrColumn(&right) ){ |  | 
|  1518       rc = plrStep(&left); |  | 
|  1519       if( rc!=SQLITE_OK ) break; |  | 
|  1520     }else if( plrColumn(&left)>plrColumn(&right) ){ |  | 
|  1521       rc = plrStep(&right); |  | 
|  1522       if( rc!=SQLITE_OK ) break; |  | 
|  1523     }else if( plrPosition(&left)>=plrPosition(&right) ){ |  | 
|  1524       rc = plrStep(&right); |  | 
|  1525       if( rc!=SQLITE_OK ) break; |  | 
|  1526     }else{ |  | 
|  1527       if( (plrPosition(&right)-plrPosition(&left))<=(nNear+1) ){ |  | 
|  1528         if( !match ){ |  | 
|  1529           plwInit(&writer, pOut, dlrDocid(pLeft)); |  | 
|  1530           match = 1; |  | 
|  1531         } |  | 
|  1532         if( !isSaveLeft ){ |  | 
|  1533           plwAdd(&writer, plrColumn(&right), plrPosition(&right), 0, 0); |  | 
|  1534         }else{ |  | 
|  1535           plwAdd(&writer, plrColumn(&left), plrPosition(&left), 0, 0); |  | 
|  1536         } |  | 
|  1537         rc = plrStep(&right); |  | 
|  1538         if( rc!=SQLITE_OK ) break; |  | 
|  1539       }else{ |  | 
|  1540         rc = plrStep(&left); |  | 
|  1541         if( rc!=SQLITE_OK ) break; |  | 
|  1542       } |  | 
|  1543     } |  | 
|  1544   } |  | 
|  1545  |  | 
|  1546   if( match ){ |  | 
|  1547     plwTerminate(&writer); |  | 
|  1548     plwDestroy(&writer); |  | 
|  1549   } |  | 
|  1550  |  | 
|  1551   plrDestroy(&left); |  | 
|  1552   plrDestroy(&right); |  | 
|  1553   return rc; |  | 
|  1554 } |  | 
|  1555  |  | 
|  1556 /* |  | 
|  1557 ** Compare the values pointed to by the PLReaders passed as arguments.  |  | 
|  1558 ** Return -1 if the value pointed to by pLeft is considered less than |  | 
|  1559 ** the value pointed to by pRight, +1 if it is considered greater |  | 
|  1560 ** than it, or 0 if it is equal. i.e. |  | 
|  1561 ** |  | 
|  1562 **     (*pLeft - *pRight) |  | 
|  1563 ** |  | 
|  1564 ** A PLReader that is in the EOF condition is considered greater than |  | 
|  1565 ** any other. If neither argument is in EOF state, the return value of |  | 
|  1566 ** plrColumn() is used. If the plrColumn() values are equal, the |  | 
|  1567 ** comparison is on the basis of plrPosition(). |  | 
|  1568 */ |  | 
|  1569 static int plrCompare(PLReader *pLeft, PLReader *pRight){ |  | 
|  1570   assert(!plrAtEnd(pLeft) || !plrAtEnd(pRight)); |  | 
|  1571  |  | 
|  1572   if( plrAtEnd(pRight) || plrAtEnd(pLeft) ){ |  | 
|  1573     return (plrAtEnd(pRight) ? -1 : 1); |  | 
|  1574   } |  | 
|  1575   if( plrColumn(pLeft)!=plrColumn(pRight) ){ |  | 
|  1576     return ((plrColumn(pLeft)<plrColumn(pRight)) ? -1 : 1); |  | 
|  1577   } |  | 
|  1578   if( plrPosition(pLeft)!=plrPosition(pRight) ){ |  | 
|  1579     return ((plrPosition(pLeft)<plrPosition(pRight)) ? -1 : 1); |  | 
|  1580   } |  | 
|  1581   return 0; |  | 
|  1582 } |  | 
|  1583  |  | 
|  1584 /* We have two doclists with positions:  pLeft and pRight. Depending |  | 
|  1585 ** on the value of the nNear parameter, perform either a phrase |  | 
|  1586 ** intersection (if nNear==0) or a NEAR intersection (if nNear>0) |  | 
|  1587 ** and write the results into pOut. |  | 
|  1588 ** |  | 
|  1589 ** A phrase intersection means that two documents only match |  | 
|  1590 ** if pLeft.iPos+1==pRight.iPos. |  | 
|  1591 ** |  | 
|  1592 ** A NEAR intersection means that two documents only match if  |  | 
|  1593 ** (abs(pLeft.iPos-pRight.iPos)<nNear). |  | 
|  1594 ** |  | 
|  1595 ** If a NEAR intersection is requested, then the nPhrase argument should |  | 
|  1596 ** be passed the number of tokens in the two operands to the NEAR operator |  | 
|  1597 ** combined. For example: |  | 
|  1598 ** |  | 
|  1599 **       Query syntax               nPhrase |  | 
|  1600 **      ------------------------------------ |  | 
|  1601 **       "A B C" NEAR "D E"         5 |  | 
|  1602 **       A NEAR B                   2 |  | 
|  1603 ** |  | 
|  1604 ** iType controls the type of data written to pOut.  If iType is |  | 
|  1605 ** DL_POSITIONS, the positions are those from pRight. |  | 
|  1606 */ |  | 
|  1607 static int docListPhraseMerge( |  | 
|  1608   const char *pLeft, int nLeft, |  | 
|  1609   const char *pRight, int nRight, |  | 
|  1610   int nNear,            /* 0 for a phrase merge, non-zero for a NEAR merge */ |  | 
|  1611   int nPhrase,          /* Number of tokens in left+right operands to NEAR */ |  | 
|  1612   DocListType iType,    /* Type of doclist to write to pOut */ |  | 
|  1613   DataBuffer *pOut      /* Write the combined doclist here */ |  | 
|  1614 ){ |  | 
|  1615   DLReader left, right; |  | 
|  1616   DLWriter writer; |  | 
|  1617   int rc; |  | 
|  1618  |  | 
|  1619   /* These two buffers are used in the 'while', but are declared here |  | 
|  1620   ** to simplify error-handling. |  | 
|  1621   */ |  | 
|  1622   DataBuffer one = {0, 0, 0}; |  | 
|  1623   DataBuffer two = {0, 0, 0}; |  | 
|  1624  |  | 
|  1625   if( nLeft==0 || nRight==0 ) return SQLITE_OK; |  | 
|  1626  |  | 
|  1627   assert( iType!=DL_POSITIONS_OFFSETS ); |  | 
|  1628  |  | 
|  1629   rc = dlrInit(&left, DL_POSITIONS, pLeft, nLeft); |  | 
|  1630   if( rc!=SQLITE_OK ) return rc; |  | 
|  1631   rc = dlrInit(&right, DL_POSITIONS, pRight, nRight); |  | 
|  1632   if( rc!=SQLITE_OK ){ |  | 
|  1633     dlrDestroy(&left); |  | 
|  1634     return rc; |  | 
|  1635   } |  | 
|  1636   dlwInit(&writer, iType, pOut); |  | 
|  1637  |  | 
|  1638   while( !dlrAtEnd(&left) && !dlrAtEnd(&right) ){ |  | 
|  1639     if( dlrDocid(&left)<dlrDocid(&right) ){ |  | 
|  1640       rc = dlrStep(&left); |  | 
|  1641       if( rc!=SQLITE_OK ) goto err; |  | 
|  1642     }else if( dlrDocid(&right)<dlrDocid(&left) ){ |  | 
|  1643       rc = dlrStep(&right); |  | 
|  1644       if( rc!=SQLITE_OK ) goto err; |  | 
|  1645     }else{ |  | 
|  1646       if( nNear==0 ){ |  | 
|  1647         rc = posListPhraseMerge(&left, &right, 0, 0, &writer); |  | 
|  1648         if( rc!=SQLITE_OK ) goto err; |  | 
|  1649       }else{ |  | 
|  1650         /* This case occurs when two terms (simple terms or phrases) are |  | 
|  1651          * connected by a NEAR operator, span (nNear+1). i.e. |  | 
|  1652          * |  | 
|  1653          *     '"terrible company" NEAR widget' |  | 
|  1654          */ |  | 
|  1655         DLWriter dlwriter2; |  | 
|  1656         DLReader dr1 = {0, 0, 0, 0, 0}; |  | 
|  1657         DLReader dr2 = {0, 0, 0, 0, 0}; |  | 
|  1658  |  | 
|  1659         dlwInit(&dlwriter2, iType, &one); |  | 
|  1660         rc = posListPhraseMerge(&right, &left, nNear-3+nPhrase, 1, &dlwriter2); |  | 
|  1661         if( rc!=SQLITE_OK ) goto err; |  | 
|  1662         dlwInit(&dlwriter2, iType, &two); |  | 
|  1663         rc = posListPhraseMerge(&left, &right, nNear-1, 0, &dlwriter2); |  | 
|  1664         if( rc!=SQLITE_OK ) goto err; |  | 
|  1665  |  | 
|  1666         if( one.nData){ |  | 
|  1667           rc = dlrInit(&dr1, iType, one.pData, one.nData); |  | 
|  1668           if( rc!=SQLITE_OK ) goto err; |  | 
|  1669         } |  | 
|  1670         if( two.nData){ |  | 
|  1671           rc = dlrInit(&dr2, iType, two.pData, two.nData); |  | 
|  1672           if( rc!=SQLITE_OK ) goto err; |  | 
|  1673         } |  | 
|  1674  |  | 
|  1675         if( !dlrAtEnd(&dr1) || !dlrAtEnd(&dr2) ){ |  | 
|  1676           PLReader pr1 = {0}; |  | 
|  1677           PLReader pr2 = {0}; |  | 
|  1678  |  | 
|  1679           PLWriter plwriter; |  | 
|  1680           plwInit(&plwriter, &writer, dlrDocid(dlrAtEnd(&dr1)?&dr2:&dr1)); |  | 
|  1681  |  | 
|  1682           if( one.nData ){ |  | 
|  1683             rc = plrInit(&pr1, &dr1); |  | 
|  1684             if( rc!=SQLITE_OK ) goto err; |  | 
|  1685           } |  | 
|  1686           if( two.nData ){ |  | 
|  1687             rc = plrInit(&pr2, &dr2); |  | 
|  1688             if( rc!=SQLITE_OK ) goto err; |  | 
|  1689           } |  | 
|  1690           while( !plrAtEnd(&pr1) || !plrAtEnd(&pr2) ){ |  | 
|  1691             int iCompare = plrCompare(&pr1, &pr2); |  | 
|  1692             switch( iCompare ){ |  | 
|  1693               case -1: |  | 
|  1694                 plwCopy(&plwriter, &pr1); |  | 
|  1695                 rc = plrStep(&pr1); |  | 
|  1696                 if( rc!=SQLITE_OK ) goto err; |  | 
|  1697                 break; |  | 
|  1698               case 1: |  | 
|  1699                 plwCopy(&plwriter, &pr2); |  | 
|  1700                 rc = plrStep(&pr2); |  | 
|  1701                 if( rc!=SQLITE_OK ) goto err; |  | 
|  1702                 break; |  | 
|  1703               case 0: |  | 
|  1704                 plwCopy(&plwriter, &pr1); |  | 
|  1705                 rc = plrStep(&pr1); |  | 
|  1706                 if( rc!=SQLITE_OK ) goto err; |  | 
|  1707                 rc = plrStep(&pr2); |  | 
|  1708                 if( rc!=SQLITE_OK ) goto err; |  | 
|  1709                 break; |  | 
|  1710             } |  | 
|  1711           } |  | 
|  1712           plwTerminate(&plwriter); |  | 
|  1713         } |  | 
|  1714         dataBufferReset(&one); |  | 
|  1715         dataBufferReset(&two); |  | 
|  1716       } |  | 
|  1717       rc = dlrStep(&left); |  | 
|  1718       if( rc!=SQLITE_OK ) goto err; |  | 
|  1719       rc = dlrStep(&right); |  | 
|  1720       if( rc!=SQLITE_OK ) goto err; |  | 
|  1721     } |  | 
|  1722   } |  | 
|  1723  |  | 
|  1724 err: |  | 
|  1725   dataBufferDestroy(&one); |  | 
|  1726   dataBufferDestroy(&two); |  | 
|  1727   dlrDestroy(&left); |  | 
|  1728   dlrDestroy(&right); |  | 
|  1729   dlwDestroy(&writer); |  | 
|  1730   return rc; |  | 
|  1731 } |  | 
|  1732  |  | 
|  1733 /* We have two DL_DOCIDS doclists:  pLeft and pRight. |  | 
|  1734 ** Write the intersection of these two doclists into pOut as a |  | 
|  1735 ** DL_DOCIDS doclist. |  | 
|  1736 */ |  | 
|  1737 static int docListAndMerge( |  | 
|  1738   const char *pLeft, int nLeft, |  | 
|  1739   const char *pRight, int nRight, |  | 
|  1740   DataBuffer *pOut      /* Write the combined doclist here */ |  | 
|  1741 ){ |  | 
|  1742   DLReader left, right; |  | 
|  1743   DLWriter writer; |  | 
|  1744   int rc; |  | 
|  1745  |  | 
|  1746   if( nLeft==0 || nRight==0 ) return SQLITE_OK; |  | 
|  1747  |  | 
|  1748   rc = dlrInit(&left, DL_DOCIDS, pLeft, nLeft); |  | 
|  1749   if( rc!=SQLITE_OK ) return rc; |  | 
|  1750   rc = dlrInit(&right, DL_DOCIDS, pRight, nRight); |  | 
|  1751   if( rc!=SQLITE_OK ){ |  | 
|  1752     dlrDestroy(&left); |  | 
|  1753     return rc; |  | 
|  1754   } |  | 
|  1755   dlwInit(&writer, DL_DOCIDS, pOut); |  | 
|  1756  |  | 
|  1757   while( !dlrAtEnd(&left) && !dlrAtEnd(&right) ){ |  | 
|  1758     if( dlrDocid(&left)<dlrDocid(&right) ){ |  | 
|  1759       rc = dlrStep(&left); |  | 
|  1760       if( rc!=SQLITE_OK ) break; |  | 
|  1761     }else if( dlrDocid(&right)<dlrDocid(&left) ){ |  | 
|  1762       rc = dlrStep(&right); |  | 
|  1763       if( rc!=SQLITE_OK ) break; |  | 
|  1764     }else{ |  | 
|  1765       dlwAdd(&writer, dlrDocid(&left)); |  | 
|  1766       rc = dlrStep(&left); |  | 
|  1767       if( rc!=SQLITE_OK ) break; |  | 
|  1768       rc = dlrStep(&right); |  | 
|  1769       if( rc!=SQLITE_OK ) break; |  | 
|  1770     } |  | 
|  1771   } |  | 
|  1772  |  | 
|  1773   dlrDestroy(&left); |  | 
|  1774   dlrDestroy(&right); |  | 
|  1775   dlwDestroy(&writer); |  | 
|  1776   return rc; |  | 
|  1777 } |  | 
|  1778  |  | 
|  1779 /* We have two DL_DOCIDS doclists:  pLeft and pRight. |  | 
|  1780 ** Write the union of these two doclists into pOut as a |  | 
|  1781 ** DL_DOCIDS doclist. |  | 
|  1782 */ |  | 
|  1783 static int docListOrMerge( |  | 
|  1784   const char *pLeft, int nLeft, |  | 
|  1785   const char *pRight, int nRight, |  | 
|  1786   DataBuffer *pOut      /* Write the combined doclist here */ |  | 
|  1787 ){ |  | 
|  1788   DLReader left, right; |  | 
|  1789   DLWriter writer; |  | 
|  1790   int rc; |  | 
|  1791  |  | 
|  1792   if( nLeft==0 ){ |  | 
|  1793     if( nRight!=0 ) dataBufferAppend(pOut, pRight, nRight); |  | 
|  1794     return SQLITE_OK; |  | 
|  1795   } |  | 
|  1796   if( nRight==0 ){ |  | 
|  1797     dataBufferAppend(pOut, pLeft, nLeft); |  | 
|  1798     return SQLITE_OK; |  | 
|  1799   } |  | 
|  1800  |  | 
|  1801   rc = dlrInit(&left, DL_DOCIDS, pLeft, nLeft); |  | 
|  1802   if( rc!=SQLITE_OK ) return rc; |  | 
|  1803   rc = dlrInit(&right, DL_DOCIDS, pRight, nRight); |  | 
|  1804   if( rc!=SQLITE_OK ){ |  | 
|  1805     dlrDestroy(&left); |  | 
|  1806     return rc; |  | 
|  1807   } |  | 
|  1808   dlwInit(&writer, DL_DOCIDS, pOut); |  | 
|  1809  |  | 
|  1810   while( !dlrAtEnd(&left) || !dlrAtEnd(&right) ){ |  | 
|  1811     if( dlrAtEnd(&right) ){ |  | 
|  1812       dlwAdd(&writer, dlrDocid(&left)); |  | 
|  1813       rc = dlrStep(&left); |  | 
|  1814       if( rc!=SQLITE_OK ) break; |  | 
|  1815     }else if( dlrAtEnd(&left) ){ |  | 
|  1816       dlwAdd(&writer, dlrDocid(&right)); |  | 
|  1817       rc = dlrStep(&right); |  | 
|  1818       if( rc!=SQLITE_OK ) break; |  | 
|  1819     }else if( dlrDocid(&left)<dlrDocid(&right) ){ |  | 
|  1820       dlwAdd(&writer, dlrDocid(&left)); |  | 
|  1821       rc = dlrStep(&left); |  | 
|  1822       if( rc!=SQLITE_OK ) break; |  | 
|  1823     }else if( dlrDocid(&right)<dlrDocid(&left) ){ |  | 
|  1824       dlwAdd(&writer, dlrDocid(&right)); |  | 
|  1825       rc = dlrStep(&right); |  | 
|  1826       if( rc!=SQLITE_OK ) break; |  | 
|  1827     }else{ |  | 
|  1828       dlwAdd(&writer, dlrDocid(&left)); |  | 
|  1829       rc = dlrStep(&left); |  | 
|  1830       if( rc!=SQLITE_OK ) break; |  | 
|  1831       rc = dlrStep(&right); |  | 
|  1832       if( rc!=SQLITE_OK ) break; |  | 
|  1833     } |  | 
|  1834   } |  | 
|  1835  |  | 
|  1836   dlrDestroy(&left); |  | 
|  1837   dlrDestroy(&right); |  | 
|  1838   dlwDestroy(&writer); |  | 
|  1839   return rc; |  | 
|  1840 } |  | 
|  1841  |  | 
|  1842 /* We have two DL_DOCIDS doclists:  pLeft and pRight. |  | 
|  1843 ** Write into pOut as DL_DOCIDS doclist containing all documents that |  | 
|  1844 ** occur in pLeft but not in pRight. |  | 
|  1845 */ |  | 
|  1846 static int docListExceptMerge( |  | 
|  1847   const char *pLeft, int nLeft, |  | 
|  1848   const char *pRight, int nRight, |  | 
|  1849   DataBuffer *pOut      /* Write the combined doclist here */ |  | 
|  1850 ){ |  | 
|  1851   DLReader left, right; |  | 
|  1852   DLWriter writer; |  | 
|  1853   int rc; |  | 
|  1854  |  | 
|  1855   if( nLeft==0 ) return SQLITE_OK; |  | 
|  1856   if( nRight==0 ){ |  | 
|  1857     dataBufferAppend(pOut, pLeft, nLeft); |  | 
|  1858     return SQLITE_OK; |  | 
|  1859   } |  | 
|  1860  |  | 
|  1861   rc = dlrInit(&left, DL_DOCIDS, pLeft, nLeft); |  | 
|  1862   if( rc!=SQLITE_OK ) return rc; |  | 
|  1863   rc = dlrInit(&right, DL_DOCIDS, pRight, nRight); |  | 
|  1864   if( rc!=SQLITE_OK ){ |  | 
|  1865     dlrDestroy(&left); |  | 
|  1866     return rc; |  | 
|  1867   } |  | 
|  1868   dlwInit(&writer, DL_DOCIDS, pOut); |  | 
|  1869  |  | 
|  1870   while( !dlrAtEnd(&left) ){ |  | 
|  1871     while( !dlrAtEnd(&right) && dlrDocid(&right)<dlrDocid(&left) ){ |  | 
|  1872       rc = dlrStep(&right); |  | 
|  1873       if( rc!=SQLITE_OK ) goto err; |  | 
|  1874     } |  | 
|  1875     if( dlrAtEnd(&right) || dlrDocid(&left)<dlrDocid(&right) ){ |  | 
|  1876       dlwAdd(&writer, dlrDocid(&left)); |  | 
|  1877     } |  | 
|  1878     rc = dlrStep(&left); |  | 
|  1879     if( rc!=SQLITE_OK ) break; |  | 
|  1880   } |  | 
|  1881  |  | 
|  1882 err: |  | 
|  1883   dlrDestroy(&left); |  | 
|  1884   dlrDestroy(&right); |  | 
|  1885   dlwDestroy(&writer); |  | 
|  1886   return rc; |  | 
|  1887 } |  | 
|  1888  |  | 
/* Return a nul-terminated copy of the first n bytes of s.
**
** The copy is obtained from sqlite3_malloc() and must be released by
** the caller with sqlite3_free().  Returns NULL if the allocation
** fails (previously the NULL result was passed straight to memcpy()).
*/
static char *string_dup_n(const char *s, int n){
  char *str = sqlite3_malloc(n + 1);
  if( str==0 ) return 0;
  memcpy(str, s, n);
  str[n] = '\0';
  return str;
}
|  1895  |  | 
/* Return a copy of the nul-terminated string s, made with
 * string_dup_n().  The copy comes from sqlite3_malloc(), so the caller
 * must release it with sqlite3_free().
 * (strdup() is avoided since it is not part of the standard C library
 * and may not be available everywhere.) */
static char *string_dup(const char *s){
  int nByte = strlen(s);
  return string_dup_n(s, nByte);
}
|  1902  |  | 
/* Return a copy of zFormat in which each occurrence of the '%'
 * character is replaced by the text "zDb.zName" (zDb, a dot, zName).
 * This may be more convenient than sqlite_mprintf() when one string is
 * used repeatedly in a format string.
 *
 * The result is obtained from sqlite3_malloc(); the caller must release
 * it with sqlite3_free().  Returns NULL if the result would not fit in
 * the int size accepted by sqlite3_malloc() or if the allocation fails.
 */
static char *string_format(const char *zFormat,
                           const char *zDb, const char *zName){
  const char *p;
  size_t len = 0;
  size_t nDb = strlen(zDb);
  size_t nName = strlen(zName);
  size_t nFullTableName = nDb+1+nName;
  char *result;
  char *r;

  /* first compute length needed */
  for(p = zFormat ; *p ; ++p){
    len += (*p=='%' ? nFullTableName : 1);
  }
  len += 1;  /* for null terminator */

  /* sqlite3_malloc() takes an int; refuse sizes that would be truncated
   * rather than writing past a too-small buffer. */
  if( len>(size_t)0x7fffffff ) return 0;

  r = result = sqlite3_malloc((int)len);
  if( result==0 ) return 0;   /* out of memory */

  for(p = zFormat; *p; ++p){
    if( *p=='%' ){
      memcpy(r, zDb, nDb);
      r += nDb;
      *r++ = '.';
      memcpy(r, zName, nName);
      r += nName;
    } else {
      *r++ = *p;
    }
  }
  *r++ = '\0';
  assert( r == result + len );
  return result;
}
|  1939  |  | 
|  1940 static int sql_exec(sqlite3 *db, const char *zDb, const char *zName, |  | 
|  1941                     const char *zFormat){ |  | 
|  1942   char *zCommand = string_format(zFormat, zDb, zName); |  | 
|  1943   int rc; |  | 
|  1944   FTSTRACE(("FTS3 sql: %s\n", zCommand)); |  | 
|  1945   rc = sqlite3_exec(db, zCommand, NULL, 0, NULL); |  | 
|  1946   sqlite3_free(zCommand); |  | 
|  1947   return rc; |  | 
|  1948 } |  | 
|  1949  |  | 
|  1950 static int sql_prepare(sqlite3 *db, const char *zDb, const char *zName, |  | 
|  1951                        sqlite3_stmt **ppStmt, const char *zFormat){ |  | 
|  1952   char *zCommand = string_format(zFormat, zDb, zName); |  | 
|  1953   int rc; |  | 
|  1954   FTSTRACE(("FTS3 prepare: %s\n", zCommand)); |  | 
|  1955   rc = sqlite3_prepare_v2(db, zCommand, -1, ppStmt, NULL); |  | 
|  1956   sqlite3_free(zCommand); |  | 
|  1957   return rc; |  | 
|  1958 } |  | 
|  1959  |  | 
|  1960 /* end utility functions */ |  | 
|  1961  |  | 
/* Forward reference: the struct itself is defined further down, but
** the typedef name is made available from this point on. */
typedef struct fulltext_vtab fulltext_vtab;
|  1964  |  | 
/*
** A Snippet records the matching-word offset information generated for
** a row, together with the offset text and snippet text rendered from
** it.
*/
typedef struct Snippet {
  /* Total number of matches */
  int nMatch;
  /* Space allocated for aMatch[] */
  int nAlloc;
  /* One entry for each matching term.  Points to space obtained from
  ** malloc. */
  struct snippetMatch {
    /* Status flag for use while constructing snippets */
    char snStatus;
    /* The column that contains the match */
    short iCol;
    /* The index in Query.pTerms[] of the matching term */
    short iTerm;
    /* The index of the matching document token */
    int iToken;
    /* Number of bytes in the term */
    short nByte;
    /* The offset to the first character of the term */
    int iStart;
  } *aMatch;
  /* Text rendering of aMatch[] */
  char *zOffset;
  /* strlen(zOffset) */
  int nOffset;
  /* Snippet text */
  char *zSnippet;
  /* strlen(zSnippet) */
  int nSnippet;
} Snippet;
|  1985  |  | 
|  1986  |  | 
/* Kinds of query a cursor can run. */
typedef enum QueryType {
  /* table scan */
  QUERY_GENERIC,
  /* lookup by docid */
  QUERY_DOCID,
  /* QUERY_FULLTEXT + [i] is a full-text search for column i */
  QUERY_FULLTEXT
} QueryType;
|  1992  |  | 
/* Identifiers for the SQL statements kept per table.  Each value is an
** index into fulltext_zStatement[] and into
** fulltext_vtab.pFulltextStatements[], so the order here matters.
*/
typedef enum fulltext_statement {
  /* Statements on the %_content table */
  CONTENT_INSERT_STMT,
  CONTENT_SELECT_STMT,
  CONTENT_UPDATE_STMT,
  CONTENT_DELETE_STMT,
  CONTENT_EXISTS_STMT,

  /* Statements on the %_segments table */
  BLOCK_INSERT_STMT,
  BLOCK_SELECT_STMT,
  BLOCK_DELETE_STMT,
  BLOCK_DELETE_ALL_STMT,

  /* Statements on the %_segdir table */
  SEGDIR_MAX_INDEX_STMT,
  SEGDIR_SET_STMT,
  SEGDIR_SELECT_LEVEL_STMT,
  SEGDIR_SPAN_STMT,
  SEGDIR_DELETE_STMT,
  SEGDIR_SELECT_SEGMENT_STMT,
  SEGDIR_SELECT_ALL_STMT,
  SEGDIR_DELETE_ALL_STMT,
  SEGDIR_COUNT_STMT,

  MAX_STMT                     /* Always at end! */
} fulltext_statement;
|  2017  |  | 
|  2018 /* These must exactly match the enum above. */ |  | 
|  2019 /* TODO(shess): Is there some risk that a statement will be used in two |  | 
|  2020 ** cursors at once, e.g.  if a query joins a virtual table to itself? |  | 
|  2021 ** If so perhaps we should move some of these to the cursor object. |  | 
|  2022 */ |  | 
|  2023 static const char *const fulltext_zStatement[MAX_STMT] = { |  | 
|  2024   /* CONTENT_INSERT */ NULL,  /* generated in contentInsertStatement() */ |  | 
|  2025   /* CONTENT_SELECT */ NULL,  /* generated in contentSelectStatement() */ |  | 
|  2026   /* CONTENT_UPDATE */ NULL,  /* generated in contentUpdateStatement() */ |  | 
|  2027   /* CONTENT_DELETE */ "delete from %_content where docid = ?", |  | 
|  2028   /* CONTENT_EXISTS */ "select docid from %_content limit 1", |  | 
|  2029  |  | 
|  2030   /* BLOCK_INSERT */ |  | 
|  2031   "insert into %_segments (blockid, block) values (null, ?)", |  | 
|  2032   /* BLOCK_SELECT */ "select block from %_segments where blockid = ?", |  | 
|  2033   /* BLOCK_DELETE */ "delete from %_segments where blockid between ? and ?", |  | 
|  2034   /* BLOCK_DELETE_ALL */ "delete from %_segments", |  | 
|  2035  |  | 
|  2036   /* SEGDIR_MAX_INDEX */ "select max(idx) from %_segdir where level = ?", |  | 
|  2037   /* SEGDIR_SET */ "insert into %_segdir values (?, ?, ?, ?, ?, ?)", |  | 
|  2038   /* SEGDIR_SELECT_LEVEL */ |  | 
|  2039   "select start_block, leaves_end_block, root, idx from %_segdir " |  | 
|  2040   " where level = ? order by idx", |  | 
|  2041   /* SEGDIR_SPAN */ |  | 
|  2042   "select min(start_block), max(end_block) from %_segdir " |  | 
|  2043   " where level = ? and start_block <> 0", |  | 
|  2044   /* SEGDIR_DELETE */ "delete from %_segdir where level = ?", |  | 
|  2045  |  | 
|  2046   /* NOTE(shess): The first three results of the following two |  | 
|  2047   ** statements must match. |  | 
|  2048   */ |  | 
|  2049   /* SEGDIR_SELECT_SEGMENT */ |  | 
|  2050   "select start_block, leaves_end_block, root from %_segdir " |  | 
|  2051   " where level = ? and idx = ?", |  | 
|  2052   /* SEGDIR_SELECT_ALL */ |  | 
|  2053   "select start_block, leaves_end_block, root from %_segdir " |  | 
|  2054   " order by level desc, idx asc", |  | 
|  2055   /* SEGDIR_DELETE_ALL */ "delete from %_segdir", |  | 
|  2056   /* SEGDIR_COUNT */ "select count(*), ifnull(max(level),0) from %_segdir", |  | 
|  2057 }; |  | 
|  2058  |  | 
|  2059 /* |  | 
|  2060 ** A connection to a fulltext index is an instance of the following |  | 
|  2061 ** structure.  The xCreate and xConnect methods create an instance |  | 
|  2062 ** of this structure and xDestroy and xDisconnect free that instance. |  | 
|  2063 ** All other methods receive a pointer to the structure as one of their |  | 
|  2064 ** arguments. |  | 
|  2065 */ |  | 
|  2066 struct fulltext_vtab { |  | 
|  2067   sqlite3_vtab base;               /* Base class used by SQLite core */ |  | 
|  2068   sqlite3 *db;                     /* The database connection */ |  | 
|  2069   const char *zDb;                 /* logical database name */ |  | 
|  2070   const char *zName;               /* virtual table name */ |  | 
|  2071   int nColumn;                     /* number of columns in virtual table */ |  | 
|  2072   char **azColumn;                 /* column names.  malloced */ |  | 
|  2073   char **azContentColumn;          /* column names in content table; malloced */ |  | 
|  2074   sqlite3_tokenizer *pTokenizer;   /* tokenizer for inserts and queries */ |  | 
|  2075  |  | 
|  2076   /* Precompiled statements which we keep as long as the table is |  | 
|  2077   ** open. |  | 
|  2078   */ |  | 
|  2079   sqlite3_stmt *pFulltextStatements[MAX_STMT]; |  | 
|  2080  |  | 
|  2081   /* Precompiled statements used for segment merges.  We run a |  | 
|  2082   ** separate select across the leaf level of each tree being merged. |  | 
|  2083   */ |  | 
|  2084   sqlite3_stmt *pLeafSelectStmts[MERGE_COUNT]; |  | 
|  2085   /* The statement used to prepare pLeafSelectStmts. */ |  | 
|  2086 #define LEAF_SELECT \ |  | 
|  2087   "select block from %_segments where blockid between ? and ? order by blockid" |  | 
|  2088  |  | 
|  2089   /* These buffer pending index updates during transactions. |  | 
|  2090   ** nPendingData estimates the memory size of the pending data.  It |  | 
|  2091   ** doesn't include the hash-bucket overhead, nor any malloc |  | 
|  2092   ** overhead.  When nPendingData exceeds kPendingThreshold, the |  | 
|  2093   ** buffer is flushed even before the transaction closes. |  | 
|  2094   ** pendingTerms stores the data, and is only valid when nPendingData |  | 
|  2095   ** is >=0 (nPendingData<0 means pendingTerms has not been |  | 
|  2096   ** initialized).  iPrevDocid is the last docid written, used to make |  | 
|  2097   ** certain we're inserting in sorted order. |  | 
|  2098   */ |  | 
|  2099   int nPendingData; |  | 
|  2100 #define kPendingThreshold (1*1024*1024) |  | 
|  2101   sqlite_int64 iPrevDocid; |  | 
|  2102   fts3Hash pendingTerms; |  | 
|  2103 }; |  | 
|  2104  |  | 
|  2105 /* |  | 
|  2106 ** When the core wants to do a query, it create a cursor using a |  | 
|  2107 ** call to xOpen.  This structure is an instance of a cursor.  It |  | 
|  2108 ** is destroyed by xClose. |  | 
|  2109 */ |  | 
|  2110 typedef struct fulltext_cursor { |  | 
|  2111   sqlite3_vtab_cursor base;        /* Base class used by SQLite core */ |  | 
|  2112   QueryType iCursorType;           /* Copy of sqlite3_index_info.idxNum */ |  | 
|  2113   sqlite3_stmt *pStmt;             /* Prepared statement in use by the cursor */ |  | 
|  2114   int eof;                         /* True if at End Of Results */ |  | 
|  2115   Fts3Expr *pExpr;                 /* Parsed MATCH query string */ |  | 
|  2116   Snippet snippet;                 /* Cached snippet for the current row */ |  | 
|  2117   int iColumn;                     /* Column being searched */ |  | 
|  2118   DataBuffer result;               /* Doclist results from fulltextQuery */ |  | 
|  2119   DLReader reader;                 /* Result reader if result not empty */ |  | 
|  2120 } fulltext_cursor; |  | 
|  2121  |  | 
|  2122 static fulltext_vtab *cursor_vtab(fulltext_cursor *c){ |  | 
|  2123   return (fulltext_vtab *) c->base.pVtab; |  | 
|  2124 } |  | 
|  2125  |  | 
|  2126 static const sqlite3_module fts3Module;   /* forward declaration */ |  | 
|  2127  |  | 
|  2128 /* Return a dynamically generated statement of the form |  | 
|  2129  *   insert into %_content (docid, ...) values (?, ...) |  | 
|  2130  */ |  | 
|  2131 static const char *contentInsertStatement(fulltext_vtab *v){ |  | 
|  2132   StringBuffer sb; |  | 
|  2133   int i; |  | 
|  2134  |  | 
|  2135   initStringBuffer(&sb); |  | 
|  2136   append(&sb, "insert into %_content (docid, "); |  | 
|  2137   appendList(&sb, v->nColumn, v->azContentColumn); |  | 
|  2138   append(&sb, ") values (?"); |  | 
|  2139   for(i=0; i<v->nColumn; ++i) |  | 
|  2140     append(&sb, ", ?"); |  | 
|  2141   append(&sb, ")"); |  | 
|  2142   return stringBufferData(&sb); |  | 
|  2143 } |  | 
|  2144  |  | 
|  2145 /* Return a dynamically generated statement of the form |  | 
|  2146  *   select <content columns> from %_content where docid = ? |  | 
|  2147  */ |  | 
|  2148 static const char *contentSelectStatement(fulltext_vtab *v){ |  | 
|  2149   StringBuffer sb; |  | 
|  2150   initStringBuffer(&sb); |  | 
|  2151   append(&sb, "SELECT "); |  | 
|  2152   appendList(&sb, v->nColumn, v->azContentColumn); |  | 
|  2153   append(&sb, " FROM %_content WHERE docid = ?"); |  | 
|  2154   return stringBufferData(&sb); |  | 
|  2155 } |  | 
|  2156  |  | 
|  2157 /* Return a dynamically generated statement of the form |  | 
|  2158  *   update %_content set [col_0] = ?, [col_1] = ?, ... |  | 
|  2159  *                    where docid = ? |  | 
|  2160  */ |  | 
|  2161 static const char *contentUpdateStatement(fulltext_vtab *v){ |  | 
|  2162   StringBuffer sb; |  | 
|  2163   int i; |  | 
|  2164  |  | 
|  2165   initStringBuffer(&sb); |  | 
|  2166   append(&sb, "update %_content set "); |  | 
|  2167   for(i=0; i<v->nColumn; ++i) { |  | 
|  2168     if( i>0 ){ |  | 
|  2169       append(&sb, ", "); |  | 
|  2170     } |  | 
|  2171     append(&sb, v->azContentColumn[i]); |  | 
|  2172     append(&sb, " = ?"); |  | 
|  2173   } |  | 
|  2174   append(&sb, " where docid = ?"); |  | 
|  2175   return stringBufferData(&sb); |  | 
|  2176 } |  | 
|  2177  |  | 
|  2178 /* Puts a freshly-prepared statement determined by iStmt in *ppStmt. |  | 
|  2179 ** If the indicated statement has never been prepared, it is prepared |  | 
|  2180 ** and cached, otherwise the cached version is reset. |  | 
|  2181 */ |  | 
|  2182 static int sql_get_statement(fulltext_vtab *v, fulltext_statement iStmt, |  | 
|  2183                              sqlite3_stmt **ppStmt){ |  | 
|  2184   assert( iStmt<MAX_STMT ); |  | 
|  2185   if( v->pFulltextStatements[iStmt]==NULL ){ |  | 
|  2186     const char *zStmt; |  | 
|  2187     int rc; |  | 
|  2188     switch( iStmt ){ |  | 
|  2189       case CONTENT_INSERT_STMT: |  | 
|  2190         zStmt = contentInsertStatement(v); break; |  | 
|  2191       case CONTENT_SELECT_STMT: |  | 
|  2192         zStmt = contentSelectStatement(v); break; |  | 
|  2193       case CONTENT_UPDATE_STMT: |  | 
|  2194         zStmt = contentUpdateStatement(v); break; |  | 
|  2195       default: |  | 
|  2196         zStmt = fulltext_zStatement[iStmt]; |  | 
|  2197     } |  | 
|  2198     rc = sql_prepare(v->db, v->zDb, v->zName, &v->pFulltextStatements[iStmt], |  | 
|  2199                          zStmt); |  | 
|  2200     if( zStmt != fulltext_zStatement[iStmt]) sqlite3_free((void *) zStmt); |  | 
|  2201     if( rc!=SQLITE_OK ) return rc; |  | 
|  2202   } else { |  | 
|  2203     int rc = sqlite3_reset(v->pFulltextStatements[iStmt]); |  | 
|  2204     if( rc!=SQLITE_OK ) return rc; |  | 
|  2205   } |  | 
|  2206  |  | 
|  2207   *ppStmt = v->pFulltextStatements[iStmt]; |  | 
|  2208   return SQLITE_OK; |  | 
|  2209 } |  | 
|  2210  |  | 
|  2211 /* Like sqlite3_step(), but convert SQLITE_DONE to SQLITE_OK and |  | 
|  2212 ** SQLITE_ROW to SQLITE_ERROR.  Useful for statements like UPDATE, |  | 
|  2213 ** where we expect no results. |  | 
|  2214 */ |  | 
|  2215 static int sql_single_step(sqlite3_stmt *s){ |  | 
|  2216   int rc = sqlite3_step(s); |  | 
|  2217   return (rc==SQLITE_DONE) ? SQLITE_OK : rc; |  | 
|  2218 } |  | 
|  2219  |  | 
|  2220 /* Like sql_get_statement(), but for special replicated LEAF_SELECT |  | 
|  2221 ** statements.  idx -1 is a special case for an uncached version of |  | 
|  2222 ** the statement (used in the optimize implementation). |  | 
|  2223 */ |  | 
|  2224 /* TODO(shess) Write version for generic statements and then share |  | 
|  2225 ** that between the cached-statement functions. |  | 
|  2226 */ |  | 
|  2227 static int sql_get_leaf_statement(fulltext_vtab *v, int idx, |  | 
|  2228                                   sqlite3_stmt **ppStmt){ |  | 
|  2229   assert( idx>=-1 && idx<MERGE_COUNT ); |  | 
|  2230   if( idx==-1 ){ |  | 
|  2231     return sql_prepare(v->db, v->zDb, v->zName, ppStmt, LEAF_SELECT); |  | 
|  2232   }else if( v->pLeafSelectStmts[idx]==NULL ){ |  | 
|  2233     int rc = sql_prepare(v->db, v->zDb, v->zName, &v->pLeafSelectStmts[idx], |  | 
|  2234                          LEAF_SELECT); |  | 
|  2235     if( rc!=SQLITE_OK ) return rc; |  | 
|  2236   }else{ |  | 
|  2237     int rc = sqlite3_reset(v->pLeafSelectStmts[idx]); |  | 
|  2238     if( rc!=SQLITE_OK ) return rc; |  | 
|  2239   } |  | 
|  2240  |  | 
|  2241   *ppStmt = v->pLeafSelectStmts[idx]; |  | 
|  2242   return SQLITE_OK; |  | 
|  2243 } |  | 
|  2244  |  | 
|  2245 /* insert into %_content (docid, ...) values ([docid], [pValues]) |  | 
|  2246 ** If the docid contains SQL NULL, then a unique docid will be |  | 
|  2247 ** generated. |  | 
|  2248 */ |  | 
|  2249 static int content_insert(fulltext_vtab *v, sqlite3_value *docid, |  | 
|  2250                           sqlite3_value **pValues){ |  | 
|  2251   sqlite3_stmt *s; |  | 
|  2252   int i; |  | 
|  2253   int rc = sql_get_statement(v, CONTENT_INSERT_STMT, &s); |  | 
|  2254   if( rc!=SQLITE_OK ) return rc; |  | 
|  2255  |  | 
|  2256   rc = sqlite3_bind_value(s, 1, docid); |  | 
|  2257   if( rc!=SQLITE_OK ) return rc; |  | 
|  2258  |  | 
|  2259   for(i=0; i<v->nColumn; ++i){ |  | 
|  2260     rc = sqlite3_bind_value(s, 2+i, pValues[i]); |  | 
|  2261     if( rc!=SQLITE_OK ) return rc; |  | 
|  2262   } |  | 
|  2263  |  | 
|  2264   return sql_single_step(s); |  | 
|  2265 } |  | 
|  2266  |  | 
|  2267 /* update %_content set col0 = pValues[0], col1 = pValues[1], ... |  | 
|  2268  *                  where docid = [iDocid] */ |  | 
|  2269 static int content_update(fulltext_vtab *v, sqlite3_value **pValues, |  | 
|  2270                           sqlite_int64 iDocid){ |  | 
|  2271   sqlite3_stmt *s; |  | 
|  2272   int i; |  | 
|  2273   int rc = sql_get_statement(v, CONTENT_UPDATE_STMT, &s); |  | 
|  2274   if( rc!=SQLITE_OK ) return rc; |  | 
|  2275  |  | 
|  2276   for(i=0; i<v->nColumn; ++i){ |  | 
|  2277     rc = sqlite3_bind_value(s, 1+i, pValues[i]); |  | 
|  2278     if( rc!=SQLITE_OK ) return rc; |  | 
|  2279   } |  | 
|  2280  |  | 
|  2281   rc = sqlite3_bind_int64(s, 1+v->nColumn, iDocid); |  | 
|  2282   if( rc!=SQLITE_OK ) return rc; |  | 
|  2283  |  | 
|  2284   return sql_single_step(s); |  | 
|  2285 } |  | 
|  2286  |  | 
|  2287 static void freeStringArray(int nString, const char **pString){ |  | 
|  2288   int i; |  | 
|  2289  |  | 
|  2290   for (i=0 ; i < nString ; ++i) { |  | 
|  2291     if( pString[i]!=NULL ) sqlite3_free((void *) pString[i]); |  | 
|  2292   } |  | 
|  2293   sqlite3_free((void *) pString); |  | 
|  2294 } |  | 
|  2295  |  | 
|  2296 /* select * from %_content where docid = [iDocid] |  | 
|  2297  * The caller must delete the returned array and all strings in it. |  | 
|  2298  * null fields will be NULL in the returned array. |  | 
|  2299  * |  | 
|  2300  * TODO: Perhaps we should return pointer/length strings here for consistency |  | 
|  2301  * with other code which uses pointer/length. */ |  | 
|  2302 static int content_select(fulltext_vtab *v, sqlite_int64 iDocid, |  | 
|  2303                           const char ***pValues){ |  | 
|  2304   sqlite3_stmt *s; |  | 
|  2305   const char **values; |  | 
|  2306   int i; |  | 
|  2307   int rc; |  | 
|  2308  |  | 
|  2309   *pValues = NULL; |  | 
|  2310  |  | 
|  2311   rc = sql_get_statement(v, CONTENT_SELECT_STMT, &s); |  | 
|  2312   if( rc!=SQLITE_OK ) return rc; |  | 
|  2313  |  | 
|  2314   rc = sqlite3_bind_int64(s, 1, iDocid); |  | 
|  2315   if( rc!=SQLITE_OK ) return rc; |  | 
|  2316  |  | 
|  2317   rc = sqlite3_step(s); |  | 
|  2318   if( rc!=SQLITE_ROW ) return rc; |  | 
|  2319  |  | 
|  2320   values = (const char **) sqlite3_malloc(v->nColumn * sizeof(const char *)); |  | 
|  2321   for(i=0; i<v->nColumn; ++i){ |  | 
|  2322     if( sqlite3_column_type(s, i)==SQLITE_NULL ){ |  | 
|  2323       values[i] = NULL; |  | 
|  2324     }else{ |  | 
|  2325       values[i] = string_dup((char*)sqlite3_column_text(s, i)); |  | 
|  2326     } |  | 
|  2327   } |  | 
|  2328  |  | 
|  2329   /* We expect only one row.  We must execute another sqlite3_step() |  | 
|  2330    * to complete the iteration; otherwise the table will remain locked. */ |  | 
|  2331   rc = sqlite3_step(s); |  | 
|  2332   if( rc==SQLITE_DONE ){ |  | 
|  2333     *pValues = values; |  | 
|  2334     return SQLITE_OK; |  | 
|  2335   } |  | 
|  2336  |  | 
|  2337   freeStringArray(v->nColumn, values); |  | 
|  2338   return rc; |  | 
|  2339 } |  | 
|  2340  |  | 
|  2341 /* delete from %_content where docid = [iDocid ] */ |  | 
|  2342 static int content_delete(fulltext_vtab *v, sqlite_int64 iDocid){ |  | 
|  2343   sqlite3_stmt *s; |  | 
|  2344   int rc = sql_get_statement(v, CONTENT_DELETE_STMT, &s); |  | 
|  2345   if( rc!=SQLITE_OK ) return rc; |  | 
|  2346  |  | 
|  2347   rc = sqlite3_bind_int64(s, 1, iDocid); |  | 
|  2348   if( rc!=SQLITE_OK ) return rc; |  | 
|  2349  |  | 
|  2350   return sql_single_step(s); |  | 
|  2351 } |  | 
|  2352  |  | 
|  2353 /* Returns SQLITE_ROW if any rows exist in %_content, SQLITE_DONE if |  | 
|  2354 ** no rows exist, and any error in case of failure. |  | 
|  2355 */ |  | 
|  2356 static int content_exists(fulltext_vtab *v){ |  | 
|  2357   sqlite3_stmt *s; |  | 
|  2358   int rc = sql_get_statement(v, CONTENT_EXISTS_STMT, &s); |  | 
|  2359   if( rc!=SQLITE_OK ) return rc; |  | 
|  2360  |  | 
|  2361   rc = sqlite3_step(s); |  | 
|  2362   if( rc!=SQLITE_ROW ) return rc; |  | 
|  2363  |  | 
|  2364   /* We expect only one row.  We must execute another sqlite3_step() |  | 
|  2365    * to complete the iteration; otherwise the table will remain locked. */ |  | 
|  2366   rc = sqlite3_step(s); |  | 
|  2367   if( rc==SQLITE_DONE ) return SQLITE_ROW; |  | 
|  2368   if( rc==SQLITE_ROW ) return SQLITE_ERROR; |  | 
|  2369   return rc; |  | 
|  2370 } |  | 
|  2371  |  | 
|  2372 /* insert into %_segments values ([pData]) |  | 
|  2373 **   returns assigned blockid in *piBlockid |  | 
|  2374 */ |  | 
|  2375 static int block_insert(fulltext_vtab *v, const char *pData, int nData, |  | 
|  2376                         sqlite_int64 *piBlockid){ |  | 
|  2377   sqlite3_stmt *s; |  | 
|  2378   int rc = sql_get_statement(v, BLOCK_INSERT_STMT, &s); |  | 
|  2379   if( rc!=SQLITE_OK ) return rc; |  | 
|  2380  |  | 
|  2381   rc = sqlite3_bind_blob(s, 1, pData, nData, SQLITE_STATIC); |  | 
|  2382   if( rc!=SQLITE_OK ) return rc; |  | 
|  2383  |  | 
|  2384   rc = sqlite3_step(s); |  | 
|  2385   if( rc==SQLITE_ROW ) return SQLITE_ERROR; |  | 
|  2386   if( rc!=SQLITE_DONE ) return rc; |  | 
|  2387  |  | 
|  2388   /* blockid column is an alias for rowid. */ |  | 
|  2389   *piBlockid = sqlite3_last_insert_rowid(v->db); |  | 
|  2390   return SQLITE_OK; |  | 
|  2391 } |  | 
|  2392  |  | 
|  2393 /* delete from %_segments |  | 
|  2394 **   where blockid between [iStartBlockid] and [iEndBlockid] |  | 
|  2395 ** |  | 
|  2396 ** Deletes the range of blocks, inclusive, used to delete the blocks |  | 
|  2397 ** which form a segment. |  | 
|  2398 */ |  | 
|  2399 static int block_delete(fulltext_vtab *v, |  | 
|  2400                         sqlite_int64 iStartBlockid, sqlite_int64 iEndBlockid){ |  | 
|  2401   sqlite3_stmt *s; |  | 
|  2402   int rc = sql_get_statement(v, BLOCK_DELETE_STMT, &s); |  | 
|  2403   if( rc!=SQLITE_OK ) return rc; |  | 
|  2404  |  | 
|  2405   rc = sqlite3_bind_int64(s, 1, iStartBlockid); |  | 
|  2406   if( rc!=SQLITE_OK ) return rc; |  | 
|  2407  |  | 
|  2408   rc = sqlite3_bind_int64(s, 2, iEndBlockid); |  | 
|  2409   if( rc!=SQLITE_OK ) return rc; |  | 
|  2410  |  | 
|  2411   return sql_single_step(s); |  | 
|  2412 } |  | 
|  2413  |  | 
|  2414 /* Returns SQLITE_ROW with *pidx set to the maximum segment idx found |  | 
|  2415 ** at iLevel.  Returns SQLITE_DONE if there are no segments at |  | 
|  2416 ** iLevel.  Otherwise returns an error. |  | 
|  2417 */ |  | 
|  2418 static int segdir_max_index(fulltext_vtab *v, int iLevel, int *pidx){ |  | 
|  2419   sqlite3_stmt *s; |  | 
|  2420   int rc = sql_get_statement(v, SEGDIR_MAX_INDEX_STMT, &s); |  | 
|  2421   if( rc!=SQLITE_OK ) return rc; |  | 
|  2422  |  | 
|  2423   rc = sqlite3_bind_int(s, 1, iLevel); |  | 
|  2424   if( rc!=SQLITE_OK ) return rc; |  | 
|  2425  |  | 
|  2426   rc = sqlite3_step(s); |  | 
|  2427   /* Should always get at least one row due to how max() works. */ |  | 
|  2428   if( rc==SQLITE_DONE ) return SQLITE_DONE; |  | 
|  2429   if( rc!=SQLITE_ROW ) return rc; |  | 
|  2430  |  | 
|  2431   /* NULL means that there were no inputs to max(). */ |  | 
|  2432   if( SQLITE_NULL==sqlite3_column_type(s, 0) ){ |  | 
|  2433     rc = sqlite3_step(s); |  | 
|  2434     if( rc==SQLITE_ROW ) return SQLITE_ERROR; |  | 
|  2435     return rc; |  | 
|  2436   } |  | 
|  2437  |  | 
|  2438   *pidx = sqlite3_column_int(s, 0); |  | 
|  2439  |  | 
|  2440   /* We expect only one row.  We must execute another sqlite3_step() |  | 
|  2441    * to complete the iteration; otherwise the table will remain locked. */ |  | 
|  2442   rc = sqlite3_step(s); |  | 
|  2443   if( rc==SQLITE_ROW ) return SQLITE_ERROR; |  | 
|  2444   if( rc!=SQLITE_DONE ) return rc; |  | 
|  2445   return SQLITE_ROW; |  | 
|  2446 } |  | 
|  2447  |  | 
|  2448 /* insert into %_segdir values ( |  | 
|  2449 **   [iLevel], [idx], |  | 
|  2450 **   [iStartBlockid], [iLeavesEndBlockid], [iEndBlockid], |  | 
|  2451 **   [pRootData] |  | 
|  2452 ** ) |  | 
|  2453 */ |  | 
|  2454 static int segdir_set(fulltext_vtab *v, int iLevel, int idx, |  | 
|  2455                       sqlite_int64 iStartBlockid, |  | 
|  2456                       sqlite_int64 iLeavesEndBlockid, |  | 
|  2457                       sqlite_int64 iEndBlockid, |  | 
|  2458                       const char *pRootData, int nRootData){ |  | 
|  2459   sqlite3_stmt *s; |  | 
|  2460   int rc = sql_get_statement(v, SEGDIR_SET_STMT, &s); |  | 
|  2461   if( rc!=SQLITE_OK ) return rc; |  | 
|  2462  |  | 
|  2463   rc = sqlite3_bind_int(s, 1, iLevel); |  | 
|  2464   if( rc!=SQLITE_OK ) return rc; |  | 
|  2465  |  | 
|  2466   rc = sqlite3_bind_int(s, 2, idx); |  | 
|  2467   if( rc!=SQLITE_OK ) return rc; |  | 
|  2468  |  | 
|  2469   rc = sqlite3_bind_int64(s, 3, iStartBlockid); |  | 
|  2470   if( rc!=SQLITE_OK ) return rc; |  | 
|  2471  |  | 
|  2472   rc = sqlite3_bind_int64(s, 4, iLeavesEndBlockid); |  | 
|  2473   if( rc!=SQLITE_OK ) return rc; |  | 
|  2474  |  | 
|  2475   rc = sqlite3_bind_int64(s, 5, iEndBlockid); |  | 
|  2476   if( rc!=SQLITE_OK ) return rc; |  | 
|  2477  |  | 
|  2478   rc = sqlite3_bind_blob(s, 6, pRootData, nRootData, SQLITE_STATIC); |  | 
|  2479   if( rc!=SQLITE_OK ) return rc; |  | 
|  2480  |  | 
|  2481   return sql_single_step(s); |  | 
|  2482 } |  | 
|  2483  |  | 
|  2484 /* Queries %_segdir for the block span of the segments in level |  | 
|  2485 ** iLevel.  Returns SQLITE_DONE if there are no blocks for iLevel, |  | 
|  2486 ** SQLITE_ROW if there are blocks, else an error. |  | 
|  2487 */ |  | 
|  2488 static int segdir_span(fulltext_vtab *v, int iLevel, |  | 
|  2489                        sqlite_int64 *piStartBlockid, |  | 
|  2490                        sqlite_int64 *piEndBlockid){ |  | 
|  2491   sqlite3_stmt *s; |  | 
|  2492   int rc = sql_get_statement(v, SEGDIR_SPAN_STMT, &s); |  | 
|  2493   if( rc!=SQLITE_OK ) return rc; |  | 
|  2494  |  | 
|  2495   rc = sqlite3_bind_int(s, 1, iLevel); |  | 
|  2496   if( rc!=SQLITE_OK ) return rc; |  | 
|  2497  |  | 
|  2498   rc = sqlite3_step(s); |  | 
|  2499   if( rc==SQLITE_DONE ) return SQLITE_DONE;  /* Should never happen */ |  | 
|  2500   if( rc!=SQLITE_ROW ) return rc; |  | 
|  2501  |  | 
|  2502   /* This happens if all segments at this level are entirely inline. */ |  | 
|  2503   if( SQLITE_NULL==sqlite3_column_type(s, 0) ){ |  | 
|  2504     /* We expect only one row.  We must execute another sqlite3_step() |  | 
|  2505      * to complete the iteration; otherwise the table will remain locked. */ |  | 
|  2506     int rc2 = sqlite3_step(s); |  | 
|  2507     if( rc2==SQLITE_ROW ) return SQLITE_ERROR; |  | 
|  2508     return rc2; |  | 
|  2509   } |  | 
|  2510  |  | 
|  2511   *piStartBlockid = sqlite3_column_int64(s, 0); |  | 
|  2512   *piEndBlockid = sqlite3_column_int64(s, 1); |  | 
|  2513  |  | 
|  2514   /* We expect only one row.  We must execute another sqlite3_step() |  | 
|  2515    * to complete the iteration; otherwise the table will remain locked. */ |  | 
|  2516   rc = sqlite3_step(s); |  | 
|  2517   if( rc==SQLITE_ROW ) return SQLITE_ERROR; |  | 
|  2518   if( rc!=SQLITE_DONE ) return rc; |  | 
|  2519   return SQLITE_ROW; |  | 
|  2520 } |  | 
|  2521  |  | 
|  2522 /* Delete the segment blocks and segment directory records for all |  | 
|  2523 ** segments at iLevel. |  | 
|  2524 */ |  | 
|  2525 static int segdir_delete(fulltext_vtab *v, int iLevel){ |  | 
|  2526   sqlite3_stmt *s; |  | 
|  2527   sqlite_int64 iStartBlockid, iEndBlockid; |  | 
|  2528   int rc = segdir_span(v, iLevel, &iStartBlockid, &iEndBlockid); |  | 
|  2529   if( rc!=SQLITE_ROW && rc!=SQLITE_DONE ) return rc; |  | 
|  2530  |  | 
|  2531   if( rc==SQLITE_ROW ){ |  | 
|  2532     rc = block_delete(v, iStartBlockid, iEndBlockid); |  | 
|  2533     if( rc!=SQLITE_OK ) return rc; |  | 
|  2534   } |  | 
|  2535  |  | 
|  2536   /* Delete the segment directory itself. */ |  | 
|  2537   rc = sql_get_statement(v, SEGDIR_DELETE_STMT, &s); |  | 
|  2538   if( rc!=SQLITE_OK ) return rc; |  | 
|  2539  |  | 
|  2540   rc = sqlite3_bind_int64(s, 1, iLevel); |  | 
|  2541   if( rc!=SQLITE_OK ) return rc; |  | 
|  2542  |  | 
|  2543   return sql_single_step(s); |  | 
|  2544 } |  | 
|  2545  |  | 
|  2546 /* Delete entire fts index, SQLITE_OK on success, relevant error on |  | 
|  2547 ** failure. |  | 
|  2548 */ |  | 
|  2549 static int segdir_delete_all(fulltext_vtab *v){ |  | 
|  2550   sqlite3_stmt *s; |  | 
|  2551   int rc = sql_get_statement(v, SEGDIR_DELETE_ALL_STMT, &s); |  | 
|  2552   if( rc!=SQLITE_OK ) return rc; |  | 
|  2553  |  | 
|  2554   rc = sql_single_step(s); |  | 
|  2555   if( rc!=SQLITE_OK ) return rc; |  | 
|  2556  |  | 
|  2557   rc = sql_get_statement(v, BLOCK_DELETE_ALL_STMT, &s); |  | 
|  2558   if( rc!=SQLITE_OK ) return rc; |  | 
|  2559  |  | 
|  2560   return sql_single_step(s); |  | 
|  2561 } |  | 
|  2562  |  | 
|  2563 /* Returns SQLITE_OK with *pnSegments set to the number of entries in |  | 
|  2564 ** %_segdir and *piMaxLevel set to the highest level which has a |  | 
|  2565 ** segment.  Otherwise returns the SQLite error which caused failure. |  | 
|  2566 */ |  | 
|  2567 static int segdir_count(fulltext_vtab *v, int *pnSegments, int *piMaxLevel){ |  | 
|  2568   sqlite3_stmt *s; |  | 
|  2569   int rc = sql_get_statement(v, SEGDIR_COUNT_STMT, &s); |  | 
|  2570   if( rc!=SQLITE_OK ) return rc; |  | 
|  2571  |  | 
|  2572   rc = sqlite3_step(s); |  | 
|  2573   /* TODO(shess): This case should not be possible?  Should stronger |  | 
|  2574   ** measures be taken if it happens? |  | 
|  2575   */ |  | 
|  2576   if( rc==SQLITE_DONE ){ |  | 
|  2577     *pnSegments = 0; |  | 
|  2578     *piMaxLevel = 0; |  | 
|  2579     return SQLITE_OK; |  | 
|  2580   } |  | 
|  2581   if( rc!=SQLITE_ROW ) return rc; |  | 
|  2582  |  | 
|  2583   *pnSegments = sqlite3_column_int(s, 0); |  | 
|  2584   *piMaxLevel = sqlite3_column_int(s, 1); |  | 
|  2585  |  | 
|  2586   /* We expect only one row.  We must execute another sqlite3_step() |  | 
|  2587    * to complete the iteration; otherwise the table will remain locked. */ |  | 
|  2588   rc = sqlite3_step(s); |  | 
|  2589   if( rc==SQLITE_DONE ) return SQLITE_OK; |  | 
|  2590   if( rc==SQLITE_ROW ) return SQLITE_ERROR; |  | 
|  2591   return rc; |  | 
|  2592 } |  | 
|  2593  |  | 
|  2594 /* TODO(shess) clearPendingTerms() is far down the file because |  | 
|  2595 ** writeZeroSegment() is far down the file because LeafWriter is far |  | 
|  2596 ** down the file.  Consider refactoring the code to move the non-vtab |  | 
|  2597 ** code above the vtab code so that we don't need this forward |  | 
|  2598 ** reference. |  | 
|  2599 */ |  | 
|  2600 static int clearPendingTerms(fulltext_vtab *v); |  | 
|  2601  |  | 
|  2602 /* |  | 
|  2603 ** Free the memory used to contain a fulltext_vtab structure. |  | 
|  2604 */ |  | 
|  2605 static void fulltext_vtab_destroy(fulltext_vtab *v){ |  | 
|  2606   int iStmt, i; |  | 
|  2607  |  | 
|  2608   FTSTRACE(("FTS3 Destroy %p\n", v)); |  | 
|  2609   for( iStmt=0; iStmt<MAX_STMT; iStmt++ ){ |  | 
|  2610     if( v->pFulltextStatements[iStmt]!=NULL ){ |  | 
|  2611       sqlite3_finalize(v->pFulltextStatements[iStmt]); |  | 
|  2612       v->pFulltextStatements[iStmt] = NULL; |  | 
|  2613     } |  | 
|  2614   } |  | 
|  2615  |  | 
|  2616   for( i=0; i<MERGE_COUNT; i++ ){ |  | 
|  2617     if( v->pLeafSelectStmts[i]!=NULL ){ |  | 
|  2618       sqlite3_finalize(v->pLeafSelectStmts[i]); |  | 
|  2619       v->pLeafSelectStmts[i] = NULL; |  | 
|  2620     } |  | 
|  2621   } |  | 
|  2622  |  | 
|  2623   if( v->pTokenizer!=NULL ){ |  | 
|  2624     v->pTokenizer->pModule->xDestroy(v->pTokenizer); |  | 
|  2625     v->pTokenizer = NULL; |  | 
|  2626   } |  | 
|  2627  |  | 
|  2628   clearPendingTerms(v); |  | 
|  2629  |  | 
|  2630   sqlite3_free(v->azColumn); |  | 
|  2631   for(i = 0; i < v->nColumn; ++i) { |  | 
|  2632     sqlite3_free(v->azContentColumn[i]); |  | 
|  2633   } |  | 
|  2634   sqlite3_free(v->azContentColumn); |  | 
|  2635   sqlite3_free(v); |  | 
|  2636 } |  | 
|  2637  |  | 
|  2638 /* |  | 
|  2639 ** Token types for parsing the arguments to xConnect or xCreate. |  | 
|  2640 */ |  | 
|  2641 #define TOKEN_EOF         0    /* End of file */ |  | 
|  2642 #define TOKEN_SPACE       1    /* Any kind of whitespace */ |  | 
|  2643 #define TOKEN_ID          2    /* An identifier */ |  | 
|  2644 #define TOKEN_STRING      3    /* A string literal */ |  | 
|  2645 #define TOKEN_PUNCT       4    /* A single punctuation character */ |  | 
|  2646  |  | 
|  2647 /* |  | 
|  2648 ** If X is a character that can be used in an identifier then |  | 
|  2649 ** ftsIdChar(X) will be true.  Otherwise it is false. |  | 
|  2650 ** |  | 
|  2651 ** For ASCII, any character with the high-order bit set is |  | 
|  2652 ** allowed in an identifier.  For 7-bit characters,  |  | 
|  2653 ** isFtsIdChar[X] must be 1. |  | 
|  2654 ** |  | 
|  2655 ** Ticket #1066.  the SQL standard does not allow '$' in the |  | 
|  2656 ** middle of identfiers.  But many SQL implementations do.  |  | 
|  2657 ** SQLite will allow '$' in identifiers for compatibility. |  | 
|  2658 ** But the feature is undocumented. |  | 
|  2659 */ |  | 
|  2660 static const char isFtsIdChar[] = { |  | 
|  2661 /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */ |  | 
|  2662     0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 2x */ |  | 
|  2663     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 3x */ |  | 
|  2664     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 4x */ |  | 
|  2665     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,  /* 5x */ |  | 
|  2666     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 6x */ |  | 
|  2667     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,  /* 7x */ |  | 
|  2668 }; |  | 
|  2669 #define ftsIdChar(C)  (((c=C)&0x80)!=0 || (c>0x1f && isFtsIdChar[c-0x20])) |  | 
|  2670  |  | 
|  2671  |  | 
|  2672 /* |  | 
|  2673 ** Return the length of the token that begins at z[0].  |  | 
|  2674 ** Store the token type in *tokenType before returning. |  | 
|  2675 */ |  | 
|  2676 static int ftsGetToken(const char *z, int *tokenType){ |  | 
|  2677   int i, c; |  | 
|  2678   switch( *z ){ |  | 
|  2679     case 0: { |  | 
|  2680       *tokenType = TOKEN_EOF; |  | 
|  2681       return 0; |  | 
|  2682     } |  | 
|  2683     case ' ': case '\t': case '\n': case '\f': case '\r': { |  | 
|  2684       for(i=1; safe_isspace(z[i]); i++){} |  | 
|  2685       *tokenType = TOKEN_SPACE; |  | 
|  2686       return i; |  | 
|  2687     } |  | 
|  2688     case '`': |  | 
|  2689     case '\'': |  | 
|  2690     case '"': { |  | 
|  2691       int delim = z[0]; |  | 
|  2692       for(i=1; (c=z[i])!=0; i++){ |  | 
|  2693         if( c==delim ){ |  | 
|  2694           if( z[i+1]==delim ){ |  | 
|  2695             i++; |  | 
|  2696           }else{ |  | 
|  2697             break; |  | 
|  2698           } |  | 
|  2699         } |  | 
|  2700       } |  | 
|  2701       *tokenType = TOKEN_STRING; |  | 
|  2702       return i + (c!=0); |  | 
|  2703     } |  | 
|  2704     case '[': { |  | 
|  2705       for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){} |  | 
|  2706       *tokenType = TOKEN_ID; |  | 
|  2707       return i; |  | 
|  2708     } |  | 
|  2709     default: { |  | 
|  2710       if( !ftsIdChar(*z) ){ |  | 
|  2711         break; |  | 
|  2712       } |  | 
|  2713       for(i=1; ftsIdChar(z[i]); i++){} |  | 
|  2714       *tokenType = TOKEN_ID; |  | 
|  2715       return i; |  | 
|  2716     } |  | 
|  2717   } |  | 
|  2718   *tokenType = TOKEN_PUNCT; |  | 
|  2719   return 1; |  | 
|  2720 } |  | 
|  2721  |  | 
|  2722 /* |  | 
|  2723 ** A token extracted from a string is an instance of the following |  | 
|  2724 ** structure. |  | 
|  2725 */ |  | 
|  2726 typedef struct FtsToken { |  | 
|  2727   const char *z;       /* Pointer to token text.  Not '\000' terminated */ |  | 
|  2728   short int n;         /* Length of the token text in bytes. */ |  | 
|  2729 } FtsToken; |  | 
|  2730  |  | 
|  2731 /* |  | 
|  2732 ** Given a input string (which is really one of the argv[] parameters |  | 
|  2733 ** passed into xConnect or xCreate) split the string up into tokens. |  | 
|  2734 ** Return an array of pointers to '\000' terminated strings, one string |  | 
|  2735 ** for each non-whitespace token. |  | 
|  2736 ** |  | 
|  2737 ** The returned array is terminated by a single NULL pointer. |  | 
|  2738 ** |  | 
|  2739 ** Space to hold the returned array is obtained from a single |  | 
|  2740 ** malloc and should be freed by passing the return value to free(). |  | 
|  2741 ** The individual strings within the token list are all a part of |  | 
|  2742 ** the single memory allocation and will all be freed at once. |  | 
|  2743 */ |  | 
|  2744 static char **tokenizeString(const char *z, int *pnToken){ |  | 
|  2745   int nToken = 0; |  | 
|  2746   FtsToken *aToken = sqlite3_malloc( strlen(z) * sizeof(aToken[0]) ); |  | 
|  2747   int n = 1; |  | 
|  2748   int e, i; |  | 
|  2749   int totalSize = 0; |  | 
|  2750   char **azToken; |  | 
|  2751   char *zCopy; |  | 
|  2752   while( n>0 ){ |  | 
|  2753     n = ftsGetToken(z, &e); |  | 
|  2754     if( e!=TOKEN_SPACE ){ |  | 
|  2755       aToken[nToken].z = z; |  | 
|  2756       aToken[nToken].n = n; |  | 
|  2757       nToken++; |  | 
|  2758       totalSize += n+1; |  | 
|  2759     } |  | 
|  2760     z += n; |  | 
|  2761   } |  | 
|  2762   azToken = (char**)sqlite3_malloc( nToken*sizeof(char*) + totalSize ); |  | 
|  2763   zCopy = (char*)&azToken[nToken]; |  | 
|  2764   nToken--; |  | 
|  2765   for(i=0; i<nToken; i++){ |  | 
|  2766     azToken[i] = zCopy; |  | 
|  2767     n = aToken[i].n; |  | 
|  2768     memcpy(zCopy, aToken[i].z, n); |  | 
|  2769     zCopy[n] = 0; |  | 
|  2770     zCopy += n+1; |  | 
|  2771   } |  | 
|  2772   azToken[nToken] = 0; |  | 
|  2773   sqlite3_free(aToken); |  | 
|  2774   *pnToken = nToken; |  | 
|  2775   return azToken; |  | 
|  2776 } |   372 } | 
|  2777  |   373  | 
|  2778 /* |   374 /* | 
|  2779 ** Convert an SQL-style quoted string into a normal string by removing |   375 ** Convert an SQL-style quoted string into a normal string by removing | 
|  2780 ** the quote characters.  The conversion is done in-place.  If the |   376 ** the quote characters.  The conversion is done in-place.  If the | 
|  2781 ** input does not begin with a quote character, then this routine |   377 ** input does not begin with a quote character, then this routine | 
|  2782 ** is a no-op. |   378 ** is a no-op. | 
|  2783 ** |   379 ** | 
|  2784 ** Examples: |   380 ** Examples: | 
|  2785 ** |   381 ** | 
|  2786 **     "abc"   becomes   abc |   382 **     "abc"   becomes   abc | 
|  2787 **     'xyz'   becomes   xyz |   383 **     'xyz'   becomes   xyz | 
|  2788 **     [pqr]   becomes   pqr |   384 **     [pqr]   becomes   pqr | 
|  2789 **     `mno`   becomes   mno |   385 **     `mno`   becomes   mno | 
|  2790 */ |   386 ** | 
|  2791 static void dequoteString(char *z){ |   387 */ | 
|  2792   int quote; |   388 void sqlite3Fts3Dequote(char *z){ | 
|  2793   int i, j; |   389   char quote;                     /* Quote character (if any ) */ | 
|  2794   if( z==0 ) return; |   390  | 
|  2795   quote = z[0]; |   391   quote = z[0]; | 
|  2796   switch( quote ){ |   392   if( quote=='[' || quote=='\'' || quote=='"' || quote=='`' ){ | 
|  2797     case '\'':  break; |   393     int iIn = 1;                  /* Index of next byte to read from input */ | 
|  2798     case '"':   break; |   394     int iOut = 0;                 /* Index of next byte to write to output */ | 
|  2799     case '`':   break;                /* For MySQL compatibility */ |   395  | 
|  2800     case '[':   quote = ']';  break;  /* For MS SqlServer compatibility */ |   396     /* If the first byte was a '[', then the close-quote character is a ']' */ | 
|  2801     default:    return; |   397     if( quote=='[' ) quote = ']';   | 
|  2802   } |   398  | 
|  2803   for(i=1, j=0; z[i]; i++){ |   399     while( ALWAYS(z[iIn]) ){ | 
|  2804     if( z[i]==quote ){ |   400       if( z[iIn]==quote ){ | 
|  2805       if( z[i+1]==quote ){ |   401         if( z[iIn+1]!=quote ) break; | 
|  2806         z[j++] = quote; |   402         z[iOut++] = quote; | 
|  2807         i++; |   403         iIn += 2; | 
|  2808       }else{ |   404       }else{ | 
|  2809         z[j++] = 0; |   405         z[iOut++] = z[iIn++]; | 
 |   406       } | 
 |   407     } | 
 |   408     z[iOut] = '\0'; | 
 |   409   } | 
 |   410 } | 
 |   411  | 
 |   412 /* | 
 |   413 ** Read a single varint from the doclist at *pp and advance *pp to point | 
 |   414 ** to the first byte past the end of the varint.  Add the value of the varint | 
 |   415 ** to *pVal. | 
 |   416 */ | 
 |   417 static void fts3GetDeltaVarint(char **pp, sqlite3_int64 *pVal){ | 
 |   418   sqlite3_int64 iVal; | 
 |   419   *pp += sqlite3Fts3GetVarint(*pp, &iVal); | 
 |   420   *pVal += iVal; | 
 |   421 } | 
 |   422  | 
 |   423 /* | 
 |   424 ** As long as *pp has not reached its end (pEnd), then do the same | 
 |   425 ** as fts3GetDeltaVarint(): read a single varint and add it to *pVal. | 
 |   426 ** But if *pp has already reached pEnd, just set *pp=0 and | 
 |   427 ** leave *pVal unchanged. | 
 |   428 */ | 
 |   429 static void fts3GetDeltaVarint2(char **pp, char *pEnd, sqlite3_int64 *pVal){ | 
 |   430   if( *pp>=pEnd ){ | 
 |   431     *pp = 0; | 
 |   432   }else{ | 
 |   433     fts3GetDeltaVarint(pp, pVal); | 
 |   434   } | 
 |   435 } | 
 |   436  | 
 |   437 /* | 
 |   438 ** The xDisconnect() virtual table method. | 
 |   439 */ | 
 |   440 static int fts3DisconnectMethod(sqlite3_vtab *pVtab){ | 
 |   441   Fts3Table *p = (Fts3Table *)pVtab; | 
 |   442   int i; | 
 |   443  | 
 |   444   assert( p->nPendingData==0 ); | 
 |   445   assert( p->pSegments==0 ); | 
 |   446  | 
 |   447   /* Free any prepared statements held */ | 
 |   448   for(i=0; i<SizeofArray(p->aStmt); i++){ | 
 |   449     sqlite3_finalize(p->aStmt[i]); | 
 |   450   } | 
 |   451   sqlite3_free(p->zSegmentsTbl); | 
 |   452   sqlite3_free(p->zReadExprlist); | 
 |   453   sqlite3_free(p->zWriteExprlist); | 
 |   454  | 
 |   455   /* Invoke the tokenizer destructor to free the tokenizer. */ | 
 |   456   p->pTokenizer->pModule->xDestroy(p->pTokenizer); | 
 |   457  | 
 |   458   sqlite3_free(p); | 
 |   459   return SQLITE_OK; | 
 |   460 } | 
 |   461  | 
 |   462 /* | 
 |   463 ** Construct one or more SQL statements from the format string given | 
 |   464 ** and then evaluate those statements. The success code is written | 
 |   465 ** into *pRc. | 
 |   466 ** | 
 |   467 ** If *pRc is initially non-zero then this routine is a no-op. | 
 |   468 */ | 
 |   469 static void fts3DbExec( | 
 |   470   int *pRc,              /* Success code */ | 
 |   471   sqlite3 *db,           /* Database in which to run SQL */ | 
 |   472   const char *zFormat,   /* Format string for SQL */ | 
 |   473   ...                    /* Arguments to the format string */ | 
 |   474 ){ | 
 |   475   va_list ap; | 
 |   476   char *zSql; | 
 |   477   if( *pRc ) return; | 
 |   478   va_start(ap, zFormat); | 
 |   479   zSql = sqlite3_vmprintf(zFormat, ap); | 
 |   480   va_end(ap); | 
 |   481   if( zSql==0 ){ | 
 |   482     *pRc = SQLITE_NOMEM; | 
 |   483   }else{ | 
 |   484     *pRc = sqlite3_exec(db, zSql, 0, 0, 0); | 
 |   485     sqlite3_free(zSql); | 
 |   486   } | 
 |   487 } | 
 |   488  | 
 |   489 /* | 
 |   490 ** The xDestroy() virtual table method. | 
 |   491 */ | 
 |   492 static int fts3DestroyMethod(sqlite3_vtab *pVtab){ | 
 |   493   int rc = SQLITE_OK;              /* Return code */ | 
 |   494   Fts3Table *p = (Fts3Table *)pVtab; | 
 |   495   sqlite3 *db = p->db; | 
 |   496  | 
 |   497   /* Drop the shadow tables */ | 
 |   498   fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_content'", p->zDb, p->zName); | 
 |   499   fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_segments'", p->zDb,p->zName); | 
 |   500   fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_segdir'", p->zDb, p->zName); | 
 |   501   fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_docsize'", p->zDb, p->zName); | 
 |   502   fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_stat'", p->zDb, p->zName); | 
 |   503  | 
 |   504   /* If everything has worked, invoke fts3DisconnectMethod() to free the | 
 |   505   ** memory associated with the Fts3Table structure and return SQLITE_OK. | 
 |   506   ** Otherwise, return an SQLite error code. | 
 |   507   */ | 
 |   508   return (rc==SQLITE_OK ? fts3DisconnectMethod(pVtab) : rc); | 
 |   509 } | 
 |   510  | 
 |   511  | 
 |   512 /* | 
 |   513 ** Invoke sqlite3_declare_vtab() to declare the schema for the FTS3 table | 
 |   514 ** passed as the first argument. This is done as part of the xConnect() | 
 |   515 ** and xCreate() methods. | 
 |   516 ** | 
 |   517 ** If *pRc is non-zero when this function is called, it is a no-op.  | 
 |   518 ** Otherwise, if an error occurs, an SQLite error code is stored in *pRc | 
 |   519 ** before returning. | 
 |   520 */ | 
 |   521 static void fts3DeclareVtab(int *pRc, Fts3Table *p){ | 
 |   522   if( *pRc==SQLITE_OK ){ | 
 |   523     int i;                        /* Iterator variable */ | 
 |   524     int rc;                       /* Return code */ | 
 |   525     char *zSql;                   /* SQL statement passed to declare_vtab() */ | 
 |   526     char *zCols;                  /* List of user defined columns */ | 
 |   527  | 
 |   528     /* Create a list of user columns for the virtual table */ | 
 |   529     zCols = sqlite3_mprintf("%Q, ", p->azColumn[0]); | 
 |   530     for(i=1; zCols && i<p->nColumn; i++){ | 
 |   531       zCols = sqlite3_mprintf("%z%Q, ", zCols, p->azColumn[i]); | 
 |   532     } | 
 |   533  | 
 |   534     /* Create the whole "CREATE TABLE" statement to pass to SQLite */ | 
 |   535     zSql = sqlite3_mprintf( | 
 |   536         "CREATE TABLE x(%s %Q HIDDEN, docid HIDDEN)", zCols, p->zName | 
 |   537     ); | 
 |   538     if( !zCols || !zSql ){ | 
 |   539       rc = SQLITE_NOMEM; | 
 |   540     }else{ | 
 |   541       rc = sqlite3_declare_vtab(p->db, zSql); | 
 |   542     } | 
 |   543  | 
 |   544     sqlite3_free(zSql); | 
 |   545     sqlite3_free(zCols); | 
 |   546     *pRc = rc; | 
 |   547   } | 
 |   548 } | 
 |   549  | 
 |   550 /* | 
 |   551 ** Create the backing store tables (%_content, %_segments and %_segdir) | 
 |   552 ** required by the FTS3 table passed as the only argument. This is done | 
 |   553 ** as part of the vtab xCreate() method. | 
 |   554 ** | 
 |   555 ** If the p->bHasDocsize boolean is true (indicating that this is an | 
 |   556 ** FTS4 table, not an FTS3 table) then also create the %_docsize and | 
 |   557 ** %_stat tables required by FTS4. | 
 |   558 */ | 
 |   559 static int fts3CreateTables(Fts3Table *p){ | 
 |   560   int rc = SQLITE_OK;             /* Return code */ | 
 |   561   int i;                          /* Iterator variable */ | 
 |   562   char *zContentCols;             /* Columns of %_content table */ | 
 |   563   sqlite3 *db = p->db;            /* The database connection */ | 
 |   564  | 
 |   565   /* Create a list of user columns for the content table */ | 
 |   566   zContentCols = sqlite3_mprintf("docid INTEGER PRIMARY KEY"); | 
 |   567   for(i=0; zContentCols && i<p->nColumn; i++){ | 
 |   568     char *z = p->azColumn[i]; | 
 |   569     zContentCols = sqlite3_mprintf("%z, 'c%d%q'", zContentCols, i, z); | 
 |   570   } | 
 |   571   if( zContentCols==0 ) rc = SQLITE_NOMEM; | 
 |   572  | 
 |   573   /* Create the content table */ | 
 |   574   fts3DbExec(&rc, db,  | 
 |   575      "CREATE TABLE %Q.'%q_content'(%s)", | 
 |   576      p->zDb, p->zName, zContentCols | 
 |   577   ); | 
 |   578   sqlite3_free(zContentCols); | 
 |   579   /* Create other tables */ | 
 |   580   fts3DbExec(&rc, db,  | 
 |   581       "CREATE TABLE %Q.'%q_segments'(blockid INTEGER PRIMARY KEY, block BLOB);", | 
 |   582       p->zDb, p->zName | 
 |   583   ); | 
 |   584   fts3DbExec(&rc, db,  | 
 |   585       "CREATE TABLE %Q.'%q_segdir'(" | 
 |   586         "level INTEGER," | 
 |   587         "idx INTEGER," | 
 |   588         "start_block INTEGER," | 
 |   589         "leaves_end_block INTEGER," | 
 |   590         "end_block INTEGER," | 
 |   591         "root BLOB," | 
 |   592         "PRIMARY KEY(level, idx)" | 
 |   593       ");", | 
 |   594       p->zDb, p->zName | 
 |   595   ); | 
 |   596   if( p->bHasDocsize ){ | 
 |   597     fts3DbExec(&rc, db,  | 
 |   598         "CREATE TABLE %Q.'%q_docsize'(docid INTEGER PRIMARY KEY, size BLOB);", | 
 |   599         p->zDb, p->zName | 
 |   600     ); | 
 |   601   } | 
 |   602   if( p->bHasStat ){ | 
 |   603     fts3DbExec(&rc, db,  | 
 |   604         "CREATE TABLE %Q.'%q_stat'(id INTEGER PRIMARY KEY, value BLOB);", | 
 |   605         p->zDb, p->zName | 
 |   606     ); | 
 |   607   } | 
 |   608   return rc; | 
 |   609 } | 
 |   610  | 
 |   611 /* | 
 |   612 ** Store the current database page-size in bytes in p->nPgsz. | 
 |   613 ** | 
 |   614 ** If *pRc is non-zero when this function is called, it is a no-op.  | 
 |   615 ** Otherwise, if an error occurs, an SQLite error code is stored in *pRc | 
 |   616 ** before returning. | 
 |   617 */ | 
 |   618 static void fts3DatabasePageSize(int *pRc, Fts3Table *p){ | 
 |   619   if( *pRc==SQLITE_OK ){ | 
 |   620     int rc;                       /* Return code */ | 
 |   621     char *zSql;                   /* SQL text "PRAGMA %Q.page_size" */ | 
 |   622     sqlite3_stmt *pStmt;          /* Compiled "PRAGMA %Q.page_size" statement */ | 
 |   623    | 
 |   624     zSql = sqlite3_mprintf("PRAGMA %Q.page_size", p->zDb); | 
 |   625     if( !zSql ){ | 
 |   626       rc = SQLITE_NOMEM; | 
 |   627     }else{ | 
 |   628       rc = sqlite3_prepare(p->db, zSql, -1, &pStmt, 0); | 
 |   629       if( rc==SQLITE_OK ){ | 
 |   630         sqlite3_step(pStmt); | 
 |   631         p->nPgsz = sqlite3_column_int(pStmt, 0); | 
 |   632         rc = sqlite3_finalize(pStmt); | 
 |   633       } | 
 |   634     } | 
 |   635     assert( p->nPgsz>0 || rc!=SQLITE_OK ); | 
 |   636     sqlite3_free(zSql); | 
 |   637     *pRc = rc; | 
 |   638   } | 
 |   639 } | 
 |   640  | 
 |   641 /* | 
 |   642 ** "Special" FTS4 arguments are column specifications of the following form: | 
 |   643 ** | 
 |   644 **   <key> = <value> | 
 |   645 ** | 
 |   646 ** There may not be whitespace surrounding the "=" character. The <value>  | 
 |   647 ** term may be quoted, but the <key> may not. | 
 |   648 */ | 
 |   649 static int fts3IsSpecialColumn( | 
 |   650   const char *z,  | 
 |   651   int *pnKey, | 
 |   652   char **pzValue | 
 |   653 ){ | 
 |   654   char *zValue; | 
 |   655   const char *zCsr = z; | 
 |   656  | 
 |   657   while( *zCsr!='=' ){ | 
 |   658     if( *zCsr=='\0' ) return 0; | 
 |   659     zCsr++; | 
 |   660   } | 
 |   661  | 
 |   662   *pnKey = (int)(zCsr-z); | 
 |   663   zValue = sqlite3_mprintf("%s", &zCsr[1]); | 
 |   664   if( zValue ){ | 
 |   665     sqlite3Fts3Dequote(zValue); | 
 |   666   } | 
 |   667   *pzValue = zValue; | 
 |   668   return 1; | 
 |   669 } | 
 |   670  | 
 |   671 /* | 
 |   672 ** Append the output of a printf() style formatting to an existing string. | 
 |   673 */ | 
 |   674 static void fts3Appendf( | 
 |   675   int *pRc,                       /* IN/OUT: Error code */ | 
 |   676   char **pz,                      /* IN/OUT: Pointer to string buffer */ | 
 |   677   const char *zFormat,            /* Printf format string to append */ | 
 |   678   ...                             /* Arguments for printf format string */ | 
 |   679 ){ | 
 |   680   if( *pRc==SQLITE_OK ){ | 
 |   681     va_list ap; | 
 |   682     char *z; | 
 |   683     va_start(ap, zFormat); | 
 |   684     z = sqlite3_vmprintf(zFormat, ap); | 
 |   685     if( z && *pz ){ | 
 |   686       char *z2 = sqlite3_mprintf("%s%s", *pz, z); | 
 |   687       sqlite3_free(z); | 
 |   688       z = z2; | 
 |   689     } | 
 |   690     if( z==0 ) *pRc = SQLITE_NOMEM; | 
 |   691     sqlite3_free(*pz); | 
 |   692     *pz = z; | 
 |   693   } | 
 |   694 } | 
 |   695  | 
 |   696 /* | 
 |   697 ** Return a copy of input string zInput enclosed in double-quotes (") and | 
 |   698 ** with all double quote characters escaped. For example: | 
 |   699 ** | 
 |   700 **     fts3QuoteId("un \"zip\"")   ->    "un \"\"zip\"\"" | 
 |   701 ** | 
 |   702 ** The pointer returned points to memory obtained from sqlite3_malloc(). It | 
 |   703 ** is the callers responsibility to call sqlite3_free() to release this | 
 |   704 ** memory. | 
 |   705 */ | 
 |   706 static char *fts3QuoteId(char const *zInput){ | 
 |   707   int nRet; | 
 |   708   char *zRet; | 
 |   709   nRet = 2 + strlen(zInput)*2 + 1; | 
 |   710   zRet = sqlite3_malloc(nRet); | 
 |   711   if( zRet ){ | 
 |   712     int i; | 
 |   713     char *z = zRet; | 
 |   714     *(z++) = '"'; | 
 |   715     for(i=0; zInput[i]; i++){ | 
 |   716       if( zInput[i]=='"' ) *(z++) = '"'; | 
 |   717       *(z++) = zInput[i]; | 
 |   718     } | 
 |   719     *(z++) = '"'; | 
 |   720     *(z++) = '\0'; | 
 |   721   } | 
 |   722   return zRet; | 
 |   723 } | 
 |   724  | 
 |   725 /* | 
 |   726 ** Return a list of comma separated SQL expressions that could be used | 
 |   727 ** in a SELECT statement such as the following: | 
 |   728 ** | 
 |   729 **     SELECT <list of expressions> FROM %_content AS x ... | 
 |   730 ** | 
 |   731 ** to return the docid, followed by each column of text data in order | 
 |   732 ** from left to right. If parameter zFunc is not NULL, then instead of | 
 |   733 ** being returned directly each column of text data is passed to an SQL | 
 |   734 ** function named zFunc first. For example, if zFunc is "unzip" and the | 
 |   735 ** table has the three user-defined columns "a", "b", and "c", the following | 
 |   736 ** string is returned: | 
 |   737 ** | 
 |   738 **     "docid, unzip(x.'a'), unzip(x.'b'), unzip(x.'c')" | 
 |   739 ** | 
 |   740 ** The pointer returned points to a buffer allocated by sqlite3_malloc(). It | 
 |   741 ** is the responsibility of the caller to eventually free it. | 
 |   742 ** | 
 |   743 ** If *pRc is not SQLITE_OK when this function is called, it is a no-op (and | 
 |   744 ** a NULL pointer is returned). Otherwise, if an OOM error is encountered | 
 |   745 ** by this function, NULL is returned and *pRc is set to SQLITE_NOMEM. If | 
 |   746 ** no error occurs, *pRc is left unmodified. | 
 |   747 */ | 
 |   748 static char *fts3ReadExprList(Fts3Table *p, const char *zFunc, int *pRc){ | 
 |   749   char *zRet = 0; | 
 |   750   char *zFree = 0; | 
 |   751   char *zFunction; | 
 |   752   int i; | 
 |   753  | 
 |   754   if( !zFunc ){ | 
 |   755     zFunction = ""; | 
 |   756   }else{ | 
 |   757     zFree = zFunction = fts3QuoteId(zFunc); | 
 |   758   } | 
 |   759   fts3Appendf(pRc, &zRet, "docid"); | 
 |   760   for(i=0; i<p->nColumn; i++){ | 
 |   761     fts3Appendf(pRc, &zRet, ",%s(x.'c%d%q')", zFunction, i, p->azColumn[i]); | 
 |   762   } | 
 |   763   sqlite3_free(zFree); | 
 |   764   return zRet; | 
 |   765 } | 
 |   766  | 
 |   767 /* | 
 |   768 ** Return a list of N comma separated question marks, where N is the number | 
 |   769 ** of columns in the %_content table (one for the docid plus one for each | 
 |   770 ** user-defined text column). | 
 |   771 ** | 
 |   772 ** If argument zFunc is not NULL, then all but the first question mark | 
 |   773 ** is preceded by zFunc and an open bracket, and followed by a closed | 
 |   774 ** bracket. For example, if zFunc is "zip" and the FTS3 table has three  | 
 |   775 ** user-defined text columns, the following string is returned: | 
 |   776 ** | 
 |   777 **     "?, zip(?), zip(?), zip(?)" | 
 |   778 ** | 
 |   779 ** The pointer returned points to a buffer allocated by sqlite3_malloc(). It | 
 |   780 ** is the responsibility of the caller to eventually free it. | 
 |   781 ** | 
 |   782 ** If *pRc is not SQLITE_OK when this function is called, it is a no-op (and | 
 |   783 ** a NULL pointer is returned). Otherwise, if an OOM error is encountered | 
 |   784 ** by this function, NULL is returned and *pRc is set to SQLITE_NOMEM. If | 
 |   785 ** no error occurs, *pRc is left unmodified. | 
 |   786 */ | 
 |   787 static char *fts3WriteExprList(Fts3Table *p, const char *zFunc, int *pRc){ | 
 |   788   char *zRet = 0; | 
 |   789   char *zFree = 0; | 
 |   790   char *zFunction; | 
 |   791   int i; | 
 |   792  | 
 |   793   if( !zFunc ){ | 
 |   794     zFunction = ""; | 
 |   795   }else{ | 
 |   796     zFree = zFunction = fts3QuoteId(zFunc); | 
 |   797   } | 
 |   798   fts3Appendf(pRc, &zRet, "?"); | 
 |   799   for(i=0; i<p->nColumn; i++){ | 
 |   800     fts3Appendf(pRc, &zRet, ",%s(?)", zFunction); | 
 |   801   } | 
 |   802   sqlite3_free(zFree); | 
 |   803   return zRet; | 
 |   804 } | 
 |   805  | 
 |   806 /* | 
 |   807 ** This function is the implementation of both the xConnect and xCreate | 
 |   808 ** methods of the FTS3 virtual table. | 
 |   809 ** | 
 |   810 ** The argv[] array contains the following: | 
 |   811 ** | 
 |   812 **   argv[0]   -> module name  ("fts3" or "fts4") | 
 |   813 **   argv[1]   -> database name | 
 |   814 **   argv[2]   -> table name | 
 |   815 **   argv[...] -> "column name" and other module argument fields. | 
 |   816 */ | 
 |   817 static int fts3InitVtab( | 
 |   818   int isCreate,                   /* True for xCreate, false for xConnect */ | 
 |   819   sqlite3 *db,                    /* The SQLite database connection */ | 
 |   820   void *pAux,                     /* Hash table containing tokenizers */ | 
 |   821   int argc,                       /* Number of elements in argv array */ | 
 |   822   const char * const *argv,       /* xCreate/xConnect argument array */ | 
 |   823   sqlite3_vtab **ppVTab,          /* Write the resulting vtab structure here */ | 
 |   824   char **pzErr                    /* Write any error message here */ | 
 |   825 ){ | 
 |   826   Fts3Hash *pHash = (Fts3Hash *)pAux; | 
 |   827   Fts3Table *p = 0;               /* Pointer to allocated vtab */ | 
 |   828   int rc = SQLITE_OK;             /* Return code */ | 
 |   829   int i;                          /* Iterator variable */ | 
 |   830   int nByte;                      /* Size of allocation used for *p */ | 
 |   831   int iCol;                       /* Column index */ | 
 |   832   int nString = 0;                /* Bytes required to hold all column names */ | 
 |   833   int nCol = 0;                   /* Number of columns in the FTS table */ | 
 |   834   char *zCsr;                     /* Space for holding column names */ | 
 |   835   int nDb;                        /* Bytes required to hold database name */ | 
 |   836   int nName;                      /* Bytes required to hold table name */ | 
 |   837   int isFts4 = (argv[0][3]=='4'); /* True for FTS4, false for FTS3 */ | 
 |   838   int bNoDocsize = 0;             /* True to omit %_docsize table */ | 
 |   839   const char **aCol;              /* Array of column names */ | 
 |   840   sqlite3_tokenizer *pTokenizer = 0;        /* Tokenizer for this table */ | 
 |   841  | 
 |   842   char *zCompress = 0; | 
 |   843   char *zUncompress = 0; | 
 |   844  | 
 |   845   assert( strlen(argv[0])==4 ); | 
 |   846   assert( (sqlite3_strnicmp(argv[0], "fts4", 4)==0 && isFts4) | 
 |   847        || (sqlite3_strnicmp(argv[0], "fts3", 4)==0 && !isFts4) | 
 |   848   ); | 
 |   849  | 
 |   850   nDb = (int)strlen(argv[1]) + 1; | 
 |   851   nName = (int)strlen(argv[2]) + 1; | 
 |   852  | 
 |   853   aCol = (const char **)sqlite3_malloc(sizeof(const char *) * (argc-2) ); | 
 |   854   if( !aCol ) return SQLITE_NOMEM; | 
 |   855   memset((void *)aCol, 0, sizeof(const char *) * (argc-2)); | 
 |   856  | 
 |   857   /* Loop through all of the arguments passed by the user to the FTS3/4 | 
 |   858   ** module (i.e. all the column names and special arguments). This loop | 
 |   859   ** does the following: | 
 |   860   ** | 
 |   861   **   + Figures out the number of columns the FTSX table will have, and | 
 |   862   **     the number of bytes of space that must be allocated to store copies | 
 |   863   **     of the column names. | 
 |   864   ** | 
 |   865   **   + If there is a tokenizer specification included in the arguments, | 
 |   866   **     initializes the tokenizer pTokenizer. | 
 |   867   */ | 
 |   868   for(i=3; rc==SQLITE_OK && i<argc; i++){ | 
 |   869     char const *z = argv[i]; | 
 |   870     int nKey; | 
 |   871     char *zVal; | 
 |   872  | 
 |   873     /* Check if this is a tokenizer specification */ | 
 |   874     if( !pTokenizer  | 
 |   875      && strlen(z)>8 | 
 |   876      && 0==sqlite3_strnicmp(z, "tokenize", 8)  | 
 |   877      && 0==sqlite3Fts3IsIdChar(z[8]) | 
 |   878     ){ | 
 |   879       rc = sqlite3Fts3InitTokenizer(pHash, &z[9], &pTokenizer, pzErr); | 
 |   880     } | 
 |   881  | 
 |   882     /* Check if it is an FTS4 special argument. */ | 
 |   883     else if( isFts4 && fts3IsSpecialColumn(z, &nKey, &zVal) ){ | 
 |   884       if( !zVal ){ | 
 |   885         rc = SQLITE_NOMEM; | 
 |   886         goto fts3_init_out; | 
 |   887       } | 
 |   888       if( nKey==9 && 0==sqlite3_strnicmp(z, "matchinfo", 9) ){ | 
 |   889         if( strlen(zVal)==4 && 0==sqlite3_strnicmp(zVal, "fts3", 4) ){ | 
 |   890           bNoDocsize = 1; | 
 |   891         }else{ | 
 |   892           *pzErr = sqlite3_mprintf("unrecognized matchinfo: %s", zVal); | 
 |   893           rc = SQLITE_ERROR; | 
 |   894         } | 
 |   895       }else if( nKey==8 && 0==sqlite3_strnicmp(z, "compress", 8) ){ | 
 |   896         zCompress = zVal; | 
 |   897         zVal = 0; | 
 |   898       }else if( nKey==10 && 0==sqlite3_strnicmp(z, "uncompress", 10) ){ | 
 |   899         zUncompress = zVal; | 
 |   900         zVal = 0; | 
 |   901       }else{ | 
 |   902         *pzErr = sqlite3_mprintf("unrecognized parameter: %s", z); | 
 |   903         rc = SQLITE_ERROR; | 
 |   904       } | 
 |   905       sqlite3_free(zVal); | 
 |   906     } | 
 |   907  | 
 |   908     /* Otherwise, the argument is a column name. */ | 
 |   909     else { | 
 |   910       nString += (int)(strlen(z) + 1); | 
 |   911       aCol[nCol++] = z; | 
 |   912     } | 
 |   913   } | 
 |   914   if( rc!=SQLITE_OK ) goto fts3_init_out; | 
 |   915  | 
 |   916   if( nCol==0 ){ | 
 |   917     assert( nString==0 ); | 
 |   918     aCol[0] = "content"; | 
 |   919     nString = 8; | 
 |   920     nCol = 1; | 
 |   921   } | 
 |   922  | 
 |   923   if( pTokenizer==0 ){ | 
 |   924     rc = sqlite3Fts3InitTokenizer(pHash, "simple", &pTokenizer, pzErr); | 
 |   925     if( rc!=SQLITE_OK ) goto fts3_init_out; | 
 |   926   } | 
 |   927   assert( pTokenizer ); | 
 |   928  | 
 |   929  | 
 |   930   /* Allocate and populate the Fts3Table structure. */ | 
 |   931   nByte = sizeof(Fts3Table) +              /* Fts3Table */ | 
 |   932           nCol * sizeof(char *) +              /* azColumn */ | 
 |   933           nName +                              /* zName */ | 
 |   934           nDb +                                /* zDb */ | 
 |   935           nString;                             /* Space for azColumn strings */ | 
 |   936   p = (Fts3Table*)sqlite3_malloc(nByte); | 
 |   937   if( p==0 ){ | 
 |   938     rc = SQLITE_NOMEM; | 
 |   939     goto fts3_init_out; | 
 |   940   } | 
 |   941   memset(p, 0, nByte); | 
 |   942   p->db = db; | 
 |   943   p->nColumn = nCol; | 
 |   944   p->nPendingData = 0; | 
 |   945   p->azColumn = (char **)&p[1]; | 
 |   946   p->pTokenizer = pTokenizer; | 
 |   947   p->nNodeSize = 1000; | 
 |   948   p->nMaxPendingData = FTS3_MAX_PENDING_DATA; | 
 |   949   p->bHasDocsize = (isFts4 && bNoDocsize==0); | 
 |   950   p->bHasStat = isFts4; | 
 |   951   fts3HashInit(&p->pendingTerms, FTS3_HASH_STRING, 1); | 
 |   952  | 
 |   953   /* Fill in the zName and zDb fields of the vtab structure. */ | 
 |   954   zCsr = (char *)&p->azColumn[nCol]; | 
 |   955   p->zName = zCsr; | 
 |   956   memcpy(zCsr, argv[2], nName); | 
 |   957   zCsr += nName; | 
 |   958   p->zDb = zCsr; | 
 |   959   memcpy(zCsr, argv[1], nDb); | 
 |   960   zCsr += nDb; | 
 |   961  | 
 |   962   /* Fill in the azColumn array */ | 
 |   963   for(iCol=0; iCol<nCol; iCol++){ | 
 |   964     char *z;  | 
 |   965     int n; | 
 |   966     z = (char *)sqlite3Fts3NextToken(aCol[iCol], &n); | 
 |   967     memcpy(zCsr, z, n); | 
 |   968     zCsr[n] = '\0'; | 
 |   969     sqlite3Fts3Dequote(zCsr); | 
 |   970     p->azColumn[iCol] = zCsr; | 
 |   971     zCsr += n+1; | 
 |   972     assert( zCsr <= &((char *)p)[nByte] ); | 
 |   973   } | 
 |   974  | 
 |   975   if( (zCompress==0)!=(zUncompress==0) ){ | 
 |   976     char const *zMiss = (zCompress==0 ? "compress" : "uncompress"); | 
 |   977     rc = SQLITE_ERROR; | 
 |   978     *pzErr = sqlite3_mprintf("missing %s parameter in fts4 constructor", zMiss); | 
 |   979   } | 
 |   980   p->zReadExprlist = fts3ReadExprList(p, zUncompress, &rc); | 
 |   981   p->zWriteExprlist = fts3WriteExprList(p, zCompress, &rc); | 
 |   982   if( rc!=SQLITE_OK ) goto fts3_init_out; | 
 |   983  | 
 |   984   /* If this is an xCreate call, create the underlying tables in the  | 
 |   985   ** database. TODO: For xConnect(), it could verify that said tables exist. | 
 |   986   */ | 
 |   987   if( isCreate ){ | 
 |   988     rc = fts3CreateTables(p); | 
 |   989   } | 
 |   990  | 
 |   991   /* Figure out the page-size for the database. This is required in order to | 
 |   992   ** estimate the cost of loading large doclists from the database (see  | 
 |   993   ** function sqlite3Fts3SegReaderCost() for details). | 
 |   994   */ | 
 |   995   fts3DatabasePageSize(&rc, p); | 
 |   996  | 
 |   997   /* Declare the table schema to SQLite. */ | 
 |   998   fts3DeclareVtab(&rc, p); | 
 |   999  | 
 |  1000 fts3_init_out: | 
 |  1001   sqlite3_free(zCompress); | 
 |  1002   sqlite3_free(zUncompress); | 
 |  1003   sqlite3_free((void *)aCol); | 
 |  1004   if( rc!=SQLITE_OK ){ | 
 |  1005     if( p ){ | 
 |  1006       fts3DisconnectMethod((sqlite3_vtab *)p); | 
 |  1007     }else if( pTokenizer ){ | 
 |  1008       pTokenizer->pModule->xDestroy(pTokenizer); | 
 |  1009     } | 
 |  1010   }else{ | 
 |  1011     *ppVTab = &p->base; | 
 |  1012   } | 
 |  1013   return rc; | 
 |  1014 } | 
 |  1015  | 
 |  1016 /* | 
 |  1017 ** The xConnect() and xCreate() methods for the virtual table. All the | 
 |  1018 ** work is done in function fts3InitVtab(). | 
 |  1019 */ | 
 |  1020 static int fts3ConnectMethod( | 
 |  1021   sqlite3 *db,                    /* Database connection */ | 
 |  1022   void *pAux,                     /* Pointer to tokenizer hash table */ | 
 |  1023   int argc,                       /* Number of elements in argv array */ | 
 |  1024   const char * const *argv,       /* xCreate/xConnect argument array */ | 
 |  1025   sqlite3_vtab **ppVtab,          /* OUT: New sqlite3_vtab object */ | 
 |  1026   char **pzErr                    /* OUT: sqlite3_malloc'd error message */ | 
 |  1027 ){ | 
 |  1028   return fts3InitVtab(0, db, pAux, argc, argv, ppVtab, pzErr); | 
 |  1029 } | 
 |  1030 static int fts3CreateMethod( | 
 |  1031   sqlite3 *db,                    /* Database connection */ | 
 |  1032   void *pAux,                     /* Pointer to tokenizer hash table */ | 
 |  1033   int argc,                       /* Number of elements in argv array */ | 
 |  1034   const char * const *argv,       /* xCreate/xConnect argument array */ | 
 |  1035   sqlite3_vtab **ppVtab,          /* OUT: New sqlite3_vtab object */ | 
 |  1036   char **pzErr                    /* OUT: sqlite3_malloc'd error message */ | 
 |  1037 ){ | 
 |  1038   return fts3InitVtab(1, db, pAux, argc, argv, ppVtab, pzErr); | 
 |  1039 } | 
 |  1040  | 
 |  1041 /*  | 
 |  1042 ** Implementation of the xBestIndex method for FTS3 tables. There | 
 |  1043 ** are three possible strategies, in order of preference: | 
 |  1044 ** | 
 |  1045 **   1. Direct lookup by rowid or docid.  | 
 |  1046 **   2. Full-text search using a MATCH operator on a non-docid column. | 
 |  1047 **   3. Linear scan of %_content table. | 
 |  1048 */ | 
 |  1049 static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ | 
 |  1050   Fts3Table *p = (Fts3Table *)pVTab; | 
 |  1051   int i;                          /* Iterator variable */ | 
 |  1052   int iCons = -1;                 /* Index of constraint to use */ | 
 |  1053  | 
 |  1054   /* By default use a full table scan. This is an expensive option, | 
 |  1055   ** so search through the constraints to see if a more efficient  | 
 |  1056   ** strategy is possible. | 
 |  1057   */ | 
 |  1058   pInfo->idxNum = FTS3_FULLSCAN_SEARCH; | 
 |  1059   pInfo->estimatedCost = 500000; | 
 |  1060   for(i=0; i<pInfo->nConstraint; i++){ | 
 |  1061     struct sqlite3_index_constraint *pCons = &pInfo->aConstraint[i]; | 
 |  1062     if( pCons->usable==0 ) continue; | 
 |  1063  | 
 |  1064     /* A direct lookup on the rowid or docid column. Assign a cost of 1.0. */ | 
 |  1065     if( pCons->op==SQLITE_INDEX_CONSTRAINT_EQ  | 
 |  1066      && (pCons->iColumn<0 || pCons->iColumn==p->nColumn+1 ) | 
 |  1067     ){ | 
 |  1068       pInfo->idxNum = FTS3_DOCID_SEARCH; | 
 |  1069       pInfo->estimatedCost = 1.0; | 
 |  1070       iCons = i; | 
 |  1071     } | 
 |  1072  | 
 |  1073     /* A MATCH constraint. Use a full-text search. | 
 |  1074     ** | 
 |  1075     ** If there is more than one MATCH constraint available, use the first | 
 |  1076     ** one encountered. If there is both a MATCH constraint and a direct | 
 |  1077     ** rowid/docid lookup, prefer the MATCH strategy. This is done even  | 
 |  1078     ** though the rowid/docid lookup is faster than a MATCH query, because selecting | 
 |  1079     ** it would lead to an "unable to use function MATCH in the requested  | 
 |  1080     ** context" error. | 
 |  1081     */ | 
 |  1082     if( pCons->op==SQLITE_INDEX_CONSTRAINT_MATCH  | 
 |  1083      && pCons->iColumn>=0 && pCons->iColumn<=p->nColumn | 
 |  1084     ){ | 
 |  1085       pInfo->idxNum = FTS3_FULLTEXT_SEARCH + pCons->iColumn; | 
 |  1086       pInfo->estimatedCost = 2.0; | 
 |  1087       iCons = i; | 
 |  1088       break; | 
 |  1089     } | 
 |  1090   } | 
 |  1091  | 
 |  1092   if( iCons>=0 ){ | 
 |  1093     pInfo->aConstraintUsage[iCons].argvIndex = 1; | 
 |  1094     pInfo->aConstraintUsage[iCons].omit = 1; | 
 |  1095   }  | 
 |  1096   return SQLITE_OK; | 
 |  1097 } | 
 |  1098  | 
 |  1099 /* | 
 |  1100 ** Implementation of xOpen method. | 
 |  1101 */ | 
 |  1102 static int fts3OpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){ | 
 |  1103   sqlite3_vtab_cursor *pCsr;               /* Allocated cursor */ | 
 |  1104  | 
 |  1105   UNUSED_PARAMETER(pVTab); | 
 |  1106  | 
 |  1107   /* Allocate a buffer large enough for an Fts3Cursor structure. If the | 
 |  1108   ** allocation succeeds, zero it and return SQLITE_OK. Otherwise,  | 
 |  1109   ** if the allocation fails, return SQLITE_NOMEM. | 
 |  1110   */ | 
 |  1111   *ppCsr = pCsr = (sqlite3_vtab_cursor *)sqlite3_malloc(sizeof(Fts3Cursor)); | 
 |  1112   if( !pCsr ){ | 
 |  1113     return SQLITE_NOMEM; | 
 |  1114   } | 
 |  1115   memset(pCsr, 0, sizeof(Fts3Cursor)); | 
 |  1116   return SQLITE_OK; | 
 |  1117 } | 
 |  1118  | 
 |  1119 /* | 
 |  1120 ** Close the cursor.  For additional information see the documentation | 
 |  1121 ** on the xClose method of the virtual table interface. | 
 |  1122 */ | 
 |  1123 static int fts3CloseMethod(sqlite3_vtab_cursor *pCursor){ | 
 |  1124   Fts3Cursor *pCsr = (Fts3Cursor *)pCursor; | 
 |  1125   assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 ); | 
 |  1126   sqlite3_finalize(pCsr->pStmt); | 
 |  1127   sqlite3Fts3ExprFree(pCsr->pExpr); | 
 |  1128   sqlite3Fts3FreeDeferredTokens(pCsr); | 
 |  1129   sqlite3_free(pCsr->aDoclist); | 
 |  1130   sqlite3_free(pCsr->aMatchinfo); | 
 |  1131   sqlite3_free(pCsr); | 
 |  1132   return SQLITE_OK; | 
 |  1133 } | 
 |  1134  | 
 |  1135 /* | 
 |  1136 ** Position the pCsr->pStmt statement so that it is on the row | 
 |  1137 ** of the %_content table that contains the last match.  Return | 
 |  1138 ** SQLITE_OK on success.   | 
 |  1139 */ | 
 |  1140 static int fts3CursorSeek(sqlite3_context *pContext, Fts3Cursor *pCsr){ | 
 |  1141   if( pCsr->isRequireSeek ){ | 
 |  1142     pCsr->isRequireSeek = 0; | 
 |  1143     sqlite3_bind_int64(pCsr->pStmt, 1, pCsr->iPrevId); | 
 |  1144     if( SQLITE_ROW==sqlite3_step(pCsr->pStmt) ){ | 
 |  1145       return SQLITE_OK; | 
 |  1146     }else{ | 
 |  1147       int rc = sqlite3_reset(pCsr->pStmt); | 
 |  1148       if( rc==SQLITE_OK ){ | 
 |  1149         /* If no row was found and no error has occured, then the %_content | 
 |  1150         ** table is missing a row that is present in the full-text index. | 
 |  1151         ** The data structures are corrupt. | 
 |  1152         */ | 
 |  1153         rc = SQLITE_CORRUPT; | 
 |  1154       } | 
 |  1155       pCsr->isEof = 1; | 
 |  1156       if( pContext ){ | 
 |  1157         sqlite3_result_error_code(pContext, rc); | 
 |  1158       } | 
 |  1159       return rc; | 
 |  1160     } | 
 |  1161   }else{ | 
 |  1162     return SQLITE_OK; | 
 |  1163   } | 
 |  1164 } | 
 |  1165  | 
 |  1166 /* | 
 |  1167 ** This function is used to process a single interior node when searching | 
 |  1168 ** a b-tree for a term or term prefix. The node data is passed to this  | 
 |  1169 ** function via the zNode/nNode parameters. The term to search for is | 
 |  1170 ** passed in zTerm/nTerm. | 
 |  1171 ** | 
 |  1172 ** If piFirst is not NULL, then this function sets *piFirst to the blockid | 
 |  1173 ** of the child node that heads the sub-tree that may contain the term. | 
 |  1174 ** | 
 |  1175 ** If piLast is not NULL, then *piLast is set to the right-most child node | 
 |  1176 ** that heads a sub-tree that may contain a term for which zTerm/nTerm is | 
 |  1177 ** a prefix. | 
 |  1178 ** | 
 |  1179 ** If an OOM error occurs, SQLITE_NOMEM is returned. Otherwise, SQLITE_OK. | 
 |  1180 */ | 
 |  1181 static int fts3ScanInteriorNode( | 
 |  1182   const char *zTerm,              /* Term to select leaves for */ | 
 |  1183   int nTerm,                      /* Size of term zTerm in bytes */ | 
 |  1184   const char *zNode,              /* Buffer containing segment interior node */ | 
 |  1185   int nNode,                      /* Size of buffer at zNode */ | 
 |  1186   sqlite3_int64 *piFirst,         /* OUT: Selected child node */ | 
 |  1187   sqlite3_int64 *piLast           /* OUT: Selected child node */ | 
 |  1188 ){ | 
 |  1189   int rc = SQLITE_OK;             /* Return code */ | 
 |  1190   const char *zCsr = zNode;       /* Cursor to iterate through node */ | 
 |  1191   const char *zEnd = &zCsr[nNode];/* End of interior node buffer */ | 
 |  1192   char *zBuffer = 0;              /* Buffer to load terms into */ | 
 |  1193   int nAlloc = 0;                 /* Size of allocated buffer */ | 
 |  1194   int isFirstTerm = 1;            /* True when processing first term on page */ | 
 |  1195   sqlite3_int64 iChild;           /* Block id of child node to descend to */ | 
 |  1196  | 
 |  1197   /* Skip over the 'height' varint that occurs at the start of every  | 
 |  1198   ** interior node. Then load the blockid of the left-child of the b-tree | 
 |  1199   ** node into variable iChild.   | 
 |  1200   ** | 
 |  1201   ** Even if the data structure on disk is corrupted, this (reading two | 
 |  1202   ** varints from the buffer) does not risk an overread. If zNode is a | 
 |  1203   ** root node, then the buffer comes from a SELECT statement. SQLite does | 
 |  1204   ** not make this guarantee explicitly, but in practice there are always | 
 |  1205   ** either more than 20 bytes of allocated space following the nNode bytes of | 
 |  1206   ** contents, or two zero bytes. Or, if the node is read from the %_segments | 
 |  1207   ** table, then there are always 20 bytes of zeroed padding following the | 
 |  1208   ** nNode bytes of content (see sqlite3Fts3ReadBlock() for details). | 
 |  1209   */ | 
 |  1210   zCsr += sqlite3Fts3GetVarint(zCsr, &iChild); | 
 |  1211   zCsr += sqlite3Fts3GetVarint(zCsr, &iChild); | 
 |  1212   if( zCsr>zEnd ){ | 
 |  1213     return SQLITE_CORRUPT; | 
 |  1214   } | 
 |  1215    | 
 |  1216   while( zCsr<zEnd && (piFirst || piLast) ){ | 
 |  1217     int cmp;                      /* memcmp() result */ | 
 |  1218     int nSuffix;                  /* Size of term suffix */ | 
 |  1219     int nPrefix = 0;              /* Size of term prefix */ | 
 |  1220     int nBuffer;                  /* Total term size */ | 
 |  1221    | 
 |  1222     /* Load the next term on the node into zBuffer. Use realloc() to expand | 
 |  1223     ** the size of zBuffer if required.  */ | 
 |  1224     if( !isFirstTerm ){ | 
 |  1225       zCsr += sqlite3Fts3GetVarint32(zCsr, &nPrefix); | 
 |  1226     } | 
 |  1227     isFirstTerm = 0; | 
 |  1228     zCsr += sqlite3Fts3GetVarint32(zCsr, &nSuffix); | 
 |  1229      | 
 |  1230     /* NOTE(shess): Previous code checked for negative nPrefix and | 
 |  1231     ** nSuffix and suffix overrunning zEnd.  Additionally corrupt if | 
 |  1232     ** the prefix is longer than the previous term, or if the suffix | 
 |  1233     ** causes overflow. | 
 |  1234     */ | 
 |  1235     if( nPrefix<0 || nSuffix<0 || nPrefix>nBuffer | 
 |  1236      || &zCsr[nSuffix]<zCsr || &zCsr[nSuffix]>zEnd ){ | 
 |  1237       rc = SQLITE_CORRUPT; | 
 |  1238       goto finish_scan; | 
 |  1239     } | 
 |  1240     if( nPrefix+nSuffix>nAlloc ){ | 
 |  1241       char *zNew; | 
 |  1242       nAlloc = (nPrefix+nSuffix) * 2; | 
 |  1243       zNew = (char *)sqlite3_realloc(zBuffer, nAlloc); | 
 |  1244       if( !zNew ){ | 
 |  1245         rc = SQLITE_NOMEM; | 
 |  1246         goto finish_scan; | 
 |  1247       } | 
 |  1248       zBuffer = zNew; | 
 |  1249     } | 
 |  1250     memcpy(&zBuffer[nPrefix], zCsr, nSuffix); | 
 |  1251     nBuffer = nPrefix + nSuffix; | 
 |  1252     zCsr += nSuffix; | 
 |  1253  | 
 |  1254     /* Compare the term we are searching for with the term just loaded from | 
 |  1255     ** the interior node. If the specified term is greater than or equal | 
 |  1256     ** to the term from the interior node, then all terms on the sub-tree  | 
 |  1257     ** headed by node iChild are smaller than zTerm. No need to search  | 
 |  1258     ** iChild. | 
 |  1259     ** | 
 |  1260     ** If the interior node term is larger than the specified term, then | 
 |  1261     ** the tree headed by iChild may contain the specified term. | 
 |  1262     */ | 
 |  1263     cmp = memcmp(zTerm, zBuffer, (nBuffer>nTerm ? nTerm : nBuffer)); | 
 |  1264     if( piFirst && (cmp<0 || (cmp==0 && nBuffer>nTerm)) ){ | 
 |  1265       *piFirst = iChild; | 
 |  1266       piFirst = 0; | 
 |  1267     } | 
 |  1268  | 
 |  1269     if( piLast && cmp<0 ){ | 
 |  1270       *piLast = iChild; | 
 |  1271       piLast = 0; | 
 |  1272     } | 
 |  1273  | 
 |  1274     iChild++; | 
 |  1275   }; | 
 |  1276  | 
 |  1277   if( piFirst ) *piFirst = iChild; | 
 |  1278   if( piLast ) *piLast = iChild; | 
 |  1279  | 
 |  1280  finish_scan: | 
 |  1281   sqlite3_free(zBuffer); | 
 |  1282   return rc; | 
 |  1283 } | 
 |  1284  | 
 |  1285  | 
 |  1286 /* | 
 |  1287 ** The buffer pointed to by argument zNode (size nNode bytes) contains an | 
 |  1288 ** interior node of a b-tree segment. The zTerm buffer (size nTerm bytes) | 
 |  1289 ** contains a term. This function searches the sub-tree headed by the zNode | 
 |  1290 ** node for the range of leaf nodes that may contain the specified term | 
 |  1291 ** or terms for which the specified term is a prefix. | 
 |  1292 ** | 
 |  1293 ** If piLeaf is not NULL, then *piLeaf is set to the blockid of the  | 
 |  1294 ** left-most leaf node in the tree that may contain the specified term. | 
 |  1295 ** If piLeaf2 is not NULL, then *piLeaf2 is set to the blockid of the | 
 |  1296 ** right-most leaf node that may contain a term for which the specified | 
 |  1297 ** term is a prefix. | 
 |  1298 ** | 
 |  1299 ** It is possible that the range of returned leaf nodes does not contain  | 
 |  1300 ** the specified term or any terms for which it is a prefix. However, if the  | 
 |  1301 ** segment does contain any such terms, they are stored within the identified | 
 |  1302 ** range. Because this function only inspects interior segment nodes (and | 
 |  1303 ** never loads leaf nodes into memory), it is not possible to be sure. | 
 |  1304 ** | 
 |  1305 ** If an error occurs, an error code other than SQLITE_OK is returned. | 
 |  1306 */  | 
 |  1307 static int fts3SelectLeaf( | 
 |  1308   Fts3Table *p,                   /* Virtual table handle */ | 
 |  1309   const char *zTerm,              /* Term to select leaves for */ | 
 |  1310   int nTerm,                      /* Size of term zTerm in bytes */ | 
 |  1311   const char *zNode,              /* Buffer containing segment interior node */ | 
 |  1312   int nNode,                      /* Size of buffer at zNode */ | 
 |  1313   sqlite3_int64 *piLeaf,          /* Selected leaf node */ | 
 |  1314   sqlite3_int64 *piLeaf2          /* Selected leaf node */ | 
 |  1315 ){ | 
 |  1316   int rc;                         /* Return code */ | 
 |  1317   int iHeight;                    /* Height of this node in tree */ | 
 |  1318  | 
 |  1319   assert( piLeaf || piLeaf2 ); | 
 |  1320  | 
 |  1321   sqlite3Fts3GetVarint32(zNode, &iHeight); | 
 |  1322   rc = fts3ScanInteriorNode(zTerm, nTerm, zNode, nNode, piLeaf, piLeaf2); | 
 |  1323   assert( !piLeaf2 || !piLeaf || rc!=SQLITE_OK || (*piLeaf<=*piLeaf2) ); | 
 |  1324  | 
 |  1325   if( rc==SQLITE_OK && iHeight>1 ){ | 
 |  1326     char *zBlob = 0;              /* Blob read from %_segments table */ | 
 |  1327     int nBlob;                    /* Size of zBlob in bytes */ | 
 |  1328  | 
 |  1329     if( piLeaf && piLeaf2 && (*piLeaf!=*piLeaf2) ){ | 
 |  1330       rc = sqlite3Fts3ReadBlock(p, *piLeaf, &zBlob, &nBlob); | 
 |  1331       if( rc==SQLITE_OK ){ | 
 |  1332         rc = fts3SelectLeaf(p, zTerm, nTerm, zBlob, nBlob, piLeaf, 0); | 
 |  1333       } | 
 |  1334       sqlite3_free(zBlob); | 
 |  1335       piLeaf = 0; | 
 |  1336       zBlob = 0; | 
 |  1337     } | 
 |  1338  | 
 |  1339     if( rc==SQLITE_OK ){ | 
 |  1340       rc = sqlite3Fts3ReadBlock(p, piLeaf ? *piLeaf : *piLeaf2, &zBlob, &nBlob); | 
 |  1341     } | 
 |  1342     if( rc==SQLITE_OK ){ | 
 |  1343       rc = fts3SelectLeaf(p, zTerm, nTerm, zBlob, nBlob, piLeaf, piLeaf2); | 
 |  1344     } | 
 |  1345     sqlite3_free(zBlob); | 
 |  1346   } | 
 |  1347  | 
 |  1348   return rc; | 
 |  1349 } | 
 |  1350  | 
 |  1351 /* | 
 |  1352 ** This function is used to create delta-encoded serialized lists of FTS3  | 
 |  1353 ** varints. Each call to this function appends a single varint to a list. | 
 |  1354 */ | 
 |  1355 static void fts3PutDeltaVarint( | 
 |  1356   char **pp,                      /* IN/OUT: Output pointer */ | 
 |  1357   sqlite3_int64 *piPrev,          /* IN/OUT: Previous value written to list */ | 
 |  1358   sqlite3_int64 iVal              /* Write this value to the list */ | 
 |  1359 ){ | 
 |  1360   assert( iVal-*piPrev > 0 || (*piPrev==0 && iVal==0) ); | 
 |  1361   *pp += sqlite3Fts3PutVarint(*pp, iVal-*piPrev); | 
 |  1362   *piPrev = iVal; | 
 |  1363 } | 
 |  1364  | 
 /*
 ** On entry *ppPoslist points to the start of a position-list. On return it
 ** points to the first byte after that position-list, including its
 ** terminator.
 **
 ** A position-list holds the (delta encoded) positions and column numbers
 ** for a single document of a doclist. In other words, this routine moves
 ** *ppPoslist on to the next docid in the doclist, or to the first byte
 ** past the end of the doclist.
 **
 ** If pp is not NULL, the bytes skipped over (terminator included) are also
 ** copied to *pp, and *pp is advanced past the copy.
 */
 static void fts3PoslistCopy(char **pp, char **ppPoslist){
   char *pStart = *ppPoslist;      /* First byte of the position-list */
   char *pCur = pStart;            /* Scan pointer */
   char cont = 0;                  /* 0x80 bit of the previous byte */

   /* A position-list ends with POS_END: a single 0x00 byte. A 0x00 byte
   ** that directly follows a byte with the 0x80 bit set is not a terminator
   ** but the final byte of a multi-byte varint, so keep scanning until a
   ** 0x00 is found whose predecessor has the 0x80 bit clear.
   */
   while( (*pCur | cont)!=0 ){
     cont = *pCur & 0x80;
     pCur++;
     testcase( cont!=0 && (*pCur)==0 );
   }
   pCur++;                         /* Step over the POS_END terminator */

   if( pp ){
     int nByte = (int)(pCur - pStart);
     memcpy(*pp, pStart, nByte);
     *pp += nByte;
   }
   *ppPoslist = pCur;
 }
 |  1408  | 
 /*
 ** On entry *ppPoslist points to the start of a column-list. On return it
 ** points at the byte that terminates the column-list.
 **
 ** A column-list is the list of delta-encoded positions for one column of
 ** one document within a doclist. It is terminated by either a POS_COLUMN
 ** varint (1) or a POS_END varint (0); the terminator itself is not
 ** consumed.
 **
 ** If pp is not NULL, the column-list is also copied to *pp and *pp is
 ** advanced past the copy. The terminator is not copied.
 */
 static void fts3ColumnlistCopy(char **pp, char **ppPoslist){
   char *pStart = *ppPoslist;      /* First byte of the column-list */
   char *pCur = pStart;            /* Scan pointer */
   char cont = 0;                  /* 0x80 bit of the previous byte */

   /* The terminator is a 0x00 or 0x01 byte that is not the tail of a
   ** multi-byte varint, i.e. whose predecessor has the 0x80 bit clear.
   */
   while( ((*pCur | cont) & 0xFE)!=0 ){
     cont = *pCur & 0x80;
     pCur++;
     testcase( cont!=0 && ((*pCur)&0xfe)==0 );
   }

   if( pp ){
     int nByte = (int)(pCur - pStart);
     memcpy(*pp, pStart, nByte);
     *pp += nByte;
   }
   *ppPoslist = pCur;
 }
 |  1446  | 
 /*
 ** Sentinel position meaning "the end of the position-list has been
 ** reached". Using the largest 32-bit signed value is safe because no
 ** document can contain 2^31 terms, so no real position has this value.
 */
 #define POSITION_LIST_END 0x7fffffff
 |  1452  | 
 |  1453 /* | 
 |  1454 ** This function is used to help parse position-lists. When this function is | 
 |  1455 ** called, *pp may point to the start of the next varint in the position-list | 
 |  1456 ** being parsed, or it may point to 1 byte past the end of the position-list | 
 |  1457 ** (in which case **pp will be a terminator bytes POS_END (0) or | 
 |  1458 ** (1)). | 
 |  1459 ** | 
 |  1460 ** If *pp points past the end of the current position-list, set *pi to  | 
 |  1461 ** POSITION_LIST_END and return. Otherwise, read the next varint from *pp, | 
 |  1462 ** increment the current value of *pi by the value read, and set *pp to | 
 |  1463 ** point to the next value before returning. | 
 |  1464 ** | 
 |  1465 ** Before calling this routine *pi must be initialized to the value of | 
 |  1466 ** the previous position, or zero if we are reading the first position | 
 |  1467 ** in the position-list.  Because positions are delta-encoded, the value | 
 |  1468 ** of the previous position is needed in order to compute the value of | 
 |  1469 ** the next position. | 
 |  1470 */ | 
 |  1471 static void fts3ReadNextPos( | 
 |  1472   char **pp,                    /* IN/OUT: Pointer into position-list buffer */ | 
 |  1473   sqlite3_int64 *pi             /* IN/OUT: Value read from position-list */ | 
 |  1474 ){ | 
 |  1475   if( (**pp)&0xFE ){ | 
 |  1476     fts3GetDeltaVarint(pp, pi); | 
 |  1477     *pi -= 2; | 
 |  1478   }else{ | 
 |  1479     *pi = POSITION_LIST_END; | 
 |  1480   } | 
 |  1481 } | 
 |  1482  | 
 /*
 ** Start a new column-list in the output at *pp.
 **
 ** If iCol is non-zero, a POS_COLUMN (1) byte followed by iCol encoded as a
 ** varint is written to *pp, *pp is advanced past the bytes written, and
 ** the number of bytes written is returned. If iCol is zero nothing is
 ** written, *pp is not modified, and 0 is returned.
 */
 static int fts3PutColNumber(char **pp, int iCol){
   char *pOut;                     /* Output pointer */
   int nByte;                      /* Number of bytes written */

   if( iCol==0 ){
     return 0;
   }

   pOut = *pp;
   nByte = 1 + sqlite3Fts3PutVarint(&pOut[1], iCol);
   pOut[0] = 0x01;
   *pp = pOut + nByte;
   return nByte;
 }
 |  1502  | 
 |  1503 /* | 
 |  1504 ** Compute the union of two position lists.  The output written | 
 |  1505 ** into *pp contains all positions of both *pp1 and *pp2 in sorted | 
 |  1506 ** order and with any duplicates removed.  All pointers are | 
 |  1507 ** updated appropriately.   The caller is responsible for insuring | 
 |  1508 ** that there is enough space in *pp to hold the complete output. | 
 |  1509 */ | 
 |  1510 static void fts3PoslistMerge( | 
 |  1511   char **pp,                      /* Output buffer */ | 
 |  1512   char **pp1,                     /* Left input list */ | 
 |  1513   char **pp2                      /* Right input list */ | 
 |  1514 ){ | 
 |  1515   char *p = *pp; | 
 |  1516   char *p1 = *pp1; | 
 |  1517   char *p2 = *pp2; | 
 |  1518  | 
 |  1519   while( *p1 || *p2 ){ | 
 |  1520     int iCol1;         /* The current column index in pp1 */ | 
 |  1521     int iCol2;         /* The current column index in pp2 */ | 
 |  1522  | 
 |  1523     if( *p1==POS_COLUMN ) sqlite3Fts3GetVarint32(&p1[1], &iCol1); | 
 |  1524     else if( *p1==POS_END ) iCol1 = POSITION_LIST_END; | 
 |  1525     else iCol1 = 0; | 
 |  1526  | 
 |  1527     if( *p2==POS_COLUMN ) sqlite3Fts3GetVarint32(&p2[1], &iCol2); | 
 |  1528     else if( *p2==POS_END ) iCol2 = POSITION_LIST_END; | 
 |  1529     else iCol2 = 0; | 
 |  1530  | 
 |  1531     if( iCol1==iCol2 ){ | 
 |  1532       sqlite3_int64 i1 = 0;       /* Last position from pp1 */ | 
 |  1533       sqlite3_int64 i2 = 0;       /* Last position from pp2 */ | 
 |  1534       sqlite3_int64 iPrev = 0; | 
 |  1535       int n = fts3PutColNumber(&p, iCol1); | 
 |  1536       p1 += n; | 
 |  1537       p2 += n; | 
 |  1538  | 
 |  1539       /* At this point, both p1 and p2 point to the start of column-lists | 
 |  1540       ** for the same column (the column with index iCol1 and iCol2). | 
 |  1541       ** A column-list is a list of non-negative delta-encoded varints, each  | 
 |  1542       ** incremented by 2 before being stored. Each list is terminated by a | 
 |  1543       ** POS_END (0) or POS_COLUMN (1). The following block merges the two lists | 
 |  1544       ** and writes the results to buffer p. p is left pointing to the byte | 
 |  1545       ** after the list written. No terminator (POS_END or POS_COLUMN) is | 
 |  1546       ** written to the output. | 
 |  1547       */ | 
 |  1548       fts3GetDeltaVarint(&p1, &i1); | 
 |  1549       fts3GetDeltaVarint(&p2, &i2); | 
 |  1550       do { | 
 |  1551         fts3PutDeltaVarint(&p, &iPrev, (i1<i2) ? i1 : i2);  | 
 |  1552         iPrev -= 2; | 
 |  1553         if( i1==i2 ){ | 
 |  1554           fts3ReadNextPos(&p1, &i1); | 
 |  1555           fts3ReadNextPos(&p2, &i2); | 
 |  1556         }else if( i1<i2 ){ | 
 |  1557           fts3ReadNextPos(&p1, &i1); | 
 |  1558         }else{ | 
 |  1559           fts3ReadNextPos(&p2, &i2); | 
 |  1560         } | 
 |  1561       }while( i1!=POSITION_LIST_END || i2!=POSITION_LIST_END ); | 
 |  1562     }else if( iCol1<iCol2 ){ | 
 |  1563       p1 += fts3PutColNumber(&p, iCol1); | 
 |  1564       fts3ColumnlistCopy(&p, &p1); | 
 |  1565     }else{ | 
 |  1566       p2 += fts3PutColNumber(&p, iCol2); | 
 |  1567       fts3ColumnlistCopy(&p, &p2); | 
 |  1568     } | 
 |  1569   } | 
 |  1570  | 
 |  1571   *p++ = POS_END; | 
 |  1572   *pp = p; | 
 |  1573   *pp1 = p1 + 1; | 
 |  1574   *pp2 = p2 + 1; | 
 |  1575 } | 
 |  1576  | 
 |  1577 /* | 
 |  1578 ** nToken==1 searches for adjacent positions. | 
 |  1579 ** | 
 |  1580 ** This function is used to merge two position lists into one. When it is | 
 |  1581 ** called, *pp1 and *pp2 must both point to position lists. A position-list is | 
 |  1582 ** the part of a doclist that follows each document id. For example, if a row | 
 |  1583 ** contains: | 
 |  1584 ** | 
 |  1585 **     'a b c'|'x y z'|'a b b a' | 
 |  1586 ** | 
 |  1587 ** Then the position list for this row for token 'b' would consist of: | 
 |  1588 ** | 
 |  1589 **     0x02 0x01 0x02 0x03 0x03 0x00 | 
 |  1590 ** | 
 |  1591 ** When this function returns, both *pp1 and *pp2 are left pointing to the | 
 |  1592 ** byte following the 0x00 terminator of their respective position lists. | 
 |  1593 ** | 
 |  1594 ** If isSaveLeft is 0, an entry is added to the output position list for  | 
 |  1595 ** each position in *pp2 for which there exists one or more positions in | 
 |  1596 ** *pp1 so that (pos(*pp2)>pos(*pp1) && pos(*pp2)-pos(*pp1)<=nToken). i.e. | 
 |  1597 ** when the *pp1 token appears before the *pp2 token, but not more than nToken | 
 |  1598 ** slots before it. | 
 |  1599 */ | 
 |  1600 static int fts3PoslistPhraseMerge( | 
 |  1601   char **pp,                      /* IN/OUT: Preallocated output buffer */ | 
 |  1602   int nToken,                     /* Maximum difference in token positions */ | 
 |  1603   int isSaveLeft,                 /* Save the left position */ | 
 |  1604   int isExact,                    /* If *pp1 is exactly nTokens before *pp2 */ | 
 |  1605   char **pp1,                     /* IN/OUT: Left input list */ | 
 |  1606   char **pp2                      /* IN/OUT: Right input list */ | 
 |  1607 ){ | 
 |  1608   char *p = (pp ? *pp : 0); | 
 |  1609   char *p1 = *pp1; | 
 |  1610   char *p2 = *pp2; | 
 |  1611   int iCol1 = 0; | 
 |  1612   int iCol2 = 0; | 
 |  1613  | 
 |  1614   /* Never set both isSaveLeft and isExact for the same invocation. */ | 
 |  1615   assert( isSaveLeft==0 || isExact==0 ); | 
 |  1616  | 
 |  1617   assert( *p1!=0 && *p2!=0 ); | 
 |  1618   if( *p1==POS_COLUMN ){  | 
 |  1619     p1++; | 
 |  1620     p1 += sqlite3Fts3GetVarint32(p1, &iCol1); | 
 |  1621   } | 
 |  1622   if( *p2==POS_COLUMN ){  | 
 |  1623     p2++; | 
 |  1624     p2 += sqlite3Fts3GetVarint32(p2, &iCol2); | 
 |  1625   } | 
 |  1626  | 
 |  1627   while( 1 ){ | 
 |  1628     if( iCol1==iCol2 ){ | 
 |  1629       char *pSave = p; | 
 |  1630       sqlite3_int64 iPrev = 0; | 
 |  1631       sqlite3_int64 iPos1 = 0; | 
 |  1632       sqlite3_int64 iPos2 = 0; | 
 |  1633  | 
 |  1634       if( pp && iCol1 ){ | 
 |  1635         *p++ = POS_COLUMN; | 
 |  1636         p += sqlite3Fts3PutVarint(p, iCol1); | 
 |  1637       } | 
 |  1638  | 
 |  1639       assert( *p1!=POS_END && *p1!=POS_COLUMN ); | 
 |  1640       assert( *p2!=POS_END && *p2!=POS_COLUMN ); | 
 |  1641       fts3GetDeltaVarint(&p1, &iPos1); iPos1 -= 2; | 
 |  1642       fts3GetDeltaVarint(&p2, &iPos2); iPos2 -= 2; | 
 |  1643  | 
 |  1644       while( 1 ){ | 
 |  1645         if( iPos2==iPos1+nToken  | 
 |  1646          || (isExact==0 && iPos2>iPos1 && iPos2<=iPos1+nToken)  | 
 |  1647         ){ | 
 |  1648           sqlite3_int64 iSave; | 
 |  1649           if( !pp ){ | 
 |  1650             fts3PoslistCopy(0, &p2); | 
 |  1651             fts3PoslistCopy(0, &p1); | 
 |  1652             *pp1 = p1; | 
 |  1653             *pp2 = p2; | 
 |  1654             return 1; | 
 |  1655           } | 
 |  1656           iSave = isSaveLeft ? iPos1 : iPos2; | 
 |  1657           fts3PutDeltaVarint(&p, &iPrev, iSave+2); iPrev -= 2; | 
 |  1658           pSave = 0; | 
 |  1659         } | 
 |  1660         if( (!isSaveLeft && iPos2<=(iPos1+nToken)) || iPos2<=iPos1 ){ | 
 |  1661           if( (*p2&0xFE)==0 ) break; | 
 |  1662           fts3GetDeltaVarint(&p2, &iPos2); iPos2 -= 2; | 
 |  1663         }else{ | 
 |  1664           if( (*p1&0xFE)==0 ) break; | 
 |  1665           fts3GetDeltaVarint(&p1, &iPos1); iPos1 -= 2; | 
 |  1666         } | 
 |  1667       } | 
 |  1668  | 
 |  1669       if( pSave ){ | 
 |  1670         assert( pp && p ); | 
 |  1671         p = pSave; | 
 |  1672       } | 
 |  1673  | 
 |  1674       fts3ColumnlistCopy(0, &p1); | 
 |  1675       fts3ColumnlistCopy(0, &p2); | 
 |  1676       assert( (*p1&0xFE)==0 && (*p2&0xFE)==0 ); | 
 |  1677       if( 0==*p1 || 0==*p2 ) break; | 
 |  1678  | 
 |  1679       p1++; | 
 |  1680       p1 += sqlite3Fts3GetVarint32(p1, &iCol1); | 
 |  1681       p2++; | 
 |  1682       p2 += sqlite3Fts3GetVarint32(p2, &iCol2); | 
 |  1683     } | 
 |  1684  | 
 |  1685     /* Advance pointer p1 or p2 (whichever corresponds to the smaller of | 
 |  1686     ** iCol1 and iCol2) so that it points to either the 0x00 that marks the | 
 |  1687     ** end of the position list, or the 0x01 that precedes the next  | 
 |  1688     ** column-number in the position list.  | 
 |  1689     */ | 
 |  1690     else if( iCol1<iCol2 ){ | 
 |  1691       fts3ColumnlistCopy(0, &p1); | 
 |  1692       if( 0==*p1 ) break; | 
 |  1693       p1++; | 
 |  1694       p1 += sqlite3Fts3GetVarint32(p1, &iCol1); | 
 |  1695     }else{ | 
 |  1696       fts3ColumnlistCopy(0, &p2); | 
 |  1697       if( 0==*p2 ) break; | 
 |  1698       p2++; | 
 |  1699       p2 += sqlite3Fts3GetVarint32(p2, &iCol2); | 
 |  1700     } | 
 |  1701   } | 
 |  1702  | 
 |  1703   fts3PoslistCopy(0, &p2); | 
 |  1704   fts3PoslistCopy(0, &p1); | 
 |  1705   *pp1 = p1; | 
 |  1706   *pp2 = p2; | 
 |  1707   if( !pp || *pp==p ){ | 
 |  1708     return 0; | 
 |  1709   } | 
 |  1710   *p++ = 0x00; | 
 |  1711   *pp = p; | 
 |  1712   return 1; | 
 |  1713 } | 
 |  1714  | 
 /*
 ** Merge two position-lists as required by the NEAR operator.
 **
 ** Two passes of fts3PoslistPhraseMerge() are made over the same inputs:
 ** one with (*pp1, *pp2) and distance nRight, and one with the lists
 ** swapped and distance nLeft.
 **
 ** If pp is NULL no output is produced and the return value is non-zero if
 ** either pass finds a match.
 **
 ** Otherwise the result of the first pass is written to aTmp and the
 ** result of the second pass directly after it. If both passes produced
 ** output the two results are merged into *pp; if only one did, that
 ** result is copied to *pp. Returns 1 if anything was written to *pp, or 0
 ** if neither pass produced output.
 */
 static int fts3PoslistNearMerge(
   char **pp,                      /* Output buffer */
   char *aTmp,                     /* Temporary buffer space */
   int nRight,                     /* Maximum difference in token positions */
   int nLeft,                      /* Maximum difference in token positions */
   char **pp1,                     /* IN/OUT: Left input list */
   char **pp2                      /* IN/OUT: Right input list */
 ){
   char *pStart1 = *pp1;           /* Saved start of the left list */
   char *pStart2 = *pp2;           /* Saved start of the right list */
   char *pEndFwd;                  /* End of first-pass output in aTmp */
   char *pEndRev;                  /* End of second-pass output */
   char *aRev;                     /* Start of second-pass output */
   int hasFwd;                     /* True if the first pass wrote output */
   int hasRev;                     /* True if the second pass wrote output */

   if( !pp ){
     if( fts3PoslistPhraseMerge(0, nRight, 0, 0, pp1, pp2) ) return 1;
     /* Rewind both lists before trying the other direction. */
     *pp1 = pStart1;
     *pp2 = pStart2;
     return fts3PoslistPhraseMerge(0, nLeft, 0, 0, pp2, pp1);
   }

   pEndFwd = aTmp;
   fts3PoslistPhraseMerge(&pEndFwd, nRight, 0, 0, pp1, pp2);

   /* The second pass writes its output directly after the first. */
   aRev = pEndRev = pEndFwd;
   *pp1 = pStart1;
   *pp2 = pStart2;
   fts3PoslistPhraseMerge(&pEndRev, nLeft, 1, 0, pp2, pp1);

   hasFwd = (pEndFwd!=aTmp);
   hasRev = (pEndRev!=aRev);
   if( hasFwd && hasRev ){
     fts3PoslistMerge(pp, &aTmp, &aRev);
   }else if( hasFwd ){
     fts3PoslistCopy(pp, &aTmp);
   }else if( hasRev ){
     fts3PoslistCopy(pp, &aRev);
   }else{
     return 0;
   }
   return 1;
 }
 |  1758  | 
 /*
 ** Merge types accepted as the first parameter of fts3DoclistMerge().
 **
 ** The trailing comments give the input and output kinds of each merge.
 ** NOTE(review): presumably "D" is a doclist without position-lists and
 ** "P" is a doclist with position-lists -- confirm against
 ** fts3DoclistMerge().
 */
 #define MERGE_NOT        2        /* D + D -> D */
 #define MERGE_AND        3        /* D + D -> D */
 #define MERGE_OR         4        /* D + D -> D */
 #define MERGE_POS_OR     5        /* P + P -> P */
 #define MERGE_PHRASE     6        /* P + P -> D */
 #define MERGE_POS_PHRASE 7        /* P + P -> P */
 #define MERGE_NEAR       8        /* P + P -> D */
 #define MERGE_POS_NEAR   9        /* P + P -> P */
 |  1770  | 
 |  1771 /* | 
 |  1772 ** Merge the two doclists passed in buffer a1 (size n1 bytes) and a2 | 
 |  1773 ** (size n2 bytes). The output is written to pre-allocated buffer aBuffer, | 
 |  1774 ** which is guaranteed to be large enough to hold the results. The number | 
 |  1775 ** of bytes written to aBuffer is stored in *pnBuffer before returning. | 
 |  1776 ** | 
 |  1777 ** If successful, SQLITE_OK is returned. Otherwise, if a malloc error | 
 |  1778 ** occurs while allocating a temporary buffer as part of the merge operation, | 
 |  1779 ** SQLITE_NOMEM is returned. | 
 |  1780 */ | 
 |  1781 static int fts3DoclistMerge( | 
 |  1782   int mergetype,                  /* One of the MERGE_XXX constants */ | 
 |  1783   int nParam1,                    /* Used by MERGE_NEAR and MERGE_POS_NEAR */ | 
 |  1784   int nParam2,                    /* Used by MERGE_NEAR and MERGE_POS_NEAR */ | 
 |  1785   char *aBuffer,                  /* Pre-allocated output buffer */ | 
 |  1786   int *pnBuffer,                  /* OUT: Bytes written to aBuffer */ | 
 |  1787   char *a1,                       /* Buffer containing first doclist */ | 
 |  1788   int n1,                         /* Size of buffer a1 */ | 
 |  1789   char *a2,                       /* Buffer containing second doclist */ | 
 |  1790   int n2,                         /* Size of buffer a2 */ | 
 |  1791   int *pnDoc                      /* OUT: Number of docids in output */ | 
 |  1792 ){ | 
 |  1793   sqlite3_int64 i1 = 0; | 
 |  1794   sqlite3_int64 i2 = 0; | 
 |  1795   sqlite3_int64 iPrev = 0; | 
 |  1796  | 
 |  1797   char *p = aBuffer; | 
 |  1798   char *p1 = a1; | 
 |  1799   char *p2 = a2; | 
 |  1800   char *pEnd1 = &a1[n1]; | 
 |  1801   char *pEnd2 = &a2[n2]; | 
 |  1802   int nDoc = 0; | 
 |  1803  | 
 |  1804   assert( mergetype==MERGE_OR     || mergetype==MERGE_POS_OR  | 
 |  1805        || mergetype==MERGE_AND    || mergetype==MERGE_NOT | 
 |  1806        || mergetype==MERGE_PHRASE || mergetype==MERGE_POS_PHRASE | 
 |  1807        || mergetype==MERGE_NEAR   || mergetype==MERGE_POS_NEAR | 
 |  1808   ); | 
 |  1809  | 
 |  1810   if( !aBuffer ){ | 
 |  1811     *pnBuffer = 0; | 
 |  1812     return SQLITE_NOMEM; | 
 |  1813   } | 
 |  1814  | 
 |  1815   /* Read the first docid from each doclist */ | 
 |  1816   fts3GetDeltaVarint2(&p1, pEnd1, &i1); | 
 |  1817   fts3GetDeltaVarint2(&p2, pEnd2, &i2); | 
 |  1818  | 
 |  1819   switch( mergetype ){ | 
 |  1820     case MERGE_OR: | 
 |  1821     case MERGE_POS_OR: | 
 |  1822       while( p1 || p2 ){ | 
 |  1823         if( p2 && p1 && i1==i2 ){ | 
 |  1824           fts3PutDeltaVarint(&p, &iPrev, i1); | 
 |  1825           if( mergetype==MERGE_POS_OR ) fts3PoslistMerge(&p, &p1, &p2); | 
 |  1826           fts3GetDeltaVarint2(&p1, pEnd1, &i1); | 
 |  1827           fts3GetDeltaVarint2(&p2, pEnd2, &i2); | 
 |  1828         }else if( !p2 || (p1 && i1<i2) ){ | 
 |  1829           fts3PutDeltaVarint(&p, &iPrev, i1); | 
 |  1830           if( mergetype==MERGE_POS_OR ) fts3PoslistCopy(&p, &p1); | 
 |  1831           fts3GetDeltaVarint2(&p1, pEnd1, &i1); | 
 |  1832         }else{ | 
 |  1833           fts3PutDeltaVarint(&p, &iPrev, i2); | 
 |  1834           if( mergetype==MERGE_POS_OR ) fts3PoslistCopy(&p, &p2); | 
 |  1835           fts3GetDeltaVarint2(&p2, pEnd2, &i2); | 
 |  1836         } | 
 |  1837       } | 
 |  1838       break; | 
 |  1839  | 
 |  1840     case MERGE_AND: | 
 |  1841       while( p1 && p2 ){ | 
 |  1842         if( i1==i2 ){ | 
 |  1843           fts3PutDeltaVarint(&p, &iPrev, i1); | 
 |  1844           fts3GetDeltaVarint2(&p1, pEnd1, &i1); | 
 |  1845           fts3GetDeltaVarint2(&p2, pEnd2, &i2); | 
 |  1846           nDoc++; | 
 |  1847         }else if( i1<i2 ){ | 
 |  1848           fts3GetDeltaVarint2(&p1, pEnd1, &i1); | 
 |  1849         }else{ | 
 |  1850           fts3GetDeltaVarint2(&p2, pEnd2, &i2); | 
 |  1851         } | 
 |  1852       } | 
 |  1853       break; | 
 |  1854  | 
 |  1855     case MERGE_NOT: | 
 |  1856       while( p1 ){ | 
 |  1857         if( p2 && i1==i2 ){ | 
 |  1858           fts3GetDeltaVarint2(&p1, pEnd1, &i1); | 
 |  1859           fts3GetDeltaVarint2(&p2, pEnd2, &i2); | 
 |  1860         }else if( !p2 || i1<i2 ){ | 
 |  1861           fts3PutDeltaVarint(&p, &iPrev, i1); | 
 |  1862           fts3GetDeltaVarint2(&p1, pEnd1, &i1); | 
 |  1863         }else{ | 
 |  1864           fts3GetDeltaVarint2(&p2, pEnd2, &i2); | 
 |  1865         } | 
 |  1866       } | 
 |  1867       break; | 
 |  1868  | 
 |  1869     case MERGE_POS_PHRASE: | 
 |  1870     case MERGE_PHRASE: { | 
 |  1871       char **ppPos = (mergetype==MERGE_PHRASE ? 0 : &p); | 
 |  1872       while( p1 && p2 ){ | 
 |  1873         if( i1==i2 ){ | 
 |  1874           char *pSave = p; | 
 |  1875           sqlite3_int64 iPrevSave = iPrev; | 
 |  1876           fts3PutDeltaVarint(&p, &iPrev, i1); | 
 |  1877           if( 0==fts3PoslistPhraseMerge(ppPos, nParam1, 0, 1, &p1, &p2) ){ | 
 |  1878             p = pSave; | 
 |  1879             iPrev = iPrevSave; | 
 |  1880           }else{ | 
 |  1881             nDoc++; | 
 |  1882           } | 
 |  1883           fts3GetDeltaVarint2(&p1, pEnd1, &i1); | 
 |  1884           fts3GetDeltaVarint2(&p2, pEnd2, &i2); | 
 |  1885         }else if( i1<i2 ){ | 
 |  1886           fts3PoslistCopy(0, &p1); | 
 |  1887           fts3GetDeltaVarint2(&p1, pEnd1, &i1); | 
 |  1888         }else{ | 
 |  1889           fts3PoslistCopy(0, &p2); | 
 |  1890           fts3GetDeltaVarint2(&p2, pEnd2, &i2); | 
 |  1891         } | 
 |  1892       } | 
 |  1893       break; | 
 |  1894     } | 
 |  1895  | 
 |  1896     default: assert( mergetype==MERGE_POS_NEAR || mergetype==MERGE_NEAR ); { | 
 |  1897       char *aTmp = 0; | 
 |  1898       char **ppPos = 0; | 
 |  1899  | 
 |  1900       if( mergetype==MERGE_POS_NEAR ){ | 
 |  1901         ppPos = &p; | 
 |  1902         aTmp = sqlite3_malloc(2*(n1+n2+1)); | 
 |  1903         if( !aTmp ){ | 
 |  1904           return SQLITE_NOMEM; | 
 |  1905         } | 
 |  1906       } | 
 |  1907  | 
 |  1908       while( p1 && p2 ){ | 
 |  1909         if( i1==i2 ){ | 
 |  1910           char *pSave = p; | 
 |  1911           sqlite3_int64 iPrevSave = iPrev; | 
 |  1912           fts3PutDeltaVarint(&p, &iPrev, i1); | 
 |  1913  | 
 |  1914           if( !fts3PoslistNearMerge(ppPos, aTmp, nParam1, nParam2, &p1, &p2) ){ | 
 |  1915             iPrev = iPrevSave; | 
 |  1916             p = pSave; | 
 |  1917           } | 
 |  1918  | 
 |  1919           fts3GetDeltaVarint2(&p1, pEnd1, &i1); | 
 |  1920           fts3GetDeltaVarint2(&p2, pEnd2, &i2); | 
 |  1921         }else if( i1<i2 ){ | 
 |  1922           fts3PoslistCopy(0, &p1); | 
 |  1923           fts3GetDeltaVarint2(&p1, pEnd1, &i1); | 
 |  1924         }else{ | 
 |  1925           fts3PoslistCopy(0, &p2); | 
 |  1926           fts3GetDeltaVarint2(&p2, pEnd2, &i2); | 
 |  1927         } | 
 |  1928       } | 
 |  1929       sqlite3_free(aTmp); | 
 |  1930       break; | 
 |  1931     } | 
 |  1932   } | 
 |  1933  | 
 |  1934   if( pnDoc ) *pnDoc = nDoc; | 
 |  1935   *pnBuffer = (int)(p-aBuffer); | 
 |  1936   return SQLITE_OK; | 
 |  1937 } | 
 |  1938  | 
 |  1939 /*  | 
 |  1940 ** A pointer to an instance of this structure is used as the context  | 
 |  1941 ** argument to sqlite3Fts3SegReaderIterate() | 
 |  1942 */ | 
 |  1943 typedef struct TermSelect TermSelect; | 
 |  1944 struct TermSelect { | 
 |  1945   int isReqPos; | 
 |  1946   char *aaOutput[16];             /* Malloc'd output buffer */ | 
 |  1947   int anOutput[16];               /* Size of output in bytes */ | 
 |  1948 }; | 
 |  1949  | 
 |  1950 /* | 
 |  1951 ** Merge all doclists in the TermSelect.aaOutput[] array into a single | 
 |  1952 ** doclist stored in TermSelect.aaOutput[0]. If successful, delete all | 
 |  1953 ** other doclists (except the aaOutput[0] one) and return SQLITE_OK. | 
 |  1954 ** | 
 |  1955 ** If an OOM error occurs, return SQLITE_NOMEM. In this case it is | 
 |  1956 ** the responsibility of the caller to free any doclists left in the | 
 |  1957 ** TermSelect.aaOutput[] array. | 
 |  1958 */ | 
 |  1959 static int fts3TermSelectMerge(TermSelect *pTS){ | 
 |  1960   int mergetype = (pTS->isReqPos ? MERGE_POS_OR : MERGE_OR); | 
 |  1961   char *aOut = 0; | 
 |  1962   int nOut = 0; | 
 |  1963   int i; | 
 |  1964  | 
 |  1965   /* Loop through the doclists in the aaOutput[] array. Merge them all | 
 |  1966   ** into a single doclist. | 
 |  1967   */ | 
 |  1968   for(i=0; i<SizeofArray(pTS->aaOutput); i++){ | 
 |  1969     if( pTS->aaOutput[i] ){ | 
 |  1970       if( !aOut ){ | 
 |  1971         aOut = pTS->aaOutput[i]; | 
 |  1972         nOut = pTS->anOutput[i]; | 
 |  1973         pTS->aaOutput[i] = 0; | 
 |  1974       }else{ | 
 |  1975         int nNew = nOut + pTS->anOutput[i]; | 
 |  1976         char *aNew = sqlite3_malloc(nNew); | 
 |  1977         if( !aNew ){ | 
 |  1978           sqlite3_free(aOut); | 
 |  1979           return SQLITE_NOMEM; | 
 |  1980         } | 
 |  1981         fts3DoclistMerge(mergetype, 0, 0, | 
 |  1982             aNew, &nNew, pTS->aaOutput[i], pTS->anOutput[i], aOut, nOut, 0 | 
 |  1983         ); | 
 |  1984         sqlite3_free(pTS->aaOutput[i]); | 
 |  1985         sqlite3_free(aOut); | 
 |  1986         pTS->aaOutput[i] = 0; | 
 |  1987         aOut = aNew; | 
 |  1988         nOut = nNew; | 
 |  1989       } | 
 |  1990     } | 
 |  1991   } | 
 |  1992  | 
 |  1993   pTS->aaOutput[0] = aOut; | 
 |  1994   pTS->anOutput[0] = nOut; | 
 |  1995   return SQLITE_OK; | 
 |  1996 } | 
 |  1997  | 
 |  1998 /* | 
 |  1999 ** This function is used as the sqlite3Fts3SegReaderIterate() callback when | 
 |  2000 ** querying the full-text index for a doclist associated with a term or | 
 |  2001 ** term-prefix. | 
 |  2002 */ | 
 |  2003 static int fts3TermSelectCb( | 
 |  2004   Fts3Table *p,                   /* Virtual table object */ | 
 |  2005   void *pContext,                 /* Pointer to TermSelect structure */ | 
 |  2006   char *zTerm, | 
 |  2007   int nTerm, | 
 |  2008   char *aDoclist, | 
 |  2009   int nDoclist | 
 |  2010 ){ | 
 |  2011   TermSelect *pTS = (TermSelect *)pContext; | 
 |  2012  | 
 |  2013   UNUSED_PARAMETER(p); | 
 |  2014   UNUSED_PARAMETER(zTerm); | 
 |  2015   UNUSED_PARAMETER(nTerm); | 
 |  2016  | 
 |  2017   if( pTS->aaOutput[0]==0 ){ | 
 |  2018     /* If this is the first term selected, copy the doclist to the output | 
 |  2019     ** buffer using memcpy(). TODO: Add a way to transfer control of the | 
 |  2020     ** aDoclist buffer from the caller so as to avoid the memcpy(). | 
 |  2021     */ | 
 |  2022     pTS->aaOutput[0] = sqlite3_malloc(nDoclist); | 
 |  2023     pTS->anOutput[0] = nDoclist; | 
 |  2024     if( pTS->aaOutput[0] ){ | 
 |  2025       memcpy(pTS->aaOutput[0], aDoclist, nDoclist); | 
 |  2026     }else{ | 
 |  2027       return SQLITE_NOMEM; | 
 |  2028     } | 
 |  2029   }else{ | 
 |  2030     int mergetype = (pTS->isReqPos ? MERGE_POS_OR : MERGE_OR); | 
 |  2031     char *aMerge = aDoclist; | 
 |  2032     int nMerge = nDoclist; | 
 |  2033     int iOut; | 
 |  2034  | 
 |  2035     for(iOut=0; iOut<SizeofArray(pTS->aaOutput); iOut++){ | 
 |  2036       char *aNew; | 
 |  2037       int nNew; | 
 |  2038       if( pTS->aaOutput[iOut]==0 ){ | 
 |  2039         assert( iOut>0 ); | 
 |  2040         pTS->aaOutput[iOut] = aMerge; | 
 |  2041         pTS->anOutput[iOut] = nMerge; | 
|  2810         break; |  2042         break; | 
|  2811       } |  2043       } | 
 |  2044  | 
 |  2045       nNew = nMerge + pTS->anOutput[iOut]; | 
 |  2046       aNew = sqlite3_malloc(nNew); | 
 |  2047       if( !aNew ){ | 
 |  2048         if( aMerge!=aDoclist ){ | 
 |  2049           sqlite3_free(aMerge); | 
 |  2050         } | 
 |  2051         return SQLITE_NOMEM; | 
 |  2052       } | 
 |  2053       fts3DoclistMerge(mergetype, 0, 0, aNew, &nNew,  | 
 |  2054           pTS->aaOutput[iOut], pTS->anOutput[iOut], aMerge, nMerge, 0 | 
 |  2055       ); | 
 |  2056  | 
 |  2057       if( iOut>0 ) sqlite3_free(aMerge); | 
 |  2058       sqlite3_free(pTS->aaOutput[iOut]); | 
 |  2059       pTS->aaOutput[iOut] = 0; | 
 |  2060  | 
 |  2061       aMerge = aNew; | 
 |  2062       nMerge = nNew; | 
 |  2063       if( (iOut+1)==SizeofArray(pTS->aaOutput) ){ | 
 |  2064         pTS->aaOutput[iOut] = aMerge; | 
 |  2065         pTS->anOutput[iOut] = nMerge; | 
 |  2066       } | 
 |  2067     } | 
 |  2068   } | 
 |  2069   return SQLITE_OK; | 
 |  2070 } | 
 |  2071  | 
 |  2072 static int fts3DeferredTermSelect( | 
 |  2073   Fts3DeferredToken *pToken,      /* Phrase token */ | 
 |  2074   int isTermPos,                  /* True to include positions */ | 
 |  2075   int *pnOut,                     /* OUT: Size of list */ | 
 |  2076   char **ppOut                    /* OUT: Body of list */ | 
 |  2077 ){ | 
 |  2078   char *aSource; | 
 |  2079   int nSource; | 
 |  2080  | 
 |  2081   aSource = sqlite3Fts3DeferredDoclist(pToken, &nSource); | 
 |  2082   if( !aSource ){ | 
 |  2083     *pnOut = 0; | 
 |  2084     *ppOut = 0; | 
 |  2085   }else if( isTermPos ){ | 
 |  2086     *ppOut = sqlite3_malloc(nSource); | 
 |  2087     if( !*ppOut ) return SQLITE_NOMEM; | 
 |  2088     memcpy(*ppOut, aSource, nSource); | 
 |  2089     *pnOut = nSource; | 
 |  2090   }else{ | 
 |  2091     sqlite3_int64 docid; | 
 |  2092     *pnOut = sqlite3Fts3GetVarint(aSource, &docid); | 
 |  2093     *ppOut = sqlite3_malloc(*pnOut); | 
 |  2094     if( !*ppOut ) return SQLITE_NOMEM; | 
 |  2095     sqlite3Fts3PutVarint(*ppOut, docid); | 
 |  2096   } | 
 |  2097  | 
 |  2098   return SQLITE_OK; | 
 |  2099 } | 
 |  2100  | 
 |  2101 int sqlite3Fts3SegReaderCursor( | 
 |  2102   Fts3Table *p,                   /* FTS3 table handle */ | 
 |  2103   int iLevel,                     /* Level of segments to scan */ | 
 |  2104   const char *zTerm,              /* Term to query for */ | 
 |  2105   int nTerm,                      /* Size of zTerm in bytes */ | 
 |  2106   int isPrefix,                   /* True for a prefix search */ | 
 |  2107   int isScan,                     /* True to scan from zTerm to EOF */ | 
 |  2108   Fts3SegReaderCursor *pCsr       /* Cursor object to populate */ | 
 |  2109 ){ | 
 |  2110   int rc = SQLITE_OK; | 
 |  2111   int rc2; | 
 |  2112   int iAge = 0; | 
 |  2113   sqlite3_stmt *pStmt = 0; | 
 |  2114   Fts3SegReader *pPending = 0; | 
 |  2115  | 
 |  2116   assert( iLevel==FTS3_SEGCURSOR_ALL  | 
 |  2117       ||  iLevel==FTS3_SEGCURSOR_PENDING  | 
 |  2118       ||  iLevel>=0 | 
 |  2119   ); | 
 |  2120   assert( FTS3_SEGCURSOR_PENDING<0 ); | 
 |  2121   assert( FTS3_SEGCURSOR_ALL<0 ); | 
 |  2122   assert( iLevel==FTS3_SEGCURSOR_ALL || (zTerm==0 && isPrefix==1) ); | 
 |  2123   assert( isPrefix==0 || isScan==0 ); | 
 |  2124  | 
 |  2125  | 
 |  2126   memset(pCsr, 0, sizeof(Fts3SegReaderCursor)); | 
 |  2127  | 
 |  2128   /* If iLevel is less than 0, include a seg-reader for the pending-terms. */ | 
 |  2129   assert( isScan==0 || fts3HashCount(&p->pendingTerms)==0 ); | 
 |  2130   if( iLevel<0 && isScan==0 ){ | 
 |  2131     rc = sqlite3Fts3SegReaderPending(p, zTerm, nTerm, isPrefix, &pPending); | 
 |  2132     if( rc==SQLITE_OK && pPending ){ | 
 |  2133       int nByte = (sizeof(Fts3SegReader *) * 16); | 
 |  2134       pCsr->apSegment = (Fts3SegReader **)sqlite3_malloc(nByte); | 
 |  2135       if( pCsr->apSegment==0 ){ | 
 |  2136         rc = SQLITE_NOMEM; | 
 |  2137       }else{ | 
 |  2138         pCsr->apSegment[0] = pPending; | 
 |  2139         pCsr->nSegment = 1; | 
 |  2140         pPending = 0; | 
 |  2141       } | 
 |  2142     } | 
 |  2143   } | 
 |  2144  | 
 |  2145   if( iLevel!=FTS3_SEGCURSOR_PENDING ){ | 
 |  2146     if( rc==SQLITE_OK ){ | 
 |  2147       rc = sqlite3Fts3AllSegdirs(p, iLevel, &pStmt); | 
 |  2148     } | 
 |  2149     while( rc==SQLITE_OK && SQLITE_ROW==(rc = sqlite3_step(pStmt)) ){ | 
 |  2150  | 
 |  2151       /* Read the values returned by the SELECT into local variables. */ | 
 |  2152       sqlite3_int64 iStartBlock = sqlite3_column_int64(pStmt, 1); | 
 |  2153       sqlite3_int64 iLeavesEndBlock = sqlite3_column_int64(pStmt, 2); | 
 |  2154       sqlite3_int64 iEndBlock = sqlite3_column_int64(pStmt, 3); | 
 |  2155       int nRoot = sqlite3_column_bytes(pStmt, 4); | 
 |  2156       char const *zRoot = sqlite3_column_blob(pStmt, 4); | 
 |  2157  | 
 |  2158       /* If nSegment is a multiple of 16 the array needs to be extended. */ | 
 |  2159       if( (pCsr->nSegment%16)==0 ){ | 
 |  2160         Fts3SegReader **apNew; | 
 |  2161         int nByte = (pCsr->nSegment + 16)*sizeof(Fts3SegReader*); | 
 |  2162         apNew = (Fts3SegReader **)sqlite3_realloc(pCsr->apSegment, nByte); | 
 |  2163         if( !apNew ){ | 
 |  2164           rc = SQLITE_NOMEM; | 
 |  2165           goto finished; | 
 |  2166         } | 
 |  2167         pCsr->apSegment = apNew; | 
 |  2168       } | 
 |  2169  | 
 |  2170       /* If zTerm is not NULL, and this segment is not stored entirely on its | 
 |  2171       ** root node, the range of leaves scanned can be reduced. Do this. */ | 
 |  2172       if( iStartBlock && zTerm ){ | 
 |  2173         sqlite3_int64 *pi = (isPrefix ? &iLeavesEndBlock : 0); | 
 |  2174         rc = fts3SelectLeaf(p, zTerm, nTerm, zRoot, nRoot, &iStartBlock, pi); | 
 |  2175         if( rc!=SQLITE_OK ) goto finished; | 
 |  2176         if( isPrefix==0 && isScan==0 ) iLeavesEndBlock = iStartBlock; | 
 |  2177       } | 
 |  2178   | 
 |  2179       rc = sqlite3Fts3SegReaderNew(iAge, iStartBlock, iLeavesEndBlock, | 
 |  2180           iEndBlock, zRoot, nRoot, &pCsr->apSegment[pCsr->nSegment] | 
 |  2181       ); | 
 |  2182       if( rc!=SQLITE_OK ) goto finished; | 
 |  2183       pCsr->nSegment++; | 
 |  2184       iAge++; | 
 |  2185     } | 
 |  2186   } | 
 |  2187  | 
 |  2188  finished: | 
 |  2189   rc2 = sqlite3_reset(pStmt); | 
 |  2190   if( rc==SQLITE_DONE ) rc = rc2; | 
 |  2191   sqlite3Fts3SegReaderFree(pPending); | 
 |  2192  | 
 |  2193   return rc; | 
 |  2194 } | 
 |  2195  | 
 |  2196  | 
 |  2197 static int fts3TermSegReaderCursor( | 
 |  2198   Fts3Cursor *pCsr,               /* Virtual table cursor handle */ | 
 |  2199   const char *zTerm,              /* Term to query for */ | 
 |  2200   int nTerm,                      /* Size of zTerm in bytes */ | 
 |  2201   int isPrefix,                   /* True for a prefix search */ | 
 |  2202   Fts3SegReaderCursor **ppSegcsr  /* OUT: Allocated seg-reader cursor */ | 
 |  2203 ){ | 
 |  2204   Fts3SegReaderCursor *pSegcsr;   /* Object to allocate and return */ | 
 |  2205   int rc = SQLITE_NOMEM;          /* Return code */ | 
 |  2206  | 
 |  2207   pSegcsr = sqlite3_malloc(sizeof(Fts3SegReaderCursor)); | 
 |  2208   if( pSegcsr ){ | 
 |  2209     Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; | 
 |  2210     int i; | 
 |  2211     int nCost = 0; | 
 |  2212     rc = sqlite3Fts3SegReaderCursor( | 
 |  2213         p, FTS3_SEGCURSOR_ALL, zTerm, nTerm, isPrefix, 0, pSegcsr); | 
 |  2214    | 
 |  2215     for(i=0; rc==SQLITE_OK && i<pSegcsr->nSegment; i++){ | 
 |  2216       rc = sqlite3Fts3SegReaderCost(pCsr, pSegcsr->apSegment[i], &nCost); | 
 |  2217     } | 
 |  2218     pSegcsr->nCost = nCost; | 
 |  2219   } | 
 |  2220  | 
 |  2221   *ppSegcsr = pSegcsr; | 
 |  2222   return rc; | 
 |  2223 } | 
 |  2224  | 
 |  2225 static void fts3SegReaderCursorFree(Fts3SegReaderCursor *pSegcsr){ | 
 |  2226   sqlite3Fts3SegReaderFinish(pSegcsr); | 
 |  2227   sqlite3_free(pSegcsr); | 
 |  2228 } | 
 |  2229  | 
 |  2230 /* | 
 |  2231 ** This function retreives the doclist for the specified term (or term | 
 |  2232 ** prefix) from the database.  | 
 |  2233 ** | 
 |  2234 ** The returned doclist may be in one of two formats, depending on the  | 
 |  2235 ** value of parameter isReqPos. If isReqPos is zero, then the doclist is | 
 |  2236 ** a sorted list of delta-compressed docids (a bare doclist). If isReqPos | 
 |  2237 ** is non-zero, then the returned list is in the same format as is stored  | 
 |  2238 ** in the database without the found length specifier at the start of on-disk | 
 |  2239 ** doclists. | 
 |  2240 */ | 
 |  2241 static int fts3TermSelect( | 
 |  2242   Fts3Table *p,                   /* Virtual table handle */ | 
 |  2243   Fts3PhraseToken *pTok,          /* Token to query for */ | 
 |  2244   int iColumn,                    /* Column to query (or -ve for all columns) */ | 
 |  2245   int isReqPos,                   /* True to include position lists in output */ | 
 |  2246   int *pnOut,                     /* OUT: Size of buffer at *ppOut */ | 
 |  2247   char **ppOut                    /* OUT: Malloced result buffer */ | 
 |  2248 ){ | 
 |  2249   int rc;                         /* Return code */ | 
 |  2250   Fts3SegReaderCursor *pSegcsr;   /* Seg-reader cursor for this term */ | 
 |  2251   TermSelect tsc;                 /* Context object for fts3TermSelectCb() */ | 
 |  2252   Fts3SegFilter filter;           /* Segment term filter configuration */ | 
 |  2253  | 
 |  2254   pSegcsr = pTok->pSegcsr; | 
 |  2255   memset(&tsc, 0, sizeof(TermSelect)); | 
 |  2256   tsc.isReqPos = isReqPos; | 
 |  2257  | 
 |  2258   filter.flags = FTS3_SEGMENT_IGNORE_EMPTY  | 
 |  2259         | (pTok->isPrefix ? FTS3_SEGMENT_PREFIX : 0) | 
 |  2260         | (isReqPos ? FTS3_SEGMENT_REQUIRE_POS : 0) | 
 |  2261         | (iColumn<p->nColumn ? FTS3_SEGMENT_COLUMN_FILTER : 0); | 
 |  2262   filter.iCol = iColumn; | 
 |  2263   filter.zTerm = pTok->z; | 
 |  2264   filter.nTerm = pTok->n; | 
 |  2265  | 
 |  2266   rc = sqlite3Fts3SegReaderStart(p, pSegcsr, &filter); | 
 |  2267   while( SQLITE_OK==rc | 
 |  2268       && SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, pSegcsr))  | 
 |  2269   ){ | 
 |  2270     rc = fts3TermSelectCb(p, (void *)&tsc,  | 
 |  2271         pSegcsr->zTerm, pSegcsr->nTerm, pSegcsr->aDoclist, pSegcsr->nDoclist | 
 |  2272     ); | 
 |  2273   } | 
 |  2274  | 
 |  2275   if( rc==SQLITE_OK ){ | 
 |  2276     rc = fts3TermSelectMerge(&tsc); | 
 |  2277   } | 
 |  2278   if( rc==SQLITE_OK ){ | 
 |  2279     *ppOut = tsc.aaOutput[0]; | 
 |  2280     *pnOut = tsc.anOutput[0]; | 
 |  2281   }else{ | 
 |  2282     int i; | 
 |  2283     for(i=0; i<SizeofArray(tsc.aaOutput); i++){ | 
 |  2284       sqlite3_free(tsc.aaOutput[i]); | 
 |  2285     } | 
 |  2286   } | 
 |  2287  | 
 |  2288   fts3SegReaderCursorFree(pSegcsr); | 
 |  2289   pTok->pSegcsr = 0; | 
 |  2290   return rc; | 
 |  2291 } | 
 |  2292  | 
 |  2293 /* | 
 |  2294 ** This function counts the total number of docids in the doclist stored | 
 |  2295 ** in buffer aList[], size nList bytes. | 
 |  2296 ** | 
 |  2297 ** If the isPoslist argument is true, then it is assumed that the doclist | 
 |  2298 ** contains a position-list following each docid. Otherwise, it is assumed | 
 |  2299 ** that the doclist is simply a list of docids stored as delta encoded  | 
 |  2300 ** varints. | 
 |  2301 */ | 
 |  2302 static int fts3DoclistCountDocids(int isPoslist, char *aList, int nList){ | 
 |  2303   int nDoc = 0;                   /* Return value */ | 
 |  2304   if( aList ){ | 
 |  2305     char *aEnd = &aList[nList];   /* Pointer to one byte after EOF */ | 
 |  2306     char *p = aList;              /* Cursor */ | 
 |  2307     if( !isPoslist ){ | 
 |  2308       /* The number of docids in the list is the same as the number of  | 
 |  2309       ** varints. In FTS3 a varint consists of a single byte with the 0x80  | 
 |  2310       ** bit cleared and zero or more bytes with the 0x80 bit set. So to | 
 |  2311       ** count the varints in the buffer, just count the number of bytes | 
 |  2312       ** with the 0x80 bit clear.  */ | 
 |  2313       while( p<aEnd ) nDoc += (((*p++)&0x80)==0); | 
|  2812     }else{ |  2314     }else{ | 
|  2813       z[j++] = z[i]; |  2315       while( p<aEnd ){ | 
|  2814     } |  2316         nDoc++; | 
|  2815   } |  2317         while( (*p++)&0x80 );     /* Skip docid varint */ | 
|  2816 } |  2318         fts3PoslistCopy(0, &p);   /* Skip over position list */ | 
|  2817  |  2319       } | 
|  2818 /* |  2320     } | 
|  2819 ** The input azIn is a NULL-terminated list of tokens.  Remove the first |  2321   } | 
|  2820 ** token and all punctuation tokens.  Remove the quotes from |  2322  | 
|  2821 ** around string literal tokens. |  2323   return nDoc; | 
|  2822 ** |  2324 } | 
|  2823 ** Example: |  2325  | 
|  2824 ** |  2326 /* | 
|  2825 **     input:      tokenize chinese ( 'simplifed' , 'mixed' ) |  2327 ** Call sqlite3Fts3DeferToken() for each token in the expression pExpr. | 
|  2826 **     output:     chinese simplifed mixed |  2328 */ | 
|  2827 ** |  2329 static int fts3DeferExpression(Fts3Cursor *pCsr, Fts3Expr *pExpr){ | 
|  2828 ** Another example: |  2330   int rc = SQLITE_OK; | 
|  2829 ** |  2331   if( pExpr ){ | 
|  2830 **     input:      delimiters ( '[' , ']' , '...' ) |  2332     rc = fts3DeferExpression(pCsr, pExpr->pLeft); | 
|  2831 **     output:     [ ] ... |  2333     if( rc==SQLITE_OK ){ | 
|  2832 */ |  2334       rc = fts3DeferExpression(pCsr, pExpr->pRight); | 
|  2833 static void tokenListToIdList(char **azIn){ |  2335     } | 
|  2834   int i, j; |  2336     if( pExpr->eType==FTSQUERY_PHRASE ){ | 
|  2835   if( azIn ){ |  2337       int iCol = pExpr->pPhrase->iColumn; | 
|  2836     for(i=0, j=-1; azIn[i]; i++){ |  2338       int i; | 
|  2837       if( safe_isalnum(azIn[i][0]) || azIn[i][1] ){ |  2339       for(i=0; rc==SQLITE_OK && i<pExpr->pPhrase->nToken; i++){ | 
|  2838         dequoteString(azIn[i]); |  2340         Fts3PhraseToken *pToken = &pExpr->pPhrase->aToken[i]; | 
|  2839         if( j>=0 ){ |  2341         if( pToken->pDeferred==0 ){ | 
|  2840           azIn[j] = azIn[i]; |  2342           rc = sqlite3Fts3DeferToken(pCsr, pToken, iCol); | 
|  2841         } |  2343         } | 
|  2842         j++; |  2344       } | 
|  2843       } |  2345     } | 
|  2844     } |  2346   } | 
|  2845     azIn[j] = 0; |  2347   return rc; | 
|  2846   } |  2348 } | 
|  2847 } |  2349  | 
|  2848  |  2350 /* | 
|  2849  |  2351 ** This function removes the position information from a doclist. When | 
|  2850 /* |  2352 ** called, buffer aList (size *pnList bytes) contains a doclist that includes | 
|  2851 ** Find the first alphanumeric token in the string zIn.  Null-terminate |  2353 ** position information. This function removes the position information so | 
|  2852 ** this token.  Remove any quotation marks.  And return a pointer to |  2354 ** that aList contains only docids, and adjusts *pnList to reflect the new | 
|  2853 ** the result. |  2355 ** (possibly reduced) size of the doclist. | 
|  2854 */ |  2356 */ | 
|  2855 static char *firstToken(char *zIn, char **pzTail){ |  2357 static void fts3DoclistStripPositions( | 
|  2856   int n, ttype; |  2358   char *aList,                    /* IN/OUT: Buffer containing doclist */ | 
|  2857   while(1){ |  2359   int *pnList                     /* IN/OUT: Size of doclist in bytes */ | 
|  2858     n = ftsGetToken(zIn, &ttype); |  2360 ){ | 
|  2859     if( ttype==TOKEN_SPACE ){ |  2361   if( aList ){ | 
|  2860       zIn += n; |  2362     char *aEnd = &aList[*pnList]; /* Pointer to one byte after EOF */ | 
|  2861     }else if( ttype==TOKEN_EOF ){ |  2363     char *p = aList;              /* Input cursor */ | 
|  2862       *pzTail = zIn; |  2364     char *pOut = aList;           /* Output cursor */ | 
|  2863       return 0; |  2365    | 
 |  2366     while( p<aEnd ){ | 
 |  2367       sqlite3_int64 delta; | 
 |  2368       p += sqlite3Fts3GetVarint(p, &delta); | 
 |  2369       fts3PoslistCopy(0, &p); | 
 |  2370       pOut += sqlite3Fts3PutVarint(pOut, delta); | 
 |  2371     } | 
 |  2372  | 
 |  2373     *pnList = (int)(pOut - aList); | 
 |  2374   } | 
 |  2375 } | 
 |  2376  | 
 |  2377 /*  | 
 |  2378 ** Return a DocList corresponding to the phrase *pPhrase. | 
 |  2379 ** | 
 |  2380 ** If this function returns SQLITE_OK, but *pnOut is set to a negative value, | 
 |  2381 ** then no tokens in the phrase were looked up in the full-text index. This | 
 |  2382 ** is only possible when this function is called from within xFilter(). The | 
 |  2383 ** caller should assume that all documents match the phrase. The actual | 
 |  2384 ** filtering will take place in xNext(). | 
 |  2385 */ | 
 |  2386 static int fts3PhraseSelect( | 
 |  2387   Fts3Cursor *pCsr,               /* Virtual table cursor handle */ | 
 |  2388   Fts3Phrase *pPhrase,            /* Phrase to return a doclist for */ | 
 |  2389   int isReqPos,                   /* True if output should contain positions */ | 
 |  2390   char **paOut,                   /* OUT: Pointer to malloc'd result buffer */ | 
 |  2391   int *pnOut                      /* OUT: Size of buffer at *paOut */ | 
 |  2392 ){ | 
 |  2393   char *pOut = 0; | 
 |  2394   int nOut = 0; | 
 |  2395   int rc = SQLITE_OK; | 
 |  2396   int ii; | 
 |  2397   int iCol = pPhrase->iColumn; | 
 |  2398   int isTermPos = (pPhrase->nToken>1 || isReqPos); | 
 |  2399   Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; | 
 |  2400   int isFirst = 1; | 
 |  2401  | 
 |  2402   int iPrevTok = 0; | 
 |  2403   int nDoc = 0; | 
 |  2404  | 
 |  2405   /* If this is an xFilter() evaluation, create a segment-reader for each | 
 |  2406   ** phrase token. Or, if this is an xNext() or snippet/offsets/matchinfo | 
 |  2407   ** evaluation, only create segment-readers if there are no Fts3DeferredToken | 
 |  2408   ** objects attached to the phrase-tokens. | 
 |  2409   */ | 
 |  2410   for(ii=0; ii<pPhrase->nToken; ii++){ | 
 |  2411     Fts3PhraseToken *pTok = &pPhrase->aToken[ii]; | 
 |  2412     if( pTok->pSegcsr==0 ){ | 
 |  2413       if( (pCsr->eEvalmode==FTS3_EVAL_FILTER) | 
 |  2414        || (pCsr->eEvalmode==FTS3_EVAL_NEXT && pCsr->pDeferred==0)  | 
 |  2415        || (pCsr->eEvalmode==FTS3_EVAL_MATCHINFO && pTok->bFulltext)  | 
 |  2416       ){ | 
 |  2417         rc = fts3TermSegReaderCursor( | 
 |  2418             pCsr, pTok->z, pTok->n, pTok->isPrefix, &pTok->pSegcsr | 
 |  2419         ); | 
 |  2420         if( rc!=SQLITE_OK ) return rc; | 
 |  2421       } | 
 |  2422     } | 
 |  2423   } | 
 |  2424  | 
 |  2425   for(ii=0; ii<pPhrase->nToken; ii++){ | 
 |  2426     Fts3PhraseToken *pTok;        /* Token to find doclist for */ | 
 |  2427     int iTok = 0;                 /* The token being queried this iteration */ | 
 |  2428     char *pList = 0;              /* Pointer to token doclist */ | 
 |  2429     int nList = 0;                /* Size of buffer at pList */ | 
 |  2430  | 
 |  2431     /* Select a token to process. If this is an xFilter() call, then tokens  | 
 |  2432     ** are processed in order from least to most costly. Otherwise, tokens  | 
 |  2433     ** are processed in the order in which they occur in the phrase. | 
 |  2434     */ | 
 |  2435     if( pCsr->eEvalmode==FTS3_EVAL_MATCHINFO ){ | 
 |  2436       assert( isReqPos ); | 
 |  2437       iTok = ii; | 
 |  2438       pTok = &pPhrase->aToken[iTok]; | 
 |  2439       if( pTok->bFulltext==0 ) continue; | 
 |  2440     }else if( pCsr->eEvalmode==FTS3_EVAL_NEXT || isReqPos ){ | 
 |  2441       iTok = ii; | 
 |  2442       pTok = &pPhrase->aToken[iTok]; | 
|  2864     }else{ |  2443     }else{ | 
|  2865       zIn[n] = 0; |  2444       int nMinCost = 0x7FFFFFFF; | 
|  2866       *pzTail = &zIn[1]; |  2445       int jj; | 
|  2867       dequoteString(zIn); |  2446  | 
|  2868       return zIn; |  2447       /* Find the remaining token with the lowest cost. */ | 
|  2869     } |  2448       for(jj=0; jj<pPhrase->nToken; jj++){ | 
|  2870   } |  2449         Fts3SegReaderCursor *pSegcsr = pPhrase->aToken[jj].pSegcsr; | 
|  2871   /*NOTREACHED*/ |  2450         if( pSegcsr && pSegcsr->nCost<nMinCost ){ | 
|  2872 } |  2451           iTok = jj; | 
|  2873  |  2452           nMinCost = pSegcsr->nCost; | 
|  2874 /* Return true if... |  2453         } | 
|  2875 ** |  2454       } | 
|  2876 **   *  s begins with the string t, ignoring case |  2455       pTok = &pPhrase->aToken[iTok]; | 
|  2877 **   *  s is longer than t |  2456  | 
|  2878 **   *  The first character of s beyond t is not a alphanumeric |  2457       /* This branch is taken if it is determined that loading the doclist | 
|  2879 **  |  2458       ** for the next token would require more IO than loading all documents | 
|  2880 ** Ignore leading space in *s. |  2459       ** currently identified by doclist pOut/nOut. No further doclists will | 
|  2881 ** |  2460       ** be loaded from the full-text index for this phrase. | 
|  2882 ** To put it another way, return true if the first token of |  2461       */ | 
|  2883 ** s[] is t[]. |  2462       if( nMinCost>nDoc && ii>0 ){ | 
|  2884 */ |  2463         rc = fts3DeferExpression(pCsr, pCsr->pExpr); | 
|  2885 static int startsWith(const char *s, const char *t){ |  2464         break; | 
|  2886   while( safe_isspace(*s) ){ s++; } |  2465       } | 
|  2887   while( *t ){ |  2466     } | 
|  2888     if( safe_tolower(*s++)!=safe_tolower(*t++) ) return 0; |  2467  | 
|  2889   } |  2468     if( pCsr->eEvalmode==FTS3_EVAL_NEXT && pTok->pDeferred ){ | 
|  2890   return *s!='_' && !safe_isalnum(*s); |  2469       rc = fts3DeferredTermSelect(pTok->pDeferred, isTermPos, &nList, &pList); | 
|  2891 } |  | 
|  2892  |  | 
|  2893 /* |  | 
|  2894 ** An instance of this structure defines the "spec" of a |  | 
|  2895 ** full text index.  This structure is populated by parseSpec |  | 
|  2896 ** and used by fulltextConnect and fulltextCreate. |  | 
|  2897 */ |  | 
|  2898 typedef struct TableSpec { |  | 
|  2899   const char *zDb;         /* Logical database name */ |  | 
|  2900   const char *zName;       /* Name of the full-text index */ |  | 
|  2901   int nColumn;             /* Number of columns to be indexed */ |  | 
|  2902   char **azColumn;         /* Original names of columns to be indexed */ |  | 
|  2903   char **azContentColumn;  /* Column names for %_content */ |  | 
|  2904   char **azTokenizer;      /* Name of tokenizer and its arguments */ |  | 
|  2905 } TableSpec; |  | 
|  2906  |  | 
|  2907 /* |  | 
|  2908 ** Reclaim all of the memory used by a TableSpec |  | 
|  2909 */ |  | 
|  2910 static void clearTableSpec(TableSpec *p) { |  | 
|  2911   sqlite3_free(p->azColumn); |  | 
|  2912   sqlite3_free(p->azContentColumn); |  | 
|  2913   sqlite3_free(p->azTokenizer); |  | 
|  2914 } |  | 
|  2915  |  | 
|  2916 /* Parse a CREATE VIRTUAL TABLE statement, which looks like this: |  | 
|  2917  * |  | 
|  2918  * CREATE VIRTUAL TABLE email |  | 
|  2919  *        USING fts3(subject, body, tokenize mytokenizer(myarg)) |  | 
|  2920  * |  | 
|  2921  * We return parsed information in a TableSpec structure. |  | 
|  2922  *  |  | 
|  2923  */ |  | 
|  2924 static int parseSpec(TableSpec *pSpec, int argc, const char *const*argv, |  | 
|  2925                      char**pzErr){ |  | 
|  2926   int i, n; |  | 
|  2927   char *z, *zDummy; |  | 
|  2928   char **azArg; |  | 
|  2929   const char *zTokenizer = 0;    /* argv[] entry describing the tokenizer */ |  | 
|  2930  |  | 
|  2931   assert( argc>=3 ); |  | 
|  2932   /* Current interface: |  | 
|  2933   ** argv[0] - module name |  | 
|  2934   ** argv[1] - database name |  | 
|  2935   ** argv[2] - table name |  | 
|  2936   ** argv[3..] - columns, optionally followed by tokenizer specification |  | 
|  2937   **             and snippet delimiters specification. |  | 
|  2938   */ |  | 
|  2939  |  | 
|  2940   /* Make a copy of the complete argv[][] array in a single allocation. |  | 
|  2941   ** The argv[][] array is read-only and transient.  We can write to the |  | 
|  2942   ** copy in order to modify things and the copy is persistent. |  | 
|  2943   */ |  | 
|  2944   CLEAR(pSpec); |  | 
|  2945   for(i=n=0; i<argc; i++){ |  | 
|  2946     n += strlen(argv[i]) + 1; |  | 
|  2947   } |  | 
|  2948   azArg = sqlite3_malloc( sizeof(char*)*argc + n ); |  | 
|  2949   if( azArg==0 ){ |  | 
|  2950     return SQLITE_NOMEM; |  | 
|  2951   } |  | 
|  2952   z = (char*)&azArg[argc]; |  | 
|  2953   for(i=0; i<argc; i++){ |  | 
|  2954     azArg[i] = z; |  | 
|  2955     strcpy(z, argv[i]); |  | 
|  2956     z += strlen(z)+1; |  | 
|  2957   } |  | 
|  2958  |  | 
|  2959   /* Identify the column names and the tokenizer and delimiter arguments |  | 
|  2960   ** in the argv[][] array. |  | 
|  2961   */ |  | 
|  2962   pSpec->zDb = azArg[1]; |  | 
|  2963   pSpec->zName = azArg[2]; |  | 
|  2964   pSpec->nColumn = 0; |  | 
|  2965   pSpec->azColumn = azArg; |  | 
|  2966   zTokenizer = "tokenize simple"; |  | 
|  2967   for(i=3; i<argc; ++i){ |  | 
|  2968     if( startsWith(azArg[i],"tokenize") ){ |  | 
|  2969       zTokenizer = azArg[i]; |  | 
|  2970     }else{ |  2470     }else{ | 
|  2971       z = azArg[pSpec->nColumn] = firstToken(azArg[i], &zDummy); |  2471       if( pTok->pSegcsr ){ | 
|  2972       pSpec->nColumn++; |  2472         rc = fts3TermSelect(p, pTok, iCol, isTermPos, &nList, &pList); | 
|  2973     } |  2473       } | 
|  2974   } |  2474       pTok->bFulltext = 1; | 
|  2975   if( pSpec->nColumn==0 ){ |  2475     } | 
|  2976     azArg[0] = "content"; |  2476     assert( rc!=SQLITE_OK || pCsr->eEvalmode || pTok->pSegcsr==0 ); | 
|  2977     pSpec->nColumn = 1; |  2477     if( rc!=SQLITE_OK ) break; | 
|  2978   } |  2478  | 
|  2979  |  2479     if( isFirst ){ | 
|  2980   /* |  2480       pOut = pList; | 
|  2981   ** Construct the list of content column names. |  2481       nOut = nList; | 
|  2982   ** |  2482       if( pCsr->eEvalmode==FTS3_EVAL_FILTER && pPhrase->nToken>1 ){ | 
|  2983   ** Each content column name will be of the form cNNAAAA |  2483         nDoc = fts3DoclistCountDocids(1, pOut, nOut); | 
|  2984   ** where NN is the column number and AAAA is the sanitized |  2484       } | 
|  2985   ** column name.  "sanitized" means that special characters are |  2485       isFirst = 0; | 
|  2986   ** converted to "_".  The cNN prefix guarantees that all column |  2486       iPrevTok = iTok; | 
|  2987   ** names are unique. |  2487     }else{ | 
|  2988   ** |  2488       /* Merge the new term list and the current output. */ | 
|  2989   ** The AAAA suffix is not strictly necessary.  It is included |  2489       char *aLeft, *aRight; | 
|  2990   ** for the convenience of people who might examine the generated |  2490       int nLeft, nRight; | 
|  2991   ** %_content table and wonder what the columns are used for. |  2491       int nDist; | 
|  2992   */ |  2492       int mt; | 
|  2993   pSpec->azContentColumn = sqlite3_malloc( pSpec->nColumn * sizeof(char *) ); |  2493  | 
|  2994   if( pSpec->azContentColumn==0 ){ |  2494       /* If this is the final token of the phrase, and positions were not | 
|  2995     clearTableSpec(pSpec); |  2495       ** requested by the caller, use MERGE_PHRASE instead of POS_PHRASE. | 
|  2996     return SQLITE_NOMEM; |  2496       ** This drops the position information from the output list. | 
|  2997   } |  2497       */ | 
|  2998   for(i=0; i<pSpec->nColumn; i++){ |  2498       mt = MERGE_POS_PHRASE; | 
|  2999     char *p; |  2499       if( ii==pPhrase->nToken-1 && !isReqPos ) mt = MERGE_PHRASE; | 
|  3000     pSpec->azContentColumn[i] = sqlite3_mprintf("c%d%s", i, azArg[i]); |  2500  | 
|  3001     for (p = pSpec->azContentColumn[i]; *p ; ++p) { |  2501       assert( iPrevTok!=iTok ); | 
|  3002       if( !safe_isalnum(*p) ) *p = '_'; |  2502       if( iPrevTok<iTok ){ | 
|  3003     } |  2503         aLeft = pOut; | 
|  3004   } |  2504         nLeft = nOut; | 
|  3005  |  2505         aRight = pList; | 
|  3006   /* |  2506         nRight = nList; | 
|  3007   ** Parse the tokenizer specification string. |  2507         nDist = iTok-iPrevTok; | 
|  3008   */ |  2508         iPrevTok = iTok; | 
|  3009   pSpec->azTokenizer = tokenizeString(zTokenizer, &n); |  2509       }else{ | 
|  3010   tokenListToIdList(pSpec->azTokenizer); |  2510         aRight = pOut; | 
|  3011  |  2511         nRight = nOut; | 
|  3012   return SQLITE_OK; |  2512         aLeft = pList; | 
|  3013 } |  2513         nLeft = nList; | 
|  3014  |  2514         nDist = iPrevTok-iTok; | 
|  3015 /* |  2515       } | 
|  3016 ** Generate a CREATE TABLE statement that describes the schema of |  2516       pOut = aRight; | 
|  3017 ** the virtual table.  Return a pointer to this schema string. |  2517       fts3DoclistMerge( | 
|  3018 ** |  2518           mt, nDist, 0, pOut, &nOut, aLeft, nLeft, aRight, nRight, &nDoc | 
|  3019 ** Space is obtained from sqlite3_mprintf() and should be freed |  2519       ); | 
|  3020 ** using sqlite3_free(). |  2520       sqlite3_free(aLeft); | 
|  3021 */ |  2521     } | 
|  3022 static char *fulltextSchema( |  2522     assert( nOut==0 || pOut!=0 ); | 
|  3023   int nColumn,                  /* Number of columns */ |  2523   } | 
|  3024   const char *const* azColumn,  /* List of columns */ |  2524  | 
|  3025   const char *zTableName        /* Name of the table */ |  2525   if( rc==SQLITE_OK ){ | 
|  3026 ){ |  2526     if( ii!=pPhrase->nToken ){ | 
|  3027   int i; |  2527       assert( pCsr->eEvalmode==FTS3_EVAL_FILTER && isReqPos==0 ); | 
|  3028   char *zSchema, *zNext; |  2528       fts3DoclistStripPositions(pOut, &nOut); | 
|  3029   const char *zSep = "("; |  2529     } | 
|  3030   zSchema = sqlite3_mprintf("CREATE TABLE x"); |  2530     *paOut = pOut; | 
|  3031   for(i=0; i<nColumn; i++){ |  2531     *pnOut = nOut; | 
|  3032     zNext = sqlite3_mprintf("%s%s%Q", zSchema, zSep, azColumn[i]); |  | 
|  3033     sqlite3_free(zSchema); |  | 
|  3034     zSchema = zNext; |  | 
|  3035     zSep = ","; |  | 
|  3036   } |  | 
|  3037   zNext = sqlite3_mprintf("%s,%Q HIDDEN", zSchema, zTableName); |  | 
|  3038   sqlite3_free(zSchema); |  | 
|  3039   zSchema = zNext; |  | 
|  3040   zNext = sqlite3_mprintf("%s,docid HIDDEN)", zSchema); |  | 
|  3041   sqlite3_free(zSchema); |  | 
|  3042   return zNext; |  | 
|  3043 } |  | 
|  3044  |  | 
|  3045 /* |  | 
|  3046 ** Build a new sqlite3_vtab structure that will describe the |  | 
|  3047 ** fulltext index defined by spec. |  | 
|  3048 */ |  | 
|  3049 static int constructVtab( |  | 
|  3050   sqlite3 *db,              /* The SQLite database connection */ |  | 
|  3051   fts3Hash *pHash,          /* Hash table containing tokenizers */ |  | 
|  3052   TableSpec *spec,          /* Parsed spec information from parseSpec() */ |  | 
|  3053   sqlite3_vtab **ppVTab,    /* Write the resulting vtab structure here */ |  | 
|  3054   char **pzErr              /* Write any error message here */ |  | 
|  3055 ){ |  | 
|  3056   int rc; |  | 
|  3057   int n; |  | 
|  3058   fulltext_vtab *v = 0; |  | 
|  3059   const sqlite3_tokenizer_module *m = NULL; |  | 
|  3060   char *schema; |  | 
|  3061  |  | 
|  3062   char const *zTok;         /* Name of tokenizer to use for this fts table */ |  | 
|  3063   int nTok;                 /* Length of zTok, including nul terminator */ |  | 
|  3064  |  | 
|  3065   v = (fulltext_vtab *) sqlite3_malloc(sizeof(fulltext_vtab)); |  | 
|  3066   if( v==0 ) return SQLITE_NOMEM; |  | 
|  3067   CLEAR(v); |  | 
|  3068   /* sqlite will initialize v->base */ |  | 
|  3069   v->db = db; |  | 
|  3070   v->zDb = spec->zDb;       /* Freed when azColumn is freed */ |  | 
|  3071   v->zName = spec->zName;   /* Freed when azColumn is freed */ |  | 
|  3072   v->nColumn = spec->nColumn; |  | 
|  3073   v->azContentColumn = spec->azContentColumn; |  | 
|  3074   spec->azContentColumn = 0; |  | 
|  3075   v->azColumn = spec->azColumn; |  | 
|  3076   spec->azColumn = 0; |  | 
|  3077  |  | 
|  3078   if( spec->azTokenizer==0 ){ |  | 
|  3079     return SQLITE_NOMEM; |  | 
|  3080   } |  | 
|  3081  |  | 
|  3082   zTok = spec->azTokenizer[0];  |  | 
|  3083   if( !zTok ){ |  | 
|  3084     zTok = "simple"; |  | 
|  3085   } |  | 
|  3086   nTok = strlen(zTok)+1; |  | 
|  3087  |  | 
|  3088   m = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zTok, nTok); |  | 
|  3089   if( !m ){ |  | 
|  3090     *pzErr = sqlite3_mprintf("unknown tokenizer: %s", spec->azTokenizer[0]); |  | 
|  3091     rc = SQLITE_ERROR; |  | 
|  3092     goto err; |  | 
|  3093   } |  | 
|  3094  |  | 
|  3095   for(n=0; spec->azTokenizer[n]; n++){} |  | 
|  3096   if( n ){ |  | 
|  3097     rc = m->xCreate(n-1, (const char*const*)&spec->azTokenizer[1], |  | 
|  3098                     &v->pTokenizer); |  | 
|  3099   }else{ |  2532   }else{ | 
|  3100     rc = m->xCreate(0, 0, &v->pTokenizer); |  2533     sqlite3_free(pOut); | 
|  3101   } |  2534   } | 
|  3102   if( rc!=SQLITE_OK ) goto err; |  | 
|  3103   v->pTokenizer->pModule = m; |  | 
|  3104  |  | 
|  3105   /* TODO: verify the existence of backing tables foo_content, foo_term */ |  | 
|  3106  |  | 
|  3107   schema = fulltextSchema(v->nColumn, (const char*const*)v->azColumn, |  | 
|  3108                           spec->zName); |  | 
|  3109   rc = sqlite3_declare_vtab(db, schema); |  | 
|  3110   sqlite3_free(schema); |  | 
|  3111   if( rc!=SQLITE_OK ) goto err; |  | 
|  3112  |  | 
|  3113   memset(v->pFulltextStatements, 0, sizeof(v->pFulltextStatements)); |  | 
|  3114  |  | 
|  3115   /* Indicate that the buffer is not live. */ |  | 
|  3116   v->nPendingData = -1; |  | 
|  3117  |  | 
|  3118   *ppVTab = &v->base; |  | 
|  3119   FTSTRACE(("FTS3 Connect %p\n", v)); |  | 
|  3120  |  | 
|  3121   return rc; |  2535   return rc; | 
|  3122  |  2536 } | 
|  3123 err: |  2537  | 
|  3124   fulltext_vtab_destroy(v); |  2538 /* | 
 |  2539 ** This function merges two doclists according to the requirements of a | 
 |  2540 ** NEAR operator. | 
 |  2541 ** | 
 |  2542 ** Both input doclists must include position information. The output doclist  | 
 |  2543 ** includes position information if the first argument to this function | 
 |  2544 ** is MERGE_POS_NEAR, or does not if it is MERGE_NEAR. | 
 |  2545 */ | 
 |  2546 static int fts3NearMerge( | 
 |  2547   int mergetype,                  /* MERGE_POS_NEAR or MERGE_NEAR */ | 
 |  2548   int nNear,                      /* Parameter to NEAR operator */ | 
 |  2549   int nTokenLeft,                 /* Number of tokens in LHS phrase arg */ | 
 |  2550   char *aLeft,                    /* Doclist for LHS (incl. positions) */ | 
 |  2551   int nLeft,                      /* Size of LHS doclist in bytes */ | 
 |  2552   int nTokenRight,                /* As nTokenLeft */ | 
 |  2553   char *aRight,                   /* As aLeft */ | 
 |  2554   int nRight,                     /* As nLeft */ | 
 |  2555   char **paOut,                   /* OUT: Results of merge (malloced) */ | 
 |  2556   int *pnOut                      /* OUT: Size of output buffer */ | 
 |  2557 ){ | 
 |  2558   char *aOut;                     /* Buffer to write output doclist to */ | 
 |  2559   int rc;                         /* Return code */ | 
 |  2560  | 
 |  2561   assert( mergetype==MERGE_POS_NEAR || MERGE_NEAR ); | 
 |  2562  | 
 |  2563   aOut = sqlite3_malloc(nLeft+nRight+1); | 
 |  2564   if( aOut==0 ){ | 
 |  2565     rc = SQLITE_NOMEM; | 
 |  2566   }else{ | 
 |  2567     rc = fts3DoclistMerge(mergetype, nNear+nTokenRight, nNear+nTokenLeft,  | 
 |  2568       aOut, pnOut, aLeft, nLeft, aRight, nRight, 0 | 
 |  2569     ); | 
 |  2570     if( rc!=SQLITE_OK ){ | 
 |  2571       sqlite3_free(aOut); | 
 |  2572       aOut = 0; | 
 |  2573     } | 
 |  2574   } | 
 |  2575  | 
 |  2576   *paOut = aOut; | 
|  3125   return rc; |  2577   return rc; | 
|  3126 } |  2578 } | 
|  3127  |  2579  | 
|  3128 static int fulltextConnect( |  2580 /* | 
|  3129   sqlite3 *db, |  2581 ** This function is used as part of the processing for the snippet() and | 
|  3130   void *pAux, |  2582 ** offsets() functions. | 
|  3131   int argc, const char *const*argv, |  2583 ** | 
|  3132   sqlite3_vtab **ppVTab, |  2584 ** Both pLeft and pRight are expression nodes of type FTSQUERY_PHRASE. Both | 
|  3133   char **pzErr |  2585 ** have their respective doclists (including position information) loaded | 
|  3134 ){ |  2586 ** in Fts3Expr.aDoclist/nDoclist. This function removes all entries from | 
|  3135   TableSpec spec; |  2587 ** each doclist that are not within nNear tokens of a corresponding entry | 
|  3136   int rc = parseSpec(&spec, argc, argv, pzErr); |  2588 ** in the other doclist. | 
|  3137   if( rc!=SQLITE_OK ) return rc; |  2589 */ | 
|  3138  |  2590 int sqlite3Fts3ExprNearTrim(Fts3Expr *pLeft, Fts3Expr *pRight, int nNear){ | 
|  3139   rc = constructVtab(db, (fts3Hash *)pAux, &spec, ppVTab, pzErr); |  2591   int rc;                         /* Return code */ | 
|  3140   clearTableSpec(&spec); |  2592  | 
 |  2593   assert( pLeft->eType==FTSQUERY_PHRASE ); | 
 |  2594   assert( pRight->eType==FTSQUERY_PHRASE ); | 
 |  2595   assert( pLeft->isLoaded && pRight->isLoaded ); | 
 |  2596  | 
 |  2597   if( pLeft->aDoclist==0 || pRight->aDoclist==0 ){ | 
 |  2598     sqlite3_free(pLeft->aDoclist); | 
 |  2599     sqlite3_free(pRight->aDoclist); | 
 |  2600     pRight->aDoclist = 0; | 
 |  2601     pLeft->aDoclist = 0; | 
 |  2602     rc = SQLITE_OK; | 
 |  2603   }else{ | 
 |  2604     char *aOut;                   /* Buffer in which to assemble new doclist */ | 
 |  2605     int nOut;                     /* Size of buffer aOut in bytes */ | 
 |  2606  | 
 |  2607     rc = fts3NearMerge(MERGE_POS_NEAR, nNear,  | 
 |  2608         pLeft->pPhrase->nToken, pLeft->aDoclist, pLeft->nDoclist, | 
 |  2609         pRight->pPhrase->nToken, pRight->aDoclist, pRight->nDoclist, | 
 |  2610         &aOut, &nOut | 
 |  2611     ); | 
 |  2612     if( rc!=SQLITE_OK ) return rc; | 
 |  2613     sqlite3_free(pRight->aDoclist); | 
 |  2614     pRight->aDoclist = aOut; | 
 |  2615     pRight->nDoclist = nOut; | 
 |  2616  | 
 |  2617     rc = fts3NearMerge(MERGE_POS_NEAR, nNear,  | 
 |  2618         pRight->pPhrase->nToken, pRight->aDoclist, pRight->nDoclist, | 
 |  2619         pLeft->pPhrase->nToken, pLeft->aDoclist, pLeft->nDoclist, | 
 |  2620         &aOut, &nOut | 
 |  2621     ); | 
 |  2622     sqlite3_free(pLeft->aDoclist); | 
 |  2623     pLeft->aDoclist = aOut; | 
 |  2624     pLeft->nDoclist = nOut; | 
 |  2625   } | 
|  3141   return rc; |  2626   return rc; | 
|  3142 } |  2627 } | 
|  3143  |  2628  | 
|  3144 /* The %_content table holds the text of each document, with |  2629  | 
|  3145 ** the docid column exposed as the SQLite rowid for the table. |  2630 /* | 
|  3146 */ |  2631 ** Allocate an Fts3SegReaderArray for each token in the expression pExpr.  | 
|  3147 /* TODO(shess) This comment needs elaboration to match the updated |  2632 ** The allocated objects are stored in the Fts3PhraseToken.pArray member | 
|  3148 ** code.  Work it into the top-of-file comment at that time. |  2633 ** variables of each token structure. | 
|  3149 */ |  2634 */ | 
|  3150 static int fulltextCreate(sqlite3 *db, void *pAux, |  2635 static int fts3ExprAllocateSegReaders( | 
|  3151                           int argc, const char * const *argv, |  2636   Fts3Cursor *pCsr,               /* FTS3 table */ | 
|  3152                           sqlite3_vtab **ppVTab, char **pzErr){ |  2637   Fts3Expr *pExpr,                /* Expression to create seg-readers for */ | 
|  3153   int rc; |  2638   int *pnExpr                     /* OUT: Number of AND'd expressions */ | 
|  3154   TableSpec spec; |  2639 ){ | 
|  3155   StringBuffer schema; |  2640   int rc = SQLITE_OK;             /* Return code */ | 
|  3156   FTSTRACE(("FTS3 Create\n")); |  2641  | 
|  3157  |  2642   assert( pCsr->eEvalmode==FTS3_EVAL_FILTER ); | 
|  3158   rc = parseSpec(&spec, argc, argv, pzErr); |  2643   if( pnExpr && pExpr->eType!=FTSQUERY_AND ){ | 
|  3159   if( rc!=SQLITE_OK ) return rc; |  2644     (*pnExpr)++; | 
|  3160  |  2645     pnExpr = 0; | 
|  3161   initStringBuffer(&schema); |  2646   } | 
|  3162   append(&schema, "CREATE TABLE %_content("); |  2647  | 
|  3163   append(&schema, "  docid INTEGER PRIMARY KEY,"); |  2648   if( pExpr->eType==FTSQUERY_PHRASE ){ | 
|  3164   appendList(&schema, spec.nColumn, spec.azContentColumn); |  2649     Fts3Phrase *pPhrase = pExpr->pPhrase; | 
|  3165   append(&schema, ")"); |  2650     int ii; | 
|  3166   rc = sql_exec(db, spec.zDb, spec.zName, stringBufferData(&schema)); |  2651  | 
|  3167   stringBufferDestroy(&schema); |  2652     for(ii=0; rc==SQLITE_OK && ii<pPhrase->nToken; ii++){ | 
|  3168   if( rc!=SQLITE_OK ) goto out; |  2653       Fts3PhraseToken *pTok = &pPhrase->aToken[ii]; | 
|  3169  |  2654       if( pTok->pSegcsr==0 ){ | 
|  3170   rc = sql_exec(db, spec.zDb, spec.zName, |  2655         rc = fts3TermSegReaderCursor( | 
|  3171                 "create table %_segments(" |  2656             pCsr, pTok->z, pTok->n, pTok->isPrefix, &pTok->pSegcsr | 
|  3172                 "  blockid INTEGER PRIMARY KEY," |  2657         ); | 
|  3173                 "  block blob" |  2658       } | 
|  3174                 ");" |  2659     } | 
|  3175                 ); |  2660   }else{  | 
|  3176   if( rc!=SQLITE_OK ) goto out; |  2661     rc = fts3ExprAllocateSegReaders(pCsr, pExpr->pLeft, pnExpr); | 
|  3177  |  2662     if( rc==SQLITE_OK ){ | 
|  3178   rc = sql_exec(db, spec.zDb, spec.zName, |  2663       rc = fts3ExprAllocateSegReaders(pCsr, pExpr->pRight, pnExpr); | 
|  3179                 "create table %_segdir(" |  2664     } | 
|  3180                 "  level integer," |  2665   } | 
|  3181                 "  idx integer," |  | 
|  3182                 "  start_block integer," |  | 
|  3183                 "  leaves_end_block integer," |  | 
|  3184                 "  end_block integer," |  | 
|  3185                 "  root blob," |  | 
|  3186                 "  primary key(level, idx)" |  | 
|  3187                 ");"); |  | 
|  3188   if( rc!=SQLITE_OK ) goto out; |  | 
|  3189  |  | 
|  3190   rc = constructVtab(db, (fts3Hash *)pAux, &spec, ppVTab, pzErr); |  | 
|  3191  |  | 
|  3192 out: |  | 
|  3193   clearTableSpec(&spec); |  | 
|  3194   return rc; |  2666   return rc; | 
|  3195 } |  2667 } | 
|  3196  |  2668  | 
|  3197 /* Decide how to handle an SQL query. */ |  2669 /* | 
|  3198 static int fulltextBestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ |  2670 ** Free the Fts3SegReaderArray objects associated with each token in the | 
|  3199   fulltext_vtab *v = (fulltext_vtab *)pVTab; |  2671 ** expression pExpr. In other words, this function frees the resources | 
|  3200   int i; |  2672 ** allocated by fts3ExprAllocateSegReaders(). | 
|  3201   FTSTRACE(("FTS3 BestIndex\n")); |  2673 */ | 
|  3202  |  2674 static void fts3ExprFreeSegReaders(Fts3Expr *pExpr){ | 
|  3203   for(i=0; i<pInfo->nConstraint; ++i){ |  2675   if( pExpr ){ | 
|  3204     const struct sqlite3_index_constraint *pConstraint; |  2676     Fts3Phrase *pPhrase = pExpr->pPhrase; | 
|  3205     pConstraint = &pInfo->aConstraint[i]; |  2677     if( pPhrase ){ | 
|  3206     if( pConstraint->usable ) { |  2678       int kk; | 
|  3207       if( (pConstraint->iColumn==-1 || pConstraint->iColumn==v->nColumn+1) && |  2679       for(kk=0; kk<pPhrase->nToken; kk++){ | 
|  3208           pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ ){ |  2680         fts3SegReaderCursorFree(pPhrase->aToken[kk].pSegcsr); | 
|  3209         pInfo->idxNum = QUERY_DOCID;      /* lookup by docid */ |  2681         pPhrase->aToken[kk].pSegcsr = 0; | 
|  3210         FTSTRACE(("FTS3 QUERY_DOCID\n")); |  2682       } | 
|  3211       } else if( pConstraint->iColumn>=0 && pConstraint->iColumn<=v->nColumn && |  2683     } | 
|  3212                  pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH ){ |  2684     fts3ExprFreeSegReaders(pExpr->pLeft); | 
|  3213         /* full-text search */ |  2685     fts3ExprFreeSegReaders(pExpr->pRight); | 
|  3214         pInfo->idxNum = QUERY_FULLTEXT + pConstraint->iColumn; |  2686   } | 
|  3215         FTSTRACE(("FTS3 QUERY_FULLTEXT %d\n", pConstraint->iColumn)); |  2687 } | 
|  3216       } else continue; |  2688  | 
|  3217  |  2689 /* | 
|  3218       pInfo->aConstraintUsage[i].argvIndex = 1; |  2690 ** Return the sum of the costs of all tokens in the expression pExpr. This | 
|  3219       pInfo->aConstraintUsage[i].omit = 1; |  2691 ** function must be called after Fts3SegReaderArrays have been allocated | 
|  3220  |  2692 ** for all tokens using fts3ExprAllocateSegReaders(). | 
|  3221       /* An arbitrary value for now. |  2693 */ | 
|  3222        * TODO: Perhaps docid matches should be considered cheaper than |  2694 static int fts3ExprCost(Fts3Expr *pExpr){ | 
|  3223        * full-text searches. */ |  2695   int nCost;                      /* Return value */ | 
|  3224       pInfo->estimatedCost = 1.0;    |  2696   if( pExpr->eType==FTSQUERY_PHRASE ){ | 
|  3225  |  2697     Fts3Phrase *pPhrase = pExpr->pPhrase; | 
|  3226       return SQLITE_OK; |  2698     int ii; | 
|  3227     } |  2699     nCost = 0; | 
|  3228   } |  2700     for(ii=0; ii<pPhrase->nToken; ii++){ | 
|  3229   pInfo->idxNum = QUERY_GENERIC; |  2701       Fts3SegReaderCursor *pSegcsr = pPhrase->aToken[ii].pSegcsr; | 
|  3230   return SQLITE_OK; |  2702       if( pSegcsr ) nCost += pSegcsr->nCost; | 
|  3231 } |  2703     } | 
|  3232  |  | 
|  3233 static int fulltextDisconnect(sqlite3_vtab *pVTab){ |  | 
|  3234   FTSTRACE(("FTS3 Disconnect %p\n", pVTab)); |  | 
|  3235   fulltext_vtab_destroy((fulltext_vtab *)pVTab); |  | 
|  3236   return SQLITE_OK; |  | 
|  3237 } |  | 
|  3238  |  | 
|  3239 static int fulltextDestroy(sqlite3_vtab *pVTab){ |  | 
|  3240   fulltext_vtab *v = (fulltext_vtab *)pVTab; |  | 
|  3241   int rc; |  | 
|  3242  |  | 
|  3243   FTSTRACE(("FTS3 Destroy %p\n", pVTab)); |  | 
|  3244   rc = sql_exec(v->db, v->zDb, v->zName, |  | 
|  3245                 "drop table if exists %_content;" |  | 
|  3246                 "drop table if exists %_segments;" |  | 
|  3247                 "drop table if exists %_segdir;" |  | 
|  3248                 ); |  | 
|  3249   if( rc!=SQLITE_OK ) return rc; |  | 
|  3250  |  | 
|  3251   fulltext_vtab_destroy((fulltext_vtab *)pVTab); |  | 
|  3252   return SQLITE_OK; |  | 
|  3253 } |  | 
|  3254  |  | 
|  3255 static int fulltextOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){ |  | 
|  3256   fulltext_cursor *c; |  | 
|  3257  |  | 
|  3258   c = (fulltext_cursor *) sqlite3_malloc(sizeof(fulltext_cursor)); |  | 
|  3259   if( c ){ |  | 
|  3260     memset(c, 0, sizeof(fulltext_cursor)); |  | 
|  3261     /* sqlite will initialize c->base */ |  | 
|  3262     *ppCursor = &c->base; |  | 
|  3263     FTSTRACE(("FTS3 Open %p: %p\n", pVTab, c)); |  | 
|  3264     return SQLITE_OK; |  | 
|  3265   }else{ |  2704   }else{ | 
|  3266     return SQLITE_NOMEM; |  2705     nCost = fts3ExprCost(pExpr->pLeft) + fts3ExprCost(pExpr->pRight); | 
|  3267   } |  2706   } | 
|  3268 } |  2707   return nCost; | 
|  3269  |  2708 } | 
|  3270 /* Free all of the dynamically allocated memory held by the |  2709  | 
|  3271 ** Snippet |  2710 /* | 
|  3272 */ |  2711 ** The following is a helper function (and type) for fts3EvalExpr(). It | 
|  3273 static void snippetClear(Snippet *p){ |  2712 ** must be called after Fts3SegReaders have been allocated for every token | 
|  3274   sqlite3_free(p->aMatch); |  2713 ** in the expression. See the context it is called from in fts3EvalExpr() | 
|  3275   sqlite3_free(p->zOffset); |  2714 ** for further explanation. | 
|  3276   sqlite3_free(p->zSnippet); |  2715 */ | 
|  3277   CLEAR(p); |  2716 typedef struct ExprAndCost ExprAndCost; | 
|  3278 } |  2717 struct ExprAndCost { | 
|  3279  |  2718   Fts3Expr *pExpr; | 
|  3280 /* |  2719   int nCost; | 
|  3281 ** Append a single entry to the p->aMatch[] log. |  2720 }; | 
|  3282 */ |  2721 static void fts3ExprAssignCosts( | 
|  3283 static void snippetAppendMatch( |  2722   Fts3Expr *pExpr,                /* Expression to create seg-readers for */ | 
|  3284   Snippet *p,               /* Append the entry to this snippet */ |  2723   ExprAndCost **ppExprCost        /* OUT: Write to *ppExprCost */ | 
|  3285   int iCol, int iTerm,      /* The column and query term */ |  2724 ){ | 
|  3286   int iToken,               /* Matching token in document */ |  2725   if( pExpr->eType==FTSQUERY_AND ){ | 
|  3287   int iStart, int nByte     /* Offset and size of the match */ |  2726     fts3ExprAssignCosts(pExpr->pLeft, ppExprCost); | 
|  3288 ){ |  2727     fts3ExprAssignCosts(pExpr->pRight, ppExprCost); | 
|  3289   int i; |  | 
|  3290   struct snippetMatch *pMatch; |  | 
|  3291   if( p->nMatch+1>=p->nAlloc ){ |  | 
|  3292     p->nAlloc = p->nAlloc*2 + 10; |  | 
|  3293     p->aMatch = sqlite3_realloc(p->aMatch, p->nAlloc*sizeof(p->aMatch[0]) ); |  | 
|  3294     if( p->aMatch==0 ){ |  | 
|  3295       p->nMatch = 0; |  | 
|  3296       p->nAlloc = 0; |  | 
|  3297       return; |  | 
|  3298     } |  | 
|  3299   } |  | 
|  3300   i = p->nMatch++; |  | 
|  3301   pMatch = &p->aMatch[i]; |  | 
|  3302   pMatch->iCol = iCol; |  | 
|  3303   pMatch->iTerm = iTerm; |  | 
|  3304   pMatch->iToken = iToken; |  | 
|  3305   pMatch->iStart = iStart; |  | 
|  3306   pMatch->nByte = nByte; |  | 
|  3307 } |  | 
|  3308  |  | 
|  3309 /* |  | 
|  3310 ** Sizing information for the circular buffer used in snippetOffsetsOfColumn() |  | 
|  3311 */ |  | 
|  3312 #define FTS3_ROTOR_SZ   (32) |  | 
|  3313 #define FTS3_ROTOR_MASK (FTS3_ROTOR_SZ-1) |  | 
|  3314  |  | 
|  3315 /* |  | 
|  3316 ** Function to iterate through the tokens of a compiled expression. |  | 
|  3317 ** |  | 
|  3318 ** Except, skip all tokens on the right-hand side of a NOT operator. |  | 
|  3319 ** This function is used to find tokens as part of snippet and offset |  | 
|  3320 ** generation and we do not want snippets and offsets to report matches |  | 
|  3321 ** for tokens on the RHS of a NOT. |  | 
|  3322 */ |  | 
|  3323 static int fts3NextExprToken(Fts3Expr **ppExpr, int *piToken){ |  | 
|  3324   Fts3Expr *p = *ppExpr; |  | 
|  3325   int iToken = *piToken; |  | 
|  3326   if( iToken<0 ){ |  | 
|  3327     /* In this case the expression p is the root of an expression tree. |  | 
|  3328     ** Move to the first token in the expression tree. |  | 
|  3329     */ |  | 
|  3330     while( p->pLeft ){ |  | 
|  3331       p = p->pLeft; |  | 
|  3332     } |  | 
|  3333     iToken = 0; |  | 
|  3334   }else{ |  2728   }else{ | 
|  3335     assert(p && p->eType==FTSQUERY_PHRASE ); |  2729     (*ppExprCost)->pExpr = pExpr; | 
|  3336     if( iToken<(p->pPhrase->nToken-1) ){ |  2730     (*ppExprCost)->nCost = fts3ExprCost(pExpr); | 
|  3337       iToken++; |  2731     (*ppExprCost)++; | 
|  3338     }else{ |  2732   } | 
|  3339       iToken = 0; |  2733 } | 
|  3340       while( p->pParent && p->pParent->pLeft!=p ){ |  2734  | 
|  3341         assert( p->pParent->pRight==p ); |  2735 /* | 
|  3342         p = p->pParent; |  2736 ** Evaluate the full-text expression pExpr against FTS3 table pTab. Store | 
|  3343       } |  2737 ** the resulting doclist in *paOut and *pnOut. This routine mallocs for | 
|  3344       p = p->pParent; |  2738 ** the space needed to store the output. The caller is responsible for | 
|  3345       if( p ){ |  2739 ** freeing the space when it has finished. | 
|  3346         assert( p->pRight!=0 ); |  2740 ** | 
|  3347         p = p->pRight; |  2741 ** This function is called in two distinct contexts: | 
|  3348         while( p->pLeft ){ |  2742 ** | 
|  3349           p = p->pLeft; |  2743 **   * From within the virtual table xFilter() method. In this case, the | 
|  3350         } |  2744 **     output doclist contains entries for all rows in the table, based on | 
|  3351       } |  2745 **     data read from the full-text index. | 
|  3352     } |  2746 ** | 
|  3353   } |  2747 **     In this case, if the query expression contains one or more tokens that  | 
|  3354  |  2748 **     are very common, then the returned doclist may contain a superset of  | 
|  3355   *ppExpr = p; |  2749 **     the documents that actually match the expression. | 
|  3356   *piToken = iToken; |  2750 ** | 
|  3357   return p?1:0; |  2751 **   * From within the virtual table xNext() method. This call is only made | 
|  3358 } |  2752 **     if the call from within xFilter() found that there were very common  | 
|  3359  |  2753 **     tokens in the query expression and did return a superset of the  | 
|  3360 /* |  2754 **     matching documents. In this case the returned doclist contains only | 
|  3361 ** Return TRUE if the expression node pExpr is located beneath the |  2755 **     entries that correspond to the current row of the table. Instead of | 
|  3362 ** RHS of a NOT operator. |  2756 **     reading the data for each token from the full-text index, the data is | 
|  3363 */ |  2757 **     already available in-memory in the Fts3PhraseToken.pDeferred structures. | 
|  3364 static int fts3ExprBeneathNot(Fts3Expr *p){ |  2758 **     See fts3EvalDeferred() for how it gets there. | 
|  3365   Fts3Expr *pParent; |  2759 ** | 
|  3366   while( p ){ |  2760 ** In the first case above, Fts3Cursor.doDeferred==0. In the second (if it is | 
|  3367     pParent = p->pParent; |  2761 ** required) Fts3Cursor.doDeferred==1. | 
|  3368     if( pParent && pParent->eType==FTSQUERY_NOT && pParent->pRight==p ){ |  2762 ** | 
|  3369       return 1; |  2763 ** If SQLite invokes the snippet(), offsets() or matchinfo() function | 
|  3370     } |  2764 ** as part of a SELECT on an FTS3 table, this function is called on each | 
|  3371     p = pParent; |  2765 ** individual phrase expression in the query. If there were very common tokens | 
|  3372   } |  2766 ** found in the xFilter() call, then this function is called once per phrase | 
|  3373   return 0; |  2767 ** for each row visited, and the returned doclist contains entries for the | 
|  3374 } |  2768 ** current row only. Otherwise, if there were no very common tokens, then this | 
|  3375  |  2769 ** function is called once only for each phrase in the query and the returned | 
|  3376 /* |  2770 ** doclist contains entries for all rows of the table. | 
|  3377 ** Add entries to pSnippet->aMatch[] for every match that occurs against |  2771 ** | 
|  3378 ** document zDoc[0..nDoc-1] which is stored in column iColumn. |  2772 ** Fts3Cursor.doDeferred==1 when this function is called on phrases as a | 
|  3379 */ |  2773 ** result of a snippet(), offsets() or matchinfo() invocation. | 
|  3380 static void snippetOffsetsOfColumn( |  2774 */ | 
|  3381   fulltext_cursor *pCur,         /* The fulltext search cursor */ |  2775 static int fts3EvalExpr( | 
|  3382   Snippet *pSnippet,             /* The Snippet object to be filled in */ |  2776   Fts3Cursor *p,                  /* Virtual table cursor handle */ | 
|  3383   int iColumn,                   /* Index of fulltext table column */ |  2777   Fts3Expr *pExpr,                /* Parsed fts3 expression */ | 
|  3384   const char *zDoc,              /* Text of the fulltext table column */ |  2778   char **paOut,                   /* OUT: Pointer to malloc'd result buffer */ | 
|  3385   int nDoc                       /* Length of zDoc in bytes */ |  2779   int *pnOut,                     /* OUT: Size of buffer at *paOut */ | 
|  3386 ){ |  2780   int isReqPos                    /* Require positions in output buffer */ | 
|  3387   const sqlite3_tokenizer_module *pTModule;  /* The tokenizer module */ |  2781 ){ | 
|  3388   sqlite3_tokenizer *pTokenizer;             /* The specific tokenizer */ |  2782   int rc = SQLITE_OK;             /* Return code */ | 
|  3389   sqlite3_tokenizer_cursor *pTCursor;        /* Tokenizer cursor */ |  2783  | 
|  3390   fulltext_vtab *pVtab;                /* The full text index */ |  2784   /* Zero the output parameters. */ | 
|  3391   int nColumn;                         /* Number of columns in the index */ |  2785   *paOut = 0; | 
|  3392   int i, j;                            /* Loop counters */ |  2786   *pnOut = 0; | 
|  3393   int rc;                              /* Return code */ |  2787  | 
|  3394   unsigned int match, prevMatch;       /* Phrase search bitmasks */ |  | 
|  3395   const char *zToken;                  /* Next token from the tokenizer */ |  | 
|  3396   int nToken;                          /* Size of zToken */ |  | 
|  3397   int iBegin, iEnd, iPos;              /* Offsets of beginning and end */ |  | 
|  3398  |  | 
|  3399   /* The following variables keep a circular buffer of the last |  | 
|  3400   ** few tokens */ |  | 
|  3401   unsigned int iRotor = 0;             /* Index of current token */ |  | 
|  3402   int iRotorBegin[FTS3_ROTOR_SZ];      /* Beginning offset of token */ |  | 
|  3403   int iRotorLen[FTS3_ROTOR_SZ];        /* Length of token */ |  | 
|  3404  |  | 
|  3405   pVtab = cursor_vtab(pCur); |  | 
|  3406   nColumn = pVtab->nColumn; |  | 
|  3407   pTokenizer = pVtab->pTokenizer; |  | 
|  3408   pTModule = pTokenizer->pModule; |  | 
|  3409   rc = pTModule->xOpen(pTokenizer, zDoc, nDoc, &pTCursor); |  | 
|  3410   if( rc ) return; |  | 
|  3411   pTCursor->pTokenizer = pTokenizer; |  | 
|  3412  |  | 
|  3413   prevMatch = 0; |  | 
|  3414   while( !pTModule->xNext(pTCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos) ){ |  | 
|  3415     Fts3Expr *pIter = pCur->pExpr; |  | 
|  3416     int iIter = -1; |  | 
|  3417     iRotorBegin[iRotor&FTS3_ROTOR_MASK] = iBegin; |  | 
|  3418     iRotorLen[iRotor&FTS3_ROTOR_MASK] = iEnd-iBegin; |  | 
|  3419     match = 0; |  | 
|  3420     for(i=0; i<(FTS3_ROTOR_SZ-1) && fts3NextExprToken(&pIter, &iIter); i++){ |  | 
|  3421       int nPhrase;                    /* Number of tokens in current phrase */ |  | 
|  3422       struct PhraseToken *pToken;     /* Current token */ |  | 
|  3423       int iCol;                       /* Column index */ |  | 
|  3424  |  | 
|  3425       if( fts3ExprBeneathNot(pIter) ) continue; |  | 
|  3426       nPhrase = pIter->pPhrase->nToken; |  | 
|  3427       pToken = &pIter->pPhrase->aToken[iIter]; |  | 
|  3428       iCol = pIter->pPhrase->iColumn; |  | 
|  3429       if( iCol>=0 && iCol<nColumn && iCol!=iColumn ) continue; |  | 
|  3430       if( pToken->n>nToken ) continue; |  | 
|  3431       if( !pToken->isPrefix && pToken->n<nToken ) continue; |  | 
|  3432       assert( pToken->n<=nToken ); |  | 
|  3433       if( memcmp(pToken->z, zToken, pToken->n) ) continue; |  | 
|  3434       if( iIter>0 && (prevMatch & (1<<i))==0 ) continue; |  | 
|  3435       match |= 1<<i; |  | 
|  3436       if( i==(FTS3_ROTOR_SZ-2) || nPhrase==iIter+1 ){ |  | 
|  3437         for(j=nPhrase-1; j>=0; j--){ |  | 
|  3438           int k = (iRotor-j) & FTS3_ROTOR_MASK; |  | 
|  3439           snippetAppendMatch(pSnippet, iColumn, i-j, iPos-j, |  | 
|  3440                 iRotorBegin[k], iRotorLen[k]); |  | 
|  3441         } |  | 
|  3442       } |  | 
|  3443     } |  | 
|  3444     prevMatch = match<<1; |  | 
|  3445     iRotor++; |  | 
|  3446   } |  | 
|  3447   pTModule->xClose(pTCursor);   |  | 
|  3448 } |  | 
|  3449  |  | 
|  3450 /* |  | 
|  3451 ** Remove entries from the pSnippet structure to account for the NEAR |  | 
|  3452 ** operator. When this is called, pSnippet contains the list of token  |  | 
|  3453 ** offsets produced by treating all NEAR operators as AND operators. |  | 
|  3454 ** This function removes any entries that should not be present after |  | 
|  3455 ** accounting for the NEAR restriction. For example, if the queried |  | 
|  3456 ** document is: |  | 
|  3457 ** |  | 
|  3458 **     "A B C D E A" |  | 
|  3459 ** |  | 
|  3460 ** and the query is: |  | 
|  3461 **  |  | 
|  3462 **     A NEAR/0 E |  | 
|  3463 ** |  | 
|  3464 ** then when this function is called the Snippet contains token offsets |  | 
|  3465 ** 0, 4 and 5. This function removes the "0" entry (because the first A |  | 
|  3466 ** is not near enough to an E). |  | 
|  3467 ** |  | 
|  3468 ** When this function is called, the value pointed to by parameter piLeft is |  | 
|  3469 ** the integer id of the left-most token in the expression tree headed by |  | 
|  3470 ** pExpr. This function increments *piLeft by the total number of tokens |  | 
|  3471 ** in the expression tree headed by pExpr. |  | 
|  3472 ** |  | 
|  3473 ** Return 1 if any trimming occurs.  Return 0 if no trimming is required. |  | 
|  3474 */ |  | 
|  3475 static int trimSnippetOffsets( |  | 
|  3476   Fts3Expr *pExpr,      /* The search expression */ |  | 
|  3477   Snippet *pSnippet,    /* The set of snippet offsets to be trimmed */ |  | 
|  3478   int *piLeft           /* Index of left-most token in pExpr */ |  | 
|  3479 ){ |  | 
|  3480   if( pExpr ){ |  2788   if( pExpr ){ | 
|  3481     if( trimSnippetOffsets(pExpr->pLeft, pSnippet, piLeft) ){ |  2789     assert( pExpr->eType==FTSQUERY_NEAR   || pExpr->eType==FTSQUERY_OR      | 
|  3482       return 1; |  2790          || pExpr->eType==FTSQUERY_AND    || pExpr->eType==FTSQUERY_NOT | 
|  3483     } |  2791          || pExpr->eType==FTSQUERY_PHRASE | 
|  3484  |  2792     ); | 
|  3485     switch( pExpr->eType ){ |  2793     assert( pExpr->eType==FTSQUERY_PHRASE || isReqPos==0 ); | 
|  3486       case FTSQUERY_PHRASE: |  2794  | 
|  3487         *piLeft += pExpr->pPhrase->nToken; |  2795     if( pExpr->eType==FTSQUERY_PHRASE ){ | 
|  3488         break; |  2796       rc = fts3PhraseSelect(p, pExpr->pPhrase, | 
|  3489       case FTSQUERY_NEAR: { |  2797           isReqPos || (pExpr->pParent && pExpr->pParent->eType==FTSQUERY_NEAR), | 
|  3490         /* The right-hand-side of a NEAR operator is always a phrase. The |  2798           paOut, pnOut | 
|  3491         ** left-hand-side is either a phrase or an expression tree that is  |  2799       ); | 
|  3492         ** itself headed by a NEAR operator. The following initializations |  2800       fts3ExprFreeSegReaders(pExpr); | 
|  3493         ** set local variable iLeft to the token number of the left-most |  2801     }else if( p->eEvalmode==FTS3_EVAL_FILTER && pExpr->eType==FTSQUERY_AND ){ | 
|  3494         ** token in the right-hand phrase, and iRight to the right most |  2802       ExprAndCost *aExpr = 0;     /* Array of AND'd expressions and costs */ | 
|  3495         ** token in the same phrase. For example, if we had: |  2803       int nExpr = 0;              /* Size of aExpr[] */ | 
|  3496         ** |  2804       char *aRet = 0;             /* Doclist to return to caller */ | 
|  3497         **     <col> MATCH '"abc def" NEAR/2 "ghi jkl"' |  2805       int nRet = 0;               /* Length of aRet[] in bytes */ | 
|  3498         ** |  2806       int nDoc = 0x7FFFFFFF; | 
|  3499         ** then iLeft will be set to 2 (token number of ghi) and nToken will |  2807  | 
|  3500         ** be set to 4. |  2808       assert( !isReqPos ); | 
|  3501         */ |  2809  | 
|  3502         Fts3Expr *pLeft = pExpr->pLeft; |  2810       rc = fts3ExprAllocateSegReaders(p, pExpr, &nExpr); | 
|  3503         Fts3Expr *pRight = pExpr->pRight; |  2811       if( rc==SQLITE_OK ){ | 
|  3504         int iLeft = *piLeft; |  2812         assert( nExpr>1 ); | 
|  3505         int nNear = pExpr->nNear; |  2813         aExpr = sqlite3_malloc(sizeof(ExprAndCost) * nExpr); | 
|  3506         int nToken = pRight->pPhrase->nToken; |  2814         if( !aExpr ) rc = SQLITE_NOMEM; | 
|  3507         int jj, ii; |  2815       } | 
|  3508         if( pLeft->eType==FTSQUERY_NEAR ){ |  2816       if( rc==SQLITE_OK ){ | 
|  3509           pLeft = pLeft->pRight; |  2817         int ii;                   /* Used to iterate through expressions */ | 
|  3510         } |  2818  | 
|  3511         assert( pRight->eType==FTSQUERY_PHRASE ); |  2819         fts3ExprAssignCosts(pExpr, &aExpr); | 
|  3512         assert( pLeft->eType==FTSQUERY_PHRASE ); |  2820         aExpr -= nExpr; | 
|  3513         nToken += pLeft->pPhrase->nToken; |  2821         for(ii=0; ii<nExpr; ii++){ | 
|  3514  |  2822           char *aNew; | 
|  3515         for(ii=0; ii<pSnippet->nMatch; ii++){ |  2823           int nNew; | 
|  3516           struct snippetMatch *p = &pSnippet->aMatch[ii]; |  2824           int jj; | 
|  3517           if( p->iTerm==iLeft ){ |  2825           ExprAndCost *pBest = 0; | 
|  3518             int isOk = 0; |  2826    | 
|  3519             /* Snippet ii is an occurrence of query term iLeft in the document. |  2827           for(jj=0; jj<nExpr; jj++){ | 
|  3520             ** It occurs at position (p->iToken) of the document. We now |  2828             ExprAndCost *pCand = &aExpr[jj]; | 
|  3521             ** search for an instance of token (iLeft-1) somewhere in the  |  2829             if( pCand->pExpr && (pBest==0 || pCand->nCost<pBest->nCost) ){ | 
|  3522             ** range (p->iToken - nNear)...(p->iToken + nNear + nToken) within  |  2830               pBest = pCand; | 
|  3523             ** the set of snippetMatch structures. If one is found, proceed.  |  | 
|  3524             ** If one cannot be found, then remove snippets ii..(ii+N-1)  |  | 
|  3525             ** from the matching snippets, where N is the number of tokens  |  | 
|  3526             ** in phrase pRight->pPhrase. |  | 
|  3527             */ |  | 
|  3528             for(jj=0; isOk==0 && jj<pSnippet->nMatch; jj++){ |  | 
|  3529               struct snippetMatch *p2 = &pSnippet->aMatch[jj]; |  | 
|  3530               if( p2->iTerm==(iLeft-1) ){ |  | 
|  3531                 if( p2->iToken>=(p->iToken-nNear-1)  |  | 
|  3532                  && p2->iToken<(p->iToken+nNear+nToken)  |  | 
|  3533                 ){ |  | 
|  3534                   isOk = 1; |  | 
|  3535                 } |  | 
|  3536               } |  | 
|  3537             } |  | 
|  3538             if( !isOk ){ |  | 
|  3539               int kk; |  | 
|  3540               for(kk=0; kk<pRight->pPhrase->nToken; kk++){ |  | 
|  3541                 pSnippet->aMatch[kk+ii].iTerm = -2; |  | 
|  3542               } |  | 
|  3543               return 1; |  | 
|  3544             } |  2831             } | 
|  3545           } |  2832           } | 
|  3546           if( p->iTerm==(iLeft-1) ){ |  2833    | 
|  3547             int isOk = 0; |  2834           if( pBest->nCost>nDoc ){ | 
|  3548             for(jj=0; isOk==0 && jj<pSnippet->nMatch; jj++){ |  2835             rc = fts3DeferExpression(p, p->pExpr); | 
|  3549               struct snippetMatch *p2 = &pSnippet->aMatch[jj]; |  2836             break; | 
|  3550               if( p2->iTerm==iLeft ){ |  2837           }else{ | 
|  3551                 if( p2->iToken<=(p->iToken+nNear+1)  |  2838             rc = fts3EvalExpr(p, pBest->pExpr, &aNew, &nNew, 0); | 
|  3552                  && p2->iToken>(p->iToken-nNear-nToken)  |  2839             if( rc!=SQLITE_OK ) break; | 
|  3553                 ){ |  2840             pBest->pExpr = 0; | 
|  3554                   isOk = 1; |  2841             if( ii==0 ){ | 
|  3555                 } |  2842               aRet = aNew; | 
|  3556               } |  2843               nRet = nNew; | 
|  3557             } |  2844               nDoc = fts3DoclistCountDocids(0, aRet, nRet); | 
|  3558             if( !isOk ){ |  2845             }else{ | 
|  3559               int kk; |  2846               fts3DoclistMerge( | 
|  3560               for(kk=0; kk<pLeft->pPhrase->nToken; kk++){ |  2847                   MERGE_AND, 0, 0, aRet, &nRet, aRet, nRet, aNew, nNew, &nDoc | 
|  3561                 pSnippet->aMatch[ii-kk].iTerm = -2; |  2848               ); | 
|  3562               } |  2849               sqlite3_free(aNew); | 
|  3563               return 1; |  | 
|  3564             } |  2850             } | 
|  3565           } |  2851           } | 
|  3566         } |  2852         } | 
|  3567         break; |  2853       } | 
|  3568       } |  2854  | 
|  3569     } |  2855       if( rc==SQLITE_OK ){ | 
|  3570  |  2856         *paOut = aRet; | 
|  3571     if( trimSnippetOffsets(pExpr->pRight, pSnippet, piLeft) ){ |  2857         *pnOut = nRet; | 
|  3572       return 1; |  2858       }else{ | 
|  3573     } |  2859         assert( *paOut==0 ); | 
|  3574   } |  2860         sqlite3_free(aRet); | 
|  3575   return 0; |  2861       } | 
|  3576 } |  2862       sqlite3_free(aExpr); | 
|  3577  |  2863       fts3ExprFreeSegReaders(pExpr); | 
|  3578 /* |  2864  | 
|  3579 ** Compute all offsets for the current row of the query.   |  | 
|  3580 ** If the offsets have already been computed, this routine is a no-op. |  | 
|  3581 */ |  | 
|  3582 static void snippetAllOffsets(fulltext_cursor *p){ |  | 
|  3583   int nColumn; |  | 
|  3584   int iColumn, i; |  | 
|  3585   int iFirst, iLast; |  | 
|  3586   int iTerm = 0; |  | 
|  3587   fulltext_vtab *pFts = cursor_vtab(p); |  | 
|  3588  |  | 
|  3589   if( p->snippet.nMatch || p->pExpr==0 ){ |  | 
|  3590     return; |  | 
|  3591   } |  | 
|  3592   nColumn = pFts->nColumn; |  | 
|  3593   iColumn = (p->iCursorType - QUERY_FULLTEXT); |  | 
|  3594   if( iColumn<0 || iColumn>=nColumn ){ |  | 
|  3595     /* Look for matches over all columns of the full-text index */ |  | 
|  3596     iFirst = 0; |  | 
|  3597     iLast = nColumn-1; |  | 
|  3598   }else{ |  | 
|  3599     /* Look for matches in the iColumn-th column of the index only */ |  | 
|  3600     iFirst = iColumn; |  | 
|  3601     iLast = iColumn; |  | 
|  3602   } |  | 
|  3603   for(i=iFirst; i<=iLast; i++){ |  | 
|  3604     const char *zDoc; |  | 
|  3605     int nDoc; |  | 
|  3606     zDoc = (const char*)sqlite3_column_text(p->pStmt, i+1); |  | 
|  3607     nDoc = sqlite3_column_bytes(p->pStmt, i+1); |  | 
|  3608     snippetOffsetsOfColumn(p, &p->snippet, i, zDoc, nDoc); |  | 
|  3609   } |  | 
|  3610  |  | 
|  3611   while( trimSnippetOffsets(p->pExpr, &p->snippet, &iTerm) ){ |  | 
|  3612     iTerm = 0; |  | 
|  3613   } |  | 
|  3614 } |  | 
|  3615  |  | 
|  3616 /* |  | 
|  3617 ** Convert the information in the aMatch[] array of the snippet |  | 
|  3618 ** into the string zOffset[0..nOffset-1]. This string is used as |  | 
|  3619 ** the return of the SQL offsets() function. |  | 
|  3620 */ |  | 
|  3621 static void snippetOffsetText(Snippet *p){ |  | 
|  3622   int i; |  | 
|  3623   int cnt = 0; |  | 
|  3624   StringBuffer sb; |  | 
|  3625   char zBuf[200]; |  | 
|  3626   if( p->zOffset ) return; |  | 
|  3627   initStringBuffer(&sb); |  | 
|  3628   for(i=0; i<p->nMatch; i++){ |  | 
|  3629     struct snippetMatch *pMatch = &p->aMatch[i]; |  | 
|  3630     if( pMatch->iTerm>=0 ){ |  | 
|  3631       /* If snippetMatch.iTerm is less than 0, then the match was  |  | 
|  3632       ** discarded as part of processing the NEAR operator (see the  |  | 
|  3633       ** trimSnippetOffsets() function for details). Ignore  |  | 
|  3634       ** it in this case |  | 
|  3635       */ |  | 
|  3636       zBuf[0] = ' '; |  | 
|  3637       sqlite3_snprintf(sizeof(zBuf)-1, &zBuf[cnt>0], "%d %d %d %d", |  | 
|  3638           pMatch->iCol, pMatch->iTerm, pMatch->iStart, pMatch->nByte); |  | 
|  3639       append(&sb, zBuf); |  | 
|  3640       cnt++; |  | 
|  3641     } |  | 
|  3642   } |  | 
|  3643   p->zOffset = stringBufferData(&sb); |  | 
|  3644   p->nOffset = stringBufferLength(&sb); |  | 
|  3645 } |  | 
|  3646  |  | 
|  3647 /* |  | 
|  3648 ** zDoc[0..nDoc-1] is a phrase of text.  aMatch[0..nMatch-1] are a set |  | 
|  3649 ** of matching words some of which might be in zDoc.  zDoc is column |  | 
|  3650 ** number iCol. |  | 
|  3651 ** |  | 
|  3652 ** iBreak is a suggested spot in zDoc where we could begin or end an |  | 
|  3653 ** excerpt.  Return a value similar to iBreak but possibly adjusted |  | 
|  3654 ** to be a little left or right so that the break point is better. |  | 
|  3655 */ |  | 
|  3656 static int wordBoundary( |  | 
|  3657   int iBreak,                   /* The suggested break point */ |  | 
|  3658   const char *zDoc,             /* Document text */ |  | 
|  3659   int nDoc,                     /* Number of bytes in zDoc[] */ |  | 
|  3660   struct snippetMatch *aMatch,  /* Matching words */ |  | 
|  3661   int nMatch,                   /* Number of entries in aMatch[] */ |  | 
|  3662   int iCol                      /* The column number for zDoc[] */ |  | 
|  3663 ){ |  | 
|  3664   int i; |  | 
|  3665   if( iBreak<=10 ){ |  | 
|  3666     return 0; |  | 
|  3667   } |  | 
|  3668   if( iBreak>=nDoc-10 ){ |  | 
|  3669     return nDoc; |  | 
|  3670   } |  | 
|  3671   for(i=0; i<nMatch && aMatch[i].iCol<iCol; i++){} |  | 
|  3672   while( i<nMatch && aMatch[i].iStart+aMatch[i].nByte<iBreak ){ i++; } |  | 
|  3673   if( i<nMatch ){ |  | 
|  3674     if( aMatch[i].iStart<iBreak+10 ){ |  | 
|  3675       return aMatch[i].iStart; |  | 
|  3676     } |  | 
|  3677     if( i>0 && aMatch[i-1].iStart+aMatch[i-1].nByte>=iBreak ){ |  | 
|  3678       return aMatch[i-1].iStart; |  | 
|  3679     } |  | 
|  3680   } |  | 
|  3681   for(i=1; i<=10; i++){ |  | 
|  3682     if( safe_isspace(zDoc[iBreak-i]) ){ |  | 
|  3683       return iBreak - i + 1; |  | 
|  3684     } |  | 
|  3685     if( safe_isspace(zDoc[iBreak+i]) ){ |  | 
|  3686       return iBreak + i + 1; |  | 
|  3687     } |  | 
|  3688   } |  | 
|  3689   return iBreak; |  | 
|  3690 } |  | 
|  3691  |  | 
|  3692  |  | 
|  3693  |  | 
|  3694 /* |  | 
|  3695 ** Allowed values for Snippet.aMatch[].snStatus |  | 
|  3696 */ |  | 
|  3697 #define SNIPPET_IGNORE  0   /* It is ok to omit this match from the snippet */ |  | 
|  3698 #define SNIPPET_DESIRED 1   /* We want to include this match in the snippet */ |  | 
|  3699  |  | 
|  3700 /* |  | 
|  3701 ** Generate the text of a snippet. |  | 
|  3702 */ |  | 
|  3703 static void snippetText( |  | 
|  3704   fulltext_cursor *pCursor,   /* The cursor we need the snippet for */ |  | 
|  3705   const char *zStartMark,     /* Markup to appear before each match */ |  | 
|  3706   const char *zEndMark,       /* Markup to appear after each match */ |  | 
|  3707   const char *zEllipsis       /* Ellipsis mark */ |  | 
|  3708 ){ |  | 
|  3709   int i, j; |  | 
|  3710   struct snippetMatch *aMatch; |  | 
|  3711   int nMatch; |  | 
|  3712   int nDesired; |  | 
|  3713   StringBuffer sb; |  | 
|  3714   int tailCol; |  | 
|  3715   int tailOffset; |  | 
|  3716   int iCol; |  | 
|  3717   int nDoc; |  | 
|  3718   const char *zDoc; |  | 
|  3719   int iStart, iEnd; |  | 
|  3720   int tailEllipsis = 0; |  | 
|  3721   int iMatch; |  | 
|  3722    |  | 
|  3723  |  | 
|  3724   sqlite3_free(pCursor->snippet.zSnippet); |  | 
|  3725   pCursor->snippet.zSnippet = 0; |  | 
|  3726   aMatch = pCursor->snippet.aMatch; |  | 
|  3727   nMatch = pCursor->snippet.nMatch; |  | 
|  3728   initStringBuffer(&sb); |  | 
|  3729  |  | 
|  3730   for(i=0; i<nMatch; i++){ |  | 
|  3731     aMatch[i].snStatus = SNIPPET_IGNORE; |  | 
|  3732   } |  | 
|  3733   nDesired = 0; |  | 
|  3734   for(i=0; i<FTS3_ROTOR_SZ; i++){ |  | 
|  3735     for(j=0; j<nMatch; j++){ |  | 
|  3736       if( aMatch[j].iTerm==i ){ |  | 
|  3737         aMatch[j].snStatus = SNIPPET_DESIRED; |  | 
|  3738         nDesired++; |  | 
|  3739         break; |  | 
|  3740       } |  | 
|  3741     } |  | 
|  3742   } |  | 
|  3743  |  | 
|  3744   iMatch = 0; |  | 
|  3745   tailCol = -1; |  | 
|  3746   tailOffset = 0; |  | 
|  3747   for(i=0; i<nMatch && nDesired>0; i++){ |  | 
|  3748     if( aMatch[i].snStatus!=SNIPPET_DESIRED ) continue; |  | 
|  3749     nDesired--; |  | 
|  3750     iCol = aMatch[i].iCol; |  | 
|  3751     zDoc = (const char*)sqlite3_column_text(pCursor->pStmt, iCol+1); |  | 
|  3752     nDoc = sqlite3_column_bytes(pCursor->pStmt, iCol+1); |  | 
|  3753     iStart = aMatch[i].iStart - 40; |  | 
|  3754     iStart = wordBoundary(iStart, zDoc, nDoc, aMatch, nMatch, iCol); |  | 
|  3755     if( iStart<=10 ){ |  | 
|  3756       iStart = 0; |  | 
|  3757     } |  | 
|  3758     if( iCol==tailCol && iStart<=tailOffset+20 ){ |  | 
|  3759       iStart = tailOffset; |  | 
|  3760     } |  | 
|  3761     if( (iCol!=tailCol && tailCol>=0) || iStart!=tailOffset ){ |  | 
|  3762       trimWhiteSpace(&sb); |  | 
|  3763       appendWhiteSpace(&sb); |  | 
|  3764       append(&sb, zEllipsis); |  | 
|  3765       appendWhiteSpace(&sb); |  | 
|  3766     } |  | 
|  3767     iEnd = aMatch[i].iStart + aMatch[i].nByte + 40; |  | 
|  3768     iEnd = wordBoundary(iEnd, zDoc, nDoc, aMatch, nMatch, iCol); |  | 
|  3769     if( iEnd>=nDoc-10 ){ |  | 
|  3770       iEnd = nDoc; |  | 
|  3771       tailEllipsis = 0; |  | 
|  3772     }else{ |  2865     }else{ | 
|  3773       tailEllipsis = 1; |  2866       char *aLeft; | 
|  3774     } |  2867       char *aRight; | 
|  3775     while( iMatch<nMatch && aMatch[iMatch].iCol<iCol ){ iMatch++; } |  2868       int nLeft; | 
|  3776     while( iStart<iEnd ){ |  2869       int nRight; | 
|  3777       while( iMatch<nMatch && aMatch[iMatch].iStart<iStart |  2870  | 
|  3778              && aMatch[iMatch].iCol<=iCol ){ |  2871       assert( pExpr->eType==FTSQUERY_NEAR  | 
|  3779         iMatch++; |  2872            || pExpr->eType==FTSQUERY_OR | 
|  3780       } |  2873            || pExpr->eType==FTSQUERY_NOT | 
|  3781       if( iMatch<nMatch && aMatch[iMatch].iStart<iEnd |  2874            || (pExpr->eType==FTSQUERY_AND && p->eEvalmode==FTS3_EVAL_NEXT) | 
|  3782              && aMatch[iMatch].iCol==iCol ){ |  2875       ); | 
|  3783         nappend(&sb, &zDoc[iStart], aMatch[iMatch].iStart - iStart); |  2876  | 
|  3784         iStart = aMatch[iMatch].iStart; |  2877       if( 0==(rc = fts3EvalExpr(p, pExpr->pRight, &aRight, &nRight, isReqPos)) | 
|  3785         append(&sb, zStartMark); |  2878        && 0==(rc = fts3EvalExpr(p, pExpr->pLeft, &aLeft, &nLeft, isReqPos)) | 
|  3786         nappend(&sb, &zDoc[iStart], aMatch[iMatch].nByte); |  | 
|  3787         append(&sb, zEndMark); |  | 
|  3788         iStart += aMatch[iMatch].nByte; |  | 
|  3789         for(j=iMatch+1; j<nMatch; j++){ |  | 
|  3790           if( aMatch[j].iTerm==aMatch[iMatch].iTerm |  | 
|  3791               && aMatch[j].snStatus==SNIPPET_DESIRED ){ |  | 
|  3792             nDesired--; |  | 
|  3793             aMatch[j].snStatus = SNIPPET_IGNORE; |  | 
|  3794           } |  | 
|  3795         } |  | 
|  3796       }else{ |  | 
|  3797         nappend(&sb, &zDoc[iStart], iEnd - iStart); |  | 
|  3798         iStart = iEnd; |  | 
|  3799       } |  | 
|  3800     } |  | 
|  3801     tailCol = iCol; |  | 
|  3802     tailOffset = iEnd; |  | 
|  3803   } |  | 
|  3804   trimWhiteSpace(&sb); |  | 
|  3805   if( tailEllipsis ){ |  | 
|  3806     appendWhiteSpace(&sb); |  | 
|  3807     append(&sb, zEllipsis); |  | 
|  3808   } |  | 
|  3809   pCursor->snippet.zSnippet = stringBufferData(&sb); |  | 
|  3810   pCursor->snippet.nSnippet = stringBufferLength(&sb); |  | 
|  3811 } |  | 
|  3812  |  | 
|  3813  |  | 
|  3814 /* |  | 
|  3815 ** Close the cursor.  For additional information see the documentation |  | 
|  3816 ** on the xClose method of the virtual table interface. |  | 
|  3817 */ |  | 
|  3818 static int fulltextClose(sqlite3_vtab_cursor *pCursor){ |  | 
|  3819   fulltext_cursor *c = (fulltext_cursor *) pCursor; |  | 
|  3820   FTSTRACE(("FTS3 Close %p\n", c)); |  | 
|  3821   sqlite3_finalize(c->pStmt); |  | 
|  3822   sqlite3Fts3ExprFree(c->pExpr); |  | 
|  3823   snippetClear(&c->snippet); |  | 
|  3824   if( c->result.nData!=0 ){ |  | 
|  3825     dlrDestroy(&c->reader); |  | 
|  3826   } |  | 
|  3827   dataBufferDestroy(&c->result); |  | 
|  3828   sqlite3_free(c); |  | 
|  3829   return SQLITE_OK; |  | 
|  3830 } |  | 
|  3831  |  | 
|  3832 static int fulltextNext(sqlite3_vtab_cursor *pCursor){ |  | 
|  3833   fulltext_cursor *c = (fulltext_cursor *) pCursor; |  | 
|  3834   int rc; |  | 
|  3835  |  | 
|  3836   FTSTRACE(("FTS3 Next %p\n", pCursor)); |  | 
|  3837   snippetClear(&c->snippet); |  | 
|  3838   if( c->iCursorType < QUERY_FULLTEXT ){ |  | 
|  3839     /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */ |  | 
|  3840     rc = sqlite3_step(c->pStmt); |  | 
|  3841     switch( rc ){ |  | 
|  3842       case SQLITE_ROW: |  | 
|  3843         c->eof = 0; |  | 
|  3844         return SQLITE_OK; |  | 
|  3845       case SQLITE_DONE: |  | 
|  3846         c->eof = 1; |  | 
|  3847         return SQLITE_OK; |  | 
|  3848       default: |  | 
|  3849         c->eof = 1; |  | 
|  3850         return rc; |  | 
|  3851     } |  | 
|  3852   } else {  /* full-text query */ |  | 
|  3853     rc = sqlite3_reset(c->pStmt); |  | 
|  3854     if( rc!=SQLITE_OK ) return rc; |  | 
|  3855  |  | 
|  3856     if( c->result.nData==0 || dlrAtEnd(&c->reader) ){ |  | 
|  3857       c->eof = 1; |  | 
|  3858       return SQLITE_OK; |  | 
|  3859     } |  | 
|  3860     rc = sqlite3_bind_int64(c->pStmt, 1, dlrDocid(&c->reader)); |  | 
|  3861     if( rc!=SQLITE_OK ) return rc; |  | 
|  3862     rc = dlrStep(&c->reader); |  | 
|  3863     if( rc!=SQLITE_OK ) return rc; |  | 
|  3864     /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */ |  | 
|  3865     rc = sqlite3_step(c->pStmt); |  | 
|  3866     if( rc==SQLITE_ROW ){   /* the case we expect */ |  | 
|  3867       c->eof = 0; |  | 
|  3868       return SQLITE_OK; |  | 
|  3869     } |  | 
|  3870     /* Corrupt if the index refers to missing document. */ |  | 
|  3871     if( rc==SQLITE_DONE ) return SQLITE_CORRUPT_BKPT; |  | 
|  3872  |  | 
|  3873     return rc; |  | 
|  3874   } |  | 
|  3875 } |  | 
|  3876  |  | 
|  3877  |  | 
|  3878 /* TODO(shess) If we pushed LeafReader to the top of the file, or to |  | 
|  3879 ** another file, term_select() could be pushed above |  | 
|  3880 ** docListOfTerm(). |  | 
|  3881 */ |  | 
|  3882 static int termSelect(fulltext_vtab *v, int iColumn, |  | 
|  3883                       const char *pTerm, int nTerm, int isPrefix, |  | 
|  3884                       DocListType iType, DataBuffer *out); |  | 
|  3885  |  | 
|  3886 /*  |  | 
|  3887 ** Return a DocList corresponding to the phrase *pPhrase. |  | 
|  3888 ** |  | 
|  3889 ** The resulting DL_DOCIDS doclist is stored in pResult, which is |  | 
|  3890 ** overwritten. |  | 
|  3891 */ |  | 
|  3892 static int docListOfPhrase( |  | 
|  3893   fulltext_vtab *pTab,   /* The full text index */ |  | 
|  3894   Fts3Phrase *pPhrase,   /* Phrase to return a doclist corresponding to */ |  | 
|  3895   DocListType eListType, /* Either DL_DOCIDS or DL_POSITIONS */ |  | 
|  3896   DataBuffer *pResult    /* Write the result here */ |  | 
|  3897 ){ |  | 
|  3898   int ii; |  | 
|  3899   int rc = SQLITE_OK; |  | 
|  3900   int iCol = pPhrase->iColumn; |  | 
|  3901   DocListType eType = eListType; |  | 
|  3902   assert( eType==DL_POSITIONS || eType==DL_DOCIDS ); |  | 
|  3903   if( pPhrase->nToken>1 ){ |  | 
|  3904     eType = DL_POSITIONS; |  | 
|  3905   } |  | 
|  3906  |  | 
|  3907   /* This code should never be called with buffered updates. */ |  | 
|  3908   assert( pTab->nPendingData<0 ); |  | 
|  3909  |  | 
|  3910   for(ii=0; rc==SQLITE_OK && ii<pPhrase->nToken; ii++){ |  | 
|  3911     DataBuffer tmp; |  | 
|  3912     struct PhraseToken *p = &pPhrase->aToken[ii]; |  | 
|  3913     rc = termSelect(pTab, iCol, p->z, p->n, p->isPrefix, eType, &tmp); |  | 
|  3914     if( rc==SQLITE_OK ){ |  | 
|  3915       if( ii==0 ){ |  | 
|  3916         *pResult = tmp; |  | 
|  3917       }else{ |  | 
|  3918         DataBuffer res = *pResult; |  | 
|  3919         dataBufferInit(pResult, 0); |  | 
|  3920         if( ii==(pPhrase->nToken-1) ){ |  | 
|  3921           eType = eListType; |  | 
|  3922         } |  | 
|  3923         rc = docListPhraseMerge( |  | 
|  3924           res.pData, res.nData, tmp.pData, tmp.nData, 0, 0, eType, pResult |  | 
|  3925         ); |  | 
|  3926         dataBufferDestroy(&res); |  | 
|  3927         dataBufferDestroy(&tmp); |  | 
|  3928         if( rc!= SQLITE_OK ) return rc; |  | 
|  3929       } |  | 
|  3930     } |  | 
|  3931   } |  | 
|  3932  |  | 
|  3933   return rc; |  | 
|  3934 } |  | 
|  3935  |  | 
|  3936 /* |  | 
|  3937 ** Evaluate the full-text expression pExpr against fts3 table pTab. Write |  | 
|  3938 ** the results into pRes. |  | 
|  3939 */ |  | 
|  3940 static int evalFts3Expr( |  | 
|  3941   fulltext_vtab *pTab,           /* Fts3 Virtual table object */ |  | 
|  3942   Fts3Expr *pExpr,               /* Parsed fts3 expression */ |  | 
|  3943   DataBuffer *pRes               /* OUT: Write results of the expression here */ |  | 
|  3944 ){ |  | 
|  3945   int rc = SQLITE_OK; |  | 
|  3946  |  | 
|  3947   /* Initialize the output buffer. If this is an empty query (pExpr==0),  |  | 
|  3948   ** this is all that needs to be done. Empty queries produce empty  |  | 
|  3949   ** result sets. |  | 
|  3950   */ |  | 
|  3951   dataBufferInit(pRes, 0); |  | 
|  3952  |  | 
|  3953   if( pExpr ){ |  | 
|  3954     if( pExpr->eType==FTSQUERY_PHRASE ){ |  | 
|  3955       DocListType eType = DL_DOCIDS; |  | 
|  3956       if( pExpr->pParent && pExpr->pParent->eType==FTSQUERY_NEAR ){ |  | 
|  3957         eType = DL_POSITIONS; |  | 
|  3958       } |  | 
|  3959       rc = docListOfPhrase(pTab, pExpr->pPhrase, eType, pRes); |  | 
|  3960     }else{ |  | 
|  3961       DataBuffer lhs; |  | 
|  3962       DataBuffer rhs; |  | 
|  3963  |  | 
|  3964       dataBufferInit(&rhs, 0); |  | 
|  3965       if( SQLITE_OK==(rc = evalFts3Expr(pTab, pExpr->pLeft, &lhs))  |  | 
|  3966        && SQLITE_OK==(rc = evalFts3Expr(pTab, pExpr->pRight, &rhs))  |  | 
|  3967       ){ |  2879       ){ | 
|  3968         switch( pExpr->eType ){ |  2880         switch( pExpr->eType ){ | 
|  3969           case FTSQUERY_NEAR: { |  2881           case FTSQUERY_NEAR: { | 
|  3970             int nToken; |  | 
|  3971             Fts3Expr *pLeft; |  2882             Fts3Expr *pLeft; | 
|  3972             DocListType eType = DL_DOCIDS; |  2883             Fts3Expr *pRight; | 
 |  2884             int mergetype = MERGE_NEAR; | 
|  3973             if( pExpr->pParent && pExpr->pParent->eType==FTSQUERY_NEAR ){ |  2885             if( pExpr->pParent && pExpr->pParent->eType==FTSQUERY_NEAR ){ | 
|  3974               eType = DL_POSITIONS; |  2886               mergetype = MERGE_POS_NEAR; | 
|  3975             } |  2887             } | 
|  3976             pLeft = pExpr->pLeft; |  2888             pLeft = pExpr->pLeft; | 
|  3977             while( pLeft->eType==FTSQUERY_NEAR ){  |  2889             while( pLeft->eType==FTSQUERY_NEAR ){  | 
|  3978               pLeft=pLeft->pRight; |  2890               pLeft=pLeft->pRight; | 
|  3979             } |  2891             } | 
|  3980             assert( pExpr->pRight->eType==FTSQUERY_PHRASE ); |  2892             pRight = pExpr->pRight; | 
 |  2893             assert( pRight->eType==FTSQUERY_PHRASE ); | 
|  3981             assert( pLeft->eType==FTSQUERY_PHRASE ); |  2894             assert( pLeft->eType==FTSQUERY_PHRASE ); | 
|  3982             nToken = pLeft->pPhrase->nToken + pExpr->pRight->pPhrase->nToken; |  2895  | 
|  3983             rc = docListPhraseMerge(lhs.pData, lhs.nData, rhs.pData, rhs.nData,  |  2896             rc = fts3NearMerge(mergetype, pExpr->nNear,  | 
|  3984                 pExpr->nNear+1, nToken, eType, pRes |  2897                 pLeft->pPhrase->nToken, aLeft, nLeft, | 
 |  2898                 pRight->pPhrase->nToken, aRight, nRight, | 
 |  2899                 paOut, pnOut | 
|  3985             ); |  2900             ); | 
 |  2901             sqlite3_free(aLeft); | 
|  3986             break; |  2902             break; | 
|  3987           } |  2903           } | 
|  3988           case FTSQUERY_NOT: { |  2904  | 
|  3989             rc = docListExceptMerge(lhs.pData, lhs.nData, rhs.pData, rhs.nData,pRes); |  2905           case FTSQUERY_OR: { | 
 |  2906             /* Allocate a buffer for the output. The maximum size is the | 
 |  2907             ** sum of the sizes of the two input buffers. The +1 term is | 
 |  2908             ** so that a buffer of zero bytes is never allocated - this can | 
 |  2909             ** cause fts3DoclistMerge() to incorrectly return SQLITE_NOMEM. | 
 |  2910             */ | 
 |  2911             char *aBuffer = sqlite3_malloc(nRight+nLeft+1); | 
 |  2912             rc = fts3DoclistMerge(MERGE_OR, 0, 0, aBuffer, pnOut, | 
 |  2913                 aLeft, nLeft, aRight, nRight, 0 | 
 |  2914             ); | 
 |  2915             *paOut = aBuffer; | 
 |  2916             sqlite3_free(aLeft); | 
|  3990             break; |  2917             break; | 
|  3991           } |  2918           } | 
|  3992           case FTSQUERY_AND: { |  2919  | 
|  3993             rc = docListAndMerge(lhs.pData, lhs.nData, rhs.pData, rhs.nData, pRes); |  2920           default: { | 
|  3994             break; |  2921             assert( FTSQUERY_NOT==MERGE_NOT && FTSQUERY_AND==MERGE_AND ); | 
|  3995           } |  2922             fts3DoclistMerge(pExpr->eType, 0, 0, aLeft, pnOut, | 
|  3996           case FTSQUERY_OR: { |  2923                 aLeft, nLeft, aRight, nRight, 0 | 
|  3997             rc = docListOrMerge(lhs.pData, lhs.nData, rhs.pData, rhs.nData, pRes); |  2924             ); | 
 |  2925             *paOut = aLeft; | 
|  3998             break; |  2926             break; | 
|  3999           } |  2927           } | 
|  4000         } |  2928         } | 
|  4001       } |  2929       } | 
|  4002       dataBufferDestroy(&lhs); |  2930       sqlite3_free(aRight); | 
|  4003       dataBufferDestroy(&rhs); |  2931     } | 
|  4004     } |  2932   } | 
|  4005   } |  2933  | 
|  4006  |  2934   assert( rc==SQLITE_OK || *paOut==0 ); | 
|  4007   return rc; |  2935   return rc; | 
|  4008 } |  2936 } | 
|  4009  |  2937  | 
|  4010 /* TODO(shess) Refactor the code to remove this forward decl. */ |  2938 /* | 
|  4011 static int flushPendingTerms(fulltext_vtab *v); |  2939 ** This function is called from within xNext() for each row visited by | 
|  4012  |  2940 ** an FTS3 query. If evaluating the FTS3 query expression within xFilter() | 
|  4013 /* Perform a full-text query using the search expression in |  2941 ** was able to determine the exact set of matching rows, this function sets | 
|  4014 ** zInput[0..nInput-1].  Return a list of matching documents |  2942 ** *pbRes to true and returns SQLITE_OK immediately. | 
|  4015 ** in pResult. |  2943 ** | 
|  4016 ** |  2944 ** Otherwise, if evaluating the query expression within xFilter() returned a | 
|  4017 ** Queries must match column iColumn.  Or if iColumn>=nColumn |  2945 ** superset of the matching documents instead of an exact set (this happens | 
|  4018 ** they are allowed to match against any column. |  2946 ** when the query includes very common tokens and it is deemed too expensive to | 
|  4019 */ |  2947 ** load their doclists from disk), this function tests if the current row | 
|  4020 static int fulltextQuery( |  2948 ** really does match the FTS3 query. | 
|  4021   fulltext_vtab *v,      /* The full text index */ |  2949 ** | 
|  4022   int iColumn,           /* Match against this column by default */ |  2950 ** If an error occurs, an SQLite error code is returned. Otherwise, SQLITE_OK | 
|  4023   const char *zInput,    /* The query string */ |  2951 ** is returned and *pbRes is set to true if the current row matches the | 
|  4024   int nInput,            /* Number of bytes in zInput[] */ |  2952 ** FTS3 query (and should be included in the results returned to SQLite), or | 
|  4025   DataBuffer *pResult,   /* Write the result doclist here */ |  2953 ** false otherwise. | 
|  4026   Fts3Expr **ppExpr        /* Put parsed query string here */ |  2954 */ | 
|  4027 ){ |  2955 static int fts3EvalDeferred( | 
|  4028   int rc; |  2956   Fts3Cursor *pCsr,               /* FTS3 cursor pointing at row to test */ | 
|  4029  |  2957   int *pbRes                      /* OUT: Set to true if row is a match */ | 
|  4030   /* TODO(shess) Instead of flushing pendingTerms, we could query for |  2958 ){ | 
|  4031   ** the relevant term and merge the doclist into what we receive from |  2959   int rc = SQLITE_OK; | 
|  4032   ** the database.  Wait and see if this is a common issue, first. |  2960   if( pCsr->pDeferred==0 ){ | 
|  4033   ** |  2961     *pbRes = 1; | 
|  4034   ** A good reason not to flush is to not generate update-related |  2962   }else{ | 
|  4035   ** error codes from here. |  2963     rc = fts3CursorSeek(0, pCsr); | 
|  4036   */ |  2964     if( rc==SQLITE_OK ){ | 
|  4037  |  2965       sqlite3Fts3FreeDeferredDoclists(pCsr); | 
|  4038   /* Flush any buffered updates before executing the query. */ |  2966       rc = sqlite3Fts3CacheDeferredDoclists(pCsr); | 
|  4039   rc = flushPendingTerms(v); |  2967     } | 
|  4040   if( rc!=SQLITE_OK ){ |  2968     if( rc==SQLITE_OK ){ | 
|  4041     return rc; |  2969       char *a = 0; | 
|  4042   } |  2970       int n = 0; | 
|  4043  |  2971       rc = fts3EvalExpr(pCsr, pCsr->pExpr, &a, &n, 0); | 
|  4044   /* Parse the query passed to the MATCH operator. */ |  2972       assert( n>=0 ); | 
|  4045   rc = sqlite3Fts3ExprParse(v->pTokenizer,  |  2973       *pbRes = (n>0); | 
|  4046       v->azColumn, v->nColumn, iColumn, zInput, nInput, ppExpr |  2974       sqlite3_free(a); | 
|  4047   ); |  2975     } | 
|  4048   if( rc!=SQLITE_OK ){ |  2976   } | 
|  4049     assert( 0==(*ppExpr) ); |  2977   return rc; | 
|  4050     return rc; |  2978 } | 
|  4051   } |  2979  | 
|  4052  |  2980 /* | 
|  4053   return evalFts3Expr(v, *ppExpr, pResult); |  2981 ** Advance the cursor to the next row in the %_content table that | 
 |  2982 ** matches the search criteria.  For a MATCH search, this will be | 
 |  2983 ** the next row that matches. For a full-table scan, this will be | 
 |  2984 ** simply the next row in the %_content table.  For a docid lookup, | 
 |  2985 ** this routine simply sets the EOF flag. | 
 |  2986 ** | 
 |  2987 ** Return SQLITE_OK if nothing goes wrong.  SQLITE_OK is returned | 
 |  2988 ** even if we reach end-of-file.  The fts3EofMethod() will be called | 
 |  2989 ** subsequently to determine whether or not an EOF was hit. | 
 |  2990 */ | 
 |  2991 static int fts3NextMethod(sqlite3_vtab_cursor *pCursor){ | 
 |  2992   int res; | 
 |  2993   int rc = SQLITE_OK;             /* Return code */ | 
 |  2994   Fts3Cursor *pCsr = (Fts3Cursor *)pCursor; | 
 |  2995  | 
 |  2996   pCsr->eEvalmode = FTS3_EVAL_NEXT; | 
 |  2997   do { | 
 |  2998     if( pCsr->aDoclist==0 ){ | 
 |  2999       if( SQLITE_ROW!=sqlite3_step(pCsr->pStmt) ){ | 
 |  3000         pCsr->isEof = 1; | 
 |  3001         rc = sqlite3_reset(pCsr->pStmt); | 
 |  3002         break; | 
 |  3003       } | 
 |  3004       pCsr->iPrevId = sqlite3_column_int64(pCsr->pStmt, 0); | 
 |  3005     }else{ | 
 |  3006       if( pCsr->pNextId>=&pCsr->aDoclist[pCsr->nDoclist] ){ | 
 |  3007         pCsr->isEof = 1; | 
 |  3008         break; | 
 |  3009       } | 
 |  3010       sqlite3_reset(pCsr->pStmt); | 
 |  3011       fts3GetDeltaVarint(&pCsr->pNextId, &pCsr->iPrevId); | 
 |  3012       pCsr->isRequireSeek = 1; | 
 |  3013       pCsr->isMatchinfoNeeded = 1; | 
 |  3014     } | 
 |  3015   }while( SQLITE_OK==(rc = fts3EvalDeferred(pCsr, &res)) && res==0 ); | 
 |  3016  | 
 |  3017   return rc; | 
|  4054 } |  3018 } | 
|  4055  |  3019  | 
|  4056 /* |  3020 /* | 
|  4057 ** This is the xFilter interface for the virtual table.  See |  3021 ** This is the xFilter interface for the virtual table.  See | 
|  4058 ** the virtual table xFilter method documentation for additional |  3022 ** the virtual table xFilter method documentation for additional | 
|  4059 ** information. |  3023 ** information. | 
|  4060 ** |  3024 ** | 
|  4061 ** If idxNum==QUERY_GENERIC then do a full table scan against |  3025 ** If idxNum==FTS3_FULLSCAN_SEARCH then do a full table scan against | 
|  4062 ** the %_content table. |  3026 ** the %_content table. | 
|  4063 ** |  3027 ** | 
|  4064 ** If idxNum==QUERY_DOCID then do a docid lookup for a single entry |  3028 ** If idxNum==FTS3_DOCID_SEARCH then do a docid lookup for a single entry | 
|  4065 ** in the %_content table. |  3029 ** in the %_content table. | 
|  4066 ** |  3030 ** | 
|  4067 ** If idxNum>=QUERY_FULLTEXT then use the full text index.  The |  3031 ** If idxNum>=FTS3_FULLTEXT_SEARCH then use the full text index.  The | 
|  4068 ** column on the left-hand side of the MATCH operator is column |  3032 ** column on the left-hand side of the MATCH operator is column | 
|  4069 ** number idxNum-QUERY_FULLTEXT, 0 indexed.  argv[0] is the right-hand |  3033 ** number idxNum-FTS3_FULLTEXT_SEARCH, 0 indexed.  argv[0] is the right-hand | 
|  4070 ** side of the MATCH operator. |  3034 ** side of the MATCH operator. | 
|  4071 */ |  3035 */ | 
|  4072 /* TODO(shess) Upgrade the cursor initialization and destruction to |  3036 static int fts3FilterMethod( | 
|  4073 ** account for fulltextFilter() being called multiple times on the |  3037   sqlite3_vtab_cursor *pCursor,   /* The cursor used for this query */ | 
|  4074 ** same cursor.  The current solution is very fragile.  Apply fix to |  3038   int idxNum,                     /* Strategy index */ | 
|  4075 ** fts3 as appropriate. |  3039   const char *idxStr,             /* Unused */ | 
|  4076 */ |  3040   int nVal,                       /* Number of elements in apVal */ | 
|  4077 static int fulltextFilter( |  3041   sqlite3_value **apVal           /* Arguments for the indexing scheme */ | 
|  4078   sqlite3_vtab_cursor *pCursor,     /* The cursor used for this query */ |  3042 ){ | 
|  4079   int idxNum, const char *idxStr,   /* Which indexing scheme to use */ |  3043   const char *azSql[] = { | 
|  4080   int argc, sqlite3_value **argv    /* Arguments for the indexing scheme */ |  3044     "SELECT %s FROM %Q.'%q_content' AS x WHERE docid = ?", /* non-full-scan */ | 
|  4081 ){ |  3045     "SELECT %s FROM %Q.'%q_content' AS x ",                /* full-scan */ | 
|  4082   fulltext_cursor *c = (fulltext_cursor *) pCursor; |  3046   }; | 
|  4083   fulltext_vtab *v = cursor_vtab(c); |  3047   int rc;                         /* Return code */ | 
 |  3048   char *zSql;                     /* SQL statement used to access %_content */ | 
 |  3049   Fts3Table *p = (Fts3Table *)pCursor->pVtab; | 
 |  3050   Fts3Cursor *pCsr = (Fts3Cursor *)pCursor; | 
 |  3051  | 
 |  3052   UNUSED_PARAMETER(idxStr); | 
 |  3053   UNUSED_PARAMETER(nVal); | 
 |  3054  | 
 |  3055   assert( idxNum>=0 && idxNum<=(FTS3_FULLTEXT_SEARCH+p->nColumn) ); | 
 |  3056   assert( nVal==0 || nVal==1 ); | 
 |  3057   assert( (nVal==0)==(idxNum==FTS3_FULLSCAN_SEARCH) ); | 
 |  3058   assert( p->pSegments==0 ); | 
 |  3059  | 
 |  3060   /* In case the cursor has been used before, clear it now. */ | 
 |  3061   sqlite3_finalize(pCsr->pStmt); | 
 |  3062   sqlite3_free(pCsr->aDoclist); | 
 |  3063   sqlite3Fts3ExprFree(pCsr->pExpr); | 
 |  3064   memset(&pCursor[1], 0, sizeof(Fts3Cursor)-sizeof(sqlite3_vtab_cursor)); | 
 |  3065  | 
 |  3066   if( idxNum!=FTS3_DOCID_SEARCH && idxNum!=FTS3_FULLSCAN_SEARCH ){ | 
 |  3067     int iCol = idxNum-FTS3_FULLTEXT_SEARCH; | 
 |  3068     const char *zQuery = (const char *)sqlite3_value_text(apVal[0]); | 
 |  3069  | 
 |  3070     if( zQuery==0 && sqlite3_value_type(apVal[0])!=SQLITE_NULL ){ | 
 |  3071       return SQLITE_NOMEM; | 
 |  3072     } | 
 |  3073  | 
 |  3074     rc = sqlite3Fts3ExprParse(p->pTokenizer, p->azColumn, p->nColumn,  | 
 |  3075         iCol, zQuery, -1, &pCsr->pExpr | 
 |  3076     ); | 
 |  3077     if( rc!=SQLITE_OK ){ | 
 |  3078       if( rc==SQLITE_ERROR ){ | 
 |  3079         p->base.zErrMsg = sqlite3_mprintf("malformed MATCH expression: [%s]", | 
 |  3080                                           zQuery); | 
 |  3081       } | 
 |  3082       return rc; | 
 |  3083     } | 
 |  3084  | 
 |  3085     rc = sqlite3Fts3ReadLock(p); | 
 |  3086     if( rc!=SQLITE_OK ) return rc; | 
 |  3087  | 
 |  3088     rc = fts3EvalExpr(pCsr, pCsr->pExpr, &pCsr->aDoclist, &pCsr->nDoclist, 0); | 
 |  3089     sqlite3Fts3SegmentsClose(p); | 
 |  3090     if( rc!=SQLITE_OK ) return rc; | 
 |  3091     pCsr->pNextId = pCsr->aDoclist; | 
 |  3092     pCsr->iPrevId = 0; | 
 |  3093   } | 
 |  3094  | 
 |  3095   /* Compile a SELECT statement for this cursor. For a full-table-scan, the | 
 |  3096   ** statement loops through all rows of the %_content table. For a | 
 |  3097   ** full-text query or docid lookup, the statement retrieves a single | 
 |  3098   ** row by docid. | 
 |  3099   */ | 
 |  3100   zSql = (char *)azSql[idxNum==FTS3_FULLSCAN_SEARCH]; | 
 |  3101   zSql = sqlite3_mprintf(zSql, p->zReadExprlist, p->zDb, p->zName); | 
 |  3102   if( !zSql ){ | 
 |  3103     rc = SQLITE_NOMEM; | 
 |  3104   }else{ | 
 |  3105     rc = sqlite3_prepare_v2(p->db, zSql, -1, &pCsr->pStmt, 0); | 
 |  3106     sqlite3_free(zSql); | 
 |  3107   } | 
 |  3108   if( rc==SQLITE_OK && idxNum==FTS3_DOCID_SEARCH ){ | 
 |  3109     rc = sqlite3_bind_value(pCsr->pStmt, 1, apVal[0]); | 
 |  3110   } | 
 |  3111   pCsr->eSearch = (i16)idxNum; | 
 |  3112  | 
 |  3113   if( rc!=SQLITE_OK ) return rc; | 
 |  3114   return fts3NextMethod(pCursor); | 
 |  3115 } | 
 |  3116  | 
 |  3117 /*  | 
 |  3118 ** This is the xEof method of the virtual table. SQLite calls this  | 
 |  3119 ** routine to find out if it has reached the end of a result set. | 
 |  3120 */ | 
 |  3121 static int fts3EofMethod(sqlite3_vtab_cursor *pCursor){ | 
 |  3122   return ((Fts3Cursor *)pCursor)->isEof; | 
 |  3123 } | 
 |  3124  | 
 |  3125 /*  | 
 |  3126 ** This is the xRowid method. The SQLite core calls this routine to | 
 |  3127 ** retrieve the rowid for the current row of the result set. fts3 | 
 |  3128 ** exposes %_content.docid as the rowid for the virtual table. The | 
 |  3129 ** rowid should be written to *pRowid. | 
 |  3130 */ | 
 |  3131 static int fts3RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ | 
 |  3132   Fts3Cursor *pCsr = (Fts3Cursor *) pCursor; | 
 |  3133   if( pCsr->aDoclist ){ | 
 |  3134     *pRowid = pCsr->iPrevId; | 
 |  3135   }else{ | 
 |  3136     /* This branch runs if the query is implemented using a full-table scan | 
 |  3137     ** (not using the full-text index). In this case grab the rowid from the | 
 |  3138     ** SELECT statement. | 
 |  3139     */ | 
 |  3140     assert( pCsr->isRequireSeek==0 ); | 
 |  3141     *pRowid = sqlite3_column_int64(pCsr->pStmt, 0); | 
 |  3142   } | 
 |  3143   return SQLITE_OK; | 
 |  3144 } | 
 |  3145  | 
 |  3146 /*  | 
 |  3147 ** This is the xColumn method, called by SQLite to request a value from | 
 |  3148 ** the row that the supplied cursor currently points to. | 
 |  3149 */ | 
 |  3150 static int fts3ColumnMethod( | 
 |  3151   sqlite3_vtab_cursor *pCursor,   /* Cursor to retrieve value from */ | 
 |  3152   sqlite3_context *pContext,      /* Context for sqlite3_result_xxx() calls */ | 
 |  3153   int iCol                        /* Index of column to read value from */ | 
 |  3154 ){ | 
 |  3155   int rc;                         /* Return Code */ | 
 |  3156   Fts3Cursor *pCsr = (Fts3Cursor *) pCursor; | 
 |  3157   Fts3Table *p = (Fts3Table *)pCursor->pVtab; | 
 |  3158  | 
 |  3159   /* The column value supplied by SQLite must be in range. */ | 
 |  3160   assert( iCol>=0 && iCol<=p->nColumn+1 ); | 
 |  3161  | 
 |  3162   if( iCol==p->nColumn+1 ){ | 
 |  3163     /* This call is a request for the "docid" column. Since "docid" is an  | 
 |  3164     ** alias for "rowid", use the xRowid() method to obtain the value. | 
 |  3165     */ | 
 |  3166     sqlite3_int64 iRowid; | 
 |  3167     rc = fts3RowidMethod(pCursor, &iRowid); | 
 |  3168     sqlite3_result_int64(pContext, iRowid); | 
 |  3169   }else if( iCol==p->nColumn ){ | 
 |  3170     /* The extra column whose name is the same as the table. | 
 |  3171     ** Return a blob which is a pointer to the cursor. | 
 |  3172     */ | 
 |  3173     sqlite3_result_blob(pContext, &pCsr, sizeof(pCsr), SQLITE_TRANSIENT); | 
 |  3174     rc = SQLITE_OK; | 
 |  3175   }else{ | 
 |  3176     rc = fts3CursorSeek(0, pCsr); | 
 |  3177     if( rc==SQLITE_OK ){ | 
 |  3178       sqlite3_result_value(pContext, sqlite3_column_value(pCsr->pStmt, iCol+1)); | 
 |  3179     } | 
 |  3180   } | 
 |  3181   return rc; | 
 |  3182 } | 
 |  3183  | 
 |  3184 /*  | 
 |  3185 ** This function is the implementation of the xUpdate callback used by  | 
 |  3186 ** FTS3 virtual tables. It is invoked by SQLite each time a row is to be | 
 |  3187 ** inserted, updated or deleted. | 
 |  3188 */ | 
 |  3189 static int fts3UpdateMethod( | 
 |  3190   sqlite3_vtab *pVtab,            /* Virtual table handle */ | 
 |  3191   int nArg,                       /* Size of argument array */ | 
 |  3192   sqlite3_value **apVal,          /* Array of arguments */ | 
 |  3193   sqlite_int64 *pRowid            /* OUT: The affected (or effected) rowid */ | 
 |  3194 ){ | 
 |  3195   return sqlite3Fts3UpdateMethod(pVtab, nArg, apVal, pRowid); | 
 |  3196 } | 
 |  3197  | 
 |  3198 /* | 
 |  3199 ** Implementation of xSync() method. Flush the contents of the pending-terms | 
 |  3200 ** hash-table to the database. | 
 |  3201 */ | 
 |  3202 static int fts3SyncMethod(sqlite3_vtab *pVtab){ | 
 |  3203   int rc = sqlite3Fts3PendingTermsFlush((Fts3Table *)pVtab); | 
 |  3204   sqlite3Fts3SegmentsClose((Fts3Table *)pVtab); | 
 |  3205   return rc; | 
 |  3206 } | 
 |  3207  | 
 |  3208 /* | 
 |  3209 ** Implementation of xBegin() method. This is a no-op. | 
 |  3210 */ | 
 |  3211 static int fts3BeginMethod(sqlite3_vtab *pVtab){ | 
 |  3212   UNUSED_PARAMETER(pVtab); | 
 |  3213   assert( ((Fts3Table *)pVtab)->nPendingData==0 ); | 
 |  3214   return SQLITE_OK; | 
 |  3215 } | 
 |  3216  | 
 |  3217 /* | 
 |  3218 ** Implementation of xCommit() method. This is a no-op. The contents of | 
 |  3219 ** the pending-terms hash-table have already been flushed into the database | 
 |  3220 ** by fts3SyncMethod(). | 
 |  3221 */ | 
 |  3222 static int fts3CommitMethod(sqlite3_vtab *pVtab){ | 
 |  3223   UNUSED_PARAMETER(pVtab); | 
 |  3224   assert( ((Fts3Table *)pVtab)->nPendingData==0 ); | 
 |  3225   return SQLITE_OK; | 
 |  3226 } | 
 |  3227  | 
 |  3228 /* | 
 |  3229 ** Implementation of xRollback(). Discard the contents of the pending-terms | 
 |  3230 ** hash-table. Any changes made to the database are reverted by SQLite. | 
 |  3231 */ | 
 |  3232 static int fts3RollbackMethod(sqlite3_vtab *pVtab){ | 
 |  3233   sqlite3Fts3PendingTermsClear((Fts3Table *)pVtab); | 
 |  3234   return SQLITE_OK; | 
 |  3235 } | 
 |  3236  | 
 |  3237 /* | 
 |  3238 ** Load the doclist associated with expression pExpr to pExpr->aDoclist. | 
 |  3239 ** The loaded doclist contains positions as well as the document ids. | 
 |  3240 ** This is used by the matchinfo(), snippet() and offsets() auxiliary | 
 |  3241 ** functions. | 
 |  3242 */ | 
 |  3243 int sqlite3Fts3ExprLoadDoclist(Fts3Cursor *pCsr, Fts3Expr *pExpr){ | 
|  4084   int rc; |  3244   int rc; | 
|  4085  |  3245   assert( pExpr->eType==FTSQUERY_PHRASE && pExpr->pPhrase ); | 
|  4086   FTSTRACE(("FTS3 Filter %p\n",pCursor)); |  3246   assert( pCsr->eEvalmode==FTS3_EVAL_NEXT ); | 
|  4087  |  3247   rc = fts3EvalExpr(pCsr, pExpr, &pExpr->aDoclist, &pExpr->nDoclist, 1); | 
|  4088   /* If the cursor has a statement that was not prepared according to |  3248   return rc; | 
|  4089   ** idxNum, clear it.  I believe all calls to fulltextFilter with a |  3249 } | 
|  4090   ** given cursor will have the same idxNum , but in this case it's |  3250  | 
|  4091   ** easy to be safe. |  3251 int sqlite3Fts3ExprLoadFtDoclist( | 
 |  3252   Fts3Cursor *pCsr,  | 
 |  3253   Fts3Expr *pExpr, | 
 |  3254   char **paDoclist, | 
 |  3255   int *pnDoclist | 
 |  3256 ){ | 
 |  3257   int rc; | 
 |  3258   assert( pCsr->eEvalmode==FTS3_EVAL_NEXT ); | 
 |  3259   assert( pExpr->eType==FTSQUERY_PHRASE && pExpr->pPhrase ); | 
 |  3260   pCsr->eEvalmode = FTS3_EVAL_MATCHINFO; | 
 |  3261   rc = fts3EvalExpr(pCsr, pExpr, paDoclist, pnDoclist, 1); | 
 |  3262   pCsr->eEvalmode = FTS3_EVAL_NEXT; | 
 |  3263   return rc; | 
 |  3264 } | 
 |  3265  | 
 |  3266 /* | 
 |  3267 ** After ExprLoadDoclist() (see above) has been called, this function is | 
 |  3268 ** used to iterate/search through the position lists that make up the doclist | 
 |  3269 ** stored in pExpr->aDoclist. | 
 |  3270 */ | 
 |  3271 char *sqlite3Fts3FindPositions( | 
 |  3272   Fts3Expr *pExpr,                /* Access this expression's doclist */ | 
 |  3273   sqlite3_int64 iDocid,           /* Docid associated with requested pos-list */ | 
 |  3274   int iCol                        /* Column of requested pos-list */ | 
 |  3275 ){ | 
 |  3276   assert( pExpr->isLoaded ); | 
 |  3277   if( pExpr->aDoclist ){ | 
 |  3278     char *pEnd = &pExpr->aDoclist[pExpr->nDoclist]; | 
 |  3279     char *pCsr; | 
 |  3280  | 
 |  3281     if( pExpr->pCurrent==0 ){ | 
 |  3282       pExpr->pCurrent = pExpr->aDoclist; | 
 |  3283       pExpr->iCurrent = 0; | 
 |  3284       pExpr->pCurrent += sqlite3Fts3GetVarint(pExpr->pCurrent,&pExpr->iCurrent); | 
 |  3285     } | 
 |  3286     pCsr = pExpr->pCurrent; | 
 |  3287     assert( pCsr ); | 
 |  3288  | 
 |  3289     while( pCsr<pEnd ){ | 
 |  3290       if( pExpr->iCurrent<iDocid ){ | 
 |  3291         fts3PoslistCopy(0, &pCsr); | 
 |  3292         if( pCsr<pEnd ){ | 
 |  3293           fts3GetDeltaVarint(&pCsr, &pExpr->iCurrent); | 
 |  3294         } | 
 |  3295         pExpr->pCurrent = pCsr; | 
 |  3296       }else{ | 
 |  3297         if( pExpr->iCurrent==iDocid ){ | 
 |  3298           int iThis = 0; | 
 |  3299           if( iCol<0 ){ | 
 |  3300             /* If iCol is negative, return a pointer to the start of the | 
 |  3301             ** position-list (instead of a pointer to the start of a list | 
 |  3302             ** of offsets associated with a specific column). | 
 |  3303             */ | 
 |  3304             return pCsr; | 
 |  3305           } | 
 |  3306           while( iThis<iCol ){ | 
 |  3307             fts3ColumnlistCopy(0, &pCsr); | 
 |  3308             if( *pCsr==0x00 ) return 0; | 
 |  3309             pCsr++; | 
 |  3310             pCsr += sqlite3Fts3GetVarint32(pCsr, &iThis); | 
 |  3311           } | 
 |  3312           if( iCol==iThis && (*pCsr&0xFE) ) return pCsr; | 
 |  3313         } | 
 |  3314         return 0; | 
 |  3315       } | 
 |  3316     } | 
 |  3317   } | 
 |  3318  | 
 |  3319   return 0; | 
 |  3320 } | 
 |  3321  | 
 |  3322 /* | 
 |  3323 ** Helper function used by the implementation of the overloaded snippet(), | 
 |  3324 ** offsets() and optimize() SQL functions. | 
 |  3325 ** | 
 |  3326 ** If the value passed as the third argument is a blob of size | 
 |  3327 ** sizeof(Fts3Cursor*), then the blob contents are copied to the  | 
 |  3328 ** output variable *ppCsr and SQLITE_OK is returned. Otherwise, an error | 
 |  3329 ** message is written to context pContext and SQLITE_ERROR returned. The | 
 |  3330 ** string passed via zFunc is used as part of the error message. | 
 |  3331 */ | 
 |  3332 static int fts3FunctionArg( | 
 |  3333   sqlite3_context *pContext,      /* SQL function call context */ | 
 |  3334   const char *zFunc,              /* Function name */ | 
 |  3335   sqlite3_value *pVal,            /* argv[0] passed to function */ | 
 |  3336   Fts3Cursor **ppCsr              /* OUT: Store cursor handle here */ | 
 |  3337 ){ | 
 |  3338   Fts3Cursor *pRet; | 
 |  3339   if( sqlite3_value_type(pVal)!=SQLITE_BLOB  | 
 |  3340    || sqlite3_value_bytes(pVal)!=sizeof(Fts3Cursor *) | 
 |  3341   ){ | 
 |  3342     char *zErr = sqlite3_mprintf("illegal first argument to %s", zFunc); | 
 |  3343     sqlite3_result_error(pContext, zErr, -1); | 
 |  3344     sqlite3_free(zErr); | 
 |  3345     return SQLITE_ERROR; | 
 |  3346   } | 
 |  3347   memcpy(&pRet, sqlite3_value_blob(pVal), sizeof(Fts3Cursor *)); | 
 |  3348   *ppCsr = pRet; | 
 |  3349   return SQLITE_OK; | 
 |  3350 } | 
 |  3351  | 
 |  3352 /* | 
 |  3353 ** Implementation of the snippet() function for FTS3 | 
 |  3354 */ | 
 |  3355 static void fts3SnippetFunc( | 
 |  3356   sqlite3_context *pContext,      /* SQLite function call context */ | 
 |  3357   int nVal,                       /* Size of apVal[] array */ | 
 |  3358   sqlite3_value **apVal           /* Array of arguments */ | 
 |  3359 ){ | 
 |  3360   Fts3Cursor *pCsr;               /* Cursor handle passed through apVal[0] */ | 
 |  3361   const char *zStart = "<b>"; | 
 |  3362   const char *zEnd = "</b>"; | 
 |  3363   const char *zEllipsis = "<b>...</b>"; | 
 |  3364   int iCol = -1; | 
 |  3365   int nToken = 15;                /* Default number of tokens in snippet */ | 
 |  3366  | 
 |  3367   /* There must be at least one argument passed to this function (otherwise | 
 |  3368   ** the non-overloaded version would have been called instead of this one). | 
|  4092   */ |  3369   */ | 
|  4093   if( c->pStmt && c->iCursorType!=idxNum ){ |  3370   assert( nVal>=1 ); | 
|  4094     sqlite3_finalize(c->pStmt); |  3371  | 
|  4095     c->pStmt = NULL; |  3372   if( nVal>6 ){ | 
|  4096   } |  3373     sqlite3_result_error(pContext,  | 
|  4097  |  3374         "wrong number of arguments to function snippet()", -1); | 
|  4098   /* Get a fresh statement appropriate to idxNum. */ |  3375     return; | 
|  4099   /* TODO(shess): Add a prepared-statement cache in the vt structure. |  3376   } | 
|  4100   ** The cache must handle multiple open cursors.  Easier to cache the |  3377   if( fts3FunctionArg(pContext, "snippet", apVal[0], &pCsr) ) return; | 
|  4101   ** statement variants at the vt to reduce malloc/realloc/free here. |  3378  | 
|  4102   ** Or we could have a StringBuffer variant which allowed stack |  3379   switch( nVal ){ | 
|  4103   ** construction for small values. |  3380     case 6: nToken = sqlite3_value_int(apVal[5]); | 
|  4104   */ |  3381     case 5: iCol = sqlite3_value_int(apVal[4]); | 
|  4105   if( !c->pStmt ){ |  3382     case 4: zEllipsis = (const char*)sqlite3_value_text(apVal[3]); | 
|  4106     StringBuffer sb; |  3383     case 3: zEnd = (const char*)sqlite3_value_text(apVal[2]); | 
|  4107     initStringBuffer(&sb); |  3384     case 2: zStart = (const char*)sqlite3_value_text(apVal[1]); | 
|  4108     append(&sb, "SELECT docid, "); |  3385   } | 
|  4109     appendList(&sb, v->nColumn, v->azContentColumn); |  3386   if( !zEllipsis || !zEnd || !zStart ){ | 
|  4110     append(&sb, " FROM %_content"); |  3387     sqlite3_result_error_nomem(pContext); | 
|  4111     if( idxNum!=QUERY_GENERIC ) append(&sb, " WHERE docid = ?"); |  3388   }else if( SQLITE_OK==fts3CursorSeek(pContext, pCsr) ){ | 
|  4112     rc = sql_prepare(v->db, v->zDb, v->zName, &c->pStmt, |  3389     sqlite3Fts3Snippet(pContext, pCsr, zStart, zEnd, zEllipsis, iCol, nToken); | 
|  4113                      stringBufferData(&sb)); |  3390   } | 
|  4114     stringBufferDestroy(&sb); |  3391 } | 
|  4115     if( rc!=SQLITE_OK ) return rc; |  3392  | 
|  4116     c->iCursorType = idxNum; |  3393 /* | 
|  4117   }else{ |  3394 ** Implementation of the offsets() function for FTS3 | 
|  4118     sqlite3_reset(c->pStmt); |  3395 */ | 
|  4119     assert( c->iCursorType==idxNum ); |  3396 static void fts3OffsetsFunc( | 
|  4120   } |  3397   sqlite3_context *pContext,      /* SQLite function call context */ | 
|  4121  |  3398   int nVal,                       /* Size of argument array */ | 
|  4122   switch( idxNum ){ |  3399   sqlite3_value **apVal           /* Array of arguments */ | 
|  4123     case QUERY_GENERIC: |  3400 ){ | 
 |  3401   Fts3Cursor *pCsr;               /* Cursor handle passed through apVal[0] */ | 
 |  3402  | 
 |  3403   UNUSED_PARAMETER(nVal); | 
 |  3404  | 
 |  3405   assert( nVal==1 ); | 
 |  3406   if( fts3FunctionArg(pContext, "offsets", apVal[0], &pCsr) ) return; | 
 |  3407   assert( pCsr ); | 
 |  3408   if( SQLITE_OK==fts3CursorSeek(pContext, pCsr) ){ | 
 |  3409     sqlite3Fts3Offsets(pContext, pCsr); | 
 |  3410   } | 
 |  3411 } | 
 |  3412  | 
 |  3413 /*  | 
 |  3414 ** Implementation of the special optimize() function for FTS3. This  | 
 |  3415 ** function merges all segments in the database to a single segment. | 
 |  3416 ** Example usage is: | 
 |  3417 ** | 
 |  3418 **   SELECT optimize(t) FROM t LIMIT 1; | 
 |  3419 ** | 
 |  3420 ** where 't' is the name of an FTS3 table. | 
 |  3421 */ | 
 |  3422 static void fts3OptimizeFunc( | 
 |  3423   sqlite3_context *pContext,      /* SQLite function call context */ | 
 |  3424   int nVal,                       /* Size of argument array */ | 
 |  3425   sqlite3_value **apVal           /* Array of arguments */ | 
 |  3426 ){ | 
 |  3427   int rc;                         /* Return code */ | 
 |  3428   Fts3Table *p;                   /* Virtual table handle */ | 
 |  3429   Fts3Cursor *pCursor;            /* Cursor handle passed through apVal[0] */ | 
 |  3430  | 
 |  3431   UNUSED_PARAMETER(nVal); | 
 |  3432  | 
 |  3433   assert( nVal==1 ); | 
 |  3434   if( fts3FunctionArg(pContext, "optimize", apVal[0], &pCursor) ) return; | 
 |  3435   p = (Fts3Table *)pCursor->base.pVtab; | 
 |  3436   assert( p ); | 
 |  3437  | 
 |  3438   rc = sqlite3Fts3Optimize(p); | 
 |  3439  | 
 |  3440   switch( rc ){ | 
 |  3441     case SQLITE_OK: | 
 |  3442       sqlite3_result_text(pContext, "Index optimized", -1, SQLITE_STATIC); | 
|  4124       break; |  3443       break; | 
|  4125  |  3444     case SQLITE_DONE: | 
|  4126     case QUERY_DOCID: |  3445       sqlite3_result_text(pContext, "Index already optimal", -1, SQLITE_STATIC); | 
|  4127       rc = sqlite3_bind_int64(c->pStmt, 1, sqlite3_value_int64(argv[0])); |  | 
|  4128       if( rc!=SQLITE_OK ) return rc; |  | 
|  4129       break; |  3446       break; | 
|  4130  |  3447     default: | 
|  4131     default:   /* full-text search */ |  3448       sqlite3_result_error_code(pContext, rc); | 
|  4132     { |  | 
|  4133       int iCol = idxNum-QUERY_FULLTEXT; |  | 
|  4134       const char *zQuery = (const char *)sqlite3_value_text(argv[0]); |  | 
|  4135       assert( idxNum<=QUERY_FULLTEXT+v->nColumn); |  | 
|  4136       assert( argc==1 ); |  | 
|  4137       if( c->result.nData!=0 ){ |  | 
|  4138         /* This case happens if the same cursor is used repeatedly. */ |  | 
|  4139         dlrDestroy(&c->reader); |  | 
|  4140         dataBufferReset(&c->result); |  | 
|  4141       }else{ |  | 
|  4142         dataBufferInit(&c->result, 0); |  | 
|  4143       } |  | 
|  4144       rc = fulltextQuery(v, iCol, zQuery, -1, &c->result, &c->pExpr); |  | 
|  4145       if( rc!=SQLITE_OK ) return rc; |  | 
|  4146       if( c->result.nData!=0 ){ |  | 
|  4147         dlrInit(&c->reader, DL_DOCIDS, c->result.pData, c->result.nData); |  | 
|  4148       } |  | 
|  4149       break; |  3449       break; | 
|  4150     } |  3450   } | 
|  4151   } |  3451 } | 
|  4152  |  3452  | 
|  4153   return fulltextNext(pCursor); |  3453 /* | 
|  4154 } |  3454 ** Implementation of the matchinfo() function for FTS3 | 
|  4155  |  3455 */ | 
|  4156 /* This is the xEof method of the virtual table.  The SQLite core |  3456 static void fts3MatchinfoFunc( | 
|  4157 ** calls this routine to find out if it has reached the end of |  3457   sqlite3_context *pContext,      /* SQLite function call context */ | 
|  4158 ** a query's results set. |  3458   int nVal,                       /* Size of argument array */ | 
|  4159 */ |  3459   sqlite3_value **apVal           /* Array of arguments */ | 
|  4160 static int fulltextEof(sqlite3_vtab_cursor *pCursor){ |  3460 ){ | 
|  4161   fulltext_cursor *c = (fulltext_cursor *) pCursor; |  3461   Fts3Cursor *pCsr;               /* Cursor handle passed through apVal[0] */ | 
|  4162   return c->eof; |  3462   assert( nVal==1 || nVal==2 ); | 
|  4163 } |  3463   if( SQLITE_OK==fts3FunctionArg(pContext, "matchinfo", apVal[0], &pCsr) ){ | 
|  4164  |  3464     const char *zArg = 0; | 
|  4165 /* This is the xColumn method of the virtual table.  The SQLite |  3465     if( nVal>1 ){ | 
|  4166 ** core calls this method during a query when it needs the value |  3466       zArg = (const char *)sqlite3_value_text(apVal[1]); | 
|  4167 ** of a column from the virtual table.  This method needs to use |  3467     } | 
|  4168 ** one of the sqlite3_result_*() routines to store the requested |  3468     sqlite3Fts3Matchinfo(pContext, pCsr, zArg); | 
|  4169 ** value back in the pContext. |  3469   } | 
|  4170 */ |  3470 } | 
|  4171 static int fulltextColumn(sqlite3_vtab_cursor *pCursor, |  | 
|  4172                           sqlite3_context *pContext, int idxCol){ |  | 
|  4173   fulltext_cursor *c = (fulltext_cursor *) pCursor; |  | 
|  4174   fulltext_vtab *v = cursor_vtab(c); |  | 
|  4175  |  | 
|  4176   if( idxCol<v->nColumn ){ |  | 
|  4177     sqlite3_value *pVal = sqlite3_column_value(c->pStmt, idxCol+1); |  | 
|  4178     sqlite3_result_value(pContext, pVal); |  | 
|  4179   }else if( idxCol==v->nColumn ){ |  | 
|  4180     /* The extra column whose name is the same as the table. |  | 
|  4181     ** Return a blob which is a pointer to the cursor |  | 
|  4182     */ |  | 
|  4183     sqlite3_result_blob(pContext, &c, sizeof(c), SQLITE_TRANSIENT); |  | 
|  4184   }else if( idxCol==v->nColumn+1 ){ |  | 
|  4185     /* The docid column, which is an alias for rowid. */ |  | 
|  4186     sqlite3_value *pVal = sqlite3_column_value(c->pStmt, 0); |  | 
|  4187     sqlite3_result_value(pContext, pVal); |  | 
|  4188   } |  | 
|  4189   return SQLITE_OK; |  | 
|  4190 } |  | 
|  4191  |  | 
|  4192 /* This is the xRowid method.  The SQLite core calls this routine to |  | 
|  4193 ** retrieve the rowid for the current row of the result set.  fts3 |  | 
|  4194 ** exposes %_content.docid as the rowid for the virtual table.  The |  | 
|  4195 ** rowid should be written to *pRowid. |  | 
|  4196 */ |  | 
|  4197 static int fulltextRowid(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ |  | 
|  4198   fulltext_cursor *c = (fulltext_cursor *) pCursor; |  | 
|  4199  |  | 
|  4200   *pRowid = sqlite3_column_int64(c->pStmt, 0); |  | 
|  4201   return SQLITE_OK; |  | 
|  4202 } |  | 
|  4203  |  | 
|  4204 /* Add all terms in [zText] to pendingTerms table.  If [iColumn] > 0, |  | 
|  4205 ** we also store positions and offsets in the hash table using that |  | 
|  4206 ** column number. |  | 
|  4207 */ |  | 
|  4208 static int buildTerms(fulltext_vtab *v, sqlite_int64 iDocid, |  | 
|  4209                       const char *zText, int iColumn){ |  | 
|  4210   sqlite3_tokenizer *pTokenizer = v->pTokenizer; |  | 
|  4211   sqlite3_tokenizer_cursor *pCursor; |  | 
|  4212   const char *pToken; |  | 
|  4213   int nTokenBytes; |  | 
|  4214   int iStartOffset, iEndOffset, iPosition; |  | 
|  4215   int rc; |  | 
|  4216  |  | 
|  4217   rc = pTokenizer->pModule->xOpen(pTokenizer, zText, -1, &pCursor); |  | 
|  4218   if( rc!=SQLITE_OK ) return rc; |  | 
|  4219  |  | 
|  4220   pCursor->pTokenizer = pTokenizer; |  | 
|  4221   while( SQLITE_OK==(rc=pTokenizer->pModule->xNext(pCursor, |  | 
|  4222                                                    &pToken, &nTokenBytes, |  | 
|  4223                                                    &iStartOffset, &iEndOffset, |  | 
|  4224                                                    &iPosition)) ){ |  | 
|  4225     DLCollector *p; |  | 
|  4226     int nData;                   /* Size of doclist before our update. */ |  | 
|  4227  |  | 
|  4228     /* Positions can't be negative; we use -1 as a terminator |  | 
|  4229      * internally.  Token can't be NULL or empty. */ |  | 
|  4230     if( iPosition<0 || pToken == NULL || nTokenBytes == 0 ){ |  | 
|  4231       rc = SQLITE_ERROR; |  | 
|  4232       break; |  | 
|  4233     } |  | 
|  4234  |  | 
|  4235     p = fts3HashFind(&v->pendingTerms, pToken, nTokenBytes); |  | 
|  4236     if( p==NULL ){ |  | 
|  4237       nData = 0; |  | 
|  4238       p = dlcNew(iDocid, DL_DEFAULT); |  | 
|  4239       fts3HashInsert(&v->pendingTerms, pToken, nTokenBytes, p); |  | 
|  4240  |  | 
|  4241       /* Overhead for our hash table entry, the key, and the value. */ |  | 
|  4242       v->nPendingData += sizeof(struct fts3HashElem)+sizeof(*p)+nTokenBytes; |  | 
|  4243     }else{ |  | 
|  4244       nData = p->b.nData; |  | 
|  4245       if( p->dlw.iPrevDocid!=iDocid ) dlcNext(p, iDocid); |  | 
|  4246     } |  | 
|  4247     if( iColumn>=0 ){ |  | 
|  4248       dlcAddPos(p, iColumn, iPosition, iStartOffset, iEndOffset); |  | 
|  4249     } |  | 
|  4250  |  | 
|  4251     /* Accumulate data added by dlcNew or dlcNext, and dlcAddPos. */ |  | 
|  4252     v->nPendingData += p->b.nData-nData; |  | 
|  4253   } |  | 
|  4254  |  | 
|  4255   /* TODO(shess) Check return?  Should this be able to cause errors at |  | 
|  4256   ** this point?  Actually, same question about sqlite3_finalize(), |  | 
|  4257   ** though one could argue that failure there means that the data is |  | 
|  4258   ** not durable.  *ponder* |  | 
|  4259   */ |  | 
|  4260   pTokenizer->pModule->xClose(pCursor); |  | 
|  4261   if( SQLITE_DONE == rc ) return SQLITE_OK; |  | 
|  4262   return rc; |  | 
|  4263 } |  | 
|  4264  |  | 
|  4265 /* Add doclists for all terms in [pValues] to pendingTerms table. */ |  | 
|  4266 static int insertTerms(fulltext_vtab *v, sqlite_int64 iDocid, |  | 
|  4267                        sqlite3_value **pValues){ |  | 
|  4268   int i; |  | 
|  4269   for(i = 0; i < v->nColumn ; ++i){ |  | 
|  4270     char *zText = (char*)sqlite3_value_text(pValues[i]); |  | 
|  4271     int rc = buildTerms(v, iDocid, zText, i); |  | 
|  4272     if( rc!=SQLITE_OK ) return rc; |  | 
|  4273   } |  | 
|  4274   return SQLITE_OK; |  | 
|  4275 } |  | 
|  4276  |  | 
|  4277 /* Add empty doclists for all terms in the given row's content to |  | 
|  4278 ** pendingTerms. |  | 
|  4279 */ |  | 
|  4280 static int deleteTerms(fulltext_vtab *v, sqlite_int64 iDocid){ |  | 
|  4281   const char **pValues; |  | 
|  4282   int i, rc; |  | 
|  4283  |  | 
|  4284   /* TODO(shess) Should we allow such tables at all? */ |  | 
|  4285   if( DL_DEFAULT==DL_DOCIDS ) return SQLITE_ERROR; |  | 
|  4286  |  | 
|  4287   rc = content_select(v, iDocid, &pValues); |  | 
|  4288   if( rc!=SQLITE_OK ) return rc; |  | 
|  4289  |  | 
|  4290   for(i = 0 ; i < v->nColumn; ++i) { |  | 
|  4291     rc = buildTerms(v, iDocid, pValues[i], -1); |  | 
|  4292     if( rc!=SQLITE_OK ) break; |  | 
|  4293   } |  | 
|  4294  |  | 
|  4295   freeStringArray(v->nColumn, pValues); |  | 
|  4296   return SQLITE_OK; |  | 
|  4297 } |  | 
|  4298  |  | 
|  4299 /* TODO(shess) Refactor the code to remove this forward decl. */ |  | 
|  4300 static int initPendingTerms(fulltext_vtab *v, sqlite_int64 iDocid); |  | 
|  4301  |  | 
|  4302 /* Insert a row into the %_content table; set *piDocid to be the ID of the |  | 
|  4303 ** new row.  Add doclists for terms to pendingTerms. |  | 
|  4304 */ |  | 
|  4305 static int index_insert(fulltext_vtab *v, sqlite3_value *pRequestDocid, |  | 
|  4306                         sqlite3_value **pValues, sqlite_int64 *piDocid){ |  | 
|  4307   int rc; |  | 
|  4308  |  | 
|  4309   rc = content_insert(v, pRequestDocid, pValues);  /* execute an SQL INSERT */ |  | 
|  4310   if( rc!=SQLITE_OK ) return rc; |  | 
|  4311  |  | 
|  4312   /* docid column is an alias for rowid. */ |  | 
|  4313   *piDocid = sqlite3_last_insert_rowid(v->db); |  | 
|  4314   rc = initPendingTerms(v, *piDocid); |  | 
|  4315   if( rc!=SQLITE_OK ) return rc; |  | 
|  4316  |  | 
|  4317   return insertTerms(v, *piDocid, pValues); |  | 
|  4318 } |  | 
|  4319  |  | 
|  4320 /* Delete a row from the %_content table; add empty doclists for terms |  | 
|  4321 ** to pendingTerms. |  | 
|  4322 */ |  | 
|  4323 static int index_delete(fulltext_vtab *v, sqlite_int64 iRow){ |  | 
|  4324   int rc = initPendingTerms(v, iRow); |  | 
|  4325   if( rc!=SQLITE_OK ) return rc; |  | 
|  4326  |  | 
|  4327   rc = deleteTerms(v, iRow); |  | 
|  4328   if( rc!=SQLITE_OK ) return rc; |  | 
|  4329  |  | 
|  4330   return content_delete(v, iRow);  /* execute an SQL DELETE */ |  | 
|  4331 } |  | 
|  4332  |  | 
|  4333 /* Update a row in the %_content table; add delete doclists to |  | 
|  4334 ** pendingTerms for old terms not in the new data, add insert doclists |  | 
|  4335 ** to pendingTerms for terms in the new data. |  | 
|  4336 */ |  | 
|  4337 static int index_update(fulltext_vtab *v, sqlite_int64 iRow, |  | 
|  4338                         sqlite3_value **pValues){ |  | 
|  4339   int rc = initPendingTerms(v, iRow); |  | 
|  4340   if( rc!=SQLITE_OK ) return rc; |  | 
|  4341  |  | 
|  4342   /* Generate an empty doclist for each term that previously appeared in this |  | 
|  4343    * row. */ |  | 
|  4344   rc = deleteTerms(v, iRow); |  | 
|  4345   if( rc!=SQLITE_OK ) return rc; |  | 
|  4346  |  | 
|  4347   rc = content_update(v, pValues, iRow);  /* execute an SQL UPDATE */ |  | 
|  4348   if( rc!=SQLITE_OK ) return rc; |  | 
|  4349  |  | 
|  4350   /* Now add positions for terms which appear in the updated row. */ |  | 
|  4351   return insertTerms(v, iRow, pValues); |  | 
|  4352 } |  | 
|  4353  |  | 
|  4354 /*******************************************************************/ |  | 
|  4355 /* InteriorWriter is used to collect terms and block references into |  | 
|  4356 ** interior nodes in %_segments.  See commentary at top of file for |  | 
|  4357 ** format. |  | 
|  4358 */ |  | 
|  4359  |  | 
/* Size limit, in bytes, used when deciding whether an interior node
** should overflow into a new block.
*/
#define INTERIOR_MAX 2048

/* Minimum number of terms per interior node (except the root).  Without
** this floor, large terms could make the tree too skinny.  The value
** must be >0 so that the tree always makes progress; the minimum tree
** fanout is INTERIOR_MIN_TERMS+1.
*/
#define INTERIOR_MIN_TERMS 7
#if INTERIOR_MIN_TERMS<=0
# error INTERIOR_MIN_TERMS must be greater than 0.
#endif

/* ROOT_MAX controls how much data is stored inline in the segment
** directory.
*/
/* TODO(shess) Push ROOT_MAX down to whoever is writing things.  It's
** only here so that interiorWriterRootInfo() and leafWriterRootInfo()
** can both see it, but if the caller passed it in, we wouldn't even
** need a define.
*/
#define ROOT_MAX 1024
#if VARINT_MAX*2>ROOT_MAX
# error ROOT_MAX must have enough space for a header.
#endif
|  4385  |  | 
|  4386 /* InteriorBlock stores a linked-list of interior blocks while a lower |  | 
|  4387 ** layer is being constructed. |  | 
|  4388 */ |  | 
|  4389 typedef struct InteriorBlock { |  | 
|  4390   DataBuffer term;           /* Leftmost term in block's subtree. */ |  | 
|  4391   DataBuffer data;           /* Accumulated data for the block. */ |  | 
|  4392   struct InteriorBlock *next; |  | 
|  4393 } InteriorBlock; |  | 
|  4394  |  | 
|  4395 static InteriorBlock *interiorBlockNew(int iHeight, sqlite_int64 iChildBlock, |  | 
|  4396                                        const char *pTerm, int nTerm){ |  | 
|  4397   InteriorBlock *block = sqlite3_malloc(sizeof(InteriorBlock)); |  | 
|  4398   char c[VARINT_MAX+VARINT_MAX]; |  | 
|  4399   int n; |  | 
|  4400  |  | 
|  4401   if( block ){ |  | 
|  4402     memset(block, 0, sizeof(*block)); |  | 
|  4403     dataBufferInit(&block->term, 0); |  | 
|  4404     dataBufferReplace(&block->term, pTerm, nTerm); |  | 
|  4405  |  | 
|  4406     n = fts3PutVarint(c, iHeight); |  | 
|  4407     n += fts3PutVarint(c+n, iChildBlock); |  | 
|  4408     dataBufferInit(&block->data, INTERIOR_MAX); |  | 
|  4409     dataBufferReplace(&block->data, c, n); |  | 
|  4410   } |  | 
|  4411   return block; |  | 
|  4412 } |  | 
|  4413  |  | 
|  4414 #ifndef NDEBUG |  | 
|  4415 /* Verify that the data is readable as an interior node. */ |  | 
|  4416 static void interiorBlockValidate(InteriorBlock *pBlock){ |  | 
|  4417   const char *pData = pBlock->data.pData; |  | 
|  4418   int nData = pBlock->data.nData; |  | 
|  4419   int n, iDummy; |  | 
|  4420   sqlite_int64 iBlockid; |  | 
|  4421  |  | 
|  4422   assert( nData>0 ); |  | 
|  4423   assert( pData!=0 ); |  | 
|  4424   assert( pData+nData>pData ); |  | 
|  4425  |  | 
|  4426   /* Must lead with height of node as a varint(n), n>0 */ |  | 
|  4427   n = fts3GetVarint32(pData, &iDummy); |  | 
|  4428   assert( n>0 ); |  | 
|  4429   assert( iDummy>0 ); |  | 
|  4430   assert( n<nData ); |  | 
|  4431   pData += n; |  | 
|  4432   nData -= n; |  | 
|  4433  |  | 
|  4434   /* Must contain iBlockid. */ |  | 
|  4435   n = fts3GetVarint(pData, &iBlockid); |  | 
|  4436   assert( n>0 ); |  | 
|  4437   assert( n<=nData ); |  | 
|  4438   pData += n; |  | 
|  4439   nData -= n; |  | 
|  4440  |  | 
|  4441   /* Zero or more terms of positive length */ |  | 
|  4442   if( nData!=0 ){ |  | 
|  4443     /* First term is not delta-encoded. */ |  | 
|  4444     n = fts3GetVarint32(pData, &iDummy); |  | 
|  4445     assert( n>0 ); |  | 
|  4446     assert( iDummy>0 ); |  | 
|  4447     assert( n+iDummy>0); |  | 
|  4448     assert( n+iDummy<=nData ); |  | 
|  4449     pData += n+iDummy; |  | 
|  4450     nData -= n+iDummy; |  | 
|  4451  |  | 
|  4452     /* Following terms delta-encoded. */ |  | 
|  4453     while( nData!=0 ){ |  | 
|  4454       /* Length of shared prefix. */ |  | 
|  4455       n = fts3GetVarint32(pData, &iDummy); |  | 
|  4456       assert( n>0 ); |  | 
|  4457       assert( iDummy>=0 ); |  | 
|  4458       assert( n<nData ); |  | 
|  4459       pData += n; |  | 
|  4460       nData -= n; |  | 
|  4461  |  | 
|  4462       /* Length and data of distinct suffix. */ |  | 
|  4463       n = fts3GetVarint32(pData, &iDummy); |  | 
|  4464       assert( n>0 ); |  | 
|  4465       assert( iDummy>0 ); |  | 
|  4466       assert( n+iDummy>0); |  | 
|  4467       assert( n+iDummy<=nData ); |  | 
|  4468       pData += n+iDummy; |  | 
|  4469       nData -= n+iDummy; |  | 
|  4470     } |  | 
|  4471   } |  | 
|  4472 } |  | 
|  4473 #define ASSERT_VALID_INTERIOR_BLOCK(x) interiorBlockValidate(x) |  | 
|  4474 #else |  | 
|  4475 #define ASSERT_VALID_INTERIOR_BLOCK(x) assert( 1 ) |  | 
|  4476 #endif |  | 
|  4477  |  | 
|  4478 typedef struct InteriorWriter { |  | 
|  4479   int iHeight;                   /* from 0 at leaves. */ |  | 
|  4480   InteriorBlock *first, *last; |  | 
|  4481   struct InteriorWriter *parentWriter; |  | 
|  4482  |  | 
|  4483   DataBuffer term;               /* Last term written to block "last". */ |  | 
|  4484   sqlite_int64 iOpeningChildBlock; /* First child block in block "last". */ |  | 
|  4485 #ifndef NDEBUG |  | 
|  4486   sqlite_int64 iLastChildBlock;  /* for consistency checks. */ |  | 
|  4487 #endif |  | 
|  4488 } InteriorWriter; |  | 
|  4489  |  | 
|  4490 /* Initialize an interior node where pTerm[nTerm] marks the leftmost |  | 
|  4491 ** term in the tree.  iChildBlock is the leftmost child block at the |  | 
|  4492 ** next level down the tree. |  | 
|  4493 */ |  | 
|  4494 static void interiorWriterInit(int iHeight, const char *pTerm, int nTerm, |  | 
|  4495                                sqlite_int64 iChildBlock, |  | 
|  4496                                InteriorWriter *pWriter){ |  | 
|  4497   InteriorBlock *block; |  | 
|  4498   assert( iHeight>0 ); |  | 
|  4499   CLEAR(pWriter); |  | 
|  4500  |  | 
|  4501   pWriter->iHeight = iHeight; |  | 
|  4502   pWriter->iOpeningChildBlock = iChildBlock; |  | 
|  4503 #ifndef NDEBUG |  | 
|  4504   pWriter->iLastChildBlock = iChildBlock; |  | 
|  4505 #endif |  | 
|  4506   block = interiorBlockNew(iHeight, iChildBlock, pTerm, nTerm); |  | 
|  4507   pWriter->last = pWriter->first = block; |  | 
|  4508   ASSERT_VALID_INTERIOR_BLOCK(pWriter->last); |  | 
|  4509   dataBufferInit(&pWriter->term, 0); |  | 
|  4510 } |  | 
|  4511  |  | 
|  4512 /* Append the child node rooted at iChildBlock to the interior node, |  | 
|  4513 ** with pTerm[nTerm] as the leftmost term in iChildBlock's subtree. |  | 
|  4514 */ |  | 
|  4515 static void interiorWriterAppend(InteriorWriter *pWriter, |  | 
|  4516                                  const char *pTerm, int nTerm, |  | 
|  4517                                  sqlite_int64 iChildBlock){ |  | 
|  4518   char c[VARINT_MAX+VARINT_MAX]; |  | 
|  4519   int n, nPrefix = 0; |  | 
|  4520  |  | 
|  4521   ASSERT_VALID_INTERIOR_BLOCK(pWriter->last); |  | 
|  4522  |  | 
|  4523   /* The first term written into an interior node is actually |  | 
|  4524   ** associated with the second child added (the first child was added |  | 
|  4525   ** in interiorWriterInit, or in the if clause at the bottom of this |  | 
|  4526   ** function).  That term gets encoded straight up, with nPrefix left |  | 
|  4527   ** at 0. |  | 
|  4528   */ |  | 
|  4529   if( pWriter->term.nData==0 ){ |  | 
|  4530     n = fts3PutVarint(c, nTerm); |  | 
|  4531   }else{ |  | 
|  4532     while( nPrefix<pWriter->term.nData && |  | 
|  4533            pTerm[nPrefix]==pWriter->term.pData[nPrefix] ){ |  | 
|  4534       nPrefix++; |  | 
|  4535     } |  | 
|  4536  |  | 
|  4537     n = fts3PutVarint(c, nPrefix); |  | 
|  4538     n += fts3PutVarint(c+n, nTerm-nPrefix); |  | 
|  4539   } |  | 
|  4540  |  | 
|  4541 #ifndef NDEBUG |  | 
|  4542   pWriter->iLastChildBlock++; |  | 
|  4543 #endif |  | 
|  4544   assert( pWriter->iLastChildBlock==iChildBlock ); |  | 
|  4545  |  | 
|  4546   /* Overflow to a new block if the new term makes the current block |  | 
|  4547   ** too big, and the current block already has enough terms. |  | 
|  4548   */ |  | 
|  4549   if( pWriter->last->data.nData+n+nTerm-nPrefix>INTERIOR_MAX && |  | 
|  4550       iChildBlock-pWriter->iOpeningChildBlock>INTERIOR_MIN_TERMS ){ |  | 
|  4551     pWriter->last->next = interiorBlockNew(pWriter->iHeight, iChildBlock, |  | 
|  4552                                            pTerm, nTerm); |  | 
|  4553     pWriter->last = pWriter->last->next; |  | 
|  4554     pWriter->iOpeningChildBlock = iChildBlock; |  | 
|  4555     dataBufferReset(&pWriter->term); |  | 
|  4556   }else{ |  | 
|  4557     dataBufferAppend2(&pWriter->last->data, c, n, |  | 
|  4558                       pTerm+nPrefix, nTerm-nPrefix); |  | 
|  4559     dataBufferReplace(&pWriter->term, pTerm, nTerm); |  | 
|  4560   } |  | 
|  4561   ASSERT_VALID_INTERIOR_BLOCK(pWriter->last); |  | 
|  4562 } |  | 
|  4563  |  | 
|  4564 /* Free the space used by pWriter, including the linked-list of |  | 
|  4565 ** InteriorBlocks, and parentWriter, if present. |  | 
|  4566 */ |  | 
|  4567 static int interiorWriterDestroy(InteriorWriter *pWriter){ |  | 
|  4568   InteriorBlock *block = pWriter->first; |  | 
|  4569  |  | 
|  4570   while( block!=NULL ){ |  | 
|  4571     InteriorBlock *b = block; |  | 
|  4572     block = block->next; |  | 
|  4573     dataBufferDestroy(&b->term); |  | 
|  4574     dataBufferDestroy(&b->data); |  | 
|  4575     sqlite3_free(b); |  | 
|  4576   } |  | 
|  4577   if( pWriter->parentWriter!=NULL ){ |  | 
|  4578     interiorWriterDestroy(pWriter->parentWriter); |  | 
|  4579     sqlite3_free(pWriter->parentWriter); |  | 
|  4580   } |  | 
|  4581   dataBufferDestroy(&pWriter->term); |  | 
|  4582   SCRAMBLE(pWriter); |  | 
|  4583   return SQLITE_OK; |  | 
|  4584 } |  | 
|  4585  |  | 
|  4586 /* If pWriter can fit entirely in ROOT_MAX, return it as the root info |  | 
|  4587 ** directly, leaving *piEndBlockid unchanged.  Otherwise, flush |  | 
|  4588 ** pWriter to %_segments, building a new layer of interior nodes, and |  | 
|  4589 ** recursively ask for their root into. |  | 
|  4590 */ |  | 
|  4591 static int interiorWriterRootInfo(fulltext_vtab *v, InteriorWriter *pWriter, |  | 
|  4592                                   char **ppRootInfo, int *pnRootInfo, |  | 
|  4593                                   sqlite_int64 *piEndBlockid){ |  | 
|  4594   InteriorBlock *block = pWriter->first; |  | 
|  4595   sqlite_int64 iBlockid = 0; |  | 
|  4596   int rc; |  | 
|  4597  |  | 
|  4598   /* If we can fit the segment inline */ |  | 
|  4599   if( block==pWriter->last && block->data.nData<ROOT_MAX ){ |  | 
|  4600     *ppRootInfo = block->data.pData; |  | 
|  4601     *pnRootInfo = block->data.nData; |  | 
|  4602     return SQLITE_OK; |  | 
|  4603   } |  | 
|  4604  |  | 
|  4605   /* Flush the first block to %_segments, and create a new level of |  | 
|  4606   ** interior node. |  | 
|  4607   */ |  | 
|  4608   ASSERT_VALID_INTERIOR_BLOCK(block); |  | 
|  4609   rc = block_insert(v, block->data.pData, block->data.nData, &iBlockid); |  | 
|  4610   if( rc!=SQLITE_OK ) return rc; |  | 
|  4611   *piEndBlockid = iBlockid; |  | 
|  4612  |  | 
|  4613   pWriter->parentWriter = sqlite3_malloc(sizeof(*pWriter->parentWriter)); |  | 
|  4614   interiorWriterInit(pWriter->iHeight+1, |  | 
|  4615                      block->term.pData, block->term.nData, |  | 
|  4616                      iBlockid, pWriter->parentWriter); |  | 
|  4617  |  | 
|  4618   /* Flush additional blocks and append to the higher interior |  | 
|  4619   ** node. |  | 
|  4620   */ |  | 
|  4621   for(block=block->next; block!=NULL; block=block->next){ |  | 
|  4622     ASSERT_VALID_INTERIOR_BLOCK(block); |  | 
|  4623     rc = block_insert(v, block->data.pData, block->data.nData, &iBlockid); |  | 
|  4624     if( rc!=SQLITE_OK ) return rc; |  | 
|  4625     *piEndBlockid = iBlockid; |  | 
|  4626  |  | 
|  4627     interiorWriterAppend(pWriter->parentWriter, |  | 
|  4628                          block->term.pData, block->term.nData, iBlockid); |  | 
|  4629   } |  | 
|  4630  |  | 
|  4631   /* Parent node gets the chance to be the root. */ |  | 
|  4632   return interiorWriterRootInfo(v, pWriter->parentWriter, |  | 
|  4633                                 ppRootInfo, pnRootInfo, piEndBlockid); |  | 
|  4634 } |  | 
|  4635  |  | 
|  4636 /****************************************************************/ |  | 
|  4637 /* InteriorReader is used to read off the data from an interior node |  | 
|  4638 ** (see comment at top of file for the format). |  | 
|  4639 */ |  | 
|  4640 typedef struct InteriorReader { |  | 
|  4641   const char *pData; |  | 
|  4642   int nData; |  | 
|  4643  |  | 
|  4644   DataBuffer term;          /* previous term, for decoding term delta. */ |  | 
|  4645  |  | 
|  4646   sqlite_int64 iBlockid; |  | 
|  4647 } InteriorReader; |  | 
|  4648  |  | 
|  4649 static void interiorReaderDestroy(InteriorReader *pReader){ |  | 
|  4650   dataBufferDestroy(&pReader->term); |  | 
|  4651   SCRAMBLE(pReader); |  | 
|  4652 } |  | 
|  4653  |  | 
|  4654 static int interiorReaderInit(const char *pData, int nData, |  | 
|  4655                               InteriorReader *pReader){ |  | 
|  4656   int n, nTerm; |  | 
|  4657  |  | 
|  4658   /* These conditions are checked and met by the callers. */ |  | 
|  4659   assert( nData>0 ); |  | 
|  4660   assert( pData[0]!='\0' ); |  | 
|  4661  |  | 
|  4662   CLEAR(pReader); |  | 
|  4663  |  | 
|  4664   /* Decode the base blockid, and set the cursor to the first term. */ |  | 
|  4665   n = fts3GetVarintSafe(pData+1, &pReader->iBlockid, nData-1); |  | 
|  4666   if( !n ) return SQLITE_CORRUPT_BKPT; |  | 
|  4667   pReader->pData = pData+1+n; |  | 
|  4668   pReader->nData = nData-(1+n); |  | 
|  4669  |  | 
|  4670   /* A single-child interior node (such as when a leaf node was too |  | 
|  4671   ** large for the segment directory) won't have any terms. |  | 
|  4672   ** Otherwise, decode the first term. |  | 
|  4673   */ |  | 
|  4674   if( pReader->nData==0 ){ |  | 
|  4675     dataBufferInit(&pReader->term, 0); |  | 
|  4676   }else{ |  | 
|  4677     n = fts3GetVarint32Safe(pReader->pData, &nTerm, pReader->nData); |  | 
|  4678     if( !n || nTerm<0 || nTerm>pReader->nData-n) return SQLITE_CORRUPT_BKPT; |  | 
|  4679     dataBufferInit(&pReader->term, nTerm); |  | 
|  4680     dataBufferReplace(&pReader->term, pReader->pData+n, nTerm); |  | 
|  4681     pReader->pData += n+nTerm; |  | 
|  4682     pReader->nData -= n+nTerm; |  | 
|  4683   } |  | 
|  4684   return SQLITE_OK; |  | 
|  4685 } |  | 
|  4686  |  | 
|  4687 static int interiorReaderAtEnd(InteriorReader *pReader){ |  | 
|  4688   return pReader->term.nData<=0; |  | 
|  4689 } |  | 
|  4690  |  | 
|  4691 static sqlite_int64 interiorReaderCurrentBlockid(InteriorReader *pReader){ |  | 
|  4692   return pReader->iBlockid; |  | 
|  4693 } |  | 
|  4694  |  | 
|  4695 static int interiorReaderTermBytes(InteriorReader *pReader){ |  | 
|  4696   assert( !interiorReaderAtEnd(pReader) ); |  | 
|  4697   return pReader->term.nData; |  | 
|  4698 } |  | 
|  4699 static const char *interiorReaderTerm(InteriorReader *pReader){ |  | 
|  4700   assert( !interiorReaderAtEnd(pReader) ); |  | 
|  4701   return pReader->term.pData; |  | 
|  4702 } |  | 
|  4703  |  | 
|  4704 /* Step forward to the next term in the node. */ |  | 
|  4705 static int interiorReaderStep(InteriorReader *pReader){ |  | 
|  4706   assert( !interiorReaderAtEnd(pReader) ); |  | 
|  4707  |  | 
|  4708   /* If the last term has been read, signal eof, else construct the |  | 
|  4709   ** next term. |  | 
|  4710   */ |  | 
|  4711   if( pReader->nData==0 ){ |  | 
|  4712     dataBufferReset(&pReader->term); |  | 
|  4713   }else{ |  | 
|  4714     int n, nPrefix, nSuffix; |  | 
|  4715  |  | 
|  4716     n = fts3GetVarint32Safe(pReader->pData, &nPrefix, pReader->nData); |  | 
|  4717     if( !n ) return SQLITE_CORRUPT_BKPT; |  | 
|  4718     pReader->nData -= n; |  | 
|  4719     pReader->pData += n; |  | 
|  4720     n = fts3GetVarint32Safe(pReader->pData, &nSuffix, pReader->nData); |  | 
|  4721     if( !n ) return SQLITE_CORRUPT_BKPT; |  | 
|  4722     pReader->nData -= n; |  | 
|  4723     pReader->pData += n; |  | 
|  4724     if( nSuffix<0 || nSuffix>pReader->nData ) return SQLITE_CORRUPT_BKPT; |  | 
|  4725     if( nPrefix<0 || nPrefix>pReader->term.nData ) return SQLITE_CORRUPT_BKPT; |  | 
|  4726  |  | 
|  4727     /* Truncate the current term and append suffix data. */ |  | 
|  4728     pReader->term.nData = nPrefix; |  | 
|  4729     dataBufferAppend(&pReader->term, pReader->pData, nSuffix); |  | 
|  4730  |  | 
|  4731     pReader->pData += nSuffix; |  | 
|  4732     pReader->nData -= nSuffix; |  | 
|  4733   } |  | 
|  4734   pReader->iBlockid++; |  | 
|  4735   return SQLITE_OK; |  | 
|  4736 } |  | 
|  4737  |  | 
|  4738 /* Compare the current term to pTerm[nTerm], returning strcmp-style |  | 
|  4739 ** results.  If isPrefix, equality means equal through nTerm bytes. |  | 
|  4740 */ |  | 
|  4741 static int interiorReaderTermCmp(InteriorReader *pReader, |  | 
|  4742                                  const char *pTerm, int nTerm, int isPrefix){ |  | 
|  4743   const char *pReaderTerm = interiorReaderTerm(pReader); |  | 
|  4744   int nReaderTerm = interiorReaderTermBytes(pReader); |  | 
|  4745   int c, n = nReaderTerm<nTerm ? nReaderTerm : nTerm; |  | 
|  4746  |  | 
|  4747   if( n==0 ){ |  | 
|  4748     if( nReaderTerm>0 ) return -1; |  | 
|  4749     if( nTerm>0 ) return 1; |  | 
|  4750     return 0; |  | 
|  4751   } |  | 
|  4752  |  | 
|  4753   c = memcmp(pReaderTerm, pTerm, n); |  | 
|  4754   if( c!=0 ) return c; |  | 
|  4755   if( isPrefix && n==nTerm ) return 0; |  | 
|  4756   return nReaderTerm - nTerm; |  | 
|  4757 } |  | 
|  4758  |  | 
|  4759 /****************************************************************/ |  | 
|  4760 /* LeafWriter is used to collect terms and associated doclist data |  | 
|  4761 ** into leaf blocks in %_segments (see top of file for format info). |  | 
|  4762 ** Expected usage is: |  | 
|  4763 ** |  | 
|  4764 ** LeafWriter writer; |  | 
|  4765 ** leafWriterInit(0, 0, &writer); |  | 
|  4766 ** while( sorted_terms_left_to_process ){ |  | 
|  4767 **   // data is doclist data for that term. |  | 
|  4768 **   rc = leafWriterStep(v, &writer, pTerm, nTerm, pData, nData); |  | 
|  4769 **   if( rc!=SQLITE_OK ) goto err; |  | 
|  4770 ** } |  | 
|  4771 ** rc = leafWriterFinalize(v, &writer); |  | 
|  4772 **err: |  | 
|  4773 ** leafWriterDestroy(&writer); |  | 
|  4774 ** return rc; |  | 
|  4775 ** |  | 
|  4776 ** leafWriterStep() may write a collected leaf out to %_segments. |  | 
|  4777 ** leafWriterFinalize() finishes writing any buffered data and stores |  | 
|  4778 ** a root node in %_segdir.  leafWriterDestroy() frees all buffers and |  | 
|  4779 ** InteriorWriters allocated as part of writing this segment. |  | 
|  4780 ** |  | 
|  4781 ** TODO(shess) Document leafWriterStepMerge(). |  | 
|  4782 */ |  | 
|  4783  |  | 
/* A term whose combined term and doclist size exceeds this many bytes
** is written to a leaf block of its own.
*/
#define STANDALONE_MIN 1024

/* Target ceiling for the size of a leaf block; also the initial
** capacity of the leaf encoding buffer.
*/
#define LEAF_MAX 2048
|  4789  |  | 
|  4790 typedef struct LeafWriter { |  | 
|  4791   int iLevel; |  | 
|  4792   int idx; |  | 
|  4793   sqlite_int64 iStartBlockid;     /* needed to create the root info */ |  | 
|  4794   sqlite_int64 iEndBlockid;       /* when we're done writing. */ |  | 
|  4795  |  | 
|  4796   DataBuffer term;                /* previous encoded term */ |  | 
|  4797   DataBuffer data;                /* encoding buffer */ |  | 
|  4798  |  | 
|  4799   /* bytes of first term in the current node which distinguishes that |  | 
|  4800   ** term from the last term of the previous node. |  | 
|  4801   */ |  | 
|  4802   int nTermDistinct; |  | 
|  4803  |  | 
|  4804   InteriorWriter parentWriter;    /* if we overflow */ |  | 
|  4805   int has_parent; |  | 
|  4806 } LeafWriter; |  | 
|  4807  |  | 
|  4808 static void leafWriterInit(int iLevel, int idx, LeafWriter *pWriter){ |  | 
|  4809   CLEAR(pWriter); |  | 
|  4810   pWriter->iLevel = iLevel; |  | 
|  4811   pWriter->idx = idx; |  | 
|  4812  |  | 
|  4813   dataBufferInit(&pWriter->term, 32); |  | 
|  4814  |  | 
|  4815   /* Start out with a reasonably sized block, though it can grow. */ |  | 
|  4816   dataBufferInit(&pWriter->data, LEAF_MAX); |  | 
|  4817 } |  | 
|  4818  |  | 
|  4819 #ifndef NDEBUG |  | 
|  4820 /* Verify that the data is readable as a leaf node. */ |  | 
|  4821 static void leafNodeValidate(const char *pData, int nData){ |  | 
|  4822   int n, iDummy; |  | 
|  4823  |  | 
|  4824   if( nData==0 ) return; |  | 
|  4825   assert( nData>0 ); |  | 
|  4826   assert( pData!=0 ); |  | 
|  4827   assert( pData+nData>pData ); |  | 
|  4828  |  | 
|  4829   /* Must lead with a varint(0) */ |  | 
|  4830   n = fts3GetVarint32(pData, &iDummy); |  | 
|  4831   assert( iDummy==0 ); |  | 
|  4832   assert( n>0 ); |  | 
|  4833   assert( n<nData ); |  | 
|  4834   pData += n; |  | 
|  4835   nData -= n; |  | 
|  4836  |  | 
|  4837   /* Leading term length and data must fit in buffer. */ |  | 
|  4838   n = fts3GetVarint32(pData, &iDummy); |  | 
|  4839   assert( n>0 ); |  | 
|  4840   assert( iDummy>0 ); |  | 
|  4841   assert( n+iDummy>0 ); |  | 
|  4842   assert( n+iDummy<nData ); |  | 
|  4843   pData += n+iDummy; |  | 
|  4844   nData -= n+iDummy; |  | 
|  4845  |  | 
|  4846   /* Leading term's doclist length and data must fit. */ |  | 
|  4847   n = fts3GetVarint32(pData, &iDummy); |  | 
|  4848   assert( n>0 ); |  | 
|  4849   assert( iDummy>0 ); |  | 
|  4850   assert( n+iDummy>0 ); |  | 
|  4851   assert( n+iDummy<=nData ); |  | 
|  4852   ASSERT_VALID_DOCLIST(DL_DEFAULT, pData+n, iDummy, NULL); |  | 
|  4853   pData += n+iDummy; |  | 
|  4854   nData -= n+iDummy; |  | 
|  4855  |  | 
|  4856   /* Verify that trailing terms and doclists also are readable. */ |  | 
|  4857   while( nData!=0 ){ |  | 
|  4858     n = fts3GetVarint32(pData, &iDummy); |  | 
|  4859     assert( n>0 ); |  | 
|  4860     assert( iDummy>=0 ); |  | 
|  4861     assert( n<nData ); |  | 
|  4862     pData += n; |  | 
|  4863     nData -= n; |  | 
|  4864     n = fts3GetVarint32(pData, &iDummy); |  | 
|  4865     assert( n>0 ); |  | 
|  4866     assert( iDummy>0 ); |  | 
|  4867     assert( n+iDummy>0 ); |  | 
|  4868     assert( n+iDummy<nData ); |  | 
|  4869     pData += n+iDummy; |  | 
|  4870     nData -= n+iDummy; |  | 
|  4871  |  | 
|  4872     n = fts3GetVarint32(pData, &iDummy); |  | 
|  4873     assert( n>0 ); |  | 
|  4874     assert( iDummy>0 ); |  | 
|  4875     assert( n+iDummy>0 ); |  | 
|  4876     assert( n+iDummy<=nData ); |  | 
|  4877     ASSERT_VALID_DOCLIST(DL_DEFAULT, pData+n, iDummy, NULL); |  | 
|  4878     pData += n+iDummy; |  | 
|  4879     nData -= n+iDummy; |  | 
|  4880   } |  | 
|  4881 } |  | 
|  4882 #define ASSERT_VALID_LEAF_NODE(p, n) leafNodeValidate(p, n) |  | 
|  4883 #else |  | 
|  4884 #define ASSERT_VALID_LEAF_NODE(p, n) assert( 1 ) |  | 
|  4885 #endif |  | 
|  4886  |  | 
|  4887 /* Flush the current leaf node to %_segments, and adding the resulting |  | 
|  4888 ** blockid and the starting term to the interior node which will |  | 
|  4889 ** contain it. |  | 
|  4890 */ |  | 
|  4891 static int leafWriterInternalFlush(fulltext_vtab *v, LeafWriter *pWriter, |  | 
|  4892                                    int iData, int nData){ |  | 
|  4893   sqlite_int64 iBlockid = 0; |  | 
|  4894   const char *pStartingTerm; |  | 
|  4895   int nStartingTerm, rc, n; |  | 
|  4896  |  | 
|  4897   /* Must have the leading varint(0) flag, plus at least some |  | 
|  4898   ** valid-looking data. |  | 
|  4899   */ |  | 
|  4900   assert( nData>2 ); |  | 
|  4901   assert( iData>=0 ); |  | 
|  4902   assert( iData+nData<=pWriter->data.nData ); |  | 
|  4903   ASSERT_VALID_LEAF_NODE(pWriter->data.pData+iData, nData); |  | 
|  4904  |  | 
|  4905   rc = block_insert(v, pWriter->data.pData+iData, nData, &iBlockid); |  | 
|  4906   if( rc!=SQLITE_OK ) return rc; |  | 
|  4907   assert( iBlockid!=0 ); |  | 
|  4908  |  | 
|  4909   /* Reconstruct the first term in the leaf for purposes of building |  | 
|  4910   ** the interior node. |  | 
|  4911   */ |  | 
|  4912   n = fts3GetVarint32(pWriter->data.pData+iData+1, &nStartingTerm); |  | 
|  4913   pStartingTerm = pWriter->data.pData+iData+1+n; |  | 
|  4914   assert( pWriter->data.nData>iData+1+n+nStartingTerm ); |  | 
|  4915   assert( pWriter->nTermDistinct>0 ); |  | 
|  4916   assert( pWriter->nTermDistinct<=nStartingTerm ); |  | 
|  4917   nStartingTerm = pWriter->nTermDistinct; |  | 
|  4918  |  | 
|  4919   if( pWriter->has_parent ){ |  | 
|  4920     interiorWriterAppend(&pWriter->parentWriter, |  | 
|  4921                          pStartingTerm, nStartingTerm, iBlockid); |  | 
|  4922   }else{ |  | 
|  4923     interiorWriterInit(1, pStartingTerm, nStartingTerm, iBlockid, |  | 
|  4924                        &pWriter->parentWriter); |  | 
|  4925     pWriter->has_parent = 1; |  | 
|  4926   } |  | 
|  4927  |  | 
|  4928   /* Track the span of this segment's leaf nodes. */ |  | 
|  4929   if( pWriter->iEndBlockid==0 ){ |  | 
|  4930     pWriter->iEndBlockid = pWriter->iStartBlockid = iBlockid; |  | 
|  4931   }else{ |  | 
|  4932     pWriter->iEndBlockid++; |  | 
|  4933     assert( iBlockid==pWriter->iEndBlockid ); |  | 
|  4934   } |  | 
|  4935  |  | 
|  4936   return SQLITE_OK; |  | 
|  4937 } |  | 
|  4938 static int leafWriterFlush(fulltext_vtab *v, LeafWriter *pWriter){ |  | 
|  4939   int rc = leafWriterInternalFlush(v, pWriter, 0, pWriter->data.nData); |  | 
|  4940   if( rc!=SQLITE_OK ) return rc; |  | 
|  4941  |  | 
|  4942   /* Re-initialize the output buffer. */ |  | 
|  4943   dataBufferReset(&pWriter->data); |  | 
|  4944  |  | 
|  4945   return SQLITE_OK; |  | 
|  4946 } |  | 
|  4947  |  | 
|  4948 /* Fetch the root info for the segment.  If the entire leaf fits |  | 
|  4949 ** within ROOT_MAX, then it will be returned directly, otherwise it |  | 
|  4950 ** will be flushed and the root info will be returned from the |  | 
|  4951 ** interior node.  *piEndBlockid is set to the blockid of the last |  | 
|  4952 ** interior or leaf node written to disk (0 if none are written at |  | 
|  4953 ** all). |  | 
|  4954 */ |  | 
|  4955 static int leafWriterRootInfo(fulltext_vtab *v, LeafWriter *pWriter, |  | 
|  4956                               char **ppRootInfo, int *pnRootInfo, |  | 
|  4957                               sqlite_int64 *piEndBlockid){ |  | 
|  4958   /* we can fit the segment entirely inline */ |  | 
|  4959   if( !pWriter->has_parent && pWriter->data.nData<ROOT_MAX ){ |  | 
|  4960     *ppRootInfo = pWriter->data.pData; |  | 
|  4961     *pnRootInfo = pWriter->data.nData; |  | 
|  4962     *piEndBlockid = 0; |  | 
|  4963     return SQLITE_OK; |  | 
|  4964   } |  | 
|  4965  |  | 
|  4966   /* Flush remaining leaf data. */ |  | 
|  4967   if( pWriter->data.nData>0 ){ |  | 
|  4968     int rc = leafWriterFlush(v, pWriter); |  | 
|  4969     if( rc!=SQLITE_OK ) return rc; |  | 
|  4970   } |  | 
|  4971  |  | 
|  4972   /* We must have flushed a leaf at some point. */ |  | 
|  4973   assert( pWriter->has_parent ); |  | 
|  4974  |  | 
|  4975   /* Tenatively set the end leaf blockid as the end blockid.  If the |  | 
|  4976   ** interior node can be returned inline, this will be the final |  | 
|  4977   ** blockid, otherwise it will be overwritten by |  | 
|  4978   ** interiorWriterRootInfo(). |  | 
|  4979   */ |  | 
|  4980   *piEndBlockid = pWriter->iEndBlockid; |  | 
|  4981  |  | 
|  4982   return interiorWriterRootInfo(v, &pWriter->parentWriter, |  | 
|  4983                                 ppRootInfo, pnRootInfo, piEndBlockid); |  | 
|  4984 } |  | 
|  4985  |  | 
|  4986 /* Collect the rootInfo data and store it into the segment directory. |  | 
|  4987 ** This has the effect of flushing the segment's leaf data to |  | 
|  4988 ** %_segments, and also flushing any interior nodes to %_segments. |  | 
|  4989 */ |  | 
|  4990 static int leafWriterFinalize(fulltext_vtab *v, LeafWriter *pWriter){ |  | 
|  4991   sqlite_int64 iEndBlockid; |  | 
|  4992   char *pRootInfo; |  | 
|  4993   int rc, nRootInfo; |  | 
|  4994  |  | 
|  4995   rc = leafWriterRootInfo(v, pWriter, &pRootInfo, &nRootInfo, &iEndBlockid); |  | 
|  4996   if( rc!=SQLITE_OK ) return rc; |  | 
|  4997  |  | 
|  4998   /* Don't bother storing an entirely empty segment. */ |  | 
|  4999   if( iEndBlockid==0 && nRootInfo==0 ) return SQLITE_OK; |  | 
|  5000  |  | 
|  5001   return segdir_set(v, pWriter->iLevel, pWriter->idx, |  | 
|  5002                     pWriter->iStartBlockid, pWriter->iEndBlockid, |  | 
|  5003                     iEndBlockid, pRootInfo, nRootInfo); |  | 
|  5004 } |  | 
|  5005  |  | 
|  5006 static void leafWriterDestroy(LeafWriter *pWriter){ |  | 
|  5007   if( pWriter->has_parent ) interiorWriterDestroy(&pWriter->parentWriter); |  | 
|  5008   dataBufferDestroy(&pWriter->term); |  | 
|  5009   dataBufferDestroy(&pWriter->data); |  | 
|  5010 } |  | 
|  5011  |  | 
|  5012 /* Encode a term into the leafWriter, delta-encoding as appropriate. |  | 
|  5013 ** Returns the length of the new term which distinguishes it from the |  | 
|  5014 ** previous term, which can be used to set nTermDistinct when a node |  | 
|  5015 ** boundary is crossed. |  | 
|  5016 */ |  | 
|  5017 static int leafWriterEncodeTerm(LeafWriter *pWriter, |  | 
|  5018                                 const char *pTerm, int nTerm){ |  | 
|  5019   char c[VARINT_MAX+VARINT_MAX]; |  | 
|  5020   int n, nPrefix = 0; |  | 
|  5021  |  | 
|  5022   assert( nTerm>0 ); |  | 
|  5023   while( nPrefix<pWriter->term.nData && |  | 
|  5024          pTerm[nPrefix]==pWriter->term.pData[nPrefix] ){ |  | 
|  5025     nPrefix++; |  | 
|  5026     /* Failing this implies that the terms weren't in order. */ |  | 
|  5027     assert( nPrefix<nTerm ); |  | 
|  5028   } |  | 
|  5029  |  | 
|  5030   if( pWriter->data.nData==0 ){ |  | 
|  5031     /* Encode the node header and leading term as: |  | 
|  5032     **  varint(0) |  | 
|  5033     **  varint(nTerm) |  | 
|  5034     **  char pTerm[nTerm] |  | 
|  5035     */ |  | 
|  5036     n = fts3PutVarint(c, '\0'); |  | 
|  5037     n += fts3PutVarint(c+n, nTerm); |  | 
|  5038     dataBufferAppend2(&pWriter->data, c, n, pTerm, nTerm); |  | 
|  5039   }else{ |  | 
|  5040     /* Delta-encode the term as: |  | 
|  5041     **  varint(nPrefix) |  | 
|  5042     **  varint(nSuffix) |  | 
|  5043     **  char pTermSuffix[nSuffix] |  | 
|  5044     */ |  | 
|  5045     n = fts3PutVarint(c, nPrefix); |  | 
|  5046     n += fts3PutVarint(c+n, nTerm-nPrefix); |  | 
|  5047     dataBufferAppend2(&pWriter->data, c, n, pTerm+nPrefix, nTerm-nPrefix); |  | 
|  5048   } |  | 
|  5049   dataBufferReplace(&pWriter->term, pTerm, nTerm); |  | 
|  5050  |  | 
|  5051   return nPrefix+1; |  | 
|  5052 } |  | 
|  5053  |  | 
|  5054 /* Used to avoid a memmove when a large amount of doclist data is in |  | 
|  5055 ** the buffer.  This constructs a node and term header before |  | 
|  5056 ** iDoclistData and flushes the resulting complete node using |  | 
|  5057 ** leafWriterInternalFlush(). |  | 
|  5058 */ |  | 
|  5059 static int leafWriterInlineFlush(fulltext_vtab *v, LeafWriter *pWriter, |  | 
|  5060                                  const char *pTerm, int nTerm, |  | 
|  5061                                  int iDoclistData){ |  | 
|  5062   char c[VARINT_MAX+VARINT_MAX]; |  | 
|  5063   int iData, n = fts3PutVarint(c, 0); |  | 
|  5064   n += fts3PutVarint(c+n, nTerm); |  | 
|  5065  |  | 
|  5066   /* There should always be room for the header.  Even if pTerm shared |  | 
|  5067   ** a substantial prefix with the previous term, the entire prefix |  | 
|  5068   ** could be constructed from earlier data in the doclist, so there |  | 
|  5069   ** should be room. |  | 
|  5070   */ |  | 
|  5071   assert( iDoclistData>=n+nTerm ); |  | 
|  5072  |  | 
|  5073   iData = iDoclistData-(n+nTerm); |  | 
|  5074   memcpy(pWriter->data.pData+iData, c, n); |  | 
|  5075   memcpy(pWriter->data.pData+iData+n, pTerm, nTerm); |  | 
|  5076  |  | 
|  5077   return leafWriterInternalFlush(v, pWriter, iData, pWriter->data.nData-iData); |  | 
|  5078 } |  | 
|  5079  |  | 
|  5080 /* Push pTerm[nTerm] along with the doclist data to the leaf layer of |  | 
|  5081 ** %_segments. |  | 
|  5082 */ |  | 
|  5083 static int leafWriterStepMerge(fulltext_vtab *v, LeafWriter *pWriter, |  | 
|  5084                                const char *pTerm, int nTerm, |  | 
|  5085                                DLReader *pReaders, int nReaders){ |  | 
|  5086   char c[VARINT_MAX+VARINT_MAX]; |  | 
|  5087   int iTermData = pWriter->data.nData, iDoclistData; |  | 
|  5088   int i, nData, n, nActualData, nActual, rc, nTermDistinct; |  | 
|  5089  |  | 
|  5090   ASSERT_VALID_LEAF_NODE(pWriter->data.pData, pWriter->data.nData); |  | 
|  5091   nTermDistinct = leafWriterEncodeTerm(pWriter, pTerm, nTerm); |  | 
|  5092  |  | 
|  5093   /* Remember nTermDistinct if opening a new node. */ |  | 
|  5094   if( iTermData==0 ) pWriter->nTermDistinct = nTermDistinct; |  | 
|  5095  |  | 
|  5096   iDoclistData = pWriter->data.nData; |  | 
|  5097  |  | 
|  5098   /* Estimate the length of the merged doclist so we can leave space |  | 
|  5099   ** to encode it. |  | 
|  5100   */ |  | 
|  5101   for(i=0, nData=0; i<nReaders; i++){ |  | 
|  5102     nData += dlrAllDataBytes(&pReaders[i]); |  | 
|  5103   } |  | 
|  5104   n = fts3PutVarint(c, nData); |  | 
|  5105   dataBufferAppend(&pWriter->data, c, n); |  | 
|  5106  |  | 
|  5107   rc = docListMerge(&pWriter->data, pReaders, nReaders); |  | 
|  5108   if( rc!=SQLITE_OK ) return rc; |  | 
|  5109   ASSERT_VALID_DOCLIST(DL_DEFAULT, |  | 
|  5110                        pWriter->data.pData+iDoclistData+n, |  | 
|  5111                        pWriter->data.nData-iDoclistData-n, NULL); |  | 
|  5112  |  | 
|  5113   /* The actual amount of doclist data at this point could be smaller |  | 
|  5114   ** than the length we encoded.  Additionally, the space required to |  | 
|  5115   ** encode this length could be smaller.  For small doclists, this is |  | 
|  5116   ** not a big deal, we can just use memmove() to adjust things. |  | 
|  5117   */ |  | 
|  5118   nActualData = pWriter->data.nData-(iDoclistData+n); |  | 
|  5119   nActual = fts3PutVarint(c, nActualData); |  | 
|  5120   assert( nActualData<=nData ); |  | 
|  5121   assert( nActual<=n ); |  | 
|  5122  |  | 
|  5123   /* If the new doclist is big enough for force a standalone leaf |  | 
|  5124   ** node, we can immediately flush it inline without doing the |  | 
|  5125   ** memmove(). |  | 
|  5126   */ |  | 
|  5127   /* TODO(shess) This test matches leafWriterStep(), which does this |  | 
|  5128   ** test before it knows the cost to varint-encode the term and |  | 
|  5129   ** doclist lengths.  At some point, change to |  | 
|  5130   ** pWriter->data.nData-iTermData>STANDALONE_MIN. |  | 
|  5131   */ |  | 
|  5132   if( nTerm+nActualData>STANDALONE_MIN ){ |  | 
|  5133     /* Push leaf node from before this term. */ |  | 
|  5134     if( iTermData>0 ){ |  | 
|  5135       rc = leafWriterInternalFlush(v, pWriter, 0, iTermData); |  | 
|  5136       if( rc!=SQLITE_OK ) return rc; |  | 
|  5137  |  | 
|  5138       pWriter->nTermDistinct = nTermDistinct; |  | 
|  5139     } |  | 
|  5140  |  | 
|  5141     /* Fix the encoded doclist length. */ |  | 
|  5142     iDoclistData += n - nActual; |  | 
|  5143     memcpy(pWriter->data.pData+iDoclistData, c, nActual); |  | 
|  5144  |  | 
|  5145     /* Push the standalone leaf node. */ |  | 
|  5146     rc = leafWriterInlineFlush(v, pWriter, pTerm, nTerm, iDoclistData); |  | 
|  5147     if( rc!=SQLITE_OK ) return rc; |  | 
|  5148  |  | 
|  5149     /* Leave the node empty. */ |  | 
|  5150     dataBufferReset(&pWriter->data); |  | 
|  5151  |  | 
|  5152     return rc; |  | 
|  5153   } |  | 
|  5154  |  | 
|  5155   /* At this point, we know that the doclist was small, so do the |  | 
|  5156   ** memmove if indicated. |  | 
|  5157   */ |  | 
|  5158   if( nActual<n ){ |  | 
|  5159     memmove(pWriter->data.pData+iDoclistData+nActual, |  | 
|  5160             pWriter->data.pData+iDoclistData+n, |  | 
|  5161             pWriter->data.nData-(iDoclistData+n)); |  | 
|  5162     pWriter->data.nData -= n-nActual; |  | 
|  5163   } |  | 
|  5164  |  | 
|  5165   /* Replace written length with actual length. */ |  | 
|  5166   memcpy(pWriter->data.pData+iDoclistData, c, nActual); |  | 
|  5167  |  | 
|  5168   /* If the node is too large, break things up. */ |  | 
|  5169   /* TODO(shess) This test matches leafWriterStep(), which does this |  | 
|  5170   ** test before it knows the cost to varint-encode the term and |  | 
|  5171   ** doclist lengths.  At some point, change to |  | 
|  5172   ** pWriter->data.nData>LEAF_MAX. |  | 
|  5173   */ |  | 
|  5174   if( iTermData+nTerm+nActualData>LEAF_MAX ){ |  | 
|  5175     /* Flush out the leading data as a node */ |  | 
|  5176     rc = leafWriterInternalFlush(v, pWriter, 0, iTermData); |  | 
|  5177     if( rc!=SQLITE_OK ) return rc; |  | 
|  5178  |  | 
|  5179     pWriter->nTermDistinct = nTermDistinct; |  | 
|  5180  |  | 
|  5181     /* Rebuild header using the current term */ |  | 
|  5182     n = fts3PutVarint(pWriter->data.pData, 0); |  | 
|  5183     n += fts3PutVarint(pWriter->data.pData+n, nTerm); |  | 
|  5184     memcpy(pWriter->data.pData+n, pTerm, nTerm); |  | 
|  5185     n += nTerm; |  | 
|  5186  |  | 
|  5187     /* There should always be room, because the previous encoding |  | 
|  5188     ** included all data necessary to construct the term. |  | 
|  5189     */ |  | 
|  5190     assert( n<iDoclistData ); |  | 
|  5191     /* So long as STANDALONE_MIN is half or less of LEAF_MAX, the |  | 
|  5192     ** following memcpy() is safe (as opposed to needing a memmove). |  | 
|  5193     */ |  | 
|  5194     assert( 2*STANDALONE_MIN<=LEAF_MAX ); |  | 
|  5195     assert( n+pWriter->data.nData-iDoclistData<iDoclistData ); |  | 
|  5196     memcpy(pWriter->data.pData+n, |  | 
|  5197            pWriter->data.pData+iDoclistData, |  | 
|  5198            pWriter->data.nData-iDoclistData); |  | 
|  5199     pWriter->data.nData -= iDoclistData-n; |  | 
|  5200   } |  | 
|  5201   ASSERT_VALID_LEAF_NODE(pWriter->data.pData, pWriter->data.nData); |  | 
|  5202  |  | 
|  5203   return SQLITE_OK; |  | 
|  5204 } |  | 
|  5205  |  | 
|  5206 /* Push pTerm[nTerm] along with the doclist data to the leaf layer of |  | 
|  5207 ** %_segments. |  | 
|  5208 */ |  | 
|  5209 /* TODO(shess) Revise writeZeroSegment() so that doclists are |  | 
|  5210 ** constructed directly in pWriter->data. |  | 
|  5211 */ |  | 
|  5212 static int leafWriterStep(fulltext_vtab *v, LeafWriter *pWriter, |  | 
|  5213                           const char *pTerm, int nTerm, |  | 
|  5214                           const char *pData, int nData){ |  | 
|  5215   int rc; |  | 
|  5216   DLReader reader; |  | 
|  5217  |  | 
|  5218   rc = dlrInit(&reader, DL_DEFAULT, pData, nData); |  | 
|  5219   if( rc!=SQLITE_OK ) return rc; |  | 
|  5220   rc = leafWriterStepMerge(v, pWriter, pTerm, nTerm, &reader, 1); |  | 
|  5221   dlrDestroy(&reader); |  | 
|  5222  |  | 
|  5223   return rc; |  | 
|  5224 } |  | 
|  5225  |  | 
|  5226  |  | 
|  5227 /****************************************************************/ |  | 
|  5228 /* LeafReader is used to iterate over an individual leaf node. */ |  | 
|  5229 typedef struct LeafReader { |  | 
|  5230   DataBuffer term;          /* copy of current term. */ |  | 
|  5231  |  | 
|  5232   const char *pData;        /* data for current term. */ |  | 
|  5233   int nData; |  | 
|  5234 } LeafReader; |  | 
|  5235  |  | 
|  5236 static void leafReaderDestroy(LeafReader *pReader){ |  | 
|  5237   dataBufferDestroy(&pReader->term); |  | 
|  5238   SCRAMBLE(pReader); |  | 
|  5239 } |  | 
|  5240  |  | 
|  5241 static int leafReaderAtEnd(LeafReader *pReader){ |  | 
|  5242   return pReader->nData<=0; |  | 
|  5243 } |  | 
|  5244  |  | 
|  5245 /* Access the current term. */ |  | 
|  5246 static int leafReaderTermBytes(LeafReader *pReader){ |  | 
|  5247   return pReader->term.nData; |  | 
|  5248 } |  | 
|  5249 static const char *leafReaderTerm(LeafReader *pReader){ |  | 
|  5250   assert( pReader->term.nData>0 ); |  | 
|  5251   return pReader->term.pData; |  | 
|  5252 } |  | 
|  5253  |  | 
|  5254 /* Access the doclist data for the current term. */ |  | 
|  5255 static int leafReaderDataBytes(LeafReader *pReader){ |  | 
|  5256   int nData; |  | 
|  5257   assert( pReader->term.nData>0 ); |  | 
|  5258   fts3GetVarint32(pReader->pData, &nData); |  | 
|  5259   return nData; |  | 
|  5260 } |  | 
|  5261 static const char *leafReaderData(LeafReader *pReader){ |  | 
|  5262   int n, nData; |  | 
|  5263   assert( pReader->term.nData>0 ); |  | 
|  5264   n = fts3GetVarint32Safe(pReader->pData, &nData, pReader->nData); |  | 
|  5265   if( !n || nData>pReader->nData-n ) return NULL; |  | 
|  5266   return pReader->pData+n; |  | 
|  5267 } |  | 
|  5268  |  | 
|  5269 static int leafReaderInit(const char *pData, int nData, |  | 
|  5270                           LeafReader *pReader){ |  | 
|  5271   int nTerm, n; |  | 
|  5272  |  | 
|  5273   /* All callers check this precondition. */ |  | 
|  5274   assert( nData>0 ); |  | 
|  5275   assert( pData[0]=='\0' ); |  | 
|  5276  |  | 
|  5277   CLEAR(pReader); |  | 
|  5278  |  | 
|  5279   /* Read the first term, skipping the header byte. */ |  | 
|  5280   n = fts3GetVarint32Safe(pData+1, &nTerm, nData-1); |  | 
|  5281   if( !n || nTerm<0 || nTerm>nData-1-n ) return SQLITE_CORRUPT_BKPT; |  | 
|  5282   dataBufferInit(&pReader->term, nTerm); |  | 
|  5283   dataBufferReplace(&pReader->term, pData+1+n, nTerm); |  | 
|  5284  |  | 
|  5285   /* Position after the first term. */ |  | 
|  5286   pReader->pData = pData+1+n+nTerm; |  | 
|  5287   pReader->nData = nData-1-n-nTerm; |  | 
|  5288   return SQLITE_OK; |  | 
|  5289 } |  | 
|  5290  |  | 
|  5291 /* Step the reader forward to the next term. */ |  | 
|  5292 static int leafReaderStep(LeafReader *pReader){ |  | 
|  5293   int n, nData, nPrefix, nSuffix; |  | 
|  5294   assert( !leafReaderAtEnd(pReader) ); |  | 
|  5295  |  | 
|  5296   /* Skip previous entry's data block. */ |  | 
|  5297   n = fts3GetVarint32Safe(pReader->pData, &nData, pReader->nData); |  | 
|  5298   if( !n || nData<0 || nData>pReader->nData-n ) return SQLITE_CORRUPT_BKPT; |  | 
|  5299   pReader->pData += n+nData; |  | 
|  5300   pReader->nData -= n+nData; |  | 
|  5301  |  | 
|  5302   if( !leafReaderAtEnd(pReader) ){ |  | 
|  5303     /* Construct the new term using a prefix from the old term plus a |  | 
|  5304     ** suffix from the leaf data. |  | 
|  5305     */ |  | 
|  5306     n = fts3GetVarint32Safe(pReader->pData, &nPrefix, pReader->nData); |  | 
|  5307     if( !n ) return SQLITE_CORRUPT_BKPT; |  | 
|  5308     pReader->nData -= n; |  | 
|  5309     pReader->pData += n; |  | 
|  5310     n = fts3GetVarint32Safe(pReader->pData, &nSuffix, pReader->nData); |  | 
|  5311     if( !n ) return SQLITE_CORRUPT_BKPT; |  | 
|  5312     pReader->nData -= n; |  | 
|  5313     pReader->pData += n; |  | 
|  5314     if( nSuffix<0 || nSuffix>pReader->nData ) return SQLITE_CORRUPT_BKPT; |  | 
|  5315     if( nPrefix<0 || nPrefix>pReader->term.nData ) return SQLITE_CORRUPT_BKPT; |  | 
|  5316     pReader->term.nData = nPrefix; |  | 
|  5317     dataBufferAppend(&pReader->term, pReader->pData, nSuffix); |  | 
|  5318  |  | 
|  5319     pReader->pData += nSuffix; |  | 
|  5320     pReader->nData -= nSuffix; |  | 
|  5321   } |  | 
|  5322   return SQLITE_OK; |  | 
|  5323 } |  | 
|  5324  |  | 
|  5325 /* strcmp-style comparison of pReader's current term against pTerm. |  | 
|  5326 ** If isPrefix, equality means equal through nTerm bytes. |  | 
|  5327 */ |  | 
|  5328 static int leafReaderTermCmp(LeafReader *pReader, |  | 
|  5329                              const char *pTerm, int nTerm, int isPrefix){ |  | 
|  5330   int c, n = pReader->term.nData<nTerm ? pReader->term.nData : nTerm; |  | 
|  5331   if( n==0 ){ |  | 
|  5332     if( pReader->term.nData>0 ) return -1; |  | 
|  5333     if(nTerm>0 ) return 1; |  | 
|  5334     return 0; |  | 
|  5335   } |  | 
|  5336  |  | 
|  5337   c = memcmp(pReader->term.pData, pTerm, n); |  | 
|  5338   if( c!=0 ) return c; |  | 
|  5339   if( isPrefix && n==nTerm ) return 0; |  | 
|  5340   return pReader->term.nData - nTerm; |  | 
|  5341 } |  | 
|  5342  |  | 
|  5343  |  | 
|  5344 /****************************************************************/ |  | 
|  5345 /* LeavesReader wraps LeafReader to allow iterating over the entire |  | 
|  5346 ** leaf layer of the tree. |  | 
|  5347 */ |  | 
|  5348 typedef struct LeavesReader { |  | 
|  5349   int idx;                  /* Index within the segment. */ |  | 
|  5350  |  | 
|  5351   sqlite3_stmt *pStmt;      /* Statement we're streaming leaves from. */ |  | 
|  5352   int eof;                  /* we've seen SQLITE_DONE from pStmt. */ |  | 
|  5353  |  | 
|  5354   LeafReader leafReader;    /* reader for the current leaf. */ |  | 
|  5355   DataBuffer rootData;      /* root data for inline. */ |  | 
|  5356 } LeavesReader; |  | 
|  5357  |  | 
|  5358 /* Access the current term. */ |  | 
|  5359 static int leavesReaderTermBytes(LeavesReader *pReader){ |  | 
|  5360   assert( !pReader->eof ); |  | 
|  5361   return leafReaderTermBytes(&pReader->leafReader); |  | 
|  5362 } |  | 
|  5363 static const char *leavesReaderTerm(LeavesReader *pReader){ |  | 
|  5364   assert( !pReader->eof ); |  | 
|  5365   return leafReaderTerm(&pReader->leafReader); |  | 
|  5366 } |  | 
|  5367  |  | 
|  5368 /* Access the doclist data for the current term. */ |  | 
|  5369 static int leavesReaderDataBytes(LeavesReader *pReader){ |  | 
|  5370   assert( !pReader->eof ); |  | 
|  5371   return leafReaderDataBytes(&pReader->leafReader); |  | 
|  5372 } |  | 
|  5373 static const char *leavesReaderData(LeavesReader *pReader){ |  | 
|  5374   assert( !pReader->eof ); |  | 
|  5375   return leafReaderData(&pReader->leafReader); |  | 
|  5376 } |  | 
|  5377  |  | 
|  5378 static int leavesReaderAtEnd(LeavesReader *pReader){ |  | 
|  5379   return pReader->eof; |  | 
|  5380 } |  | 
|  5381  |  | 
|  5382 /* loadSegmentLeaves() may not read all the way to SQLITE_DONE, thus |  | 
|  5383 ** leaving the statement handle open, which locks the table. |  | 
|  5384 */ |  | 
|  5385 /* TODO(shess) This "solution" is not satisfactory.  Really, there |  | 
|  5386 ** should be check-in function for all statement handles which |  | 
|  5387 ** arranges to call sqlite3_reset().  This most likely will require |  | 
|  5388 ** modification to control flow all over the place, though, so for now |  | 
|  5389 ** just punt. |  | 
|  5390 ** |  | 
|  5391 ** Note the the current system assumes that segment merges will run to |  | 
|  5392 ** completion, which is why this particular probably hasn't arisen in |  | 
|  5393 ** this case.  Probably a brittle assumption. |  | 
|  5394 */ |  | 
|  5395 static int leavesReaderReset(LeavesReader *pReader){ |  | 
|  5396   return sqlite3_reset(pReader->pStmt); |  | 
|  5397 } |  | 
|  5398  |  | 
|  5399 static void leavesReaderDestroy(LeavesReader *pReader){ |  | 
|  5400   /* If idx is -1, that means we're using a non-cached statement |  | 
|  5401   ** handle in the optimize() case, so we need to release it. |  | 
|  5402   */ |  | 
|  5403   if( pReader->pStmt!=NULL && pReader->idx==-1 ){ |  | 
|  5404     sqlite3_finalize(pReader->pStmt); |  | 
|  5405   } |  | 
|  5406   leafReaderDestroy(&pReader->leafReader); |  | 
|  5407   dataBufferDestroy(&pReader->rootData); |  | 
|  5408   SCRAMBLE(pReader); |  | 
|  5409 } |  | 
|  5410  |  | 
|  5411 /* Initialize pReader with the given root data (if iStartBlockid==0 |  | 
|  5412 ** the leaf data was entirely contained in the root), or from the |  | 
|  5413 ** stream of blocks between iStartBlockid and iEndBlockid, inclusive. |  | 
|  5414 */ |  | 
|  5415 static int leavesReaderInit(fulltext_vtab *v, |  | 
|  5416                             int idx, |  | 
|  5417                             sqlite_int64 iStartBlockid, |  | 
|  5418                             sqlite_int64 iEndBlockid, |  | 
|  5419                             const char *pRootData, int nRootData, |  | 
|  5420                             LeavesReader *pReader){ |  | 
|  5421   CLEAR(pReader); |  | 
|  5422   pReader->idx = idx; |  | 
|  5423  |  | 
|  5424   dataBufferInit(&pReader->rootData, 0); |  | 
|  5425   if( iStartBlockid==0 ){ |  | 
|  5426     int rc; |  | 
|  5427     /* Corrupt if this can't be a leaf node. */ |  | 
|  5428     if( pRootData==NULL || nRootData<1 || pRootData[0]!='\0' ){ |  | 
|  5429       return SQLITE_CORRUPT_BKPT; |  | 
|  5430     } |  | 
|  5431     /* Entire leaf level fit in root data. */ |  | 
|  5432     dataBufferReplace(&pReader->rootData, pRootData, nRootData); |  | 
|  5433     rc = leafReaderInit(pReader->rootData.pData, pReader->rootData.nData, |  | 
|  5434                         &pReader->leafReader); |  | 
|  5435     if( rc!=SQLITE_OK ){ |  | 
|  5436       dataBufferDestroy(&pReader->rootData); |  | 
|  5437       return rc; |  | 
|  5438     } |  | 
|  5439   }else{ |  | 
|  5440     sqlite3_stmt *s; |  | 
|  5441     int rc = sql_get_leaf_statement(v, idx, &s); |  | 
|  5442     if( rc!=SQLITE_OK ) return rc; |  | 
|  5443  |  | 
|  5444     rc = sqlite3_bind_int64(s, 1, iStartBlockid); |  | 
|  5445     if( rc!=SQLITE_OK ) goto err; |  | 
|  5446  |  | 
|  5447     rc = sqlite3_bind_int64(s, 2, iEndBlockid); |  | 
|  5448     if( rc!=SQLITE_OK ) goto err; |  | 
|  5449  |  | 
|  5450     rc = sqlite3_step(s); |  | 
|  5451  |  | 
|  5452     /* Corrupt if interior node referenced missing leaf node. */ |  | 
|  5453     if( rc==SQLITE_DONE ){ |  | 
|  5454       rc = SQLITE_CORRUPT_BKPT; |  | 
|  5455       goto err; |  | 
|  5456     } |  | 
|  5457  |  | 
|  5458     if( rc!=SQLITE_ROW ) goto err; |  | 
|  5459     rc = SQLITE_OK; |  | 
|  5460  |  | 
|  5461     /* Corrupt if leaf data isn't a blob. */ |  | 
|  5462     if( sqlite3_column_type(s, 0)!=SQLITE_BLOB ){ |  | 
|  5463       rc = SQLITE_CORRUPT_BKPT; |  | 
|  5464     }else{ |  | 
|  5465       const char *pLeafData = sqlite3_column_blob(s, 0); |  | 
|  5466       int nLeafData = sqlite3_column_bytes(s, 0); |  | 
|  5467  |  | 
|  5468       /* Corrupt if this can't be a leaf node. */ |  | 
|  5469       if( pLeafData==NULL || nLeafData<1 || pLeafData[0]!='\0' ){ |  | 
|  5470         rc = SQLITE_CORRUPT_BKPT; |  | 
|  5471       }else{ |  | 
|  5472         rc = leafReaderInit(pLeafData, nLeafData, &pReader->leafReader); |  | 
|  5473       } |  | 
|  5474     } |  | 
|  5475  |  | 
|  5476  err: |  | 
|  5477     if( rc!=SQLITE_OK ){ |  | 
|  5478       if( idx==-1 ){ |  | 
|  5479         sqlite3_finalize(s); |  | 
|  5480       }else{ |  | 
|  5481         sqlite3_reset(s); |  | 
|  5482       } |  | 
|  5483       return rc; |  | 
|  5484     } |  | 
|  5485  |  | 
|  5486     pReader->pStmt = s; |  | 
|  5487   } |  | 
|  5488   return SQLITE_OK; |  | 
|  5489 } |  | 
|  5490  |  | 
|  5491 /* Step the current leaf forward to the next term.  If we reach the |  | 
|  5492 ** end of the current leaf, step forward to the next leaf block. |  | 
|  5493 */ |  | 
|  5494 static int leavesReaderStep(fulltext_vtab *v, LeavesReader *pReader){ |  | 
|  5495   int rc; |  | 
|  5496   assert( !leavesReaderAtEnd(pReader) ); |  | 
|  5497   rc = leafReaderStep(&pReader->leafReader); |  | 
|  5498   if( rc!=SQLITE_OK ) return rc; |  | 
|  5499  |  | 
|  5500   if( leafReaderAtEnd(&pReader->leafReader) ){ |  | 
|  5501     if( pReader->rootData.pData ){ |  | 
|  5502       pReader->eof = 1; |  | 
|  5503       return SQLITE_OK; |  | 
|  5504     } |  | 
|  5505     rc = sqlite3_step(pReader->pStmt); |  | 
|  5506     if( rc!=SQLITE_ROW ){ |  | 
|  5507       pReader->eof = 1; |  | 
|  5508       return rc==SQLITE_DONE ? SQLITE_OK : rc; |  | 
|  5509     } |  | 
|  5510  |  | 
|  5511     /* Corrupt if leaf data isn't a blob. */ |  | 
|  5512     if( sqlite3_column_type(pReader->pStmt, 0)!=SQLITE_BLOB ){ |  | 
|  5513       return SQLITE_CORRUPT_BKPT; |  | 
|  5514     }else{ |  | 
|  5515       LeafReader tmp; |  | 
|  5516       const char *pLeafData = sqlite3_column_blob(pReader->pStmt, 0); |  | 
|  5517       int nLeafData = sqlite3_column_bytes(pReader->pStmt, 0); |  | 
|  5518  |  | 
|  5519       /* Corrupt if this can't be a leaf node. */ |  | 
|  5520       if( pLeafData==NULL || nLeafData<1 || pLeafData[0]!='\0' ){ |  | 
|  5521         return SQLITE_CORRUPT_BKPT; |  | 
|  5522       } |  | 
|  5523  |  | 
|  5524       rc = leafReaderInit(pLeafData, nLeafData, &tmp); |  | 
|  5525       if( rc!=SQLITE_OK ) return rc; |  | 
|  5526       leafReaderDestroy(&pReader->leafReader); |  | 
|  5527       pReader->leafReader = tmp; |  | 
|  5528     } |  | 
|  5529   } |  | 
|  5530   return SQLITE_OK; |  | 
|  5531 } |  | 
|  5532  |  | 
|  5533 /* Order LeavesReaders by their term, ignoring idx.  Readers at eof |  | 
|  5534 ** always sort to the end. |  | 
|  5535 */ |  | 
|  5536 static int leavesReaderTermCmp(LeavesReader *lr1, LeavesReader *lr2){ |  | 
|  5537   if( leavesReaderAtEnd(lr1) ){ |  | 
|  5538     if( leavesReaderAtEnd(lr2) ) return 0; |  | 
|  5539     return 1; |  | 
|  5540   } |  | 
|  5541   if( leavesReaderAtEnd(lr2) ) return -1; |  | 
|  5542  |  | 
|  5543   return leafReaderTermCmp(&lr1->leafReader, |  | 
|  5544                            leavesReaderTerm(lr2), leavesReaderTermBytes(lr2), |  | 
|  5545                            0); |  | 
|  5546 } |  | 
|  5547  |  | 
|  5548 /* Similar to leavesReaderTermCmp(), with additional ordering by idx |  | 
|  5549 ** so that older segments sort before newer segments. |  | 
|  5550 */ |  | 
|  5551 static int leavesReaderCmp(LeavesReader *lr1, LeavesReader *lr2){ |  | 
|  5552   int c = leavesReaderTermCmp(lr1, lr2); |  | 
|  5553   if( c!=0 ) return c; |  | 
|  5554   return lr1->idx-lr2->idx; |  | 
|  5555 } |  | 
|  5556  |  | 
|  5557 /* Assume that pLr[1]..pLr[nLr] are sorted.  Bubble pLr[0] into its |  | 
|  5558 ** sorted position. |  | 
|  5559 */ |  | 
|  5560 static void leavesReaderReorder(LeavesReader *pLr, int nLr){ |  | 
|  5561   while( nLr>1 && leavesReaderCmp(pLr, pLr+1)>0 ){ |  | 
|  5562     LeavesReader tmp = pLr[0]; |  | 
|  5563     pLr[0] = pLr[1]; |  | 
|  5564     pLr[1] = tmp; |  | 
|  5565     nLr--; |  | 
|  5566     pLr++; |  | 
|  5567   } |  | 
|  5568 } |  | 
|  5569  |  | 
|  5570 /* Initializes pReaders with the segments from level iLevel, returning |  | 
|  5571 ** the number of segments in *piReaders.  Leaves pReaders in sorted |  | 
|  5572 ** order. |  | 
|  5573 */ |  | 
|  5574 static int leavesReadersInit(fulltext_vtab *v, int iLevel, |  | 
|  5575                              LeavesReader *pReaders, int *piReaders){ |  | 
|  5576   sqlite3_stmt *s; |  | 
|  5577   int i, rc = sql_get_statement(v, SEGDIR_SELECT_LEVEL_STMT, &s); |  | 
|  5578   if( rc!=SQLITE_OK ) return rc; |  | 
|  5579  |  | 
|  5580   rc = sqlite3_bind_int(s, 1, iLevel); |  | 
|  5581   if( rc!=SQLITE_OK ) return rc; |  | 
|  5582  |  | 
|  5583   i = 0; |  | 
|  5584   while( (rc = sqlite3_step(s))==SQLITE_ROW ){ |  | 
|  5585     sqlite_int64 iStart = sqlite3_column_int64(s, 0); |  | 
|  5586     sqlite_int64 iEnd = sqlite3_column_int64(s, 1); |  | 
|  5587     const char *pRootData = sqlite3_column_blob(s, 2); |  | 
|  5588     int nRootData = sqlite3_column_bytes(s, 2); |  | 
|  5589     sqlite_int64 iIndex = sqlite3_column_int64(s, 3); |  | 
|  5590  |  | 
|  5591     /* Corrupt if we get back different types than we stored. */ |  | 
|  5592     /* Also corrupt if the index is not sequential starting at 0. */ |  | 
|  5593     if( sqlite3_column_type(s, 0)!=SQLITE_INTEGER || |  | 
|  5594         sqlite3_column_type(s, 1)!=SQLITE_INTEGER || |  | 
|  5595         sqlite3_column_type(s, 2)!=SQLITE_BLOB || |  | 
|  5596         i!=iIndex || |  | 
|  5597         i>=MERGE_COUNT ){ |  | 
|  5598       rc = SQLITE_CORRUPT_BKPT; |  | 
|  5599       break; |  | 
|  5600     } |  | 
|  5601  |  | 
|  5602     rc = leavesReaderInit(v, i, iStart, iEnd, pRootData, nRootData, |  | 
|  5603                           &pReaders[i]); |  | 
|  5604     if( rc!=SQLITE_OK ) break; |  | 
|  5605  |  | 
|  5606     i++; |  | 
|  5607   } |  | 
|  5608   if( rc!=SQLITE_DONE ){ |  | 
|  5609     while( i-->0 ){ |  | 
|  5610       leavesReaderDestroy(&pReaders[i]); |  | 
|  5611     } |  | 
|  5612     sqlite3_reset(s);  /* So we don't leave a lock. */ |  | 
|  5613     return rc; |  | 
|  5614   } |  | 
|  5615  |  | 
|  5616   *piReaders = i; |  | 
|  5617  |  | 
|  5618   /* Leave our results sorted by term, then age. */ |  | 
|  5619   while( i-- ){ |  | 
|  5620     leavesReaderReorder(pReaders+i, *piReaders-i); |  | 
|  5621   } |  | 
|  5622   return SQLITE_OK; |  | 
|  5623 } |  | 
|  5624  |  | 
|  5625 /* Merge doclists from pReaders[nReaders] into a single doclist, which |  | 
|  5626 ** is written to pWriter.  Assumes pReaders is ordered oldest to |  | 
|  5627 ** newest. |  | 
|  5628 */ |  | 
|  5629 /* TODO(shess) Consider putting this inline in segmentMerge(). */ |  | 
|  5630 static int leavesReadersMerge(fulltext_vtab *v, |  | 
|  5631                               LeavesReader *pReaders, int nReaders, |  | 
|  5632                               LeafWriter *pWriter){ |  | 
|  5633   DLReader dlReaders[MERGE_COUNT]; |  | 
|  5634   const char *pTerm = leavesReaderTerm(pReaders); |  | 
|  5635   int i, nTerm = leavesReaderTermBytes(pReaders); |  | 
|  5636   int rc; |  | 
|  5637  |  | 
|  5638   assert( nReaders<=MERGE_COUNT ); |  | 
|  5639  |  | 
|  5640   for(i=0; i<nReaders; i++){ |  | 
|  5641     const char *pData = leavesReaderData(pReaders+i); |  | 
|  5642     if( pData==NULL ){ |  | 
|  5643       rc = SQLITE_CORRUPT_BKPT; |  | 
|  5644       break; |  | 
|  5645     } |  | 
|  5646     rc = dlrInit(&dlReaders[i], DL_DEFAULT, |  | 
|  5647                  pData, |  | 
|  5648                  leavesReaderDataBytes(pReaders+i)); |  | 
|  5649     if( rc!=SQLITE_OK ) break; |  | 
|  5650   } |  | 
|  5651   if( rc!=SQLITE_OK ){ |  | 
|  5652     while( i-->0 ){ |  | 
|  5653       dlrDestroy(&dlReaders[i]); |  | 
|  5654     } |  | 
|  5655     return rc; |  | 
|  5656   } |  | 
|  5657  |  | 
|  5658   return leafWriterStepMerge(v, pWriter, pTerm, nTerm, dlReaders, nReaders); |  | 
|  5659 } |  | 
|  5660  |  | 
|  5661 /* Forward ref due to mutual recursion with segdirNextIndex(). */ |  | 
|  5662 static int segmentMerge(fulltext_vtab *v, int iLevel); |  | 
|  5663  |  | 
|  5664 /* Put the next available index at iLevel into *pidx.  If iLevel |  | 
|  5665 ** already has MERGE_COUNT segments, they are merged to a higher |  | 
|  5666 ** level to make room. |  | 
|  5667 */ |  | 
|  5668 static int segdirNextIndex(fulltext_vtab *v, int iLevel, int *pidx){ |  | 
|  5669   int rc = segdir_max_index(v, iLevel, pidx); |  | 
|  5670   if( rc==SQLITE_DONE ){              /* No segments at iLevel. */ |  | 
|  5671     *pidx = 0; |  | 
|  5672   }else if( rc==SQLITE_ROW ){ |  | 
|  5673     if( *pidx==(MERGE_COUNT-1) ){ |  | 
|  5674       rc = segmentMerge(v, iLevel); |  | 
|  5675       if( rc!=SQLITE_OK ) return rc; |  | 
|  5676       *pidx = 0; |  | 
|  5677     }else{ |  | 
|  5678       (*pidx)++; |  | 
|  5679     } |  | 
|  5680   }else{ |  | 
|  5681     return rc; |  | 
|  5682   } |  | 
|  5683   return SQLITE_OK; |  | 
|  5684 } |  | 
|  5685  |  | 
|  5686 /* Merge MERGE_COUNT segments at iLevel into a new segment at |  | 
|  5687 ** iLevel+1.  If iLevel+1 is already full of segments, those will be |  | 
|  5688 ** merged to make room. |  | 
|  5689 */ |  | 
|  5690 static int segmentMerge(fulltext_vtab *v, int iLevel){ |  | 
|  5691   LeafWriter writer; |  | 
|  5692   LeavesReader lrs[MERGE_COUNT]; |  | 
|  5693   int i, rc, idx = 0; |  | 
|  5694  |  | 
|  5695   /* Determine the next available segment index at the next level, |  | 
|  5696   ** merging as necessary. |  | 
|  5697   */ |  | 
|  5698   rc = segdirNextIndex(v, iLevel+1, &idx); |  | 
|  5699   if( rc!=SQLITE_OK ) return rc; |  | 
|  5700  |  | 
|  5701   /* TODO(shess) This assumes that we'll always see exactly |  | 
|  5702   ** MERGE_COUNT segments to merge at a given level.  That will be |  | 
|  5703   ** broken if we allow the developer to request preemptive or |  | 
|  5704   ** deferred merging. |  | 
|  5705   */ |  | 
|  5706   memset(&lrs, '\0', sizeof(lrs)); |  | 
|  5707   rc = leavesReadersInit(v, iLevel, lrs, &i); |  | 
|  5708   if( rc!=SQLITE_OK ) return rc; |  | 
|  5709  |  | 
|  5710   leafWriterInit(iLevel+1, idx, &writer); |  | 
|  5711  |  | 
|  5712   if( i!=MERGE_COUNT ){ |  | 
|  5713     rc = SQLITE_CORRUPT_BKPT; |  | 
|  5714     goto err; |  | 
|  5715   } |  | 
|  5716  |  | 
|  5717   /* Since leavesReaderReorder() pushes readers at eof to the end, |  | 
|  5718   ** when the first reader is empty, all will be empty. |  | 
|  5719   */ |  | 
|  5720   while( !leavesReaderAtEnd(lrs) ){ |  | 
|  5721     /* Figure out how many readers share their next term. */ |  | 
|  5722     for(i=1; i<MERGE_COUNT && !leavesReaderAtEnd(lrs+i); i++){ |  | 
|  5723       if( 0!=leavesReaderTermCmp(lrs, lrs+i) ) break; |  | 
|  5724     } |  | 
|  5725  |  | 
|  5726     rc = leavesReadersMerge(v, lrs, i, &writer); |  | 
|  5727     if( rc!=SQLITE_OK ) goto err; |  | 
|  5728  |  | 
|  5729     /* Step forward those that were merged. */ |  | 
|  5730     while( i-->0 ){ |  | 
|  5731       rc = leavesReaderStep(v, lrs+i); |  | 
|  5732       if( rc!=SQLITE_OK ) goto err; |  | 
|  5733  |  | 
|  5734       /* Reorder by term, then by age. */ |  | 
|  5735       leavesReaderReorder(lrs+i, MERGE_COUNT-i); |  | 
|  5736     } |  | 
|  5737   } |  | 
|  5738  |  | 
|  5739   for(i=0; i<MERGE_COUNT; i++){ |  | 
|  5740     leavesReaderDestroy(&lrs[i]); |  | 
|  5741   } |  | 
|  5742  |  | 
|  5743   rc = leafWriterFinalize(v, &writer); |  | 
|  5744   leafWriterDestroy(&writer); |  | 
|  5745   if( rc!=SQLITE_OK ) return rc; |  | 
|  5746  |  | 
|  5747   /* Delete the merged segment data. */ |  | 
|  5748   return segdir_delete(v, iLevel); |  | 
|  5749  |  | 
|  5750  err: |  | 
|  5751   for(i=0; i<MERGE_COUNT; i++){ |  | 
|  5752     leavesReaderDestroy(&lrs[i]); |  | 
|  5753   } |  | 
|  5754   leafWriterDestroy(&writer); |  | 
|  5755   return rc; |  | 
|  5756 } |  | 
|  5757  |  | 
|  5758 /* Accumulate the union of *acc and *pData into *acc. */ |  | 
|  5759 static int docListAccumulateUnion(DataBuffer *acc, |  | 
|  5760                                   const char *pData, int nData) { |  | 
|  5761   DataBuffer tmp = *acc; |  | 
|  5762   int rc; |  | 
|  5763   dataBufferInit(acc, tmp.nData+nData); |  | 
|  5764   rc = docListUnion(tmp.pData, tmp.nData, pData, nData, acc); |  | 
|  5765   dataBufferDestroy(&tmp); |  | 
|  5766   return rc; |  | 
|  5767 } |  | 
|  5768  |  | 
|  5769 /* TODO(shess) It might be interesting to explore different merge |  | 
|  5770 ** strategies, here.  For instance, since this is a sorted merge, we |  | 
|  5771 ** could easily merge many doclists in parallel.  With some |  | 
|  5772 ** comprehension of the storage format, we could merge all of the |  | 
|  5773 ** doclists within a leaf node directly from the leaf node's storage. |  | 
|  5774 ** It may be worthwhile to merge smaller doclists before larger |  | 
|  5775 ** doclists, since they can be traversed more quickly - but the |  | 
|  5776 ** results may have less overlap, making them more expensive in a |  | 
|  5777 ** different way. |  | 
|  5778 */ |  | 
|  5779  |  | 
|  5780 /* Scan pReader for pTerm/nTerm, and merge the term's doclist over |  | 
|  5781 ** *out (any doclists with duplicate docids overwrite those in *out). |  | 
|  5782 ** Internal function for loadSegmentLeaf(). |  | 
|  5783 */ |  | 
|  5784 static int loadSegmentLeavesInt(fulltext_vtab *v, LeavesReader *pReader, |  | 
|  5785                                 const char *pTerm, int nTerm, int isPrefix, |  | 
|  5786                                 DataBuffer *out){ |  | 
|  5787   /* doclist data is accumulated into pBuffers similar to how one does |  | 
|  5788   ** increment in binary arithmetic.  If index 0 is empty, the data is |  | 
|  5789   ** stored there.  If there is data there, it is merged and the |  | 
|  5790   ** results carried into position 1, with further merge-and-carry |  | 
|  5791   ** until an empty position is found. |  | 
|  5792   */ |  | 
|  5793   DataBuffer *pBuffers = NULL; |  | 
|  5794   int nBuffers = 0, nMaxBuffers = 0, rc; |  | 
|  5795  |  | 
|  5796   assert( nTerm>0 ); |  | 
|  5797  |  | 
|  5798   for(rc=SQLITE_OK; rc==SQLITE_OK && !leavesReaderAtEnd(pReader); |  | 
|  5799       rc=leavesReaderStep(v, pReader)){ |  | 
|  5800     /* TODO(shess) Really want leavesReaderTermCmp(), but that name is |  | 
|  5801     ** already taken to compare the terms of two LeavesReaders.  Think |  | 
|  5802     ** on a better name.  [Meanwhile, break encapsulation rather than |  | 
|  5803     ** use a confusing name.] |  | 
|  5804     */ |  | 
|  5805     int c = leafReaderTermCmp(&pReader->leafReader, pTerm, nTerm, isPrefix); |  | 
|  5806     if( c>0 ) break;      /* Past any possible matches. */ |  | 
|  5807     if( c==0 ){ |  | 
|  5808       int iBuffer, nData; |  | 
|  5809       const char *pData = leavesReaderData(pReader); |  | 
|  5810       if( pData==NULL ){ |  | 
|  5811         rc = SQLITE_CORRUPT_BKPT; |  | 
|  5812         break; |  | 
|  5813       } |  | 
|  5814       nData = leavesReaderDataBytes(pReader); |  | 
|  5815  |  | 
|  5816       /* Find the first empty buffer. */ |  | 
|  5817       for(iBuffer=0; iBuffer<nBuffers; ++iBuffer){ |  | 
|  5818         if( 0==pBuffers[iBuffer].nData ) break; |  | 
|  5819       } |  | 
|  5820  |  | 
|  5821       /* Out of buffers, add an empty one. */ |  | 
|  5822       if( iBuffer==nBuffers ){ |  | 
|  5823         if( nBuffers==nMaxBuffers ){ |  | 
|  5824           DataBuffer *p; |  | 
|  5825           nMaxBuffers += 20; |  | 
|  5826  |  | 
|  5827           /* Manual realloc so we can handle NULL appropriately. */ |  | 
|  5828           p = sqlite3_malloc(nMaxBuffers*sizeof(*pBuffers)); |  | 
|  5829           if( p==NULL ){ |  | 
|  5830             rc = SQLITE_NOMEM; |  | 
|  5831             break; |  | 
|  5832           } |  | 
|  5833  |  | 
|  5834           if( nBuffers>0 ){ |  | 
|  5835             assert(pBuffers!=NULL); |  | 
|  5836             memcpy(p, pBuffers, nBuffers*sizeof(*pBuffers)); |  | 
|  5837             sqlite3_free(pBuffers); |  | 
|  5838           } |  | 
|  5839           pBuffers = p; |  | 
|  5840         } |  | 
|  5841         dataBufferInit(&(pBuffers[nBuffers]), 0); |  | 
|  5842         nBuffers++; |  | 
|  5843       } |  | 
|  5844  |  | 
|  5845       /* At this point, must have an empty at iBuffer. */ |  | 
|  5846       assert(iBuffer<nBuffers && pBuffers[iBuffer].nData==0); |  | 
|  5847  |  | 
|  5848       /* If empty was first buffer, no need for merge logic. */ |  | 
|  5849       if( iBuffer==0 ){ |  | 
|  5850         dataBufferReplace(&(pBuffers[0]), pData, nData); |  | 
|  5851       }else{ |  | 
|  5852         /* pAcc is the empty buffer the merged data will end up in. */ |  | 
|  5853         DataBuffer *pAcc = &(pBuffers[iBuffer]); |  | 
|  5854         DataBuffer *p = &(pBuffers[0]); |  | 
|  5855  |  | 
|  5856         /* Handle position 0 specially to avoid need to prime pAcc |  | 
|  5857         ** with pData/nData. |  | 
|  5858         */ |  | 
|  5859         dataBufferSwap(p, pAcc); |  | 
|  5860         rc = docListAccumulateUnion(pAcc, pData, nData); |  | 
|  5861         if( rc!=SQLITE_OK ) goto err; |  | 
|  5862  |  | 
|  5863         /* Accumulate remaining doclists into pAcc. */ |  | 
|  5864         for(++p; p<pAcc; ++p){ |  | 
|  5865           rc = docListAccumulateUnion(pAcc, p->pData, p->nData); |  | 
|  5866           if( rc!=SQLITE_OK ) goto err; |  | 
|  5867  |  | 
|  5868           /* dataBufferReset() could allow a large doclist to blow up |  | 
|  5869           ** our memory requirements. |  | 
|  5870           */ |  | 
|  5871           if( p->nCapacity<1024 ){ |  | 
|  5872             dataBufferReset(p); |  | 
|  5873           }else{ |  | 
|  5874             dataBufferDestroy(p); |  | 
|  5875             dataBufferInit(p, 0); |  | 
|  5876           } |  | 
|  5877         } |  | 
|  5878       } |  | 
|  5879     } |  | 
|  5880   } |  | 
|  5881  |  | 
|  5882   /* Union all the doclists together into *out. */ |  | 
|  5883   /* TODO(shess) What if *out is big?  Sigh. */ |  | 
|  5884   if( rc==SQLITE_OK && nBuffers>0 ){ |  | 
|  5885     int iBuffer; |  | 
|  5886     for(iBuffer=0; iBuffer<nBuffers; ++iBuffer){ |  | 
|  5887       if( pBuffers[iBuffer].nData>0 ){ |  | 
|  5888         if( out->nData==0 ){ |  | 
|  5889           dataBufferSwap(out, &(pBuffers[iBuffer])); |  | 
|  5890         }else{ |  | 
|  5891           rc = docListAccumulateUnion(out, pBuffers[iBuffer].pData, |  | 
|  5892                                       pBuffers[iBuffer].nData); |  | 
|  5893           if( rc!=SQLITE_OK ) break; |  | 
|  5894         } |  | 
|  5895       } |  | 
|  5896     } |  | 
|  5897   } |  | 
|  5898  |  | 
|  5899 err: |  | 
|  5900   while( nBuffers-- ){ |  | 
|  5901     dataBufferDestroy(&(pBuffers[nBuffers])); |  | 
|  5902   } |  | 
|  5903   if( pBuffers!=NULL ) sqlite3_free(pBuffers); |  | 
|  5904  |  | 
|  5905   return rc; |  | 
|  5906 } |  | 
|  5907  |  | 
|  5908 /* Call loadSegmentLeavesInt() with pData/nData as input. */ |  | 
|  5909 static int loadSegmentLeaf(fulltext_vtab *v, const char *pData, int nData, |  | 
|  5910                            const char *pTerm, int nTerm, int isPrefix, |  | 
|  5911                            DataBuffer *out){ |  | 
|  5912   LeavesReader reader; |  | 
|  5913   int rc; |  | 
|  5914  |  | 
|  5915   assert( nData>1 ); |  | 
|  5916   assert( *pData=='\0' ); |  | 
|  5917   rc = leavesReaderInit(v, 0, 0, 0, pData, nData, &reader); |  | 
|  5918   if( rc!=SQLITE_OK ) return rc; |  | 
|  5919  |  | 
|  5920   rc = loadSegmentLeavesInt(v, &reader, pTerm, nTerm, isPrefix, out); |  | 
|  5921   leavesReaderReset(&reader); |  | 
|  5922   leavesReaderDestroy(&reader); |  | 
|  5923   return rc; |  | 
|  5924 } |  | 
|  5925  |  | 
|  5926 /* Call loadSegmentLeavesInt() with the leaf nodes from iStartLeaf to |  | 
|  5927 ** iEndLeaf (inclusive) as input, and merge the resulting doclist into |  | 
|  5928 ** out. |  | 
|  5929 */ |  | 
|  5930 static int loadSegmentLeaves(fulltext_vtab *v, |  | 
|  5931                              sqlite_int64 iStartLeaf, sqlite_int64 iEndLeaf, |  | 
|  5932                              const char *pTerm, int nTerm, int isPrefix, |  | 
|  5933                              DataBuffer *out){ |  | 
|  5934   int rc; |  | 
|  5935   LeavesReader reader; |  | 
|  5936  |  | 
|  5937   assert( iStartLeaf<=iEndLeaf ); |  | 
|  5938   rc = leavesReaderInit(v, 0, iStartLeaf, iEndLeaf, NULL, 0, &reader); |  | 
|  5939   if( rc!=SQLITE_OK ) return rc; |  | 
|  5940  |  | 
|  5941   rc = loadSegmentLeavesInt(v, &reader, pTerm, nTerm, isPrefix, out); |  | 
|  5942   leavesReaderReset(&reader); |  | 
|  5943   leavesReaderDestroy(&reader); |  | 
|  5944   return rc; |  | 
|  5945 } |  | 
|  5946  |  | 
|  5947 /* Taking pData/nData as an interior node, find the sequence of child |  | 
|  5948 ** nodes which could include pTerm/nTerm/isPrefix.  Note that the |  | 
|  5949 ** interior node terms logically come between the blocks, so there is |  | 
|  5950 ** one more blockid than there are terms (that block contains terms >= |  | 
|  5951 ** the last interior-node term). |  | 
|  5952 */ |  | 
|  5953 /* TODO(shess) The calling code may already know that the end child is |  | 
|  5954 ** not worth calculating, because the end may be in a later sibling |  | 
|  5955 ** node.  Consider whether breaking symmetry is worthwhile.  I suspect |  | 
|  5956 ** it is not worthwhile. |  | 
|  5957 */ |  | 
|  5958 static int getChildrenContaining(const char *pData, int nData, |  | 
|  5959                                  const char *pTerm, int nTerm, int isPrefix, |  | 
|  5960                                  sqlite_int64 *piStartChild, |  | 
|  5961                                  sqlite_int64 *piEndChild){ |  | 
|  5962   InteriorReader reader; |  | 
|  5963   int rc; |  | 
|  5964  |  | 
|  5965   assert( nData>1 ); |  | 
|  5966   assert( *pData!='\0' ); |  | 
|  5967   rc = interiorReaderInit(pData, nData, &reader); |  | 
|  5968   if( rc!=SQLITE_OK ) return rc; |  | 
|  5969  |  | 
|  5970   /* Scan for the first child which could contain pTerm/nTerm. */ |  | 
|  5971   while( !interiorReaderAtEnd(&reader) ){ |  | 
|  5972     if( interiorReaderTermCmp(&reader, pTerm, nTerm, 0)>0 ) break; |  | 
|  5973     rc = interiorReaderStep(&reader); |  | 
|  5974     if( rc!=SQLITE_OK ){ |  | 
|  5975       interiorReaderDestroy(&reader); |  | 
|  5976       return rc; |  | 
|  5977     } |  | 
|  5978   } |  | 
|  5979   *piStartChild = interiorReaderCurrentBlockid(&reader); |  | 
|  5980  |  | 
|  5981   /* Keep scanning to find a term greater than our term, using prefix |  | 
|  5982   ** comparison if indicated.  If isPrefix is false, this will be the |  | 
|  5983   ** same blockid as the starting block. |  | 
|  5984   */ |  | 
|  5985   while( !interiorReaderAtEnd(&reader) ){ |  | 
|  5986     if( interiorReaderTermCmp(&reader, pTerm, nTerm, isPrefix)>0 ) break; |  | 
|  5987     rc = interiorReaderStep(&reader); |  | 
|  5988     if( rc!=SQLITE_OK ){ |  | 
|  5989       interiorReaderDestroy(&reader); |  | 
|  5990       return rc; |  | 
|  5991     } |  | 
|  5992   } |  | 
|  5993   *piEndChild = interiorReaderCurrentBlockid(&reader); |  | 
|  5994  |  | 
|  5995   interiorReaderDestroy(&reader); |  | 
|  5996  |  | 
|  5997   /* Children must ascend, and if !prefix, both must be the same. */ |  | 
|  5998   assert( *piEndChild>=*piStartChild ); |  | 
|  5999   assert( isPrefix || *piStartChild==*piEndChild ); |  | 
|  6000   return rc; |  | 
|  6001 } |  | 
|  6002  |  | 
|  6003 /* Read block at iBlockid and pass it with other params to |  | 
|  6004 ** getChildrenContaining(). |  | 
|  6005 */ |  | 
|  6006 static int loadAndGetChildrenContaining( |  | 
|  6007   fulltext_vtab *v, |  | 
|  6008   sqlite_int64 iBlockid, |  | 
|  6009   const char *pTerm, int nTerm, int isPrefix, |  | 
|  6010   sqlite_int64 *piStartChild, sqlite_int64 *piEndChild |  | 
|  6011 ){ |  | 
|  6012   sqlite3_stmt *s = NULL; |  | 
|  6013   int rc; |  | 
|  6014  |  | 
|  6015   assert( iBlockid!=0 ); |  | 
|  6016   assert( pTerm!=NULL ); |  | 
|  6017   assert( nTerm!=0 );        /* TODO(shess) Why not allow this? */ |  | 
|  6018   assert( piStartChild!=NULL ); |  | 
|  6019   assert( piEndChild!=NULL ); |  | 
|  6020  |  | 
|  6021   rc = sql_get_statement(v, BLOCK_SELECT_STMT, &s); |  | 
|  6022   if( rc!=SQLITE_OK ) return rc; |  | 
|  6023  |  | 
|  6024   rc = sqlite3_bind_int64(s, 1, iBlockid); |  | 
|  6025   if( rc!=SQLITE_OK ) return rc; |  | 
|  6026  |  | 
|  6027   rc = sqlite3_step(s); |  | 
|  6028   /* Corrupt if interior node references missing child node. */ |  | 
|  6029   if( rc==SQLITE_DONE ) return SQLITE_CORRUPT_BKPT; |  | 
|  6030   if( rc!=SQLITE_ROW ) return rc; |  | 
|  6031  |  | 
|  6032   /* Corrupt if child node isn't a blob. */ |  | 
|  6033   if( sqlite3_column_type(s, 0)!=SQLITE_BLOB ){ |  | 
|  6034     sqlite3_reset(s);  /* So we don't leave a lock. */ |  | 
|  6035     return SQLITE_CORRUPT_BKPT; |  | 
|  6036   }else{ |  | 
|  6037     const char *pData = sqlite3_column_blob(s, 0); |  | 
|  6038     int nData = sqlite3_column_bytes(s, 0); |  | 
|  6039  |  | 
|  6040     /* Corrupt if child is not a valid interior node. */ |  | 
|  6041     if( pData==NULL || nData<1 || pData[0]=='\0' ){ |  | 
|  6042       sqlite3_reset(s);  /* So we don't leave a lock. */ |  | 
|  6043       return SQLITE_CORRUPT_BKPT; |  | 
|  6044     } |  | 
|  6045  |  | 
|  6046     rc = getChildrenContaining(pData, nData, pTerm, nTerm, |  | 
|  6047                                isPrefix, piStartChild, piEndChild); |  | 
|  6048     if( rc!=SQLITE_OK ){ |  | 
|  6049       sqlite3_reset(s); |  | 
|  6050       return rc; |  | 
|  6051     } |  | 
|  6052   } |  | 
|  6053  |  | 
|  6054   /* We expect only one row.  We must execute another sqlite3_step() |  | 
|  6055    * to complete the iteration; otherwise the table will remain |  | 
|  6056    * locked. */ |  | 
|  6057   rc = sqlite3_step(s); |  | 
|  6058   if( rc==SQLITE_ROW ) return SQLITE_ERROR; |  | 
|  6059   if( rc!=SQLITE_DONE ) return rc; |  | 
|  6060  |  | 
|  6061   return SQLITE_OK; |  | 
|  6062 } |  | 
|  6063  |  | 
|  6064 /* Traverse the tree represented by pData[nData] looking for |  | 
|  6065 ** pTerm[nTerm], placing its doclist into *out.  This is internal to |  | 
|  6066 ** loadSegment() to make error-handling cleaner. |  | 
|  6067 */ |  | 
|  6068 static int loadSegmentInt(fulltext_vtab *v, const char *pData, int nData, |  | 
|  6069                           sqlite_int64 iLeavesEnd, |  | 
|  6070                           const char *pTerm, int nTerm, int isPrefix, |  | 
|  6071                           DataBuffer *out){ |  | 
|  6072   /* Special case where root is a leaf. */ |  | 
|  6073   if( *pData=='\0' ){ |  | 
|  6074     return loadSegmentLeaf(v, pData, nData, pTerm, nTerm, isPrefix, out); |  | 
|  6075   }else{ |  | 
|  6076     int rc; |  | 
|  6077     sqlite_int64 iStartChild, iEndChild; |  | 
|  6078  |  | 
|  6079     /* Process pData as an interior node, then loop down the tree |  | 
|  6080     ** until we find the set of leaf nodes to scan for the term. |  | 
|  6081     */ |  | 
|  6082     rc = getChildrenContaining(pData, nData, pTerm, nTerm, isPrefix, |  | 
|  6083                                &iStartChild, &iEndChild); |  | 
|  6084     if( rc!=SQLITE_OK ) return rc; |  | 
|  6085     while( iStartChild>iLeavesEnd ){ |  | 
|  6086       sqlite_int64 iNextStart, iNextEnd; |  | 
|  6087       rc = loadAndGetChildrenContaining(v, iStartChild, pTerm, nTerm, isPrefix, |  | 
|  6088                                         &iNextStart, &iNextEnd); |  | 
|  6089       if( rc!=SQLITE_OK ) return rc; |  | 
|  6090  |  | 
|  6091       /* If we've branched, follow the end branch, too. */ |  | 
|  6092       if( iStartChild!=iEndChild ){ |  | 
|  6093         sqlite_int64 iDummy; |  | 
|  6094         rc = loadAndGetChildrenContaining(v, iEndChild, pTerm, nTerm, isPrefix, |  | 
|  6095                                           &iDummy, &iNextEnd); |  | 
|  6096         if( rc!=SQLITE_OK ) return rc; |  | 
|  6097       } |  | 
|  6098  |  | 
|  6099       assert( iNextStart<=iNextEnd ); |  | 
|  6100       iStartChild = iNextStart; |  | 
|  6101       iEndChild = iNextEnd; |  | 
|  6102     } |  | 
|  6103     assert( iStartChild<=iLeavesEnd ); |  | 
|  6104     assert( iEndChild<=iLeavesEnd ); |  | 
|  6105  |  | 
|  6106     /* Scan through the leaf segments for doclists. */ |  | 
|  6107     return loadSegmentLeaves(v, iStartChild, iEndChild, |  | 
|  6108                              pTerm, nTerm, isPrefix, out); |  | 
|  6109   } |  | 
|  6110 } |  | 
|  6111  |  | 
|  6112 /* Call loadSegmentInt() to collect the doclist for pTerm/nTerm, then |  | 
|  6113 ** merge its doclist over *out (any duplicate doclists read from the |  | 
|  6114 ** segment rooted at pData will overwrite those in *out). |  | 
|  6115 */ |  | 
|  6116 /* TODO(shess) Consider changing this to determine the depth of the |  | 
|  6117 ** leaves using either the first characters of interior nodes (when |  | 
|  6118 ** ==1, we're one level above the leaves), or the first character of |  | 
|  6119 ** the root (which will describe the height of the tree directly). |  | 
|  6120 ** Either feels somewhat tricky to me. |  | 
|  6121 */ |  | 
|  6122 /* TODO(shess) The current merge is likely to be slow for large |  | 
|  6123 ** doclists (though it should process from newest/smallest to |  | 
|  6124 ** oldest/largest, so it may not be that bad).  It might be useful to |  | 
|  6125 ** modify things to allow for N-way merging.  This could either be |  | 
|  6126 ** within a segment, with pairwise merges across segments, or across |  | 
|  6127 ** all segments at once. |  | 
|  6128 */ |  | 
|  6129 static int loadSegment(fulltext_vtab *v, const char *pData, int nData, |  | 
|  6130                        sqlite_int64 iLeavesEnd, |  | 
|  6131                        const char *pTerm, int nTerm, int isPrefix, |  | 
|  6132                        DataBuffer *out){ |  | 
|  6133   DataBuffer result; |  | 
|  6134   int rc; |  | 
|  6135  |  | 
|  6136   /* Corrupt if segment root can't be valid. */ |  | 
|  6137   if( pData==NULL || nData<1 ) return SQLITE_CORRUPT_BKPT; |  | 
|  6138  |  | 
|  6139   /* This code should never be called with buffered updates. */ |  | 
|  6140   assert( v->nPendingData<0 ); |  | 
|  6141  |  | 
|  6142   dataBufferInit(&result, 0); |  | 
|  6143   rc = loadSegmentInt(v, pData, nData, iLeavesEnd, |  | 
|  6144                       pTerm, nTerm, isPrefix, &result); |  | 
|  6145   if( rc==SQLITE_OK && result.nData>0 ){ |  | 
|  6146     if( out->nData==0 ){ |  | 
|  6147       DataBuffer tmp = *out; |  | 
|  6148       *out = result; |  | 
|  6149       result = tmp; |  | 
|  6150     }else{ |  | 
|  6151       DataBuffer merged; |  | 
|  6152       DLReader readers[2]; |  | 
|  6153  |  | 
|  6154       rc = dlrInit(&readers[0], DL_DEFAULT, out->pData, out->nData); |  | 
|  6155       if( rc==SQLITE_OK ){ |  | 
|  6156         rc = dlrInit(&readers[1], DL_DEFAULT, result.pData, result.nData); |  | 
|  6157         if( rc==SQLITE_OK ){ |  | 
|  6158           dataBufferInit(&merged, out->nData+result.nData); |  | 
|  6159           rc = docListMerge(&merged, readers, 2); |  | 
|  6160           dataBufferDestroy(out); |  | 
|  6161           *out = merged; |  | 
|  6162           dlrDestroy(&readers[1]); |  | 
|  6163         } |  | 
|  6164         dlrDestroy(&readers[0]); |  | 
|  6165       } |  | 
|  6166     } |  | 
|  6167   } |  | 
|  6168  |  | 
|  6169   dataBufferDestroy(&result); |  | 
|  6170   return rc; |  | 
|  6171 } |  | 
|  6172  |  | 
|  6173 /* Scan the database and merge together the posting lists for the term |  | 
|  6174 ** into *out. |  | 
|  6175 */ |  | 
|  6176 static int termSelect( |  | 
|  6177   fulltext_vtab *v,  |  | 
|  6178   int iColumn, |  | 
|  6179   const char *pTerm, int nTerm,             /* Term to query for */ |  | 
|  6180   int isPrefix,                             /* True for a prefix search */ |  | 
|  6181   DocListType iType,  |  | 
|  6182   DataBuffer *out                           /* Write results here */ |  | 
|  6183 ){ |  | 
|  6184   DataBuffer doclist; |  | 
|  6185   sqlite3_stmt *s; |  | 
|  6186   int rc = sql_get_statement(v, SEGDIR_SELECT_ALL_STMT, &s); |  | 
|  6187   if( rc!=SQLITE_OK ) return rc; |  | 
|  6188  |  | 
|  6189   /* This code should never be called with buffered updates. */ |  | 
|  6190   assert( v->nPendingData<0 ); |  | 
|  6191  |  | 
|  6192   dataBufferInit(&doclist, 0); |  | 
|  6193   dataBufferInit(out, 0); |  | 
|  6194  |  | 
|  6195   /* Traverse the segments from oldest to newest so that newer doclist |  | 
|  6196   ** elements for given docids overwrite older elements. |  | 
|  6197   */ |  | 
|  6198   while( (rc = sqlite3_step(s))==SQLITE_ROW ){ |  | 
|  6199     const char *pData = sqlite3_column_blob(s, 2); |  | 
|  6200     const int nData = sqlite3_column_bytes(s, 2); |  | 
|  6201     const sqlite_int64 iLeavesEnd = sqlite3_column_int64(s, 1); |  | 
|  6202  |  | 
|  6203     /* Corrupt if we get back different types than we stored. */ |  | 
|  6204     if( sqlite3_column_type(s, 1)!=SQLITE_INTEGER || |  | 
|  6205         sqlite3_column_type(s, 2)!=SQLITE_BLOB ){ |  | 
|  6206       rc = SQLITE_CORRUPT_BKPT; |  | 
|  6207       goto err; |  | 
|  6208     } |  | 
|  6209  |  | 
|  6210     rc = loadSegment(v, pData, nData, iLeavesEnd, pTerm, nTerm, isPrefix, |  | 
|  6211                      &doclist); |  | 
|  6212     if( rc!=SQLITE_OK ) goto err; |  | 
|  6213   } |  | 
|  6214   if( rc==SQLITE_DONE ){ |  | 
|  6215     rc = SQLITE_OK; |  | 
|  6216     if( doclist.nData!=0 ){ |  | 
|  6217       /* TODO(shess) The old term_select_all() code applied the column |  | 
|  6218       ** restrict as we merged segments, leading to smaller buffers. |  | 
|  6219       ** This is probably worthwhile to bring back, once the new storage |  | 
|  6220       ** system is checked in. |  | 
|  6221       */ |  | 
|  6222       if( iColumn==v->nColumn) iColumn = -1; |  | 
|  6223       rc = docListTrim(DL_DEFAULT, doclist.pData, doclist.nData, |  | 
|  6224                        iColumn, iType, out); |  | 
|  6225     } |  | 
|  6226   } |  | 
|  6227  |  | 
|  6228  err: |  | 
|  6229   sqlite3_reset(s);  /* So we don't leave a lock. */ |  | 
|  6230   dataBufferDestroy(&doclist); |  | 
|  6231   return rc; |  | 
|  6232 } |  | 
|  6233  |  | 
|  6234 /****************************************************************/ |  | 
|  6235 /* Used to hold hashtable data for sorting. */ |  | 
|  6236 typedef struct TermData { |  | 
|  6237   const char *pTerm; |  | 
|  6238   int nTerm; |  | 
|  6239   DLCollector *pCollector; |  | 
|  6240 } TermData; |  | 
|  6241  |  | 
|  6242 /* Orders TermData elements in strcmp fashion ( <0 for less-than, 0 |  | 
|  6243 ** for equal, >0 for greater-than). |  | 
|  6244 */ |  | 
|  6245 static int termDataCmp(const void *av, const void *bv){ |  | 
|  6246   const TermData *a = (const TermData *)av; |  | 
|  6247   const TermData *b = (const TermData *)bv; |  | 
|  6248   int n = a->nTerm<b->nTerm ? a->nTerm : b->nTerm; |  | 
|  6249   int c = memcmp(a->pTerm, b->pTerm, n); |  | 
|  6250   if( c!=0 ) return c; |  | 
|  6251   return a->nTerm-b->nTerm; |  | 
|  6252 } |  | 
|  6253  |  | 
|  6254 /* Order pTerms data by term, then write a new level 0 segment using |  | 
|  6255 ** LeafWriter. |  | 
|  6256 */ |  | 
|  6257 static int writeZeroSegment(fulltext_vtab *v, fts3Hash *pTerms){ |  | 
|  6258   fts3HashElem *e; |  | 
|  6259   int idx, rc, i, n; |  | 
|  6260   TermData *pData; |  | 
|  6261   LeafWriter writer; |  | 
|  6262   DataBuffer dl; |  | 
|  6263  |  | 
|  6264   /* Determine the next index at level 0, merging as necessary. */ |  | 
|  6265   rc = segdirNextIndex(v, 0, &idx); |  | 
|  6266   if( rc!=SQLITE_OK ) return rc; |  | 
|  6267  |  | 
|  6268   n = fts3HashCount(pTerms); |  | 
|  6269   pData = sqlite3_malloc(n*sizeof(TermData)); |  | 
|  6270  |  | 
|  6271   for(i = 0, e = fts3HashFirst(pTerms); e; i++, e = fts3HashNext(e)){ |  | 
|  6272     assert( i<n ); |  | 
|  6273     pData[i].pTerm = fts3HashKey(e); |  | 
|  6274     pData[i].nTerm = fts3HashKeysize(e); |  | 
|  6275     pData[i].pCollector = fts3HashData(e); |  | 
|  6276   } |  | 
|  6277   assert( i==n ); |  | 
|  6278  |  | 
|  6279   /* TODO(shess) Should we allow user-defined collation sequences, |  | 
|  6280   ** here?  I think we only need that once we support prefix searches. |  | 
|  6281   */ |  | 
|  6282   if( n>1 ) qsort(pData, n, sizeof(*pData), termDataCmp); |  | 
|  6283  |  | 
|  6284   /* TODO(shess) Refactor so that we can write directly to the segment |  | 
|  6285   ** DataBuffer, as happens for segment merges. |  | 
|  6286   */ |  | 
|  6287   leafWriterInit(0, idx, &writer); |  | 
|  6288   dataBufferInit(&dl, 0); |  | 
|  6289   for(i=0; i<n; i++){ |  | 
|  6290     dataBufferReset(&dl); |  | 
|  6291     dlcAddDoclist(pData[i].pCollector, &dl); |  | 
|  6292     rc = leafWriterStep(v, &writer, |  | 
|  6293                         pData[i].pTerm, pData[i].nTerm, dl.pData, dl.nData); |  | 
|  6294     if( rc!=SQLITE_OK ) goto err; |  | 
|  6295   } |  | 
|  6296   rc = leafWriterFinalize(v, &writer); |  | 
|  6297  |  | 
|  6298  err: |  | 
|  6299   dataBufferDestroy(&dl); |  | 
|  6300   sqlite3_free(pData); |  | 
|  6301   leafWriterDestroy(&writer); |  | 
|  6302   return rc; |  | 
|  6303 } |  | 
|  6304  |  | 
|  6305 /* If pendingTerms has data, free it. */ |  | 
|  6306 static int clearPendingTerms(fulltext_vtab *v){ |  | 
|  6307   if( v->nPendingData>=0 ){ |  | 
|  6308     fts3HashElem *e; |  | 
|  6309     for(e=fts3HashFirst(&v->pendingTerms); e; e=fts3HashNext(e)){ |  | 
|  6310       dlcDelete(fts3HashData(e)); |  | 
|  6311     } |  | 
|  6312     fts3HashClear(&v->pendingTerms); |  | 
|  6313     v->nPendingData = -1; |  | 
|  6314   } |  | 
|  6315   return SQLITE_OK; |  | 
|  6316 } |  | 
|  6317  |  | 
|  6318 /* If pendingTerms has data, flush it to a level-zero segment, and |  | 
|  6319 ** free it. |  | 
|  6320 */ |  | 
|  6321 static int flushPendingTerms(fulltext_vtab *v){ |  | 
|  6322   if( v->nPendingData>=0 ){ |  | 
|  6323     int rc = writeZeroSegment(v, &v->pendingTerms); |  | 
|  6324     if( rc==SQLITE_OK ) clearPendingTerms(v); |  | 
|  6325     return rc; |  | 
|  6326   } |  | 
|  6327   return SQLITE_OK; |  | 
|  6328 } |  | 
|  6329  |  | 
|  6330 /* If pendingTerms is "too big", or docid is out of order, flush it. |  | 
|  6331 ** Regardless, be certain that pendingTerms is initialized for use. |  | 
|  6332 */ |  | 
|  6333 static int initPendingTerms(fulltext_vtab *v, sqlite_int64 iDocid){ |  | 
|  6334   /* TODO(shess) Explore whether partially flushing the buffer on |  | 
|  6335   ** forced-flush would provide better performance.  I suspect that if |  | 
|  6336   ** we ordered the doclists by size and flushed the largest until the |  | 
|  6337   ** buffer was half empty, that would let the less frequent terms |  | 
|  6338   ** generate longer doclists. |  | 
|  6339   */ |  | 
|  6340   if( iDocid<=v->iPrevDocid || v->nPendingData>kPendingThreshold ){ |  | 
|  6341     int rc = flushPendingTerms(v); |  | 
|  6342     if( rc!=SQLITE_OK ) return rc; |  | 
|  6343   } |  | 
|  6344   if( v->nPendingData<0 ){ |  | 
|  6345     fts3HashInit(&v->pendingTerms, FTS3_HASH_STRING, 1); |  | 
|  6346     v->nPendingData = 0; |  | 
|  6347   } |  | 
|  6348   v->iPrevDocid = iDocid; |  | 
|  6349   return SQLITE_OK; |  | 
|  6350 } |  | 
|  6351  |  | 
|  6352 /* This function implements the xUpdate callback; it is the top-level entry |  | 
|  6353  * point for inserting, deleting or updating a row in a full-text table. */ |  | 
|  6354 static int fulltextUpdate(sqlite3_vtab *pVtab, int nArg, sqlite3_value **ppArg, |  | 
|  6355                           sqlite_int64 *pRowid){ |  | 
|  6356   fulltext_vtab *v = (fulltext_vtab *) pVtab; |  | 
|  6357   int rc; |  | 
|  6358  |  | 
|  6359   FTSTRACE(("FTS3 Update %p\n", pVtab)); |  | 
|  6360  |  | 
|  6361   if( nArg<2 ){ |  | 
|  6362     rc = index_delete(v, sqlite3_value_int64(ppArg[0])); |  | 
|  6363     if( rc==SQLITE_OK ){ |  | 
|  6364       /* If we just deleted the last row in the table, clear out the |  | 
|  6365       ** index data. |  | 
|  6366       */ |  | 
|  6367       rc = content_exists(v); |  | 
|  6368       if( rc==SQLITE_ROW ){ |  | 
|  6369         rc = SQLITE_OK; |  | 
|  6370       }else if( rc==SQLITE_DONE ){ |  | 
|  6371         /* Clear the pending terms so we don't flush a useless level-0 |  | 
|  6372         ** segment when the transaction closes. |  | 
|  6373         */ |  | 
|  6374         rc = clearPendingTerms(v); |  | 
|  6375         if( rc==SQLITE_OK ){ |  | 
|  6376           rc = segdir_delete_all(v); |  | 
|  6377         } |  | 
|  6378       } |  | 
|  6379     } |  | 
|  6380   } else if( sqlite3_value_type(ppArg[0]) != SQLITE_NULL ){ |  | 
|  6381     /* An update: |  | 
|  6382      * ppArg[0] = old rowid |  | 
|  6383      * ppArg[1] = new rowid |  | 
|  6384      * ppArg[2..2+v->nColumn-1] = values |  | 
|  6385      * ppArg[2+v->nColumn] = value for magic column (we ignore this) |  | 
|  6386      * ppArg[2+v->nColumn+1] = value for docid |  | 
|  6387      */ |  | 
|  6388     sqlite_int64 rowid = sqlite3_value_int64(ppArg[0]); |  | 
|  6389     if( sqlite3_value_type(ppArg[1]) != SQLITE_INTEGER || |  | 
|  6390         sqlite3_value_int64(ppArg[1]) != rowid ){ |  | 
|  6391       rc = SQLITE_ERROR;  /* we don't allow changing the rowid */ |  | 
|  6392     }else if( sqlite3_value_type(ppArg[2+v->nColumn+1]) != SQLITE_INTEGER || |  | 
|  6393               sqlite3_value_int64(ppArg[2+v->nColumn+1]) != rowid ){ |  | 
|  6394       rc = SQLITE_ERROR;  /* we don't allow changing the docid */ |  | 
|  6395     }else{ |  | 
|  6396       assert( nArg==2+v->nColumn+2); |  | 
|  6397       rc = index_update(v, rowid, &ppArg[2]); |  | 
|  6398     } |  | 
|  6399   } else { |  | 
|  6400     /* An insert: |  | 
|  6401      * ppArg[1] = requested rowid |  | 
|  6402      * ppArg[2..2+v->nColumn-1] = values |  | 
|  6403      * ppArg[2+v->nColumn] = value for magic column (we ignore this) |  | 
|  6404      * ppArg[2+v->nColumn+1] = value for docid |  | 
|  6405      */ |  | 
|  6406     sqlite3_value *pRequestDocid = ppArg[2+v->nColumn+1]; |  | 
|  6407     assert( nArg==2+v->nColumn+2); |  | 
|  6408     if( SQLITE_NULL != sqlite3_value_type(pRequestDocid) && |  | 
|  6409         SQLITE_NULL != sqlite3_value_type(ppArg[1]) ){ |  | 
|  6410       /* TODO(shess) Consider allowing this to work if the values are |  | 
|  6411       ** identical.  I'm inclined to discourage that usage, though, |  | 
|  6412       ** given that both rowid and docid are special columns.  Better |  | 
|  6413       ** would be to define one or the other as the default winner, |  | 
|  6414       ** but should it be fts3-centric (docid) or SQLite-centric |  | 
|  6415       ** (rowid)? |  | 
|  6416       */ |  | 
|  6417       rc = SQLITE_ERROR; |  | 
|  6418     }else{ |  | 
|  6419       if( SQLITE_NULL == sqlite3_value_type(pRequestDocid) ){ |  | 
|  6420         pRequestDocid = ppArg[1]; |  | 
|  6421       } |  | 
|  6422       rc = index_insert(v, pRequestDocid, &ppArg[2], pRowid); |  | 
|  6423     } |  | 
|  6424   } |  | 
|  6425  |  | 
|  6426   return rc; |  | 
|  6427 } |  | 
|  6428  |  | 
|  6429 static int fulltextSync(sqlite3_vtab *pVtab){ |  | 
|  6430   FTSTRACE(("FTS3 xSync()\n")); |  | 
|  6431   return flushPendingTerms((fulltext_vtab *)pVtab); |  | 
|  6432 } |  | 
|  6433  |  | 
|  6434 static int fulltextBegin(sqlite3_vtab *pVtab){ |  | 
|  6435   fulltext_vtab *v = (fulltext_vtab *) pVtab; |  | 
|  6436   FTSTRACE(("FTS3 xBegin()\n")); |  | 
|  6437  |  | 
|  6438   /* Any buffered updates should have been cleared by the previous |  | 
|  6439   ** transaction. |  | 
|  6440   */ |  | 
|  6441   assert( v->nPendingData<0 ); |  | 
|  6442   return clearPendingTerms(v); |  | 
|  6443 } |  | 
|  6444  |  | 
|  6445 static int fulltextCommit(sqlite3_vtab *pVtab){ |  | 
|  6446   fulltext_vtab *v = (fulltext_vtab *) pVtab; |  | 
|  6447   FTSTRACE(("FTS3 xCommit()\n")); |  | 
|  6448  |  | 
|  6449   /* Buffered updates should have been cleared by fulltextSync(). */ |  | 
|  6450   assert( v->nPendingData<0 ); |  | 
|  6451   return clearPendingTerms(v); |  | 
|  6452 } |  | 
|  6453  |  | 
|  6454 static int fulltextRollback(sqlite3_vtab *pVtab){ |  | 
|  6455   FTSTRACE(("FTS3 xRollback()\n")); |  | 
|  6456   return clearPendingTerms((fulltext_vtab *)pVtab); |  | 
|  6457 } |  | 
|  6458  |  | 
|  6459 /* |  | 
|  6460 ** Implementation of the snippet() function for FTS3 |  | 
|  6461 */ |  | 
|  6462 static void snippetFunc( |  | 
|  6463   sqlite3_context *pContext, |  | 
|  6464   int argc, |  | 
|  6465   sqlite3_value **argv |  | 
|  6466 ){ |  | 
|  6467   fulltext_cursor *pCursor; |  | 
|  6468   if( argc<1 ) return; |  | 
|  6469   if( sqlite3_value_type(argv[0])!=SQLITE_BLOB || |  | 
|  6470       sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){ |  | 
|  6471     sqlite3_result_error(pContext, "illegal first argument to html_snippet",-1); |  | 
|  6472   }else{ |  | 
|  6473     const char *zStart = "<b>"; |  | 
|  6474     const char *zEnd = "</b>"; |  | 
|  6475     const char *zEllipsis = "<b>...</b>"; |  | 
|  6476     memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor)); |  | 
|  6477     if( argc>=2 ){ |  | 
|  6478       zStart = (const char*)sqlite3_value_text(argv[1]); |  | 
|  6479       if( argc>=3 ){ |  | 
|  6480         zEnd = (const char*)sqlite3_value_text(argv[2]); |  | 
|  6481         if( argc>=4 ){ |  | 
|  6482           zEllipsis = (const char*)sqlite3_value_text(argv[3]); |  | 
|  6483         } |  | 
|  6484       } |  | 
|  6485     } |  | 
|  6486     snippetAllOffsets(pCursor); |  | 
|  6487     snippetText(pCursor, zStart, zEnd, zEllipsis); |  | 
|  6488     sqlite3_result_text(pContext, pCursor->snippet.zSnippet, |  | 
|  6489                         pCursor->snippet.nSnippet, SQLITE_STATIC); |  | 
|  6490   } |  | 
|  6491 } |  | 
|  6492  |  | 
|  6493 /* |  | 
|  6494 ** Implementation of the offsets() function for FTS3 |  | 
|  6495 */ |  | 
|  6496 static void snippetOffsetsFunc( |  | 
|  6497   sqlite3_context *pContext, |  | 
|  6498   int argc, |  | 
|  6499   sqlite3_value **argv |  | 
|  6500 ){ |  | 
|  6501   fulltext_cursor *pCursor; |  | 
|  6502   if( argc<1 ) return; |  | 
|  6503   if( sqlite3_value_type(argv[0])!=SQLITE_BLOB || |  | 
|  6504       sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){ |  | 
|  6505     sqlite3_result_error(pContext, "illegal first argument to offsets",-1); |  | 
|  6506   }else{ |  | 
|  6507     memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor)); |  | 
|  6508     snippetAllOffsets(pCursor); |  | 
|  6509     snippetOffsetText(&pCursor->snippet); |  | 
|  6510     sqlite3_result_text(pContext, |  | 
|  6511                         pCursor->snippet.zOffset, pCursor->snippet.nOffset, |  | 
|  6512                         SQLITE_STATIC); |  | 
|  6513   } |  | 
|  6514 } |  | 
|  6515  |  | 
|  6516 /* OptLeavesReader is nearly identical to LeavesReader, except that |  | 
|  6517 ** where LeavesReader is geared towards the merging of complete |  | 
|  6518 ** segment levels (with exactly MERGE_COUNT segments), OptLeavesReader |  | 
|  6519 ** is geared towards implementation of the optimize() function, and |  | 
|  6520 ** can merge all segments simultaneously.  This version may be |  | 
|  6521 ** somewhat less efficient than LeavesReader because it merges into an |  | 
|  6522 ** accumulator rather than doing an N-way merge, but since segment |  | 
|  6523 ** size grows exponentially (so segment count logrithmically) this is |  | 
|  6524 ** probably not an immediate problem. |  | 
|  6525 */ |  | 
|  6526 /* TODO(shess): Prove that assertion, or extend the merge code to |  | 
|  6527 ** merge tree fashion (like the prefix-searching code does). |  | 
|  6528 */ |  | 
|  6529 /* TODO(shess): OptLeavesReader and LeavesReader could probably be |  | 
|  6530 ** merged with little or no loss of performance for LeavesReader.  The |  | 
|  6531 ** merged code would need to handle >MERGE_COUNT segments, and would |  | 
|  6532 ** also need to be able to optionally optimize away deletes. |  | 
|  6533 */ |  | 
|  6534 typedef struct OptLeavesReader { |  | 
|  6535   /* Segment number, to order readers by age. */ |  | 
|  6536   int segment; |  | 
|  6537   LeavesReader reader; |  | 
|  6538 } OptLeavesReader; |  | 
|  6539  |  | 
|  6540 static int optLeavesReaderAtEnd(OptLeavesReader *pReader){ |  | 
|  6541   return leavesReaderAtEnd(&pReader->reader); |  | 
|  6542 } |  | 
|  6543 static int optLeavesReaderTermBytes(OptLeavesReader *pReader){ |  | 
|  6544   return leavesReaderTermBytes(&pReader->reader); |  | 
|  6545 } |  | 
|  6546 static const char *optLeavesReaderData(OptLeavesReader *pReader){ |  | 
|  6547   return leavesReaderData(&pReader->reader); |  | 
|  6548 } |  | 
|  6549 static int optLeavesReaderDataBytes(OptLeavesReader *pReader){ |  | 
|  6550   return leavesReaderDataBytes(&pReader->reader); |  | 
|  6551 } |  | 
|  6552 static const char *optLeavesReaderTerm(OptLeavesReader *pReader){ |  | 
|  6553   return leavesReaderTerm(&pReader->reader); |  | 
|  6554 } |  | 
|  6555 static int optLeavesReaderStep(fulltext_vtab *v, OptLeavesReader *pReader){ |  | 
|  6556   return leavesReaderStep(v, &pReader->reader); |  | 
|  6557 } |  | 
|  6558 static int optLeavesReaderTermCmp(OptLeavesReader *lr1, OptLeavesReader *lr2){ |  | 
|  6559   return leavesReaderTermCmp(&lr1->reader, &lr2->reader); |  | 
|  6560 } |  | 
|  6561 /* Order by term ascending, segment ascending (oldest to newest), with |  | 
|  6562 ** exhausted readers to the end. |  | 
|  6563 */ |  | 
|  6564 static int optLeavesReaderCmp(OptLeavesReader *lr1, OptLeavesReader *lr2){ |  | 
|  6565   int c = optLeavesReaderTermCmp(lr1, lr2); |  | 
|  6566   if( c!=0 ) return c; |  | 
|  6567   return lr1->segment-lr2->segment; |  | 
|  6568 } |  | 
|  6569 /* Bubble pLr[0] to appropriate place in pLr[1..nLr-1].  Assumes that |  | 
|  6570 ** pLr[1..nLr-1] is already sorted. |  | 
|  6571 */ |  | 
|  6572 static void optLeavesReaderReorder(OptLeavesReader *pLr, int nLr){ |  | 
|  6573   while( nLr>1 && optLeavesReaderCmp(pLr, pLr+1)>0 ){ |  | 
|  6574     OptLeavesReader tmp = pLr[0]; |  | 
|  6575     pLr[0] = pLr[1]; |  | 
|  6576     pLr[1] = tmp; |  | 
|  6577     nLr--; |  | 
|  6578     pLr++; |  | 
|  6579   } |  | 
|  6580 } |  | 
|  6581  |  | 
|  6582 /* optimize() helper function.  Put the readers in order and iterate |  | 
|  6583 ** through them, merging doclists for matching terms into pWriter. |  | 
|  6584 ** Returns SQLITE_OK on success, or the SQLite error code which |  | 
|  6585 ** prevented success. |  | 
|  6586 */ |  | 
|  6587 static int optimizeInternal(fulltext_vtab *v, |  | 
|  6588                             OptLeavesReader *readers, int nReaders, |  | 
|  6589                             LeafWriter *pWriter){ |  | 
|  6590   int i, rc = SQLITE_OK; |  | 
|  6591   DataBuffer doclist, merged, tmp; |  | 
|  6592   const char *pData; |  | 
|  6593  |  | 
|  6594   /* Order the readers. */ |  | 
|  6595   i = nReaders; |  | 
|  6596   while( i-- > 0 ){ |  | 
|  6597     optLeavesReaderReorder(&readers[i], nReaders-i); |  | 
|  6598   } |  | 
|  6599  |  | 
|  6600   dataBufferInit(&doclist, LEAF_MAX); |  | 
|  6601   dataBufferInit(&merged, LEAF_MAX); |  | 
|  6602  |  | 
|  6603   /* Exhausted readers bubble to the end, so when the first reader is |  | 
|  6604   ** at eof, all are at eof. |  | 
|  6605   */ |  | 
|  6606   while( !optLeavesReaderAtEnd(&readers[0]) ){ |  | 
|  6607  |  | 
|  6608     /* Figure out how many readers share the next term. */ |  | 
|  6609     for(i=1; i<nReaders && !optLeavesReaderAtEnd(&readers[i]); i++){ |  | 
|  6610       if( 0!=optLeavesReaderTermCmp(&readers[0], &readers[i]) ) break; |  | 
|  6611     } |  | 
|  6612  |  | 
|  6613     pData = optLeavesReaderData(&readers[0]); |  | 
|  6614     if( pData==NULL ){ |  | 
|  6615       rc = SQLITE_CORRUPT_BKPT; |  | 
|  6616       break; |  | 
|  6617     } |  | 
|  6618  |  | 
|  6619     /* Special-case for no merge. */ |  | 
|  6620     if( i==1 ){ |  | 
|  6621       /* Trim deletions from the doclist. */ |  | 
|  6622       dataBufferReset(&merged); |  | 
|  6623       rc = docListTrim(DL_DEFAULT, pData, |  | 
|  6624                        optLeavesReaderDataBytes(&readers[0]), |  | 
|  6625                        -1, DL_DEFAULT, &merged); |  | 
|  6626       if( rc!=SQLITE_OK ) break; |  | 
|  6627     }else{ |  | 
|  6628       DLReader dlReaders[MERGE_COUNT]; |  | 
|  6629       int iReader, nReaders; |  | 
|  6630  |  | 
|  6631       /* Prime the pipeline with the first reader's doclist.  After |  | 
|  6632       ** one pass index 0 will reference the accumulated doclist. |  | 
|  6633       */ |  | 
|  6634       rc = dlrInit(&dlReaders[0], DL_DEFAULT, |  | 
|  6635                    pData, |  | 
|  6636                    optLeavesReaderDataBytes(&readers[0])); |  | 
|  6637       if( rc!=SQLITE_OK ) break; |  | 
|  6638       iReader = 1; |  | 
|  6639  |  | 
|  6640       assert( iReader<i );  /* Must execute the loop at least once. */ |  | 
|  6641       while( iReader<i ){ |  | 
|  6642         /* Merge 16 inputs per pass. */ |  | 
|  6643         for( nReaders=1; iReader<i && nReaders<MERGE_COUNT; |  | 
|  6644              iReader++, nReaders++ ){ |  | 
|  6645           pData = optLeavesReaderData(&readers[iReader]); |  | 
|  6646           if( pData==NULL ){ |  | 
|  6647             rc = SQLITE_CORRUPT_BKPT; |  | 
|  6648             break; |  | 
|  6649           } |  | 
|  6650           rc = dlrInit(&dlReaders[nReaders], DL_DEFAULT, pData, |  | 
|  6651                        optLeavesReaderDataBytes(&readers[iReader])); |  | 
|  6652           if( rc!=SQLITE_OK ) break; |  | 
|  6653         } |  | 
|  6654  |  | 
|  6655         /* Merge doclists and swap result into accumulator. */ |  | 
|  6656         if( rc==SQLITE_OK ){ |  | 
|  6657           dataBufferReset(&merged); |  | 
|  6658           rc = docListMerge(&merged, dlReaders, nReaders); |  | 
|  6659           tmp = merged; |  | 
|  6660           merged = doclist; |  | 
|  6661           doclist = tmp; |  | 
|  6662         } |  | 
|  6663  |  | 
|  6664         while( nReaders-- > 0 ){ |  | 
|  6665           dlrDestroy(&dlReaders[nReaders]); |  | 
|  6666         } |  | 
|  6667  |  | 
|  6668         if( rc!=SQLITE_OK ) goto err; |  | 
|  6669  |  | 
|  6670         /* Accumulated doclist to reader 0 for next pass. */ |  | 
|  6671         rc = dlrInit(&dlReaders[0], DL_DEFAULT, doclist.pData, doclist.nData); |  | 
|  6672         if( rc!=SQLITE_OK ) goto err; |  | 
|  6673       } |  | 
|  6674  |  | 
|  6675       /* Destroy reader that was left in the pipeline. */ |  | 
|  6676       dlrDestroy(&dlReaders[0]); |  | 
|  6677  |  | 
|  6678       /* Trim deletions from the doclist. */ |  | 
|  6679       dataBufferReset(&merged); |  | 
|  6680       rc = docListTrim(DL_DEFAULT, doclist.pData, doclist.nData, |  | 
|  6681                        -1, DL_DEFAULT, &merged); |  | 
|  6682       if( rc!=SQLITE_OK ) goto err; |  | 
|  6683     } |  | 
|  6684  |  | 
|  6685     /* Only pass doclists with hits (skip if all hits deleted). */ |  | 
|  6686     if( merged.nData>0 ){ |  | 
|  6687       rc = leafWriterStep(v, pWriter, |  | 
|  6688                           optLeavesReaderTerm(&readers[0]), |  | 
|  6689                           optLeavesReaderTermBytes(&readers[0]), |  | 
|  6690                           merged.pData, merged.nData); |  | 
|  6691       if( rc!=SQLITE_OK ) goto err; |  | 
|  6692     } |  | 
|  6693  |  | 
|  6694     /* Step merged readers to next term and reorder. */ |  | 
|  6695     while( i-- > 0 ){ |  | 
|  6696       rc = optLeavesReaderStep(v, &readers[i]); |  | 
|  6697       if( rc!=SQLITE_OK ) goto err; |  | 
|  6698  |  | 
|  6699       optLeavesReaderReorder(&readers[i], nReaders-i); |  | 
|  6700     } |  | 
|  6701   } |  | 
|  6702  |  | 
|  6703  err: |  | 
|  6704   dataBufferDestroy(&doclist); |  | 
|  6705   dataBufferDestroy(&merged); |  | 
|  6706   return rc; |  | 
|  6707 } |  | 
|  6708  |  | 
|  6709 /* Implement optimize() function for FTS3.  optimize(t) merges all |  | 
|  6710 ** segments in the fts index into a single segment.  't' is the magic |  | 
|  6711 ** table-named column. |  | 
|  6712 */ |  | 
|  6713 static void optimizeFunc(sqlite3_context *pContext, |  | 
|  6714                          int argc, sqlite3_value **argv){ |  | 
|  6715   fulltext_cursor *pCursor; |  | 
|  6716   if( argc>1 ){ |  | 
|  6717     sqlite3_result_error(pContext, "excess arguments to optimize()",-1); |  | 
|  6718   }else if( sqlite3_value_type(argv[0])!=SQLITE_BLOB || |  | 
|  6719             sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){ |  | 
|  6720     sqlite3_result_error(pContext, "illegal first argument to optimize",-1); |  | 
|  6721   }else{ |  | 
|  6722     fulltext_vtab *v; |  | 
|  6723     int i, rc, iMaxLevel; |  | 
|  6724     OptLeavesReader *readers; |  | 
|  6725     int nReaders; |  | 
|  6726     LeafWriter writer; |  | 
|  6727     sqlite3_stmt *s; |  | 
|  6728  |  | 
|  6729     memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor)); |  | 
|  6730     v = cursor_vtab(pCursor); |  | 
|  6731  |  | 
|  6732     /* Flush any buffered updates before optimizing. */ |  | 
|  6733     rc = flushPendingTerms(v); |  | 
|  6734     if( rc!=SQLITE_OK ) goto err; |  | 
|  6735  |  | 
|  6736     rc = segdir_count(v, &nReaders, &iMaxLevel); |  | 
|  6737     if( rc!=SQLITE_OK ) goto err; |  | 
|  6738     if( nReaders==0 || nReaders==1 ){ |  | 
|  6739       sqlite3_result_text(pContext, "Index already optimal", -1, |  | 
|  6740                           SQLITE_STATIC); |  | 
|  6741       return; |  | 
|  6742     } |  | 
|  6743  |  | 
|  6744     rc = sql_get_statement(v, SEGDIR_SELECT_ALL_STMT, &s); |  | 
|  6745     if( rc!=SQLITE_OK ) goto err; |  | 
|  6746  |  | 
|  6747     readers = sqlite3_malloc(nReaders*sizeof(readers[0])); |  | 
|  6748     if( readers==NULL ) goto err; |  | 
|  6749  |  | 
|  6750     /* Note that there will already be a segment at this position |  | 
|  6751     ** until we call segdir_delete() on iMaxLevel. |  | 
|  6752     */ |  | 
|  6753     leafWriterInit(iMaxLevel, 0, &writer); |  | 
|  6754  |  | 
|  6755     i = 0; |  | 
|  6756     while( (rc = sqlite3_step(s))==SQLITE_ROW ){ |  | 
|  6757       sqlite_int64 iStart = sqlite3_column_int64(s, 0); |  | 
|  6758       sqlite_int64 iEnd = sqlite3_column_int64(s, 1); |  | 
|  6759       const char *pRootData = sqlite3_column_blob(s, 2); |  | 
|  6760       int nRootData = sqlite3_column_bytes(s, 2); |  | 
|  6761  |  | 
|  6762       /* Corrupt if we get back different types than we stored. */ |  | 
|  6763       if( sqlite3_column_type(s, 0)!=SQLITE_INTEGER || |  | 
|  6764           sqlite3_column_type(s, 1)!=SQLITE_INTEGER || |  | 
|  6765           sqlite3_column_type(s, 2)!=SQLITE_BLOB ){ |  | 
|  6766         rc = SQLITE_CORRUPT_BKPT; |  | 
|  6767         break; |  | 
|  6768       } |  | 
|  6769  |  | 
|  6770       assert( i<nReaders ); |  | 
|  6771       rc = leavesReaderInit(v, -1, iStart, iEnd, pRootData, nRootData, |  | 
|  6772                             &readers[i].reader); |  | 
|  6773       if( rc!=SQLITE_OK ) break; |  | 
|  6774  |  | 
|  6775       readers[i].segment = i; |  | 
|  6776       i++; |  | 
|  6777     } |  | 
|  6778  |  | 
|  6779     /* If we managed to successfully read them all, optimize them. */ |  | 
|  6780     if( rc==SQLITE_DONE ){ |  | 
|  6781       assert( i==nReaders ); |  | 
|  6782       rc = optimizeInternal(v, readers, nReaders, &writer); |  | 
|  6783     }else{ |  | 
|  6784       sqlite3_reset(s);  /* So we don't leave a lock. */ |  | 
|  6785     } |  | 
|  6786  |  | 
|  6787     while( i-- > 0 ){ |  | 
|  6788       leavesReaderDestroy(&readers[i].reader); |  | 
|  6789     } |  | 
|  6790     sqlite3_free(readers); |  | 
|  6791  |  | 
|  6792     /* If we've successfully gotten to here, delete the old segments |  | 
|  6793     ** and flush the interior structure of the new segment. |  | 
|  6794     */ |  | 
|  6795     if( rc==SQLITE_OK ){ |  | 
|  6796       for( i=0; i<=iMaxLevel; i++ ){ |  | 
|  6797         rc = segdir_delete(v, i); |  | 
|  6798         if( rc!=SQLITE_OK ) break; |  | 
|  6799       } |  | 
|  6800  |  | 
|  6801       if( rc==SQLITE_OK ) rc = leafWriterFinalize(v, &writer); |  | 
|  6802     } |  | 
|  6803  |  | 
|  6804     leafWriterDestroy(&writer); |  | 
|  6805  |  | 
|  6806     if( rc!=SQLITE_OK ) goto err; |  | 
|  6807  |  | 
|  6808     sqlite3_result_text(pContext, "Index optimized", -1, SQLITE_STATIC); |  | 
|  6809     return; |  | 
|  6810  |  | 
|  6811     /* TODO(shess): Error-handling needs to be improved along the |  | 
|  6812     ** lines of the dump_ functions. |  | 
|  6813     */ |  | 
|  6814  err: |  | 
|  6815     { |  | 
|  6816       char buf[512]; |  | 
|  6817       sqlite3_snprintf(sizeof(buf), buf, "Error in optimize: %s", |  | 
|  6818                        sqlite3_errmsg(sqlite3_context_db_handle(pContext))); |  | 
|  6819       sqlite3_result_error(pContext, buf, -1); |  | 
|  6820     } |  | 
|  6821   } |  | 
|  6822 } |  | 
|  6823  |  | 
|  6824 #ifdef SQLITE_TEST |  | 
|  6825 /* Generate an error of the form "<prefix>: <msg>".  If msg is NULL, |  | 
|  6826 ** pull the error from the context's db handle. |  | 
|  6827 */ |  | 
|  6828 static void generateError(sqlite3_context *pContext, |  | 
|  6829                           const char *prefix, const char *msg){ |  | 
|  6830   char buf[512]; |  | 
|  6831   if( msg==NULL ) msg = sqlite3_errmsg(sqlite3_context_db_handle(pContext)); |  | 
|  6832   sqlite3_snprintf(sizeof(buf), buf, "%s: %s", prefix, msg); |  | 
|  6833   sqlite3_result_error(pContext, buf, -1); |  | 
|  6834 } |  | 
|  6835  |  | 
|  6836 /* Helper function to collect the set of terms in the segment into |  | 
|  6837 ** pTerms.  The segment is defined by the leaf nodes between |  | 
|  6838 ** iStartBlockid and iEndBlockid, inclusive, or by the contents of |  | 
|  6839 ** pRootData if iStartBlockid is 0 (in which case the entire segment |  | 
|  6840 ** fit in a leaf). |  | 
|  6841 */ |  | 
|  6842 static int collectSegmentTerms(fulltext_vtab *v, sqlite3_stmt *s, |  | 
|  6843                                fts3Hash *pTerms){ |  | 
|  6844   const sqlite_int64 iStartBlockid = sqlite3_column_int64(s, 0); |  | 
|  6845   const sqlite_int64 iEndBlockid = sqlite3_column_int64(s, 1); |  | 
|  6846   const char *pRootData = sqlite3_column_blob(s, 2); |  | 
|  6847   const int nRootData = sqlite3_column_bytes(s, 2); |  | 
|  6848   int rc; |  | 
|  6849   LeavesReader reader; |  | 
|  6850  |  | 
|  6851   /* Corrupt if we get back different types than we stored. */ |  | 
|  6852   if( sqlite3_column_type(s, 0)!=SQLITE_INTEGER || |  | 
|  6853       sqlite3_column_type(s, 1)!=SQLITE_INTEGER || |  | 
|  6854       sqlite3_column_type(s, 2)!=SQLITE_BLOB ){ |  | 
|  6855     return SQLITE_CORRUPT_BKPT; |  | 
|  6856   } |  | 
|  6857  |  | 
|  6858   rc = leavesReaderInit(v, 0, iStartBlockid, iEndBlockid, |  | 
|  6859                         pRootData, nRootData, &reader); |  | 
|  6860   if( rc!=SQLITE_OK ) return rc; |  | 
|  6861  |  | 
|  6862   while( rc==SQLITE_OK && !leavesReaderAtEnd(&reader) ){ |  | 
|  6863     const char *pTerm = leavesReaderTerm(&reader); |  | 
|  6864     const int nTerm = leavesReaderTermBytes(&reader); |  | 
|  6865     void *oldValue = sqlite3Fts3HashFind(pTerms, pTerm, nTerm); |  | 
|  6866     void *newValue = (void *)((char *)oldValue+1); |  | 
|  6867  |  | 
|  6868     /* From the comment before sqlite3Fts3HashInsert in fts3_hash.c, |  | 
|  6869     ** the data value passed is returned in case of malloc failure. |  | 
|  6870     */ |  | 
|  6871     if( newValue==sqlite3Fts3HashInsert(pTerms, pTerm, nTerm, newValue) ){ |  | 
|  6872       rc = SQLITE_NOMEM; |  | 
|  6873     }else{ |  | 
|  6874       rc = leavesReaderStep(v, &reader); |  | 
|  6875     } |  | 
|  6876   } |  | 
|  6877  |  | 
|  6878   leavesReaderDestroy(&reader); |  | 
|  6879   return rc; |  | 
|  6880 } |  | 
|  6881  |  | 
|  6882 /* Helper function to build the result string for dump_terms(). */ |  | 
|  6883 static int generateTermsResult(sqlite3_context *pContext, fts3Hash *pTerms){ |  | 
|  6884   int iTerm, nTerms, nResultBytes, iByte; |  | 
|  6885   char *result; |  | 
|  6886   TermData *pData; |  | 
|  6887   fts3HashElem *e; |  | 
|  6888  |  | 
|  6889   /* Iterate pTerms to generate an array of terms in pData for |  | 
|  6890   ** sorting. |  | 
|  6891   */ |  | 
|  6892   nTerms = fts3HashCount(pTerms); |  | 
|  6893   assert( nTerms>0 ); |  | 
|  6894   pData = sqlite3_malloc(nTerms*sizeof(TermData)); |  | 
|  6895   if( pData==NULL ) return SQLITE_NOMEM; |  | 
|  6896  |  | 
|  6897   nResultBytes = 0; |  | 
|  6898   for(iTerm = 0, e = fts3HashFirst(pTerms); e; iTerm++, e = fts3HashNext(e)){ |  | 
|  6899     nResultBytes += fts3HashKeysize(e)+1;   /* Term plus trailing space */ |  | 
|  6900     assert( iTerm<nTerms ); |  | 
|  6901     pData[iTerm].pTerm = fts3HashKey(e); |  | 
|  6902     pData[iTerm].nTerm = fts3HashKeysize(e); |  | 
|  6903     pData[iTerm].pCollector = fts3HashData(e);  /* unused */ |  | 
|  6904   } |  | 
|  6905   assert( iTerm==nTerms ); |  | 
|  6906  |  | 
|  6907   assert( nResultBytes>0 );   /* nTerms>0, nResultsBytes must be, too. */ |  | 
|  6908   result = sqlite3_malloc(nResultBytes); |  | 
|  6909   if( result==NULL ){ |  | 
|  6910     sqlite3_free(pData); |  | 
|  6911     return SQLITE_NOMEM; |  | 
|  6912   } |  | 
|  6913  |  | 
|  6914   if( nTerms>1 ) qsort(pData, nTerms, sizeof(*pData), termDataCmp); |  | 
|  6915  |  | 
|  6916   /* Read the terms in order to build the result. */ |  | 
|  6917   iByte = 0; |  | 
|  6918   for(iTerm=0; iTerm<nTerms; ++iTerm){ |  | 
|  6919     memcpy(result+iByte, pData[iTerm].pTerm, pData[iTerm].nTerm); |  | 
|  6920     iByte += pData[iTerm].nTerm; |  | 
|  6921     result[iByte++] = ' '; |  | 
|  6922   } |  | 
|  6923   assert( iByte==nResultBytes ); |  | 
|  6924   assert( result[nResultBytes-1]==' ' ); |  | 
|  6925   result[nResultBytes-1] = '\0'; |  | 
|  6926  |  | 
|  6927   /* Passes away ownership of result. */ |  | 
|  6928   sqlite3_result_text(pContext, result, nResultBytes-1, sqlite3_free); |  | 
|  6929   sqlite3_free(pData); |  | 
|  6930   return SQLITE_OK; |  | 
|  6931 } |  | 
|  6932  |  | 
|  6933 /* Implements dump_terms() for use in inspecting the fts3 index from |  | 
|  6934 ** tests.  TEXT result containing the ordered list of terms joined by |  | 
|  6935 ** spaces.  dump_terms(t, level, idx) dumps the terms for the segment |  | 
|  6936 ** specified by level, idx (in %_segdir), while dump_terms(t) dumps |  | 
|  6937 ** all terms in the index.  In both cases t is the fts table's magic |  | 
|  6938 ** table-named column. |  | 
|  6939 */ |  | 
|  6940 static void dumpTermsFunc( |  | 
|  6941   sqlite3_context *pContext, |  | 
|  6942   int argc, sqlite3_value **argv |  | 
|  6943 ){ |  | 
|  6944   fulltext_cursor *pCursor; |  | 
|  6945   if( argc!=3 && argc!=1 ){ |  | 
|  6946     generateError(pContext, "dump_terms", "incorrect arguments"); |  | 
|  6947   }else if( sqlite3_value_type(argv[0])!=SQLITE_BLOB || |  | 
|  6948             sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){ |  | 
|  6949     generateError(pContext, "dump_terms", "illegal first argument"); |  | 
|  6950   }else{ |  | 
|  6951     fulltext_vtab *v; |  | 
|  6952     fts3Hash terms; |  | 
|  6953     sqlite3_stmt *s = NULL; |  | 
|  6954     int rc; |  | 
|  6955  |  | 
|  6956     memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor)); |  | 
|  6957     v = cursor_vtab(pCursor); |  | 
|  6958  |  | 
|  6959     /* If passed only the cursor column, get all segments.  Otherwise |  | 
|  6960     ** get the segment described by the following two arguments. |  | 
|  6961     */ |  | 
|  6962     if( argc==1 ){ |  | 
|  6963       rc = sql_get_statement(v, SEGDIR_SELECT_ALL_STMT, &s); |  | 
|  6964     }else{ |  | 
|  6965       rc = sql_get_statement(v, SEGDIR_SELECT_SEGMENT_STMT, &s); |  | 
|  6966       if( rc==SQLITE_OK ){ |  | 
|  6967         rc = sqlite3_bind_int(s, 1, sqlite3_value_int(argv[1])); |  | 
|  6968         if( rc==SQLITE_OK ){ |  | 
|  6969           rc = sqlite3_bind_int(s, 2, sqlite3_value_int(argv[2])); |  | 
|  6970         } |  | 
|  6971       } |  | 
|  6972     } |  | 
|  6973  |  | 
|  6974     if( rc!=SQLITE_OK ){ |  | 
|  6975       generateError(pContext, "dump_terms", NULL); |  | 
|  6976       return; |  | 
|  6977     } |  | 
|  6978  |  | 
|  6979     /* Collect the terms for each segment. */ |  | 
|  6980     sqlite3Fts3HashInit(&terms, FTS3_HASH_STRING, 1); |  | 
|  6981     while( (rc = sqlite3_step(s))==SQLITE_ROW ){ |  | 
|  6982       rc = collectSegmentTerms(v, s, &terms); |  | 
|  6983       if( rc!=SQLITE_OK ) break; |  | 
|  6984     } |  | 
|  6985  |  | 
|  6986     if( rc!=SQLITE_DONE ){ |  | 
|  6987       sqlite3_reset(s); |  | 
|  6988       generateError(pContext, "dump_terms", NULL); |  | 
|  6989     }else{ |  | 
|  6990       const int nTerms = fts3HashCount(&terms); |  | 
|  6991       if( nTerms>0 ){ |  | 
|  6992         rc = generateTermsResult(pContext, &terms); |  | 
|  6993         if( rc==SQLITE_NOMEM ){ |  | 
|  6994           generateError(pContext, "dump_terms", "out of memory"); |  | 
|  6995         }else{ |  | 
|  6996           assert( rc==SQLITE_OK ); |  | 
|  6997         } |  | 
|  6998       }else if( argc==3 ){ |  | 
|  6999         /* The specific segment asked for could not be found. */ |  | 
|  7000         generateError(pContext, "dump_terms", "segment not found"); |  | 
|  7001       }else{ |  | 
|  7002         /* No segments found. */ |  | 
|  7003         /* TODO(shess): It should be impossible to reach this.  This |  | 
|  7004         ** case can only happen for an empty table, in which case |  | 
|  7005         ** SQLite has no rows to call this function on. |  | 
|  7006         */ |  | 
|  7007         sqlite3_result_null(pContext); |  | 
|  7008       } |  | 
|  7009     } |  | 
|  7010     sqlite3Fts3HashClear(&terms); |  | 
|  7011   } |  | 
|  7012 } |  | 
|  7013  |  | 
|  7014 /* Expand the DL_DEFAULT doclist in pData into a text result in |  | 
|  7015 ** pContext. |  | 
|  7016 */ |  | 
|  7017 static void createDoclistResult(sqlite3_context *pContext, |  | 
|  7018                                 const char *pData, int nData){ |  | 
|  7019   DataBuffer dump; |  | 
|  7020   DLReader dlReader; |  | 
|  7021   int rc; |  | 
|  7022  |  | 
|  7023   assert( pData!=NULL && nData>0 ); |  | 
|  7024  |  | 
|  7025   rc = dlrInit(&dlReader, DL_DEFAULT, pData, nData); |  | 
|  7026   if( rc!=SQLITE_OK ) return rc; |  | 
|  7027   dataBufferInit(&dump, 0); |  | 
|  7028   for( ; rc==SQLITE_OK && !dlrAtEnd(&dlReader); rc = dlrStep(&dlReader) ){ |  | 
|  7029     char buf[256]; |  | 
|  7030     PLReader plReader; |  | 
|  7031  |  | 
|  7032     rc = plrInit(&plReader, &dlReader); |  | 
|  7033     if( rc!=SQLITE_OK ) break; |  | 
|  7034     if( DL_DEFAULT==DL_DOCIDS || plrAtEnd(&plReader) ){ |  | 
|  7035       sqlite3_snprintf(sizeof(buf), buf, "[%lld] ", dlrDocid(&dlReader)); |  | 
|  7036       dataBufferAppend(&dump, buf, strlen(buf)); |  | 
|  7037     }else{ |  | 
|  7038       int iColumn = plrColumn(&plReader); |  | 
|  7039  |  | 
|  7040       sqlite3_snprintf(sizeof(buf), buf, "[%lld %d[", |  | 
|  7041                        dlrDocid(&dlReader), iColumn); |  | 
|  7042       dataBufferAppend(&dump, buf, strlen(buf)); |  | 
|  7043  |  | 
|  7044       for( ; !plrAtEnd(&plReader); rc = plrStep(&plReader) ){ |  | 
|  7045         if( rc!=SQLITE_OK ) break; |  | 
|  7046         if( plrColumn(&plReader)!=iColumn ){ |  | 
|  7047           iColumn = plrColumn(&plReader); |  | 
|  7048           sqlite3_snprintf(sizeof(buf), buf, "] %d[", iColumn); |  | 
|  7049           assert( dump.nData>0 ); |  | 
|  7050           dump.nData--;                     /* Overwrite trailing space. */ |  | 
|  7051           assert( dump.pData[dump.nData]==' '); |  | 
|  7052           dataBufferAppend(&dump, buf, strlen(buf)); |  | 
|  7053         } |  | 
|  7054         if( DL_DEFAULT==DL_POSITIONS_OFFSETS ){ |  | 
|  7055           sqlite3_snprintf(sizeof(buf), buf, "%d,%d,%d ", |  | 
|  7056                            plrPosition(&plReader), |  | 
|  7057                            plrStartOffset(&plReader), plrEndOffset(&plReader)); |  | 
|  7058         }else if( DL_DEFAULT==DL_POSITIONS ){ |  | 
|  7059           sqlite3_snprintf(sizeof(buf), buf, "%d ", plrPosition(&plReader)); |  | 
|  7060         }else{ |  | 
|  7061           assert( NULL=="Unhandled DL_DEFAULT value"); |  | 
|  7062         } |  | 
|  7063         dataBufferAppend(&dump, buf, strlen(buf)); |  | 
|  7064       } |  | 
|  7065       plrDestroy(&plReader); |  | 
|  7066       if( rc!= SQLITE_OK ) break; |  | 
|  7067  |  | 
|  7068       assert( dump.nData>0 ); |  | 
|  7069       dump.nData--;                     /* Overwrite trailing space. */ |  | 
|  7070       assert( dump.pData[dump.nData]==' '); |  | 
|  7071       dataBufferAppend(&dump, "]] ", 3); |  | 
|  7072     } |  | 
|  7073   } |  | 
|  7074   dlrDestroy(&dlReader); |  | 
|  7075   if( rc!=SQLITE_OK ){ |  | 
|  7076     dataBufferDestroy(&dump); |  | 
|  7077     return rc; |  | 
|  7078   } |  | 
|  7079  |  | 
|  7080   assert( dump.nData>0 ); |  | 
|  7081   dump.nData--;                     /* Overwrite trailing space. */ |  | 
|  7082   assert( dump.pData[dump.nData]==' '); |  | 
|  7083   dump.pData[dump.nData] = '\0'; |  | 
|  7084   assert( dump.nData>0 ); |  | 
|  7085  |  | 
|  7086   /* Passes ownership of dump's buffer to pContext. */ |  | 
|  7087   sqlite3_result_text(pContext, dump.pData, dump.nData, sqlite3_free); |  | 
|  7088   dump.pData = NULL; |  | 
|  7089   dump.nData = dump.nCapacity = 0; |  | 
|  7090   return SQLITE_OK; |  | 
|  7091 } |  | 
|  7092  |  | 
|  7093 /* Implements dump_doclist() for use in inspecting the fts3 index from |  | 
|  7094 ** tests.  TEXT result containing a string representation of the |  | 
|  7095 ** doclist for the indicated term.  dump_doclist(t, term, level, idx) |  | 
|  7096 ** dumps the doclist for term from the segment specified by level, idx |  | 
|  7097 ** (in %_segdir), while dump_doclist(t, term) dumps the logical |  | 
|  7098 ** doclist for the term across all segments.  The per-segment doclist |  | 
|  7099 ** can contain deletions, while the full-index doclist will not |  | 
|  7100 ** (deletions are omitted). |  | 
|  7101 ** |  | 
|  7102 ** Result formats differ with the setting of DL_DEFAULTS.  Examples: |  | 
|  7103 ** |  | 
|  7104 ** DL_DOCIDS: [1] [3] [7] |  | 
|  7105 ** DL_POSITIONS: [1 0[0 4] 1[17]] [3 1[5]] |  | 
|  7106 ** DL_POSITIONS_OFFSETS: [1 0[0,0,3 4,23,26] 1[17,102,105]] [3 1[5,20,23]] |  | 
|  7107 ** |  | 
|  7108 ** In each case the number after the outer '[' is the docid.  In the |  | 
|  7109 ** latter two cases, the number before the inner '[' is the column |  | 
|  7110 ** associated with the values within.  For DL_POSITIONS the numbers |  | 
|  7111 ** within are the positions, for DL_POSITIONS_OFFSETS they are the |  | 
|  7112 ** position, the start offset, and the end offset. |  | 
|  7113 */ |  | 
|  7114 static void dumpDoclistFunc( |  | 
|  7115   sqlite3_context *pContext, |  | 
|  7116   int argc, sqlite3_value **argv |  | 
|  7117 ){ |  | 
|  7118   fulltext_cursor *pCursor; |  | 
|  7119   if( argc!=2 && argc!=4 ){ |  | 
|  7120     generateError(pContext, "dump_doclist", "incorrect arguments"); |  | 
|  7121   }else if( sqlite3_value_type(argv[0])!=SQLITE_BLOB || |  | 
|  7122             sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){ |  | 
|  7123     generateError(pContext, "dump_doclist", "illegal first argument"); |  | 
|  7124   }else if( sqlite3_value_text(argv[1])==NULL || |  | 
|  7125             sqlite3_value_text(argv[1])[0]=='\0' ){ |  | 
|  7126     generateError(pContext, "dump_doclist", "empty second argument"); |  | 
|  7127   }else{ |  | 
|  7128     const char *pTerm = (const char *)sqlite3_value_text(argv[1]); |  | 
|  7129     const int nTerm = strlen(pTerm); |  | 
|  7130     fulltext_vtab *v; |  | 
|  7131     int rc; |  | 
|  7132     DataBuffer doclist; |  | 
|  7133  |  | 
|  7134     memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor)); |  | 
|  7135     v = cursor_vtab(pCursor); |  | 
|  7136  |  | 
|  7137     dataBufferInit(&doclist, 0); |  | 
|  7138  |  | 
|  7139     /* termSelect() yields the same logical doclist that queries are |  | 
|  7140     ** run against. |  | 
|  7141     */ |  | 
|  7142     if( argc==2 ){ |  | 
|  7143       rc = termSelect(v, v->nColumn, pTerm, nTerm, 0, DL_DEFAULT, &doclist); |  | 
|  7144     }else{ |  | 
|  7145       sqlite3_stmt *s = NULL; |  | 
|  7146  |  | 
|  7147       /* Get our specific segment's information. */ |  | 
|  7148       rc = sql_get_statement(v, SEGDIR_SELECT_SEGMENT_STMT, &s); |  | 
|  7149       if( rc==SQLITE_OK ){ |  | 
|  7150         rc = sqlite3_bind_int(s, 1, sqlite3_value_int(argv[2])); |  | 
|  7151         if( rc==SQLITE_OK ){ |  | 
|  7152           rc = sqlite3_bind_int(s, 2, sqlite3_value_int(argv[3])); |  | 
|  7153         } |  | 
|  7154       } |  | 
|  7155  |  | 
|  7156       if( rc==SQLITE_OK ){ |  | 
|  7157         rc = sqlite3_step(s); |  | 
|  7158  |  | 
|  7159         if( rc==SQLITE_DONE ){ |  | 
|  7160           dataBufferDestroy(&doclist); |  | 
|  7161           generateError(pContext, "dump_doclist", "segment not found"); |  | 
|  7162           return; |  | 
|  7163         } |  | 
|  7164  |  | 
|  7165         /* Found a segment, load it into doclist. */ |  | 
|  7166         if( rc==SQLITE_ROW ){ |  | 
|  7167           const sqlite_int64 iLeavesEnd = sqlite3_column_int64(s, 1); |  | 
|  7168           const char *pData = sqlite3_column_blob(s, 2); |  | 
|  7169           const int nData = sqlite3_column_bytes(s, 2); |  | 
|  7170  |  | 
|  7171           /* loadSegment() is used by termSelect() to load each |  | 
|  7172           ** segment's data. |  | 
|  7173           */ |  | 
|  7174           rc = loadSegment(v, pData, nData, iLeavesEnd, pTerm, nTerm, 0, |  | 
|  7175                            &doclist); |  | 
|  7176           if( rc==SQLITE_OK ){ |  | 
|  7177             rc = sqlite3_step(s); |  | 
|  7178  |  | 
|  7179             /* Should not have more than one matching segment. */ |  | 
|  7180             if( rc!=SQLITE_DONE ){ |  | 
|  7181               sqlite3_reset(s); |  | 
|  7182               dataBufferDestroy(&doclist); |  | 
|  7183               generateError(pContext, "dump_doclist", "invalid segdir"); |  | 
|  7184               return; |  | 
|  7185             } |  | 
|  7186             rc = SQLITE_OK; |  | 
|  7187           } |  | 
|  7188         } |  | 
|  7189       } |  | 
|  7190  |  | 
|  7191       sqlite3_reset(s); |  | 
|  7192     } |  | 
|  7193  |  | 
|  7194     if( rc==SQLITE_OK ){ |  | 
|  7195       if( doclist.nData>0 ){ |  | 
|  7196         createDoclistResult(pContext, doclist.pData, doclist.nData); |  | 
|  7197       }else{ |  | 
|  7198         /* TODO(shess): This can happen if the term is not present, or |  | 
|  7199         ** if all instances of the term have been deleted and this is |  | 
|  7200         ** an all-index dump.  It may be interesting to distinguish |  | 
|  7201         ** these cases. |  | 
|  7202         */ |  | 
|  7203         sqlite3_result_text(pContext, "", 0, SQLITE_STATIC); |  | 
|  7204       } |  | 
|  7205     }else if( rc==SQLITE_NOMEM ){ |  | 
|  7206       /* Handle out-of-memory cases specially because if they are |  | 
|  7207       ** generated in fts3 code they may not be reflected in the db |  | 
|  7208       ** handle. |  | 
|  7209       */ |  | 
|  7210       /* TODO(shess): Handle this more comprehensively. |  | 
|  7211       ** sqlite3ErrStr() has what I need, but is internal. |  | 
|  7212       */ |  | 
|  7213       generateError(pContext, "dump_doclist", "out of memory"); |  | 
|  7214     }else{ |  | 
|  7215       generateError(pContext, "dump_doclist", NULL); |  | 
|  7216     } |  | 
|  7217  |  | 
|  7218     dataBufferDestroy(&doclist); |  | 
|  7219   } |  | 
|  7220 } |  | 
|  7221 #endif |  | 
|  7222  |  3471  | 
|  7223 /* |  3472 /* | 
|  7224 ** This routine implements the xFindFunction method for the FTS3 |  3473 ** This routine implements the xFindFunction method for the FTS3 | 
|  7225 ** virtual table. |  3474 ** virtual table. | 
|  7226 */ |  3475 */ | 
|  7227 static int fulltextFindFunction( |  3476 static int fts3FindFunctionMethod( | 
|  7228   sqlite3_vtab *pVtab, |  3477   sqlite3_vtab *pVtab,            /* Virtual table handle */ | 
|  7229   int nArg, |  3478   int nArg,                       /* Number of SQL function arguments */ | 
|  7230   const char *zName, |  3479   const char *zName,              /* Name of SQL function */ | 
|  7231   void (**pxFunc)(sqlite3_context*,int,sqlite3_value**), |  3480   void (**pxFunc)(sqlite3_context*,int,sqlite3_value**), /* OUT: Result */ | 
|  7232   void **ppArg |  3481   void **ppArg                    /* Unused */ | 
|  7233 ){ |  3482 ){ | 
|  7234   if( strcmp(zName,"snippet")==0 ){ |  3483   struct Overloaded { | 
|  7235     *pxFunc = snippetFunc; |  3484     const char *zName; | 
|  7236     return 1; |  3485     void (*xFunc)(sqlite3_context*,int,sqlite3_value**); | 
|  7237   }else if( strcmp(zName,"offsets")==0 ){ |  3486   } aOverload[] = { | 
|  7238     *pxFunc = snippetOffsetsFunc; |  3487     { "snippet", fts3SnippetFunc }, | 
|  7239     return 1; |  3488     { "offsets", fts3OffsetsFunc }, | 
|  7240   }else if( strcmp(zName,"optimize")==0 ){ |  3489     { "optimize", fts3OptimizeFunc }, | 
|  7241     *pxFunc = optimizeFunc; |  3490     { "matchinfo", fts3MatchinfoFunc }, | 
|  7242     return 1; |  3491   }; | 
|  7243 #ifdef SQLITE_TEST |  3492   int i;                          /* Iterator variable */ | 
|  7244     /* NOTE(shess): These functions are present only for testing |  3493  | 
|  7245     ** purposes.  No particular effort is made to optimize their |  3494   UNUSED_PARAMETER(pVtab); | 
|  7246     ** execution or how they build their results. |  3495   UNUSED_PARAMETER(nArg); | 
|  7247     */ |  3496   UNUSED_PARAMETER(ppArg); | 
|  7248   }else if( strcmp(zName,"dump_terms")==0 ){ |  3497  | 
|  7249     /* fprintf(stderr, "Found dump_terms\n"); */ |  3498   for(i=0; i<SizeofArray(aOverload); i++){ | 
|  7250     *pxFunc = dumpTermsFunc; |  3499     if( strcmp(zName, aOverload[i].zName)==0 ){ | 
|  7251     return 1; |  3500       *pxFunc = aOverload[i].xFunc; | 
|  7252   }else if( strcmp(zName,"dump_doclist")==0 ){ |  3501       return 1; | 
|  7253     /* fprintf(stderr, "Found dump_doclist\n"); */ |  3502     } | 
|  7254     *pxFunc = dumpDoclistFunc; |  3503   } | 
|  7255     return 1; |  3504  | 
|  7256 #endif |  3505   /* No function of the specified name was found. Return 0. */ | 
|  7257   } |  | 
|  7258   return 0; |  3506   return 0; | 
|  7259 } |  3507 } | 
|  7260  |  3508  | 
|  7261 /* |  3509 /* | 
|  7262 ** Rename an fts3 table. |  3510 ** Implementation of FTS3 xRename method. Rename an fts3 table. | 
|  7263 */ |  3511 */ | 
|  7264 static int fulltextRename( |  3512 static int fts3RenameMethod( | 
|  7265   sqlite3_vtab *pVtab, |  3513   sqlite3_vtab *pVtab,            /* Virtual table handle */ | 
|  7266   const char *zName |  3514   const char *zName               /* New name of table */ | 
|  7267 ){ |  3515 ){ | 
|  7268   fulltext_vtab *p = (fulltext_vtab *)pVtab; |  3516   Fts3Table *p = (Fts3Table *)pVtab; | 
|  7269   int rc = SQLITE_NOMEM; |  3517   sqlite3 *db = p->db;            /* Database connection */ | 
|  7270   char *zSql = sqlite3_mprintf( |  3518   int rc;                         /* Return Code */ | 
|  7271     "ALTER TABLE %Q.'%q_content'  RENAME TO '%q_content';" |  3519  | 
|  7272     "ALTER TABLE %Q.'%q_segments' RENAME TO '%q_segments';" |  3520   rc = sqlite3Fts3PendingTermsFlush(p); | 
|  7273     "ALTER TABLE %Q.'%q_segdir'   RENAME TO '%q_segdir';" |  3521   if( rc!=SQLITE_OK ){ | 
|  7274     , p->zDb, p->zName, zName  |  3522     return rc; | 
|  7275     , p->zDb, p->zName, zName  |  3523   } | 
|  7276     , p->zDb, p->zName, zName |  3524  | 
 |  3525   fts3DbExec(&rc, db, | 
 |  3526     "ALTER TABLE %Q.'%q_content'  RENAME TO '%q_content';", | 
 |  3527     p->zDb, p->zName, zName | 
|  7277   ); |  3528   ); | 
|  7278   if( zSql ){ |  3529   if( p->bHasDocsize ){ | 
|  7279     rc = sqlite3_exec(p->db, zSql, 0, 0, 0); |  3530     fts3DbExec(&rc, db, | 
|  7280     sqlite3_free(zSql); |  3531       "ALTER TABLE %Q.'%q_docsize'  RENAME TO '%q_docsize';", | 
|  7281   } |  3532       p->zDb, p->zName, zName | 
 |  3533     ); | 
 |  3534   } | 
 |  3535   if( p->bHasStat ){ | 
 |  3536     fts3DbExec(&rc, db, | 
 |  3537       "ALTER TABLE %Q.'%q_stat'  RENAME TO '%q_stat';", | 
 |  3538       p->zDb, p->zName, zName | 
 |  3539     ); | 
 |  3540   } | 
 |  3541   fts3DbExec(&rc, db, | 
 |  3542     "ALTER TABLE %Q.'%q_segments' RENAME TO '%q_segments';", | 
 |  3543     p->zDb, p->zName, zName | 
 |  3544   ); | 
 |  3545   fts3DbExec(&rc, db, | 
 |  3546     "ALTER TABLE %Q.'%q_segdir'   RENAME TO '%q_segdir';", | 
 |  3547     p->zDb, p->zName, zName | 
 |  3548   ); | 
|  7282   return rc; |  3549   return rc; | 
|  7283 } |  3550 } | 
|  7284  |  3551  | 
|  7285 static const sqlite3_module fts3Module = { |  3552 static const sqlite3_module fts3Module = { | 
|  7286   /* iVersion      */ 0, |  3553   /* iVersion      */ 0, | 
|  7287   /* xCreate       */ fulltextCreate, |  3554   /* xCreate       */ fts3CreateMethod, | 
|  7288   /* xConnect      */ fulltextConnect, |  3555   /* xConnect      */ fts3ConnectMethod, | 
|  7289   /* xBestIndex    */ fulltextBestIndex, |  3556   /* xBestIndex    */ fts3BestIndexMethod, | 
|  7290   /* xDisconnect   */ fulltextDisconnect, |  3557   /* xDisconnect   */ fts3DisconnectMethod, | 
|  7291   /* xDestroy      */ fulltextDestroy, |  3558   /* xDestroy      */ fts3DestroyMethod, | 
|  7292   /* xOpen         */ fulltextOpen, |  3559   /* xOpen         */ fts3OpenMethod, | 
|  7293   /* xClose        */ fulltextClose, |  3560   /* xClose        */ fts3CloseMethod, | 
|  7294   /* xFilter       */ fulltextFilter, |  3561   /* xFilter       */ fts3FilterMethod, | 
|  7295   /* xNext         */ fulltextNext, |  3562   /* xNext         */ fts3NextMethod, | 
|  7296   /* xEof          */ fulltextEof, |  3563   /* xEof          */ fts3EofMethod, | 
|  7297   /* xColumn       */ fulltextColumn, |  3564   /* xColumn       */ fts3ColumnMethod, | 
|  7298   /* xRowid        */ fulltextRowid, |  3565   /* xRowid        */ fts3RowidMethod, | 
|  7299   /* xUpdate       */ fulltextUpdate, |  3566   /* xUpdate       */ fts3UpdateMethod, | 
|  7300   /* xBegin        */ fulltextBegin, |  3567   /* xBegin        */ fts3BeginMethod, | 
|  7301   /* xSync         */ fulltextSync, |  3568   /* xSync         */ fts3SyncMethod, | 
|  7302   /* xCommit       */ fulltextCommit, |  3569   /* xCommit       */ fts3CommitMethod, | 
|  7303   /* xRollback     */ fulltextRollback, |  3570   /* xRollback     */ fts3RollbackMethod, | 
|  7304   /* xFindFunction */ fulltextFindFunction, |  3571   /* xFindFunction */ fts3FindFunctionMethod, | 
|  7305   /* xRename */       fulltextRename, |  3572   /* xRename */       fts3RenameMethod, | 
|  7306 }; |  3573 }; | 
|  7307  |  3574  | 
 |  3575 /* | 
 |  3576 ** This function is registered as the module destructor (called when an | 
 |  3577 ** FTS3 enabled database connection is closed). It frees the memory | 
 |  3578 ** allocated for the tokenizer hash table. | 
 |  3579 */ | 
|  7308 static void hashDestroy(void *p){ |  3580 static void hashDestroy(void *p){ | 
|  7309   fts3Hash *pHash = (fts3Hash *)p; |  3581   Fts3Hash *pHash = (Fts3Hash *)p; | 
|  7310   sqlite3Fts3HashClear(pHash); |  3582   sqlite3Fts3HashClear(pHash); | 
|  7311   sqlite3_free(pHash); |  3583   sqlite3_free(pHash); | 
|  7312 } |  3584 } | 
|  7313  |  3585  | 
|  7314 /* |  3586 /* | 
|  7315 ** The fts3 built-in tokenizers - "simple" and "porter" - are implemented |  3587 ** The fts3 built-in tokenizers - "simple", "porter" and "icu"- are  | 
|  7316 ** in files fts3_tokenizer1.c and fts3_porter.c respectively. The following |  3588 ** implemented in files fts3_tokenizer1.c, fts3_porter.c and fts3_icu.c | 
|  7317 ** two forward declarations are for functions declared in these files |  3589 ** respectively. The following three forward declarations are for functions | 
|  7318 ** used to retrieve the respective implementations. |  3590 ** declared in these files used to retrieve the respective implementations. | 
|  7319 ** |  3591 ** | 
|  7320 ** Calling sqlite3Fts3SimpleTokenizerModule() sets the value pointed |  3592 ** Calling sqlite3Fts3SimpleTokenizerModule() sets the value pointed | 
|  7321 ** to by the argument to point a the "simple" tokenizer implementation. |  3593 ** to by the argument to point to the "simple" tokenizer implementation. | 
|  7322 ** Function ...PorterTokenizerModule() sets *pModule to point to the |  3594 ** And so on. | 
|  7323 ** porter tokenizer/stemmer implementation. |  | 
|  7324 */ |  3595 */ | 
|  7325 void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule); |  3596 void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule); | 
|  7326 void sqlite3Fts3PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule); |  3597 void sqlite3Fts3PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule); | 
 |  3598 #ifdef SQLITE_ENABLE_ICU | 
|  7327 void sqlite3Fts3IcuTokenizerModule(sqlite3_tokenizer_module const**ppModule); |  3599 void sqlite3Fts3IcuTokenizerModule(sqlite3_tokenizer_module const**ppModule); | 
|  7328  |  3600 #endif | 
|  7329 int sqlite3Fts3InitHashTable(sqlite3 *, fts3Hash *, const char *); |  | 
|  7330  |  3601  | 
|  7331 /* |  3602 /* | 
|  7332 ** Initialise the fts3 extension. If this extension is built as part |  3603 ** Initialise the fts3 extension. If this extension is built as part | 
|  7333 ** of the sqlite library, then this function is called directly by |  3604 ** of the sqlite library, then this function is called directly by | 
|  7334 ** SQLite. If fts3 is built as a dynamically loadable extension, this |  3605 ** SQLite. If fts3 is built as a dynamically loadable extension, this | 
|  7335 ** function is called by the sqlite3_extension_init() entry point. |  3606 ** function is called by the sqlite3_extension_init() entry point. | 
|  7336 */ |  3607 */ | 
|  7337 int sqlite3Fts3Init(sqlite3 *db){ |  3608 int sqlite3Fts3Init(sqlite3 *db){ | 
|  7338   int rc = SQLITE_OK; |  3609   int rc = SQLITE_OK; | 
|  7339   fts3Hash *pHash = 0; |  3610   Fts3Hash *pHash = 0; | 
|  7340   const sqlite3_tokenizer_module *pSimple = 0; |  3611   const sqlite3_tokenizer_module *pSimple = 0; | 
|  7341   const sqlite3_tokenizer_module *pPorter = 0; |  3612   const sqlite3_tokenizer_module *pPorter = 0; | 
 |  3613  | 
 |  3614 #ifdef SQLITE_ENABLE_ICU | 
|  7342   const sqlite3_tokenizer_module *pIcu = 0; |  3615   const sqlite3_tokenizer_module *pIcu = 0; | 
 |  3616   sqlite3Fts3IcuTokenizerModule(&pIcu); | 
 |  3617 #endif | 
 |  3618  | 
 |  3619   rc = sqlite3Fts3InitAux(db); | 
 |  3620   if( rc!=SQLITE_OK ) return rc; | 
|  7343  |  3621  | 
|  7344   sqlite3Fts3SimpleTokenizerModule(&pSimple); |  3622   sqlite3Fts3SimpleTokenizerModule(&pSimple); | 
|  7345   sqlite3Fts3PorterTokenizerModule(&pPorter); |  3623   sqlite3Fts3PorterTokenizerModule(&pPorter); | 
|  7346 #ifdef SQLITE_ENABLE_ICU |  | 
|  7347   sqlite3Fts3IcuTokenizerModule(&pIcu); |  | 
|  7348 #endif |  | 
|  7349  |  3624  | 
|  7350   /* Allocate and initialise the hash-table used to store tokenizers. */ |  3625   /* Allocate and initialise the hash-table used to store tokenizers. */ | 
|  7351   pHash = sqlite3_malloc(sizeof(fts3Hash)); |  3626   pHash = sqlite3_malloc(sizeof(Fts3Hash)); | 
|  7352   if( !pHash ){ |  3627   if( !pHash ){ | 
|  7353     rc = SQLITE_NOMEM; |  3628     rc = SQLITE_NOMEM; | 
|  7354   }else{ |  3629   }else{ | 
|  7355     sqlite3Fts3HashInit(pHash, FTS3_HASH_STRING, 1); |  3630     sqlite3Fts3HashInit(pHash, FTS3_HASH_STRING, 1); | 
|  7356   } |  3631   } | 
|  7357  |  3632  | 
|  7358   /* Load the built-in tokenizers into the hash table */ |  3633   /* Load the built-in tokenizers into the hash table */ | 
|  7359   if( rc==SQLITE_OK ){ |  3634   if( rc==SQLITE_OK ){ | 
|  7360     if( sqlite3Fts3HashInsert(pHash, "simple", 7, (void *)pSimple) |  3635     if( sqlite3Fts3HashInsert(pHash, "simple", 7, (void *)pSimple) | 
|  7361      || sqlite3Fts3HashInsert(pHash, "porter", 7, (void *)pPorter)  |  3636      || sqlite3Fts3HashInsert(pHash, "porter", 7, (void *)pPorter)  | 
 |  3637 #ifdef SQLITE_ENABLE_ICU | 
|  7362      || (pIcu && sqlite3Fts3HashInsert(pHash, "icu", 4, (void *)pIcu)) |  3638      || (pIcu && sqlite3Fts3HashInsert(pHash, "icu", 4, (void *)pIcu)) | 
 |  3639 #endif | 
|  7363     ){ |  3640     ){ | 
|  7364       rc = SQLITE_NOMEM; |  3641       rc = SQLITE_NOMEM; | 
|  7365     } |  3642     } | 
|  7366   } |  3643   } | 
|  7367  |  3644  | 
|  7368 #ifdef SQLITE_TEST |  3645 #ifdef SQLITE_TEST | 
|  7369   sqlite3Fts3ExprInitTestInterface(db); |  3646   if( rc==SQLITE_OK ){ | 
 |  3647     rc = sqlite3Fts3ExprInitTestInterface(db); | 
 |  3648   } | 
|  7370 #endif |  3649 #endif | 
|  7371  |  3650  | 
|  7372   /* Create the virtual table wrapper around the hash-table and overload  |  3651   /* Create the virtual table wrapper around the hash-table and overload  | 
|  7373   ** the two scalar functions. If this is successful, register the |  3652   ** the two scalar functions. If this is successful, register the | 
|  7374   ** module with sqlite. |  3653   ** module with sqlite. | 
|  7375   */ |  3654   */ | 
|  7376   if( SQLITE_OK==rc  |  3655   if( SQLITE_OK==rc  | 
|  7377 #if CHROMIUM_FTS3_CHANGES && !SQLITE_TEST |  3656 #if CHROMIUM_FTS3_CHANGES && !SQLITE_TEST | 
|  7378       /* fts3_tokenizer() disabled for security reasons. */ |  3657       /* fts3_tokenizer() disabled for security reasons. */ | 
|  7379 #else |  3658 #else | 
|  7380    && SQLITE_OK==(rc = sqlite3Fts3InitHashTable(db, pHash, "fts3_tokenizer")) |  3659    && SQLITE_OK==(rc = sqlite3Fts3InitHashTable(db, pHash, "fts3_tokenizer")) | 
|  7381 #endif |  3660 #endif | 
|  7382    && SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1)) |  3661    && SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1)) | 
|  7383    && SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", -1)) |  3662    && SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", 1)) | 
|  7384    && SQLITE_OK==(rc = sqlite3_overload_function(db, "optimize", -1)) |  3663    && SQLITE_OK==(rc = sqlite3_overload_function(db, "matchinfo", 1)) | 
|  7385 #ifdef SQLITE_TEST |  3664    && SQLITE_OK==(rc = sqlite3_overload_function(db, "matchinfo", 2)) | 
|  7386    && SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_terms", -1)) |  3665    && SQLITE_OK==(rc = sqlite3_overload_function(db, "optimize", 1)) | 
|  7387    && SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_doclist", -1)) |  | 
|  7388 #endif |  | 
|  7389   ){ |  3666   ){ | 
|  7390     return sqlite3_create_module_v2( |  3667     rc = sqlite3_create_module_v2( | 
|  7391         db, "fts3", &fts3Module, (void *)pHash, hashDestroy |  3668         db, "fts3", &fts3Module, (void *)pHash, hashDestroy | 
|  7392     ); |  3669     ); | 
 |  3670 #if CHROMIUM_FTS3_CHANGES && !SQLITE_TEST | 
 |  3671     /* Disable fts4 pending review. */ | 
 |  3672 #else | 
 |  3673     if( rc==SQLITE_OK ){ | 
 |  3674       rc = sqlite3_create_module_v2( | 
 |  3675           db, "fts4", &fts3Module, (void *)pHash, 0 | 
 |  3676       ); | 
 |  3677     } | 
 |  3678 #endif | 
 |  3679     return rc; | 
|  7393   } |  3680   } | 
|  7394  |  3681  | 
|  7395   /* An error has occurred. Delete the hash table and return the error code. */ |  3682   /* An error has occurred. Delete the hash table and return the error code. */ | 
|  7396   assert( rc!=SQLITE_OK ); |  3683   assert( rc!=SQLITE_OK ); | 
|  7397   if( pHash ){ |  3684   if( pHash ){ | 
|  7398     sqlite3Fts3HashClear(pHash); |  3685     sqlite3Fts3HashClear(pHash); | 
|  7399     sqlite3_free(pHash); |  3686     sqlite3_free(pHash); | 
|  7400   } |  3687   } | 
|  7401   return rc; |  3688   return rc; | 
|  7402 } |  3689 } | 
|  7403  |  3690  | 
|  7404 #if !SQLITE_CORE |  3691 #if !SQLITE_CORE | 
|  7405 int sqlite3_extension_init( |  3692 int sqlite3_extension_init( | 
|  7406   sqlite3 *db,  |  3693   sqlite3 *db,  | 
|  7407   char **pzErrMsg, |  3694   char **pzErrMsg, | 
|  7408   const sqlite3_api_routines *pApi |  3695   const sqlite3_api_routines *pApi | 
|  7409 ){ |  3696 ){ | 
|  7410   SQLITE_EXTENSION_INIT2(pApi) |  3697   SQLITE_EXTENSION_INIT2(pApi) | 
|  7411   return sqlite3Fts3Init(db); |  3698   return sqlite3Fts3Init(db); | 
|  7412 } |  3699 } | 
|  7413 #endif |  3700 #endif | 
|  7414  |  3701  | 
|  7415 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ |  3702 #endif | 
| OLD | NEW |