| OLD | NEW | 
 | (Empty) | 
|     1 /* fts1 has a design flaw which can lead to database corruption (see |  | 
|     2 ** below).  It is recommended not to use it any longer, instead use |  | 
|     3 ** fts3 (or higher).  If you believe that your use of fts1 is safe, |  | 
|     4 ** add -DSQLITE_ENABLE_BROKEN_FTS1=1 to your CFLAGS. |  | 
|     5 */ |  | 
|     6 #if (!defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1)) \ |  | 
|     7         && !defined(SQLITE_ENABLE_BROKEN_FTS1) |  | 
|     8 #error fts1 has a design flaw and has been deprecated. |  | 
|     9 #endif |  | 
|    10 /* The flaw is that fts1 uses the content table's unaliased rowid as |  | 
|    11 ** the unique docid.  fts1 embeds the rowid in the index it builds, |  | 
|    12 ** and expects the rowid to not change.  The SQLite VACUUM operation |  | 
|    13 ** will renumber such rowids, thereby breaking fts1.  If you are using |  | 
|    14 ** fts1 in a system which has disabled VACUUM, then you can continue |  | 
|    15 ** to use it safely.  Note that PRAGMA auto_vacuum does NOT disable |  | 
|    16 ** VACUUM, though systems using auto_vacuum are unlikely to invoke |  | 
|    17 ** VACUUM. |  | 
|    18 ** |  | 
|    19 ** fts1 should be safe even across VACUUM if you only insert documents |  | 
|    20 ** and never delete. |  | 
|    21 */ |  | 
|    22  |  | 
|    23 /* The author disclaims copyright to this source code. |  | 
|    24  * |  | 
|    25  * This is an SQLite module implementing full-text search. |  | 
|    26  */ |  | 
|    27  |  | 
|    28 /* |  | 
|    29 ** The code in this file is only compiled if: |  | 
|    30 ** |  | 
|    31 **     * The FTS1 module is being built as an extension |  | 
|    32 **       (in which case SQLITE_CORE is not defined), or |  | 
|    33 ** |  | 
|    34 **     * The FTS1 module is being built into the core of |  | 
|    35 **       SQLite (in which case SQLITE_ENABLE_FTS1 is defined). |  | 
|    36 */ |  | 
|    37 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1) |  | 
|    38  |  | 
|    39 #if defined(SQLITE_ENABLE_FTS1) && !defined(SQLITE_CORE) |  | 
|    40 # define SQLITE_CORE 1 |  | 
|    41 #endif |  | 
|    42  |  | 
|    43 #include <assert.h> |  | 
|    44 #include <stdlib.h> |  | 
|    45 #include <stdio.h> |  | 
|    46 #include <string.h> |  | 
|    47 #include <ctype.h> |  | 
|    48  |  | 
|    49 #include "fts1.h" |  | 
|    50 #include "fts1_hash.h" |  | 
|    51 #include "fts1_tokenizer.h" |  | 
|    52 #include "sqlite3.h" |  | 
|    53 #include "sqlite3ext.h" |  | 
|    54 SQLITE_EXTENSION_INIT1 |  | 
|    55  |  | 
|    56  |  | 
|    57 #if 0 |  | 
|    58 # define TRACE(A)  printf A; fflush(stdout) |  | 
|    59 #else |  | 
|    60 # define TRACE(A) |  | 
|    61 #endif |  | 
|    62  |  | 
|    63 /* utility functions */ |  | 
|    64  |  | 
|    65 typedef struct StringBuffer { |  | 
|    66   int len;      /* length, not including null terminator */ |  | 
|    67   int alloced;  /* Space allocated for s[] */  |  | 
|    68   char *s;      /* Content of the string */ |  | 
|    69 } StringBuffer; |  | 
|    70  |  | 
|    71 static void initStringBuffer(StringBuffer *sb){ |  | 
|    72   sb->len = 0; |  | 
|    73   sb->alloced = 100; |  | 
|    74   sb->s = malloc(100); |  | 
|    75   sb->s[0] = '\0'; |  | 
|    76 } |  | 
|    77  |  | 
|    78 static void nappend(StringBuffer *sb, const char *zFrom, int nFrom){ |  | 
|    79   if( sb->len + nFrom >= sb->alloced ){ |  | 
|    80     sb->alloced = sb->len + nFrom + 100; |  | 
|    81     sb->s = realloc(sb->s, sb->alloced+1); |  | 
|    82     if( sb->s==0 ){ |  | 
|    83       initStringBuffer(sb); |  | 
|    84       return; |  | 
|    85     } |  | 
|    86   } |  | 
|    87   memcpy(sb->s + sb->len, zFrom, nFrom); |  | 
|    88   sb->len += nFrom; |  | 
|    89   sb->s[sb->len] = 0; |  | 
|    90 } |  | 
|    91 static void append(StringBuffer *sb, const char *zFrom){ |  | 
|    92   nappend(sb, zFrom, strlen(zFrom)); |  | 
|    93 } |  | 
|    94  |  | 
|    95 /* We encode variable-length integers in little-endian order using seven bits |  | 
|    96  * per byte as follows: |  | 
|    97 ** |  | 
|    98 ** KEY: |  | 
|    99 **         A = 0xxxxxxx    7 bits of data and one flag bit |  | 
|   100 **         B = 1xxxxxxx    7 bits of data and one flag bit |  | 
|   101 ** |  | 
|   102 **  7 bits - A |  | 
|   103 ** 14 bits - BA |  | 
|   104 ** 21 bits - BBA |  | 
|   105 ** and so on. |  | 
|   106 */ |  | 
|   107  |  | 
|   108 /* We may need up to VARINT_MAX bytes to store an encoded 64-bit integer. */ |  | 
|   109 #define VARINT_MAX 10 |  | 
|   110  |  | 
|   111 /* Write a 64-bit variable-length integer to memory starting at p[0]. |  | 
|   112  * The length of data written will be between 1 and VARINT_MAX bytes. |  | 
|   113  * The number of bytes written is returned. */ |  | 
|   114 static int putVarint(char *p, sqlite_int64 v){ |  | 
|   115   unsigned char *q = (unsigned char *) p; |  | 
|   116   sqlite_uint64 vu = v; |  | 
|   117   do{ |  | 
|   118     *q++ = (unsigned char) ((vu & 0x7f) | 0x80); |  | 
|   119     vu >>= 7; |  | 
|   120   }while( vu!=0 ); |  | 
|   121   q[-1] &= 0x7f;  /* turn off high bit in final byte */ |  | 
|   122   assert( q - (unsigned char *)p <= VARINT_MAX ); |  | 
|   123   return (int) (q - (unsigned char *)p); |  | 
|   124 } |  | 
|   125  |  | 
|   126 /* Read a 64-bit variable-length integer from memory starting at p[0]. |  | 
|   127  * Return the number of bytes read, or 0 on error. |  | 
|   128  * The value is stored in *v. */ |  | 
|   129 static int getVarint(const char *p, sqlite_int64 *v){ |  | 
|   130   const unsigned char *q = (const unsigned char *) p; |  | 
|   131   sqlite_uint64 x = 0, y = 1; |  | 
|   132   while( (*q & 0x80) == 0x80 ){ |  | 
|   133     x += y * (*q++ & 0x7f); |  | 
|   134     y <<= 7; |  | 
|   135     if( q - (unsigned char *)p >= VARINT_MAX ){  /* bad data */ |  | 
|   136       assert( 0 ); |  | 
|   137       return 0; |  | 
|   138     } |  | 
|   139   } |  | 
|   140   x += y * (*q++); |  | 
|   141   *v = (sqlite_int64) x; |  | 
|   142   return (int) (q - (unsigned char *)p); |  | 
|   143 } |  | 
|   144  |  | 
|   145 static int getVarint32(const char *p, int *pi){ |  | 
|   146  sqlite_int64 i; |  | 
|   147  int ret = getVarint(p, &i); |  | 
|   148  *pi = (int) i; |  | 
|   149  assert( *pi==i ); |  | 
|   150  return ret; |  | 
|   151 } |  | 
|   152  |  | 
|   153 /*** Document lists *** |  | 
|   154  * |  | 
|   155  * A document list holds a sorted list of varint-encoded document IDs. |  | 
|   156  * |  | 
|   157  * A doclist with type DL_POSITIONS_OFFSETS is stored like this: |  | 
|   158  * |  | 
|   159  * array { |  | 
|   160  *   varint docid; |  | 
|   161  *   array { |  | 
|   162  *     varint position;     (delta from previous position plus POS_BASE) |  | 
|   163  *     varint startOffset;  (delta from previous startOffset) |  | 
|   164  *     varint endOffset;    (delta from startOffset) |  | 
|   165  *   } |  | 
|   166  * } |  | 
|   167  * |  | 
|   168  * Here, array { X } means zero or more occurrences of X, adjacent in memory. |  | 
|   169  * |  | 
|   170  * A position list may hold positions for text in multiple columns.  A position |  | 
|   171  * POS_COLUMN is followed by a varint containing the index of the column for |  | 
|   172  * following positions in the list.  Any positions appearing before any |  | 
|   173  * occurrences of POS_COLUMN are for column 0. |  | 
|   174  * |  | 
|   175  * A doclist with type DL_POSITIONS is like the above, but holds only docids |  | 
|   176  * and positions without offset information. |  | 
|   177  * |  | 
|   178  * A doclist with type DL_DOCIDS is like the above, but holds only docids |  | 
|   179  * without positions or offset information. |  | 
|   180  * |  | 
|   181  * On disk, every document list has positions and offsets, so we don't bother |  | 
|   182  * to serialize a doclist's type. |  | 
|   183  *  |  | 
|   184  * We don't yet delta-encode document IDs; doing so will probably be a |  | 
|   185  * modest win. |  | 
|   186  * |  | 
|   187  * NOTE(shess) I've thought of a slightly (1%) better offset encoding. |  | 
|   188  * After the first offset, estimate the next offset by using the |  | 
|   189  * current token position and the previous token position and offset, |  | 
|   190  * offset to handle some variance.  So the estimate would be |  | 
|   191  * (iPosition*w->iStartOffset/w->iPosition-64), which is delta-encoded |  | 
|   192  * as normal.  Offsets more than 64 chars from the estimate are |  | 
|   193  * encoded as the delta to the previous start offset + 128.  An |  | 
|   194  * additional tiny increment can be gained by using the end offset of |  | 
|   195  * the previous token to make the estimate a tiny bit more precise. |  | 
|   196 */ |  | 
|   197  |  | 
|   198 /* It is not safe to call isspace(), tolower(), or isalnum() on |  | 
|   199 ** hi-bit-set characters.  This is the same solution used in the |  | 
|   200 ** tokenizer. |  | 
|   201 */ |  | 
|   202 /* TODO(shess) The snippet-generation code should be using the |  | 
|   203 ** tokenizer-generated tokens rather than doing its own local |  | 
|   204 ** tokenization. |  | 
|   205 */ |  | 
|   206 /* TODO(shess) Is __isascii() a portable version of (c&0x80)==0? */ |  | 
|   207 static int safe_isspace(char c){ |  | 
|   208   return (c&0x80)==0 ? isspace(c) : 0; |  | 
|   209 } |  | 
|   210 static int safe_tolower(char c){ |  | 
|   211   return (c>='A' && c<='Z') ? (c-'A'+'a') : c; |  | 
|   212 } |  | 
|   213 static int safe_isalnum(char c){ |  | 
|   214   return (c&0x80)==0 ? isalnum(c) : 0; |  | 
|   215 } |  | 
|   216  |  | 
|   217 typedef enum DocListType { |  | 
|   218   DL_DOCIDS,              /* docids only */ |  | 
|   219   DL_POSITIONS,           /* docids + positions */ |  | 
|   220   DL_POSITIONS_OFFSETS    /* docids + positions + offsets */ |  | 
|   221 } DocListType; |  | 
|   222  |  | 
|   223 /* |  | 
|   224 ** By default, only positions and not offsets are stored in the doclists. |  | 
|   225 ** To change this so that offsets are stored too, compile with |  | 
|   226 ** |  | 
|   227 **          -DDL_DEFAULT=DL_POSITIONS_OFFSETS |  | 
|   228 ** |  | 
|   229 */ |  | 
|   230 #ifndef DL_DEFAULT |  | 
|   231 # define DL_DEFAULT DL_POSITIONS |  | 
|   232 #endif |  | 
|   233  |  | 
|   234 typedef struct DocList { |  | 
|   235   char *pData; |  | 
|   236   int nData; |  | 
|   237   DocListType iType; |  | 
|   238   int iLastColumn;    /* the last column written */ |  | 
|   239   int iLastPos;       /* the last position written */ |  | 
|   240   int iLastOffset;    /* the last start offset written */ |  | 
|   241 } DocList; |  | 
|   242  |  | 
|   243 enum { |  | 
|   244   POS_END = 0,        /* end of this position list */ |  | 
|   245   POS_COLUMN,         /* followed by new column number */ |  | 
|   246   POS_BASE |  | 
|   247 }; |  | 
|   248  |  | 
|   249 /* Initialize a new DocList to hold the given data. */ |  | 
|   250 static void docListInit(DocList *d, DocListType iType, |  | 
|   251                         const char *pData, int nData){ |  | 
|   252   d->nData = nData; |  | 
|   253   if( nData>0 ){ |  | 
|   254     d->pData = malloc(nData); |  | 
|   255     memcpy(d->pData, pData, nData); |  | 
|   256   } else { |  | 
|   257     d->pData = NULL; |  | 
|   258   } |  | 
|   259   d->iType = iType; |  | 
|   260   d->iLastColumn = 0; |  | 
|   261   d->iLastPos = d->iLastOffset = 0; |  | 
|   262 } |  | 
|   263  |  | 
|   264 /* Create a new dynamically-allocated DocList. */ |  | 
|   265 static DocList *docListNew(DocListType iType){ |  | 
|   266   DocList *d = (DocList *) malloc(sizeof(DocList)); |  | 
|   267   docListInit(d, iType, 0, 0); |  | 
|   268   return d; |  | 
|   269 } |  | 
|   270  |  | 
|   271 static void docListDestroy(DocList *d){ |  | 
|   272   free(d->pData); |  | 
|   273 #ifndef NDEBUG |  | 
|   274   memset(d, 0x55, sizeof(*d)); |  | 
|   275 #endif |  | 
|   276 } |  | 
|   277  |  | 
|   278 static void docListDelete(DocList *d){ |  | 
|   279   docListDestroy(d); |  | 
|   280   free(d); |  | 
|   281 } |  | 
|   282  |  | 
|   283 static char *docListEnd(DocList *d){ |  | 
|   284   return d->pData + d->nData; |  | 
|   285 } |  | 
|   286  |  | 
|   287 /* Append a varint to a DocList's data. */ |  | 
|   288 static void appendVarint(DocList *d, sqlite_int64 i){ |  | 
|   289   char c[VARINT_MAX]; |  | 
|   290   int n = putVarint(c, i); |  | 
|   291   d->pData = realloc(d->pData, d->nData + n); |  | 
|   292   memcpy(d->pData + d->nData, c, n); |  | 
|   293   d->nData += n; |  | 
|   294 } |  | 
|   295  |  | 
|   296 static void docListAddDocid(DocList *d, sqlite_int64 iDocid){ |  | 
|   297   appendVarint(d, iDocid); |  | 
|   298   if( d->iType>=DL_POSITIONS ){ |  | 
|   299     appendVarint(d, POS_END);  /* initially empty position list */ |  | 
|   300     d->iLastColumn = 0; |  | 
|   301     d->iLastPos = d->iLastOffset = 0; |  | 
|   302   } |  | 
|   303 } |  | 
|   304  |  | 
|   305 /* helper function for docListAddPos and docListAddPosOffset */ |  | 
|   306 static void addPos(DocList *d, int iColumn, int iPos){ |  | 
|   307   assert( d->nData>0 ); |  | 
|   308   --d->nData;  /* remove previous terminator */ |  | 
|   309   if( iColumn!=d->iLastColumn ){ |  | 
|   310     assert( iColumn>d->iLastColumn ); |  | 
|   311     appendVarint(d, POS_COLUMN); |  | 
|   312     appendVarint(d, iColumn); |  | 
|   313     d->iLastColumn = iColumn; |  | 
|   314     d->iLastPos = d->iLastOffset = 0; |  | 
|   315   } |  | 
|   316   assert( iPos>=d->iLastPos ); |  | 
|   317   appendVarint(d, iPos-d->iLastPos+POS_BASE); |  | 
|   318   d->iLastPos = iPos; |  | 
|   319 } |  | 
|   320  |  | 
|   321 /* Add a position to the last position list in a doclist. */ |  | 
|   322 static void docListAddPos(DocList *d, int iColumn, int iPos){ |  | 
|   323   assert( d->iType==DL_POSITIONS ); |  | 
|   324   addPos(d, iColumn, iPos); |  | 
|   325   appendVarint(d, POS_END);  /* add new terminator */ |  | 
|   326 } |  | 
|   327  |  | 
|   328 /* |  | 
|   329 ** Add a position and starting and ending offsets to a doclist. |  | 
|   330 ** |  | 
|   331 ** If the doclist is setup to handle only positions, then insert |  | 
|   332 ** the position only and ignore the offsets. |  | 
|   333 */ |  | 
|   334 static void docListAddPosOffset( |  | 
|   335   DocList *d,             /* Doclist under construction */ |  | 
|   336   int iColumn,            /* Column the inserted term is part of */ |  | 
|   337   int iPos,               /* Position of the inserted term */ |  | 
|   338   int iStartOffset,       /* Starting offset of inserted term */ |  | 
|   339   int iEndOffset          /* Ending offset of inserted term */ |  | 
|   340 ){ |  | 
|   341   assert( d->iType>=DL_POSITIONS ); |  | 
|   342   addPos(d, iColumn, iPos); |  | 
|   343   if( d->iType==DL_POSITIONS_OFFSETS ){ |  | 
|   344     assert( iStartOffset>=d->iLastOffset ); |  | 
|   345     appendVarint(d, iStartOffset-d->iLastOffset); |  | 
|   346     d->iLastOffset = iStartOffset; |  | 
|   347     assert( iEndOffset>=iStartOffset ); |  | 
|   348     appendVarint(d, iEndOffset-iStartOffset); |  | 
|   349   } |  | 
|   350   appendVarint(d, POS_END);  /* add new terminator */ |  | 
|   351 } |  | 
|   352  |  | 
|   353 /* |  | 
|   354 ** A DocListReader object is a cursor into a doclist.  Initialize |  | 
|   355 ** the cursor to the beginning of the doclist by calling readerInit(). |  | 
|   356 ** Then use routines |  | 
|   357 ** |  | 
|   358 **      peekDocid() |  | 
|   359 **      readDocid() |  | 
|   360 **      readPosition() |  | 
|   361 **      skipPositionList() |  | 
|   362 **      and so forth... |  | 
|   363 ** |  | 
|   364 ** to read information out of the doclist.  When we reach the end |  | 
|   365 ** of the doclist, atEnd() returns TRUE. |  | 
|   366 */ |  | 
|   367 typedef struct DocListReader { |  | 
|   368   DocList *pDoclist;  /* The document list we are stepping through */ |  | 
|   369   char *p;            /* Pointer to next unread byte in the doclist */ |  | 
|   370   int iLastColumn; |  | 
|   371   int iLastPos;  /* the last position read, or -1 when not in a position list */ |  | 
|   372 } DocListReader; |  | 
|   373  |  | 
|   374 /* |  | 
|   375 ** Initialize the DocListReader r to point to the beginning of pDoclist. |  | 
|   376 */ |  | 
|   377 static void readerInit(DocListReader *r, DocList *pDoclist){ |  | 
|   378   r->pDoclist = pDoclist; |  | 
|   379   if( pDoclist!=NULL ){ |  | 
|   380     r->p = pDoclist->pData; |  | 
|   381   } |  | 
|   382   r->iLastColumn = -1; |  | 
|   383   r->iLastPos = -1; |  | 
|   384 } |  | 
|   385  |  | 
|   386 /* |  | 
|   387 ** Return TRUE if we have reached then end of pReader and there is |  | 
|   388 ** nothing else left to read. |  | 
|   389 */ |  | 
|   390 static int atEnd(DocListReader *pReader){ |  | 
|   391   return pReader->pDoclist==0 || (pReader->p >= docListEnd(pReader->pDoclist)); |  | 
|   392 } |  | 
|   393  |  | 
|   394 /* Peek at the next docid without advancing the read pointer.  |  | 
|   395 */ |  | 
|   396 static sqlite_int64 peekDocid(DocListReader *pReader){ |  | 
|   397   sqlite_int64 ret; |  | 
|   398   assert( !atEnd(pReader) ); |  | 
|   399   assert( pReader->iLastPos==-1 ); |  | 
|   400   getVarint(pReader->p, &ret); |  | 
|   401   return ret; |  | 
|   402 } |  | 
|   403  |  | 
|   404 /* Read the next docid.   See also nextDocid(). |  | 
|   405 */ |  | 
|   406 static sqlite_int64 readDocid(DocListReader *pReader){ |  | 
|   407   sqlite_int64 ret; |  | 
|   408   assert( !atEnd(pReader) ); |  | 
|   409   assert( pReader->iLastPos==-1 ); |  | 
|   410   pReader->p += getVarint(pReader->p, &ret); |  | 
|   411   if( pReader->pDoclist->iType>=DL_POSITIONS ){ |  | 
|   412     pReader->iLastColumn = 0; |  | 
|   413     pReader->iLastPos = 0; |  | 
|   414   } |  | 
|   415   return ret; |  | 
|   416 } |  | 
|   417  |  | 
|   418 /* Read the next position and column index from a position list. |  | 
|   419  * Returns the position, or -1 at the end of the list. */ |  | 
|   420 static int readPosition(DocListReader *pReader, int *iColumn){ |  | 
|   421   int i; |  | 
|   422   int iType = pReader->pDoclist->iType; |  | 
|   423  |  | 
|   424   if( pReader->iLastPos==-1 ){ |  | 
|   425     return -1; |  | 
|   426   } |  | 
|   427   assert( !atEnd(pReader) ); |  | 
|   428  |  | 
|   429   if( iType<DL_POSITIONS ){ |  | 
|   430     return -1; |  | 
|   431   } |  | 
|   432   pReader->p += getVarint32(pReader->p, &i); |  | 
|   433   if( i==POS_END ){ |  | 
|   434     pReader->iLastColumn = pReader->iLastPos = -1; |  | 
|   435     *iColumn = -1; |  | 
|   436     return -1; |  | 
|   437   } |  | 
|   438   if( i==POS_COLUMN ){ |  | 
|   439     pReader->p += getVarint32(pReader->p, &pReader->iLastColumn); |  | 
|   440     pReader->iLastPos = 0; |  | 
|   441     pReader->p += getVarint32(pReader->p, &i); |  | 
|   442     assert( i>=POS_BASE ); |  | 
|   443   } |  | 
|   444   pReader->iLastPos += ((int) i)-POS_BASE; |  | 
|   445   if( iType>=DL_POSITIONS_OFFSETS ){ |  | 
|   446     /* Skip over offsets, ignoring them for now. */ |  | 
|   447     int iStart, iEnd; |  | 
|   448     pReader->p += getVarint32(pReader->p, &iStart); |  | 
|   449     pReader->p += getVarint32(pReader->p, &iEnd); |  | 
|   450   } |  | 
|   451   *iColumn = pReader->iLastColumn; |  | 
|   452   return pReader->iLastPos; |  | 
|   453 } |  | 
|   454  |  | 
|   455 /* Skip past the end of a position list. */ |  | 
|   456 static void skipPositionList(DocListReader *pReader){ |  | 
|   457   DocList *p = pReader->pDoclist; |  | 
|   458   if( p && p->iType>=DL_POSITIONS ){ |  | 
|   459     int iColumn; |  | 
|   460     while( readPosition(pReader, &iColumn)!=-1 ){} |  | 
|   461   } |  | 
|   462 } |  | 
|   463  |  | 
|   464 /* Skip over a docid, including its position list if the doclist has |  | 
|   465  * positions. */ |  | 
|   466 static void skipDocument(DocListReader *pReader){ |  | 
|   467   readDocid(pReader); |  | 
|   468   skipPositionList(pReader); |  | 
|   469 } |  | 
|   470  |  | 
|   471 /* Skip past all docids which are less than [iDocid].  Returns 1 if a docid |  | 
|   472  * matching [iDocid] was found.  */ |  | 
|   473 static int skipToDocid(DocListReader *pReader, sqlite_int64 iDocid){ |  | 
|   474   sqlite_int64 d = 0; |  | 
|   475   while( !atEnd(pReader) && (d=peekDocid(pReader))<iDocid ){ |  | 
|   476     skipDocument(pReader); |  | 
|   477   } |  | 
|   478   return !atEnd(pReader) && d==iDocid; |  | 
|   479 } |  | 
|   480  |  | 
|   481 /* Return the first document in a document list. |  | 
|   482 */ |  | 
|   483 static sqlite_int64 firstDocid(DocList *d){ |  | 
|   484   DocListReader r; |  | 
|   485   readerInit(&r, d); |  | 
|   486   return readDocid(&r); |  | 
|   487 } |  | 
|   488  |  | 
|   489 #ifdef SQLITE_DEBUG |  | 
|   490 /* |  | 
|   491 ** This routine is used for debugging purpose only. |  | 
|   492 ** |  | 
|   493 ** Write the content of a doclist to standard output. |  | 
|   494 */ |  | 
|   495 static void printDoclist(DocList *p){ |  | 
|   496   DocListReader r; |  | 
|   497   const char *zSep = ""; |  | 
|   498  |  | 
|   499   readerInit(&r, p); |  | 
|   500   while( !atEnd(&r) ){ |  | 
|   501     sqlite_int64 docid = readDocid(&r); |  | 
|   502     if( docid==0 ){ |  | 
|   503       skipPositionList(&r); |  | 
|   504       continue; |  | 
|   505     } |  | 
|   506     printf("%s%lld", zSep, docid); |  | 
|   507     zSep =  ","; |  | 
|   508     if( p->iType>=DL_POSITIONS ){ |  | 
|   509       int iPos, iCol; |  | 
|   510       const char *zDiv = ""; |  | 
|   511       printf("("); |  | 
|   512       while( (iPos = readPosition(&r, &iCol))>=0 ){ |  | 
|   513         printf("%s%d:%d", zDiv, iCol, iPos); |  | 
|   514         zDiv = ":"; |  | 
|   515       } |  | 
|   516       printf(")"); |  | 
|   517     } |  | 
|   518   } |  | 
|   519   printf("\n"); |  | 
|   520   fflush(stdout); |  | 
|   521 } |  | 
|   522 #endif /* SQLITE_DEBUG */ |  | 
|   523  |  | 
|   524 /* Trim the given doclist to contain only positions in column |  | 
|   525  * [iRestrictColumn]. */ |  | 
|   526 static void docListRestrictColumn(DocList *in, int iRestrictColumn){ |  | 
|   527   DocListReader r; |  | 
|   528   DocList out; |  | 
|   529  |  | 
|   530   assert( in->iType>=DL_POSITIONS ); |  | 
|   531   readerInit(&r, in); |  | 
|   532   docListInit(&out, DL_POSITIONS, NULL, 0); |  | 
|   533  |  | 
|   534   while( !atEnd(&r) ){ |  | 
|   535     sqlite_int64 iDocid = readDocid(&r); |  | 
|   536     int iPos, iColumn; |  | 
|   537  |  | 
|   538     docListAddDocid(&out, iDocid); |  | 
|   539     while( (iPos = readPosition(&r, &iColumn)) != -1 ){ |  | 
|   540       if( iColumn==iRestrictColumn ){ |  | 
|   541         docListAddPos(&out, iColumn, iPos); |  | 
|   542       } |  | 
|   543     } |  | 
|   544   } |  | 
|   545  |  | 
|   546   docListDestroy(in); |  | 
|   547   *in = out; |  | 
|   548 } |  | 
|   549  |  | 
|   550 /* Trim the given doclist by discarding any docids without any remaining |  | 
|   551  * positions. */ |  | 
|   552 static void docListDiscardEmpty(DocList *in) { |  | 
|   553   DocListReader r; |  | 
|   554   DocList out; |  | 
|   555  |  | 
|   556   /* TODO: It would be nice to implement this operation in place; that |  | 
|   557    * could save a significant amount of memory in queries with long doclists. */ |  | 
|   558   assert( in->iType>=DL_POSITIONS ); |  | 
|   559   readerInit(&r, in); |  | 
|   560   docListInit(&out, DL_POSITIONS, NULL, 0); |  | 
|   561  |  | 
|   562   while( !atEnd(&r) ){ |  | 
|   563     sqlite_int64 iDocid = readDocid(&r); |  | 
|   564     int match = 0; |  | 
|   565     int iPos, iColumn; |  | 
|   566     while( (iPos = readPosition(&r, &iColumn)) != -1 ){ |  | 
|   567       if( !match ){ |  | 
|   568         docListAddDocid(&out, iDocid); |  | 
|   569         match = 1; |  | 
|   570       } |  | 
|   571       docListAddPos(&out, iColumn, iPos); |  | 
|   572     } |  | 
|   573   } |  | 
|   574  |  | 
|   575   docListDestroy(in); |  | 
|   576   *in = out; |  | 
|   577 } |  | 
|   578  |  | 
|   579 /* Helper function for docListUpdate() and docListAccumulate(). |  | 
|   580 ** Splices a doclist element into the doclist represented by r, |  | 
|   581 ** leaving r pointing after the newly spliced element. |  | 
|   582 */ |  | 
|   583 static void docListSpliceElement(DocListReader *r, sqlite_int64 iDocid, |  | 
|   584                                  const char *pSource, int nSource){ |  | 
|   585   DocList *d = r->pDoclist; |  | 
|   586   char *pTarget; |  | 
|   587   int nTarget, found; |  | 
|   588  |  | 
|   589   found = skipToDocid(r, iDocid); |  | 
|   590  |  | 
|   591   /* Describe slice in d to place pSource/nSource. */ |  | 
|   592   pTarget = r->p; |  | 
|   593   if( found ){ |  | 
|   594     skipDocument(r); |  | 
|   595     nTarget = r->p-pTarget; |  | 
|   596   }else{ |  | 
|   597     nTarget = 0; |  | 
|   598   } |  | 
|   599  |  | 
|   600   /* The sense of the following is that there are three possibilities. |  | 
|   601   ** If nTarget==nSource, we should not move any memory nor realloc. |  | 
|   602   ** If nTarget>nSource, trim target and realloc. |  | 
|   603   ** If nTarget<nSource, realloc then expand target. |  | 
|   604   */ |  | 
|   605   if( nTarget>nSource ){ |  | 
|   606     memmove(pTarget+nSource, pTarget+nTarget, docListEnd(d)-(pTarget+nTarget)); |  | 
|   607   } |  | 
|   608   if( nTarget!=nSource ){ |  | 
|   609     int iDoclist = pTarget-d->pData; |  | 
|   610     d->pData = realloc(d->pData, d->nData+nSource-nTarget); |  | 
|   611     pTarget = d->pData+iDoclist; |  | 
|   612   } |  | 
|   613   if( nTarget<nSource ){ |  | 
|   614     memmove(pTarget+nSource, pTarget+nTarget, docListEnd(d)-(pTarget+nTarget)); |  | 
|   615   } |  | 
|   616  |  | 
|   617   memcpy(pTarget, pSource, nSource); |  | 
|   618   d->nData += nSource-nTarget; |  | 
|   619   r->p = pTarget+nSource; |  | 
|   620 } |  | 
|   621  |  | 
|   622 /* Insert/update pUpdate into the doclist. */ |  | 
|   623 static void docListUpdate(DocList *d, DocList *pUpdate){ |  | 
|   624   DocListReader reader; |  | 
|   625  |  | 
|   626   assert( d!=NULL && pUpdate!=NULL ); |  | 
|   627   assert( d->iType==pUpdate->iType); |  | 
|   628  |  | 
|   629   readerInit(&reader, d); |  | 
|   630   docListSpliceElement(&reader, firstDocid(pUpdate), |  | 
|   631                        pUpdate->pData, pUpdate->nData); |  | 
|   632 } |  | 
|   633  |  | 
|   634 /* Propagate elements from pUpdate to pAcc, overwriting elements with |  | 
|   635 ** matching docids. |  | 
|   636 */ |  | 
|   637 static void docListAccumulate(DocList *pAcc, DocList *pUpdate){ |  | 
|   638   DocListReader accReader, updateReader; |  | 
|   639  |  | 
|   640   /* Handle edge cases where one doclist is empty. */ |  | 
|   641   assert( pAcc!=NULL ); |  | 
|   642   if( pUpdate==NULL || pUpdate->nData==0 ) return; |  | 
|   643   if( pAcc->nData==0 ){ |  | 
|   644     pAcc->pData = malloc(pUpdate->nData); |  | 
|   645     memcpy(pAcc->pData, pUpdate->pData, pUpdate->nData); |  | 
|   646     pAcc->nData = pUpdate->nData; |  | 
|   647     return; |  | 
|   648   } |  | 
|   649  |  | 
|   650   readerInit(&accReader, pAcc); |  | 
|   651   readerInit(&updateReader, pUpdate); |  | 
|   652  |  | 
|   653   while( !atEnd(&updateReader) ){ |  | 
|   654     char *pSource = updateReader.p; |  | 
|   655     sqlite_int64 iDocid = readDocid(&updateReader); |  | 
|   656     skipPositionList(&updateReader); |  | 
|   657     docListSpliceElement(&accReader, iDocid, pSource, updateReader.p-pSource); |  | 
|   658   } |  | 
|   659 } |  | 
|   660  |  | 
|   661 /* |  | 
|   662 ** Read the next docid off of pIn.  Return 0 if we reach the end. |  | 
|   663 * |  | 
|   664 * TODO: This assumes that docids are never 0, but they may actually be 0 since |  | 
|   665 * users can choose docids when inserting into a full-text table.  Fix this. |  | 
|   666 */ |  | 
|   667 static sqlite_int64 nextDocid(DocListReader *pIn){ |  | 
|   668   skipPositionList(pIn); |  | 
|   669   return atEnd(pIn) ? 0 : readDocid(pIn); |  | 
|   670 } |  | 
|   671  |  | 
|   672 /* |  | 
|   673 ** pLeft and pRight are two DocListReaders that are pointing to |  | 
|   674 ** positions lists of the same document: iDocid.  |  | 
|   675 ** |  | 
|   676 ** If there are no instances in pLeft or pRight where the position |  | 
|   677 ** of pLeft is one less than the position of pRight, then this |  | 
|   678 ** routine adds nothing to pOut. |  | 
|   679 ** |  | 
|   680 ** If there are one or more instances where positions from pLeft |  | 
|   681 ** are exactly one less than positions from pRight, then add a new |  | 
|   682 ** document record to pOut.  If pOut wants to hold positions, then |  | 
|   683 ** include the positions from pRight that are one more than a |  | 
|   684 ** position in pLeft.  In other words:  pRight.iPos==pLeft.iPos+1. |  | 
|   685 ** |  | 
|   686 ** pLeft and pRight are left pointing at the next document record. |  | 
|   687 */ |  | 
|   688 static void mergePosList( |  | 
|   689   DocListReader *pLeft,    /* Left position list */ |  | 
|   690   DocListReader *pRight,   /* Right position list */ |  | 
|   691   sqlite_int64 iDocid,     /* The docid from pLeft and pRight */ |  | 
|   692   DocList *pOut            /* Write the merged document record here */ |  | 
|   693 ){ |  | 
|   694   int iLeftCol, iLeftPos = readPosition(pLeft, &iLeftCol); |  | 
|   695   int iRightCol, iRightPos = readPosition(pRight, &iRightCol); |  | 
|   696   int match = 0; |  | 
|   697  |  | 
|   698   /* Loop until we've reached the end of both position lists. */ |  | 
|   699   while( iLeftPos!=-1 && iRightPos!=-1 ){ |  | 
|   700     if( iLeftCol==iRightCol && iLeftPos+1==iRightPos ){ |  | 
|   701       if( !match ){ |  | 
|   702         docListAddDocid(pOut, iDocid); |  | 
|   703         match = 1; |  | 
|   704       } |  | 
|   705       if( pOut->iType>=DL_POSITIONS ){ |  | 
|   706         docListAddPos(pOut, iRightCol, iRightPos); |  | 
|   707       } |  | 
|   708       iLeftPos = readPosition(pLeft, &iLeftCol); |  | 
|   709       iRightPos = readPosition(pRight, &iRightCol); |  | 
|   710     }else if( iRightCol<iLeftCol || |  | 
|   711               (iRightCol==iLeftCol && iRightPos<iLeftPos+1) ){ |  | 
|   712       iRightPos = readPosition(pRight, &iRightCol); |  | 
|   713     }else{ |  | 
|   714       iLeftPos = readPosition(pLeft, &iLeftCol); |  | 
|   715     } |  | 
|   716   } |  | 
|   717   if( iLeftPos>=0 ) skipPositionList(pLeft); |  | 
|   718   if( iRightPos>=0 ) skipPositionList(pRight); |  | 
|   719 } |  | 
|   720  |  | 
|   721 /* We have two doclists:  pLeft and pRight. |  | 
|   722 ** Write the phrase intersection of these two doclists into pOut. |  | 
|   723 ** |  | 
|   724 ** A phrase intersection means that two documents only match |  | 
|   725 ** if pLeft.iPos+1==pRight.iPos. |  | 
|   726 ** |  | 
|   727 ** The output pOut may or may not contain positions.  If pOut |  | 
|   728 ** does contain positions, they are the positions of pRight. |  | 
|   729 */ |  | 
|   730 static void docListPhraseMerge( |  | 
|   731   DocList *pLeft,    /* Doclist resulting from the words on the left */ |  | 
|   732   DocList *pRight,   /* Doclist for the next word to the right */ |  | 
|   733   DocList *pOut      /* Write the combined doclist here */ |  | 
|   734 ){ |  | 
|   735   DocListReader left, right; |  | 
|   736   sqlite_int64 docidLeft, docidRight; |  | 
|   737  |  | 
|   738   readerInit(&left, pLeft); |  | 
|   739   readerInit(&right, pRight); |  | 
|   740   docidLeft = nextDocid(&left); |  | 
|   741   docidRight = nextDocid(&right); |  | 
|   742  |  | 
|   743   while( docidLeft>0 && docidRight>0 ){ |  | 
|   744     if( docidLeft<docidRight ){ |  | 
|   745       docidLeft = nextDocid(&left); |  | 
|   746     }else if( docidRight<docidLeft ){ |  | 
|   747       docidRight = nextDocid(&right); |  | 
|   748     }else{ |  | 
|   749       mergePosList(&left, &right, docidLeft, pOut); |  | 
|   750       docidLeft = nextDocid(&left); |  | 
|   751       docidRight = nextDocid(&right); |  | 
|   752     } |  | 
|   753   } |  | 
|   754 } |  | 
|   755  |  | 
|   756 /* We have two doclists:  pLeft and pRight. |  | 
|   757 ** Write the intersection of these two doclists into pOut. |  | 
|   758 ** Only docids are matched.  Position information is ignored. |  | 
|   759 ** |  | 
|   760 ** The output pOut never holds positions. |  | 
|   761 */ |  | 
|   762 static void docListAndMerge( |  | 
|   763   DocList *pLeft,    /* Doclist resulting from the words on the left */ |  | 
|   764   DocList *pRight,   /* Doclist for the next word to the right */ |  | 
|   765   DocList *pOut      /* Write the combined doclist here */ |  | 
|   766 ){ |  | 
|   767   DocListReader left, right; |  | 
|   768   sqlite_int64 docidLeft, docidRight; |  | 
|   769  |  | 
|   770   assert( pOut->iType<DL_POSITIONS ); |  | 
|   771  |  | 
|   772   readerInit(&left, pLeft); |  | 
|   773   readerInit(&right, pRight); |  | 
|   774   docidLeft = nextDocid(&left); |  | 
|   775   docidRight = nextDocid(&right); |  | 
|   776  |  | 
|   777   while( docidLeft>0 && docidRight>0 ){ |  | 
|   778     if( docidLeft<docidRight ){ |  | 
|   779       docidLeft = nextDocid(&left); |  | 
|   780     }else if( docidRight<docidLeft ){ |  | 
|   781       docidRight = nextDocid(&right); |  | 
|   782     }else{ |  | 
|   783       docListAddDocid(pOut, docidLeft); |  | 
|   784       docidLeft = nextDocid(&left); |  | 
|   785       docidRight = nextDocid(&right); |  | 
|   786     } |  | 
|   787   } |  | 
|   788 } |  | 
|   789  |  | 
|   790 /* We have two doclists:  pLeft and pRight. |  | 
|   791 ** Write the union of these two doclists into pOut. |  | 
|   792 ** Only docids are matched.  Position information is ignored. |  | 
|   793 ** |  | 
|   794 ** The output pOut never holds positions. |  | 
|   795 */ |  | 
|   796 static void docListOrMerge( |  | 
|   797   DocList *pLeft,    /* Doclist resulting from the words on the left */ |  | 
|   798   DocList *pRight,   /* Doclist for the next word to the right */ |  | 
|   799   DocList *pOut      /* Write the combined doclist here */ |  | 
|   800 ){ |  | 
|   801   DocListReader left, right; |  | 
|   802   sqlite_int64 docidLeft, docidRight, priorLeft; |  | 
|   803  |  | 
|   804   readerInit(&left, pLeft); |  | 
|   805   readerInit(&right, pRight); |  | 
|   806   docidLeft = nextDocid(&left); |  | 
|   807   docidRight = nextDocid(&right); |  | 
|   808  |  | 
|   809   while( docidLeft>0 && docidRight>0 ){ |  | 
|   810     if( docidLeft<=docidRight ){ |  | 
|   811       docListAddDocid(pOut, docidLeft); |  | 
|   812     }else{ |  | 
|   813       docListAddDocid(pOut, docidRight); |  | 
|   814     } |  | 
|   815     priorLeft = docidLeft; |  | 
|   816     if( docidLeft<=docidRight ){ |  | 
|   817       docidLeft = nextDocid(&left); |  | 
|   818     } |  | 
|   819     if( docidRight>0 && docidRight<=priorLeft ){ |  | 
|   820       docidRight = nextDocid(&right); |  | 
|   821     } |  | 
|   822   } |  | 
|   823   while( docidLeft>0 ){ |  | 
|   824     docListAddDocid(pOut, docidLeft); |  | 
|   825     docidLeft = nextDocid(&left); |  | 
|   826   } |  | 
|   827   while( docidRight>0 ){ |  | 
|   828     docListAddDocid(pOut, docidRight); |  | 
|   829     docidRight = nextDocid(&right); |  | 
|   830   } |  | 
|   831 } |  | 
|   832  |  | 
|   833 /* We have two doclists:  pLeft and pRight. |  | 
|   834 ** Write into pOut all documents that occur in pLeft but not |  | 
|   835 ** in pRight. |  | 
|   836 ** |  | 
|   837 ** Only docids are matched.  Position information is ignored. |  | 
|   838 ** |  | 
|   839 ** The output pOut never holds positions. |  | 
|   840 */ |  | 
|   841 static void docListExceptMerge( |  | 
|   842   DocList *pLeft,    /* Doclist resulting from the words on the left */ |  | 
|   843   DocList *pRight,   /* Doclist for the next word to the right */ |  | 
|   844   DocList *pOut      /* Write the combined doclist here */ |  | 
|   845 ){ |  | 
|   846   DocListReader left, right; |  | 
|   847   sqlite_int64 docidLeft, docidRight, priorLeft; |  | 
|   848  |  | 
|   849   readerInit(&left, pLeft); |  | 
|   850   readerInit(&right, pRight); |  | 
|   851   docidLeft = nextDocid(&left); |  | 
|   852   docidRight = nextDocid(&right); |  | 
|   853  |  | 
|   854   while( docidLeft>0 && docidRight>0 ){ |  | 
|   855     priorLeft = docidLeft; |  | 
|   856     if( docidLeft<docidRight ){ |  | 
|   857       docListAddDocid(pOut, docidLeft); |  | 
|   858     } |  | 
|   859     if( docidLeft<=docidRight ){ |  | 
|   860       docidLeft = nextDocid(&left); |  | 
|   861     } |  | 
|   862     if( docidRight>0 && docidRight<=priorLeft ){ |  | 
|   863       docidRight = nextDocid(&right); |  | 
|   864     } |  | 
|   865   } |  | 
|   866   while( docidLeft>0 ){ |  | 
|   867     docListAddDocid(pOut, docidLeft); |  | 
|   868     docidLeft = nextDocid(&left); |  | 
|   869   } |  | 
|   870 } |  | 
|   871  |  | 
|   872 static char *string_dup_n(const char *s, int n){ |  | 
|   873   char *str = malloc(n + 1); |  | 
|   874   memcpy(str, s, n); |  | 
|   875   str[n] = '\0'; |  | 
|   876   return str; |  | 
|   877 } |  | 
|   878  |  | 
|   879 /* Duplicate a string; the caller must free() the returned string. |  | 
|   880  * (We don't use strdup() since it is not part of the standard C library and |  | 
|   881  * may not be available everywhere.) */ |  | 
|   882 static char *string_dup(const char *s){ |  | 
|   883   return string_dup_n(s, strlen(s)); |  | 
|   884 } |  | 
|   885  |  | 
|   886 /* Format a string, replacing each occurrence of the % character with |  | 
|   887  * zDb.zName.  This may be more convenient than sqlite_mprintf() |  | 
|   888  * when one string is used repeatedly in a format string. |  | 
|   889  * The caller must free() the returned string. */ |  | 
|   890 static char *string_format(const char *zFormat, |  | 
|   891                            const char *zDb, const char *zName){ |  | 
|   892   const char *p; |  | 
|   893   size_t len = 0; |  | 
|   894   size_t nDb = strlen(zDb); |  | 
|   895   size_t nName = strlen(zName); |  | 
|   896   size_t nFullTableName = nDb+1+nName; |  | 
|   897   char *result; |  | 
|   898   char *r; |  | 
|   899  |  | 
|   900   /* first compute length needed */ |  | 
|   901   for(p = zFormat ; *p ; ++p){ |  | 
|   902     len += (*p=='%' ? nFullTableName : 1); |  | 
|   903   } |  | 
|   904   len += 1;  /* for null terminator */ |  | 
|   905  |  | 
|   906   r = result = malloc(len); |  | 
|   907   for(p = zFormat; *p; ++p){ |  | 
|   908     if( *p=='%' ){ |  | 
|   909       memcpy(r, zDb, nDb); |  | 
|   910       r += nDb; |  | 
|   911       *r++ = '.'; |  | 
|   912       memcpy(r, zName, nName); |  | 
|   913       r += nName; |  | 
|   914     } else { |  | 
|   915       *r++ = *p; |  | 
|   916     } |  | 
|   917   } |  | 
|   918   *r++ = '\0'; |  | 
|   919   assert( r == result + len ); |  | 
|   920   return result; |  | 
|   921 } |  | 
|   922  |  | 
|   923 static int sql_exec(sqlite3 *db, const char *zDb, const char *zName, |  | 
|   924                     const char *zFormat){ |  | 
|   925   char *zCommand = string_format(zFormat, zDb, zName); |  | 
|   926   int rc; |  | 
|   927   TRACE(("FTS1 sql: %s\n", zCommand)); |  | 
|   928   rc = sqlite3_exec(db, zCommand, NULL, 0, NULL); |  | 
|   929   free(zCommand); |  | 
|   930   return rc; |  | 
|   931 } |  | 
|   932  |  | 
|   933 static int sql_prepare(sqlite3 *db, const char *zDb, const char *zName, |  | 
|   934                        sqlite3_stmt **ppStmt, const char *zFormat){ |  | 
|   935   char *zCommand = string_format(zFormat, zDb, zName); |  | 
|   936   int rc; |  | 
|   937   TRACE(("FTS1 prepare: %s\n", zCommand)); |  | 
|   938   rc = sqlite3_prepare(db, zCommand, -1, ppStmt, NULL); |  | 
|   939   free(zCommand); |  | 
|   940   return rc; |  | 
|   941 } |  | 
|   942  |  | 
|   943 /* end utility functions */ |  | 
|   944  |  | 
|   945 /* Forward reference */ |  | 
|   946 typedef struct fulltext_vtab fulltext_vtab; |  | 
|   947  |  | 
|   948 /* A single term in a query is represented by an instances of |  | 
|   949 ** the following structure. |  | 
|   950 */ |  | 
|   951 typedef struct QueryTerm { |  | 
|   952   short int nPhrase; /* How many following terms are part of the same phrase */ |  | 
|   953   short int iPhrase; /* This is the i-th term of a phrase. */ |  | 
|   954   short int iColumn; /* Column of the index that must match this term */ |  | 
|   955   signed char isOr;  /* this term is preceded by "OR" */ |  | 
|   956   signed char isNot; /* this term is preceded by "-" */ |  | 
|   957   char *pTerm;       /* text of the term.  '\000' terminated.  malloced */ |  | 
|   958   int nTerm;         /* Number of bytes in pTerm[] */ |  | 
|   959 } QueryTerm; |  | 
|   960  |  | 
|   961  |  | 
|   962 /* A query string is parsed into a Query structure. |  | 
|   963  * |  | 
|   964  * We could, in theory, allow query strings to be complicated |  | 
|   965  * nested expressions with precedence determined by parentheses. |  | 
|   966  * But none of the major search engines do this.  (Perhaps the |  | 
|   967  * feeling is that an parenthesized expression is two complex of |  | 
|   968  * an idea for the average user to grasp.)  Taking our lead from |  | 
|   969  * the major search engines, we will allow queries to be a list |  | 
|   970  * of terms (with an implied AND operator) or phrases in double-quotes, |  | 
|   971  * with a single optional "-" before each non-phrase term to designate |  | 
|   972  * negation and an optional OR connector. |  | 
|   973  * |  | 
|   974  * OR binds more tightly than the implied AND, which is what the |  | 
|   975  * major search engines seem to do.  So, for example: |  | 
|   976  *  |  | 
|   977  *    [one two OR three]     ==>    one AND (two OR three) |  | 
|   978  *    [one OR two three]     ==>    (one OR two) AND three |  | 
|   979  * |  | 
|   980  * A "-" before a term matches all entries that lack that term. |  | 
|   981  * The "-" must occur immediately before the term with in intervening |  | 
|   982  * space.  This is how the search engines do it. |  | 
|   983  * |  | 
|   984  * A NOT term cannot be the right-hand operand of an OR.  If this |  | 
|   985  * occurs in the query string, the NOT is ignored: |  | 
|   986  * |  | 
|   987  *    [one OR -two]          ==>    one OR two |  | 
|   988  * |  | 
|   989  */ |  | 
|   990 typedef struct Query { |  | 
|   991   fulltext_vtab *pFts;  /* The full text index */ |  | 
|   992   int nTerms;           /* Number of terms in the query */ |  | 
|   993   QueryTerm *pTerms;    /* Array of terms.  Space obtained from malloc() */ |  | 
|   994   int nextIsOr;         /* Set the isOr flag on the next inserted term */ |  | 
|   995   int nextColumn;       /* Next word parsed must be in this column */ |  | 
|   996   int dfltColumn;       /* The default column */ |  | 
|   997 } Query; |  | 
|   998  |  | 
|   999  |  | 
|  1000 /* |  | 
|  1001 ** An instance of the following structure keeps track of generated |  | 
|  1002 ** matching-word offset information and snippets. |  | 
|  1003 */ |  | 
|  1004 typedef struct Snippet { |  | 
|  1005   int nMatch;     /* Total number of matches */ |  | 
|  1006   int nAlloc;     /* Space allocated for aMatch[] */ |  | 
|  1007   struct snippetMatch { /* One entry for each matching term */ |  | 
|  1008     char snStatus;       /* Status flag for use while constructing snippets */ |  | 
|  1009     short int iCol;      /* The column that contains the match */ |  | 
|  1010     short int iTerm;     /* The index in Query.pTerms[] of the matching term */ |  | 
|  1011     short int nByte;     /* Number of bytes in the term */ |  | 
|  1012     int iStart;          /* The offset to the first character of the term */ |  | 
|  1013   } *aMatch;      /* Points to space obtained from malloc */ |  | 
|  1014   char *zOffset;  /* Text rendering of aMatch[] */ |  | 
|  1015   int nOffset;    /* strlen(zOffset) */ |  | 
|  1016   char *zSnippet; /* Snippet text */ |  | 
|  1017   int nSnippet;   /* strlen(zSnippet) */ |  | 
|  1018 } Snippet; |  | 
|  1019  |  | 
|  1020  |  | 
|  1021 typedef enum QueryType { |  | 
|  1022   QUERY_GENERIC,   /* table scan */ |  | 
|  1023   QUERY_ROWID,     /* lookup by rowid */ |  | 
|  1024   QUERY_FULLTEXT   /* QUERY_FULLTEXT + [i] is a full-text search for column i*/ |  | 
|  1025 } QueryType; |  | 
|  1026  |  | 
|  1027 /* TODO(shess) CHUNK_MAX controls how much data we allow in segment 0 |  | 
|  1028 ** before we start aggregating into larger segments.  Lower CHUNK_MAX |  | 
|  1029 ** means that for a given input we have more individual segments per |  | 
|  1030 ** term, which means more rows in the table and a bigger index (due to |  | 
|  1031 ** both more rows and bigger rowids).  But it also reduces the average |  | 
|  1032 ** cost of adding new elements to the segment 0 doclist, and it seems |  | 
|  1033 ** to reduce the number of pages read and written during inserts.  256 |  | 
|  1034 ** was chosen by measuring insertion times for a certain input (first |  | 
|  1035 ** 10k documents of Enron corpus), though including query performance |  | 
|  1036 ** in the decision may argue for a larger value. |  | 
|  1037 */ |  | 
|  1038 #define CHUNK_MAX 256 |  | 
|  1039  |  | 
|  1040 typedef enum fulltext_statement { |  | 
|  1041   CONTENT_INSERT_STMT, |  | 
|  1042   CONTENT_SELECT_STMT, |  | 
|  1043   CONTENT_UPDATE_STMT, |  | 
|  1044   CONTENT_DELETE_STMT, |  | 
|  1045  |  | 
|  1046   TERM_SELECT_STMT, |  | 
|  1047   TERM_SELECT_ALL_STMT, |  | 
|  1048   TERM_INSERT_STMT, |  | 
|  1049   TERM_UPDATE_STMT, |  | 
|  1050   TERM_DELETE_STMT, |  | 
|  1051  |  | 
|  1052   MAX_STMT                     /* Always at end! */ |  | 
|  1053 } fulltext_statement; |  | 
|  1054  |  | 
|  1055 /* These must exactly match the enum above. */ |  | 
|  1056 /* TODO(adam): Is there some risk that a statement (in particular, |  | 
|  1057 ** pTermSelectStmt) will be used in two cursors at once, e.g.  if a |  | 
|  1058 ** query joins a virtual table to itself?  If so perhaps we should |  | 
|  1059 ** move some of these to the cursor object. |  | 
|  1060 */ |  | 
|  1061 static const char *const fulltext_zStatement[MAX_STMT] = { |  | 
|  1062   /* CONTENT_INSERT */ NULL,  /* generated in contentInsertStatement() */ |  | 
|  1063   /* CONTENT_SELECT */ "select * from %_content where rowid = ?", |  | 
|  1064   /* CONTENT_UPDATE */ NULL,  /* generated in contentUpdateStatement() */ |  | 
|  1065   /* CONTENT_DELETE */ "delete from %_content where rowid = ?", |  | 
|  1066  |  | 
|  1067   /* TERM_SELECT */ |  | 
|  1068   "select rowid, doclist from %_term where term = ? and segment = ?", |  | 
|  1069   /* TERM_SELECT_ALL */ |  | 
|  1070   "select doclist from %_term where term = ? order by segment", |  | 
|  1071   /* TERM_INSERT */ |  | 
|  1072   "insert into %_term (rowid, term, segment, doclist) values (?, ?, ?, ?)", |  | 
|  1073   /* TERM_UPDATE */ "update %_term set doclist = ? where rowid = ?", |  | 
|  1074   /* TERM_DELETE */ "delete from %_term where rowid = ?", |  | 
|  1075 }; |  | 
|  1076  |  | 
|  1077 /* |  | 
|  1078 ** A connection to a fulltext index is an instance of the following |  | 
|  1079 ** structure.  The xCreate and xConnect methods create an instance |  | 
|  1080 ** of this structure and xDestroy and xDisconnect free that instance. |  | 
|  1081 ** All other methods receive a pointer to the structure as one of their |  | 
|  1082 ** arguments. |  | 
|  1083 */ |  | 
|  1084 struct fulltext_vtab { |  | 
|  1085   sqlite3_vtab base;               /* Base class used by SQLite core */ |  | 
|  1086   sqlite3 *db;                     /* The database connection */ |  | 
|  1087   const char *zDb;                 /* logical database name */ |  | 
|  1088   const char *zName;               /* virtual table name */ |  | 
|  1089   int nColumn;                     /* number of columns in virtual table */ |  | 
|  1090   char **azColumn;                 /* column names.  malloced */ |  | 
|  1091   char **azContentColumn;          /* column names in content table; malloced */ |  | 
|  1092   sqlite3_tokenizer *pTokenizer;   /* tokenizer for inserts and queries */ |  | 
|  1093  |  | 
|  1094   /* Precompiled statements which we keep as long as the table is |  | 
|  1095   ** open. |  | 
|  1096   */ |  | 
|  1097   sqlite3_stmt *pFulltextStatements[MAX_STMT]; |  | 
|  1098 }; |  | 
|  1099  |  | 
|  1100 /* |  | 
|  1101 ** When the core wants to do a query, it create a cursor using a |  | 
|  1102 ** call to xOpen.  This structure is an instance of a cursor.  It |  | 
|  1103 ** is destroyed by xClose. |  | 
|  1104 */ |  | 
|  1105 typedef struct fulltext_cursor { |  | 
|  1106   sqlite3_vtab_cursor base;        /* Base class used by SQLite core */ |  | 
|  1107   QueryType iCursorType;           /* Copy of sqlite3_index_info.idxNum */ |  | 
|  1108   sqlite3_stmt *pStmt;             /* Prepared statement in use by the cursor */ |  | 
|  1109   int eof;                         /* True if at End Of Results */ |  | 
|  1110   Query q;                         /* Parsed query string */ |  | 
|  1111   Snippet snippet;                 /* Cached snippet for the current row */ |  | 
|  1112   int iColumn;                     /* Column being searched */ |  | 
|  1113   DocListReader result;  /* used when iCursorType == QUERY_FULLTEXT */  |  | 
|  1114 } fulltext_cursor; |  | 
|  1115  |  | 
|  1116 static struct fulltext_vtab *cursor_vtab(fulltext_cursor *c){ |  | 
|  1117   return (fulltext_vtab *) c->base.pVtab; |  | 
|  1118 } |  | 
|  1119  |  | 
|  1120 static const sqlite3_module fulltextModule;   /* forward declaration */ |  | 
|  1121  |  | 
|  1122 /* Append a list of strings separated by commas to a StringBuffer. */ |  | 
|  1123 static void appendList(StringBuffer *sb, int nString, char **azString){ |  | 
|  1124   int i; |  | 
|  1125   for(i=0; i<nString; ++i){ |  | 
|  1126     if( i>0 ) append(sb, ", "); |  | 
|  1127     append(sb, azString[i]); |  | 
|  1128   } |  | 
|  1129 } |  | 
|  1130  |  | 
|  1131 /* Return a dynamically generated statement of the form |  | 
|  1132  *   insert into %_content (rowid, ...) values (?, ...) |  | 
|  1133  */ |  | 
|  1134 static const char *contentInsertStatement(fulltext_vtab *v){ |  | 
|  1135   StringBuffer sb; |  | 
|  1136   int i; |  | 
|  1137  |  | 
|  1138   initStringBuffer(&sb); |  | 
|  1139   append(&sb, "insert into %_content (rowid, "); |  | 
|  1140   appendList(&sb, v->nColumn, v->azContentColumn); |  | 
|  1141   append(&sb, ") values (?"); |  | 
|  1142   for(i=0; i<v->nColumn; ++i) |  | 
|  1143     append(&sb, ", ?"); |  | 
|  1144   append(&sb, ")"); |  | 
|  1145   return sb.s; |  | 
|  1146 } |  | 
|  1147  |  | 
|  1148 /* Return a dynamically generated statement of the form |  | 
|  1149  *   update %_content set [col_0] = ?, [col_1] = ?, ... |  | 
|  1150  *                    where rowid = ? |  | 
|  1151  */ |  | 
|  1152 static const char *contentUpdateStatement(fulltext_vtab *v){ |  | 
|  1153   StringBuffer sb; |  | 
|  1154   int i; |  | 
|  1155  |  | 
|  1156   initStringBuffer(&sb); |  | 
|  1157   append(&sb, "update %_content set "); |  | 
|  1158   for(i=0; i<v->nColumn; ++i) { |  | 
|  1159     if( i>0 ){ |  | 
|  1160       append(&sb, ", "); |  | 
|  1161     } |  | 
|  1162     append(&sb, v->azContentColumn[i]); |  | 
|  1163     append(&sb, " = ?"); |  | 
|  1164   } |  | 
|  1165   append(&sb, " where rowid = ?"); |  | 
|  1166   return sb.s; |  | 
|  1167 } |  | 
|  1168  |  | 
|  1169 /* Puts a freshly-prepared statement determined by iStmt in *ppStmt. |  | 
|  1170 ** If the indicated statement has never been prepared, it is prepared |  | 
|  1171 ** and cached, otherwise the cached version is reset. |  | 
|  1172 */ |  | 
|  1173 static int sql_get_statement(fulltext_vtab *v, fulltext_statement iStmt, |  | 
|  1174                              sqlite3_stmt **ppStmt){ |  | 
|  1175   assert( iStmt<MAX_STMT ); |  | 
|  1176   if( v->pFulltextStatements[iStmt]==NULL ){ |  | 
|  1177     const char *zStmt; |  | 
|  1178     int rc; |  | 
|  1179     switch( iStmt ){ |  | 
|  1180       case CONTENT_INSERT_STMT: |  | 
|  1181         zStmt = contentInsertStatement(v); break; |  | 
|  1182       case CONTENT_UPDATE_STMT: |  | 
|  1183         zStmt = contentUpdateStatement(v); break; |  | 
|  1184       default: |  | 
|  1185         zStmt = fulltext_zStatement[iStmt]; |  | 
|  1186     } |  | 
|  1187     rc = sql_prepare(v->db, v->zDb, v->zName, &v->pFulltextStatements[iStmt], |  | 
|  1188                          zStmt); |  | 
|  1189     if( zStmt != fulltext_zStatement[iStmt]) free((void *) zStmt); |  | 
|  1190     if( rc!=SQLITE_OK ) return rc; |  | 
|  1191   } else { |  | 
|  1192     int rc = sqlite3_reset(v->pFulltextStatements[iStmt]); |  | 
|  1193     if( rc!=SQLITE_OK ) return rc; |  | 
|  1194   } |  | 
|  1195  |  | 
|  1196   *ppStmt = v->pFulltextStatements[iStmt]; |  | 
|  1197   return SQLITE_OK; |  | 
|  1198 } |  | 
|  1199  |  | 
|  1200 /* Step the indicated statement, handling errors SQLITE_BUSY (by |  | 
|  1201 ** retrying) and SQLITE_SCHEMA (by re-preparing and transferring |  | 
|  1202 ** bindings to the new statement). |  | 
|  1203 ** TODO(adam): We should extend this function so that it can work with |  | 
|  1204 ** statements declared locally, not only globally cached statements. |  | 
|  1205 */ |  | 
|  1206 static int sql_step_statement(fulltext_vtab *v, fulltext_statement iStmt, |  | 
|  1207                               sqlite3_stmt **ppStmt){ |  | 
|  1208   int rc; |  | 
|  1209   sqlite3_stmt *s = *ppStmt; |  | 
|  1210   assert( iStmt<MAX_STMT ); |  | 
|  1211   assert( s==v->pFulltextStatements[iStmt] ); |  | 
|  1212  |  | 
|  1213   while( (rc=sqlite3_step(s))!=SQLITE_DONE && rc!=SQLITE_ROW ){ |  | 
|  1214     if( rc==SQLITE_BUSY ) continue; |  | 
|  1215     if( rc!=SQLITE_ERROR ) return rc; |  | 
|  1216  |  | 
|  1217     /* If an SQLITE_SCHEMA error has occurred, then finalizing this |  | 
|  1218      * statement is going to delete the fulltext_vtab structure. If |  | 
|  1219      * the statement just executed is in the pFulltextStatements[] |  | 
|  1220      * array, it will be finalized twice. So remove it before |  | 
|  1221      * calling sqlite3_finalize(). |  | 
|  1222      */ |  | 
|  1223     v->pFulltextStatements[iStmt] = NULL; |  | 
|  1224     rc = sqlite3_finalize(s); |  | 
|  1225     break; |  | 
|  1226   } |  | 
|  1227   return rc; |  | 
|  1228 } |  | 
|  1229  |  | 
|  1230 /* Like sql_step_statement(), but convert SQLITE_DONE to SQLITE_OK. |  | 
|  1231 ** Useful for statements like UPDATE, where we expect no results. |  | 
|  1232 */ |  | 
|  1233 static int sql_single_step_statement(fulltext_vtab *v, |  | 
|  1234                                      fulltext_statement iStmt, |  | 
|  1235                                      sqlite3_stmt **ppStmt){ |  | 
|  1236   int rc = sql_step_statement(v, iStmt, ppStmt); |  | 
|  1237   return (rc==SQLITE_DONE) ? SQLITE_OK : rc; |  | 
|  1238 } |  | 
|  1239  |  | 
|  1240 /* insert into %_content (rowid, ...) values ([rowid], [pValues]) */ |  | 
|  1241 static int content_insert(fulltext_vtab *v, sqlite3_value *rowid, |  | 
|  1242                           sqlite3_value **pValues){ |  | 
|  1243   sqlite3_stmt *s; |  | 
|  1244   int i; |  | 
|  1245   int rc = sql_get_statement(v, CONTENT_INSERT_STMT, &s); |  | 
|  1246   if( rc!=SQLITE_OK ) return rc; |  | 
|  1247  |  | 
|  1248   rc = sqlite3_bind_value(s, 1, rowid); |  | 
|  1249   if( rc!=SQLITE_OK ) return rc; |  | 
|  1250  |  | 
|  1251   for(i=0; i<v->nColumn; ++i){ |  | 
|  1252     rc = sqlite3_bind_value(s, 2+i, pValues[i]); |  | 
|  1253     if( rc!=SQLITE_OK ) return rc; |  | 
|  1254   } |  | 
|  1255  |  | 
|  1256   return sql_single_step_statement(v, CONTENT_INSERT_STMT, &s); |  | 
|  1257 } |  | 
|  1258  |  | 
|  1259 /* update %_content set col0 = pValues[0], col1 = pValues[1], ... |  | 
|  1260  *                  where rowid = [iRowid] */ |  | 
|  1261 static int content_update(fulltext_vtab *v, sqlite3_value **pValues, |  | 
|  1262                           sqlite_int64 iRowid){ |  | 
|  1263   sqlite3_stmt *s; |  | 
|  1264   int i; |  | 
|  1265   int rc = sql_get_statement(v, CONTENT_UPDATE_STMT, &s); |  | 
|  1266   if( rc!=SQLITE_OK ) return rc; |  | 
|  1267  |  | 
|  1268   for(i=0; i<v->nColumn; ++i){ |  | 
|  1269     rc = sqlite3_bind_value(s, 1+i, pValues[i]); |  | 
|  1270     if( rc!=SQLITE_OK ) return rc; |  | 
|  1271   } |  | 
|  1272  |  | 
|  1273   rc = sqlite3_bind_int64(s, 1+v->nColumn, iRowid); |  | 
|  1274   if( rc!=SQLITE_OK ) return rc; |  | 
|  1275  |  | 
|  1276   return sql_single_step_statement(v, CONTENT_UPDATE_STMT, &s); |  | 
|  1277 } |  | 
|  1278  |  | 
|  1279 static void freeStringArray(int nString, const char **pString){ |  | 
|  1280   int i; |  | 
|  1281  |  | 
|  1282   for (i=0 ; i < nString ; ++i) { |  | 
|  1283     if( pString[i]!=NULL ) free((void *) pString[i]); |  | 
|  1284   } |  | 
|  1285   free((void *) pString); |  | 
|  1286 } |  | 
|  1287  |  | 
|  1288 /* select * from %_content where rowid = [iRow] |  | 
|  1289  * The caller must delete the returned array and all strings in it. |  | 
|  1290  * null fields will be NULL in the returned array. |  | 
|  1291  * |  | 
|  1292  * TODO: Perhaps we should return pointer/length strings here for consistency |  | 
|  1293  * with other code which uses pointer/length. */ |  | 
|  1294 static int content_select(fulltext_vtab *v, sqlite_int64 iRow, |  | 
|  1295                           const char ***pValues){ |  | 
|  1296   sqlite3_stmt *s; |  | 
|  1297   const char **values; |  | 
|  1298   int i; |  | 
|  1299   int rc; |  | 
|  1300  |  | 
|  1301   *pValues = NULL; |  | 
|  1302  |  | 
|  1303   rc = sql_get_statement(v, CONTENT_SELECT_STMT, &s); |  | 
|  1304   if( rc!=SQLITE_OK ) return rc; |  | 
|  1305  |  | 
|  1306   rc = sqlite3_bind_int64(s, 1, iRow); |  | 
|  1307   if( rc!=SQLITE_OK ) return rc; |  | 
|  1308  |  | 
|  1309   rc = sql_step_statement(v, CONTENT_SELECT_STMT, &s); |  | 
|  1310   if( rc!=SQLITE_ROW ) return rc; |  | 
|  1311  |  | 
|  1312   values = (const char **) malloc(v->nColumn * sizeof(const char *)); |  | 
|  1313   for(i=0; i<v->nColumn; ++i){ |  | 
|  1314     if( sqlite3_column_type(s, i)==SQLITE_NULL ){ |  | 
|  1315       values[i] = NULL; |  | 
|  1316     }else{ |  | 
|  1317       values[i] = string_dup((char*)sqlite3_column_text(s, i)); |  | 
|  1318     } |  | 
|  1319   } |  | 
|  1320  |  | 
|  1321   /* We expect only one row.  We must execute another sqlite3_step() |  | 
|  1322    * to complete the iteration; otherwise the table will remain locked. */ |  | 
|  1323   rc = sqlite3_step(s); |  | 
|  1324   if( rc==SQLITE_DONE ){ |  | 
|  1325     *pValues = values; |  | 
|  1326     return SQLITE_OK; |  | 
|  1327   } |  | 
|  1328  |  | 
|  1329   freeStringArray(v->nColumn, values); |  | 
|  1330   return rc; |  | 
|  1331 } |  | 
|  1332  |  | 
|  1333 /* delete from %_content where rowid = [iRow ] */ |  | 
|  1334 static int content_delete(fulltext_vtab *v, sqlite_int64 iRow){ |  | 
|  1335   sqlite3_stmt *s; |  | 
|  1336   int rc = sql_get_statement(v, CONTENT_DELETE_STMT, &s); |  | 
|  1337   if( rc!=SQLITE_OK ) return rc; |  | 
|  1338  |  | 
|  1339   rc = sqlite3_bind_int64(s, 1, iRow); |  | 
|  1340   if( rc!=SQLITE_OK ) return rc; |  | 
|  1341  |  | 
|  1342   return sql_single_step_statement(v, CONTENT_DELETE_STMT, &s); |  | 
|  1343 } |  | 
|  1344  |  | 
|  1345 /* select rowid, doclist from %_term |  | 
|  1346  *  where term = [pTerm] and segment = [iSegment] |  | 
|  1347  * If found, returns SQLITE_ROW; the caller must free the |  | 
|  1348  * returned doclist.  If no rows found, returns SQLITE_DONE. */ |  | 
|  1349 static int term_select(fulltext_vtab *v, const char *pTerm, int nTerm, |  | 
|  1350                        int iSegment, |  | 
|  1351                        sqlite_int64 *rowid, DocList *out){ |  | 
|  1352   sqlite3_stmt *s; |  | 
|  1353   int rc = sql_get_statement(v, TERM_SELECT_STMT, &s); |  | 
|  1354   if( rc!=SQLITE_OK ) return rc; |  | 
|  1355  |  | 
|  1356   rc = sqlite3_bind_text(s, 1, pTerm, nTerm, SQLITE_STATIC); |  | 
|  1357   if( rc!=SQLITE_OK ) return rc; |  | 
|  1358  |  | 
|  1359   rc = sqlite3_bind_int(s, 2, iSegment); |  | 
|  1360   if( rc!=SQLITE_OK ) return rc; |  | 
|  1361  |  | 
|  1362   rc = sql_step_statement(v, TERM_SELECT_STMT, &s); |  | 
|  1363   if( rc!=SQLITE_ROW ) return rc; |  | 
|  1364  |  | 
|  1365   *rowid = sqlite3_column_int64(s, 0); |  | 
|  1366   docListInit(out, DL_DEFAULT, |  | 
|  1367               sqlite3_column_blob(s, 1), sqlite3_column_bytes(s, 1)); |  | 
|  1368  |  | 
|  1369   /* We expect only one row.  We must execute another sqlite3_step() |  | 
|  1370    * to complete the iteration; otherwise the table will remain locked. */ |  | 
|  1371   rc = sqlite3_step(s); |  | 
|  1372   return rc==SQLITE_DONE ? SQLITE_ROW : rc; |  | 
|  1373 } |  | 
|  1374  |  | 
|  1375 /* Load the segment doclists for term pTerm and merge them in |  | 
|  1376 ** appropriate order into out.  Returns SQLITE_OK if successful.  If |  | 
|  1377 ** there are no segments for pTerm, successfully returns an empty |  | 
|  1378 ** doclist in out. |  | 
|  1379 ** |  | 
|  1380 ** Each document consists of 1 or more "columns".  The number of |  | 
|  1381 ** columns is v->nColumn.  If iColumn==v->nColumn, then return |  | 
|  1382 ** position information about all columns.  If iColumn<v->nColumn, |  | 
|  1383 ** then only return position information about the iColumn-th column |  | 
|  1384 ** (where the first column is 0). |  | 
|  1385 */ |  | 
|  1386 static int term_select_all( |  | 
|  1387   fulltext_vtab *v,     /* The fulltext index we are querying against */ |  | 
|  1388   int iColumn,          /* If <nColumn, only look at the iColumn-th column */ |  | 
|  1389   const char *pTerm,    /* The term whose posting lists we want */ |  | 
|  1390   int nTerm,            /* Number of bytes in pTerm */ |  | 
|  1391   DocList *out          /* Write the resulting doclist here */ |  | 
|  1392 ){ |  | 
|  1393   DocList doclist; |  | 
|  1394   sqlite3_stmt *s; |  | 
|  1395   int rc = sql_get_statement(v, TERM_SELECT_ALL_STMT, &s); |  | 
|  1396   if( rc!=SQLITE_OK ) return rc; |  | 
|  1397  |  | 
|  1398   rc = sqlite3_bind_text(s, 1, pTerm, nTerm, SQLITE_STATIC); |  | 
|  1399   if( rc!=SQLITE_OK ) return rc; |  | 
|  1400  |  | 
|  1401   docListInit(&doclist, DL_DEFAULT, 0, 0); |  | 
|  1402  |  | 
|  1403   /* TODO(shess) Handle schema and busy errors. */ |  | 
|  1404   while( (rc=sql_step_statement(v, TERM_SELECT_ALL_STMT, &s))==SQLITE_ROW ){ |  | 
|  1405     DocList old; |  | 
|  1406  |  | 
|  1407     /* TODO(shess) If we processed doclists from oldest to newest, we |  | 
|  1408     ** could skip the malloc() involved with the following call.  For |  | 
|  1409     ** now, I'd rather keep this logic similar to index_insert_term(). |  | 
|  1410     ** We could additionally drop elements when we see deletes, but |  | 
|  1411     ** that would require a distinct version of docListAccumulate(). |  | 
|  1412     */ |  | 
|  1413     docListInit(&old, DL_DEFAULT, |  | 
|  1414                 sqlite3_column_blob(s, 0), sqlite3_column_bytes(s, 0)); |  | 
|  1415  |  | 
|  1416     if( iColumn<v->nColumn ){   /* querying a single column */ |  | 
|  1417       docListRestrictColumn(&old, iColumn); |  | 
|  1418     } |  | 
|  1419  |  | 
|  1420     /* doclist contains the newer data, so write it over old.  Then |  | 
|  1421     ** steal accumulated result for doclist. |  | 
|  1422     */ |  | 
|  1423     docListAccumulate(&old, &doclist); |  | 
|  1424     docListDestroy(&doclist); |  | 
|  1425     doclist = old; |  | 
|  1426   } |  | 
|  1427   if( rc!=SQLITE_DONE ){ |  | 
|  1428     docListDestroy(&doclist); |  | 
|  1429     return rc; |  | 
|  1430   } |  | 
|  1431  |  | 
|  1432   docListDiscardEmpty(&doclist); |  | 
|  1433   *out = doclist; |  | 
|  1434   return SQLITE_OK; |  | 
|  1435 } |  | 
|  1436  |  | 
|  1437 /* insert into %_term (rowid, term, segment, doclist) |  | 
|  1438                values ([piRowid], [pTerm], [iSegment], [doclist]) |  | 
|  1439 ** Lets sqlite select rowid if piRowid is NULL, else uses *piRowid. |  | 
|  1440 ** |  | 
|  1441 ** NOTE(shess) piRowid is IN, with values of "space of int64" plus |  | 
|  1442 ** null, it is not used to pass data back to the caller. |  | 
|  1443 */ |  | 
|  1444 static int term_insert(fulltext_vtab *v, sqlite_int64 *piRowid, |  | 
|  1445                        const char *pTerm, int nTerm, |  | 
|  1446                        int iSegment, DocList *doclist){ |  | 
|  1447   sqlite3_stmt *s; |  | 
|  1448   int rc = sql_get_statement(v, TERM_INSERT_STMT, &s); |  | 
|  1449   if( rc!=SQLITE_OK ) return rc; |  | 
|  1450  |  | 
|  1451   if( piRowid==NULL ){ |  | 
|  1452     rc = sqlite3_bind_null(s, 1); |  | 
|  1453   }else{ |  | 
|  1454     rc = sqlite3_bind_int64(s, 1, *piRowid); |  | 
|  1455   } |  | 
|  1456   if( rc!=SQLITE_OK ) return rc; |  | 
|  1457  |  | 
|  1458   rc = sqlite3_bind_text(s, 2, pTerm, nTerm, SQLITE_STATIC); |  | 
|  1459   if( rc!=SQLITE_OK ) return rc; |  | 
|  1460  |  | 
|  1461   rc = sqlite3_bind_int(s, 3, iSegment); |  | 
|  1462   if( rc!=SQLITE_OK ) return rc; |  | 
|  1463  |  | 
|  1464   rc = sqlite3_bind_blob(s, 4, doclist->pData, doclist->nData, SQLITE_STATIC); |  | 
|  1465   if( rc!=SQLITE_OK ) return rc; |  | 
|  1466  |  | 
|  1467   return sql_single_step_statement(v, TERM_INSERT_STMT, &s); |  | 
|  1468 } |  | 
|  1469  |  | 
|  1470 /* update %_term set doclist = [doclist] where rowid = [rowid] */ |  | 
|  1471 static int term_update(fulltext_vtab *v, sqlite_int64 rowid, |  | 
|  1472                        DocList *doclist){ |  | 
|  1473   sqlite3_stmt *s; |  | 
|  1474   int rc = sql_get_statement(v, TERM_UPDATE_STMT, &s); |  | 
|  1475   if( rc!=SQLITE_OK ) return rc; |  | 
|  1476  |  | 
|  1477   rc = sqlite3_bind_blob(s, 1, doclist->pData, doclist->nData, SQLITE_STATIC); |  | 
|  1478   if( rc!=SQLITE_OK ) return rc; |  | 
|  1479  |  | 
|  1480   rc = sqlite3_bind_int64(s, 2, rowid); |  | 
|  1481   if( rc!=SQLITE_OK ) return rc; |  | 
|  1482  |  | 
|  1483   return sql_single_step_statement(v, TERM_UPDATE_STMT, &s); |  | 
|  1484 } |  | 
|  1485  |  | 
|  1486 static int term_delete(fulltext_vtab *v, sqlite_int64 rowid){ |  | 
|  1487   sqlite3_stmt *s; |  | 
|  1488   int rc = sql_get_statement(v, TERM_DELETE_STMT, &s); |  | 
|  1489   if( rc!=SQLITE_OK ) return rc; |  | 
|  1490  |  | 
|  1491   rc = sqlite3_bind_int64(s, 1, rowid); |  | 
|  1492   if( rc!=SQLITE_OK ) return rc; |  | 
|  1493  |  | 
|  1494   return sql_single_step_statement(v, TERM_DELETE_STMT, &s); |  | 
|  1495 } |  | 
|  1496  |  | 
|  1497 /* |  | 
|  1498 ** Free the memory used to contain a fulltext_vtab structure. |  | 
|  1499 */ |  | 
|  1500 static void fulltext_vtab_destroy(fulltext_vtab *v){ |  | 
|  1501   int iStmt, i; |  | 
|  1502  |  | 
|  1503   TRACE(("FTS1 Destroy %p\n", v)); |  | 
|  1504   for( iStmt=0; iStmt<MAX_STMT; iStmt++ ){ |  | 
|  1505     if( v->pFulltextStatements[iStmt]!=NULL ){ |  | 
|  1506       sqlite3_finalize(v->pFulltextStatements[iStmt]); |  | 
|  1507       v->pFulltextStatements[iStmt] = NULL; |  | 
|  1508     } |  | 
|  1509   } |  | 
|  1510  |  | 
|  1511   if( v->pTokenizer!=NULL ){ |  | 
|  1512     v->pTokenizer->pModule->xDestroy(v->pTokenizer); |  | 
|  1513     v->pTokenizer = NULL; |  | 
|  1514   } |  | 
|  1515    |  | 
|  1516   free(v->azColumn); |  | 
|  1517   for(i = 0; i < v->nColumn; ++i) { |  | 
|  1518     sqlite3_free(v->azContentColumn[i]); |  | 
|  1519   } |  | 
|  1520   free(v->azContentColumn); |  | 
|  1521   free(v); |  | 
|  1522 } |  | 
|  1523  |  | 
|  1524 /* |  | 
|  1525 ** Token types for parsing the arguments to xConnect or xCreate. |  | 
|  1526 */ |  | 
|  1527 #define TOKEN_EOF         0    /* End of file */ |  | 
|  1528 #define TOKEN_SPACE       1    /* Any kind of whitespace */ |  | 
|  1529 #define TOKEN_ID          2    /* An identifier */ |  | 
|  1530 #define TOKEN_STRING      3    /* A string literal */ |  | 
|  1531 #define TOKEN_PUNCT       4    /* A single punctuation character */ |  | 
|  1532  |  | 
|  1533 /* |  | 
|  1534 ** If X is a character that can be used in an identifier then |  | 
|  1535 ** IdChar(X) will be true.  Otherwise it is false. |  | 
|  1536 ** |  | 
|  1537 ** For ASCII, any character with the high-order bit set is |  | 
|  1538 ** allowed in an identifier.  For 7-bit characters,  |  | 
|  1539 ** sqlite3IsIdChar[X] must be 1. |  | 
|  1540 ** |  | 
|  1541 ** Ticket #1066.  the SQL standard does not allow '$' in the |  | 
|  1542 ** middle of identfiers.  But many SQL implementations do.  |  | 
|  1543 ** SQLite will allow '$' in identifiers for compatibility. |  | 
|  1544 ** But the feature is undocumented. |  | 
|  1545 */ |  | 
|  1546 static const char isIdChar[] = { |  | 
|  1547 /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */ |  | 
|  1548     0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 2x */ |  | 
|  1549     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 3x */ |  | 
|  1550     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 4x */ |  | 
|  1551     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,  /* 5x */ |  | 
|  1552     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 6x */ |  | 
|  1553     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,  /* 7x */ |  | 
|  1554 }; |  | 
|  1555 #define IdChar(C)  (((c=C)&0x80)!=0 || (c>0x1f && isIdChar[c-0x20])) |  | 
|  1556  |  | 
|  1557  |  | 
|  1558 /* |  | 
|  1559 ** Return the length of the token that begins at z[0].  |  | 
|  1560 ** Store the token type in *tokenType before returning. |  | 
|  1561 */ |  | 
|  1562 static int getToken(const char *z, int *tokenType){ |  | 
|  1563   int i, c; |  | 
|  1564   switch( *z ){ |  | 
|  1565     case 0: { |  | 
|  1566       *tokenType = TOKEN_EOF; |  | 
|  1567       return 0; |  | 
|  1568     } |  | 
|  1569     case ' ': case '\t': case '\n': case '\f': case '\r': { |  | 
|  1570       for(i=1; safe_isspace(z[i]); i++){} |  | 
|  1571       *tokenType = TOKEN_SPACE; |  | 
|  1572       return i; |  | 
|  1573     } |  | 
|  1574     case '`': |  | 
|  1575     case '\'': |  | 
|  1576     case '"': { |  | 
|  1577       int delim = z[0]; |  | 
|  1578       for(i=1; (c=z[i])!=0; i++){ |  | 
|  1579         if( c==delim ){ |  | 
|  1580           if( z[i+1]==delim ){ |  | 
|  1581             i++; |  | 
|  1582           }else{ |  | 
|  1583             break; |  | 
|  1584           } |  | 
|  1585         } |  | 
|  1586       } |  | 
|  1587       *tokenType = TOKEN_STRING; |  | 
|  1588       return i + (c!=0); |  | 
|  1589     } |  | 
|  1590     case '[': { |  | 
|  1591       for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){} |  | 
|  1592       *tokenType = TOKEN_ID; |  | 
|  1593       return i; |  | 
|  1594     } |  | 
|  1595     default: { |  | 
|  1596       if( !IdChar(*z) ){ |  | 
|  1597         break; |  | 
|  1598       } |  | 
|  1599       for(i=1; IdChar(z[i]); i++){} |  | 
|  1600       *tokenType = TOKEN_ID; |  | 
|  1601       return i; |  | 
|  1602     } |  | 
|  1603   } |  | 
|  1604   *tokenType = TOKEN_PUNCT; |  | 
|  1605   return 1; |  | 
|  1606 } |  | 
|  1607  |  | 
|  1608 /* |  | 
|  1609 ** A token extracted from a string is an instance of the following |  | 
|  1610 ** structure. |  | 
|  1611 */ |  | 
|  1612 typedef struct Token { |  | 
|  1613   const char *z;       /* Pointer to token text.  Not '\000' terminated */ |  | 
|  1614   short int n;         /* Length of the token text in bytes. */ |  | 
|  1615 } Token; |  | 
|  1616  |  | 
|  1617 /* |  | 
|  1618 ** Given a input string (which is really one of the argv[] parameters |  | 
|  1619 ** passed into xConnect or xCreate) split the string up into tokens. |  | 
|  1620 ** Return an array of pointers to '\000' terminated strings, one string |  | 
|  1621 ** for each non-whitespace token. |  | 
|  1622 ** |  | 
|  1623 ** The returned array is terminated by a single NULL pointer. |  | 
|  1624 ** |  | 
|  1625 ** Space to hold the returned array is obtained from a single |  | 
|  1626 ** malloc and should be freed by passing the return value to free(). |  | 
|  1627 ** The individual strings within the token list are all a part of |  | 
|  1628 ** the single memory allocation and will all be freed at once. |  | 
|  1629 */ |  | 
|  1630 static char **tokenizeString(const char *z, int *pnToken){ |  | 
|  1631   int nToken = 0; |  | 
|  1632   Token *aToken = malloc( strlen(z) * sizeof(aToken[0]) ); |  | 
|  1633   int n = 1; |  | 
|  1634   int e, i; |  | 
|  1635   int totalSize = 0; |  | 
|  1636   char **azToken; |  | 
|  1637   char *zCopy; |  | 
|  1638   while( n>0 ){ |  | 
|  1639     n = getToken(z, &e); |  | 
|  1640     if( e!=TOKEN_SPACE ){ |  | 
|  1641       aToken[nToken].z = z; |  | 
|  1642       aToken[nToken].n = n; |  | 
|  1643       nToken++; |  | 
|  1644       totalSize += n+1; |  | 
|  1645     } |  | 
|  1646     z += n; |  | 
|  1647   } |  | 
|  1648   azToken = (char**)malloc( nToken*sizeof(char*) + totalSize ); |  | 
|  1649   zCopy = (char*)&azToken[nToken]; |  | 
|  1650   nToken--; |  | 
|  1651   for(i=0; i<nToken; i++){ |  | 
|  1652     azToken[i] = zCopy; |  | 
|  1653     n = aToken[i].n; |  | 
|  1654     memcpy(zCopy, aToken[i].z, n); |  | 
|  1655     zCopy[n] = 0; |  | 
|  1656     zCopy += n+1; |  | 
|  1657   } |  | 
|  1658   azToken[nToken] = 0; |  | 
|  1659   free(aToken); |  | 
|  1660   *pnToken = nToken; |  | 
|  1661   return azToken; |  | 
|  1662 } |  | 
|  1663  |  | 
|  1664 /* |  | 
|  1665 ** Convert an SQL-style quoted string into a normal string by removing |  | 
|  1666 ** the quote characters.  The conversion is done in-place.  If the |  | 
|  1667 ** input does not begin with a quote character, then this routine |  | 
|  1668 ** is a no-op. |  | 
|  1669 ** |  | 
|  1670 ** Examples: |  | 
|  1671 ** |  | 
|  1672 **     "abc"   becomes   abc |  | 
|  1673 **     'xyz'   becomes   xyz |  | 
|  1674 **     [pqr]   becomes   pqr |  | 
|  1675 **     `mno`   becomes   mno |  | 
|  1676 */ |  | 
|  1677 static void dequoteString(char *z){ |  | 
|  1678   int quote; |  | 
|  1679   int i, j; |  | 
|  1680   if( z==0 ) return; |  | 
|  1681   quote = z[0]; |  | 
|  1682   switch( quote ){ |  | 
|  1683     case '\'':  break; |  | 
|  1684     case '"':   break; |  | 
|  1685     case '`':   break;                /* For MySQL compatibility */ |  | 
|  1686     case '[':   quote = ']';  break;  /* For MS SqlServer compatibility */ |  | 
|  1687     default:    return; |  | 
|  1688   } |  | 
|  1689   for(i=1, j=0; z[i]; i++){ |  | 
|  1690     if( z[i]==quote ){ |  | 
|  1691       if( z[i+1]==quote ){ |  | 
|  1692         z[j++] = quote; |  | 
|  1693         i++; |  | 
|  1694       }else{ |  | 
|  1695         z[j++] = 0; |  | 
|  1696         break; |  | 
|  1697       } |  | 
|  1698     }else{ |  | 
|  1699       z[j++] = z[i]; |  | 
|  1700     } |  | 
|  1701   } |  | 
|  1702 } |  | 
|  1703  |  | 
|  1704 /* |  | 
|  1705 ** The input azIn is a NULL-terminated list of tokens.  Remove the first |  | 
|  1706 ** token and all punctuation tokens.  Remove the quotes from |  | 
|  1707 ** around string literal tokens. |  | 
|  1708 ** |  | 
|  1709 ** Example: |  | 
|  1710 ** |  | 
|  1711 **     input:      tokenize chinese ( 'simplifed' , 'mixed' ) |  | 
|  1712 **     output:     chinese simplifed mixed |  | 
|  1713 ** |  | 
|  1714 ** Another example: |  | 
|  1715 ** |  | 
|  1716 **     input:      delimiters ( '[' , ']' , '...' ) |  | 
|  1717 **     output:     [ ] ... |  | 
|  1718 */ |  | 
|  1719 static void tokenListToIdList(char **azIn){ |  | 
|  1720   int i, j; |  | 
|  1721   if( azIn ){ |  | 
|  1722     for(i=0, j=-1; azIn[i]; i++){ |  | 
|  1723       if( safe_isalnum(azIn[i][0]) || azIn[i][1] ){ |  | 
|  1724         dequoteString(azIn[i]); |  | 
|  1725         if( j>=0 ){ |  | 
|  1726           azIn[j] = azIn[i]; |  | 
|  1727         } |  | 
|  1728         j++; |  | 
|  1729       } |  | 
|  1730     } |  | 
|  1731     azIn[j] = 0; |  | 
|  1732   } |  | 
|  1733 } |  | 
|  1734  |  | 
|  1735  |  | 
|  1736 /* |  | 
|  1737 ** Find the first alphanumeric token in the string zIn.  Null-terminate |  | 
|  1738 ** this token.  Remove any quotation marks.  And return a pointer to |  | 
|  1739 ** the result. |  | 
|  1740 */ |  | 
|  1741 static char *firstToken(char *zIn, char **pzTail){ |  | 
|  1742   int n, ttype; |  | 
|  1743   while(1){ |  | 
|  1744     n = getToken(zIn, &ttype); |  | 
|  1745     if( ttype==TOKEN_SPACE ){ |  | 
|  1746       zIn += n; |  | 
|  1747     }else if( ttype==TOKEN_EOF ){ |  | 
|  1748       *pzTail = zIn; |  | 
|  1749       return 0; |  | 
|  1750     }else{ |  | 
|  1751       zIn[n] = 0; |  | 
|  1752       *pzTail = &zIn[1]; |  | 
|  1753       dequoteString(zIn); |  | 
|  1754       return zIn; |  | 
|  1755     } |  | 
|  1756   } |  | 
|  1757   /*NOTREACHED*/ |  | 
|  1758 } |  | 
|  1759  |  | 
|  1760 /* Return true if... |  | 
|  1761 ** |  | 
|  1762 **   *  s begins with the string t, ignoring case |  | 
|  1763 **   *  s is longer than t |  | 
|  1764 **   *  The first character of s beyond t is not a alphanumeric |  | 
|  1765 **  |  | 
|  1766 ** Ignore leading space in *s. |  | 
|  1767 ** |  | 
|  1768 ** To put it another way, return true if the first token of |  | 
|  1769 ** s[] is t[]. |  | 
|  1770 */ |  | 
|  1771 static int startsWith(const char *s, const char *t){ |  | 
|  1772   while( safe_isspace(*s) ){ s++; } |  | 
|  1773   while( *t ){ |  | 
|  1774     if( safe_tolower(*s++)!=safe_tolower(*t++) ) return 0; |  | 
|  1775   } |  | 
|  1776   return *s!='_' && !safe_isalnum(*s); |  | 
|  1777 } |  | 
|  1778  |  | 
|  1779 /* |  | 
|  1780 ** An instance of this structure defines the "spec" of a |  | 
|  1781 ** full text index.  This structure is populated by parseSpec |  | 
|  1782 ** and use by fulltextConnect and fulltextCreate. |  | 
|  1783 */ |  | 
|  1784 typedef struct TableSpec { |  | 
|  1785   const char *zDb;         /* Logical database name */ |  | 
|  1786   const char *zName;       /* Name of the full-text index */ |  | 
|  1787   int nColumn;             /* Number of columns to be indexed */ |  | 
|  1788   char **azColumn;         /* Original names of columns to be indexed */ |  | 
|  1789   char **azContentColumn;  /* Column names for %_content */ |  | 
|  1790   char **azTokenizer;      /* Name of tokenizer and its arguments */ |  | 
|  1791 } TableSpec; |  | 
|  1792  |  | 
|  1793 /* |  | 
|  1794 ** Reclaim all of the memory used by a TableSpec |  | 
|  1795 */ |  | 
|  1796 static void clearTableSpec(TableSpec *p) { |  | 
|  1797   free(p->azColumn); |  | 
|  1798   free(p->azContentColumn); |  | 
|  1799   free(p->azTokenizer); |  | 
|  1800 } |  | 
|  1801  |  | 
|  1802 /* Parse a CREATE VIRTUAL TABLE statement, which looks like this: |  | 
|  1803  * |  | 
|  1804  * CREATE VIRTUAL TABLE email |  | 
|  1805  *        USING fts1(subject, body, tokenize mytokenizer(myarg)) |  | 
|  1806  * |  | 
|  1807  * We return parsed information in a TableSpec structure. |  | 
|  1808  *  |  | 
|  1809  */ |  | 
|  1810 static int parseSpec(TableSpec *pSpec, int argc, const char *const*argv, |  | 
|  1811                      char**pzErr){ |  | 
|  1812   int i, n; |  | 
|  1813   char *z, *zDummy; |  | 
|  1814   char **azArg; |  | 
|  1815   const char *zTokenizer = 0;    /* argv[] entry describing the tokenizer */ |  | 
|  1816  |  | 
|  1817   assert( argc>=3 ); |  | 
|  1818   /* Current interface: |  | 
|  1819   ** argv[0] - module name |  | 
|  1820   ** argv[1] - database name |  | 
|  1821   ** argv[2] - table name |  | 
|  1822   ** argv[3..] - columns, optionally followed by tokenizer specification |  | 
|  1823   **             and snippet delimiters specification. |  | 
|  1824   */ |  | 
|  1825  |  | 
|  1826   /* Make a copy of the complete argv[][] array in a single allocation. |  | 
|  1827   ** The argv[][] array is read-only and transient.  We can write to the |  | 
|  1828   ** copy in order to modify things and the copy is persistent. |  | 
|  1829   */ |  | 
|  1830   memset(pSpec, 0, sizeof(*pSpec)); |  | 
|  1831   for(i=n=0; i<argc; i++){ |  | 
|  1832     n += strlen(argv[i]) + 1; |  | 
|  1833   } |  | 
|  1834   azArg = malloc( sizeof(char*)*argc + n ); |  | 
|  1835   if( azArg==0 ){ |  | 
|  1836     return SQLITE_NOMEM; |  | 
|  1837   } |  | 
|  1838   z = (char*)&azArg[argc]; |  | 
|  1839   for(i=0; i<argc; i++){ |  | 
|  1840     azArg[i] = z; |  | 
|  1841     strcpy(z, argv[i]); |  | 
|  1842     z += strlen(z)+1; |  | 
|  1843   } |  | 
|  1844  |  | 
|  1845   /* Identify the column names and the tokenizer and delimiter arguments |  | 
|  1846   ** in the argv[][] array. |  | 
|  1847   */ |  | 
|  1848   pSpec->zDb = azArg[1]; |  | 
|  1849   pSpec->zName = azArg[2]; |  | 
|  1850   pSpec->nColumn = 0; |  | 
|  1851   pSpec->azColumn = azArg; |  | 
|  1852   zTokenizer = "tokenize simple"; |  | 
|  1853   for(i=3; i<argc; ++i){ |  | 
|  1854     if( startsWith(azArg[i],"tokenize") ){ |  | 
|  1855       zTokenizer = azArg[i]; |  | 
|  1856     }else{ |  | 
|  1857       z = azArg[pSpec->nColumn] = firstToken(azArg[i], &zDummy); |  | 
|  1858       pSpec->nColumn++; |  | 
|  1859     } |  | 
|  1860   } |  | 
|  1861   if( pSpec->nColumn==0 ){ |  | 
|  1862     azArg[0] = "content"; |  | 
|  1863     pSpec->nColumn = 1; |  | 
|  1864   } |  | 
|  1865  |  | 
|  1866   /* |  | 
|  1867   ** Construct the list of content column names. |  | 
|  1868   ** |  | 
|  1869   ** Each content column name will be of the form cNNAAAA |  | 
|  1870   ** where NN is the column number and AAAA is the sanitized |  | 
|  1871   ** column name.  "sanitized" means that special characters are |  | 
|  1872   ** converted to "_".  The cNN prefix guarantees that all column |  | 
|  1873   ** names are unique. |  | 
|  1874   ** |  | 
|  1875   ** The AAAA suffix is not strictly necessary.  It is included |  | 
|  1876   ** for the convenience of people who might examine the generated |  | 
|  1877   ** %_content table and wonder what the columns are used for. |  | 
|  1878   */ |  | 
|  1879   pSpec->azContentColumn = malloc( pSpec->nColumn * sizeof(char *) ); |  | 
|  1880   if( pSpec->azContentColumn==0 ){ |  | 
|  1881     clearTableSpec(pSpec); |  | 
|  1882     return SQLITE_NOMEM; |  | 
|  1883   } |  | 
|  1884   for(i=0; i<pSpec->nColumn; i++){ |  | 
|  1885     char *p; |  | 
|  1886     pSpec->azContentColumn[i] = sqlite3_mprintf("c%d%s", i, azArg[i]); |  | 
|  1887     for (p = pSpec->azContentColumn[i]; *p ; ++p) { |  | 
|  1888       if( !safe_isalnum(*p) ) *p = '_'; |  | 
|  1889     } |  | 
|  1890   } |  | 
|  1891  |  | 
|  1892   /* |  | 
|  1893   ** Parse the tokenizer specification string. |  | 
|  1894   */ |  | 
|  1895   pSpec->azTokenizer = tokenizeString(zTokenizer, &n); |  | 
|  1896   tokenListToIdList(pSpec->azTokenizer); |  | 
|  1897  |  | 
|  1898   return SQLITE_OK; |  | 
|  1899 } |  | 
|  1900  |  | 
|  1901 /* |  | 
|  1902 ** Generate a CREATE TABLE statement that describes the schema of |  | 
|  1903 ** the virtual table.  Return a pointer to this schema string. |  | 
|  1904 ** |  | 
|  1905 ** Space is obtained from sqlite3_mprintf() and should be freed |  | 
|  1906 ** using sqlite3_free(). |  | 
|  1907 */ |  | 
|  1908 static char *fulltextSchema( |  | 
|  1909   int nColumn,                  /* Number of columns */ |  | 
|  1910   const char *const* azColumn,  /* List of columns */ |  | 
|  1911   const char *zTableName        /* Name of the table */ |  | 
|  1912 ){ |  | 
|  1913   int i; |  | 
|  1914   char *zSchema, *zNext; |  | 
|  1915   const char *zSep = "("; |  | 
|  1916   zSchema = sqlite3_mprintf("CREATE TABLE x"); |  | 
|  1917   for(i=0; i<nColumn; i++){ |  | 
|  1918     zNext = sqlite3_mprintf("%s%s%Q", zSchema, zSep, azColumn[i]); |  | 
|  1919     sqlite3_free(zSchema); |  | 
|  1920     zSchema = zNext; |  | 
|  1921     zSep = ","; |  | 
|  1922   } |  | 
|  1923   zNext = sqlite3_mprintf("%s,%Q)", zSchema, zTableName); |  | 
|  1924   sqlite3_free(zSchema); |  | 
|  1925   return zNext; |  | 
|  1926 } |  | 
|  1927  |  | 
|  1928 /* |  | 
|  1929 ** Build a new sqlite3_vtab structure that will describe the |  | 
|  1930 ** fulltext index defined by spec. |  | 
|  1931 */ |  | 
|  1932 static int constructVtab( |  | 
|  1933   sqlite3 *db,              /* The SQLite database connection */ |  | 
|  1934   TableSpec *spec,          /* Parsed spec information from parseSpec() */ |  | 
|  1935   sqlite3_vtab **ppVTab,    /* Write the resulting vtab structure here */ |  | 
|  1936   char **pzErr              /* Write any error message here */ |  | 
|  1937 ){ |  | 
|  1938   int rc; |  | 
|  1939   int n; |  | 
|  1940   fulltext_vtab *v = 0; |  | 
|  1941   const sqlite3_tokenizer_module *m = NULL; |  | 
|  1942   char *schema; |  | 
|  1943  |  | 
|  1944   v = (fulltext_vtab *) malloc(sizeof(fulltext_vtab)); |  | 
|  1945   if( v==0 ) return SQLITE_NOMEM; |  | 
|  1946   memset(v, 0, sizeof(*v)); |  | 
|  1947   /* sqlite will initialize v->base */ |  | 
|  1948   v->db = db; |  | 
|  1949   v->zDb = spec->zDb;       /* Freed when azColumn is freed */ |  | 
|  1950   v->zName = spec->zName;   /* Freed when azColumn is freed */ |  | 
|  1951   v->nColumn = spec->nColumn; |  | 
|  1952   v->azContentColumn = spec->azContentColumn; |  | 
|  1953   spec->azContentColumn = 0; |  | 
|  1954   v->azColumn = spec->azColumn; |  | 
|  1955   spec->azColumn = 0; |  | 
|  1956  |  | 
|  1957   if( spec->azTokenizer==0 ){ |  | 
|  1958     return SQLITE_NOMEM; |  | 
|  1959   } |  | 
|  1960   /* TODO(shess) For now, add new tokenizers as else if clauses. */ |  | 
|  1961   if( spec->azTokenizer[0]==0 || startsWith(spec->azTokenizer[0], "simple") ){ |  | 
|  1962     sqlite3Fts1SimpleTokenizerModule(&m); |  | 
|  1963   }else if( startsWith(spec->azTokenizer[0], "porter") ){ |  | 
|  1964     sqlite3Fts1PorterTokenizerModule(&m); |  | 
|  1965   }else{ |  | 
|  1966     *pzErr = sqlite3_mprintf("unknown tokenizer: %s", spec->azTokenizer[0]); |  | 
|  1967     rc = SQLITE_ERROR; |  | 
|  1968     goto err; |  | 
|  1969   } |  | 
|  1970   for(n=0; spec->azTokenizer[n]; n++){} |  | 
|  1971   if( n ){ |  | 
|  1972     rc = m->xCreate(n-1, (const char*const*)&spec->azTokenizer[1], |  | 
|  1973                     &v->pTokenizer); |  | 
|  1974   }else{ |  | 
|  1975     rc = m->xCreate(0, 0, &v->pTokenizer); |  | 
|  1976   } |  | 
|  1977   if( rc!=SQLITE_OK ) goto err; |  | 
|  1978   v->pTokenizer->pModule = m; |  | 
|  1979  |  | 
|  1980   /* TODO: verify the existence of backing tables foo_content, foo_term */ |  | 
|  1981  |  | 
|  1982   schema = fulltextSchema(v->nColumn, (const char*const*)v->azColumn, |  | 
|  1983                           spec->zName); |  | 
|  1984   rc = sqlite3_declare_vtab(db, schema); |  | 
|  1985   sqlite3_free(schema); |  | 
|  1986   if( rc!=SQLITE_OK ) goto err; |  | 
|  1987  |  | 
|  1988   memset(v->pFulltextStatements, 0, sizeof(v->pFulltextStatements)); |  | 
|  1989  |  | 
|  1990   *ppVTab = &v->base; |  | 
|  1991   TRACE(("FTS1 Connect %p\n", v)); |  | 
|  1992  |  | 
|  1993   return rc; |  | 
|  1994  |  | 
|  1995 err: |  | 
|  1996   fulltext_vtab_destroy(v); |  | 
|  1997   return rc; |  | 
|  1998 } |  | 
|  1999  |  | 
|  2000 static int fulltextConnect( |  | 
|  2001   sqlite3 *db, |  | 
|  2002   void *pAux, |  | 
|  2003   int argc, const char *const*argv, |  | 
|  2004   sqlite3_vtab **ppVTab, |  | 
|  2005   char **pzErr |  | 
|  2006 ){ |  | 
|  2007   TableSpec spec; |  | 
|  2008   int rc = parseSpec(&spec, argc, argv, pzErr); |  | 
|  2009   if( rc!=SQLITE_OK ) return rc; |  | 
|  2010  |  | 
|  2011   rc = constructVtab(db, &spec, ppVTab, pzErr); |  | 
|  2012   clearTableSpec(&spec); |  | 
|  2013   return rc; |  | 
|  2014 } |  | 
|  2015  |  | 
|  2016   /* The %_content table holds the text of each document, with |  | 
|  2017   ** the rowid used as the docid. |  | 
|  2018   ** |  | 
|  2019   ** The %_term table maps each term to a document list blob |  | 
|  2020   ** containing elements sorted by ascending docid, each element |  | 
|  2021   ** encoded as: |  | 
|  2022   ** |  | 
|  2023   **   docid varint-encoded |  | 
|  2024   **   token elements: |  | 
|  2025   **     position+1 varint-encoded as delta from previous position |  | 
|  2026   **     start offset varint-encoded as delta from previous start offset |  | 
|  2027   **     end offset varint-encoded as delta from start offset |  | 
|  2028   ** |  | 
|  2029   ** The sentinel position of 0 indicates the end of the token list. |  | 
|  2030   ** |  | 
|  2031   ** Additionally, doclist blobs are chunked into multiple segments, |  | 
|  2032   ** using segment to order the segments.  New elements are added to |  | 
|  2033   ** the segment at segment 0, until it exceeds CHUNK_MAX.  Then |  | 
|  2034   ** segment 0 is deleted, and the doclist is inserted at segment 1. |  | 
|  2035   ** If there is already a doclist at segment 1, the segment 0 doclist |  | 
|  2036   ** is merged with it, the segment 1 doclist is deleted, and the |  | 
|  2037   ** merged doclist is inserted at segment 2, repeating those |  | 
|  2038   ** operations until an insert succeeds. |  | 
|  2039   ** |  | 
|  2040   ** Since this structure doesn't allow us to update elements in place |  | 
|  2041   ** in case of deletion or update, these are simply written to |  | 
|  2042   ** segment 0 (with an empty token list in case of deletion), with |  | 
|  2043   ** docListAccumulate() taking care to retain lower-segment |  | 
|  2044   ** information in preference to higher-segment information. |  | 
|  2045   */ |  | 
|  2046   /* TODO(shess) Provide a VACUUM type operation which both removes |  | 
|  2047   ** deleted elements which are no longer necessary, and duplicated |  | 
|  2048   ** elements.  I suspect this will probably not be necessary in |  | 
|  2049   ** practice, though. |  | 
|  2050   */ |  | 
|  2051 static int fulltextCreate(sqlite3 *db, void *pAux, |  | 
|  2052                           int argc, const char * const *argv, |  | 
|  2053                           sqlite3_vtab **ppVTab, char **pzErr){ |  | 
|  2054   int rc; |  | 
|  2055   TableSpec spec; |  | 
|  2056   StringBuffer schema; |  | 
|  2057   TRACE(("FTS1 Create\n")); |  | 
|  2058  |  | 
|  2059   rc = parseSpec(&spec, argc, argv, pzErr); |  | 
|  2060   if( rc!=SQLITE_OK ) return rc; |  | 
|  2061  |  | 
|  2062   initStringBuffer(&schema); |  | 
|  2063   append(&schema, "CREATE TABLE %_content("); |  | 
|  2064   appendList(&schema, spec.nColumn, spec.azContentColumn); |  | 
|  2065   append(&schema, ")"); |  | 
|  2066   rc = sql_exec(db, spec.zDb, spec.zName, schema.s); |  | 
|  2067   free(schema.s); |  | 
|  2068   if( rc!=SQLITE_OK ) goto out; |  | 
|  2069  |  | 
|  2070   rc = sql_exec(db, spec.zDb, spec.zName, |  | 
|  2071     "create table %_term(term text, segment integer, doclist blob, " |  | 
|  2072                         "primary key(term, segment));"); |  | 
|  2073   if( rc!=SQLITE_OK ) goto out; |  | 
|  2074  |  | 
|  2075   rc = constructVtab(db, &spec, ppVTab, pzErr); |  | 
|  2076  |  | 
|  2077 out: |  | 
|  2078   clearTableSpec(&spec); |  | 
|  2079   return rc; |  | 
|  2080 } |  | 
|  2081  |  | 
|  2082 /* Decide how to handle an SQL query. */ |  | 
|  2083 static int fulltextBestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ |  | 
|  2084   int i; |  | 
|  2085   TRACE(("FTS1 BestIndex\n")); |  | 
|  2086  |  | 
|  2087   for(i=0; i<pInfo->nConstraint; ++i){ |  | 
|  2088     const struct sqlite3_index_constraint *pConstraint; |  | 
|  2089     pConstraint = &pInfo->aConstraint[i]; |  | 
|  2090     if( pConstraint->usable ) { |  | 
|  2091       if( pConstraint->iColumn==-1 && |  | 
|  2092           pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ ){ |  | 
|  2093         pInfo->idxNum = QUERY_ROWID;      /* lookup by rowid */ |  | 
|  2094         TRACE(("FTS1 QUERY_ROWID\n")); |  | 
|  2095       } else if( pConstraint->iColumn>=0 && |  | 
|  2096                  pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH ){ |  | 
|  2097         /* full-text search */ |  | 
|  2098         pInfo->idxNum = QUERY_FULLTEXT + pConstraint->iColumn; |  | 
|  2099         TRACE(("FTS1 QUERY_FULLTEXT %d\n", pConstraint->iColumn)); |  | 
|  2100       } else continue; |  | 
|  2101  |  | 
|  2102       pInfo->aConstraintUsage[i].argvIndex = 1; |  | 
|  2103       pInfo->aConstraintUsage[i].omit = 1; |  | 
|  2104  |  | 
|  2105       /* An arbitrary value for now. |  | 
|  2106        * TODO: Perhaps rowid matches should be considered cheaper than |  | 
|  2107        * full-text searches. */ |  | 
|  2108       pInfo->estimatedCost = 1.0;    |  | 
|  2109  |  | 
|  2110       return SQLITE_OK; |  | 
|  2111     } |  | 
|  2112   } |  | 
|  2113   pInfo->idxNum = QUERY_GENERIC; |  | 
|  2114   return SQLITE_OK; |  | 
|  2115 } |  | 
|  2116  |  | 
|  2117 static int fulltextDisconnect(sqlite3_vtab *pVTab){ |  | 
|  2118   TRACE(("FTS1 Disconnect %p\n", pVTab)); |  | 
|  2119   fulltext_vtab_destroy((fulltext_vtab *)pVTab); |  | 
|  2120   return SQLITE_OK; |  | 
|  2121 } |  | 
|  2122  |  | 
|  2123 static int fulltextDestroy(sqlite3_vtab *pVTab){ |  | 
|  2124   fulltext_vtab *v = (fulltext_vtab *)pVTab; |  | 
|  2125   int rc; |  | 
|  2126  |  | 
|  2127   TRACE(("FTS1 Destroy %p\n", pVTab)); |  | 
|  2128   rc = sql_exec(v->db, v->zDb, v->zName, |  | 
|  2129                 "drop table if exists %_content;" |  | 
|  2130                 "drop table if exists %_term;" |  | 
|  2131                 ); |  | 
|  2132   if( rc!=SQLITE_OK ) return rc; |  | 
|  2133  |  | 
|  2134   fulltext_vtab_destroy((fulltext_vtab *)pVTab); |  | 
|  2135   return SQLITE_OK; |  | 
|  2136 } |  | 
|  2137  |  | 
|  2138 static int fulltextOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){ |  | 
|  2139   fulltext_cursor *c; |  | 
|  2140  |  | 
|  2141   c = (fulltext_cursor *) calloc(sizeof(fulltext_cursor), 1); |  | 
|  2142   /* sqlite will initialize c->base */ |  | 
|  2143   *ppCursor = &c->base; |  | 
|  2144   TRACE(("FTS1 Open %p: %p\n", pVTab, c)); |  | 
|  2145  |  | 
|  2146   return SQLITE_OK; |  | 
|  2147 } |  | 
|  2148  |  | 
|  2149  |  | 
|  2150 /* Free all of the dynamically allocated memory held by *q |  | 
|  2151 */ |  | 
|  2152 static void queryClear(Query *q){ |  | 
|  2153   int i; |  | 
|  2154   for(i = 0; i < q->nTerms; ++i){ |  | 
|  2155     free(q->pTerms[i].pTerm); |  | 
|  2156   } |  | 
|  2157   free(q->pTerms); |  | 
|  2158   memset(q, 0, sizeof(*q)); |  | 
|  2159 } |  | 
|  2160  |  | 
|  2161 /* Free all of the dynamically allocated memory held by the |  | 
|  2162 ** Snippet |  | 
|  2163 */ |  | 
|  2164 static void snippetClear(Snippet *p){ |  | 
|  2165   free(p->aMatch); |  | 
|  2166   free(p->zOffset); |  | 
|  2167   free(p->zSnippet); |  | 
|  2168   memset(p, 0, sizeof(*p)); |  | 
|  2169 } |  | 
|  2170 /* |  | 
|  2171 ** Append a single entry to the p->aMatch[] log. |  | 
|  2172 */ |  | 
|  2173 static void snippetAppendMatch( |  | 
|  2174   Snippet *p,               /* Append the entry to this snippet */ |  | 
|  2175   int iCol, int iTerm,      /* The column and query term */ |  | 
|  2176   int iStart, int nByte     /* Offset and size of the match */ |  | 
|  2177 ){ |  | 
|  2178   int i; |  | 
|  2179   struct snippetMatch *pMatch; |  | 
|  2180   if( p->nMatch+1>=p->nAlloc ){ |  | 
|  2181     p->nAlloc = p->nAlloc*2 + 10; |  | 
|  2182     p->aMatch = realloc(p->aMatch, p->nAlloc*sizeof(p->aMatch[0]) ); |  | 
|  2183     if( p->aMatch==0 ){ |  | 
|  2184       p->nMatch = 0; |  | 
|  2185       p->nAlloc = 0; |  | 
|  2186       return; |  | 
|  2187     } |  | 
|  2188   } |  | 
|  2189   i = p->nMatch++; |  | 
|  2190   pMatch = &p->aMatch[i]; |  | 
|  2191   pMatch->iCol = iCol; |  | 
|  2192   pMatch->iTerm = iTerm; |  | 
|  2193   pMatch->iStart = iStart; |  | 
|  2194   pMatch->nByte = nByte; |  | 
|  2195 } |  | 
|  2196  |  | 
|  2197 /* |  | 
|  2198 ** Sizing information for the circular buffer used in snippetOffsetsOfColumn() |  | 
|  2199 */ |  | 
|  2200 #define FTS1_ROTOR_SZ   (32) |  | 
|  2201 #define FTS1_ROTOR_MASK (FTS1_ROTOR_SZ-1) |  | 
|  2202  |  | 
|  2203 /* |  | 
|  2204 ** Add entries to pSnippet->aMatch[] for every match that occurs against |  | 
|  2205 ** document zDoc[0..nDoc-1] which is stored in column iColumn. |  | 
|  2206 */ |  | 
|  2207 static void snippetOffsetsOfColumn( |  | 
|  2208   Query *pQuery, |  | 
|  2209   Snippet *pSnippet, |  | 
|  2210   int iColumn, |  | 
|  2211   const char *zDoc, |  | 
|  2212   int nDoc |  | 
|  2213 ){ |  | 
|  2214   const sqlite3_tokenizer_module *pTModule;  /* The tokenizer module */ |  | 
|  2215   sqlite3_tokenizer *pTokenizer;             /* The specific tokenizer */ |  | 
|  2216   sqlite3_tokenizer_cursor *pTCursor;        /* Tokenizer cursor */ |  | 
|  2217   fulltext_vtab *pVtab;                /* The full text index */ |  | 
|  2218   int nColumn;                         /* Number of columns in the index */ |  | 
|  2219   const QueryTerm *aTerm;              /* Query string terms */ |  | 
|  2220   int nTerm;                           /* Number of query string terms */   |  | 
|  2221   int i, j;                            /* Loop counters */ |  | 
|  2222   int rc;                              /* Return code */ |  | 
|  2223   unsigned int match, prevMatch;       /* Phrase search bitmasks */ |  | 
|  2224   const char *zToken;                  /* Next token from the tokenizer */ |  | 
|  2225   int nToken;                          /* Size of zToken */ |  | 
|  2226   int iBegin, iEnd, iPos;              /* Offsets of beginning and end */ |  | 
|  2227  |  | 
|  2228   /* The following variables keep a circular buffer of the last |  | 
|  2229   ** few tokens */ |  | 
|  2230   unsigned int iRotor = 0;             /* Index of current token */ |  | 
|  2231   int iRotorBegin[FTS1_ROTOR_SZ];      /* Beginning offset of token */ |  | 
|  2232   int iRotorLen[FTS1_ROTOR_SZ];        /* Length of token */ |  | 
|  2233  |  | 
|  2234   pVtab = pQuery->pFts; |  | 
|  2235   nColumn = pVtab->nColumn; |  | 
|  2236   pTokenizer = pVtab->pTokenizer; |  | 
|  2237   pTModule = pTokenizer->pModule; |  | 
|  2238   rc = pTModule->xOpen(pTokenizer, zDoc, nDoc, &pTCursor); |  | 
|  2239   if( rc ) return; |  | 
|  2240   pTCursor->pTokenizer = pTokenizer; |  | 
|  2241   aTerm = pQuery->pTerms; |  | 
|  2242   nTerm = pQuery->nTerms; |  | 
|  2243   if( nTerm>=FTS1_ROTOR_SZ ){ |  | 
|  2244     nTerm = FTS1_ROTOR_SZ - 1; |  | 
|  2245   } |  | 
|  2246   prevMatch = 0; |  | 
|  2247   while(1){ |  | 
|  2248     rc = pTModule->xNext(pTCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos); |  | 
|  2249     if( rc ) break; |  | 
|  2250     iRotorBegin[iRotor&FTS1_ROTOR_MASK] = iBegin; |  | 
|  2251     iRotorLen[iRotor&FTS1_ROTOR_MASK] = iEnd-iBegin; |  | 
|  2252     match = 0; |  | 
|  2253     for(i=0; i<nTerm; i++){ |  | 
|  2254       int iCol; |  | 
|  2255       iCol = aTerm[i].iColumn; |  | 
|  2256       if( iCol>=0 && iCol<nColumn && iCol!=iColumn ) continue; |  | 
|  2257       if( aTerm[i].nTerm!=nToken ) continue; |  | 
|  2258       if( memcmp(aTerm[i].pTerm, zToken, nToken) ) continue; |  | 
|  2259       if( aTerm[i].iPhrase>1 && (prevMatch & (1<<i))==0 ) continue; |  | 
|  2260       match |= 1<<i; |  | 
|  2261       if( i==nTerm-1 || aTerm[i+1].iPhrase==1 ){ |  | 
|  2262         for(j=aTerm[i].iPhrase-1; j>=0; j--){ |  | 
|  2263           int k = (iRotor-j) & FTS1_ROTOR_MASK; |  | 
|  2264           snippetAppendMatch(pSnippet, iColumn, i-j, |  | 
|  2265                 iRotorBegin[k], iRotorLen[k]); |  | 
|  2266         } |  | 
|  2267       } |  | 
|  2268     } |  | 
|  2269     prevMatch = match<<1; |  | 
|  2270     iRotor++; |  | 
|  2271   } |  | 
|  2272   pTModule->xClose(pTCursor);   |  | 
|  2273 } |  | 
|  2274  |  | 
|  2275  |  | 
|  2276 /* |  | 
|  2277 ** Compute all offsets for the current row of the query.   |  | 
|  2278 ** If the offsets have already been computed, this routine is a no-op. |  | 
|  2279 */ |  | 
|  2280 static void snippetAllOffsets(fulltext_cursor *p){ |  | 
|  2281   int nColumn; |  | 
|  2282   int iColumn, i; |  | 
|  2283   int iFirst, iLast; |  | 
|  2284   fulltext_vtab *pFts; |  | 
|  2285  |  | 
|  2286   if( p->snippet.nMatch ) return; |  | 
|  2287   if( p->q.nTerms==0 ) return; |  | 
|  2288   pFts = p->q.pFts; |  | 
|  2289   nColumn = pFts->nColumn; |  | 
|  2290   iColumn = p->iCursorType - QUERY_FULLTEXT; |  | 
|  2291   if( iColumn<0 || iColumn>=nColumn ){ |  | 
|  2292     iFirst = 0; |  | 
|  2293     iLast = nColumn-1; |  | 
|  2294   }else{ |  | 
|  2295     iFirst = iColumn; |  | 
|  2296     iLast = iColumn; |  | 
|  2297   } |  | 
|  2298   for(i=iFirst; i<=iLast; i++){ |  | 
|  2299     const char *zDoc; |  | 
|  2300     int nDoc; |  | 
|  2301     zDoc = (const char*)sqlite3_column_text(p->pStmt, i+1); |  | 
|  2302     nDoc = sqlite3_column_bytes(p->pStmt, i+1); |  | 
|  2303     snippetOffsetsOfColumn(&p->q, &p->snippet, i, zDoc, nDoc); |  | 
|  2304   } |  | 
|  2305 } |  | 
|  2306  |  | 
|  2307 /* |  | 
|  2308 ** Convert the information in the aMatch[] array of the snippet |  | 
|  2309 ** into the string zOffset[0..nOffset-1]. |  | 
|  2310 */ |  | 
|  2311 static void snippetOffsetText(Snippet *p){ |  | 
|  2312   int i; |  | 
|  2313   int cnt = 0; |  | 
|  2314   StringBuffer sb; |  | 
|  2315   char zBuf[200]; |  | 
|  2316   if( p->zOffset ) return; |  | 
|  2317   initStringBuffer(&sb); |  | 
|  2318   for(i=0; i<p->nMatch; i++){ |  | 
|  2319     struct snippetMatch *pMatch = &p->aMatch[i]; |  | 
|  2320     zBuf[0] = ' '; |  | 
|  2321     sqlite3_snprintf(sizeof(zBuf)-1, &zBuf[cnt>0], "%d %d %d %d", |  | 
|  2322         pMatch->iCol, pMatch->iTerm, pMatch->iStart, pMatch->nByte); |  | 
|  2323     append(&sb, zBuf); |  | 
|  2324     cnt++; |  | 
|  2325   } |  | 
|  2326   p->zOffset = sb.s; |  | 
|  2327   p->nOffset = sb.len; |  | 
|  2328 } |  | 
|  2329  |  | 
|  2330 /* |  | 
|  2331 ** zDoc[0..nDoc-1] is phrase of text.  aMatch[0..nMatch-1] are a set |  | 
|  2332 ** of matching words some of which might be in zDoc.  zDoc is column |  | 
|  2333 ** number iCol. |  | 
|  2334 ** |  | 
|  2335 ** iBreak is suggested spot in zDoc where we could begin or end an |  | 
|  2336 ** excerpt.  Return a value similar to iBreak but possibly adjusted |  | 
|  2337 ** to be a little left or right so that the break point is better. |  | 
|  2338 */ |  | 
|  2339 static int wordBoundary( |  | 
|  2340   int iBreak,                   /* The suggested break point */ |  | 
|  2341   const char *zDoc,             /* Document text */ |  | 
|  2342   int nDoc,                     /* Number of bytes in zDoc[] */ |  | 
|  2343   struct snippetMatch *aMatch,  /* Matching words */ |  | 
|  2344   int nMatch,                   /* Number of entries in aMatch[] */ |  | 
|  2345   int iCol                      /* The column number for zDoc[] */ |  | 
|  2346 ){ |  | 
|  2347   int i; |  | 
|  2348   if( iBreak<=10 ){ |  | 
|  2349     return 0; |  | 
|  2350   } |  | 
|  2351   if( iBreak>=nDoc-10 ){ |  | 
|  2352     return nDoc; |  | 
|  2353   } |  | 
|  2354   for(i=0; i<nMatch && aMatch[i].iCol<iCol; i++){} |  | 
|  2355   while( i<nMatch && aMatch[i].iStart+aMatch[i].nByte<iBreak ){ i++; } |  | 
|  2356   if( i<nMatch ){ |  | 
|  2357     if( aMatch[i].iStart<iBreak+10 ){ |  | 
|  2358       return aMatch[i].iStart; |  | 
|  2359     } |  | 
|  2360     if( i>0 && aMatch[i-1].iStart+aMatch[i-1].nByte>=iBreak ){ |  | 
|  2361       return aMatch[i-1].iStart; |  | 
|  2362     } |  | 
|  2363   } |  | 
|  2364   for(i=1; i<=10; i++){ |  | 
|  2365     if( safe_isspace(zDoc[iBreak-i]) ){ |  | 
|  2366       return iBreak - i + 1; |  | 
|  2367     } |  | 
|  2368     if( safe_isspace(zDoc[iBreak+i]) ){ |  | 
|  2369       return iBreak + i + 1; |  | 
|  2370     } |  | 
|  2371   } |  | 
|  2372   return iBreak; |  | 
|  2373 } |  | 
|  2374  |  | 
|  2375 /* |  | 
|  2376 ** If the StringBuffer does not end in white space, add a single |  | 
|  2377 ** space character to the end. |  | 
|  2378 */ |  | 
|  2379 static void appendWhiteSpace(StringBuffer *p){ |  | 
|  2380   if( p->len==0 ) return; |  | 
|  2381   if( safe_isspace(p->s[p->len-1]) ) return; |  | 
|  2382   append(p, " "); |  | 
|  2383 } |  | 
|  2384  |  | 
|  2385 /* |  | 
|  2386 ** Remove white space from teh end of the StringBuffer |  | 
|  2387 */ |  | 
|  2388 static void trimWhiteSpace(StringBuffer *p){ |  | 
|  2389   while( p->len>0 && safe_isspace(p->s[p->len-1]) ){ |  | 
|  2390     p->len--; |  | 
|  2391   } |  | 
|  2392 } |  | 
|  2393  |  | 
|  2394  |  | 
|  2395  |  | 
|  2396 /* |  | 
|  2397 ** Allowed values for Snippet.aMatch[].snStatus |  | 
|  2398 */ |  | 
|  2399 #define SNIPPET_IGNORE  0   /* It is ok to omit this match from the snippet */ |  | 
|  2400 #define SNIPPET_DESIRED 1   /* We want to include this match in the snippet */ |  | 
|  2401  |  | 
|  2402 /* |  | 
|  2403 ** Generate the text of a snippet. |  | 
|  2404 */ |  | 
|  2405 static void snippetText( |  | 
|  2406   fulltext_cursor *pCursor,   /* The cursor we need the snippet for */ |  | 
|  2407   const char *zStartMark,     /* Markup to appear before each match */ |  | 
|  2408   const char *zEndMark,       /* Markup to appear after each match */ |  | 
|  2409   const char *zEllipsis       /* Ellipsis mark */ |  | 
|  2410 ){ |  | 
|  2411   int i, j; |  | 
|  2412   struct snippetMatch *aMatch; |  | 
|  2413   int nMatch; |  | 
|  2414   int nDesired; |  | 
|  2415   StringBuffer sb; |  | 
|  2416   int tailCol; |  | 
|  2417   int tailOffset; |  | 
|  2418   int iCol; |  | 
|  2419   int nDoc; |  | 
|  2420   const char *zDoc; |  | 
|  2421   int iStart, iEnd; |  | 
|  2422   int tailEllipsis = 0; |  | 
|  2423   int iMatch; |  | 
|  2424    |  | 
|  2425  |  | 
|  2426   free(pCursor->snippet.zSnippet); |  | 
|  2427   pCursor->snippet.zSnippet = 0; |  | 
|  2428   aMatch = pCursor->snippet.aMatch; |  | 
|  2429   nMatch = pCursor->snippet.nMatch; |  | 
|  2430   initStringBuffer(&sb); |  | 
|  2431  |  | 
|  2432   for(i=0; i<nMatch; i++){ |  | 
|  2433     aMatch[i].snStatus = SNIPPET_IGNORE; |  | 
|  2434   } |  | 
|  2435   nDesired = 0; |  | 
|  2436   for(i=0; i<pCursor->q.nTerms; i++){ |  | 
|  2437     for(j=0; j<nMatch; j++){ |  | 
|  2438       if( aMatch[j].iTerm==i ){ |  | 
|  2439         aMatch[j].snStatus = SNIPPET_DESIRED; |  | 
|  2440         nDesired++; |  | 
|  2441         break; |  | 
|  2442       } |  | 
|  2443     } |  | 
|  2444   } |  | 
|  2445  |  | 
|  2446   iMatch = 0; |  | 
|  2447   tailCol = -1; |  | 
|  2448   tailOffset = 0; |  | 
|  2449   for(i=0; i<nMatch && nDesired>0; i++){ |  | 
|  2450     if( aMatch[i].snStatus!=SNIPPET_DESIRED ) continue; |  | 
|  2451     nDesired--; |  | 
|  2452     iCol = aMatch[i].iCol; |  | 
|  2453     zDoc = (const char*)sqlite3_column_text(pCursor->pStmt, iCol+1); |  | 
|  2454     nDoc = sqlite3_column_bytes(pCursor->pStmt, iCol+1); |  | 
|  2455     iStart = aMatch[i].iStart - 40; |  | 
|  2456     iStart = wordBoundary(iStart, zDoc, nDoc, aMatch, nMatch, iCol); |  | 
|  2457     if( iStart<=10 ){ |  | 
|  2458       iStart = 0; |  | 
|  2459     } |  | 
|  2460     if( iCol==tailCol && iStart<=tailOffset+20 ){ |  | 
|  2461       iStart = tailOffset; |  | 
|  2462     } |  | 
|  2463     if( (iCol!=tailCol && tailCol>=0) || iStart!=tailOffset ){ |  | 
|  2464       trimWhiteSpace(&sb); |  | 
|  2465       appendWhiteSpace(&sb); |  | 
|  2466       append(&sb, zEllipsis); |  | 
|  2467       appendWhiteSpace(&sb); |  | 
|  2468     } |  | 
|  2469     iEnd = aMatch[i].iStart + aMatch[i].nByte + 40; |  | 
|  2470     iEnd = wordBoundary(iEnd, zDoc, nDoc, aMatch, nMatch, iCol); |  | 
|  2471     if( iEnd>=nDoc-10 ){ |  | 
|  2472       iEnd = nDoc; |  | 
|  2473       tailEllipsis = 0; |  | 
|  2474     }else{ |  | 
|  2475       tailEllipsis = 1; |  | 
|  2476     } |  | 
|  2477     while( iMatch<nMatch && aMatch[iMatch].iCol<iCol ){ iMatch++; } |  | 
|  2478     while( iStart<iEnd ){ |  | 
|  2479       while( iMatch<nMatch && aMatch[iMatch].iStart<iStart |  | 
|  2480              && aMatch[iMatch].iCol<=iCol ){ |  | 
|  2481         iMatch++; |  | 
|  2482       } |  | 
|  2483       if( iMatch<nMatch && aMatch[iMatch].iStart<iEnd |  | 
|  2484              && aMatch[iMatch].iCol==iCol ){ |  | 
|  2485         nappend(&sb, &zDoc[iStart], aMatch[iMatch].iStart - iStart); |  | 
|  2486         iStart = aMatch[iMatch].iStart; |  | 
|  2487         append(&sb, zStartMark); |  | 
|  2488         nappend(&sb, &zDoc[iStart], aMatch[iMatch].nByte); |  | 
|  2489         append(&sb, zEndMark); |  | 
|  2490         iStart += aMatch[iMatch].nByte; |  | 
|  2491         for(j=iMatch+1; j<nMatch; j++){ |  | 
|  2492           if( aMatch[j].iTerm==aMatch[iMatch].iTerm |  | 
|  2493               && aMatch[j].snStatus==SNIPPET_DESIRED ){ |  | 
|  2494             nDesired--; |  | 
|  2495             aMatch[j].snStatus = SNIPPET_IGNORE; |  | 
|  2496           } |  | 
|  2497         } |  | 
|  2498       }else{ |  | 
|  2499         nappend(&sb, &zDoc[iStart], iEnd - iStart); |  | 
|  2500         iStart = iEnd; |  | 
|  2501       } |  | 
|  2502     } |  | 
|  2503     tailCol = iCol; |  | 
|  2504     tailOffset = iEnd; |  | 
|  2505   } |  | 
|  2506   trimWhiteSpace(&sb); |  | 
|  2507   if( tailEllipsis ){ |  | 
|  2508     appendWhiteSpace(&sb); |  | 
|  2509     append(&sb, zEllipsis); |  | 
|  2510   } |  | 
|  2511   pCursor->snippet.zSnippet = sb.s; |  | 
|  2512   pCursor->snippet.nSnippet = sb.len;   |  | 
|  2513 } |  | 
|  2514  |  | 
|  2515  |  | 
|  2516 /* |  | 
|  2517 ** Close the cursor.  For additional information see the documentation |  | 
|  2518 ** on the xClose method of the virtual table interface. |  | 
|  2519 */ |  | 
|  2520 static int fulltextClose(sqlite3_vtab_cursor *pCursor){ |  | 
|  2521   fulltext_cursor *c = (fulltext_cursor *) pCursor; |  | 
|  2522   TRACE(("FTS1 Close %p\n", c)); |  | 
|  2523   sqlite3_finalize(c->pStmt); |  | 
|  2524   queryClear(&c->q); |  | 
|  2525   snippetClear(&c->snippet); |  | 
|  2526   if( c->result.pDoclist!=NULL ){ |  | 
|  2527     docListDelete(c->result.pDoclist); |  | 
|  2528   } |  | 
|  2529   free(c); |  | 
|  2530   return SQLITE_OK; |  | 
|  2531 } |  | 
|  2532  |  | 
|  2533 static int fulltextNext(sqlite3_vtab_cursor *pCursor){ |  | 
|  2534   fulltext_cursor *c = (fulltext_cursor *) pCursor; |  | 
|  2535   sqlite_int64 iDocid; |  | 
|  2536   int rc; |  | 
|  2537  |  | 
|  2538   TRACE(("FTS1 Next %p\n", pCursor)); |  | 
|  2539   snippetClear(&c->snippet); |  | 
|  2540   if( c->iCursorType < QUERY_FULLTEXT ){ |  | 
|  2541     /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */ |  | 
|  2542     rc = sqlite3_step(c->pStmt); |  | 
|  2543     switch( rc ){ |  | 
|  2544       case SQLITE_ROW: |  | 
|  2545         c->eof = 0; |  | 
|  2546         return SQLITE_OK; |  | 
|  2547       case SQLITE_DONE: |  | 
|  2548         c->eof = 1; |  | 
|  2549         return SQLITE_OK; |  | 
|  2550       default: |  | 
|  2551         c->eof = 1; |  | 
|  2552         return rc; |  | 
|  2553     } |  | 
|  2554   } else {  /* full-text query */ |  | 
|  2555     rc = sqlite3_reset(c->pStmt); |  | 
|  2556     if( rc!=SQLITE_OK ) return rc; |  | 
|  2557  |  | 
|  2558     iDocid = nextDocid(&c->result); |  | 
|  2559     if( iDocid==0 ){ |  | 
|  2560       c->eof = 1; |  | 
|  2561       return SQLITE_OK; |  | 
|  2562     } |  | 
|  2563     rc = sqlite3_bind_int64(c->pStmt, 1, iDocid); |  | 
|  2564     if( rc!=SQLITE_OK ) return rc; |  | 
|  2565     /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */ |  | 
|  2566     rc = sqlite3_step(c->pStmt); |  | 
|  2567     if( rc==SQLITE_ROW ){   /* the case we expect */ |  | 
|  2568       c->eof = 0; |  | 
|  2569       return SQLITE_OK; |  | 
|  2570     } |  | 
|  2571     /* an error occurred; abort */ |  | 
|  2572     return rc==SQLITE_DONE ? SQLITE_ERROR : rc; |  | 
|  2573   } |  | 
|  2574 } |  | 
|  2575  |  | 
|  2576  |  | 
|  2577 /* Return a DocList corresponding to the query term *pTerm.  If *pTerm |  | 
|  2578 ** is the first term of a phrase query, go ahead and evaluate the phrase |  | 
|  2579 ** query and return the doclist for the entire phrase query. |  | 
|  2580 ** |  | 
|  2581 ** The result is stored in pTerm->doclist. |  | 
|  2582 */ |  | 
|  2583 static int docListOfTerm( |  | 
|  2584   fulltext_vtab *v,     /* The full text index */ |  | 
|  2585   int iColumn,          /* column to restrict to.  No restrition if >=nColumn */ |  | 
|  2586   QueryTerm *pQTerm,    /* Term we are looking for, or 1st term of a phrase */ |  | 
|  2587   DocList **ppResult    /* Write the result here */ |  | 
|  2588 ){ |  | 
|  2589   DocList *pLeft, *pRight, *pNew; |  | 
|  2590   int i, rc; |  | 
|  2591  |  | 
|  2592   pLeft = docListNew(DL_POSITIONS); |  | 
|  2593   rc = term_select_all(v, iColumn, pQTerm->pTerm, pQTerm->nTerm, pLeft); |  | 
|  2594   if( rc ){ |  | 
|  2595     docListDelete(pLeft); |  | 
|  2596     return rc; |  | 
|  2597   } |  | 
|  2598   for(i=1; i<=pQTerm->nPhrase; i++){ |  | 
|  2599     pRight = docListNew(DL_POSITIONS); |  | 
|  2600     rc = term_select_all(v, iColumn, pQTerm[i].pTerm, pQTerm[i].nTerm, pRight); |  | 
|  2601     if( rc ){ |  | 
|  2602       docListDelete(pLeft); |  | 
|  2603       return rc; |  | 
|  2604     } |  | 
|  2605     pNew = docListNew(i<pQTerm->nPhrase ? DL_POSITIONS : DL_DOCIDS); |  | 
|  2606     docListPhraseMerge(pLeft, pRight, pNew); |  | 
|  2607     docListDelete(pLeft); |  | 
|  2608     docListDelete(pRight); |  | 
|  2609     pLeft = pNew; |  | 
|  2610   } |  | 
|  2611   *ppResult = pLeft; |  | 
|  2612   return SQLITE_OK; |  | 
|  2613 } |  | 
|  2614  |  | 
|  2615 /* Add a new term pTerm[0..nTerm-1] to the query *q. |  | 
|  2616 */ |  | 
|  2617 static void queryAdd(Query *q, const char *pTerm, int nTerm){ |  | 
|  2618   QueryTerm *t; |  | 
|  2619   ++q->nTerms; |  | 
|  2620   q->pTerms = realloc(q->pTerms, q->nTerms * sizeof(q->pTerms[0])); |  | 
|  2621   if( q->pTerms==0 ){ |  | 
|  2622     q->nTerms = 0; |  | 
|  2623     return; |  | 
|  2624   } |  | 
|  2625   t = &q->pTerms[q->nTerms - 1]; |  | 
|  2626   memset(t, 0, sizeof(*t)); |  | 
|  2627   t->pTerm = malloc(nTerm+1); |  | 
|  2628   memcpy(t->pTerm, pTerm, nTerm); |  | 
|  2629   t->pTerm[nTerm] = 0; |  | 
|  2630   t->nTerm = nTerm; |  | 
|  2631   t->isOr = q->nextIsOr; |  | 
|  2632   q->nextIsOr = 0; |  | 
|  2633   t->iColumn = q->nextColumn; |  | 
|  2634   q->nextColumn = q->dfltColumn; |  | 
|  2635 } |  | 
|  2636  |  | 
|  2637 /* |  | 
|  2638 ** Check to see if the string zToken[0...nToken-1] matches any |  | 
|  2639 ** column name in the virtual table.   If it does, |  | 
|  2640 ** return the zero-indexed column number.  If not, return -1. |  | 
|  2641 */ |  | 
|  2642 static int checkColumnSpecifier( |  | 
|  2643   fulltext_vtab *pVtab,    /* The virtual table */ |  | 
|  2644   const char *zToken,      /* Text of the token */ |  | 
|  2645   int nToken               /* Number of characters in the token */ |  | 
|  2646 ){ |  | 
|  2647   int i; |  | 
|  2648   for(i=0; i<pVtab->nColumn; i++){ |  | 
|  2649     if( memcmp(pVtab->azColumn[i], zToken, nToken)==0 |  | 
|  2650         && pVtab->azColumn[i][nToken]==0 ){ |  | 
|  2651       return i; |  | 
|  2652     } |  | 
|  2653   } |  | 
|  2654   return -1; |  | 
|  2655 } |  | 
|  2656  |  | 
|  2657 /* |  | 
|  2658 ** Parse the text at pSegment[0..nSegment-1].  Add additional terms |  | 
|  2659 ** to the query being assemblied in pQuery. |  | 
|  2660 ** |  | 
|  2661 ** inPhrase is true if pSegment[0..nSegement-1] is contained within |  | 
|  2662 ** double-quotes.  If inPhrase is true, then the first term |  | 
|  2663 ** is marked with the number of terms in the phrase less one and |  | 
|  2664 ** OR and "-" syntax is ignored.  If inPhrase is false, then every |  | 
|  2665 ** term found is marked with nPhrase=0 and OR and "-" syntax is significant. |  | 
|  2666 */ |  | 
|  2667 static int tokenizeSegment( |  | 
|  2668   sqlite3_tokenizer *pTokenizer,          /* The tokenizer to use */ |  | 
|  2669   const char *pSegment, int nSegment,     /* Query expression being parsed */ |  | 
|  2670   int inPhrase,                           /* True if within "..." */ |  | 
|  2671   Query *pQuery                           /* Append results here */ |  | 
|  2672 ){ |  | 
|  2673   const sqlite3_tokenizer_module *pModule = pTokenizer->pModule; |  | 
|  2674   sqlite3_tokenizer_cursor *pCursor; |  | 
|  2675   int firstIndex = pQuery->nTerms; |  | 
|  2676   int iCol; |  | 
|  2677   int nTerm = 1; |  | 
|  2678    |  | 
|  2679   int rc = pModule->xOpen(pTokenizer, pSegment, nSegment, &pCursor); |  | 
|  2680   if( rc!=SQLITE_OK ) return rc; |  | 
|  2681   pCursor->pTokenizer = pTokenizer; |  | 
|  2682  |  | 
|  2683   while( 1 ){ |  | 
|  2684     const char *pToken; |  | 
|  2685     int nToken, iBegin, iEnd, iPos; |  | 
|  2686  |  | 
|  2687     rc = pModule->xNext(pCursor, |  | 
|  2688                         &pToken, &nToken, |  | 
|  2689                         &iBegin, &iEnd, &iPos); |  | 
|  2690     if( rc!=SQLITE_OK ) break; |  | 
|  2691     if( !inPhrase && |  | 
|  2692         pSegment[iEnd]==':' && |  | 
|  2693          (iCol = checkColumnSpecifier(pQuery->pFts, pToken, nToken))>=0 ){ |  | 
|  2694       pQuery->nextColumn = iCol; |  | 
|  2695       continue; |  | 
|  2696     } |  | 
|  2697     if( !inPhrase && pQuery->nTerms>0 && nToken==2 |  | 
|  2698          && pSegment[iBegin]=='O' && pSegment[iBegin+1]=='R' ){ |  | 
|  2699       pQuery->nextIsOr = 1; |  | 
|  2700       continue; |  | 
|  2701     } |  | 
|  2702     queryAdd(pQuery, pToken, nToken); |  | 
|  2703     if( !inPhrase && iBegin>0 && pSegment[iBegin-1]=='-' ){ |  | 
|  2704       pQuery->pTerms[pQuery->nTerms-1].isNot = 1; |  | 
|  2705     } |  | 
|  2706     pQuery->pTerms[pQuery->nTerms-1].iPhrase = nTerm; |  | 
|  2707     if( inPhrase ){ |  | 
|  2708       nTerm++; |  | 
|  2709     } |  | 
|  2710   } |  | 
|  2711  |  | 
|  2712   if( inPhrase && pQuery->nTerms>firstIndex ){ |  | 
|  2713     pQuery->pTerms[firstIndex].nPhrase = pQuery->nTerms - firstIndex - 1; |  | 
|  2714   } |  | 
|  2715  |  | 
|  2716   return pModule->xClose(pCursor); |  | 
|  2717 } |  | 
|  2718  |  | 
|  2719 /* Parse a query string, yielding a Query object pQuery. |  | 
|  2720 ** |  | 
|  2721 ** The calling function will need to queryClear() to clean up |  | 
|  2722 ** the dynamically allocated memory held by pQuery. |  | 
|  2723 */ |  | 
|  2724 static int parseQuery( |  | 
|  2725   fulltext_vtab *v,        /* The fulltext index */ |  | 
|  2726   const char *zInput,      /* Input text of the query string */ |  | 
|  2727   int nInput,              /* Size of the input text */ |  | 
|  2728   int dfltColumn,          /* Default column of the index to match against */ |  | 
|  2729   Query *pQuery            /* Write the parse results here. */ |  | 
|  2730 ){ |  | 
|  2731   int iInput, inPhrase = 0; |  | 
|  2732  |  | 
|  2733   if( zInput==0 ) nInput = 0; |  | 
|  2734   if( nInput<0 ) nInput = strlen(zInput); |  | 
|  2735   pQuery->nTerms = 0; |  | 
|  2736   pQuery->pTerms = NULL; |  | 
|  2737   pQuery->nextIsOr = 0; |  | 
|  2738   pQuery->nextColumn = dfltColumn; |  | 
|  2739   pQuery->dfltColumn = dfltColumn; |  | 
|  2740   pQuery->pFts = v; |  | 
|  2741  |  | 
|  2742   for(iInput=0; iInput<nInput; ++iInput){ |  | 
|  2743     int i; |  | 
|  2744     for(i=iInput; i<nInput && zInput[i]!='"'; ++i){} |  | 
|  2745     if( i>iInput ){ |  | 
|  2746       tokenizeSegment(v->pTokenizer, zInput+iInput, i-iInput, inPhrase, |  | 
|  2747                        pQuery); |  | 
|  2748     } |  | 
|  2749     iInput = i; |  | 
|  2750     if( i<nInput ){ |  | 
|  2751       assert( zInput[i]=='"' ); |  | 
|  2752       inPhrase = !inPhrase; |  | 
|  2753     } |  | 
|  2754   } |  | 
|  2755  |  | 
|  2756   if( inPhrase ){ |  | 
|  2757     /* unmatched quote */ |  | 
|  2758     queryClear(pQuery); |  | 
|  2759     return SQLITE_ERROR; |  | 
|  2760   } |  | 
|  2761   return SQLITE_OK; |  | 
|  2762 } |  | 
|  2763  |  | 
|  2764 /* Perform a full-text query using the search expression in |  | 
|  2765 ** zInput[0..nInput-1].  Return a list of matching documents |  | 
|  2766 ** in pResult. |  | 
|  2767 ** |  | 
|  2768 ** Queries must match column iColumn.  Or if iColumn>=nColumn |  | 
|  2769 ** they are allowed to match against any column. |  | 
|  2770 */ |  | 
|  2771 static int fulltextQuery( |  | 
|  2772   fulltext_vtab *v,      /* The full text index */ |  | 
|  2773   int iColumn,           /* Match against this column by default */ |  | 
|  2774   const char *zInput,    /* The query string */ |  | 
|  2775   int nInput,            /* Number of bytes in zInput[] */ |  | 
|  2776   DocList **pResult,     /* Write the result doclist here */ |  | 
|  2777   Query *pQuery          /* Put parsed query string here */ |  | 
|  2778 ){ |  | 
|  2779   int i, iNext, rc; |  | 
|  2780   DocList *pLeft = NULL; |  | 
|  2781   DocList *pRight, *pNew, *pOr; |  | 
|  2782   int nNot = 0; |  | 
|  2783   QueryTerm *aTerm; |  | 
|  2784  |  | 
|  2785   rc = parseQuery(v, zInput, nInput, iColumn, pQuery); |  | 
|  2786   if( rc!=SQLITE_OK ) return rc; |  | 
|  2787  |  | 
|  2788   /* Merge AND terms. */ |  | 
|  2789   aTerm = pQuery->pTerms; |  | 
|  2790   for(i = 0; i<pQuery->nTerms; i=iNext){ |  | 
|  2791     if( aTerm[i].isNot ){ |  | 
|  2792       /* Handle all NOT terms in a separate pass */ |  | 
|  2793       nNot++; |  | 
|  2794       iNext = i + aTerm[i].nPhrase+1; |  | 
|  2795       continue; |  | 
|  2796     } |  | 
|  2797     iNext = i + aTerm[i].nPhrase + 1; |  | 
|  2798     rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &pRight); |  | 
|  2799     if( rc ){ |  | 
|  2800       queryClear(pQuery); |  | 
|  2801       return rc; |  | 
|  2802     } |  | 
|  2803     while( iNext<pQuery->nTerms && aTerm[iNext].isOr ){ |  | 
|  2804       rc = docListOfTerm(v, aTerm[iNext].iColumn, &aTerm[iNext], &pOr); |  | 
|  2805       iNext += aTerm[iNext].nPhrase + 1; |  | 
|  2806       if( rc ){ |  | 
|  2807         queryClear(pQuery); |  | 
|  2808         return rc; |  | 
|  2809       } |  | 
|  2810       pNew = docListNew(DL_DOCIDS); |  | 
|  2811       docListOrMerge(pRight, pOr, pNew); |  | 
|  2812       docListDelete(pRight); |  | 
|  2813       docListDelete(pOr); |  | 
|  2814       pRight = pNew; |  | 
|  2815     } |  | 
|  2816     if( pLeft==0 ){ |  | 
|  2817       pLeft = pRight; |  | 
|  2818     }else{ |  | 
|  2819       pNew = docListNew(DL_DOCIDS); |  | 
|  2820       docListAndMerge(pLeft, pRight, pNew); |  | 
|  2821       docListDelete(pRight); |  | 
|  2822       docListDelete(pLeft); |  | 
|  2823       pLeft = pNew; |  | 
|  2824     } |  | 
|  2825   } |  | 
|  2826  |  | 
|  2827   if( nNot && pLeft==0 ){ |  | 
|  2828     /* We do not yet know how to handle a query of only NOT terms */ |  | 
|  2829     return SQLITE_ERROR; |  | 
|  2830   } |  | 
|  2831  |  | 
|  2832   /* Do the EXCEPT terms */ |  | 
|  2833   for(i=0; i<pQuery->nTerms;  i += aTerm[i].nPhrase + 1){ |  | 
|  2834     if( !aTerm[i].isNot ) continue; |  | 
|  2835     rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &pRight); |  | 
|  2836     if( rc ){ |  | 
|  2837       queryClear(pQuery); |  | 
|  2838       docListDelete(pLeft); |  | 
|  2839       return rc; |  | 
|  2840     } |  | 
|  2841     pNew = docListNew(DL_DOCIDS); |  | 
|  2842     docListExceptMerge(pLeft, pRight, pNew); |  | 
|  2843     docListDelete(pRight); |  | 
|  2844     docListDelete(pLeft); |  | 
|  2845     pLeft = pNew; |  | 
|  2846   } |  | 
|  2847  |  | 
|  2848   *pResult = pLeft; |  | 
|  2849   return rc; |  | 
|  2850 } |  | 
|  2851  |  | 
|  2852 /* |  | 
|  2853 ** This is the xFilter interface for the virtual table.  See |  | 
|  2854 ** the virtual table xFilter method documentation for additional |  | 
|  2855 ** information. |  | 
|  2856 ** |  | 
|  2857 ** If idxNum==QUERY_GENERIC then do a full table scan against |  | 
|  2858 ** the %_content table. |  | 
|  2859 ** |  | 
|  2860 ** If idxNum==QUERY_ROWID then do a rowid lookup for a single entry |  | 
|  2861 ** in the %_content table. |  | 
|  2862 ** |  | 
|  2863 ** If idxNum>=QUERY_FULLTEXT then use the full text index.  The |  | 
|  2864 ** column on the left-hand side of the MATCH operator is column |  | 
|  2865 ** number idxNum-QUERY_FULLTEXT, 0 indexed.  argv[0] is the right-hand |  | 
|  2866 ** side of the MATCH operator. |  | 
|  2867 */ |  | 
|  2868 /* TODO(shess) Upgrade the cursor initialization and destruction to |  | 
|  2869 ** account for fulltextFilter() being called multiple times on the |  | 
|  2870 ** same cursor.  The current solution is very fragile.  Apply fix to |  | 
|  2871 ** fts2 as appropriate. |  | 
|  2872 */ |  | 
|  2873 static int fulltextFilter( |  | 
|  2874   sqlite3_vtab_cursor *pCursor,     /* The cursor used for this query */ |  | 
|  2875   int idxNum, const char *idxStr,   /* Which indexing scheme to use */ |  | 
|  2876   int argc, sqlite3_value **argv    /* Arguments for the indexing scheme */ |  | 
|  2877 ){ |  | 
|  2878   fulltext_cursor *c = (fulltext_cursor *) pCursor; |  | 
|  2879   fulltext_vtab *v = cursor_vtab(c); |  | 
|  2880   int rc; |  | 
|  2881   char *zSql; |  | 
|  2882  |  | 
|  2883   TRACE(("FTS1 Filter %p\n",pCursor)); |  | 
|  2884  |  | 
|  2885   zSql = sqlite3_mprintf("select rowid, * from %%_content %s", |  | 
|  2886                           idxNum==QUERY_GENERIC ? "" : "where rowid=?"); |  | 
|  2887   sqlite3_finalize(c->pStmt); |  | 
|  2888   rc = sql_prepare(v->db, v->zDb, v->zName, &c->pStmt, zSql); |  | 
|  2889   sqlite3_free(zSql); |  | 
|  2890   if( rc!=SQLITE_OK ) return rc; |  | 
|  2891  |  | 
|  2892   c->iCursorType = idxNum; |  | 
|  2893   switch( idxNum ){ |  | 
|  2894     case QUERY_GENERIC: |  | 
|  2895       break; |  | 
|  2896  |  | 
|  2897     case QUERY_ROWID: |  | 
|  2898       rc = sqlite3_bind_int64(c->pStmt, 1, sqlite3_value_int64(argv[0])); |  | 
|  2899       if( rc!=SQLITE_OK ) return rc; |  | 
|  2900       break; |  | 
|  2901  |  | 
|  2902     default:   /* full-text search */ |  | 
|  2903     { |  | 
|  2904       const char *zQuery = (const char *)sqlite3_value_text(argv[0]); |  | 
|  2905       DocList *pResult; |  | 
|  2906       assert( idxNum<=QUERY_FULLTEXT+v->nColumn); |  | 
|  2907       assert( argc==1 ); |  | 
|  2908       queryClear(&c->q); |  | 
|  2909       rc = fulltextQuery(v, idxNum-QUERY_FULLTEXT, zQuery, -1, &pResult, &c->q); |  | 
|  2910       if( rc!=SQLITE_OK ) return rc; |  | 
|  2911       if( c->result.pDoclist!=NULL ) docListDelete(c->result.pDoclist); |  | 
|  2912       readerInit(&c->result, pResult); |  | 
|  2913       break; |  | 
|  2914     } |  | 
|  2915   } |  | 
|  2916  |  | 
|  2917   return fulltextNext(pCursor); |  | 
|  2918 } |  | 
|  2919  |  | 
|  2920 /* This is the xEof method of the virtual table.  The SQLite core |  | 
|  2921 ** calls this routine to find out if it has reached the end of |  | 
|  2922 ** a query's results set. |  | 
|  2923 */ |  | 
|  2924 static int fulltextEof(sqlite3_vtab_cursor *pCursor){ |  | 
|  2925   fulltext_cursor *c = (fulltext_cursor *) pCursor; |  | 
|  2926   return c->eof; |  | 
|  2927 } |  | 
|  2928  |  | 
|  2929 /* This is the xColumn method of the virtual table.  The SQLite |  | 
|  2930 ** core calls this method during a query when it needs the value |  | 
|  2931 ** of a column from the virtual table.  This method needs to use |  | 
|  2932 ** one of the sqlite3_result_*() routines to store the requested |  | 
|  2933 ** value back in the pContext. |  | 
|  2934 */ |  | 
|  2935 static int fulltextColumn(sqlite3_vtab_cursor *pCursor, |  | 
|  2936                           sqlite3_context *pContext, int idxCol){ |  | 
|  2937   fulltext_cursor *c = (fulltext_cursor *) pCursor; |  | 
|  2938   fulltext_vtab *v = cursor_vtab(c); |  | 
|  2939  |  | 
|  2940   if( idxCol<v->nColumn ){ |  | 
|  2941     sqlite3_value *pVal = sqlite3_column_value(c->pStmt, idxCol+1); |  | 
|  2942     sqlite3_result_value(pContext, pVal); |  | 
|  2943   }else if( idxCol==v->nColumn ){ |  | 
|  2944     /* The extra column whose name is the same as the table. |  | 
|  2945     ** Return a blob which is a pointer to the cursor |  | 
|  2946     */ |  | 
|  2947     sqlite3_result_blob(pContext, &c, sizeof(c), SQLITE_TRANSIENT); |  | 
|  2948   } |  | 
|  2949   return SQLITE_OK; |  | 
|  2950 } |  | 
|  2951  |  | 
|  2952 /* This is the xRowid method.  The SQLite core calls this routine to |  | 
|  2953 ** retrive the rowid for the current row of the result set.  The |  | 
|  2954 ** rowid should be written to *pRowid. |  | 
|  2955 */ |  | 
|  2956 static int fulltextRowid(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ |  | 
|  2957   fulltext_cursor *c = (fulltext_cursor *) pCursor; |  | 
|  2958  |  | 
|  2959   *pRowid = sqlite3_column_int64(c->pStmt, 0); |  | 
|  2960   return SQLITE_OK; |  | 
|  2961 } |  | 
|  2962  |  | 
|  2963 /* Add all terms in [zText] to the given hash table.  If [iColumn] > 0, |  | 
|  2964  * we also store positions and offsets in the hash table using the given |  | 
|  2965  * column number. */ |  | 
|  2966 static int buildTerms(fulltext_vtab *v, fts1Hash *terms, sqlite_int64 iDocid, |  | 
|  2967                       const char *zText, int iColumn){ |  | 
|  2968   sqlite3_tokenizer *pTokenizer = v->pTokenizer; |  | 
|  2969   sqlite3_tokenizer_cursor *pCursor; |  | 
|  2970   const char *pToken; |  | 
|  2971   int nTokenBytes; |  | 
|  2972   int iStartOffset, iEndOffset, iPosition; |  | 
|  2973   int rc; |  | 
|  2974  |  | 
|  2975   rc = pTokenizer->pModule->xOpen(pTokenizer, zText, -1, &pCursor); |  | 
|  2976   if( rc!=SQLITE_OK ) return rc; |  | 
|  2977  |  | 
|  2978   pCursor->pTokenizer = pTokenizer; |  | 
|  2979   while( SQLITE_OK==pTokenizer->pModule->xNext(pCursor, |  | 
|  2980                                                &pToken, &nTokenBytes, |  | 
|  2981                                                &iStartOffset, &iEndOffset, |  | 
|  2982                                                &iPosition) ){ |  | 
|  2983     DocList *p; |  | 
|  2984  |  | 
|  2985     /* Positions can't be negative; we use -1 as a terminator internally. */ |  | 
|  2986     if( iPosition<0 ){ |  | 
|  2987       pTokenizer->pModule->xClose(pCursor); |  | 
|  2988       return SQLITE_ERROR; |  | 
|  2989     } |  | 
|  2990  |  | 
|  2991     p = fts1HashFind(terms, pToken, nTokenBytes); |  | 
|  2992     if( p==NULL ){ |  | 
|  2993       p = docListNew(DL_DEFAULT); |  | 
|  2994       docListAddDocid(p, iDocid); |  | 
|  2995       fts1HashInsert(terms, pToken, nTokenBytes, p); |  | 
|  2996     } |  | 
|  2997     if( iColumn>=0 ){ |  | 
|  2998       docListAddPosOffset(p, iColumn, iPosition, iStartOffset, iEndOffset); |  | 
|  2999     } |  | 
|  3000   } |  | 
|  3001  |  | 
|  3002   /* TODO(shess) Check return?  Should this be able to cause errors at |  | 
|  3003   ** this point?  Actually, same question about sqlite3_finalize(), |  | 
|  3004   ** though one could argue that failure there means that the data is |  | 
|  3005   ** not durable.  *ponder* |  | 
|  3006   */ |  | 
|  3007   pTokenizer->pModule->xClose(pCursor); |  | 
|  3008   return rc; |  | 
|  3009 } |  | 
|  3010  |  | 
|  3011 /* Update the %_terms table to map the term [pTerm] to the given rowid. */ |  | 
|  3012 static int index_insert_term(fulltext_vtab *v, const char *pTerm, int nTerm, |  | 
|  3013                              DocList *d){ |  | 
|  3014   sqlite_int64 iIndexRow; |  | 
|  3015   DocList doclist; |  | 
|  3016   int iSegment = 0, rc; |  | 
|  3017  |  | 
|  3018   rc = term_select(v, pTerm, nTerm, iSegment, &iIndexRow, &doclist); |  | 
|  3019   if( rc==SQLITE_DONE ){ |  | 
|  3020     docListInit(&doclist, DL_DEFAULT, 0, 0); |  | 
|  3021     docListUpdate(&doclist, d); |  | 
|  3022     /* TODO(shess) Consider length(doclist)>CHUNK_MAX? */ |  | 
|  3023     rc = term_insert(v, NULL, pTerm, nTerm, iSegment, &doclist); |  | 
|  3024     goto err; |  | 
|  3025   } |  | 
|  3026   if( rc!=SQLITE_ROW ) return SQLITE_ERROR; |  | 
|  3027  |  | 
|  3028   docListUpdate(&doclist, d); |  | 
|  3029   if( doclist.nData<=CHUNK_MAX ){ |  | 
|  3030     rc = term_update(v, iIndexRow, &doclist); |  | 
|  3031     goto err; |  | 
|  3032   } |  | 
|  3033  |  | 
|  3034   /* Doclist doesn't fit, delete what's there, and accumulate |  | 
|  3035   ** forward. |  | 
|  3036   */ |  | 
|  3037   rc = term_delete(v, iIndexRow); |  | 
|  3038   if( rc!=SQLITE_OK ) goto err; |  | 
|  3039  |  | 
|  3040   /* Try to insert the doclist into a higher segment bucket.  On |  | 
|  3041   ** failure, accumulate existing doclist with the doclist from that |  | 
|  3042   ** bucket, and put results in the next bucket. |  | 
|  3043   */ |  | 
|  3044   iSegment++; |  | 
|  3045   while( (rc=term_insert(v, &iIndexRow, pTerm, nTerm, iSegment, |  | 
|  3046                          &doclist))!=SQLITE_OK ){ |  | 
|  3047     sqlite_int64 iSegmentRow; |  | 
|  3048     DocList old; |  | 
|  3049     int rc2; |  | 
|  3050  |  | 
|  3051     /* Retain old error in case the term_insert() error was really an |  | 
|  3052     ** error rather than a bounced insert. |  | 
|  3053     */ |  | 
|  3054     rc2 = term_select(v, pTerm, nTerm, iSegment, &iSegmentRow, &old); |  | 
|  3055     if( rc2!=SQLITE_ROW ) goto err; |  | 
|  3056  |  | 
|  3057     rc = term_delete(v, iSegmentRow); |  | 
|  3058     if( rc!=SQLITE_OK ) goto err; |  | 
|  3059  |  | 
|  3060     /* Reusing lowest-number deleted row keeps the index smaller. */ |  | 
|  3061     if( iSegmentRow<iIndexRow ) iIndexRow = iSegmentRow; |  | 
|  3062  |  | 
|  3063     /* doclist contains the newer data, so accumulate it over old. |  | 
|  3064     ** Then steal accumulated data for doclist. |  | 
|  3065     */ |  | 
|  3066     docListAccumulate(&old, &doclist); |  | 
|  3067     docListDestroy(&doclist); |  | 
|  3068     doclist = old; |  | 
|  3069  |  | 
|  3070     iSegment++; |  | 
|  3071   } |  | 
|  3072  |  | 
|  3073  err: |  | 
|  3074   docListDestroy(&doclist); |  | 
|  3075   return rc; |  | 
|  3076 } |  | 
|  3077  |  | 
|  3078 /* Add doclists for all terms in [pValues] to the hash table [terms]. */ |  | 
|  3079 static int insertTerms(fulltext_vtab *v, fts1Hash *terms, sqlite_int64 iRowid, |  | 
|  3080                 sqlite3_value **pValues){ |  | 
|  3081   int i; |  | 
|  3082   for(i = 0; i < v->nColumn ; ++i){ |  | 
|  3083     char *zText = (char*)sqlite3_value_text(pValues[i]); |  | 
|  3084     int rc = buildTerms(v, terms, iRowid, zText, i); |  | 
|  3085     if( rc!=SQLITE_OK ) return rc; |  | 
|  3086   } |  | 
|  3087   return SQLITE_OK; |  | 
|  3088 } |  | 
|  3089  |  | 
|  3090 /* Add empty doclists for all terms in the given row's content to the hash |  | 
|  3091  * table [pTerms]. */ |  | 
|  3092 static int deleteTerms(fulltext_vtab *v, fts1Hash *pTerms, sqlite_int64 iRowid){ |  | 
|  3093   const char **pValues; |  | 
|  3094   int i; |  | 
|  3095  |  | 
|  3096   int rc = content_select(v, iRowid, &pValues); |  | 
|  3097   if( rc!=SQLITE_OK ) return rc; |  | 
|  3098  |  | 
|  3099   for(i = 0 ; i < v->nColumn; ++i) { |  | 
|  3100     rc = buildTerms(v, pTerms, iRowid, pValues[i], -1); |  | 
|  3101     if( rc!=SQLITE_OK ) break; |  | 
|  3102   } |  | 
|  3103  |  | 
|  3104   freeStringArray(v->nColumn, pValues); |  | 
|  3105   return SQLITE_OK; |  | 
|  3106 } |  | 
|  3107  |  | 
|  3108 /* Insert a row into the %_content table; set *piRowid to be the ID of the |  | 
|  3109  * new row.  Fill [pTerms] with new doclists for the %_term table. */ |  | 
|  3110 static int index_insert(fulltext_vtab *v, sqlite3_value *pRequestRowid, |  | 
|  3111                         sqlite3_value **pValues, |  | 
|  3112                         sqlite_int64 *piRowid, fts1Hash *pTerms){ |  | 
|  3113   int rc; |  | 
|  3114  |  | 
|  3115   rc = content_insert(v, pRequestRowid, pValues);  /* execute an SQL INSERT */ |  | 
|  3116   if( rc!=SQLITE_OK ) return rc; |  | 
|  3117   *piRowid = sqlite3_last_insert_rowid(v->db); |  | 
|  3118   return insertTerms(v, pTerms, *piRowid, pValues); |  | 
|  3119 } |  | 
|  3120  |  | 
|  3121 /* Delete a row from the %_content table; fill [pTerms] with empty doclists |  | 
|  3122  * to be written to the %_term table. */ |  | 
|  3123 static int index_delete(fulltext_vtab *v, sqlite_int64 iRow, fts1Hash *pTerms){ |  | 
|  3124   int rc = deleteTerms(v, pTerms, iRow); |  | 
|  3125   if( rc!=SQLITE_OK ) return rc; |  | 
|  3126   return content_delete(v, iRow);  /* execute an SQL DELETE */ |  | 
|  3127 } |  | 
|  3128  |  | 
|  3129 /* Update a row in the %_content table; fill [pTerms] with new doclists for the |  | 
|  3130  * %_term table. */ |  | 
|  3131 static int index_update(fulltext_vtab *v, sqlite_int64 iRow, |  | 
|  3132                         sqlite3_value **pValues, fts1Hash *pTerms){ |  | 
|  3133   /* Generate an empty doclist for each term that previously appeared in this |  | 
|  3134    * row. */ |  | 
|  3135   int rc = deleteTerms(v, pTerms, iRow); |  | 
|  3136   if( rc!=SQLITE_OK ) return rc; |  | 
|  3137  |  | 
|  3138   rc = content_update(v, pValues, iRow);  /* execute an SQL UPDATE */ |  | 
|  3139   if( rc!=SQLITE_OK ) return rc; |  | 
|  3140  |  | 
|  3141   /* Now add positions for terms which appear in the updated row. */ |  | 
|  3142   return insertTerms(v, pTerms, iRow, pValues); |  | 
|  3143 } |  | 
|  3144  |  | 
|  3145 /* This function implements the xUpdate callback; it is the top-level entry |  | 
|  3146  * point for inserting, deleting or updating a row in a full-text table. */ |  | 
|  3147 static int fulltextUpdate(sqlite3_vtab *pVtab, int nArg, sqlite3_value **ppArg, |  | 
|  3148                    sqlite_int64 *pRowid){ |  | 
|  3149   fulltext_vtab *v = (fulltext_vtab *) pVtab; |  | 
|  3150   fts1Hash terms;   /* maps term string -> PosList */ |  | 
|  3151   int rc; |  | 
|  3152   fts1HashElem *e; |  | 
|  3153  |  | 
|  3154   TRACE(("FTS1 Update %p\n", pVtab)); |  | 
|  3155    |  | 
|  3156   fts1HashInit(&terms, FTS1_HASH_STRING, 1); |  | 
|  3157  |  | 
|  3158   if( nArg<2 ){ |  | 
|  3159     rc = index_delete(v, sqlite3_value_int64(ppArg[0]), &terms); |  | 
|  3160   } else if( sqlite3_value_type(ppArg[0]) != SQLITE_NULL ){ |  | 
|  3161     /* An update: |  | 
|  3162      * ppArg[0] = old rowid |  | 
|  3163      * ppArg[1] = new rowid |  | 
|  3164      * ppArg[2..2+v->nColumn-1] = values |  | 
|  3165      * ppArg[2+v->nColumn] = value for magic column (we ignore this) |  | 
|  3166      */ |  | 
|  3167     sqlite_int64 rowid = sqlite3_value_int64(ppArg[0]); |  | 
|  3168     if( sqlite3_value_type(ppArg[1]) != SQLITE_INTEGER || |  | 
|  3169       sqlite3_value_int64(ppArg[1]) != rowid ){ |  | 
|  3170       rc = SQLITE_ERROR;  /* we don't allow changing the rowid */ |  | 
|  3171     } else { |  | 
|  3172       assert( nArg==2+v->nColumn+1); |  | 
|  3173       rc = index_update(v, rowid, &ppArg[2], &terms); |  | 
|  3174     } |  | 
|  3175   } else { |  | 
|  3176     /* An insert: |  | 
|  3177      * ppArg[1] = requested rowid |  | 
|  3178      * ppArg[2..2+v->nColumn-1] = values |  | 
|  3179      * ppArg[2+v->nColumn] = value for magic column (we ignore this) |  | 
|  3180      */ |  | 
|  3181     assert( nArg==2+v->nColumn+1); |  | 
|  3182     rc = index_insert(v, ppArg[1], &ppArg[2], pRowid, &terms); |  | 
|  3183   } |  | 
|  3184  |  | 
|  3185   if( rc==SQLITE_OK ){ |  | 
|  3186     /* Write updated doclists to disk. */ |  | 
|  3187     for(e=fts1HashFirst(&terms); e; e=fts1HashNext(e)){ |  | 
|  3188       DocList *p = fts1HashData(e); |  | 
|  3189       rc = index_insert_term(v, fts1HashKey(e), fts1HashKeysize(e), p); |  | 
|  3190       if( rc!=SQLITE_OK ) break; |  | 
|  3191     } |  | 
|  3192   } |  | 
|  3193  |  | 
|  3194   /* clean up */ |  | 
|  3195   for(e=fts1HashFirst(&terms); e; e=fts1HashNext(e)){ |  | 
|  3196     DocList *p = fts1HashData(e); |  | 
|  3197     docListDelete(p); |  | 
|  3198   } |  | 
|  3199   fts1HashClear(&terms); |  | 
|  3200  |  | 
|  3201   return rc; |  | 
|  3202 } |  | 
|  3203  |  | 
|  3204 /* |  | 
|  3205 ** Implementation of the snippet() function for FTS1 |  | 
|  3206 */ |  | 
|  3207 static void snippetFunc( |  | 
|  3208   sqlite3_context *pContext, |  | 
|  3209   int argc, |  | 
|  3210   sqlite3_value **argv |  | 
|  3211 ){ |  | 
|  3212   fulltext_cursor *pCursor; |  | 
|  3213   if( argc<1 ) return; |  | 
|  3214   if( sqlite3_value_type(argv[0])!=SQLITE_BLOB || |  | 
|  3215       sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){ |  | 
|  3216     sqlite3_result_error(pContext, "illegal first argument to html_snippet",-1); |  | 
|  3217   }else{ |  | 
|  3218     const char *zStart = "<b>"; |  | 
|  3219     const char *zEnd = "</b>"; |  | 
|  3220     const char *zEllipsis = "<b>...</b>"; |  | 
|  3221     memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor)); |  | 
|  3222     if( argc>=2 ){ |  | 
|  3223       zStart = (const char*)sqlite3_value_text(argv[1]); |  | 
|  3224       if( argc>=3 ){ |  | 
|  3225         zEnd = (const char*)sqlite3_value_text(argv[2]); |  | 
|  3226         if( argc>=4 ){ |  | 
|  3227           zEllipsis = (const char*)sqlite3_value_text(argv[3]); |  | 
|  3228         } |  | 
|  3229       } |  | 
|  3230     } |  | 
|  3231     snippetAllOffsets(pCursor); |  | 
|  3232     snippetText(pCursor, zStart, zEnd, zEllipsis); |  | 
|  3233     sqlite3_result_text(pContext, pCursor->snippet.zSnippet, |  | 
|  3234                         pCursor->snippet.nSnippet, SQLITE_STATIC); |  | 
|  3235   } |  | 
|  3236 } |  | 
|  3237  |  | 
|  3238 /* |  | 
|  3239 ** Implementation of the offsets() function for FTS1 |  | 
|  3240 */ |  | 
|  3241 static void snippetOffsetsFunc( |  | 
|  3242   sqlite3_context *pContext, |  | 
|  3243   int argc, |  | 
|  3244   sqlite3_value **argv |  | 
|  3245 ){ |  | 
|  3246   fulltext_cursor *pCursor; |  | 
|  3247   if( argc<1 ) return; |  | 
|  3248   if( sqlite3_value_type(argv[0])!=SQLITE_BLOB || |  | 
|  3249       sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){ |  | 
|  3250     sqlite3_result_error(pContext, "illegal first argument to offsets",-1); |  | 
|  3251   }else{ |  | 
|  3252     memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor)); |  | 
|  3253     snippetAllOffsets(pCursor); |  | 
|  3254     snippetOffsetText(&pCursor->snippet); |  | 
|  3255     sqlite3_result_text(pContext, |  | 
|  3256                         pCursor->snippet.zOffset, pCursor->snippet.nOffset, |  | 
|  3257                         SQLITE_STATIC); |  | 
|  3258   } |  | 
|  3259 } |  | 
|  3260  |  | 
|  3261 /* |  | 
|  3262 ** This routine implements the xFindFunction method for the FTS1 |  | 
|  3263 ** virtual table. |  | 
|  3264 */ |  | 
|  3265 static int fulltextFindFunction( |  | 
|  3266   sqlite3_vtab *pVtab, |  | 
|  3267   int nArg, |  | 
|  3268   const char *zName, |  | 
|  3269   void (**pxFunc)(sqlite3_context*,int,sqlite3_value**), |  | 
|  3270   void **ppArg |  | 
|  3271 ){ |  | 
|  3272   if( strcmp(zName,"snippet")==0 ){ |  | 
|  3273     *pxFunc = snippetFunc; |  | 
|  3274     return 1; |  | 
|  3275   }else if( strcmp(zName,"offsets")==0 ){ |  | 
|  3276     *pxFunc = snippetOffsetsFunc; |  | 
|  3277     return 1; |  | 
|  3278   } |  | 
|  3279   return 0; |  | 
|  3280 } |  | 
|  3281  |  | 
|  3282 /* |  | 
|  3283 ** Rename an fts1 table. |  | 
|  3284 */ |  | 
|  3285 static int fulltextRename( |  | 
|  3286   sqlite3_vtab *pVtab, |  | 
|  3287   const char *zName |  | 
|  3288 ){ |  | 
|  3289   fulltext_vtab *p = (fulltext_vtab *)pVtab; |  | 
|  3290   int rc = SQLITE_NOMEM; |  | 
|  3291   char *zSql = sqlite3_mprintf( |  | 
|  3292     "ALTER TABLE %Q.'%q_content'  RENAME TO '%q_content';" |  | 
|  3293     "ALTER TABLE %Q.'%q_term' RENAME TO '%q_term';" |  | 
|  3294     , p->zDb, p->zName, zName |  | 
|  3295     , p->zDb, p->zName, zName |  | 
|  3296   ); |  | 
|  3297   if( zSql ){ |  | 
|  3298     rc = sqlite3_exec(p->db, zSql, 0, 0, 0); |  | 
|  3299     sqlite3_free(zSql); |  | 
|  3300   } |  | 
|  3301   return rc; |  | 
|  3302 } |  | 
|  3303  |  | 
|  3304 static const sqlite3_module fulltextModule = { |  | 
|  3305   /* iVersion      */ 0, |  | 
|  3306   /* xCreate       */ fulltextCreate, |  | 
|  3307   /* xConnect      */ fulltextConnect, |  | 
|  3308   /* xBestIndex    */ fulltextBestIndex, |  | 
|  3309   /* xDisconnect   */ fulltextDisconnect, |  | 
|  3310   /* xDestroy      */ fulltextDestroy, |  | 
|  3311   /* xOpen         */ fulltextOpen, |  | 
|  3312   /* xClose        */ fulltextClose, |  | 
|  3313   /* xFilter       */ fulltextFilter, |  | 
|  3314   /* xNext         */ fulltextNext, |  | 
|  3315   /* xEof          */ fulltextEof, |  | 
|  3316   /* xColumn       */ fulltextColumn, |  | 
|  3317   /* xRowid        */ fulltextRowid, |  | 
|  3318   /* xUpdate       */ fulltextUpdate, |  | 
|  3319   /* xBegin        */ 0,  |  | 
|  3320   /* xSync         */ 0, |  | 
|  3321   /* xCommit       */ 0, |  | 
|  3322   /* xRollback     */ 0, |  | 
|  3323   /* xFindFunction */ fulltextFindFunction, |  | 
|  3324   /* xRename       */ fulltextRename, |  | 
|  3325 }; |  | 
|  3326  |  | 
|  3327 int sqlite3Fts1Init(sqlite3 *db){ |  | 
|  3328   sqlite3_overload_function(db, "snippet", -1); |  | 
|  3329   sqlite3_overload_function(db, "offsets", -1); |  | 
|  3330   return sqlite3_create_module(db, "fts1", &fulltextModule, 0); |  | 
|  3331 } |  | 
|  3332  |  | 
|  3333 #if !SQLITE_CORE |  | 
|  3334 int sqlite3_extension_init(sqlite3 *db, char **pzErrMsg, |  | 
|  3335                            const sqlite3_api_routines *pApi){ |  | 
|  3336   SQLITE_EXTENSION_INIT2(pApi) |  | 
|  3337   return sqlite3Fts1Init(db); |  | 
|  3338 } |  | 
|  3339 #endif |  | 
|  3340  |  | 
|  3341 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1) */ |  | 
| OLD | NEW |