| OLD | NEW | 
 | (Empty) | 
|     1 /* The author disclaims copyright to this source code. |  | 
|     2  * |  | 
|     3  * This is an SQLite module implementing full-text search. |  | 
|     4  */ |  | 
|     5  |  | 
|     6 #include <assert.h> |  | 
|     7 #if !defined(__APPLE__) |  | 
|     8 #include <malloc.h> |  | 
|     9 #else |  | 
|    10 #include <stdlib.h> |  | 
|    11 #endif |  | 
|    12 #include <stdio.h> |  | 
|    13 #include <string.h> |  | 
|    14 #include <ctype.h> |  | 
|    15  |  | 
|    16 #include "fulltext.h" |  | 
|    17 #include "ft_hash.h" |  | 
|    18 #include "tokenizer.h" |  | 
|    19 #include "sqlite3.h" |  | 
|    20 #include "sqlite3ext.h" |  | 
|    21 SQLITE_EXTENSION_INIT1 |  | 
|    22  |  | 
|    23 /* utility functions */ |  | 
|    24  |  | 
|    25 /* We encode variable-length integers in little-endian order using seven bits |  | 
|    26  * per byte as follows: |  | 
|    27 ** |  | 
|    28 ** KEY: |  | 
|    29 **         A = 0xxxxxxx    7 bits of data and one flag bit |  | 
|    30 **         B = 1xxxxxxx    7 bits of data and one flag bit |  | 
|    31 ** |  | 
|    32 **  7 bits - A |  | 
|    33 ** 14 bits - BA |  | 
|    34 ** 21 bits - BBA |  | 
|    35 ** and so on. |  | 
|    36 */ |  | 
|    37  |  | 
|    38 /* We may need up to VARINT_MAX bytes to store an encoded 64-bit integer. */ |  | 
|    39 #define VARINT_MAX 10 |  | 
|    40  |  | 
|    41 /* Write a 64-bit variable-length integer to memory starting at p[0]. |  | 
|    42  * The length of data written will be between 1 and VARINT_MAX bytes. |  | 
|    43  * The number of bytes written is returned. */ |  | 
|    44 static int putVarint(char *p, sqlite_int64 v){ |  | 
|    45   unsigned char *q = (unsigned char *) p; |  | 
|    46   sqlite_uint64 vu = v; |  | 
|    47   do{ |  | 
|    48     *q++ = (unsigned char) ((vu & 0x7f) | 0x80); |  | 
|    49     vu >>= 7; |  | 
|    50   }while( vu!=0 ); |  | 
|    51   q[-1] &= 0x7f;  /* turn off high bit in final byte */ |  | 
|    52   assert( q - (unsigned char *)p <= VARINT_MAX ); |  | 
|    53   return (int) (q - (unsigned char *)p); |  | 
|    54 } |  | 
|    55  |  | 
|    56 /* Read a 64-bit variable-length integer from memory starting at p[0]. |  | 
|    57  * Return the number of bytes read, or 0 on error. |  | 
|    58  * The value is stored in *v. */ |  | 
|    59 static int getVarint(const char *p, sqlite_int64 *v){ |  | 
|    60   const unsigned char *q = (const unsigned char *) p; |  | 
|    61   sqlite_uint64 x = 0, y = 1; |  | 
|    62   while( (*q & 0x80) == 0x80 ){ |  | 
|    63     x += y * (*q++ & 0x7f); |  | 
|    64     y <<= 7; |  | 
|    65     if( q - (unsigned char *)p >= VARINT_MAX ){  /* bad data */ |  | 
|    66       assert( 0 ); |  | 
|    67       return 0; |  | 
|    68     } |  | 
|    69   } |  | 
|    70   x += y * (*q++); |  | 
|    71   *v = (sqlite_int64) x; |  | 
|    72   return (int) (q - (unsigned char *)p); |  | 
|    73 } |  | 
|    74  |  | 
|    75 static int getVarint32(const char *p, int *pi){ |  | 
|    76  sqlite_int64 i; |  | 
|    77  int ret = getVarint(p, &i); |  | 
|    78  *pi = (int) i; |  | 
|    79  assert( *pi==i ); |  | 
|    80  return ret; |  | 
|    81 } |  | 
|    82  |  | 
|    83 /*** Document lists *** |  | 
|    84  * |  | 
|    85  * A document list holds a sorted list of varint-encoded document IDs. |  | 
|    86  * |  | 
|    87  * A doclist with type DL_POSITIONS_OFFSETS is stored like this: |  | 
|    88  * |  | 
|    89  * array { |  | 
|    90  *   varint docid; |  | 
|    91  *   array { |  | 
|    92  *     varint position;     (delta from previous position plus 1, or 0 for end) |  | 
|    93  *     varint startOffset;  (delta from previous startOffset) |  | 
|    94  *     varint endOffset;    (delta from startOffset) |  | 
|    95  *   } |  | 
|    96  * } |  | 
|    97  * |  | 
|    98  * Here, array { X } means zero or more occurrences of X, adjacent in memory. |  | 
|    99  * |  | 
|   100  * A doclist with type DL_POSITIONS is like the above, but holds only docids |  | 
|   101  * and positions without offset information. |  | 
|   102  * |  | 
|   103  * A doclist with type DL_DOCIDS is like the above, but holds only docids |  | 
|   104  * without positions or offset information. |  | 
|   105  * |  | 
|   106  * On disk, every document list has positions and offsets, so we don't bother |  | 
|   107  * to serialize a doclist's type. |  | 
|   108  *  |  | 
|   109  * We don't yet delta-encode document IDs; doing so will probably be a |  | 
|   110  * modest win. |  | 
|   111  * |  | 
|   112  * NOTE(shess) I've thought of a slightly (1%) better offset encoding. |  | 
|   113  * After the first offset, estimate the next offset by using the |  | 
|   114  * current token position and the previous token position and offset, |  | 
|   115  * offset to handle some variance.  So the estimate would be |  | 
|   116  * (iPosition*w->iStartOffset/w->iPosition-64), which is delta-encoded |  | 
|   117  * as normal.  Offsets more than 64 chars from the estimate are |  | 
|   118  * encoded as the delta to the previous start offset + 128.  An |  | 
|   119  * additional tiny increment can be gained by using the end offset of |  | 
|   120  * the previous token to make the estimate a tiny bit more precise. |  | 
|   121 */ |  | 
|   122  |  | 
/* How much information each entry of a doclist carries.  The values are
 * ordered, so code may test e.g. iType>=DL_POSITIONS. */
enum DocListType {
  DL_DOCIDS,              /* docids only */
  DL_POSITIONS,           /* docids + positions */
  DL_POSITIONS_OFFSETS    /* docids + positions + offsets */
};
typedef enum DocListType DocListType;
|   128  |  | 
|   129 typedef struct DocList { |  | 
|   130   char *pData; |  | 
|   131   int nData; |  | 
|   132   DocListType iType; |  | 
|   133   int iLastPos;       /* the last position written */ |  | 
|   134   int iLastOffset;    /* the last start offset written */ |  | 
|   135 } DocList; |  | 
|   136  |  | 
|   137 /* Initialize a new DocList to hold the given data. */ |  | 
|   138 static void docListInit(DocList *d, DocListType iType, |  | 
|   139                         const char *pData, int nData){ |  | 
|   140   d->nData = nData; |  | 
|   141   if( nData>0 ){ |  | 
|   142     d->pData = malloc(nData); |  | 
|   143     memcpy(d->pData, pData, nData); |  | 
|   144   } else { |  | 
|   145     d->pData = NULL; |  | 
|   146   } |  | 
|   147   d->iType = iType; |  | 
|   148   d->iLastPos = 0; |  | 
|   149   d->iLastOffset = 0; |  | 
|   150 } |  | 
|   151  |  | 
|   152 /* Create a new dynamically-allocated DocList. */ |  | 
|   153 static DocList *docListNew(DocListType iType){ |  | 
|   154   DocList *d = (DocList *) malloc(sizeof(DocList)); |  | 
|   155   docListInit(d, iType, 0, 0); |  | 
|   156   return d; |  | 
|   157 } |  | 
|   158  |  | 
|   159 static void docListDestroy(DocList *d){ |  | 
|   160   free(d->pData); |  | 
|   161 #ifndef NDEBUG |  | 
|   162   memset(d, 0x55, sizeof(*d)); |  | 
|   163 #endif |  | 
|   164 } |  | 
|   165  |  | 
|   166 static void docListDelete(DocList *d){ |  | 
|   167   docListDestroy(d); |  | 
|   168   free(d); |  | 
|   169 } |  | 
|   170  |  | 
|   171 static char *docListEnd(DocList *d){ |  | 
|   172   return d->pData + d->nData; |  | 
|   173 } |  | 
|   174  |  | 
|   175 /* Append a varint to a DocList's data. */ |  | 
|   176 static void appendVarint(DocList *d, sqlite_int64 i){ |  | 
|   177   char c[VARINT_MAX]; |  | 
|   178   int n = putVarint(c, i); |  | 
|   179   d->pData = realloc(d->pData, d->nData + n); |  | 
|   180   memcpy(d->pData + d->nData, c, n); |  | 
|   181   d->nData += n; |  | 
|   182 } |  | 
|   183  |  | 
|   184 static void docListAddDocid(DocList *d, sqlite_int64 iDocid){ |  | 
|   185   appendVarint(d, iDocid); |  | 
|   186   d->iLastPos = 0; |  | 
|   187 } |  | 
|   188  |  | 
|   189 /* Add a position to the last position list in a doclist. */ |  | 
|   190 static void docListAddPos(DocList *d, int iPos){ |  | 
|   191   assert( d->iType>=DL_POSITIONS ); |  | 
|   192   appendVarint(d, iPos-d->iLastPos+1); |  | 
|   193   d->iLastPos = iPos; |  | 
|   194 } |  | 
|   195  |  | 
|   196 static void docListAddPosOffset(DocList *d, int iPos, |  | 
|   197                                 int iStartOffset, int iEndOffset){ |  | 
|   198   assert( d->iType==DL_POSITIONS_OFFSETS ); |  | 
|   199   docListAddPos(d, iPos); |  | 
|   200   appendVarint(d, iStartOffset-d->iLastOffset); |  | 
|   201   d->iLastOffset = iStartOffset; |  | 
|   202   appendVarint(d, iEndOffset-iStartOffset); |  | 
|   203 } |  | 
|   204  |  | 
|   205 /* Terminate the last position list in the given doclist. */ |  | 
|   206 static void docListAddEndPos(DocList *d){ |  | 
|   207   appendVarint(d, 0); |  | 
|   208 } |  | 
|   209  |  | 
|   210 typedef struct DocListReader { |  | 
|   211   DocList *pDoclist; |  | 
|   212   char *p; |  | 
|   213   int iLastPos;    /* the last position read */ |  | 
|   214 } DocListReader; |  | 
|   215  |  | 
|   216 static void readerInit(DocListReader *r, DocList *pDoclist){ |  | 
|   217   r->pDoclist = pDoclist; |  | 
|   218   if( pDoclist!=NULL ){ |  | 
|   219     r->p = pDoclist->pData; |  | 
|   220   } |  | 
|   221   r->iLastPos = 0; |  | 
|   222 } |  | 
|   223  |  | 
|   224 static int readerAtEnd(DocListReader *pReader){ |  | 
|   225   return pReader->p >= docListEnd(pReader->pDoclist); |  | 
|   226 } |  | 
|   227  |  | 
|   228 /* Peek at the next docid without advancing the read pointer. */ |  | 
|   229 static sqlite_int64 peekDocid(DocListReader *pReader){ |  | 
|   230   sqlite_int64 ret; |  | 
|   231   assert( !readerAtEnd(pReader) ); |  | 
|   232   getVarint(pReader->p, &ret); |  | 
|   233   return ret; |  | 
|   234 } |  | 
|   235  |  | 
|   236 /* Read the next docid. */ |  | 
|   237 static sqlite_int64 readDocid(DocListReader *pReader){ |  | 
|   238   sqlite_int64 ret; |  | 
|   239   assert( !readerAtEnd(pReader) ); |  | 
|   240   pReader->p += getVarint(pReader->p, &ret); |  | 
|   241   pReader->iLastPos = 0; |  | 
|   242   return ret; |  | 
|   243 } |  | 
|   244  |  | 
|   245 /* Read the next position from a position list. |  | 
|   246  * Returns the position, or -1 at the end of the list. */ |  | 
|   247 static int readPosition(DocListReader *pReader){ |  | 
|   248   int i; |  | 
|   249   int iType = pReader->pDoclist->iType; |  | 
|   250   assert( iType>=DL_POSITIONS ); |  | 
|   251   assert( !readerAtEnd(pReader) ); |  | 
|   252  |  | 
|   253   pReader->p += getVarint32(pReader->p, &i); |  | 
|   254   if( i==0 ){ |  | 
|   255     pReader->iLastPos = -1; |  | 
|   256     return -1; |  | 
|   257   } |  | 
|   258   pReader->iLastPos += ((int) i)-1; |  | 
|   259   if( iType>=DL_POSITIONS_OFFSETS ){ |  | 
|   260     /* Skip over offsets, ignoring them for now. */ |  | 
|   261     int iStart, iEnd; |  | 
|   262     pReader->p += getVarint32(pReader->p, &iStart); |  | 
|   263     pReader->p += getVarint32(pReader->p, &iEnd); |  | 
|   264   } |  | 
|   265   return pReader->iLastPos; |  | 
|   266 } |  | 
|   267  |  | 
|   268 /* Skip past the end of a position list. */ |  | 
|   269 static void skipPositionList(DocListReader *pReader){ |  | 
|   270   while( readPosition(pReader)!=-1 ) |  | 
|   271     ; |  | 
|   272 } |  | 
|   273  |  | 
|   274 /* Skip over a docid, including its position list if the doclist has |  | 
|   275  * positions. */ |  | 
|   276 static void skipDocument(DocListReader *pReader){ |  | 
|   277   readDocid(pReader); |  | 
|   278   if( pReader->pDoclist->iType >= DL_POSITIONS ){ |  | 
|   279     skipPositionList(pReader); |  | 
|   280   } |  | 
|   281 } |  | 
|   282  |  | 
|   283 static sqlite_int64 firstDocid(DocList *d){ |  | 
|   284   DocListReader r; |  | 
|   285   readerInit(&r, d); |  | 
|   286   return readDocid(&r); |  | 
|   287 } |  | 
|   288  |  | 
|   289 /* Doclist multi-tool.  Pass pUpdate==NULL to delete the indicated docid; |  | 
|   290  * otherwise pUpdate, which must contain only the single docid [iDocid], is |  | 
|   291  * inserted (if not present) or updated (if already present). */ |  | 
|   292 static int docListUpdate(DocList *d, sqlite_int64 iDocid, DocList *pUpdate){ |  | 
|   293   int modified = 0; |  | 
|   294   DocListReader reader; |  | 
|   295   char *p; |  | 
|   296  |  | 
|   297   if( pUpdate!=NULL ){ |  | 
|   298     assert( d->iType==pUpdate->iType); |  | 
|   299     assert( iDocid==firstDocid(pUpdate) ); |  | 
|   300   } |  | 
|   301  |  | 
|   302   readerInit(&reader, d); |  | 
|   303   while( !readerAtEnd(&reader) && peekDocid(&reader)<iDocid ){ |  | 
|   304     skipDocument(&reader); |  | 
|   305   } |  | 
|   306  |  | 
|   307   p = reader.p; |  | 
|   308   /* Delete if there is a matching element. */ |  | 
|   309   if( !readerAtEnd(&reader) && iDocid==peekDocid(&reader) ){ |  | 
|   310     skipDocument(&reader); |  | 
|   311     memmove(p, reader.p, docListEnd(d) - reader.p); |  | 
|   312     d->nData -= (reader.p - p); |  | 
|   313     modified = 1; |  | 
|   314   } |  | 
|   315  |  | 
|   316   /* Insert if indicated. */ |  | 
|   317   if( pUpdate!=NULL ){ |  | 
|   318     int iDoclist = p-d->pData; |  | 
|   319     docListAddEndPos(pUpdate); |  | 
|   320  |  | 
|   321     d->pData = realloc(d->pData, d->nData+pUpdate->nData); |  | 
|   322     p = d->pData + iDoclist; |  | 
|   323  |  | 
|   324     memmove(p+pUpdate->nData, p, docListEnd(d) - p); |  | 
|   325     memcpy(p, pUpdate->pData, pUpdate->nData); |  | 
|   326     d->nData += pUpdate->nData; |  | 
|   327     modified = 1; |  | 
|   328   } |  | 
|   329  |  | 
|   330   return modified; |  | 
|   331 } |  | 
|   332  |  | 
|   333 /* Split the second half of doclist d into a separate doclist d2.  Returns 1 |  | 
|   334  * if successful, or 0 if d contains a single document and hence can't be |  | 
|   335  * split. */ |  | 
|   336 static int docListSplit(DocList *d, DocList *d2){ |  | 
|   337   const char *pSplitPoint = d->pData + d->nData / 2; |  | 
|   338   DocListReader reader; |  | 
|   339  |  | 
|   340   readerInit(&reader, d); |  | 
|   341   while( reader.p<pSplitPoint ){ |  | 
|   342     skipDocument(&reader); |  | 
|   343   } |  | 
|   344   if( readerAtEnd(&reader) ) return 0; |  | 
|   345   docListInit(d2, d->iType, reader.p, docListEnd(d) - reader.p); |  | 
|   346   d->nData = reader.p - d->pData; |  | 
|   347   d->pData = realloc(d->pData, d->nData); |  | 
|   348   return 1; |  | 
|   349 } |  | 
|   350  |  | 
|   351 /* A DocListMerge computes the AND of an in-memory DocList [in] and a chunked |  | 
|   352  * on-disk doclist, resulting in another in-memory DocList [out].  [in] |  | 
|   353  * and [out] may or may not store position information according to the |  | 
|   354  * caller's wishes.  The on-disk doclist always comes with positions. |  | 
|   355  * |  | 
|   356  * The caller must read each chunk of the on-disk doclist in succession and |  | 
|   357  * pass it to mergeBlock(). |  | 
|   358  * |  | 
|   359  * If [in] has positions, then the merge output contains only documents with |  | 
|   360  * matching positions in the two input doclists.  If [in] does not have |  | 
|   361  * positions, then the merge output contains all documents common to the two |  | 
|   362  * input doclists. |  | 
|   363  * |  | 
|   364  * If [in] is NULL, then the on-disk doclist is copied to [out] directly. |  | 
|   365  * |  | 
|   366  * A merge is performed using an integer [iOffset] provided by the caller. |  | 
|   367  * [iOffset] is subtracted from each position in the on-disk doclist for the |  | 
|   368  * purpose of position comparison; this is helpful in implementing phrase |  | 
|   369  * searches. |  | 
|   370  * |  | 
|   371  * A DocListMerge is not yet able to propagate offsets through query |  | 
|   372  * processing; we should add that capability soon. |  | 
|   373 */ |  | 
|   374 typedef struct DocListMerge { |  | 
|   375   DocListReader in; |  | 
|   376   DocList *pOut; |  | 
|   377   int iOffset; |  | 
|   378 } DocListMerge; |  | 
|   379  |  | 
|   380 static void mergeInit(DocListMerge *m, |  | 
|   381                       DocList *pIn, int iOffset, DocList *pOut){ |  | 
|   382   readerInit(&m->in, pIn); |  | 
|   383   m->pOut = pOut; |  | 
|   384   m->iOffset = iOffset; |  | 
|   385  |  | 
|   386   /* can't handle offsets yet */ |  | 
|   387   assert( pIn==NULL || pIn->iType <= DL_POSITIONS ); |  | 
|   388   assert( pOut->iType <= DL_POSITIONS ); |  | 
|   389 } |  | 
|   390  |  | 
|   391 /* A helper function for mergeBlock(), below.  Merge the position lists |  | 
|   392  * pointed to by m->in and pBlockReader. |  | 
|   393  * If the merge matches, write [iDocid] to m->pOut; if m->pOut |  | 
|   394  * has positions then write all matching positions as well. */ |  | 
|   395 static void mergePosList(DocListMerge *m, sqlite_int64 iDocid, |  | 
|   396                   DocListReader *pBlockReader){ |  | 
|   397   int block_pos = readPosition(pBlockReader); |  | 
|   398   int in_pos = readPosition(&m->in); |  | 
|   399   int match = 0; |  | 
|   400   while( block_pos!=-1 || in_pos!=-1 ){ |  | 
|   401     if( block_pos-m->iOffset==in_pos ){ |  | 
|   402       if( !match ){ |  | 
|   403         docListAddDocid(m->pOut, iDocid); |  | 
|   404         match = 1; |  | 
|   405       } |  | 
|   406       if( m->pOut->iType >= DL_POSITIONS ){ |  | 
|   407         docListAddPos(m->pOut, in_pos); |  | 
|   408       } |  | 
|   409       block_pos = readPosition(pBlockReader); |  | 
|   410       in_pos = readPosition(&m->in); |  | 
|   411     } else if( in_pos==-1 || (block_pos!=-1 && block_pos-m->iOffset<in_pos) ){ |  | 
|   412       block_pos = readPosition(pBlockReader); |  | 
|   413     } else { |  | 
|   414       in_pos = readPosition(&m->in); |  | 
|   415     } |  | 
|   416   } |  | 
|   417   if( m->pOut->iType >= DL_POSITIONS && match ){ |  | 
|   418     docListAddEndPos(m->pOut); |  | 
|   419   } |  | 
|   420 } |  | 
|   421  |  | 
|   422 /* Merge one block of an on-disk doclist into a DocListMerge. */ |  | 
|   423 static void mergeBlock(DocListMerge *m, DocList *pBlock){ |  | 
|   424   DocListReader blockReader; |  | 
|   425   assert( pBlock->iType >= DL_POSITIONS ); |  | 
|   426   readerInit(&blockReader, pBlock); |  | 
|   427   while( !readerAtEnd(&blockReader) ){ |  | 
|   428     sqlite_int64 iDocid = readDocid(&blockReader); |  | 
|   429     if( m->in.pDoclist!=NULL ){ |  | 
|   430       while( 1 ){ |  | 
|   431         if( readerAtEnd(&m->in) ) return;  /* nothing more to merge */ |  | 
|   432         if( peekDocid(&m->in)>=iDocid ) break; |  | 
|   433         skipDocument(&m->in); |  | 
|   434       } |  | 
|   435       if( peekDocid(&m->in)>iDocid ){  /* [pIn] has no match with iDocid */ |  | 
|   436         skipPositionList(&blockReader);  /* skip this docid in the block */ |  | 
|   437         continue; |  | 
|   438       } |  | 
|   439       readDocid(&m->in); |  | 
|   440     } |  | 
|   441     /* We have a document match. */ |  | 
|   442     if( m->in.pDoclist==NULL || m->in.pDoclist->iType < DL_POSITIONS ){ |  | 
|   443       /* We don't need to do a poslist merge. */ |  | 
|   444       docListAddDocid(m->pOut, iDocid); |  | 
|   445       if( m->pOut->iType >= DL_POSITIONS ){ |  | 
|   446         /* Copy all positions to the output doclist. */ |  | 
|   447         while( 1 ){ |  | 
|   448           int pos = readPosition(&blockReader); |  | 
|   449           if( pos==-1 ) break; |  | 
|   450           docListAddPos(m->pOut, pos); |  | 
|   451         } |  | 
|   452         docListAddEndPos(m->pOut); |  | 
|   453       } else skipPositionList(&blockReader); |  | 
|   454       continue; |  | 
|   455     } |  | 
|   456     mergePosList(m, iDocid, &blockReader); |  | 
|   457   } |  | 
|   458 } |  | 
|   459  |  | 
/* Return a newly allocated, NUL-terminated copy of the first n bytes of s.
 * The caller must free() the returned string.
 * Returns NULL if the allocation fails. */
static char *string_dup_n(const char *s, int n){
  char *str = malloc(n + 1);
  if( str==NULL ) return NULL;   /* out of memory: do not write through NULL */
  memcpy(str, s, n);
  str[n] = '\0';
  return str;
}
|   466  |  | 
/* Return a freshly allocated copy of the NUL-terminated string s; the
 * caller must free() it.
 * (strdup() is avoided because it is not part of the standard C library and
 * may not be available everywhere.) */
static char *string_dup(const char *s){
  size_t nLen = strlen(s);
  return string_dup_n(s, (int) nLen);
}
|   473  |  | 
/* Format a string, replacing each occurrence of the % character with
 * zName.  This may be more convenient than sqlite_mprintf()
 * when one string is used repeatedly in a format string.
 * The caller must free() the returned string.
 * Returns NULL if the allocation fails. */
static char *string_format(const char *zFormat, const char *zName){
  const char *p;
  size_t len = 0;
  size_t nName = strlen(zName);
  char *result;
  char *r;

  /* first compute length needed */
  for(p = zFormat ; *p ; ++p){
    len += (*p=='%' ? nName : 1);
  }
  len += 1;  /* for null terminator */

  r = result = malloc(len);
  if( result==NULL ) return NULL;   /* out of memory */

  /* second pass: copy, expanding each % into zName */
  for(p = zFormat; *p; ++p){
    if( *p=='%' ){
      memcpy(r, zName, nName);
      r += nName;
    } else {
      *r++ = *p;
    }
  }
  *r++ = '\0';
  assert( r == result + len );   /* both passes must agree on the length */
  return result;
}
|   504  |  | 
|   505 static int sql_exec(sqlite3 *db, const char *zName, const char *zFormat){ |  | 
|   506   char *zCommand = string_format(zFormat, zName); |  | 
|   507   int rc = sqlite3_exec(db, zCommand, NULL, 0, NULL); |  | 
|   508   free(zCommand); |  | 
|   509   return rc; |  | 
|   510 } |  | 
|   511  |  | 
|   512 static int sql_prepare(sqlite3 *db, const char *zName, sqlite3_stmt **ppStmt, |  | 
|   513                 const char *zFormat){ |  | 
|   514   char *zCommand = string_format(zFormat, zName); |  | 
|   515   int rc = sqlite3_prepare(db, zCommand, -1, ppStmt, NULL); |  | 
|   516   free(zCommand); |  | 
|   517   return rc; |  | 
|   518 } |  | 
|   519  |  | 
|   520 /* end utility functions */ |  | 
|   521  |  | 
/* Cursor types, stored in fulltext_cursor.iCursorType. */
#define QUERY_GENERIC 0
#define QUERY_FULLTEXT 1

#define CHUNK_MAX 1024

/* Indexes of the cached prepared statements.  The order must stay in step
 * with the fulltext_zStatement table. */
enum fulltext_statement {
  /* statements on the %_content table */
  CONTENT_INSERT_STMT,
  CONTENT_SELECT_STMT,
  CONTENT_DELETE_STMT,

  /* statements on the %_term table */
  TERM_SELECT_STMT,
  TERM_CHUNK_SELECT_STMT,
  TERM_INSERT_STMT,
  TERM_UPDATE_STMT,
  TERM_DELETE_STMT,

  MAX_STMT                     /* Always at end! */
};
typedef enum fulltext_statement fulltext_statement;
|   540  |  | 
|   541 /* These must exactly match the enum above. */ |  | 
|   542 /* TODO(adam): Is there some risk that a statement (in particular, |  | 
|   543 ** pTermSelectStmt) will be used in two cursors at once, e.g.  if a |  | 
|   544 ** query joins a virtual table to itself?  If so perhaps we should |  | 
|   545 ** move some of these to the cursor object. |  | 
|   546 */ |  | 
|   547 static const char *fulltext_zStatement[MAX_STMT] = { |  | 
|   548   /* CONTENT_INSERT */ "insert into %_content (rowid, content) values (?, ?)", |  | 
|   549   /* CONTENT_SELECT */ "select content from %_content where rowid = ?", |  | 
|   550   /* CONTENT_DELETE */ "delete from %_content where rowid = ?", |  | 
|   551  |  | 
|   552   /* TERM_SELECT */ |  | 
|   553   "select rowid, doclist from %_term where term = ? and first = ?", |  | 
|   554   /* TERM_CHUNK_SELECT */ |  | 
|   555   "select max(first) from %_term where term = ? and first <= ?", |  | 
|   556   /* TERM_INSERT */ |  | 
|   557   "insert into %_term (term, first, doclist) values (?, ?, ?)", |  | 
|   558   /* TERM_UPDATE */ "update %_term set doclist = ? where rowid = ?", |  | 
|   559   /* TERM_DELETE */ "delete from %_term where rowid = ?", |  | 
|   560 }; |  | 
|   561  |  | 
|   562 typedef struct fulltext_vtab { |  | 
|   563   sqlite3_vtab base; |  | 
|   564   sqlite3 *db; |  | 
|   565   const char *zName;               /* virtual table name */ |  | 
|   566   sqlite3_tokenizer *pTokenizer;   /* tokenizer for inserts and queries */ |  | 
|   567  |  | 
|   568   /* Precompiled statements which we keep as long as the table is |  | 
|   569   ** open. |  | 
|   570   */ |  | 
|   571   sqlite3_stmt *pFulltextStatements[MAX_STMT]; |  | 
|   572 } fulltext_vtab; |  | 
|   573  |  | 
|   574 typedef struct fulltext_cursor { |  | 
|   575   sqlite3_vtab_cursor base; |  | 
|   576   int iCursorType;  /* QUERY_GENERIC or QUERY_FULLTEXT */ |  | 
|   577  |  | 
|   578   sqlite3_stmt *pStmt; |  | 
|   579  |  | 
|   580   int eof; |  | 
|   581  |  | 
|   582   /* The following is used only when iCursorType == QUERY_FULLTEXT. */ |  | 
|   583   DocListReader result; |  | 
|   584 } fulltext_cursor; |  | 
|   585  |  | 
|   586 static struct fulltext_vtab *cursor_vtab(fulltext_cursor *c){ |  | 
|   587   return (fulltext_vtab *) c->base.pVtab; |  | 
|   588 } |  | 
|   589  |  | 
|   590 static sqlite3_module fulltextModule;   /* forward declaration */ |  | 
|   591  |  | 
|   592 /* Puts a freshly-prepared statement determined by iStmt in *ppStmt. |  | 
|   593 ** If the indicated statement has never been prepared, it is prepared |  | 
|   594 ** and cached, otherwise the cached version is reset. |  | 
|   595 */ |  | 
|   596 static int sql_get_statement(fulltext_vtab *v, fulltext_statement iStmt, |  | 
|   597                              sqlite3_stmt **ppStmt){ |  | 
|   598   assert( iStmt<MAX_STMT ); |  | 
|   599   if( v->pFulltextStatements[iStmt]==NULL ){ |  | 
|   600     int rc = sql_prepare(v->db, v->zName, &v->pFulltextStatements[iStmt], |  | 
|   601                          fulltext_zStatement[iStmt]); |  | 
|   602     if( rc!=SQLITE_OK ) return rc; |  | 
|   603   } else { |  | 
|   604     int rc = sqlite3_reset(v->pFulltextStatements[iStmt]); |  | 
|   605     if( rc!=SQLITE_OK ) return rc; |  | 
|   606   } |  | 
|   607  |  | 
|   608   *ppStmt = v->pFulltextStatements[iStmt]; |  | 
|   609   return SQLITE_OK; |  | 
|   610 } |  | 
|   611  |  | 
|   612 /* Step the indicated statement, handling errors SQLITE_BUSY (by |  | 
|   613 ** retrying) and SQLITE_SCHEMA (by re-preparing and transferring |  | 
|   614 ** bindings to the new statement). |  | 
|   615 ** TODO(adam): We should extend this function so that it can work with |  | 
|   616 ** statements declared locally, not only globally cached statements. |  | 
|   617 */ |  | 
|   618 static int sql_step_statement(fulltext_vtab *v, fulltext_statement iStmt, |  | 
|   619                               sqlite3_stmt **ppStmt){ |  | 
|   620   int rc; |  | 
|   621   sqlite3_stmt *s = *ppStmt; |  | 
|   622   assert( iStmt<MAX_STMT ); |  | 
|   623   assert( s==v->pFulltextStatements[iStmt] ); |  | 
|   624  |  | 
|   625   while( (rc=sqlite3_step(s))!=SQLITE_DONE && rc!=SQLITE_ROW ){ |  | 
|   626     sqlite3_stmt *pNewStmt; |  | 
|   627  |  | 
|   628     if( rc==SQLITE_BUSY ) continue; |  | 
|   629     if( rc!=SQLITE_ERROR ) return rc; |  | 
|   630  |  | 
|   631     rc = sqlite3_reset(s); |  | 
|   632     if( rc!=SQLITE_SCHEMA ) return SQLITE_ERROR; |  | 
|   633  |  | 
|   634     v->pFulltextStatements[iStmt] = NULL;   /* Still in s */ |  | 
|   635     rc = sql_get_statement(v, iStmt, &pNewStmt); |  | 
|   636     if( rc!=SQLITE_OK ) goto err; |  | 
|   637     *ppStmt = pNewStmt; |  | 
|   638  |  | 
|   639     rc = sqlite3_transfer_bindings(s, pNewStmt); |  | 
|   640     if( rc!=SQLITE_OK ) goto err; |  | 
|   641  |  | 
|   642     rc = sqlite3_finalize(s); |  | 
|   643     if( rc!=SQLITE_OK ) return rc; |  | 
|   644     s = pNewStmt; |  | 
|   645   } |  | 
|   646   return rc; |  | 
|   647  |  | 
|   648  err: |  | 
|   649   sqlite3_finalize(s); |  | 
|   650   return rc; |  | 
|   651 } |  | 
|   652  |  | 
|   653 /* Like sql_step_statement(), but convert SQLITE_DONE to SQLITE_OK. |  | 
|   654 ** Useful for statements like UPDATE, where we expect no results. |  | 
|   655 */ |  | 
|   656 static int sql_single_step_statement(fulltext_vtab *v, |  | 
|   657                                      fulltext_statement iStmt, |  | 
|   658                                      sqlite3_stmt **ppStmt){ |  | 
|   659   int rc = sql_step_statement(v, iStmt, ppStmt); |  | 
|   660   return (rc==SQLITE_DONE) ? SQLITE_OK : rc; |  | 
|   661 } |  | 
|   662  |  | 
|   663 /* insert into %_content (rowid, content) values ([rowid], [zContent]) */ |  | 
|   664 static int content_insert(fulltext_vtab *v, sqlite3_value *rowid, |  | 
|   665                           const char *zContent, int nContent){ |  | 
|   666   sqlite3_stmt *s; |  | 
|   667   int rc = sql_get_statement(v, CONTENT_INSERT_STMT, &s); |  | 
|   668   if( rc!=SQLITE_OK ) return rc; |  | 
|   669  |  | 
|   670   rc = sqlite3_bind_value(s, 1, rowid); |  | 
|   671   if( rc!=SQLITE_OK ) return rc; |  | 
|   672  |  | 
|   673   rc = sqlite3_bind_text(s, 2, zContent, nContent, SQLITE_STATIC); |  | 
|   674   if( rc!=SQLITE_OK ) return rc; |  | 
|   675  |  | 
|   676   return sql_single_step_statement(v, CONTENT_INSERT_STMT, &s); |  | 
|   677 } |  | 
|   678  |  | 
|   679 /* select content from %_content where rowid = [iRow] |  | 
|   680  * The caller must delete the returned string. */ |  | 
|   681 static int content_select(fulltext_vtab *v, sqlite_int64 iRow, |  | 
|   682                           char **pzContent){ |  | 
|   683   sqlite3_stmt *s; |  | 
|   684   int rc = sql_get_statement(v, CONTENT_SELECT_STMT, &s); |  | 
|   685   if( rc!=SQLITE_OK ) return rc; |  | 
|   686  |  | 
|   687   rc = sqlite3_bind_int64(s, 1, iRow); |  | 
|   688   if( rc!=SQLITE_OK ) return rc; |  | 
|   689  |  | 
|   690   rc = sql_step_statement(v, CONTENT_SELECT_STMT, &s); |  | 
|   691   if( rc!=SQLITE_ROW ) return rc; |  | 
|   692  |  | 
|   693   *pzContent = string_dup((const char *)sqlite3_column_text(s, 0)); |  | 
|   694  |  | 
|   695   /* We expect only one row.  We must execute another sqlite3_step() |  | 
|   696    * to complete the iteration; otherwise the table will remain locked. */ |  | 
|   697   rc = sqlite3_step(s); |  | 
|   698   if( rc==SQLITE_DONE ) return SQLITE_OK; |  | 
|   699  |  | 
|   700   free(*pzContent); |  | 
|   701   return rc; |  | 
|   702 } |  | 
|   703  |  | 
|   704 /* delete from %_content where rowid = [iRow ] */ |  | 
|   705 static int content_delete(fulltext_vtab *v, sqlite_int64 iRow){ |  | 
|   706   sqlite3_stmt *s; |  | 
|   707   int rc = sql_get_statement(v, CONTENT_DELETE_STMT, &s); |  | 
|   708   if( rc!=SQLITE_OK ) return rc; |  | 
|   709  |  | 
|   710   rc = sqlite3_bind_int64(s, 1, iRow); |  | 
|   711   if( rc!=SQLITE_OK ) return rc; |  | 
|   712  |  | 
|   713   return sql_single_step_statement(v, CONTENT_DELETE_STMT, &s); |  | 
|   714 } |  | 
|   715  |  | 
|   716 /* select rowid, doclist from %_term where term = [zTerm] and first = [iFirst] |  | 
|   717  * If found, returns SQLITE_OK; the caller must free the returned doclist. |  | 
|   718  * If no rows found, returns SQLITE_ERROR. */ |  | 
|   719 static int term_select(fulltext_vtab *v, const char *zTerm, int nTerm, |  | 
|   720                        sqlite_int64 iFirst, |  | 
|   721                        sqlite_int64 *rowid, |  | 
|   722                        DocList *out){ |  | 
|   723   sqlite3_stmt *s; |  | 
|   724   int rc = sql_get_statement(v, TERM_SELECT_STMT, &s); |  | 
|   725   if( rc!=SQLITE_OK ) return rc; |  | 
|   726  |  | 
|   727   rc = sqlite3_bind_text(s, 1, zTerm, nTerm, SQLITE_TRANSIENT); |  | 
|   728   if( rc!=SQLITE_OK ) return rc; |  | 
|   729  |  | 
|   730   rc = sqlite3_bind_int64(s, 2, iFirst); |  | 
|   731   if( rc!=SQLITE_OK ) return rc; |  | 
|   732  |  | 
|   733   rc = sql_step_statement(v, TERM_SELECT_STMT, &s); |  | 
|   734   if( rc!=SQLITE_ROW ) return rc==SQLITE_DONE ? SQLITE_ERROR : rc; |  | 
|   735  |  | 
|   736   *rowid = sqlite3_column_int64(s, 0); |  | 
|   737   docListInit(out, DL_POSITIONS_OFFSETS, |  | 
|   738               sqlite3_column_blob(s, 1), sqlite3_column_bytes(s, 1)); |  | 
|   739  |  | 
|   740   /* We expect only one row.  We must execute another sqlite3_step() |  | 
|   741    * to complete the iteration; otherwise the table will remain locked. */ |  | 
|   742   rc = sqlite3_step(s); |  | 
|   743   return rc==SQLITE_DONE ? SQLITE_OK : rc; |  | 
|   744 } |  | 
|   745  |  | 
|   746 /* select max(first) from %_term where term = [zTerm] and first <= [iFirst] |  | 
|   747  * If found, returns SQLITE_ROW and result in *piResult; if the query returns |  | 
|   748  * NULL (meaning no row found) returns SQLITE_DONE. |  | 
|   749  */ |  | 
|   750 static int term_chunk_select(fulltext_vtab *v, const char *zTerm, int nTerm, |  | 
|   751                            sqlite_int64 iFirst, sqlite_int64 *piResult){ |  | 
|   752   sqlite3_stmt *s; |  | 
|   753   int rc = sql_get_statement(v, TERM_CHUNK_SELECT_STMT, &s); |  | 
|   754   if( rc!=SQLITE_OK ) return rc; |  | 
|   755  |  | 
|   756   rc = sqlite3_bind_text(s, 1, zTerm, nTerm, SQLITE_STATIC); |  | 
|   757   if( rc!=SQLITE_OK ) return rc; |  | 
|   758  |  | 
|   759   rc = sqlite3_bind_int64(s, 2, iFirst); |  | 
|   760   if( rc!=SQLITE_OK ) return rc; |  | 
|   761  |  | 
|   762   rc = sql_step_statement(v, TERM_CHUNK_SELECT_STMT, &s); |  | 
|   763   if( rc!=SQLITE_ROW ) return rc==SQLITE_DONE ? SQLITE_ERROR : rc; |  | 
|   764  |  | 
|   765   switch( sqlite3_column_type(s, 0) ){ |  | 
|   766     case SQLITE_NULL: |  | 
|   767       rc = SQLITE_DONE; |  | 
|   768       break; |  | 
|   769     case SQLITE_INTEGER: |  | 
|   770      *piResult = sqlite3_column_int64(s, 0); |  | 
|   771      break; |  | 
|   772     default: |  | 
|   773       return SQLITE_ERROR; |  | 
|   774   } |  | 
|   775   /* We expect only one row.  We must execute another sqlite3_step() |  | 
|   776    * to complete the iteration; otherwise the table will remain locked. */ |  | 
|   777   if( sqlite3_step(s) != SQLITE_DONE ) return SQLITE_ERROR; |  | 
|   778   return rc; |  | 
|   779 } |  | 
|   780  |  | 
|   781 /* insert into %_term (term, first, doclist) |  | 
|   782                values ([zTerm], [iFirst], [doclist]) */ |  | 
|   783 static int term_insert(fulltext_vtab *v, const char *zTerm, int nTerm, |  | 
|   784                        sqlite_int64 iFirst, DocList *doclist){ |  | 
|   785   sqlite3_stmt *s; |  | 
|   786   int rc = sql_get_statement(v, TERM_INSERT_STMT, &s); |  | 
|   787   if( rc!=SQLITE_OK ) return rc; |  | 
|   788  |  | 
|   789   rc = sqlite3_bind_text(s, 1, zTerm, nTerm, SQLITE_STATIC); |  | 
|   790   if( rc!=SQLITE_OK ) return rc; |  | 
|   791  |  | 
|   792   rc = sqlite3_bind_int64(s, 2, iFirst); |  | 
|   793   if( rc!=SQLITE_OK ) return rc; |  | 
|   794  |  | 
|   795   rc = sqlite3_bind_blob(s, 3, doclist->pData, doclist->nData, SQLITE_STATIC); |  | 
|   796   if( rc!=SQLITE_OK ) return rc; |  | 
|   797  |  | 
|   798   return sql_single_step_statement(v, TERM_INSERT_STMT, &s); |  | 
|   799 } |  | 
|   800  |  | 
|   801 /* update %_term set doclist = [doclist] where rowid = [rowid] */ |  | 
|   802 static int term_update(fulltext_vtab *v, sqlite_int64 rowid, |  | 
|   803                        DocList *doclist){ |  | 
|   804   sqlite3_stmt *s; |  | 
|   805   int rc = sql_get_statement(v, TERM_UPDATE_STMT, &s); |  | 
|   806   if( rc!=SQLITE_OK ) return rc; |  | 
|   807  |  | 
|   808   rc = sqlite3_bind_blob(s, 1, doclist->pData, doclist->nData, |  | 
|   809                          SQLITE_STATIC); |  | 
|   810   if( rc!=SQLITE_OK ) return rc; |  | 
|   811  |  | 
|   812   rc = sqlite3_bind_int64(s, 2, rowid); |  | 
|   813   if( rc!=SQLITE_OK ) return rc; |  | 
|   814  |  | 
|   815   return sql_single_step_statement(v, TERM_UPDATE_STMT, &s); |  | 
|   816 } |  | 
|   817  |  | 
|   818 static int term_delete(fulltext_vtab *v, sqlite_int64 rowid){ |  | 
|   819   sqlite3_stmt *s; |  | 
|   820   int rc = sql_get_statement(v, TERM_DELETE_STMT, &s); |  | 
|   821   if( rc!=SQLITE_OK ) return rc; |  | 
|   822  |  | 
|   823   rc = sqlite3_bind_int64(s, 1, rowid); |  | 
|   824   if( rc!=SQLITE_OK ) return rc; |  | 
|   825  |  | 
|   826   return sql_single_step_statement(v, TERM_DELETE_STMT, &s); |  | 
|   827 } |  | 
|   828  |  | 
|   829 static void fulltext_vtab_destroy(fulltext_vtab *v){ |  | 
|   830   int iStmt; |  | 
|   831  |  | 
|   832   for( iStmt=0; iStmt<MAX_STMT; iStmt++ ){ |  | 
|   833     if( v->pFulltextStatements[iStmt]!=NULL ){ |  | 
|   834       sqlite3_finalize(v->pFulltextStatements[iStmt]); |  | 
|   835       v->pFulltextStatements[iStmt] = NULL; |  | 
|   836     } |  | 
|   837   } |  | 
|   838  |  | 
|   839   if( v->pTokenizer!=NULL ){ |  | 
|   840     v->pTokenizer->pModule->xDestroy(v->pTokenizer); |  | 
|   841     v->pTokenizer = NULL; |  | 
|   842   } |  | 
|   843  |  | 
|   844   free((void *) v->zName); |  | 
|   845   free(v); |  | 
|   846 } |  | 
|   847  |  | 
|   848 /* Current interface: |  | 
|   849 ** argv[0] - module name |  | 
|   850 ** argv[1] - database name |  | 
|   851 ** argv[2] - table name |  | 
|   852 ** argv[3] - tokenizer name (optional, a sensible default is provided) |  | 
|   853 ** argv[4..] - passed to tokenizer (optional based on tokenizer) |  | 
|   854 **/ |  | 
|   855 static int fulltextConnect(sqlite3 *db, void *pAux, int argc, char **argv, |  | 
|   856                            sqlite3_vtab **ppVTab){ |  | 
|   857   int rc; |  | 
|   858   fulltext_vtab *v; |  | 
|   859   sqlite3_tokenizer_module *m = NULL; |  | 
|   860  |  | 
|   861   assert( argc>=3 ); |  | 
|   862   v = (fulltext_vtab *) malloc(sizeof(fulltext_vtab)); |  | 
|   863   /* sqlite will initialize v->base */ |  | 
|   864   v->db = db; |  | 
|   865   v->zName = string_dup(argv[2]); |  | 
|   866   v->pTokenizer = NULL; |  | 
|   867  |  | 
|   868   if( argc==3 ){ |  | 
|   869     get_simple_tokenizer_module(&m); |  | 
|   870   } else { |  | 
|   871     /* TODO(shess) For now, add new tokenizers as else if clauses. */ |  | 
|   872     if( !strcmp(argv[3], "simple") ){ |  | 
|   873       get_simple_tokenizer_module(&m); |  | 
|   874     } else { |  | 
|   875       assert( "unrecognized tokenizer"==NULL ); |  | 
|   876     } |  | 
|   877   } |  | 
|   878  |  | 
|   879   /* TODO(shess) Since tokenization impacts the index, the parameters |  | 
|   880   ** to the tokenizer need to be identical when a persistent virtual |  | 
|   881   ** table is re-created.  One solution would be a meta-table to track |  | 
|   882   ** such information in the database.  Then we could verify that the |  | 
|   883   ** information is identical on subsequent creates. |  | 
|   884   */ |  | 
|   885   /* TODO(shess) Why isn't argv already (const char **)? */ |  | 
|   886   rc = m->xCreate(argc-3, (const char **) (argv+3), &v->pTokenizer); |  | 
|   887   if( rc!=SQLITE_OK ) return rc; |  | 
|   888   v->pTokenizer->pModule = m; |  | 
|   889  |  | 
|   890   /* TODO: verify the existence of backing tables foo_content, foo_term */ |  | 
|   891  |  | 
|   892   rc = sqlite3_declare_vtab(db, "create table x(content text)"); |  | 
|   893   if( rc!=SQLITE_OK ) return rc; |  | 
|   894  |  | 
|   895   memset(v->pFulltextStatements, 0, sizeof(v->pFulltextStatements)); |  | 
|   896  |  | 
|   897   *ppVTab = &v->base; |  | 
|   898   return SQLITE_OK; |  | 
|   899 } |  | 
|   900  |  | 
|   901 static int fulltextCreate(sqlite3 *db, void *pAux, int argc, char **argv, |  | 
|   902                           sqlite3_vtab **ppVTab){ |  | 
|   903   int rc; |  | 
|   904   assert( argc>=3 ); |  | 
|   905  |  | 
|   906   /* The %_content table holds the text of each full-text item, with |  | 
|   907   ** the rowid used as the docid. |  | 
|   908   ** |  | 
|   909   ** The %_term table maps each term to a document list blob |  | 
|   910   ** containing elements sorted by ascending docid, each element |  | 
|   911   ** encoded as: |  | 
|   912   ** |  | 
|   913   **   docid varint-encoded |  | 
|   914   **   token count varint-encoded |  | 
|   915   **   "count" token elements (poslist): |  | 
|   916   **     position varint-encoded as delta from previous position |  | 
|   917   **     start offset varint-encoded as delta from previous start offset |  | 
|   918   **     end offset varint-encoded as delta from start offset |  | 
|   919   ** |  | 
|   920   ** Additionally, doclist blobs can be chunked into multiple rows, |  | 
|   921   ** using "first" to order the blobs.  "first" is simply the first |  | 
|   922   ** docid in the blob. |  | 
|   923   */ |  | 
|   924   /* |  | 
|   925   ** NOTE(shess) That last sentence is incorrect in the face of |  | 
|   926   ** deletion, which can leave a doclist that doesn't contain the |  | 
|   927   ** first from that row.  I _believe_ this does not matter to the |  | 
|   928   ** operation of the system, but it might be reasonable to update |  | 
|   929   ** appropriately in case this assumption becomes more important. |  | 
|   930   */ |  | 
|   931   rc = sql_exec(db, argv[2], |  | 
|   932     "create table %_content(content text);" |  | 
|   933     "create table %_term(term text, first integer, doclist blob);" |  | 
|   934     "create index %_index on %_term(term, first)"); |  | 
|   935   if( rc!=SQLITE_OK ) return rc; |  | 
|   936  |  | 
|   937   return fulltextConnect(db, pAux, argc, argv, ppVTab); |  | 
|   938 } |  | 
|   939  |  | 
|   940 /* Decide how to handle an SQL query. |  | 
|   941  * At the moment, MATCH queries can include implicit boolean ANDs; we |  | 
|   942  * haven't implemented phrase searches or OR yet. */ |  | 
|   943 static int fulltextBestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ |  | 
|   944   int i; |  | 
|   945  |  | 
|   946   for(i=0; i<pInfo->nConstraint; ++i){ |  | 
|   947     const struct sqlite3_index_constraint *pConstraint; |  | 
|   948     pConstraint = &pInfo->aConstraint[i]; |  | 
|   949     if( pConstraint->iColumn==0 && |  | 
|   950         pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH && |  | 
|   951         pConstraint->usable ){   /* a full-text search */ |  | 
|   952       pInfo->aConstraintUsage[i].argvIndex = 1; |  | 
|   953       pInfo->aConstraintUsage[i].omit = 1; |  | 
|   954       pInfo->idxNum = QUERY_FULLTEXT; |  | 
|   955       pInfo->estimatedCost = 1.0;   /* an arbitrary value for now */ |  | 
|   956       return SQLITE_OK; |  | 
|   957     } |  | 
|   958   } |  | 
|   959   pInfo->idxNum = QUERY_GENERIC; |  | 
|   960   return SQLITE_OK; |  | 
|   961 } |  | 
|   962  |  | 
|   963 static int fulltextDisconnect(sqlite3_vtab *pVTab){ |  | 
|   964   fulltext_vtab_destroy((fulltext_vtab *)pVTab); |  | 
|   965   return SQLITE_OK; |  | 
|   966 } |  | 
|   967  |  | 
|   968 static int fulltextDestroy(sqlite3_vtab *pVTab){ |  | 
|   969   fulltext_vtab *v = (fulltext_vtab *)pVTab; |  | 
|   970  |  | 
|   971   int rc = sql_exec(v->db, v->zName, |  | 
|   972                     "drop table %_content; drop table %_term"); |  | 
|   973   if( rc!=SQLITE_OK ) return rc; |  | 
|   974  |  | 
|   975   fulltext_vtab_destroy((fulltext_vtab *)pVTab); |  | 
|   976   return SQLITE_OK; |  | 
|   977 } |  | 
|   978  |  | 
|   979 static int fulltextOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){ |  | 
|   980   fulltext_cursor *c; |  | 
|   981  |  | 
|   982   c = (fulltext_cursor *) calloc(sizeof(fulltext_cursor), 1); |  | 
|   983   /* sqlite will initialize c->base */ |  | 
|   984   *ppCursor = &c->base; |  | 
|   985  |  | 
|   986   return SQLITE_OK; |  | 
|   987 } |  | 
|   988  |  | 
|   989 static int fulltextClose(sqlite3_vtab_cursor *pCursor){ |  | 
|   990   fulltext_cursor *c = (fulltext_cursor *) pCursor; |  | 
|   991   sqlite3_finalize(c->pStmt); |  | 
|   992   if( c->result.pDoclist!=NULL ){ |  | 
|   993     docListDelete(c->result.pDoclist); |  | 
|   994   } |  | 
|   995   free(c); |  | 
|   996   return SQLITE_OK; |  | 
|   997 } |  | 
|   998  |  | 
|   999 static int fulltextNext(sqlite3_vtab_cursor *pCursor){ |  | 
|  1000   fulltext_cursor *c = (fulltext_cursor *) pCursor; |  | 
|  1001   sqlite_int64 iDocid; |  | 
|  1002   int rc; |  | 
|  1003  |  | 
|  1004   switch( c->iCursorType ){ |  | 
|  1005     case QUERY_GENERIC: |  | 
|  1006       /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */ |  | 
|  1007       rc = sqlite3_step(c->pStmt); |  | 
|  1008       switch( rc ){ |  | 
|  1009         case SQLITE_ROW: |  | 
|  1010           c->eof = 0; |  | 
|  1011           return SQLITE_OK; |  | 
|  1012         case SQLITE_DONE: |  | 
|  1013           c->eof = 1; |  | 
|  1014           return SQLITE_OK; |  | 
|  1015         default: |  | 
|  1016           c->eof = 1; |  | 
|  1017           return rc; |  | 
|  1018       } |  | 
|  1019     case QUERY_FULLTEXT: |  | 
|  1020       rc = sqlite3_reset(c->pStmt); |  | 
|  1021       if( rc!=SQLITE_OK ) return rc; |  | 
|  1022  |  | 
|  1023       if( readerAtEnd(&c->result)){ |  | 
|  1024         c->eof = 1; |  | 
|  1025         return SQLITE_OK; |  | 
|  1026       } |  | 
|  1027       iDocid = readDocid(&c->result); |  | 
|  1028       rc = sqlite3_bind_int64(c->pStmt, 1, iDocid); |  | 
|  1029       if( rc!=SQLITE_OK ) return rc; |  | 
|  1030       /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */ |  | 
|  1031       rc = sqlite3_step(c->pStmt); |  | 
|  1032       if( rc==SQLITE_ROW ){   /* the case we expect */ |  | 
|  1033         c->eof = 0; |  | 
|  1034         return SQLITE_OK; |  | 
|  1035       } |  | 
|  1036       /* an error occurred; abort */ |  | 
|  1037       return rc==SQLITE_DONE ? SQLITE_ERROR : rc; |  | 
|  1038     default: |  | 
|  1039       assert( 0 ); |  | 
|  1040       return SQLITE_ERROR;  /* not reached */ |  | 
|  1041   } |  | 
|  1042 } |  | 
|  1043  |  | 
|  1044 static int term_select_doclist(fulltext_vtab *v, const char *pTerm, int nTerm, |  | 
|  1045                                sqlite3_stmt **ppStmt){ |  | 
|  1046   int rc; |  | 
|  1047   if( *ppStmt ){ |  | 
|  1048     rc = sqlite3_reset(*ppStmt); |  | 
|  1049   } else { |  | 
|  1050     rc = sql_prepare(v->db, v->zName, ppStmt, |  | 
|  1051       "select doclist from %_term where term = ? order by first"); |  | 
|  1052   } |  | 
|  1053   if( rc!=SQLITE_OK ) return rc; |  | 
|  1054  |  | 
|  1055   rc = sqlite3_bind_text(*ppStmt, 1, pTerm, nTerm, SQLITE_TRANSIENT); |  | 
|  1056   if( rc!=SQLITE_OK ) return rc; |  | 
|  1057  |  | 
|  1058   return sqlite3_step(*ppStmt);   /* TODO(adamd): handle schema error */ |  | 
|  1059 } |  | 
|  1060  |  | 
|  1061 /* Read the posting list for [zTerm]; AND it with the doclist [in] to |  | 
|  1062  * produce the doclist [out], using the given offset [iOffset] for phrase |  | 
|  1063  * matching. |  | 
|  1064  * (*pSelect) is used to hold an SQLite statement used inside this function; |  | 
|  1065  * the caller should initialize *pSelect to NULL before the first call. |  | 
|  1066  */ |  | 
|  1067 static int query_merge(fulltext_vtab *v, sqlite3_stmt **pSelect, |  | 
|  1068                        const char *zTerm, |  | 
|  1069                        DocList *pIn, int iOffset, DocList *out){ |  | 
|  1070   int rc; |  | 
|  1071   DocListMerge merge; |  | 
|  1072  |  | 
|  1073   if( pIn!=NULL && !pIn->nData ){ |  | 
|  1074     /* If [pIn] is already empty, there's no point in reading the |  | 
|  1075      * posting list to AND it in; return immediately. */ |  | 
|  1076       return SQLITE_OK; |  | 
|  1077   } |  | 
|  1078  |  | 
|  1079   rc = term_select_doclist(v, zTerm, -1, pSelect); |  | 
|  1080   if( rc!=SQLITE_ROW && rc!=SQLITE_DONE ) return rc; |  | 
|  1081  |  | 
|  1082   mergeInit(&merge, pIn, iOffset, out); |  | 
|  1083   while( rc==SQLITE_ROW ){ |  | 
|  1084     DocList block; |  | 
|  1085     docListInit(&block, DL_POSITIONS_OFFSETS, |  | 
|  1086                 sqlite3_column_blob(*pSelect, 0), |  | 
|  1087                 sqlite3_column_bytes(*pSelect, 0)); |  | 
|  1088     mergeBlock(&merge, &block); |  | 
|  1089     docListDestroy(&block); |  | 
|  1090  |  | 
|  1091     rc = sqlite3_step(*pSelect); |  | 
|  1092     if( rc!=SQLITE_ROW && rc!=SQLITE_DONE ){ |  | 
|  1093       return rc; |  | 
|  1094     } |  | 
|  1095   } |  | 
|  1096    |  | 
|  1097   return SQLITE_OK; |  | 
|  1098 } |  | 
|  1099  |  | 
/* One term of a parsed query. */
typedef struct QueryTerm {
  int is_phrase;      /* non-zero when this term begins a new phrase */
  const char *zTerm;  /* term text; released with free() by query_free() */
} QueryTerm;
|  1104  |  | 
|  1105 /* A parsed query. |  | 
|  1106  * |  | 
|  1107  * As an example, parsing the query ["four score" years "new nation"] will |  | 
|  1108  * yield a Query with 5 terms: |  | 
|  1109  *   "four",   is_phrase = 1 |  | 
|  1110  *   "score",  is_phrase = 0 |  | 
|  1111  *   "years",  is_phrase = 1 |  | 
|  1112  *   "new",    is_phrase = 1 |  | 
|  1113  *   "nation", is_phrase = 0 |  | 
|  1114  */ |  | 
|  1115 typedef struct Query { |  | 
|  1116   int nTerms; |  | 
|  1117   QueryTerm *pTerm; |  | 
|  1118 } Query; |  | 
|  1119  |  | 
|  1120 static void query_add(Query *q, int is_phrase, const char *zTerm){ |  | 
|  1121   QueryTerm *t; |  | 
|  1122   ++q->nTerms; |  | 
|  1123   q->pTerm = realloc(q->pTerm, q->nTerms * sizeof(q->pTerm[0])); |  | 
|  1124   t = &q->pTerm[q->nTerms - 1]; |  | 
|  1125   t->is_phrase = is_phrase; |  | 
|  1126   t->zTerm = zTerm; |  | 
|  1127 } |  | 
|  1128      |  | 
|  1129 static void query_free(Query *q){ |  | 
|  1130   int i; |  | 
|  1131   for(i = 0; i < q->nTerms; ++i){ |  | 
|  1132     free((void *) q->pTerm[i].zTerm); |  | 
|  1133   } |  | 
|  1134   free(q->pTerm); |  | 
|  1135 } |  | 
|  1136  |  | 
|  1137 static int tokenize_segment(sqlite3_tokenizer *pTokenizer, |  | 
|  1138                             const char *zQuery, int in_phrase, |  | 
|  1139                             Query *pQuery){ |  | 
|  1140   sqlite3_tokenizer_module *pModule = pTokenizer->pModule; |  | 
|  1141   sqlite3_tokenizer_cursor *pCursor; |  | 
|  1142   int is_first = 1; |  | 
|  1143    |  | 
|  1144   int rc = pModule->xOpen(pTokenizer, zQuery, -1, &pCursor); |  | 
|  1145   if( rc!=SQLITE_OK ) return rc; |  | 
|  1146   pCursor->pTokenizer = pTokenizer; |  | 
|  1147  |  | 
|  1148   while( 1 ){ |  | 
|  1149     const char *zToken; |  | 
|  1150     int nToken, iStartOffset, iEndOffset, dummy_pos; |  | 
|  1151  |  | 
|  1152     rc = pModule->xNext(pCursor, |  | 
|  1153                         &zToken, &nToken, |  | 
|  1154                         &iStartOffset, &iEndOffset, |  | 
|  1155                         &dummy_pos); |  | 
|  1156     if( rc!=SQLITE_OK ) break; |  | 
|  1157     query_add(pQuery, !in_phrase || is_first, string_dup_n(zToken, nToken)); |  | 
|  1158     is_first = 0; |  | 
|  1159   } |  | 
|  1160  |  | 
|  1161   return pModule->xClose(pCursor); |  | 
|  1162 } |  | 
|  1163  |  | 
|  1164 /* Parse a query string, yielding a Query object. */ |  | 
|  1165 static int parse_query(fulltext_vtab *v, const char *zQuery, Query *pQuery){ |  | 
|  1166   char *zQuery1 = string_dup(zQuery); |  | 
|  1167   int in_phrase = 0; |  | 
|  1168   char *s = zQuery1; |  | 
|  1169   pQuery->nTerms = 0; |  | 
|  1170   pQuery->pTerm = NULL; |  | 
|  1171  |  | 
|  1172   while( *s ){ |  | 
|  1173     char *t = s; |  | 
|  1174     while( *t ){ |  | 
|  1175       if( *t=='"' ){ |  | 
|  1176         *t++ = '\0'; |  | 
|  1177         break; |  | 
|  1178       } |  | 
|  1179       ++t; |  | 
|  1180     } |  | 
|  1181     if( *s ){ |  | 
|  1182       tokenize_segment(v->pTokenizer, s, in_phrase, pQuery); |  | 
|  1183     } |  | 
|  1184     s = t; |  | 
|  1185     in_phrase = !in_phrase; |  | 
|  1186   } |  | 
|  1187    |  | 
|  1188   free(zQuery1); |  | 
|  1189   return SQLITE_OK; |  | 
|  1190 } |  | 
|  1191  |  | 
|  1192 /* Perform a full-text query; return a list of documents in [pResult]. */ |  | 
|  1193 static int fulltext_query(fulltext_vtab *v, const char *zQuery, |  | 
|  1194                           DocList **pResult){ |  | 
|  1195   Query q; |  | 
|  1196   int phrase_start = -1; |  | 
|  1197   int i; |  | 
|  1198   sqlite3_stmt *pSelect = NULL; |  | 
|  1199   DocList *d = NULL; |  | 
|  1200  |  | 
|  1201   int rc = parse_query(v, zQuery, &q); |  | 
|  1202   if( rc!=SQLITE_OK ) return rc; |  | 
|  1203  |  | 
|  1204   /* Merge terms. */ |  | 
|  1205   for(i = 0 ; i < q.nTerms ; ++i){ |  | 
|  1206     /* In each merge step, we need to generate positions whenever we're |  | 
|  1207      * processing a phrase which hasn't ended yet. */ |  | 
|  1208     int need_positions = i<q.nTerms-1 && !q.pTerm[i+1].is_phrase; |  | 
|  1209     DocList *next = docListNew(need_positions ? DL_POSITIONS : DL_DOCIDS); |  | 
|  1210     if( q.pTerm[i].is_phrase ){ |  | 
|  1211       phrase_start = i; |  | 
|  1212     } |  | 
|  1213     rc = query_merge(v, &pSelect, q.pTerm[i].zTerm, d, i - phrase_start, next); |  | 
|  1214     if( rc!=SQLITE_OK ) break; |  | 
|  1215     if( d!=NULL ){ |  | 
|  1216       docListDelete(d); |  | 
|  1217     } |  | 
|  1218     d = next; |  | 
|  1219   } |  | 
|  1220  |  | 
|  1221   sqlite3_finalize(pSelect); |  | 
|  1222   query_free(&q); |  | 
|  1223   *pResult = d; |  | 
|  1224   return rc; |  | 
|  1225 } |  | 
|  1226  |  | 
|  1227 static int fulltextFilter(sqlite3_vtab_cursor *pCursor, |  | 
|  1228                           int idxNum, const char *idxStr, |  | 
|  1229                           int argc, sqlite3_value **argv){ |  | 
|  1230   fulltext_cursor *c = (fulltext_cursor *) pCursor; |  | 
|  1231   fulltext_vtab *v = cursor_vtab(c); |  | 
|  1232   int rc; |  | 
|  1233   const char *zStatement; |  | 
|  1234  |  | 
|  1235   c->iCursorType = idxNum; |  | 
|  1236   switch( idxNum ){ |  | 
|  1237     case QUERY_GENERIC: |  | 
|  1238       zStatement = "select rowid, content from %_content"; |  | 
|  1239       break; |  | 
|  1240  |  | 
|  1241     case QUERY_FULLTEXT:   /* full-text search */ |  | 
|  1242     { |  | 
|  1243       const char *zQuery = (const char *)sqlite3_value_text(argv[0]); |  | 
|  1244       DocList *pResult; |  | 
|  1245       assert( argc==1 ); |  | 
|  1246       rc = fulltext_query(v, zQuery, &pResult); |  | 
|  1247       if( rc!=SQLITE_OK ) return rc; |  | 
|  1248       readerInit(&c->result, pResult); |  | 
|  1249       zStatement = "select rowid, content from %_content where rowid = ?"; |  | 
|  1250       break; |  | 
|  1251     } |  | 
|  1252  |  | 
|  1253     default: |  | 
|  1254       assert( 0 ); |  | 
|  1255   } |  | 
|  1256  |  | 
|  1257   rc = sql_prepare(v->db, v->zName, &c->pStmt, zStatement); |  | 
|  1258   if( rc!=SQLITE_OK ) return rc; |  | 
|  1259  |  | 
|  1260   return fulltextNext(pCursor); |  | 
|  1261 } |  | 
|  1262  |  | 
|  1263 static int fulltextEof(sqlite3_vtab_cursor *pCursor){ |  | 
|  1264   fulltext_cursor *c = (fulltext_cursor *) pCursor; |  | 
|  1265   return c->eof; |  | 
|  1266 } |  | 
|  1267  |  | 
|  1268 static int fulltextColumn(sqlite3_vtab_cursor *pCursor, |  | 
|  1269                           sqlite3_context *pContext, int idxCol){ |  | 
|  1270   fulltext_cursor *c = (fulltext_cursor *) pCursor; |  | 
|  1271   const char *s; |  | 
|  1272  |  | 
|  1273   assert( idxCol==0 ); |  | 
|  1274   s = (const char *) sqlite3_column_text(c->pStmt, 1); |  | 
|  1275   sqlite3_result_text(pContext, s, -1, SQLITE_TRANSIENT); |  | 
|  1276  |  | 
|  1277   return SQLITE_OK; |  | 
|  1278 } |  | 
|  1279  |  | 
|  1280 static int fulltextRowid(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ |  | 
|  1281   fulltext_cursor *c = (fulltext_cursor *) pCursor; |  | 
|  1282  |  | 
|  1283   *pRowid = sqlite3_column_int64(c->pStmt, 0); |  | 
|  1284   return SQLITE_OK; |  | 
|  1285 } |  | 
|  1286  |  | 
|  1287 /* Build a hash table containing all terms in zText. */ |  | 
|  1288 static int build_terms(Hash *terms, sqlite3_tokenizer *pTokenizer, |  | 
|  1289                        const char *zText, sqlite_int64 iDocid){ |  | 
|  1290   sqlite3_tokenizer_cursor *pCursor; |  | 
|  1291   const char *pToken; |  | 
|  1292   int nTokenBytes; |  | 
|  1293   int iStartOffset, iEndOffset, iPosition; |  | 
|  1294  |  | 
|  1295   int rc = pTokenizer->pModule->xOpen(pTokenizer, zText, -1, &pCursor); |  | 
|  1296   if( rc!=SQLITE_OK ) return rc; |  | 
|  1297  |  | 
|  1298   pCursor->pTokenizer = pTokenizer; |  | 
|  1299   HashInit(terms, HASH_STRING, 1); |  | 
|  1300   while( SQLITE_OK==pTokenizer->pModule->xNext(pCursor, |  | 
|  1301                                                &pToken, &nTokenBytes, |  | 
|  1302                                                &iStartOffset, &iEndOffset, |  | 
|  1303                                                &iPosition) ){ |  | 
|  1304     DocList *p; |  | 
|  1305  |  | 
|  1306     /* Positions can't be negative; we use -1 as a terminator internally. */ |  | 
|  1307     if( iPosition<0 ) { |  | 
|  1308       rc = SQLITE_ERROR;   |  | 
|  1309       goto err; |  | 
|  1310     } |  | 
|  1311  |  | 
|  1312     p = HashFind(terms, pToken, nTokenBytes); |  | 
|  1313     if( p==NULL ){ |  | 
|  1314       p = docListNew(DL_POSITIONS_OFFSETS); |  | 
|  1315       docListAddDocid(p, iDocid); |  | 
|  1316       HashInsert(terms, pToken, nTokenBytes, p); |  | 
|  1317     } |  | 
|  1318     docListAddPosOffset(p, iPosition, iStartOffset, iEndOffset); |  | 
|  1319   } |  | 
|  1320  |  | 
|  1321 err: |  | 
|  1322   /* TODO(shess) Check return?  Should this be able to cause errors at |  | 
|  1323   ** this point?  Actually, same question about sqlite3_finalize(), |  | 
|  1324   ** though one could argue that failure there means that the data is |  | 
|  1325   ** not durable.  *ponder* |  | 
|  1326   */ |  | 
|  1327   pTokenizer->pModule->xClose(pCursor); |  | 
|  1328   return rc; |  | 
|  1329 } |  | 
|  1330 /* Update the %_terms table to map the term [zTerm] to the given rowid. */ |  | 
|  1331 static int index_insert_term(fulltext_vtab *v, const char *zTerm, int nTerm, |  | 
|  1332                              sqlite_int64 iDocid, DocList *p){ |  | 
|  1333   sqlite_int64 iFirst; |  | 
|  1334   sqlite_int64 iIndexRow; |  | 
|  1335   DocList doclist; |  | 
|  1336  |  | 
|  1337   int rc = term_chunk_select(v, zTerm, nTerm, iDocid, &iFirst); |  | 
|  1338   if( rc==SQLITE_DONE ){ |  | 
|  1339     docListInit(&doclist, DL_POSITIONS_OFFSETS, 0, 0); |  | 
|  1340     if( docListUpdate(&doclist, iDocid, p) ){ |  | 
|  1341       rc = term_insert(v, zTerm, nTerm, iDocid, &doclist); |  | 
|  1342       docListDestroy(&doclist); |  | 
|  1343       return rc; |  | 
|  1344     } |  | 
|  1345     return SQLITE_OK; |  | 
|  1346   } |  | 
|  1347   if( rc!=SQLITE_ROW ) return SQLITE_ERROR; |  | 
|  1348  |  | 
|  1349   /* This word is in the index; add this document ID to its blob. */ |  | 
|  1350  |  | 
|  1351   rc = term_select(v, zTerm, nTerm, iFirst, &iIndexRow, &doclist); |  | 
|  1352   if( rc!=SQLITE_OK ) return rc; |  | 
|  1353  |  | 
|  1354   if( docListUpdate(&doclist, iDocid, p) ){ |  | 
|  1355     /* If the blob is too big, split it in half. */ |  | 
|  1356     if( doclist.nData>CHUNK_MAX ){ |  | 
|  1357       DocList half; |  | 
|  1358       if( docListSplit(&doclist, &half) ){ |  | 
|  1359         rc = term_insert(v, zTerm, nTerm, firstDocid(&half), &half); |  | 
|  1360         docListDestroy(&half); |  | 
|  1361         if( rc!=SQLITE_OK ) goto err; |  | 
|  1362       } |  | 
|  1363     } |  | 
|  1364     rc = term_update(v, iIndexRow, &doclist); |  | 
|  1365   } |  | 
|  1366  |  | 
|  1367 err: |  | 
|  1368   docListDestroy(&doclist); |  | 
|  1369   return rc; |  | 
|  1370 } |  | 
|  1371  |  | 
|  1372 /* Insert a row into the full-text index; set *piRowid to be the ID of the |  | 
|  1373  * new row. */ |  | 
|  1374 static int index_insert(fulltext_vtab *v, |  | 
|  1375                         sqlite3_value *pRequestRowid, const char *zText, |  | 
|  1376                         sqlite_int64 *piRowid){ |  | 
|  1377   Hash terms;  /* maps term string -> PosList */ |  | 
|  1378   HashElem *e; |  | 
|  1379  |  | 
|  1380   int rc = content_insert(v, pRequestRowid, zText, -1); |  | 
|  1381   if( rc!=SQLITE_OK ) return rc; |  | 
|  1382   *piRowid = sqlite3_last_insert_rowid(v->db); |  | 
|  1383  |  | 
|  1384   if( !zText ) return SQLITE_OK;   /* nothing to index */ |  | 
|  1385  |  | 
|  1386   rc = build_terms(&terms, v->pTokenizer, zText, *piRowid); |  | 
|  1387   if( rc!=SQLITE_OK ) return rc; |  | 
|  1388  |  | 
|  1389   for(e=HashFirst(&terms); e; e=HashNext(e)){ |  | 
|  1390     DocList *p = HashData(e); |  | 
|  1391     rc = index_insert_term(v, HashKey(e), HashKeysize(e), *piRowid, p); |  | 
|  1392     if( rc!=SQLITE_OK ) break; |  | 
|  1393   } |  | 
|  1394  |  | 
|  1395   for(e=HashFirst(&terms); e; e=HashNext(e)){ |  | 
|  1396     DocList *p = HashData(e); |  | 
|  1397     docListDelete(p); |  | 
|  1398   } |  | 
|  1399   HashClear(&terms); |  | 
|  1400   return rc; |  | 
|  1401 } |  | 
|  1402  |  | 
|  1403 static int index_delete_term(fulltext_vtab *v, const char *zTerm, int nTerm, |  | 
|  1404                              sqlite_int64 iDocid){ |  | 
|  1405   sqlite_int64 iFirst; |  | 
|  1406   sqlite_int64 iIndexRow; |  | 
|  1407   DocList doclist; |  | 
|  1408  |  | 
|  1409   int rc = term_chunk_select(v, zTerm, nTerm, iDocid, &iFirst); |  | 
|  1410   if( rc!=SQLITE_ROW ) return SQLITE_ERROR; |  | 
|  1411  |  | 
|  1412   rc = term_select(v, zTerm, nTerm, iFirst, &iIndexRow, &doclist); |  | 
|  1413   if( rc!=SQLITE_OK ) return rc; |  | 
|  1414  |  | 
|  1415   if( docListUpdate(&doclist, iDocid, NULL) ){ |  | 
|  1416     if( doclist.nData>0 ){ |  | 
|  1417       rc = term_update(v, iIndexRow, &doclist); |  | 
|  1418     } else {  /* empty posting list */ |  | 
|  1419       rc = term_delete(v, iIndexRow); |  | 
|  1420     } |  | 
|  1421   } |  | 
|  1422   docListDestroy(&doclist); |  | 
|  1423   return rc; |  | 
|  1424 } |  | 
|  1425  |  | 
|  1426 /* Delete a row from the full-text index. */ |  | 
|  1427 static int index_delete(fulltext_vtab *v, sqlite_int64 iRow){ |  | 
|  1428   char *zText; |  | 
|  1429   Hash terms; |  | 
|  1430   HashElem *e; |  | 
|  1431  |  | 
|  1432   int rc = content_select(v, iRow, &zText); |  | 
|  1433   if( rc!=SQLITE_OK ) return rc; |  | 
|  1434  |  | 
|  1435   rc = build_terms(&terms, v->pTokenizer, zText, iRow); |  | 
|  1436   free(zText); |  | 
|  1437   if( rc!=SQLITE_OK ) return rc; |  | 
|  1438  |  | 
|  1439   for(e=HashFirst(&terms); e; e=HashNext(e)){ |  | 
|  1440     rc = index_delete_term(v, HashKey(e), HashKeysize(e), iRow); |  | 
|  1441     if( rc!=SQLITE_OK ) break; |  | 
|  1442   } |  | 
|  1443   for(e=HashFirst(&terms); e; e=HashNext(e)){ |  | 
|  1444     DocList *p = HashData(e); |  | 
|  1445     docListDelete(p); |  | 
|  1446   } |  | 
|  1447   HashClear(&terms); |  | 
|  1448  |  | 
|  1449   return content_delete(v, iRow); |  | 
|  1450 } |  | 
|  1451  |  | 
|  1452 static int fulltextUpdate(sqlite3_vtab *pVtab, int nArg, sqlite3_value **ppArg, |  | 
|  1453                    sqlite_int64 *pRowid){ |  | 
|  1454   fulltext_vtab *v = (fulltext_vtab *) pVtab; |  | 
|  1455  |  | 
|  1456   if( nArg<2 ){ |  | 
|  1457     return index_delete(v, sqlite3_value_int64(ppArg[0])); |  | 
|  1458   } |  | 
|  1459  |  | 
|  1460   if( sqlite3_value_type(ppArg[0]) != SQLITE_NULL ){ |  | 
|  1461     return SQLITE_ERROR;   /* an update; not yet supported */ |  | 
|  1462   } |  | 
|  1463  |  | 
|  1464   assert( nArg==3 );    /* ppArg[1] = rowid, ppArg[2] = content */ |  | 
|  1465   return index_insert(v, ppArg[1], |  | 
|  1466                       (const char *)sqlite3_value_text(ppArg[2]), pRowid); |  | 
|  1467 } |  | 
|  1468  |  | 
|  1469 static sqlite3_module fulltextModule = { |  | 
|  1470   0, |  | 
|  1471   fulltextCreate, |  | 
|  1472   fulltextConnect, |  | 
|  1473   fulltextBestIndex, |  | 
|  1474   fulltextDisconnect, |  | 
|  1475   fulltextDestroy, |  | 
|  1476   fulltextOpen, |  | 
|  1477   fulltextClose, |  | 
|  1478   fulltextFilter, |  | 
|  1479   fulltextNext, |  | 
|  1480   fulltextEof, |  | 
|  1481   fulltextColumn, |  | 
|  1482   fulltextRowid, |  | 
|  1483   fulltextUpdate |  | 
|  1484 }; |  | 
|  1485  |  | 
|  1486 int fulltext_init(sqlite3 *db){ |  | 
|  1487  return sqlite3_create_module(db, "fulltext", &fulltextModule, 0); |  | 
|  1488 } |  | 
|  1489  |  | 
|  1490 #if !SQLITE_CORE |  | 
|  1491 int sqlite3_extension_init(sqlite3 *db, char **pzErrMsg, |  | 
|  1492                            const sqlite3_api_routines *pApi){ |  | 
|  1493  SQLITE_EXTENSION_INIT2(pApi) |  | 
|  1494  return fulltext_init(db); |  | 
|  1495 } |  | 
|  1496 #endif |  | 
| OLD | NEW |