Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(6)

Side by Side Diff: third_party/sqlite/src/ext/fts3/fts3_write.c

Issue 5626002: Update sqlite to 3.7.3. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/third_party/sqlite/src
Patch Set: Remove misc change. Created 10 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 /*
2 ** 2009 Oct 23
3 **
4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
6 **
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
10 **
11 ******************************************************************************
12 **
13 ** This file is part of the SQLite FTS3 extension module. Specifically,
14 ** this file contains code to insert, update and delete rows from FTS3
15 ** tables. It also contains code to merge FTS3 b-tree segments. Some
16 ** of the sub-routines used to merge segments are also used by the query
17 ** code in fts3.c.
18 */
19
20 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
21
22 #include "fts3Int.h"
23 #include <string.h>
24 #include <assert.h>
25 #include <stdlib.h>
26
27 typedef struct PendingList PendingList;
28 typedef struct SegmentNode SegmentNode;
29 typedef struct SegmentWriter SegmentWriter;
30
31 /*
32 ** Data structure used while accumulating terms in the pending-terms hash
33 ** table. The hash table entry maps from term (a string) to a malloc'd
34 ** instance of this structure.
35 */
36 struct PendingList {
37 int nData;
38 char *aData;
39 int nSpace;
40 sqlite3_int64 iLastDocid;
41 sqlite3_int64 iLastCol;
42 sqlite3_int64 iLastPos;
43 };
44
45 /*
46 ** An instance of this structure is used to iterate through the terms on
47 ** a contiguous set of segment b-tree leaf nodes. Although the details of
48 ** this structure are only manipulated by code in this file, opaque handles
49 ** of type Fts3SegReader* are also used by code in fts3.c to iterate through
50 ** terms when querying the full-text index. See functions:
51 **
52 ** sqlite3Fts3SegReaderNew()
53 ** sqlite3Fts3SegReaderFree()
54 ** sqlite3Fts3SegReaderIterate()
55 **
56 ** Methods used to manipulate Fts3SegReader structures:
57 **
58 ** fts3SegReaderNext()
59 ** fts3SegReaderFirstDocid()
60 ** fts3SegReaderNextDocid()
61 */
62 struct Fts3SegReader {
63 int iIdx; /* Index within level, or 0x7FFFFFFF for PT */
64 sqlite3_int64 iStartBlock;
65 sqlite3_int64 iEndBlock;
66 sqlite3_stmt *pStmt; /* SQL Statement to access leaf nodes */
67 char *aNode; /* Pointer to node data (or NULL) */
68 int nNode; /* Size of buffer at aNode (or 0) */
69 int nTermAlloc; /* Allocated size of zTerm buffer */
70 Fts3HashElem **ppNextElem;
71
72 /* Variables set by fts3SegReaderNext(). These may be read directly
73 ** by the caller. They are valid from the time SegmentReaderNew() returns
74 ** until SegmentReaderNext() returns something other than SQLITE_OK
75 ** (i.e. SQLITE_DONE).
76 */
77 int nTerm; /* Number of bytes in current term */
78 char *zTerm; /* Pointer to current term */
79 char *aDoclist; /* Pointer to doclist of current entry */
80 int nDoclist; /* Size of doclist in current entry */
81
82 /* The following variables are used to iterate through the current doclist */
83 char *pOffsetList;
84 sqlite3_int64 iDocid;
85 };
86
87 #define fts3SegReaderIsPending(p) ((p)->ppNextElem!=0)
88
89 /*
90 ** An instance of this structure is used to create a segment b-tree in the
91 ** database. The internal details of this type are only accessed by the
92 ** following functions:
93 **
94 ** fts3SegWriterAdd()
95 ** fts3SegWriterFlush()
96 ** fts3SegWriterFree()
97 */
98 struct SegmentWriter {
99 SegmentNode *pTree; /* Pointer to interior tree structure */
100 sqlite3_int64 iFirst; /* First slot in %_segments written */
101 sqlite3_int64 iFree; /* Next free slot in %_segments */
102 char *zTerm; /* Pointer to previous term buffer */
103 int nTerm; /* Number of bytes in zTerm */
104 int nMalloc; /* Size of malloc'd buffer at zMalloc */
105 char *zMalloc; /* Malloc'd space (possibly) used for zTerm */
106 int nSize; /* Size of allocation at aData */
107 int nData; /* Bytes of data in aData */
108 char *aData; /* Pointer to block from malloc() */
109 };
110
111 /*
112 ** Type SegmentNode is used by the following three functions to create
113 ** the interior part of the segment b+-tree structures (everything except
114 ** the leaf nodes). These functions and type are only ever used by code
115 ** within the fts3SegWriterXXX() family of functions described above.
116 **
117 ** fts3NodeAddTerm()
118 ** fts3NodeWrite()
119 ** fts3NodeFree()
120 */
121 struct SegmentNode {
122 SegmentNode *pParent; /* Parent node (or NULL for root node) */
123 SegmentNode *pRight; /* Pointer to right-sibling */
124 SegmentNode *pLeftmost; /* Pointer to left-most node of this depth */
125 int nEntry; /* Number of terms written to node so far */
126 char *zTerm; /* Pointer to previous term buffer */
127 int nTerm; /* Number of bytes in zTerm */
128 int nMalloc; /* Size of malloc'd buffer at zMalloc */
129 char *zMalloc; /* Malloc'd space (possibly) used for zTerm */
130 int nData; /* Bytes of valid data so far */
131 char *aData; /* Node data */
132 };
133
134 /*
135 ** Valid values for the second argument to fts3SqlStmt().
136 */
137 #define SQL_DELETE_CONTENT 0
138 #define SQL_IS_EMPTY 1
139 #define SQL_DELETE_ALL_CONTENT 2
140 #define SQL_DELETE_ALL_SEGMENTS 3
141 #define SQL_DELETE_ALL_SEGDIR 4
142 #define SQL_DELETE_ALL_DOCSIZE 5
143 #define SQL_DELETE_ALL_STAT 6
144 #define SQL_SELECT_CONTENT_BY_ROWID 7
145 #define SQL_NEXT_SEGMENT_INDEX 8
146 #define SQL_INSERT_SEGMENTS 9
147 #define SQL_NEXT_SEGMENTS_ID 10
148 #define SQL_INSERT_SEGDIR 11
149 #define SQL_SELECT_LEVEL 12
150 #define SQL_SELECT_ALL_LEVEL 13
151 #define SQL_SELECT_LEVEL_COUNT 14
152 #define SQL_SELECT_SEGDIR_COUNT_MAX 15
153 #define SQL_DELETE_SEGDIR_BY_LEVEL 16
154 #define SQL_DELETE_SEGMENTS_RANGE 17
155 #define SQL_CONTENT_INSERT 18
156 #define SQL_GET_BLOCK 19
157 #define SQL_DELETE_DOCSIZE 20
158 #define SQL_REPLACE_DOCSIZE 21
159 #define SQL_SELECT_DOCSIZE 22
160 #define SQL_SELECT_DOCTOTAL 23
161 #define SQL_REPLACE_DOCTOTAL 24
162
163 /*
164 ** This function is used to obtain an SQLite prepared statement handle
165 ** for the statement identified by the second argument. If successful,
166 ** *pp is set to the requested statement handle and SQLITE_OK returned.
167 ** Otherwise, an SQLite error code is returned and *pp is set to 0.
168 **
169 ** If argument apVal is not NULL, then it must point to an array with
170 ** at least as many entries as the requested statement has bound
171 ** parameters. The values are bound to the statements parameters before
172 ** returning.
173 */
174 static int fts3SqlStmt(
175 Fts3Table *p, /* Virtual table handle */
176 int eStmt, /* One of the SQL_XXX constants above */
177 sqlite3_stmt **pp, /* OUT: Statement handle */
178 sqlite3_value **apVal /* Values to bind to statement */
179 ){
180 const char *azSql[] = {
181 /* 0 */ "DELETE FROM %Q.'%q_content' WHERE rowid = ?",
182 /* 1 */ "SELECT NOT EXISTS(SELECT docid FROM %Q.'%q_content' WHERE rowid!=?)",
183 /* 2 */ "DELETE FROM %Q.'%q_content'",
184 /* 3 */ "DELETE FROM %Q.'%q_segments'",
185 /* 4 */ "DELETE FROM %Q.'%q_segdir'",
186 /* 5 */ "DELETE FROM %Q.'%q_docsize'",
187 /* 6 */ "DELETE FROM %Q.'%q_stat'",
188 /* 7 */ "SELECT * FROM %Q.'%q_content' WHERE rowid=?",
189 /* 8 */ "SELECT (SELECT max(idx) FROM %Q.'%q_segdir' WHERE level = ?) + 1",
190 /* 9 */ "INSERT INTO %Q.'%q_segments'(blockid, block) VALUES(?, ?)",
191 /* 10 */ "SELECT coalesce((SELECT max(blockid) FROM %Q.'%q_segments') + 1, 1)",
192 /* 11 */ "INSERT INTO %Q.'%q_segdir' VALUES(?,?,?,?,?,?)",
193
194 /* Return segments in order from oldest to newest.*/
195 /* 12 */ "SELECT idx, start_block, leaves_end_block, end_block, root "
196 "FROM %Q.'%q_segdir' WHERE level = ? ORDER BY idx ASC",
197 /* 13 */ "SELECT idx, start_block, leaves_end_block, end_block, root "
198 "FROM %Q.'%q_segdir' ORDER BY level DESC, idx ASC",
199
200 /* 14 */ "SELECT count(*) FROM %Q.'%q_segdir' WHERE level = ?",
201 /* 15 */ "SELECT count(*), max(level) FROM %Q.'%q_segdir'",
202
203 /* 16 */ "DELETE FROM %Q.'%q_segdir' WHERE level = ?",
204 /* 17 */ "DELETE FROM %Q.'%q_segments' WHERE blockid BETWEEN ? AND ?",
205 /* 18 */ "INSERT INTO %Q.'%q_content' VALUES(%z)",
206 /* 19 */ "SELECT block FROM %Q.'%q_segments' WHERE blockid = ?",
207 /* 20 */ "DELETE FROM %Q.'%q_docsize' WHERE docid = ?",
208 /* 21 */ "REPLACE INTO %Q.'%q_docsize' VALUES(?,?)",
209 /* 22 */ "SELECT size FROM %Q.'%q_docsize' WHERE docid=?",
210 /* 23 */ "SELECT value FROM %Q.'%q_stat' WHERE id=0",
211 /* 24 */ "REPLACE INTO %Q.'%q_stat' VALUES(0,?)",
212 };
213 int rc = SQLITE_OK;
214 sqlite3_stmt *pStmt;
215
216 assert( SizeofArray(azSql)==SizeofArray(p->aStmt) );
217 assert( eStmt<SizeofArray(azSql) && eStmt>=0 );
218
219 pStmt = p->aStmt[eStmt];
220 if( !pStmt ){
221 char *zSql;
222 if( eStmt==SQL_CONTENT_INSERT ){
223 int i; /* Iterator variable */
224 char *zVarlist; /* The "?, ?, ..." string */
225 zVarlist = (char *)sqlite3_malloc(2*p->nColumn+2);
226 if( !zVarlist ){
227 *pp = 0;
228 return SQLITE_NOMEM;
229 }
230 zVarlist[0] = '?';
231 zVarlist[p->nColumn*2+1] = '\0';
232 for(i=1; i<=p->nColumn; i++){
233 zVarlist[i*2-1] = ',';
234 zVarlist[i*2] = '?';
235 }
236 zSql = sqlite3_mprintf(azSql[eStmt], p->zDb, p->zName, zVarlist);
237 }else{
238 zSql = sqlite3_mprintf(azSql[eStmt], p->zDb, p->zName);
239 }
240 if( !zSql ){
241 rc = SQLITE_NOMEM;
242 }else{
243 rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, NULL);
244 sqlite3_free(zSql);
245 assert( rc==SQLITE_OK || pStmt==0 );
246 p->aStmt[eStmt] = pStmt;
247 }
248 }
249 if( apVal ){
250 int i;
251 int nParam = sqlite3_bind_parameter_count(pStmt);
252 for(i=0; rc==SQLITE_OK && i<nParam; i++){
253 rc = sqlite3_bind_value(pStmt, i+1, apVal[i]);
254 }
255 }
256 *pp = pStmt;
257 return rc;
258 }
259
260 /*
261 ** Similar to fts3SqlStmt(). Except, after binding the parameters in
262 ** array apVal[] to the SQL statement identified by eStmt, the statement
263 ** is executed.
264 **
265 ** Returns SQLITE_OK if the statement is successfully executed, or an
266 ** SQLite error code otherwise.
267 */
268 static void fts3SqlExec(
269 int *pRC, /* Result code */
270 Fts3Table *p, /* The FTS3 table */
271 int eStmt, /* Index of statement to evaluate */
272 sqlite3_value **apVal /* Parameters to bind */
273 ){
274 sqlite3_stmt *pStmt;
275 int rc;
276 if( *pRC ) return;
277 rc = fts3SqlStmt(p, eStmt, &pStmt, apVal);
278 if( rc==SQLITE_OK ){
279 sqlite3_step(pStmt);
280 rc = sqlite3_reset(pStmt);
281 }
282 *pRC = rc;
283 }
284
285
286 /*
287 ** Read a single block from the %_segments table. If the specified block
288 ** does not exist, return SQLITE_CORRUPT. If some other error (malloc, IO
289 ** etc.) occurs, return the appropriate SQLite error code.
290 **
291 ** Otherwise, if successful, set *pzBlock to point to a buffer containing
292 ** the block read from the database, and *pnBlock to the size of the read
293 ** block in bytes.
294 **
295 ** WARNING: The returned buffer is only valid until the next call to
296 ** sqlite3Fts3ReadBlock().
297 */
298 int sqlite3Fts3ReadBlock(
299 Fts3Table *p,
300 sqlite3_int64 iBlock,
301 char const **pzBlock,
302 int *pnBlock
303 ){
304 sqlite3_stmt *pStmt;
305 int rc = fts3SqlStmt(p, SQL_GET_BLOCK, &pStmt, 0);
306 if( rc!=SQLITE_OK ) return rc;
307 sqlite3_reset(pStmt);
308
309 if( pzBlock ){
310 sqlite3_bind_int64(pStmt, 1, iBlock);
311 rc = sqlite3_step(pStmt);
312 if( rc!=SQLITE_ROW ){
313 return (rc==SQLITE_DONE ? SQLITE_CORRUPT : rc);
314 }
315
316 *pnBlock = sqlite3_column_bytes(pStmt, 0);
317 *pzBlock = (char *)sqlite3_column_blob(pStmt, 0);
318 if( sqlite3_column_type(pStmt, 0)!=SQLITE_BLOB ){
319 return SQLITE_CORRUPT;
320 }
321 }
322 return SQLITE_OK;
323 }
324
325 /*
326 ** This function ensures that the caller has obtained a shared-cache
327 ** table-lock on the %_content table. This is required before reading
328 ** data from the fts3 table. If this lock is not acquired first, then
329 ** the caller may end up holding read-locks on the %_segments and %_segdir
330 ** tables, but no read-lock on the %_content table. If this happens
331 ** a second connection will be able to write to the fts3 table, but
332 ** attempting to commit those writes might return SQLITE_LOCKED or
333 ** SQLITE_LOCKED_SHAREDCACHE (because the commit attempts to obtain
334 ** write-locks on the %_segments and %_segdir ** tables).
335 **
336 ** We try to avoid this because if FTS3 returns any error when committing
337 ** a transaction, the whole transaction will be rolled back. And this is
338 ** not what users expect when they get SQLITE_LOCKED_SHAREDCACHE. It can
339 ** still happen if the user reads data directly from the %_segments or
340 ** %_segdir tables instead of going through FTS3 though.
341 */
342 int sqlite3Fts3ReadLock(Fts3Table *p){
343 int rc; /* Return code */
344 sqlite3_stmt *pStmt; /* Statement used to obtain lock */
345
346 rc = fts3SqlStmt(p, SQL_SELECT_CONTENT_BY_ROWID, &pStmt, 0);
347 if( rc==SQLITE_OK ){
348 sqlite3_bind_null(pStmt, 1);
349 sqlite3_step(pStmt);
350 rc = sqlite3_reset(pStmt);
351 }
352 return rc;
353 }
354
355 /*
356 ** Set *ppStmt to a statement handle that may be used to iterate through
357 ** all rows in the %_segdir table, from oldest to newest. If successful,
358 ** return SQLITE_OK. If an error occurs while preparing the statement,
359 ** return an SQLite error code.
360 **
361 ** There is only ever one instance of this SQL statement compiled for
362 ** each FTS3 table.
363 **
364 ** The statement returns the following columns from the %_segdir table:
365 **
366 ** 0: idx
367 ** 1: start_block
368 ** 2: leaves_end_block
369 ** 3: end_block
370 ** 4: root
371 */
372 int sqlite3Fts3AllSegdirs(Fts3Table *p, sqlite3_stmt **ppStmt){
373 return fts3SqlStmt(p, SQL_SELECT_ALL_LEVEL, ppStmt, 0);
374 }
375
376
377 /*
378 ** Append a single varint to a PendingList buffer. SQLITE_OK is returned
379 ** if successful, or an SQLite error code otherwise.
380 **
381 ** This function also serves to allocate the PendingList structure itself.
382 ** For example, to create a new PendingList structure containing two
383 ** varints:
384 **
385 ** PendingList *p = 0;
386 ** fts3PendingListAppendVarint(&p, 1);
387 ** fts3PendingListAppendVarint(&p, 2);
388 */
389 static int fts3PendingListAppendVarint(
390 PendingList **pp, /* IN/OUT: Pointer to PendingList struct */
391 sqlite3_int64 i /* Value to append to data */
392 ){
393 PendingList *p = *pp;
394
395 /* Allocate or grow the PendingList as required. */
396 if( !p ){
397 p = sqlite3_malloc(sizeof(*p) + 100);
398 if( !p ){
399 return SQLITE_NOMEM;
400 }
401 p->nSpace = 100;
402 p->aData = (char *)&p[1];
403 p->nData = 0;
404 }
405 else if( p->nData+FTS3_VARINT_MAX+1>p->nSpace ){
406 int nNew = p->nSpace * 2;
407 p = sqlite3_realloc(p, sizeof(*p) + nNew);
408 if( !p ){
409 sqlite3_free(*pp);
410 *pp = 0;
411 return SQLITE_NOMEM;
412 }
413 p->nSpace = nNew;
414 p->aData = (char *)&p[1];
415 }
416
417 /* Append the new serialized varint to the end of the list. */
418 p->nData += sqlite3Fts3PutVarint(&p->aData[p->nData], i);
419 p->aData[p->nData] = '\0';
420 *pp = p;
421 return SQLITE_OK;
422 }
423
424 /*
425 ** Add a docid/column/position entry to a PendingList structure. Non-zero
426 ** is returned if the structure is sqlite3_realloced as part of adding
427 ** the entry. Otherwise, zero.
428 **
429 ** If an OOM error occurs, *pRc is set to SQLITE_NOMEM before returning.
430 ** Zero is always returned in this case. Otherwise, if no OOM error occurs,
431 ** it is set to SQLITE_OK.
432 */
433 static int fts3PendingListAppend(
434 PendingList **pp, /* IN/OUT: PendingList structure */
435 sqlite3_int64 iDocid, /* Docid for entry to add */
436 sqlite3_int64 iCol, /* Column for entry to add */
437 sqlite3_int64 iPos, /* Position of term for entry to add */
438 int *pRc /* OUT: Return code */
439 ){
440 PendingList *p = *pp;
441 int rc = SQLITE_OK;
442
443 assert( !p || p->iLastDocid<=iDocid );
444
445 if( !p || p->iLastDocid!=iDocid ){
446 sqlite3_int64 iDelta = iDocid - (p ? p->iLastDocid : 0);
447 if( p ){
448 assert( p->nData<p->nSpace );
449 assert( p->aData[p->nData]==0 );
450 p->nData++;
451 }
452 if( SQLITE_OK!=(rc = fts3PendingListAppendVarint(&p, iDelta)) ){
453 goto pendinglistappend_out;
454 }
455 p->iLastCol = -1;
456 p->iLastPos = 0;
457 p->iLastDocid = iDocid;
458 }
459 if( iCol>0 && p->iLastCol!=iCol ){
460 if( SQLITE_OK!=(rc = fts3PendingListAppendVarint(&p, 1))
461 || SQLITE_OK!=(rc = fts3PendingListAppendVarint(&p, iCol))
462 ){
463 goto pendinglistappend_out;
464 }
465 p->iLastCol = iCol;
466 p->iLastPos = 0;
467 }
468 if( iCol>=0 ){
469 assert( iPos>p->iLastPos || (iPos==0 && p->iLastPos==0) );
470 rc = fts3PendingListAppendVarint(&p, 2+iPos-p->iLastPos);
471 if( rc==SQLITE_OK ){
472 p->iLastPos = iPos;
473 }
474 }
475
476 pendinglistappend_out:
477 *pRc = rc;
478 if( p!=*pp ){
479 *pp = p;
480 return 1;
481 }
482 return 0;
483 }
484
485 /*
486 ** Tokenize the nul-terminated string zText and add all tokens to the
487 ** pending-terms hash-table. The docid used is that currently stored in
488 ** p->iPrevDocid, and the column is specified by argument iCol.
489 **
490 ** If successful, SQLITE_OK is returned. Otherwise, an SQLite error code.
491 */
492 static int fts3PendingTermsAdd(
493 Fts3Table *p, /* FTS table into which text will be inserted */
494 const char *zText, /* Text of document to be inseted */
495 int iCol, /* Column number into which text is inserted */
496 u32 *pnWord /* OUT: Number of tokens inserted */
497 ){
498 int rc;
499 int iStart;
500 int iEnd;
501 int iPos;
502 int nWord = 0;
503
504 char const *zToken;
505 int nToken;
506
507 sqlite3_tokenizer *pTokenizer = p->pTokenizer;
508 sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
509 sqlite3_tokenizer_cursor *pCsr;
510 int (*xNext)(sqlite3_tokenizer_cursor *pCursor,
511 const char**,int*,int*,int*,int*);
512
513 assert( pTokenizer && pModule );
514
515 rc = pModule->xOpen(pTokenizer, zText, -1, &pCsr);
516 if( rc!=SQLITE_OK ){
517 return rc;
518 }
519 pCsr->pTokenizer = pTokenizer;
520
521 xNext = pModule->xNext;
522 while( SQLITE_OK==rc
523 && SQLITE_OK==(rc = xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos))
524 ){
525 PendingList *pList;
526
527 if( iPos>=nWord ) nWord = iPos+1;
528
529 /* Positions cannot be negative; we use -1 as a terminator internally.
530 ** Tokens must have a non-zero length.
531 */
532 if( iPos<0 || !zToken || nToken<=0 ){
533 rc = SQLITE_ERROR;
534 break;
535 }
536
537 pList = (PendingList *)fts3HashFind(&p->pendingTerms, zToken, nToken);
538 if( pList ){
539 p->nPendingData -= (pList->nData + nToken + sizeof(Fts3HashElem));
540 }
541 if( fts3PendingListAppend(&pList, p->iPrevDocid, iCol, iPos, &rc) ){
542 if( pList==fts3HashInsert(&p->pendingTerms, zToken, nToken, pList) ){
543 /* Malloc failed while inserting the new entry. This can only
544 ** happen if there was no previous entry for this token.
545 */
546 assert( 0==fts3HashFind(&p->pendingTerms, zToken, nToken) );
547 sqlite3_free(pList);
548 rc = SQLITE_NOMEM;
549 }
550 }
551 if( rc==SQLITE_OK ){
552 p->nPendingData += (pList->nData + nToken + sizeof(Fts3HashElem));
553 }
554 }
555
556 pModule->xClose(pCsr);
557 *pnWord = nWord;
558 return (rc==SQLITE_DONE ? SQLITE_OK : rc);
559 }
560
561 /*
562 ** Calling this function indicates that subsequent calls to
563 ** fts3PendingTermsAdd() are to add term/position-list pairs for the
564 ** contents of the document with docid iDocid.
565 */
566 static int fts3PendingTermsDocid(Fts3Table *p, sqlite_int64 iDocid){
567 /* TODO(shess) Explore whether partially flushing the buffer on
568 ** forced-flush would provide better performance. I suspect that if
569 ** we ordered the doclists by size and flushed the largest until the
570 ** buffer was half empty, that would let the less frequent terms
571 ** generate longer doclists.
572 */
573 if( iDocid<=p->iPrevDocid || p->nPendingData>p->nMaxPendingData ){
574 int rc = sqlite3Fts3PendingTermsFlush(p);
575 if( rc!=SQLITE_OK ) return rc;
576 }
577 p->iPrevDocid = iDocid;
578 return SQLITE_OK;
579 }
580
581 void sqlite3Fts3PendingTermsClear(Fts3Table *p){
582 Fts3HashElem *pElem;
583 for(pElem=fts3HashFirst(&p->pendingTerms); pElem; pElem=fts3HashNext(pElem)){
584 sqlite3_free(fts3HashData(pElem));
585 }
586 fts3HashClear(&p->pendingTerms);
587 p->nPendingData = 0;
588 }
589
590 /*
591 ** This function is called by the xUpdate() method as part of an INSERT
592 ** operation. It adds entries for each term in the new record to the
593 ** pendingTerms hash table.
594 **
595 ** Argument apVal is the same as the similarly named argument passed to
596 ** fts3InsertData(). Parameter iDocid is the docid of the new row.
597 */
598 static int fts3InsertTerms(Fts3Table *p, sqlite3_value **apVal, u32 *aSz){
599 int i; /* Iterator variable */
600 for(i=2; i<p->nColumn+2; i++){
601 const char *zText = (const char *)sqlite3_value_text(apVal[i]);
602 if( zText ){
603 int rc = fts3PendingTermsAdd(p, zText, i-2, &aSz[i-2]);
604 if( rc!=SQLITE_OK ){
605 return rc;
606 }
607 }
608 }
609 return SQLITE_OK;
610 }
611
612 /*
613 ** This function is called by the xUpdate() method for an INSERT operation.
614 ** The apVal parameter is passed a copy of the apVal argument passed by
615 ** SQLite to the xUpdate() method. i.e:
616 **
617 ** apVal[0] Not used for INSERT.
618 ** apVal[1] rowid
619 ** apVal[2] Left-most user-defined column
620 ** ...
621 ** apVal[p->nColumn+1] Right-most user-defined column
622 ** apVal[p->nColumn+2] Hidden column with same name as table
623 ** apVal[p->nColumn+3] Hidden "docid" column (alias for rowid)
624 */
625 static int fts3InsertData(
626 Fts3Table *p, /* Full-text table */
627 sqlite3_value **apVal, /* Array of values to insert */
628 sqlite3_int64 *piDocid /* OUT: Docid for row just inserted */
629 ){
630 int rc; /* Return code */
631 sqlite3_stmt *pContentInsert; /* INSERT INTO %_content VALUES(...) */
632
633 /* Locate the statement handle used to insert data into the %_content
634 ** table. The SQL for this statement is:
635 **
636 ** INSERT INTO %_content VALUES(?, ?, ?, ...)
637 **
638 ** The statement features N '?' variables, where N is the number of user
639 ** defined columns in the FTS3 table, plus one for the docid field.
640 */
641 rc = fts3SqlStmt(p, SQL_CONTENT_INSERT, &pContentInsert, &apVal[1]);
642 if( rc!=SQLITE_OK ){
643 return rc;
644 }
645
646 /* There is a quirk here. The users INSERT statement may have specified
647 ** a value for the "rowid" field, for the "docid" field, or for both.
648 ** Which is a problem, since "rowid" and "docid" are aliases for the
649 ** same value. For example:
650 **
651 ** INSERT INTO fts3tbl(rowid, docid) VALUES(1, 2);
652 **
653 ** In FTS3, this is an error. It is an error to specify non-NULL values
654 ** for both docid and some other rowid alias.
655 */
656 if( SQLITE_NULL!=sqlite3_value_type(apVal[3+p->nColumn]) ){
657 if( SQLITE_NULL==sqlite3_value_type(apVal[0])
658 && SQLITE_NULL!=sqlite3_value_type(apVal[1])
659 ){
660 /* A rowid/docid conflict. */
661 return SQLITE_ERROR;
662 }
663 rc = sqlite3_bind_value(pContentInsert, 1, apVal[3+p->nColumn]);
664 if( rc!=SQLITE_OK ) return rc;
665 }
666
667 /* Execute the statement to insert the record. Set *piDocid to the
668 ** new docid value.
669 */
670 sqlite3_step(pContentInsert);
671 rc = sqlite3_reset(pContentInsert);
672
673 *piDocid = sqlite3_last_insert_rowid(p->db);
674 return rc;
675 }
676
677
678
679 /*
680 ** Remove all data from the FTS3 table. Clear the hash table containing
681 ** pending terms.
682 */
683 static int fts3DeleteAll(Fts3Table *p){
684 int rc = SQLITE_OK; /* Return code */
685
686 /* Discard the contents of the pending-terms hash table. */
687 sqlite3Fts3PendingTermsClear(p);
688
689 /* Delete everything from the %_content, %_segments and %_segdir tables. */
690 fts3SqlExec(&rc, p, SQL_DELETE_ALL_CONTENT, 0);
691 fts3SqlExec(&rc, p, SQL_DELETE_ALL_SEGMENTS, 0);
692 fts3SqlExec(&rc, p, SQL_DELETE_ALL_SEGDIR, 0);
693 if( p->bHasDocsize ){
694 fts3SqlExec(&rc, p, SQL_DELETE_ALL_DOCSIZE, 0);
695 fts3SqlExec(&rc, p, SQL_DELETE_ALL_STAT, 0);
696 }
697 return rc;
698 }
699
700 /*
701 ** The first element in the apVal[] array is assumed to contain the docid
702 ** (an integer) of a row about to be deleted. Remove all terms from the
703 ** full-text index.
704 */
705 static void fts3DeleteTerms(
706 int *pRC, /* Result code */
707 Fts3Table *p, /* The FTS table to delete from */
708 sqlite3_value **apVal, /* apVal[] contains the docid to be deleted */
709 u32 *aSz /* Sizes of deleted document written here */
710 ){
711 int rc;
712 sqlite3_stmt *pSelect;
713
714 if( *pRC ) return;
715 rc = fts3SqlStmt(p, SQL_SELECT_CONTENT_BY_ROWID, &pSelect, apVal);
716 if( rc==SQLITE_OK ){
717 if( SQLITE_ROW==sqlite3_step(pSelect) ){
718 int i;
719 for(i=1; i<=p->nColumn; i++){
720 const char *zText = (const char *)sqlite3_column_text(pSelect, i);
721 rc = fts3PendingTermsAdd(p, zText, -1, &aSz[i-1]);
722 if( rc!=SQLITE_OK ){
723 sqlite3_reset(pSelect);
724 *pRC = rc;
725 return;
726 }
727 }
728 }
729 rc = sqlite3_reset(pSelect);
730 }else{
731 sqlite3_reset(pSelect);
732 }
733 *pRC = rc;
734 }
735
736 /*
737 ** Forward declaration to account for the circular dependency between
738 ** functions fts3SegmentMerge() and fts3AllocateSegdirIdx().
739 */
740 static int fts3SegmentMerge(Fts3Table *, int);
741
742 /*
743 ** This function allocates a new level iLevel index in the segdir table.
744 ** Usually, indexes are allocated within a level sequentially starting
745 ** with 0, so the allocated index is one greater than the value returned
746 ** by:
747 **
748 ** SELECT max(idx) FROM %_segdir WHERE level = :iLevel
749 **
750 ** However, if there are already FTS3_MERGE_COUNT indexes at the requested
751 ** level, they are merged into a single level (iLevel+1) segment and the
752 ** allocated index is 0.
753 **
754 ** If successful, *piIdx is set to the allocated index slot and SQLITE_OK
755 ** returned. Otherwise, an SQLite error code is returned.
756 */
757 static int fts3AllocateSegdirIdx(Fts3Table *p, int iLevel, int *piIdx){
758 int rc; /* Return Code */
759 sqlite3_stmt *pNextIdx; /* Query for next idx at level iLevel */
760 int iNext = 0; /* Result of query pNextIdx */
761
762 /* Set variable iNext to the next available segdir index at level iLevel. */
763 rc = fts3SqlStmt(p, SQL_NEXT_SEGMENT_INDEX, &pNextIdx, 0);
764 if( rc==SQLITE_OK ){
765 sqlite3_bind_int(pNextIdx, 1, iLevel);
766 if( SQLITE_ROW==sqlite3_step(pNextIdx) ){
767 iNext = sqlite3_column_int(pNextIdx, 0);
768 }
769 rc = sqlite3_reset(pNextIdx);
770 }
771
772 if( rc==SQLITE_OK ){
773 /* If iNext is FTS3_MERGE_COUNT, indicating that level iLevel is already
774 ** full, merge all segments in level iLevel into a single iLevel+1
775 ** segment and allocate (newly freed) index 0 at level iLevel. Otherwise,
776 ** if iNext is less than FTS3_MERGE_COUNT, allocate index iNext.
777 */
778 if( iNext>=FTS3_MERGE_COUNT ){
779 rc = fts3SegmentMerge(p, iLevel);
780 *piIdx = 0;
781 }else{
782 *piIdx = iNext;
783 }
784 }
785
786 return rc;
787 }
788
789 /*
790 ** Move the iterator passed as the first argument to the next term in the
791 ** segment. If successful, SQLITE_OK is returned. If there is no next term,
792 ** SQLITE_DONE. Otherwise, an SQLite error code.
793 */
794 static int fts3SegReaderNext(Fts3SegReader *pReader){
795 char *pNext; /* Cursor variable */
796 int nPrefix; /* Number of bytes in term prefix */
797 int nSuffix; /* Number of bytes in term suffix */
798
799 if( !pReader->aDoclist ){
800 pNext = pReader->aNode;
801 }else{
802 pNext = &pReader->aDoclist[pReader->nDoclist];
803 }
804
805 if( !pNext || pNext>=&pReader->aNode[pReader->nNode] ){
806 int rc;
807 if( fts3SegReaderIsPending(pReader) ){
808 Fts3HashElem *pElem = *(pReader->ppNextElem);
809 if( pElem==0 ){
810 pReader->aNode = 0;
811 }else{
812 PendingList *pList = (PendingList *)fts3HashData(pElem);
813 pReader->zTerm = (char *)fts3HashKey(pElem);
814 pReader->nTerm = fts3HashKeysize(pElem);
815 pReader->nNode = pReader->nDoclist = pList->nData + 1;
816 pReader->aNode = pReader->aDoclist = pList->aData;
817 pReader->ppNextElem++;
818 assert( pReader->aNode );
819 }
820 return SQLITE_OK;
821 }
822 if( !pReader->pStmt ){
823 pReader->aNode = 0;
824 return SQLITE_OK;
825 }
826 rc = sqlite3_step(pReader->pStmt);
827 if( rc!=SQLITE_ROW ){
828 pReader->aNode = 0;
829 return (rc==SQLITE_DONE ? SQLITE_OK : rc);
830 }
831 pReader->nNode = sqlite3_column_bytes(pReader->pStmt, 0);
832 pReader->aNode = (char *)sqlite3_column_blob(pReader->pStmt, 0);
833 pNext = pReader->aNode;
834 }
835
836 pNext += sqlite3Fts3GetVarint32(pNext, &nPrefix);
837 pNext += sqlite3Fts3GetVarint32(pNext, &nSuffix);
838
839 if( nPrefix+nSuffix>pReader->nTermAlloc ){
840 int nNew = (nPrefix+nSuffix)*2;
841 char *zNew = sqlite3_realloc(pReader->zTerm, nNew);
842 if( !zNew ){
843 return SQLITE_NOMEM;
844 }
845 pReader->zTerm = zNew;
846 pReader->nTermAlloc = nNew;
847 }
848 memcpy(&pReader->zTerm[nPrefix], pNext, nSuffix);
849 pReader->nTerm = nPrefix+nSuffix;
850 pNext += nSuffix;
851 pNext += sqlite3Fts3GetVarint32(pNext, &pReader->nDoclist);
852 assert( pNext<&pReader->aNode[pReader->nNode] );
853 pReader->aDoclist = pNext;
854 pReader->pOffsetList = 0;
855 return SQLITE_OK;
856 }
857
858 /*
859 ** Set the SegReader to point to the first docid in the doclist associated
860 ** with the current term.
861 */
862 static void fts3SegReaderFirstDocid(Fts3SegReader *pReader){
863 int n;
864 assert( pReader->aDoclist );
865 assert( !pReader->pOffsetList );
866 n = sqlite3Fts3GetVarint(pReader->aDoclist, &pReader->iDocid);
867 pReader->pOffsetList = &pReader->aDoclist[n];
868 }
869
870 /*
871 ** Advance the SegReader to point to the next docid in the doclist
872 ** associated with the current term.
873 **
874 ** If arguments ppOffsetList and pnOffsetList are not NULL, then
875 ** *ppOffsetList is set to point to the first column-offset list
876 ** in the doclist entry (i.e. immediately past the docid varint).
877 ** *pnOffsetList is set to the length of the set of column-offset
878 ** lists, not including the nul-terminator byte. For example:
879 */
880 static void fts3SegReaderNextDocid(
881 Fts3SegReader *pReader,
882 char **ppOffsetList,
883 int *pnOffsetList
884 ){
885 char *p = pReader->pOffsetList;
886 char c = 0;
887
888 /* Pointer p currently points at the first byte of an offset list. The
889 ** following two lines advance it to point one byte past the end of
890 ** the same offset list.
891 */
892 while( *p | c ) c = *p++ & 0x80;
893 p++;
894
895 /* If required, populate the output variables with a pointer to and the
896 ** size of the previous offset-list.
897 */
898 if( ppOffsetList ){
899 *ppOffsetList = pReader->pOffsetList;
900 *pnOffsetList = (int)(p - pReader->pOffsetList - 1);
901 }
902
903 /* If there are no more entries in the doclist, set pOffsetList to
904 ** NULL. Otherwise, set Fts3SegReader.iDocid to the next docid and
905 ** Fts3SegReader.pOffsetList to point to the next offset list before
906 ** returning.
907 */
908 if( p>=&pReader->aDoclist[pReader->nDoclist] ){
909 pReader->pOffsetList = 0;
910 }else{
911 sqlite3_int64 iDelta;
912 pReader->pOffsetList = p + sqlite3Fts3GetVarint(p, &iDelta);
913 pReader->iDocid += iDelta;
914 }
915 }
916
917 /*
918 ** Free all allocations associated with the iterator passed as the
919 ** second argument.
920 */
921 void sqlite3Fts3SegReaderFree(Fts3Table *p, Fts3SegReader *pReader){
922 if( pReader ){
923 if( pReader->pStmt ){
924 /* Move the leaf-range SELECT statement to the aLeavesStmt[] array,
925 ** so that it can be reused when required by another query.
926 */
927 assert( p->nLeavesStmt<p->nLeavesTotal );
928 sqlite3_reset(pReader->pStmt);
929 p->aLeavesStmt[p->nLeavesStmt++] = pReader->pStmt;
930 }
931 if( !fts3SegReaderIsPending(pReader) ){
932 sqlite3_free(pReader->zTerm);
933 }
934 sqlite3_free(pReader);
935 }
936 }
937
938 /*
939 ** Allocate a new SegReader object.
940 */
941 int sqlite3Fts3SegReaderNew(
942 Fts3Table *p, /* Virtual table handle */
943 int iAge, /* Segment "age". */
944 sqlite3_int64 iStartLeaf, /* First leaf to traverse */
945 sqlite3_int64 iEndLeaf, /* Final leaf to traverse */
946 sqlite3_int64 iEndBlock, /* Final block of segment */
947 const char *zRoot, /* Buffer containing root node */
948 int nRoot, /* Size of buffer containing root node */
949 Fts3SegReader **ppReader /* OUT: Allocated Fts3SegReader */
950 ){
951 int rc = SQLITE_OK; /* Return code */
952 Fts3SegReader *pReader; /* Newly allocated SegReader object */
953 int nExtra = 0; /* Bytes to allocate segment root node */
954
955 if( iStartLeaf==0 ){
956 nExtra = nRoot;
957 }
958
959 pReader = (Fts3SegReader *)sqlite3_malloc(sizeof(Fts3SegReader) + nExtra);
960 if( !pReader ){
961 return SQLITE_NOMEM;
962 }
963 memset(pReader, 0, sizeof(Fts3SegReader));
964 pReader->iStartBlock = iStartLeaf;
965 pReader->iIdx = iAge;
966 pReader->iEndBlock = iEndBlock;
967
968 if( nExtra ){
969 /* The entire segment is stored in the root node. */
970 pReader->aNode = (char *)&pReader[1];
971 pReader->nNode = nRoot;
972 memcpy(pReader->aNode, zRoot, nRoot);
973 }else{
974 /* If the text of the SQL statement to iterate through a contiguous
975 ** set of entries in the %_segments table has not yet been composed,
976 ** compose it now.
977 */
978 if( !p->zSelectLeaves ){
979 p->zSelectLeaves = sqlite3_mprintf(
980 "SELECT block FROM %Q.'%q_segments' WHERE blockid BETWEEN ? AND ? "
981 "ORDER BY blockid", p->zDb, p->zName
982 );
983 if( !p->zSelectLeaves ){
984 rc = SQLITE_NOMEM;
985 goto finished;
986 }
987 }
988
989 /* If there are no free statements in the aLeavesStmt[] array, prepare
990 ** a new statement now. Otherwise, reuse a prepared statement from
991 ** aLeavesStmt[].
992 */
993 if( p->nLeavesStmt==0 ){
994 if( p->nLeavesTotal==p->nLeavesAlloc ){
995 int nNew = p->nLeavesAlloc + 16;
996 sqlite3_stmt **aNew = (sqlite3_stmt **)sqlite3_realloc(
997 p->aLeavesStmt, nNew*sizeof(sqlite3_stmt *)
998 );
999 if( !aNew ){
1000 rc = SQLITE_NOMEM;
1001 goto finished;
1002 }
1003 p->nLeavesAlloc = nNew;
1004 p->aLeavesStmt = aNew;
1005 }
1006 rc = sqlite3_prepare_v2(p->db, p->zSelectLeaves, -1, &pReader->pStmt, 0);
1007 if( rc!=SQLITE_OK ){
1008 goto finished;
1009 }
1010 p->nLeavesTotal++;
1011 }else{
1012 pReader->pStmt = p->aLeavesStmt[--p->nLeavesStmt];
1013 }
1014
1015 /* Bind the start and end leaf blockids to the prepared SQL statement. */
1016 sqlite3_bind_int64(pReader->pStmt, 1, iStartLeaf);
1017 sqlite3_bind_int64(pReader->pStmt, 2, iEndLeaf);
1018 }
1019 rc = fts3SegReaderNext(pReader);
1020
1021 finished:
1022 if( rc==SQLITE_OK ){
1023 *ppReader = pReader;
1024 }else{
1025 sqlite3Fts3SegReaderFree(p, pReader);
1026 }
1027 return rc;
1028 }
1029
1030 /*
1031 ** This is a comparison function used as a qsort() callback when sorting
1032 ** an array of pending terms by term. This occurs as part of flushing
1033 ** the contents of the pending-terms hash table to the database.
1034 */
1035 static int fts3CompareElemByTerm(const void *lhs, const void *rhs){
1036 char *z1 = fts3HashKey(*(Fts3HashElem **)lhs);
1037 char *z2 = fts3HashKey(*(Fts3HashElem **)rhs);
1038 int n1 = fts3HashKeysize(*(Fts3HashElem **)lhs);
1039 int n2 = fts3HashKeysize(*(Fts3HashElem **)rhs);
1040
1041 int n = (n1<n2 ? n1 : n2);
1042 int c = memcmp(z1, z2, n);
1043 if( c==0 ){
1044 c = n1 - n2;
1045 }
1046 return c;
1047 }
1048
1049 /*
1050 ** This function is used to allocate an Fts3SegReader that iterates through
1051 ** a subset of the terms stored in the Fts3Table.pendingTerms array.
1052 */
1053 int sqlite3Fts3SegReaderPending(
1054 Fts3Table *p, /* Virtual table handle */
1055 const char *zTerm, /* Term to search for */
1056 int nTerm, /* Size of buffer zTerm */
1057 int isPrefix, /* True for a term-prefix query */
1058 Fts3SegReader **ppReader /* OUT: SegReader for pending-terms */
1059 ){
1060 Fts3SegReader *pReader = 0; /* Fts3SegReader object to return */
1061 Fts3HashElem **aElem = 0; /* Array of term hash entries to scan */
1062 int nElem = 0; /* Size of array at aElem */
1063 int rc = SQLITE_OK; /* Return Code */
1064
1065 if( isPrefix ){
1066 int nAlloc = 0; /* Size of allocated array at aElem */
1067 Fts3HashElem *pE = 0; /* Iterator variable */
1068
1069 for(pE=fts3HashFirst(&p->pendingTerms); pE; pE=fts3HashNext(pE)){
1070 char *zKey = (char *)fts3HashKey(pE);
1071 int nKey = fts3HashKeysize(pE);
1072 if( nTerm==0 || (nKey>=nTerm && 0==memcmp(zKey, zTerm, nTerm)) ){
1073 if( nElem==nAlloc ){
1074 Fts3HashElem **aElem2;
1075 nAlloc += 16;
1076 aElem2 = (Fts3HashElem **)sqlite3_realloc(
1077 aElem, nAlloc*sizeof(Fts3HashElem *)
1078 );
1079 if( !aElem2 ){
1080 rc = SQLITE_NOMEM;
1081 nElem = 0;
1082 break;
1083 }
1084 aElem = aElem2;
1085 }
1086 aElem[nElem++] = pE;
1087 }
1088 }
1089
1090 /* If more than one term matches the prefix, sort the Fts3HashElem
1091 ** objects in term order using qsort(). This uses the same comparison
1092 ** callback as is used when flushing terms to disk.
1093 */
1094 if( nElem>1 ){
1095 qsort(aElem, nElem, sizeof(Fts3HashElem *), fts3CompareElemByTerm);
1096 }
1097
1098 }else{
1099 Fts3HashElem *pE = fts3HashFindElem(&p->pendingTerms, zTerm, nTerm);
1100 if( pE ){
1101 aElem = &pE;
1102 nElem = 1;
1103 }
1104 }
1105
1106 if( nElem>0 ){
1107 int nByte = sizeof(Fts3SegReader) + (nElem+1)*sizeof(Fts3HashElem *);
1108 pReader = (Fts3SegReader *)sqlite3_malloc(nByte);
1109 if( !pReader ){
1110 rc = SQLITE_NOMEM;
1111 }else{
1112 memset(pReader, 0, nByte);
1113 pReader->iIdx = 0x7FFFFFFF;
1114 pReader->ppNextElem = (Fts3HashElem **)&pReader[1];
1115 memcpy(pReader->ppNextElem, aElem, nElem*sizeof(Fts3HashElem *));
1116 fts3SegReaderNext(pReader);
1117 }
1118 }
1119
1120 if( isPrefix ){
1121 sqlite3_free(aElem);
1122 }
1123 *ppReader = pReader;
1124 return rc;
1125 }
1126
1127
1128 /*
1129 ** The second argument to this function is expected to be a statement of
1130 ** the form:
1131 **
1132 ** SELECT
1133 ** idx, -- col 0
1134 ** start_block, -- col 1
1135 ** leaves_end_block, -- col 2
1136 ** end_block, -- col 3
1137 ** root -- col 4
1138 ** FROM %_segdir ...
1139 **
1140 ** This function allocates and initializes a Fts3SegReader structure to
1141 ** iterate through the terms stored in the segment identified by the
1142 ** current row that pStmt is pointing to.
1143 **
1144 ** If successful, the Fts3SegReader is left pointing to the first term
1145 ** in the segment and SQLITE_OK is returned. Otherwise, an SQLite error
1146 ** code is returned.
1147 */
1148 static int fts3SegReaderNew(
1149 Fts3Table *p, /* Virtual table handle */
1150 sqlite3_stmt *pStmt, /* See above */
1151 int iAge, /* Segment "age". */
1152 Fts3SegReader **ppReader /* OUT: Allocated Fts3SegReader */
1153 ){
1154 return sqlite3Fts3SegReaderNew(p, iAge,
1155 sqlite3_column_int64(pStmt, 1),
1156 sqlite3_column_int64(pStmt, 2),
1157 sqlite3_column_int64(pStmt, 3),
1158 sqlite3_column_blob(pStmt, 4),
1159 sqlite3_column_bytes(pStmt, 4),
1160 ppReader
1161 );
1162 }
1163
1164 /*
1165 ** Compare the entries pointed to by two Fts3SegReader structures.
1166 ** Comparison is as follows:
1167 **
1168 ** 1) EOF is greater than not EOF.
1169 **
1170 ** 2) The current terms (if any) are compared using memcmp(). If one
1171 ** term is a prefix of another, the longer term is considered the
1172 ** larger.
1173 **
1174 ** 3) By segment age. An older segment is considered larger.
1175 */
1176 static int fts3SegReaderCmp(Fts3SegReader *pLhs, Fts3SegReader *pRhs){
1177 int rc;
1178 if( pLhs->aNode && pRhs->aNode ){
1179 int rc2 = pLhs->nTerm - pRhs->nTerm;
1180 if( rc2<0 ){
1181 rc = memcmp(pLhs->zTerm, pRhs->zTerm, pLhs->nTerm);
1182 }else{
1183 rc = memcmp(pLhs->zTerm, pRhs->zTerm, pRhs->nTerm);
1184 }
1185 if( rc==0 ){
1186 rc = rc2;
1187 }
1188 }else{
1189 rc = (pLhs->aNode==0) - (pRhs->aNode==0);
1190 }
1191 if( rc==0 ){
1192 rc = pRhs->iIdx - pLhs->iIdx;
1193 }
1194 assert( rc!=0 );
1195 return rc;
1196 }
1197
1198 /*
1199 ** A different comparison function for SegReader structures. In this
1200 ** version, it is assumed that each SegReader points to an entry in
1201 ** a doclist for identical terms. Comparison is made as follows:
1202 **
1203 ** 1) EOF (end of doclist in this case) is greater than not EOF.
1204 **
1205 ** 2) By current docid.
1206 **
1207 ** 3) By segment age. An older segment is considered larger.
1208 */
1209 static int fts3SegReaderDoclistCmp(Fts3SegReader *pLhs, Fts3SegReader *pRhs){
1210 int rc = (pLhs->pOffsetList==0)-(pRhs->pOffsetList==0);
1211 if( rc==0 ){
1212 if( pLhs->iDocid==pRhs->iDocid ){
1213 rc = pRhs->iIdx - pLhs->iIdx;
1214 }else{
1215 rc = (pLhs->iDocid > pRhs->iDocid) ? 1 : -1;
1216 }
1217 }
1218 assert( pLhs->aNode && pRhs->aNode );
1219 return rc;
1220 }
1221
1222 /*
1223 ** Compare the term that the Fts3SegReader object passed as the first argument
1224 ** points to with the term specified by arguments zTerm and nTerm.
1225 **
1226 ** If the pSeg iterator is already at EOF, return 0. Otherwise, return
1227 ** -ve if the pSeg term is less than zTerm/nTerm, 0 if the two terms are
1228 ** equal, or +ve if the pSeg term is greater than zTerm/nTerm.
1229 */
1230 static int fts3SegReaderTermCmp(
1231 Fts3SegReader *pSeg, /* Segment reader object */
1232 const char *zTerm, /* Term to compare to */
1233 int nTerm /* Size of term zTerm in bytes */
1234 ){
1235 int res = 0;
1236 if( pSeg->aNode ){
1237 if( pSeg->nTerm>nTerm ){
1238 res = memcmp(pSeg->zTerm, zTerm, nTerm);
1239 }else{
1240 res = memcmp(pSeg->zTerm, zTerm, pSeg->nTerm);
1241 }
1242 if( res==0 ){
1243 res = pSeg->nTerm-nTerm;
1244 }
1245 }
1246 return res;
1247 }
1248
1249 /*
1250 ** Argument apSegment is an array of nSegment elements. It is known that
1251 ** the final (nSegment-nSuspect) members are already in sorted order
1252 ** (according to the comparison function provided). This function shuffles
1253 ** the array around until all entries are in sorted order.
1254 */
1255 static void fts3SegReaderSort(
1256 Fts3SegReader **apSegment, /* Array to sort entries of */
1257 int nSegment, /* Size of apSegment array */
1258 int nSuspect, /* Unsorted entry count */
1259 int (*xCmp)(Fts3SegReader *, Fts3SegReader *) /* Comparison function */
1260 ){
1261 int i; /* Iterator variable */
1262
1263 assert( nSuspect<=nSegment );
1264
1265 if( nSuspect==nSegment ) nSuspect--;
1266 for(i=nSuspect-1; i>=0; i--){
1267 int j;
1268 for(j=i; j<(nSegment-1); j++){
1269 Fts3SegReader *pTmp;
1270 if( xCmp(apSegment[j], apSegment[j+1])<0 ) break;
1271 pTmp = apSegment[j+1];
1272 apSegment[j+1] = apSegment[j];
1273 apSegment[j] = pTmp;
1274 }
1275 }
1276
1277 #ifndef NDEBUG
1278 /* Check that the list really is sorted now. */
1279 for(i=0; i<(nSuspect-1); i++){
1280 assert( xCmp(apSegment[i], apSegment[i+1])<0 );
1281 }
1282 #endif
1283 }
1284
1285 /*
1286 ** Insert a record into the %_segments table.
1287 */
1288 static int fts3WriteSegment(
1289 Fts3Table *p, /* Virtual table handle */
1290 sqlite3_int64 iBlock, /* Block id for new block */
1291 char *z, /* Pointer to buffer containing block data */
1292 int n /* Size of buffer z in bytes */
1293 ){
1294 sqlite3_stmt *pStmt;
1295 int rc = fts3SqlStmt(p, SQL_INSERT_SEGMENTS, &pStmt, 0);
1296 if( rc==SQLITE_OK ){
1297 sqlite3_bind_int64(pStmt, 1, iBlock);
1298 sqlite3_bind_blob(pStmt, 2, z, n, SQLITE_STATIC);
1299 sqlite3_step(pStmt);
1300 rc = sqlite3_reset(pStmt);
1301 }
1302 return rc;
1303 }
1304
1305 /*
1306 ** Insert a record into the %_segdir table.
1307 */
1308 static int fts3WriteSegdir(
1309 Fts3Table *p, /* Virtual table handle */
1310 int iLevel, /* Value for "level" field */
1311 int iIdx, /* Value for "idx" field */
1312 sqlite3_int64 iStartBlock, /* Value for "start_block" field */
1313 sqlite3_int64 iLeafEndBlock, /* Value for "leaves_end_block" field */
1314 sqlite3_int64 iEndBlock, /* Value for "end_block" field */
1315 char *zRoot, /* Blob value for "root" field */
1316 int nRoot /* Number of bytes in buffer zRoot */
1317 ){
1318 sqlite3_stmt *pStmt;
1319 int rc = fts3SqlStmt(p, SQL_INSERT_SEGDIR, &pStmt, 0);
1320 if( rc==SQLITE_OK ){
1321 sqlite3_bind_int(pStmt, 1, iLevel);
1322 sqlite3_bind_int(pStmt, 2, iIdx);
1323 sqlite3_bind_int64(pStmt, 3, iStartBlock);
1324 sqlite3_bind_int64(pStmt, 4, iLeafEndBlock);
1325 sqlite3_bind_int64(pStmt, 5, iEndBlock);
1326 sqlite3_bind_blob(pStmt, 6, zRoot, nRoot, SQLITE_STATIC);
1327 sqlite3_step(pStmt);
1328 rc = sqlite3_reset(pStmt);
1329 }
1330 return rc;
1331 }
1332
1333 /*
1334 ** Return the size of the common prefix (if any) shared by zPrev and
1335 ** zNext, in bytes. For example,
1336 **
1337 ** fts3PrefixCompress("abc", 3, "abcdef", 6) // returns 3
1338 ** fts3PrefixCompress("abX", 3, "abcdef", 6) // returns 2
1339 ** fts3PrefixCompress("abX", 3, "Xbcdef", 6) // returns 0
1340 */
1341 static int fts3PrefixCompress(
1342 const char *zPrev, /* Buffer containing previous term */
1343 int nPrev, /* Size of buffer zPrev in bytes */
1344 const char *zNext, /* Buffer containing next term */
1345 int nNext /* Size of buffer zNext in bytes */
1346 ){
1347 int n;
1348 UNUSED_PARAMETER(nNext);
1349 for(n=0; n<nPrev && zPrev[n]==zNext[n]; n++);
1350 return n;
1351 }
1352
1353 /*
1354 ** Add term zTerm to the SegmentNode. It is guaranteed that zTerm is larger
1355 ** (according to memcmp) than the previous term.
1356 */
1357 static int fts3NodeAddTerm(
1358 Fts3Table *p, /* Virtual table handle */
1359 SegmentNode **ppTree, /* IN/OUT: SegmentNode handle */
1360 int isCopyTerm, /* True if zTerm/nTerm is transient */
1361 const char *zTerm, /* Pointer to buffer containing term */
1362 int nTerm /* Size of term in bytes */
1363 ){
1364 SegmentNode *pTree = *ppTree;
1365 int rc;
1366 SegmentNode *pNew;
1367
1368 /* First try to append the term to the current node. Return early if
1369 ** this is possible.
1370 */
1371 if( pTree ){
1372 int nData = pTree->nData; /* Current size of node in bytes */
1373 int nReq = nData; /* Required space after adding zTerm */
1374 int nPrefix; /* Number of bytes of prefix compression */
1375 int nSuffix; /* Suffix length */
1376
1377 nPrefix = fts3PrefixCompress(pTree->zTerm, pTree->nTerm, zTerm, nTerm);
1378 nSuffix = nTerm-nPrefix;
1379
1380 nReq += sqlite3Fts3VarintLen(nPrefix)+sqlite3Fts3VarintLen(nSuffix)+nSuffix;
1381 if( nReq<=p->nNodeSize || !pTree->zTerm ){
1382
1383 if( nReq>p->nNodeSize ){
1384 /* An unusual case: this is the first term to be added to the node
1385 ** and the static node buffer (p->nNodeSize bytes) is not large
1386 ** enough. Use a separately malloced buffer instead This wastes
1387 ** p->nNodeSize bytes, but since this scenario only comes about when
1388 ** the database contain two terms that share a prefix of almost 2KB,
1389 ** this is not expected to be a serious problem.
1390 */
1391 assert( pTree->aData==(char *)&pTree[1] );
1392 pTree->aData = (char *)sqlite3_malloc(nReq);
1393 if( !pTree->aData ){
1394 return SQLITE_NOMEM;
1395 }
1396 }
1397
1398 if( pTree->zTerm ){
1399 /* There is no prefix-length field for first term in a node */
1400 nData += sqlite3Fts3PutVarint(&pTree->aData[nData], nPrefix);
1401 }
1402
1403 nData += sqlite3Fts3PutVarint(&pTree->aData[nData], nSuffix);
1404 memcpy(&pTree->aData[nData], &zTerm[nPrefix], nSuffix);
1405 pTree->nData = nData + nSuffix;
1406 pTree->nEntry++;
1407
1408 if( isCopyTerm ){
1409 if( pTree->nMalloc<nTerm ){
1410 char *zNew = sqlite3_realloc(pTree->zMalloc, nTerm*2);
1411 if( !zNew ){
1412 return SQLITE_NOMEM;
1413 }
1414 pTree->nMalloc = nTerm*2;
1415 pTree->zMalloc = zNew;
1416 }
1417 pTree->zTerm = pTree->zMalloc;
1418 memcpy(pTree->zTerm, zTerm, nTerm);
1419 pTree->nTerm = nTerm;
1420 }else{
1421 pTree->zTerm = (char *)zTerm;
1422 pTree->nTerm = nTerm;
1423 }
1424 return SQLITE_OK;
1425 }
1426 }
1427
1428 /* If control flows to here, it was not possible to append zTerm to the
1429 ** current node. Create a new node (a right-sibling of the current node).
1430 ** If this is the first node in the tree, the term is added to it.
1431 **
1432 ** Otherwise, the term is not added to the new node, it is left empty for
1433 ** now. Instead, the term is inserted into the parent of pTree. If pTree
1434 ** has no parent, one is created here.
1435 */
1436 pNew = (SegmentNode *)sqlite3_malloc(sizeof(SegmentNode) + p->nNodeSize);
1437 if( !pNew ){
1438 return SQLITE_NOMEM;
1439 }
1440 memset(pNew, 0, sizeof(SegmentNode));
1441 pNew->nData = 1 + FTS3_VARINT_MAX;
1442 pNew->aData = (char *)&pNew[1];
1443
1444 if( pTree ){
1445 SegmentNode *pParent = pTree->pParent;
1446 rc = fts3NodeAddTerm(p, &pParent, isCopyTerm, zTerm, nTerm);
1447 if( pTree->pParent==0 ){
1448 pTree->pParent = pParent;
1449 }
1450 pTree->pRight = pNew;
1451 pNew->pLeftmost = pTree->pLeftmost;
1452 pNew->pParent = pParent;
1453 pNew->zMalloc = pTree->zMalloc;
1454 pNew->nMalloc = pTree->nMalloc;
1455 pTree->zMalloc = 0;
1456 }else{
1457 pNew->pLeftmost = pNew;
1458 rc = fts3NodeAddTerm(p, &pNew, isCopyTerm, zTerm, nTerm);
1459 }
1460
1461 *ppTree = pNew;
1462 return rc;
1463 }
1464
1465 /*
1466 ** Helper function for fts3NodeWrite().
1467 */
1468 static int fts3TreeFinishNode(
1469 SegmentNode *pTree,
1470 int iHeight,
1471 sqlite3_int64 iLeftChild
1472 ){
1473 int nStart;
1474 assert( iHeight>=1 && iHeight<128 );
1475 nStart = FTS3_VARINT_MAX - sqlite3Fts3VarintLen(iLeftChild);
1476 pTree->aData[nStart] = (char)iHeight;
1477 sqlite3Fts3PutVarint(&pTree->aData[nStart+1], iLeftChild);
1478 return nStart;
1479 }
1480
1481 /*
1482 ** Write the buffer for the segment node pTree and all of its peers to the
1483 ** database. Then call this function recursively to write the parent of
1484 ** pTree and its peers to the database.
1485 **
1486 ** Except, if pTree is a root node, do not write it to the database. Instead,
1487 ** set output variables *paRoot and *pnRoot to contain the root node.
1488 **
1489 ** If successful, SQLITE_OK is returned and output variable *piLast is
1490 ** set to the largest blockid written to the database (or zero if no
1491 ** blocks were written to the db). Otherwise, an SQLite error code is
1492 ** returned.
1493 */
1494 static int fts3NodeWrite(
1495 Fts3Table *p, /* Virtual table handle */
1496 SegmentNode *pTree, /* SegmentNode handle */
1497 int iHeight, /* Height of this node in tree */
1498 sqlite3_int64 iLeaf, /* Block id of first leaf node */
1499 sqlite3_int64 iFree, /* Block id of next free slot in %_segments */
1500 sqlite3_int64 *piLast, /* OUT: Block id of last entry written */
1501 char **paRoot, /* OUT: Data for root node */
1502 int *pnRoot /* OUT: Size of root node in bytes */
1503 ){
1504 int rc = SQLITE_OK;
1505
1506 if( !pTree->pParent ){
1507 /* Root node of the tree. */
1508 int nStart = fts3TreeFinishNode(pTree, iHeight, iLeaf);
1509 *piLast = iFree-1;
1510 *pnRoot = pTree->nData - nStart;
1511 *paRoot = &pTree->aData[nStart];
1512 }else{
1513 SegmentNode *pIter;
1514 sqlite3_int64 iNextFree = iFree;
1515 sqlite3_int64 iNextLeaf = iLeaf;
1516 for(pIter=pTree->pLeftmost; pIter && rc==SQLITE_OK; pIter=pIter->pRight){
1517 int nStart = fts3TreeFinishNode(pIter, iHeight, iNextLeaf);
1518 int nWrite = pIter->nData - nStart;
1519
1520 rc = fts3WriteSegment(p, iNextFree, &pIter->aData[nStart], nWrite);
1521 iNextFree++;
1522 iNextLeaf += (pIter->nEntry+1);
1523 }
1524 if( rc==SQLITE_OK ){
1525 assert( iNextLeaf==iFree );
1526 rc = fts3NodeWrite(
1527 p, pTree->pParent, iHeight+1, iFree, iNextFree, piLast, paRoot, pnRoot
1528 );
1529 }
1530 }
1531
1532 return rc;
1533 }
1534
1535 /*
1536 ** Free all memory allocations associated with the tree pTree.
1537 */
1538 static void fts3NodeFree(SegmentNode *pTree){
1539 if( pTree ){
1540 SegmentNode *p = pTree->pLeftmost;
1541 fts3NodeFree(p->pParent);
1542 while( p ){
1543 SegmentNode *pRight = p->pRight;
1544 if( p->aData!=(char *)&p[1] ){
1545 sqlite3_free(p->aData);
1546 }
1547 assert( pRight==0 || p->zMalloc==0 );
1548 sqlite3_free(p->zMalloc);
1549 sqlite3_free(p);
1550 p = pRight;
1551 }
1552 }
1553 }
1554
1555 /*
1556 ** Add a term to the segment being constructed by the SegmentWriter object
1557 ** *ppWriter. When adding the first term to a segment, *ppWriter should
1558 ** be passed NULL. This function will allocate a new SegmentWriter object
1559 ** and return it via the input/output variable *ppWriter in this case.
1560 **
1561 ** If successful, SQLITE_OK is returned. Otherwise, an SQLite error code.
1562 */
1563 static int fts3SegWriterAdd(
1564 Fts3Table *p, /* Virtual table handle */
1565 SegmentWriter **ppWriter, /* IN/OUT: SegmentWriter handle */
1566 int isCopyTerm, /* True if buffer zTerm must be copied */
1567 const char *zTerm, /* Pointer to buffer containing term */
1568 int nTerm, /* Size of term in bytes */
1569 const char *aDoclist, /* Pointer to buffer containing doclist */
1570 int nDoclist /* Size of doclist in bytes */
1571 ){
1572 int nPrefix; /* Size of term prefix in bytes */
1573 int nSuffix; /* Size of term suffix in bytes */
1574 int nReq; /* Number of bytes required on leaf page */
1575 int nData;
1576 SegmentWriter *pWriter = *ppWriter;
1577
1578 if( !pWriter ){
1579 int rc;
1580 sqlite3_stmt *pStmt;
1581
1582 /* Allocate the SegmentWriter structure */
1583 pWriter = (SegmentWriter *)sqlite3_malloc(sizeof(SegmentWriter));
1584 if( !pWriter ) return SQLITE_NOMEM;
1585 memset(pWriter, 0, sizeof(SegmentWriter));
1586 *ppWriter = pWriter;
1587
1588 /* Allocate a buffer in which to accumulate data */
1589 pWriter->aData = (char *)sqlite3_malloc(p->nNodeSize);
1590 if( !pWriter->aData ) return SQLITE_NOMEM;
1591 pWriter->nSize = p->nNodeSize;
1592
1593 /* Find the next free blockid in the %_segments table */
1594 rc = fts3SqlStmt(p, SQL_NEXT_SEGMENTS_ID, &pStmt, 0);
1595 if( rc!=SQLITE_OK ) return rc;
1596 if( SQLITE_ROW==sqlite3_step(pStmt) ){
1597 pWriter->iFree = sqlite3_column_int64(pStmt, 0);
1598 pWriter->iFirst = pWriter->iFree;
1599 }
1600 rc = sqlite3_reset(pStmt);
1601 if( rc!=SQLITE_OK ) return rc;
1602 }
1603 nData = pWriter->nData;
1604
1605 nPrefix = fts3PrefixCompress(pWriter->zTerm, pWriter->nTerm, zTerm, nTerm);
1606 nSuffix = nTerm-nPrefix;
1607
1608 /* Figure out how many bytes are required by this new entry */
1609 nReq = sqlite3Fts3VarintLen(nPrefix) + /* varint containing prefix size */
1610 sqlite3Fts3VarintLen(nSuffix) + /* varint containing suffix size */
1611 nSuffix + /* Term suffix */
1612 sqlite3Fts3VarintLen(nDoclist) + /* Size of doclist */
1613 nDoclist; /* Doclist data */
1614
1615 if( nData>0 && nData+nReq>p->nNodeSize ){
1616 int rc;
1617
1618 /* The current leaf node is full. Write it out to the database. */
1619 rc = fts3WriteSegment(p, pWriter->iFree++, pWriter->aData, nData);
1620 if( rc!=SQLITE_OK ) return rc;
1621
1622 /* Add the current term to the interior node tree. The term added to
1623 ** the interior tree must:
1624 **
1625 ** a) be greater than the largest term on the leaf node just written
1626 ** to the database (still available in pWriter->zTerm), and
1627 **
1628 ** b) be less than or equal to the term about to be added to the new
1629 ** leaf node (zTerm/nTerm).
1630 **
1631 ** In other words, it must be the prefix of zTerm 1 byte longer than
1632 ** the common prefix (if any) of zTerm and pWriter->zTerm.
1633 */
1634 assert( nPrefix<nTerm );
1635 rc = fts3NodeAddTerm(p, &pWriter->pTree, isCopyTerm, zTerm, nPrefix+1);
1636 if( rc!=SQLITE_OK ) return rc;
1637
1638 nData = 0;
1639 pWriter->nTerm = 0;
1640
1641 nPrefix = 0;
1642 nSuffix = nTerm;
1643 nReq = 1 + /* varint containing prefix size */
1644 sqlite3Fts3VarintLen(nTerm) + /* varint containing suffix size */
1645 nTerm + /* Term suffix */
1646 sqlite3Fts3VarintLen(nDoclist) + /* Size of doclist */
1647 nDoclist; /* Doclist data */
1648 }
1649
1650 /* If the buffer currently allocated is too small for this entry, realloc
1651 ** the buffer to make it large enough.
1652 */
1653 if( nReq>pWriter->nSize ){
1654 char *aNew = sqlite3_realloc(pWriter->aData, nReq);
1655 if( !aNew ) return SQLITE_NOMEM;
1656 pWriter->aData = aNew;
1657 pWriter->nSize = nReq;
1658 }
1659 assert( nData+nReq<=pWriter->nSize );
1660
1661 /* Append the prefix-compressed term and doclist to the buffer. */
1662 nData += sqlite3Fts3PutVarint(&pWriter->aData[nData], nPrefix);
1663 nData += sqlite3Fts3PutVarint(&pWriter->aData[nData], nSuffix);
1664 memcpy(&pWriter->aData[nData], &zTerm[nPrefix], nSuffix);
1665 nData += nSuffix;
1666 nData += sqlite3Fts3PutVarint(&pWriter->aData[nData], nDoclist);
1667 memcpy(&pWriter->aData[nData], aDoclist, nDoclist);
1668 pWriter->nData = nData + nDoclist;
1669
1670 /* Save the current term so that it can be used to prefix-compress the next.
1671 ** If the isCopyTerm parameter is true, then the buffer pointed to by
1672 ** zTerm is transient, so take a copy of the term data. Otherwise, just
1673 ** store a copy of the pointer.
1674 */
1675 if( isCopyTerm ){
1676 if( nTerm>pWriter->nMalloc ){
1677 char *zNew = sqlite3_realloc(pWriter->zMalloc, nTerm*2);
1678 if( !zNew ){
1679 return SQLITE_NOMEM;
1680 }
1681 pWriter->nMalloc = nTerm*2;
1682 pWriter->zMalloc = zNew;
1683 pWriter->zTerm = zNew;
1684 }
1685 assert( pWriter->zTerm==pWriter->zMalloc );
1686 memcpy(pWriter->zTerm, zTerm, nTerm);
1687 }else{
1688 pWriter->zTerm = (char *)zTerm;
1689 }
1690 pWriter->nTerm = nTerm;
1691
1692 return SQLITE_OK;
1693 }
1694
1695 /*
1696 ** Flush all data associated with the SegmentWriter object pWriter to the
1697 ** database. This function must be called after all terms have been added
1698 ** to the segment using fts3SegWriterAdd(). If successful, SQLITE_OK is
1699 ** returned. Otherwise, an SQLite error code.
1700 */
1701 static int fts3SegWriterFlush(
1702 Fts3Table *p, /* Virtual table handle */
1703 SegmentWriter *pWriter, /* SegmentWriter to flush to the db */
1704 int iLevel, /* Value for 'level' column of %_segdir */
1705 int iIdx /* Value for 'idx' column of %_segdir */
1706 ){
1707 int rc; /* Return code */
1708 if( pWriter->pTree ){
1709 sqlite3_int64 iLast = 0; /* Largest block id written to database */
1710 sqlite3_int64 iLastLeaf; /* Largest leaf block id written to db */
1711 char *zRoot = NULL; /* Pointer to buffer containing root node */
1712 int nRoot = 0; /* Size of buffer zRoot */
1713
1714 iLastLeaf = pWriter->iFree;
1715 rc = fts3WriteSegment(p, pWriter->iFree++, pWriter->aData, pWriter->nData);
1716 if( rc==SQLITE_OK ){
1717 rc = fts3NodeWrite(p, pWriter->pTree, 1,
1718 pWriter->iFirst, pWriter->iFree, &iLast, &zRoot, &nRoot);
1719 }
1720 if( rc==SQLITE_OK ){
1721 rc = fts3WriteSegdir(
1722 p, iLevel, iIdx, pWriter->iFirst, iLastLeaf, iLast, zRoot, nRoot);
1723 }
1724 }else{
1725 /* The entire tree fits on the root node. Write it to the segdir table. */
1726 rc = fts3WriteSegdir(
1727 p, iLevel, iIdx, 0, 0, 0, pWriter->aData, pWriter->nData);
1728 }
1729 return rc;
1730 }
1731
1732 /*
1733 ** Release all memory held by the SegmentWriter object passed as the
1734 ** first argument.
1735 */
1736 static void fts3SegWriterFree(SegmentWriter *pWriter){
1737 if( pWriter ){
1738 sqlite3_free(pWriter->aData);
1739 sqlite3_free(pWriter->zMalloc);
1740 fts3NodeFree(pWriter->pTree);
1741 sqlite3_free(pWriter);
1742 }
1743 }
1744
1745 /*
1746 ** The first value in the apVal[] array is assumed to contain an integer.
1747 ** This function tests if there exist any documents with docid values that
1748 ** are different from that integer. i.e. if deleting the document with docid
1749 ** apVal[0] would mean the FTS3 table were empty.
1750 **
1751 ** If successful, *pisEmpty is set to true if the table is empty except for
1752 ** document apVal[0], or false otherwise, and SQLITE_OK is returned. If an
1753 ** error occurs, an SQLite error code is returned.
1754 */
1755 static int fts3IsEmpty(Fts3Table *p, sqlite3_value **apVal, int *pisEmpty){
1756 sqlite3_stmt *pStmt;
1757 int rc;
1758 rc = fts3SqlStmt(p, SQL_IS_EMPTY, &pStmt, apVal);
1759 if( rc==SQLITE_OK ){
1760 if( SQLITE_ROW==sqlite3_step(pStmt) ){
1761 *pisEmpty = sqlite3_column_int(pStmt, 0);
1762 }
1763 rc = sqlite3_reset(pStmt);
1764 }
1765 return rc;
1766 }
1767
1768 /*
1769 ** Set *pnSegment to the number of segments of level iLevel in the database.
1770 **
1771 ** Return SQLITE_OK if successful, or an SQLite error code if not.
1772 */
1773 static int fts3SegmentCount(Fts3Table *p, int iLevel, int *pnSegment){
1774 sqlite3_stmt *pStmt;
1775 int rc;
1776
1777 assert( iLevel>=0 );
1778 rc = fts3SqlStmt(p, SQL_SELECT_LEVEL_COUNT, &pStmt, 0);
1779 if( rc!=SQLITE_OK ) return rc;
1780 sqlite3_bind_int(pStmt, 1, iLevel);
1781 if( SQLITE_ROW==sqlite3_step(pStmt) ){
1782 *pnSegment = sqlite3_column_int(pStmt, 0);
1783 }
1784 return sqlite3_reset(pStmt);
1785 }
1786
1787 /*
1788 ** Set *pnSegment to the total number of segments in the database. Set
1789 ** *pnMax to the largest segment level in the database (segment levels
1790 ** are stored in the 'level' column of the %_segdir table).
1791 **
1792 ** Return SQLITE_OK if successful, or an SQLite error code if not.
1793 */
1794 static int fts3SegmentCountMax(Fts3Table *p, int *pnSegment, int *pnMax){
1795 sqlite3_stmt *pStmt;
1796 int rc;
1797
1798 rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR_COUNT_MAX, &pStmt, 0);
1799 if( rc!=SQLITE_OK ) return rc;
1800 if( SQLITE_ROW==sqlite3_step(pStmt) ){
1801 *pnSegment = sqlite3_column_int(pStmt, 0);
1802 *pnMax = sqlite3_column_int(pStmt, 1);
1803 }
1804 return sqlite3_reset(pStmt);
1805 }
1806
1807 /*
1808 ** This function is used after merging multiple segments into a single large
1809 ** segment to delete the old, now redundant, segment b-trees. Specifically,
1810 ** it:
1811 **
1812 ** 1) Deletes all %_segments entries for the segments associated with
1813 ** each of the SegReader objects in the array passed as the third
1814 ** argument, and
1815 **
1816 ** 2) deletes all %_segdir entries with level iLevel, or all %_segdir
1817 ** entries regardless of level if (iLevel<0).
1818 **
1819 ** SQLITE_OK is returned if successful, otherwise an SQLite error code.
1820 */
1821 static int fts3DeleteSegdir(
1822 Fts3Table *p, /* Virtual table handle */
1823 int iLevel, /* Level of %_segdir entries to delete */
1824 Fts3SegReader **apSegment, /* Array of SegReader objects */
1825 int nReader /* Size of array apSegment */
1826 ){
1827 int rc; /* Return Code */
1828 int i; /* Iterator variable */
1829 sqlite3_stmt *pDelete; /* SQL statement to delete rows */
1830
1831 rc = fts3SqlStmt(p, SQL_DELETE_SEGMENTS_RANGE, &pDelete, 0);
1832 for(i=0; rc==SQLITE_OK && i<nReader; i++){
1833 Fts3SegReader *pSegment = apSegment[i];
1834 if( pSegment->iStartBlock ){
1835 sqlite3_bind_int64(pDelete, 1, pSegment->iStartBlock);
1836 sqlite3_bind_int64(pDelete, 2, pSegment->iEndBlock);
1837 sqlite3_step(pDelete);
1838 rc = sqlite3_reset(pDelete);
1839 }
1840 }
1841 if( rc!=SQLITE_OK ){
1842 return rc;
1843 }
1844
1845 if( iLevel>=0 ){
1846 rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_BY_LEVEL, &pDelete, 0);
1847 if( rc==SQLITE_OK ){
1848 sqlite3_bind_int(pDelete, 1, iLevel);
1849 sqlite3_step(pDelete);
1850 rc = sqlite3_reset(pDelete);
1851 }
1852 }else{
1853 fts3SqlExec(&rc, p, SQL_DELETE_ALL_SEGDIR, 0);
1854 }
1855
1856 return rc;
1857 }
1858
1859 /*
1860 ** When this function is called, buffer *ppList (size *pnList bytes) contains
1861 ** a position list that may (or may not) feature multiple columns. This
1862 ** function adjusts the pointer *ppList and the length *pnList so that they
1863 ** identify the subset of the position list that corresponds to column iCol.
1864 **
1865 ** If there are no entries in the input position list for column iCol, then
1866 ** *pnList is set to zero before returning.
1867 */
1868 static void fts3ColumnFilter(
1869 int iCol, /* Column to filter on */
1870 char **ppList, /* IN/OUT: Pointer to position list */
1871 int *pnList /* IN/OUT: Size of buffer *ppList in bytes */
1872 ){
1873 char *pList = *ppList;
1874 int nList = *pnList;
1875 char *pEnd = &pList[nList];
1876 int iCurrent = 0;
1877 char *p = pList;
1878
1879 assert( iCol>=0 );
1880 while( 1 ){
1881 char c = 0;
1882 while( p<pEnd && (c | *p)&0xFE ) c = *p++ & 0x80;
1883
1884 if( iCol==iCurrent ){
1885 nList = (int)(p - pList);
1886 break;
1887 }
1888
1889 nList -= (int)(p - pList);
1890 pList = p;
1891 if( nList==0 ){
1892 break;
1893 }
1894 p = &pList[1];
1895 p += sqlite3Fts3GetVarint32(p, &iCurrent);
1896 }
1897
1898 *ppList = pList;
1899 *pnList = nList;
1900 }
1901
1902 /*
1903 ** sqlite3Fts3SegReaderIterate() callback used when merging multiple
1904 ** segments to create a single, larger segment.
1905 */
1906 static int fts3MergeCallback(
1907 Fts3Table *p, /* FTS3 Virtual table handle */
1908 void *pContext, /* Pointer to SegmentWriter* to write with */
1909 char *zTerm, /* Term to write to the db */
1910 int nTerm, /* Number of bytes in zTerm */
1911 char *aDoclist, /* Doclist associated with zTerm */
1912 int nDoclist /* Number of bytes in doclist */
1913 ){
1914 SegmentWriter **ppW = (SegmentWriter **)pContext;
1915 return fts3SegWriterAdd(p, ppW, 1, zTerm, nTerm, aDoclist, nDoclist);
1916 }
1917
1918 /*
1919 ** sqlite3Fts3SegReaderIterate() callback used when flushing the contents
1920 ** of the pending-terms hash table to the database.
1921 */
1922 static int fts3FlushCallback(
1923 Fts3Table *p, /* FTS3 Virtual table handle */
1924 void *pContext, /* Pointer to SegmentWriter* to write with */
1925 char *zTerm, /* Term to write to the db */
1926 int nTerm, /* Number of bytes in zTerm */
1927 char *aDoclist, /* Doclist associated with zTerm */
1928 int nDoclist /* Number of bytes in doclist */
1929 ){
1930 SegmentWriter **ppW = (SegmentWriter **)pContext;
1931 return fts3SegWriterAdd(p, ppW, 0, zTerm, nTerm, aDoclist, nDoclist);
1932 }
1933
1934 /*
1935 ** This function is used to iterate through a contiguous set of terms
1936 ** stored in the full-text index. It merges data contained in one or
1937 ** more segments to support this.
1938 **
1939 ** The second argument is passed an array of pointers to SegReader objects
1940 ** allocated with sqlite3Fts3SegReaderNew(). This function merges the range
1941 ** of terms selected by each SegReader. If a single term is present in
1942 ** more than one segment, the associated doclists are merged. For each
1943 ** term and (possibly merged) doclist in the merged range, the callback
1944 ** function xFunc is invoked with its arguments set as follows.
1945 **
1946 ** arg 0: Copy of 'p' parameter passed to this function
1947 ** arg 1: Copy of 'pContext' parameter passed to this function
1948 ** arg 2: Pointer to buffer containing term
1949 ** arg 3: Size of arg 2 buffer in bytes
1950 ** arg 4: Pointer to buffer containing doclist
1951 ** arg 5: Size of arg 2 buffer in bytes
1952 **
1953 ** The 4th argument to this function is a pointer to a structure of type
1954 ** Fts3SegFilter, defined in fts3Int.h. The contents of this structure
1955 ** further restrict the range of terms that callbacks are made for and
1956 ** modify the behaviour of this function. See comments above structure
1957 ** definition for details.
1958 */
1959 int sqlite3Fts3SegReaderIterate(
1960 Fts3Table *p, /* Virtual table handle */
1961 Fts3SegReader **apSegment, /* Array of Fts3SegReader objects */
1962 int nSegment, /* Size of apSegment array */
1963 Fts3SegFilter *pFilter, /* Restrictions on range of iteration */
1964 int (*xFunc)(Fts3Table *, void *, char *, int, char *, int), /* Callback */
1965 void *pContext /* Callback context (2nd argument) */
1966 ){
1967 int i; /* Iterator variable */
1968 char *aBuffer = 0; /* Buffer to merge doclists in */
1969 int nAlloc = 0; /* Allocated size of aBuffer buffer */
1970 int rc = SQLITE_OK; /* Return code */
1971
1972 int isIgnoreEmpty = (pFilter->flags & FTS3_SEGMENT_IGNORE_EMPTY);
1973 int isRequirePos = (pFilter->flags & FTS3_SEGMENT_REQUIRE_POS);
1974 int isColFilter = (pFilter->flags & FTS3_SEGMENT_COLUMN_FILTER);
1975 int isPrefix = (pFilter->flags & FTS3_SEGMENT_PREFIX);
1976
1977 /* If there are zero segments, this function is a no-op. This scenario
1978 ** comes about only when reading from an empty database.
1979 */
1980 if( nSegment==0 ) goto finished;
1981
1982 /* If the Fts3SegFilter defines a specific term (or term prefix) to search
1983 ** for, then advance each segment iterator until it points to a term of
1984 ** equal or greater value than the specified term. This prevents many
1985 ** unnecessary merge/sort operations for the case where single segment
1986 ** b-tree leaf nodes contain more than one term.
1987 */
1988 if( pFilter->zTerm ){
1989 int nTerm = pFilter->nTerm;
1990 const char *zTerm = pFilter->zTerm;
1991 for(i=0; i<nSegment; i++){
1992 Fts3SegReader *pSeg = apSegment[i];
1993 while( fts3SegReaderTermCmp(pSeg, zTerm, nTerm)<0 ){
1994 rc = fts3SegReaderNext(pSeg);
1995 if( rc!=SQLITE_OK ) goto finished; }
1996 }
1997 }
1998
1999 fts3SegReaderSort(apSegment, nSegment, nSegment, fts3SegReaderCmp);
2000 while( apSegment[0]->aNode ){
2001 int nTerm = apSegment[0]->nTerm;
2002 char *zTerm = apSegment[0]->zTerm;
2003 int nMerge = 1;
2004
2005 /* If this is a prefix-search, and if the term that apSegment[0] points
2006 ** to does not share a suffix with pFilter->zTerm/nTerm, then all
2007 ** required callbacks have been made. In this case exit early.
2008 **
2009 ** Similarly, if this is a search for an exact match, and the first term
2010 ** of segment apSegment[0] is not a match, exit early.
2011 */
2012 if( pFilter->zTerm ){
2013 if( nTerm<pFilter->nTerm
2014 || (!isPrefix && nTerm>pFilter->nTerm)
2015 || memcmp(zTerm, pFilter->zTerm, pFilter->nTerm)
2016 ){
2017 goto finished;
2018 }
2019 }
2020
2021 while( nMerge<nSegment
2022 && apSegment[nMerge]->aNode
2023 && apSegment[nMerge]->nTerm==nTerm
2024 && 0==memcmp(zTerm, apSegment[nMerge]->zTerm, nTerm)
2025 ){
2026 nMerge++;
2027 }
2028
2029 assert( isIgnoreEmpty || (isRequirePos && !isColFilter) );
2030 if( nMerge==1 && !isIgnoreEmpty ){
2031 Fts3SegReader *p0 = apSegment[0];
2032 rc = xFunc(p, pContext, zTerm, nTerm, p0->aDoclist, p0->nDoclist);
2033 if( rc!=SQLITE_OK ) goto finished;
2034 }else{
2035 int nDoclist = 0; /* Size of doclist */
2036 sqlite3_int64 iPrev = 0; /* Previous docid stored in doclist */
2037
2038 /* The current term of the first nMerge entries in the array
2039 ** of Fts3SegReader objects is the same. The doclists must be merged
2040 ** and a single term added to the new segment.
2041 */
2042 for(i=0; i<nMerge; i++){
2043 fts3SegReaderFirstDocid(apSegment[i]);
2044 }
2045 fts3SegReaderSort(apSegment, nMerge, nMerge, fts3SegReaderDoclistCmp);
2046 while( apSegment[0]->pOffsetList ){
2047 int j; /* Number of segments that share a docid */
2048 char *pList;
2049 int nList;
2050 int nByte;
2051 sqlite3_int64 iDocid = apSegment[0]->iDocid;
2052 fts3SegReaderNextDocid(apSegment[0], &pList, &nList);
2053 j = 1;
2054 while( j<nMerge
2055 && apSegment[j]->pOffsetList
2056 && apSegment[j]->iDocid==iDocid
2057 ){
2058 fts3SegReaderNextDocid(apSegment[j], 0, 0);
2059 j++;
2060 }
2061
2062 if( isColFilter ){
2063 fts3ColumnFilter(pFilter->iCol, &pList, &nList);
2064 }
2065
2066 if( !isIgnoreEmpty || nList>0 ){
2067 nByte = sqlite3Fts3VarintLen(iDocid-iPrev) + (isRequirePos?nList+1:0);
2068 if( nDoclist+nByte>nAlloc ){
2069 char *aNew;
2070 nAlloc = nDoclist+nByte*2;
2071 aNew = sqlite3_realloc(aBuffer, nAlloc);
2072 if( !aNew ){
2073 rc = SQLITE_NOMEM;
2074 goto finished;
2075 }
2076 aBuffer = aNew;
2077 }
2078 nDoclist += sqlite3Fts3PutVarint(&aBuffer[nDoclist], iDocid-iPrev);
2079 iPrev = iDocid;
2080 if( isRequirePos ){
2081 memcpy(&aBuffer[nDoclist], pList, nList);
2082 nDoclist += nList;
2083 aBuffer[nDoclist++] = '\0';
2084 }
2085 }
2086
2087 fts3SegReaderSort(apSegment, nMerge, j, fts3SegReaderDoclistCmp);
2088 }
2089
2090 if( nDoclist>0 ){
2091 rc = xFunc(p, pContext, zTerm, nTerm, aBuffer, nDoclist);
2092 if( rc!=SQLITE_OK ) goto finished;
2093 }
2094 }
2095
2096 /* If there is a term specified to filter on, and this is not a prefix
2097 ** search, return now. The callback that corresponds to the required
2098 ** term (if such a term exists in the index) has already been made.
2099 */
2100 if( pFilter->zTerm && !isPrefix ){
2101 goto finished;
2102 }
2103
2104 for(i=0; i<nMerge; i++){
2105 rc = fts3SegReaderNext(apSegment[i]);
2106 if( rc!=SQLITE_OK ) goto finished;
2107 }
2108 fts3SegReaderSort(apSegment, nSegment, nMerge, fts3SegReaderCmp);
2109 }
2110
2111 finished:
2112 sqlite3_free(aBuffer);
2113 return rc;
2114 }
2115
2116 /*
2117 ** Merge all level iLevel segments in the database into a single
2118 ** iLevel+1 segment. Or, if iLevel<0, merge all segments into a
2119 ** single segment with a level equal to the numerically largest level
2120 ** currently present in the database.
2121 **
2122 ** If this function is called with iLevel<0, but there is only one
2123 ** segment in the database, SQLITE_DONE is returned immediately.
2124 ** Otherwise, if successful, SQLITE_OK is returned. If an error occurs,
2125 ** an SQLite error code is returned.
2126 */
2127 static int fts3SegmentMerge(Fts3Table *p, int iLevel){
2128 int i; /* Iterator variable */
2129 int rc; /* Return code */
2130 int iIdx; /* Index of new segment */
2131 int iNewLevel; /* Level to create new segment at */
2132 sqlite3_stmt *pStmt = 0;
2133 SegmentWriter *pWriter = 0;
2134 int nSegment = 0; /* Number of segments being merged */
2135 Fts3SegReader **apSegment = 0; /* Array of Segment iterators */
2136 Fts3SegReader *pPending = 0; /* Iterator for pending-terms */
2137 Fts3SegFilter filter; /* Segment term filter condition */
2138
2139 if( iLevel<0 ){
2140 /* This call is to merge all segments in the database to a single
2141 ** segment. The level of the new segment is equal to the the numerically
2142 ** greatest segment level currently present in the database. The index
2143 ** of the new segment is always 0.
2144 */
2145 iIdx = 0;
2146 rc = sqlite3Fts3SegReaderPending(p, 0, 0, 1, &pPending);
2147 if( rc!=SQLITE_OK ) goto finished;
2148 rc = fts3SegmentCountMax(p, &nSegment, &iNewLevel);
2149 if( rc!=SQLITE_OK ) goto finished;
2150 nSegment += (pPending!=0);
2151 if( nSegment<=1 ){
2152 return SQLITE_DONE;
2153 }
2154 }else{
2155 /* This call is to merge all segments at level iLevel. Find the next
2156 ** available segment index at level iLevel+1. The call to
2157 ** fts3AllocateSegdirIdx() will merge the segments at level iLevel+1 to
2158 ** a single iLevel+2 segment if necessary.
2159 */
2160 iNewLevel = iLevel+1;
2161 rc = fts3AllocateSegdirIdx(p, iNewLevel, &iIdx);
2162 if( rc!=SQLITE_OK ) goto finished;
2163 rc = fts3SegmentCount(p, iLevel, &nSegment);
2164 if( rc!=SQLITE_OK ) goto finished;
2165 }
2166 assert( nSegment>0 );
2167 assert( iNewLevel>=0 );
2168
2169 /* Allocate space for an array of pointers to segment iterators. */
2170 apSegment = (Fts3SegReader**)sqlite3_malloc(sizeof(Fts3SegReader *)*nSegment);
2171 if( !apSegment ){
2172 rc = SQLITE_NOMEM;
2173 goto finished;
2174 }
2175 memset(apSegment, 0, sizeof(Fts3SegReader *)*nSegment);
2176
2177 /* Allocate a Fts3SegReader structure for each segment being merged. A
2178 ** Fts3SegReader stores the state data required to iterate through all
2179 ** entries on all leaves of a single segment.
2180 */
2181 assert( SQL_SELECT_LEVEL+1==SQL_SELECT_ALL_LEVEL);
2182 rc = fts3SqlStmt(p, SQL_SELECT_LEVEL+(iLevel<0), &pStmt, 0);
2183 if( rc!=SQLITE_OK ) goto finished;
2184 sqlite3_bind_int(pStmt, 1, iLevel);
2185 for(i=0; SQLITE_ROW==(sqlite3_step(pStmt)); i++){
2186 rc = fts3SegReaderNew(p, pStmt, i, &apSegment[i]);
2187 if( rc!=SQLITE_OK ){
2188 goto finished;
2189 }
2190 }
2191 rc = sqlite3_reset(pStmt);
2192 if( pPending ){
2193 apSegment[i] = pPending;
2194 pPending = 0;
2195 }
2196 pStmt = 0;
2197 if( rc!=SQLITE_OK ) goto finished;
2198
2199 memset(&filter, 0, sizeof(Fts3SegFilter));
2200 filter.flags = FTS3_SEGMENT_REQUIRE_POS;
2201 filter.flags |= (iLevel<0 ? FTS3_SEGMENT_IGNORE_EMPTY : 0);
2202 rc = sqlite3Fts3SegReaderIterate(p, apSegment, nSegment,
2203 &filter, fts3MergeCallback, (void *)&pWriter
2204 );
2205 if( rc!=SQLITE_OK ) goto finished;
2206
2207 rc = fts3DeleteSegdir(p, iLevel, apSegment, nSegment);
2208 if( rc==SQLITE_OK ){
2209 rc = fts3SegWriterFlush(p, pWriter, iNewLevel, iIdx);
2210 }
2211
2212 finished:
2213 fts3SegWriterFree(pWriter);
2214 if( apSegment ){
2215 for(i=0; i<nSegment; i++){
2216 sqlite3Fts3SegReaderFree(p, apSegment[i]);
2217 }
2218 sqlite3_free(apSegment);
2219 }
2220 sqlite3Fts3SegReaderFree(p, pPending);
2221 sqlite3_reset(pStmt);
2222 return rc;
2223 }
2224
2225
2226 /*
2227 ** Flush the contents of pendingTerms to a level 0 segment.
2228 */
2229 int sqlite3Fts3PendingTermsFlush(Fts3Table *p){
2230 int rc; /* Return Code */
2231 int idx; /* Index of new segment created */
2232 SegmentWriter *pWriter = 0; /* Used to write the segment */
2233 Fts3SegReader *pReader = 0; /* Used to iterate through the hash table */
2234
2235 /* Allocate a SegReader object to iterate through the contents of the
2236 ** pending-terms table. If an error occurs, or if there are no terms
2237 ** in the pending-terms table, return immediately.
2238 */
2239 rc = sqlite3Fts3SegReaderPending(p, 0, 0, 1, &pReader);
2240 if( rc!=SQLITE_OK || pReader==0 ){
2241 return rc;
2242 }
2243
2244 /* Determine the next index at level 0. If level 0 is already full, this
2245 ** call may merge all existing level 0 segments into a single level 1
2246 ** segment.
2247 */
2248 rc = fts3AllocateSegdirIdx(p, 0, &idx);
2249
2250 /* If no errors have occured, iterate through the contents of the
2251 ** pending-terms hash table using the Fts3SegReader iterator. The callback
2252 ** writes each term (along with its doclist) to the database via the
2253 ** SegmentWriter handle pWriter.
2254 */
2255 if( rc==SQLITE_OK ){
2256 void *c = (void *)&pWriter; /* SegReaderIterate() callback context */
2257 Fts3SegFilter f; /* SegReaderIterate() parameters */
2258
2259 memset(&f, 0, sizeof(Fts3SegFilter));
2260 f.flags = FTS3_SEGMENT_REQUIRE_POS;
2261 rc = sqlite3Fts3SegReaderIterate(p, &pReader, 1, &f, fts3FlushCallback, c);
2262 }
2263 assert( pWriter || rc!=SQLITE_OK );
2264
2265 /* If no errors have occured, flush the SegmentWriter object to the
2266 ** database. Then delete the SegmentWriter and Fts3SegReader objects
2267 ** allocated by this function.
2268 */
2269 if( rc==SQLITE_OK ){
2270 rc = fts3SegWriterFlush(p, pWriter, 0, idx);
2271 }
2272 fts3SegWriterFree(pWriter);
2273 sqlite3Fts3SegReaderFree(p, pReader);
2274
2275 if( rc==SQLITE_OK ){
2276 sqlite3Fts3PendingTermsClear(p);
2277 }
2278 return rc;
2279 }
2280
2281 /*
2282 ** Encode N integers as varints into a blob.
2283 */
2284 static void fts3EncodeIntArray(
2285 int N, /* The number of integers to encode */
2286 u32 *a, /* The integer values */
2287 char *zBuf, /* Write the BLOB here */
2288 int *pNBuf /* Write number of bytes if zBuf[] used here */
2289 ){
2290 int i, j;
2291 for(i=j=0; i<N; i++){
2292 j += sqlite3Fts3PutVarint(&zBuf[j], (sqlite3_int64)a[i]);
2293 }
2294 *pNBuf = j;
2295 }
2296
2297 /*
2298 ** Decode a blob of varints into N integers
2299 */
2300 static void fts3DecodeIntArray(
2301 int N, /* The number of integers to decode */
2302 u32 *a, /* Write the integer values */
2303 const char *zBuf, /* The BLOB containing the varints */
2304 int nBuf /* size of the BLOB */
2305 ){
2306 int i, j;
2307 UNUSED_PARAMETER(nBuf);
2308 for(i=j=0; i<N; i++){
2309 sqlite3_int64 x;
2310 j += sqlite3Fts3GetVarint(&zBuf[j], &x);
2311 assert(j<=nBuf);
2312 a[i] = (u32)(x & 0xffffffff);
2313 }
2314 }
2315
2316 /*
2317 ** Fill in the document size auxiliary information for the matchinfo
2318 ** structure. The auxiliary information is:
2319 **
2320 ** N Total number of documents in the full-text index
2321 ** a0 Average length of column 0 over the whole index
2322 ** n0 Length of column 0 on the matching row
2323 ** ...
2324 ** aM Average length of column M over the whole index
2325 ** nM Length of column M on the matching row
2326 **
2327 ** The fts3MatchinfoDocsizeLocal() routine fills in the nX values.
2328 ** The fts3MatchinfoDocsizeGlobal() routine fills in N and the aX values.
2329 */
2330 int sqlite3Fts3MatchinfoDocsizeLocal(Fts3Cursor *pCur, u32 *a){
2331 const char *pBlob; /* The BLOB holding %_docsize info */
2332 int nBlob; /* Size of the BLOB */
2333 sqlite3_stmt *pStmt; /* Statement for reading and writing */
2334 int i, j; /* Loop counters */
2335 sqlite3_int64 x; /* Varint value */
2336 int rc; /* Result code from subfunctions */
2337 Fts3Table *p; /* The FTS table */
2338
2339 p = (Fts3Table*)pCur->base.pVtab;
2340 rc = fts3SqlStmt(p, SQL_SELECT_DOCSIZE, &pStmt, 0);
2341 if( rc ){
2342 return rc;
2343 }
2344 sqlite3_bind_int64(pStmt, 1, pCur->iPrevId);
2345 if( sqlite3_step(pStmt)==SQLITE_ROW ){
2346 nBlob = sqlite3_column_bytes(pStmt, 0);
2347 pBlob = (const char*)sqlite3_column_blob(pStmt, 0);
2348 for(i=j=0; i<p->nColumn && j<nBlob; i++){
2349 j = sqlite3Fts3GetVarint(&pBlob[j], &x);
2350 a[2+i*2] = (u32)(x & 0xffffffff);
2351 }
2352 }
2353 sqlite3_reset(pStmt);
2354 return SQLITE_OK;
2355 }
2356 int sqlite3Fts3MatchinfoDocsizeGlobal(Fts3Cursor *pCur, u32 *a){
2357 const char *pBlob; /* The BLOB holding %_stat info */
2358 int nBlob; /* Size of the BLOB */
2359 sqlite3_stmt *pStmt; /* Statement for reading and writing */
2360 int i, j; /* Loop counters */
2361 sqlite3_int64 x; /* Varint value */
2362 int nDoc; /* Number of documents */
2363 int rc; /* Result code from subfunctions */
2364 Fts3Table *p; /* The FTS table */
2365
2366 p = (Fts3Table*)pCur->base.pVtab;
2367 rc = fts3SqlStmt(p, SQL_SELECT_DOCTOTAL, &pStmt, 0);
2368 if( rc ){
2369 return rc;
2370 }
2371 if( sqlite3_step(pStmt)==SQLITE_ROW ){
2372 nBlob = sqlite3_column_bytes(pStmt, 0);
2373 pBlob = (const char*)sqlite3_column_blob(pStmt, 0);
2374 j = sqlite3Fts3GetVarint(pBlob, &x);
2375 a[0] = nDoc = (u32)(x & 0xffffffff);
2376 for(i=0; i<p->nColumn && j<nBlob; i++){
2377 j = sqlite3Fts3GetVarint(&pBlob[j], &x);
2378 a[1+i*2] = ((u32)(x & 0xffffffff) + nDoc/2)/nDoc;
2379 }
2380 }
2381 sqlite3_reset(pStmt);
2382 return SQLITE_OK;
2383 }
2384
2385 /*
2386 ** Insert the sizes (in tokens) for each column of the document
2387 ** with docid equal to p->iPrevDocid. The sizes are encoded as
2388 ** a blob of varints.
2389 */
2390 static void fts3InsertDocsize(
2391 int *pRC, /* Result code */
2392 Fts3Table *p, /* Table into which to insert */
2393 u32 *aSz /* Sizes of each column */
2394 ){
2395 char *pBlob; /* The BLOB encoding of the document size */
2396 int nBlob; /* Number of bytes in the BLOB */
2397 sqlite3_stmt *pStmt; /* Statement used to insert the encoding */
2398 int rc; /* Result code from subfunctions */
2399
2400 if( *pRC ) return;
2401 pBlob = sqlite3_malloc( 10*p->nColumn );
2402 if( pBlob==0 ){
2403 *pRC = SQLITE_NOMEM;
2404 return;
2405 }
2406 fts3EncodeIntArray(p->nColumn, aSz, pBlob, &nBlob);
2407 rc = fts3SqlStmt(p, SQL_REPLACE_DOCSIZE, &pStmt, 0);
2408 if( rc ){
2409 sqlite3_free(pBlob);
2410 *pRC = rc;
2411 return;
2412 }
2413 sqlite3_bind_int64(pStmt, 1, p->iPrevDocid);
2414 sqlite3_bind_blob(pStmt, 2, pBlob, nBlob, sqlite3_free);
2415 sqlite3_step(pStmt);
2416 *pRC = sqlite3_reset(pStmt);
2417 }
2418
2419 /*
2420 ** Update the 0 record of the %_stat table so that it holds a blob
2421 ** which contains the document count followed by the cumulative
2422 ** document sizes for all columns.
2423 */
2424 static void fts3UpdateDocTotals(
2425 int *pRC, /* The result code */
2426 Fts3Table *p, /* Table being updated */
2427 u32 *aSzIns, /* Size increases */
2428 u32 *aSzDel, /* Size decreases */
2429 int nChng /* Change in the number of documents */
2430 ){
2431 char *pBlob; /* Storage for BLOB written into %_stat */
2432 int nBlob; /* Size of BLOB written into %_stat */
2433 u32 *a; /* Array of integers that becomes the BLOB */
2434 sqlite3_stmt *pStmt; /* Statement for reading and writing */
2435 int i; /* Loop counter */
2436 int rc; /* Result code from subfunctions */
2437
2438 if( *pRC ) return;
2439 a = sqlite3_malloc( (sizeof(u32)+10)*(p->nColumn+1) );
2440 if( a==0 ){
2441 *pRC = SQLITE_NOMEM;
2442 return;
2443 }
2444 pBlob = (char*)&a[p->nColumn+1];
2445 rc = fts3SqlStmt(p, SQL_SELECT_DOCTOTAL, &pStmt, 0);
2446 if( rc ){
2447 sqlite3_free(a);
2448 *pRC = rc;
2449 return;
2450 }
2451 if( sqlite3_step(pStmt)==SQLITE_ROW ){
2452 fts3DecodeIntArray(p->nColumn+1, a,
2453 sqlite3_column_blob(pStmt, 0),
2454 sqlite3_column_bytes(pStmt, 0));
2455 }else{
2456 memset(a, 0, sizeof(u32)*(p->nColumn+1) );
2457 }
2458 sqlite3_reset(pStmt);
2459 if( nChng<0 && a[0]<(u32)(-nChng) ){
2460 a[0] = 0;
2461 }else{
2462 a[0] += nChng;
2463 }
2464 for(i=0; i<p->nColumn; i++){
2465 u32 x = a[i+1];
2466 if( x+aSzIns[i] < aSzDel[i] ){
2467 x = 0;
2468 }else{
2469 x = x + aSzIns[i] - aSzDel[i];
2470 }
2471 a[i+1] = x;
2472 }
2473 fts3EncodeIntArray(p->nColumn+1, a, pBlob, &nBlob);
2474 rc = fts3SqlStmt(p, SQL_REPLACE_DOCTOTAL, &pStmt, 0);
2475 if( rc ){
2476 sqlite3_free(a);
2477 *pRC = rc;
2478 return;
2479 }
2480 sqlite3_bind_blob(pStmt, 1, pBlob, nBlob, SQLITE_STATIC);
2481 sqlite3_step(pStmt);
2482 *pRC = sqlite3_reset(pStmt);
2483 sqlite3_free(a);
2484 }
2485
2486 /*
2487 ** Handle a 'special' INSERT of the form:
2488 **
2489 ** "INSERT INTO tbl(tbl) VALUES(<expr>)"
2490 **
2491 ** Argument pVal contains the result of <expr>. Currently the only
2492 ** meaningful value to insert is the text 'optimize'.
2493 */
2494 static int fts3SpecialInsert(Fts3Table *p, sqlite3_value *pVal){
2495 int rc; /* Return Code */
2496 const char *zVal = (const char *)sqlite3_value_text(pVal);
2497 int nVal = sqlite3_value_bytes(pVal);
2498
2499 if( !zVal ){
2500 return SQLITE_NOMEM;
2501 }else if( nVal==8 && 0==sqlite3_strnicmp(zVal, "optimize", 8) ){
2502 rc = fts3SegmentMerge(p, -1);
2503 if( rc==SQLITE_DONE ){
2504 rc = SQLITE_OK;
2505 }else{
2506 sqlite3Fts3PendingTermsClear(p);
2507 }
2508 #ifdef SQLITE_TEST
2509 }else if( nVal>9 && 0==sqlite3_strnicmp(zVal, "nodesize=", 9) ){
2510 p->nNodeSize = atoi(&zVal[9]);
2511 rc = SQLITE_OK;
2512 }else if( nVal>11 && 0==sqlite3_strnicmp(zVal, "maxpending=", 9) ){
2513 p->nMaxPendingData = atoi(&zVal[11]);
2514 rc = SQLITE_OK;
2515 #endif
2516 }else{
2517 rc = SQLITE_ERROR;
2518 }
2519
2520 return rc;
2521 }
2522
2523 /*
2524 ** This function does the work for the xUpdate method of FTS3 virtual
2525 ** tables.
2526 */
2527 int sqlite3Fts3UpdateMethod(
2528 sqlite3_vtab *pVtab, /* FTS3 vtab object */
2529 int nArg, /* Size of argument array */
2530 sqlite3_value **apVal, /* Array of arguments */
2531 sqlite_int64 *pRowid /* OUT: The affected (or effected) rowid */
2532 ){
2533 Fts3Table *p = (Fts3Table *)pVtab;
2534 int rc = SQLITE_OK; /* Return Code */
2535 int isRemove = 0; /* True for an UPDATE or DELETE */
2536 sqlite3_int64 iRemove = 0; /* Rowid removed by UPDATE or DELETE */
2537 u32 *aSzIns; /* Sizes of inserted documents */
2538 u32 *aSzDel; /* Sizes of deleted documents */
2539 int nChng = 0; /* Net change in number of documents */
2540
2541
2542 /* Allocate space to hold the change in document sizes */
2543 aSzIns = sqlite3_malloc( sizeof(aSzIns[0])*p->nColumn*2 );
2544 if( aSzIns==0 ) return SQLITE_NOMEM;
2545 aSzDel = &aSzIns[p->nColumn];
2546 memset(aSzIns, 0, sizeof(aSzIns[0])*p->nColumn*2);
2547
2548 /* If this is a DELETE or UPDATE operation, remove the old record. */
2549 if( sqlite3_value_type(apVal[0])!=SQLITE_NULL ){
2550 int isEmpty;
2551 rc = fts3IsEmpty(p, apVal, &isEmpty);
2552 if( rc==SQLITE_OK ){
2553 if( isEmpty ){
2554 /* Deleting this row means the whole table is empty. In this case
2555 ** delete the contents of all three tables and throw away any
2556 ** data in the pendingTerms hash table.
2557 */
2558 rc = fts3DeleteAll(p);
2559 }else{
2560 isRemove = 1;
2561 iRemove = sqlite3_value_int64(apVal[0]);
2562 rc = fts3PendingTermsDocid(p, iRemove);
2563 fts3DeleteTerms(&rc, p, apVal, aSzDel);
2564 fts3SqlExec(&rc, p, SQL_DELETE_CONTENT, apVal);
2565 if( p->bHasDocsize ){
2566 fts3SqlExec(&rc, p, SQL_DELETE_DOCSIZE, apVal);
2567 nChng--;
2568 }
2569 }
2570 }
2571 }else if( sqlite3_value_type(apVal[p->nColumn+2])!=SQLITE_NULL ){
2572 sqlite3_free(aSzIns);
2573 return fts3SpecialInsert(p, apVal[p->nColumn+2]);
2574 }
2575
2576 /* If this is an INSERT or UPDATE operation, insert the new record. */
2577 if( nArg>1 && rc==SQLITE_OK ){
2578 rc = fts3InsertData(p, apVal, pRowid);
2579 if( rc==SQLITE_OK && (!isRemove || *pRowid!=iRemove) ){
2580 rc = fts3PendingTermsDocid(p, *pRowid);
2581 }
2582 if( rc==SQLITE_OK ){
2583 rc = fts3InsertTerms(p, apVal, aSzIns);
2584 }
2585 if( p->bHasDocsize ){
2586 nChng++;
2587 fts3InsertDocsize(&rc, p, aSzIns);
2588 }
2589 }
2590
2591 if( p->bHasDocsize ){
2592 fts3UpdateDocTotals(&rc, p, aSzIns, aSzDel, nChng);
2593 }
2594
2595 sqlite3_free(aSzIns);
2596 return rc;
2597 }
2598
2599 /*
2600 ** Flush any data in the pending-terms hash table to disk. If successful,
2601 ** merge all segments in the database (including the new segment, if
2602 ** there was any data to flush) into a single segment.
2603 */
2604 int sqlite3Fts3Optimize(Fts3Table *p){
2605 int rc;
2606 rc = sqlite3_exec(p->db, "SAVEPOINT fts3", 0, 0, 0);
2607 if( rc==SQLITE_OK ){
2608 rc = fts3SegmentMerge(p, -1);
2609 if( rc==SQLITE_OK ){
2610 rc = sqlite3_exec(p->db, "RELEASE fts3", 0, 0, 0);
2611 if( rc==SQLITE_OK ){
2612 sqlite3Fts3PendingTermsClear(p);
2613 }
2614 }else{
2615 sqlite3_exec(p->db, "ROLLBACK TO fts3", 0, 0, 0);
2616 sqlite3_exec(p->db, "RELEASE fts3", 0, 0, 0);
2617 }
2618 }
2619 return rc;
2620 }
2621
2622 #endif
OLDNEW
« no previous file with comments | « third_party/sqlite/src/ext/fts3/fts3_tokenizer1.c ('k') | third_party/sqlite/src/ext/fts3/mkfts3amal.tcl » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698