Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(200)

Side by Side Diff: third_party/sqlite/src/ext/fts2/fts2.c

Issue 1073293002: [sql] Remove fts2 patches. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Put back SQLITE_CORE only for the icu component for iOS Created 5 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/sqlite/src/Makefile.linux-gcc ('k') | third_party/sqlite/src/ext/fts2/fts2_icu.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* fts2 has a design flaw which can lead to database corruption (see 1 /* fts2 has a design flaw which can lead to database corruption (see
2 ** below). It is recommended not to use it any longer, instead use 2 ** below). It is recommended not to use it any longer, instead use
3 ** fts3 (or higher). If you believe that your use of fts2 is safe, 3 ** fts3 (or higher). If you believe that your use of fts2 is safe,
4 ** add -DSQLITE_ENABLE_BROKEN_FTS2=1 to your CFLAGS. 4 ** add -DSQLITE_ENABLE_BROKEN_FTS2=1 to your CFLAGS.
5 */ 5 */
6 #if (!defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)) \ 6 #if (!defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)) \
7 && !defined(SQLITE_ENABLE_BROKEN_FTS2) 7 && !defined(SQLITE_ENABLE_BROKEN_FTS2)
8 #error fts2 has a design flaw and has been deprecated. 8 #error fts2 has a design flaw and has been deprecated.
9 #endif 9 #endif
10 /* The flaw is that fts2 uses the content table's unaliased rowid as 10 /* The flaw is that fts2 uses the content table's unaliased rowid as
(...skipping 19 matching lines...) Expand all
30 ** 30 **
31 ** May you do good and not evil. 31 ** May you do good and not evil.
32 ** May you find forgiveness for yourself and forgive others. 32 ** May you find forgiveness for yourself and forgive others.
33 ** May you share freely, never taking more than you give. 33 ** May you share freely, never taking more than you give.
34 ** 34 **
35 ****************************************************************************** 35 ******************************************************************************
36 ** 36 **
37 ** This is an SQLite module implementing full-text search. 37 ** This is an SQLite module implementing full-text search.
38 */ 38 */
39 39
40 /* TODO(shess): To make it easier to spot changes without groveling
41 ** through changelogs, I've defined GEARS_FTS2_CHANGES to call them
42 ** out, and I will document them here. On imports, these changes
43 ** should be reviewed to make sure they are still present, or are
44 ** dropped as appropriate.
45 **
46 ** SQLite core adds the custom function fts2_tokenizer() to be used
47 ** for defining new tokenizers. The second parameter is a vtable
48 ** pointer encoded as a blob. Obviously this cannot be exposed to
49 ** Gears callers for security reasons. It could be suppressed in the
50 ** authorizer, but for now I have simply commented the definition out.
51 */
52 #define GEARS_FTS2_CHANGES 1
53
54 /* 40 /*
55 ** The code in this file is only compiled if: 41 ** The code in this file is only compiled if:
56 ** 42 **
57 ** * The FTS2 module is being built as an extension 43 ** * The FTS2 module is being built as an extension
58 ** (in which case SQLITE_CORE is not defined), or 44 ** (in which case SQLITE_CORE is not defined), or
59 ** 45 **
60 ** * The FTS2 module is being built into the core of 46 ** * The FTS2 module is being built into the core of
61 ** SQLite (in which case SQLITE_ENABLE_FTS2 is defined). 47 ** SQLite (in which case SQLITE_ENABLE_FTS2 is defined).
62 */ 48 */
63 49
(...skipping 253 matching lines...) Expand 10 before | Expand all | Expand 10 after
317 #endif 303 #endif
318 304
319 #include <assert.h> 305 #include <assert.h>
320 #include <stdlib.h> 306 #include <stdlib.h>
321 #include <stdio.h> 307 #include <stdio.h>
322 #include <string.h> 308 #include <string.h>
323 #include "fts2.h" 309 #include "fts2.h"
324 #include "fts2_hash.h" 310 #include "fts2_hash.h"
325 #include "fts2_tokenizer.h" 311 #include "fts2_tokenizer.h"
326 #include "sqlite3.h" 312 #include "sqlite3.h"
327 #ifndef SQLITE_CORE 313 #include "sqlite3ext.h"
328 # include "sqlite3ext.h" 314 SQLITE_EXTENSION_INIT1
329 SQLITE_EXTENSION_INIT1
330 #endif
331 315
332 316
333 /* TODO(shess) MAN, this thing needs some refactoring. At minimum, it 317 /* TODO(shess) MAN, this thing needs some refactoring. At minimum, it
334 ** would be nice to order the file better, perhaps something along the 318 ** would be nice to order the file better, perhaps something along the
335 ** lines of: 319 ** lines of:
336 ** 320 **
337 ** - utility functions 321 ** - utility functions
338 ** - table setup functions 322 ** - table setup functions
339 ** - table update functions 323 ** - table update functions
340 ** - table query functions 324 ** - table query functions
341 ** 325 **
342 ** Put the query functions last because they're likely to reference 326 ** Put the query functions last because they're likely to reference
343 ** typedefs or functions from the table update section. 327 ** typedefs or functions from the table update section.
344 */ 328 */
345 329
346 #if 0 330 #if 0
347 # define TRACE(A) printf A; fflush(stdout) 331 # define TRACE(A) printf A; fflush(stdout)
348 #else 332 #else
349 # define TRACE(A) 333 # define TRACE(A)
350 #endif 334 #endif
351 335
352 #if 0
353 /* Useful to set breakpoints. See main.c sqlite3Corrupt(). */
354 static int fts2Corrupt(void){
355 return SQLITE_CORRUPT;
356 }
357 # define SQLITE_CORRUPT_BKPT fts2Corrupt()
358 #else
359 # define SQLITE_CORRUPT_BKPT SQLITE_CORRUPT
360 #endif
361
362 /* It is not safe to call isspace(), tolower(), or isalnum() on 336 /* It is not safe to call isspace(), tolower(), or isalnum() on
363 ** hi-bit-set characters. This is the same solution used in the 337 ** hi-bit-set characters. This is the same solution used in the
364 ** tokenizer. 338 ** tokenizer.
365 */ 339 */
366 /* TODO(shess) The snippet-generation code should be using the 340 /* TODO(shess) The snippet-generation code should be using the
367 ** tokenizer-generated tokens rather than doing its own local 341 ** tokenizer-generated tokens rather than doing its own local
368 ** tokenization. 342 ** tokenization.
369 */ 343 */
370 /* TODO(shess) Is __isascii() a portable version of (c&0x80)==0? */ 344 /* TODO(shess) Is __isascii() a portable version of (c&0x80)==0? */
371 static int safe_isspace(char c){ 345 static int safe_isspace(char c){
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after
440 vu >>= 7; 414 vu >>= 7;
441 }while( vu!=0 ); 415 }while( vu!=0 );
442 q[-1] &= 0x7f; /* turn off high bit in final byte */ 416 q[-1] &= 0x7f; /* turn off high bit in final byte */
443 assert( q - (unsigned char *)p <= VARINT_MAX ); 417 assert( q - (unsigned char *)p <= VARINT_MAX );
444 return (int) (q - (unsigned char *)p); 418 return (int) (q - (unsigned char *)p);
445 } 419 }
446 420
447 /* Read a 64-bit variable-length integer from memory starting at p[0]. 421 /* Read a 64-bit variable-length integer from memory starting at p[0].
448 * Return the number of bytes read, or 0 on error. 422 * Return the number of bytes read, or 0 on error.
449 * The value is stored in *v. */ 423 * The value is stored in *v. */
450 static int getVarintSafe(const char *p, sqlite_int64 *v, int max){ 424 static int getVarint(const char *p, sqlite_int64 *v){
451 const unsigned char *q = (const unsigned char *) p; 425 const unsigned char *q = (const unsigned char *) p;
452 sqlite_uint64 x = 0, y = 1; 426 sqlite_uint64 x = 0, y = 1;
453 if( max>VARINT_MAX ) max = VARINT_MAX; 427 while( (*q & 0x80) == 0x80 ){
454 while( max && (*q & 0x80) == 0x80 ){
455 max--;
456 x += y * (*q++ & 0x7f); 428 x += y * (*q++ & 0x7f);
457 y <<= 7; 429 y <<= 7;
458 } 430 if( q - (unsigned char *)p >= VARINT_MAX ){ /* bad data */
459 if ( !max ){ 431 assert( 0 );
460 assert( 0 ); 432 return 0;
461 return 0; /* tried to read too much; bad data */ 433 }
462 } 434 }
463 x += y * (*q++); 435 x += y * (*q++);
464 *v = (sqlite_int64) x; 436 *v = (sqlite_int64) x;
465 return (int) (q - (unsigned char *)p); 437 return (int) (q - (unsigned char *)p);
466 } 438 }
467 439
468 static int getVarint(const char *p, sqlite_int64 *v){ 440 static int getVarint32(const char *p, int *pi){
469 return getVarintSafe(p, v, VARINT_MAX);
470 }
471
472 static int getVarint32Safe(const char *p, int *pi, int max){
473 sqlite_int64 i; 441 sqlite_int64 i;
474 int ret = getVarintSafe(p, &i, max); 442 int ret = getVarint(p, &i);
475 if( !ret ) return ret;
476 *pi = (int) i; 443 *pi = (int) i;
477 assert( *pi==i ); 444 assert( *pi==i );
478 return ret; 445 return ret;
479 } 446 }
480 447
481 static int getVarint32(const char* p, int *pi){
482 return getVarint32Safe(p, pi, VARINT_MAX);
483 }
484
485 /*******************************************************************/ 448 /*******************************************************************/
486 /* DataBuffer is used to collect data into a buffer in piecemeal 449 /* DataBuffer is used to collect data into a buffer in piecemeal
487 ** fashion. It implements the usual distinction between amount of 450 ** fashion. It implements the usual distinction between amount of
488 ** data currently stored (nData) and buffer capacity (nCapacity). 451 ** data currently stored (nData) and buffer capacity (nCapacity).
489 ** 452 **
490 ** dataBufferInit - create a buffer with given initial capacity. 453 ** dataBufferInit - create a buffer with given initial capacity.
491 ** dataBufferReset - forget buffer's data, retaining capacity. 454 ** dataBufferReset - forget buffer's data, retaining capacity.
492 ** dataBufferDestroy - free buffer's data. 455 ** dataBufferDestroy - free buffer's data.
493 ** dataBufferSwap - swap contents of two buffers. 456 ** dataBufferSwap - swap contents of two buffers.
494 ** dataBufferExpand - expand capacity without adding data. 457 ** dataBufferExpand - expand capacity without adding data.
(...skipping 148 matching lines...) Expand 10 before | Expand all | Expand 10 after
643 DocListType iType; 606 DocListType iType;
644 const char *pData; 607 const char *pData;
645 int nData; 608 int nData;
646 609
647 sqlite_int64 iDocid; 610 sqlite_int64 iDocid;
648 int nElement; 611 int nElement;
649 } DLReader; 612 } DLReader;
650 613
651 static int dlrAtEnd(DLReader *pReader){ 614 static int dlrAtEnd(DLReader *pReader){
652 assert( pReader->nData>=0 ); 615 assert( pReader->nData>=0 );
653 return pReader->nData<=0; 616 return pReader->nData==0;
654 } 617 }
655 static sqlite_int64 dlrDocid(DLReader *pReader){ 618 static sqlite_int64 dlrDocid(DLReader *pReader){
656 assert( !dlrAtEnd(pReader) ); 619 assert( !dlrAtEnd(pReader) );
657 return pReader->iDocid; 620 return pReader->iDocid;
658 } 621 }
659 static const char *dlrDocData(DLReader *pReader){ 622 static const char *dlrDocData(DLReader *pReader){
660 assert( !dlrAtEnd(pReader) ); 623 assert( !dlrAtEnd(pReader) );
661 return pReader->pData; 624 return pReader->pData;
662 } 625 }
663 static int dlrDocDataBytes(DLReader *pReader){ 626 static int dlrDocDataBytes(DLReader *pReader){
664 assert( !dlrAtEnd(pReader) ); 627 assert( !dlrAtEnd(pReader) );
665 return pReader->nElement; 628 return pReader->nElement;
666 } 629 }
667 static int dlrAllDataBytes(DLReader *pReader){ 630 static int dlrAllDataBytes(DLReader *pReader){
668 assert( !dlrAtEnd(pReader) ); 631 assert( !dlrAtEnd(pReader) );
669 return pReader->nData; 632 return pReader->nData;
670 } 633 }
671 /* TODO(shess) Consider adding a field to track iDocid varint length 634 /* TODO(shess) Consider adding a field to track iDocid varint length
672 ** to make these two functions faster. This might matter (a tiny bit) 635 ** to make these two functions faster. This might matter (a tiny bit)
673 ** for queries. 636 ** for queries.
674 */ 637 */
675 static const char *dlrPosData(DLReader *pReader){ 638 static const char *dlrPosData(DLReader *pReader){
676 sqlite_int64 iDummy; 639 sqlite_int64 iDummy;
677 int n = getVarintSafe(pReader->pData, &iDummy, pReader->nElement); 640 int n = getVarint(pReader->pData, &iDummy);
678 if( !n ) return NULL;
679 assert( !dlrAtEnd(pReader) ); 641 assert( !dlrAtEnd(pReader) );
680 return pReader->pData+n; 642 return pReader->pData+n;
681 } 643 }
682 static int dlrPosDataLen(DLReader *pReader){ 644 static int dlrPosDataLen(DLReader *pReader){
683 sqlite_int64 iDummy; 645 sqlite_int64 iDummy;
684 int n = getVarint(pReader->pData, &iDummy); 646 int n = getVarint(pReader->pData, &iDummy);
685 assert( !dlrAtEnd(pReader) ); 647 assert( !dlrAtEnd(pReader) );
686 return pReader->nElement-n; 648 return pReader->nElement-n;
687 } 649 }
688 static int dlrStep(DLReader *pReader){ 650 static void dlrStep(DLReader *pReader){
689 assert( !dlrAtEnd(pReader) ); 651 assert( !dlrAtEnd(pReader) );
690 652
691 /* Skip past current doclist element. */ 653 /* Skip past current doclist element. */
692 assert( pReader->nElement<=pReader->nData ); 654 assert( pReader->nElement<=pReader->nData );
693 pReader->pData += pReader->nElement; 655 pReader->pData += pReader->nElement;
694 pReader->nData -= pReader->nElement; 656 pReader->nData -= pReader->nElement;
695 657
696 /* If there is more data, read the next doclist element. */ 658 /* If there is more data, read the next doclist element. */
697 if( pReader->nData>0 ){ 659 if( pReader->nData!=0 ){
698 sqlite_int64 iDocidDelta; 660 sqlite_int64 iDocidDelta;
699 int nTotal = 0; 661 int iDummy, n = getVarint(pReader->pData, &iDocidDelta);
700 int iDummy, n = getVarintSafe(pReader->pData, &iDocidDelta, pReader->nData);
701 if( !n ) return SQLITE_CORRUPT_BKPT;
702 nTotal += n;
703 pReader->iDocid += iDocidDelta; 662 pReader->iDocid += iDocidDelta;
704 if( pReader->iType>=DL_POSITIONS ){ 663 if( pReader->iType>=DL_POSITIONS ){
664 assert( n<pReader->nData );
705 while( 1 ){ 665 while( 1 ){
706 n = getVarint32Safe(pReader->pData+nTotal, &iDummy, 666 n += getVarint32(pReader->pData+n, &iDummy);
707 pReader->nData-nTotal); 667 assert( n<=pReader->nData );
708 if( !n ) return SQLITE_CORRUPT_BKPT;
709 nTotal += n;
710 if( iDummy==POS_END ) break; 668 if( iDummy==POS_END ) break;
711 if( iDummy==POS_COLUMN ){ 669 if( iDummy==POS_COLUMN ){
712 n = getVarint32Safe(pReader->pData+nTotal, &iDummy, 670 n += getVarint32(pReader->pData+n, &iDummy);
713 pReader->nData-nTotal); 671 assert( n<pReader->nData );
714 if( !n ) return SQLITE_CORRUPT_BKPT;
715 nTotal += n;
716 }else if( pReader->iType==DL_POSITIONS_OFFSETS ){ 672 }else if( pReader->iType==DL_POSITIONS_OFFSETS ){
717 n = getVarint32Safe(pReader->pData+nTotal, &iDummy, 673 n += getVarint32(pReader->pData+n, &iDummy);
718 pReader->nData-nTotal); 674 n += getVarint32(pReader->pData+n, &iDummy);
719 if( !n ) return SQLITE_CORRUPT_BKPT; 675 assert( n<pReader->nData );
720 nTotal += n;
721 n = getVarint32Safe(pReader->pData+nTotal, &iDummy,
722 pReader->nData-nTotal);
723 if( !n ) return SQLITE_CORRUPT_BKPT;
724 nTotal += n;
725 } 676 }
726 } 677 }
727 } 678 }
728 pReader->nElement = nTotal; 679 pReader->nElement = n;
729 assert( pReader->nElement<=pReader->nData ); 680 assert( pReader->nElement<=pReader->nData );
730 } 681 }
731 return SQLITE_OK;
732 } 682 }
733 static void dlrDestroy(DLReader *pReader){ 683 static void dlrInit(DLReader *pReader, DocListType iType,
734 SCRAMBLE(pReader); 684 const char *pData, int nData){
735 }
736 static int dlrInit(DLReader *pReader, DocListType iType,
737 const char *pData, int nData){
738 int rc;
739 assert( pData!=NULL && nData!=0 ); 685 assert( pData!=NULL && nData!=0 );
740 pReader->iType = iType; 686 pReader->iType = iType;
741 pReader->pData = pData; 687 pReader->pData = pData;
742 pReader->nData = nData; 688 pReader->nData = nData;
743 pReader->nElement = 0; 689 pReader->nElement = 0;
744 pReader->iDocid = 0; 690 pReader->iDocid = 0;
745 691
746 /* Load the first element's data. There must be a first element. */ 692 /* Load the first element's data. There must be a first element. */
747 rc = dlrStep(pReader); 693 dlrStep(pReader);
748 if( rc!=SQLITE_OK ) dlrDestroy(pReader); 694 }
749 return rc; 695 static void dlrDestroy(DLReader *pReader){
696 SCRAMBLE(pReader);
750 } 697 }
751 698
752 #ifndef NDEBUG 699 #ifndef NDEBUG
753 /* Verify that the doclist can be validly decoded. Also returns the 700 /* Verify that the doclist can be validly decoded. Also returns the
754 ** last docid found because it is convenient in other assertions for 701 ** last docid found because it is convenient in other assertions for
755 ** DLWriter. 702 ** DLWriter.
756 */ 703 */
757 static void docListValidate(DocListType iType, const char *pData, int nData, 704 static void docListValidate(DocListType iType, const char *pData, int nData,
758 sqlite_int64 *pLastDocid){ 705 sqlite_int64 *pLastDocid){
759 sqlite_int64 iPrevDocid = 0; 706 sqlite_int64 iPrevDocid = 0;
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after
826 ** 773 **
827 ** iLastDocid is the final docid in the doclist in pData. It is 774 ** iLastDocid is the final docid in the doclist in pData. It is
828 ** needed to create the new iPrevDocid for future delta-encoding. The 775 ** needed to create the new iPrevDocid for future delta-encoding. The
829 ** code could decode the passed doclist to recreate iLastDocid, but 776 ** code could decode the passed doclist to recreate iLastDocid, but
830 ** the only current user (docListMerge) already has decoded this 777 ** the only current user (docListMerge) already has decoded this
831 ** information. 778 ** information.
832 */ 779 */
833 /* TODO(shess) This has become just a helper for docListMerge. 780 /* TODO(shess) This has become just a helper for docListMerge.
834 ** Consider a refactor to make this cleaner. 781 ** Consider a refactor to make this cleaner.
835 */ 782 */
836 static int dlwAppend(DLWriter *pWriter, 783 static void dlwAppend(DLWriter *pWriter,
837 const char *pData, int nData, 784 const char *pData, int nData,
838 sqlite_int64 iFirstDocid, sqlite_int64 iLastDocid){ 785 sqlite_int64 iFirstDocid, sqlite_int64 iLastDocid){
839 sqlite_int64 iDocid = 0; 786 sqlite_int64 iDocid = 0;
840 char c[VARINT_MAX]; 787 char c[VARINT_MAX];
841 int nFirstOld, nFirstNew; /* Old and new varint len of first docid. */ 788 int nFirstOld, nFirstNew; /* Old and new varint len of first docid. */
842 #ifndef NDEBUG 789 #ifndef NDEBUG
843 sqlite_int64 iLastDocidDelta; 790 sqlite_int64 iLastDocidDelta;
844 #endif 791 #endif
845 792
846 /* Recode the initial docid as delta from iPrevDocid. */ 793 /* Recode the initial docid as delta from iPrevDocid. */
847 nFirstOld = getVarintSafe(pData, &iDocid, nData); 794 nFirstOld = getVarint(pData, &iDocid);
848 if( !nFirstOld ) return SQLITE_CORRUPT_BKPT;
849 assert( nFirstOld<nData || (nFirstOld==nData && pWriter->iType==DL_DOCIDS) ); 795 assert( nFirstOld<nData || (nFirstOld==nData && pWriter->iType==DL_DOCIDS) );
850 nFirstNew = putVarint(c, iFirstDocid-pWriter->iPrevDocid); 796 nFirstNew = putVarint(c, iFirstDocid-pWriter->iPrevDocid);
851 797
852 /* Verify that the incoming doclist is valid AND that it ends with 798 /* Verify that the incoming doclist is valid AND that it ends with
853 ** the expected docid. This is essential because we'll trust this 799 ** the expected docid. This is essential because we'll trust this
854 ** docid in future delta-encoding. 800 ** docid in future delta-encoding.
855 */ 801 */
856 ASSERT_VALID_DOCLIST(pWriter->iType, pData, nData, &iLastDocidDelta); 802 ASSERT_VALID_DOCLIST(pWriter->iType, pData, nData, &iLastDocidDelta);
857 assert( iLastDocid==iFirstDocid-iDocid+iLastDocidDelta ); 803 assert( iLastDocid==iFirstDocid-iDocid+iLastDocidDelta );
858 804
859 /* Append recoded initial docid and everything else. Rest of docids 805 /* Append recoded initial docid and everything else. Rest of docids
860 ** should have been delta-encoded from previous initial docid. 806 ** should have been delta-encoded from previous initial docid.
861 */ 807 */
862 if( nFirstOld<nData ){ 808 if( nFirstOld<nData ){
863 dataBufferAppend2(pWriter->b, c, nFirstNew, 809 dataBufferAppend2(pWriter->b, c, nFirstNew,
864 pData+nFirstOld, nData-nFirstOld); 810 pData+nFirstOld, nData-nFirstOld);
865 }else{ 811 }else{
866 dataBufferAppend(pWriter->b, c, nFirstNew); 812 dataBufferAppend(pWriter->b, c, nFirstNew);
867 } 813 }
868 pWriter->iPrevDocid = iLastDocid; 814 pWriter->iPrevDocid = iLastDocid;
869 return SQLITE_OK;
870 } 815 }
871 static int dlwCopy(DLWriter *pWriter, DLReader *pReader){ 816 static void dlwCopy(DLWriter *pWriter, DLReader *pReader){
872 return dlwAppend(pWriter, dlrDocData(pReader), dlrDocDataBytes(pReader), 817 dlwAppend(pWriter, dlrDocData(pReader), dlrDocDataBytes(pReader),
873 dlrDocid(pReader), dlrDocid(pReader)); 818 dlrDocid(pReader), dlrDocid(pReader));
874 } 819 }
875 static void dlwAdd(DLWriter *pWriter, sqlite_int64 iDocid){ 820 static void dlwAdd(DLWriter *pWriter, sqlite_int64 iDocid){
876 char c[VARINT_MAX]; 821 char c[VARINT_MAX];
877 int n = putVarint(c, iDocid-pWriter->iPrevDocid); 822 int n = putVarint(c, iDocid-pWriter->iPrevDocid);
878 823
879 /* Docids must ascend. */ 824 /* Docids must ascend. */
880 assert( !pWriter->has_iPrevDocid || iDocid>pWriter->iPrevDocid ); 825 assert( !pWriter->has_iPrevDocid || iDocid>pWriter->iPrevDocid );
881 assert( pWriter->iType==DL_DOCIDS ); 826 assert( pWriter->iType==DL_DOCIDS );
882 827
883 dataBufferAppend(pWriter->b, c, n); 828 dataBufferAppend(pWriter->b, c, n);
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
924 return pReader->iPosition; 869 return pReader->iPosition;
925 } 870 }
926 static int plrStartOffset(PLReader *pReader){ 871 static int plrStartOffset(PLReader *pReader){
927 assert( !plrAtEnd(pReader) ); 872 assert( !plrAtEnd(pReader) );
928 return pReader->iStartOffset; 873 return pReader->iStartOffset;
929 } 874 }
930 static int plrEndOffset(PLReader *pReader){ 875 static int plrEndOffset(PLReader *pReader){
931 assert( !plrAtEnd(pReader) ); 876 assert( !plrAtEnd(pReader) );
932 return pReader->iEndOffset; 877 return pReader->iEndOffset;
933 } 878 }
934 static int plrStep(PLReader *pReader){ 879 static void plrStep(PLReader *pReader){
935 int i, n, nTotal = 0; 880 int i, n;
936 881
937 assert( !plrAtEnd(pReader) ); 882 assert( !plrAtEnd(pReader) );
938 883
939 if( pReader->nData<=0 ){ 884 if( pReader->nData==0 ){
940 pReader->pData = NULL; 885 pReader->pData = NULL;
941 return SQLITE_OK; 886 return;
942 } 887 }
943 888
944 n = getVarint32Safe(pReader->pData, &i, pReader->nData); 889 n = getVarint32(pReader->pData, &i);
945 if( !n ) return SQLITE_CORRUPT_BKPT;
946 nTotal += n;
947 if( i==POS_COLUMN ){ 890 if( i==POS_COLUMN ){
948 n = getVarint32Safe(pReader->pData+nTotal, &pReader->iColumn, 891 n += getVarint32(pReader->pData+n, &pReader->iColumn);
949 pReader->nData-nTotal);
950 if( !n ) return SQLITE_CORRUPT_BKPT;
951 nTotal += n;
952 pReader->iPosition = 0; 892 pReader->iPosition = 0;
953 pReader->iStartOffset = 0; 893 pReader->iStartOffset = 0;
954 n = getVarint32Safe(pReader->pData+nTotal, &i, pReader->nData-nTotal); 894 n += getVarint32(pReader->pData+n, &i);
955 if( !n ) return SQLITE_CORRUPT_BKPT;
956 nTotal += n;
957 } 895 }
958 /* Should never see adjacent column changes. */ 896 /* Should never see adjacent column changes. */
959 assert( i!=POS_COLUMN ); 897 assert( i!=POS_COLUMN );
960 898
961 if( i==POS_END ){ 899 if( i==POS_END ){
962 assert( nTotal<=pReader->nData );
963 pReader->nData = 0; 900 pReader->nData = 0;
964 pReader->pData = NULL; 901 pReader->pData = NULL;
965 return SQLITE_OK; 902 return;
966 } 903 }
967 904
968 pReader->iPosition += i-POS_BASE; 905 pReader->iPosition += i-POS_BASE;
969 if( pReader->iType==DL_POSITIONS_OFFSETS ){ 906 if( pReader->iType==DL_POSITIONS_OFFSETS ){
970 n = getVarint32Safe(pReader->pData+nTotal, &i, pReader->nData-nTotal); 907 n += getVarint32(pReader->pData+n, &i);
971 if( !n ) return SQLITE_CORRUPT_BKPT;
972 nTotal += n;
973 pReader->iStartOffset += i; 908 pReader->iStartOffset += i;
974 n = getVarint32Safe(pReader->pData+nTotal, &i, pReader->nData-nTotal); 909 n += getVarint32(pReader->pData+n, &i);
975 if( !n ) return SQLITE_CORRUPT_BKPT;
976 nTotal += n;
977 pReader->iEndOffset = pReader->iStartOffset+i; 910 pReader->iEndOffset = pReader->iStartOffset+i;
978 } 911 }
979 assert( nTotal<=pReader->nData ); 912 assert( n<=pReader->nData );
980 pReader->pData += nTotal; 913 pReader->pData += n;
981 pReader->nData -= nTotal; 914 pReader->nData -= n;
982 return SQLITE_OK;
983 } 915 }
984 916
985 static void plrDestroy(PLReader *pReader){ 917 static void plrInit(PLReader *pReader, DLReader *pDLReader){
986 SCRAMBLE(pReader);
987 }
988
989 static int plrInit(PLReader *pReader, DLReader *pDLReader){
990 int rc;
991 pReader->pData = dlrPosData(pDLReader); 918 pReader->pData = dlrPosData(pDLReader);
992 pReader->nData = dlrPosDataLen(pDLReader); 919 pReader->nData = dlrPosDataLen(pDLReader);
993 pReader->iType = pDLReader->iType; 920 pReader->iType = pDLReader->iType;
994 pReader->iColumn = 0; 921 pReader->iColumn = 0;
995 pReader->iPosition = 0; 922 pReader->iPosition = 0;
996 pReader->iStartOffset = 0; 923 pReader->iStartOffset = 0;
997 pReader->iEndOffset = 0; 924 pReader->iEndOffset = 0;
998 rc = plrStep(pReader); 925 plrStep(pReader);
999 if( rc!=SQLITE_OK ) plrDestroy(pReader); 926 }
1000 return rc; 927 static void plrDestroy(PLReader *pReader){
928 SCRAMBLE(pReader);
1001 } 929 }
1002 930
1003 /*******************************************************************/ 931 /*******************************************************************/
1004 /* PLWriter is used in constructing a document's position list. As a 932 /* PLWriter is used in constructing a document's position list. As a
1005 ** convenience, if iType is DL_DOCIDS, PLWriter becomes a no-op. 933 ** convenience, if iType is DL_DOCIDS, PLWriter becomes a no-op.
1006 ** PLWriter writes to the associated DLWriter's buffer. 934 ** PLWriter writes to the associated DLWriter's buffer.
1007 ** 935 **
1008 ** plwInit - init for writing a document's poslist. 936 ** plwInit - init for writing a document's poslist.
1009 ** plwDestroy - clear a writer. 937 ** plwDestroy - clear a writer.
1010 ** plwAdd - append position and offset information. 938 ** plwAdd - append position and offset information.
(...skipping 165 matching lines...) Expand 10 before | Expand all | Expand 10 after
1176 /* Copy the doclist data of iType in pData/nData into *out, trimming 1104 /* Copy the doclist data of iType in pData/nData into *out, trimming
1177 ** unnecessary data as we go. Only columns matching iColumn are 1105 ** unnecessary data as we go. Only columns matching iColumn are
1178 ** copied, all columns copied if iColumn is -1. Elements with no 1106 ** copied, all columns copied if iColumn is -1. Elements with no
1179 ** matching columns are dropped. The output is an iOutType doclist. 1107 ** matching columns are dropped. The output is an iOutType doclist.
1180 */ 1108 */
1181 /* NOTE(shess) This code is only valid after all doclists are merged. 1109 /* NOTE(shess) This code is only valid after all doclists are merged.
1182 ** If this is run before merges, then doclist items which represent 1110 ** If this is run before merges, then doclist items which represent
1183 ** deletion will be trimmed, and will thus not effect a deletion 1111 ** deletion will be trimmed, and will thus not effect a deletion
1184 ** during the merge. 1112 ** during the merge.
1185 */ 1113 */
1186 static int docListTrim(DocListType iType, const char *pData, int nData, 1114 static void docListTrim(DocListType iType, const char *pData, int nData,
1187 int iColumn, DocListType iOutType, DataBuffer *out){ 1115 int iColumn, DocListType iOutType, DataBuffer *out){
1188 DLReader dlReader; 1116 DLReader dlReader;
1189 DLWriter dlWriter; 1117 DLWriter dlWriter;
1190 int rc;
1191 1118
1192 assert( iOutType<=iType ); 1119 assert( iOutType<=iType );
1193 1120
1194 rc = dlrInit(&dlReader, iType, pData, nData); 1121 dlrInit(&dlReader, iType, pData, nData);
1195 if( rc!=SQLITE_OK ) return rc;
1196 dlwInit(&dlWriter, iOutType, out); 1122 dlwInit(&dlWriter, iOutType, out);
1197 1123
1198 while( !dlrAtEnd(&dlReader) ){ 1124 while( !dlrAtEnd(&dlReader) ){
1199 PLReader plReader; 1125 PLReader plReader;
1200 PLWriter plWriter; 1126 PLWriter plWriter;
1201 int match = 0; 1127 int match = 0;
1202 1128
1203 rc = plrInit(&plReader, &dlReader); 1129 plrInit(&plReader, &dlReader);
1204 if( rc!=SQLITE_OK ) break;
1205 1130
1206 while( !plrAtEnd(&plReader) ){ 1131 while( !plrAtEnd(&plReader) ){
1207 if( iColumn==-1 || plrColumn(&plReader)==iColumn ){ 1132 if( iColumn==-1 || plrColumn(&plReader)==iColumn ){
1208 if( !match ){ 1133 if( !match ){
1209 plwInit(&plWriter, &dlWriter, dlrDocid(&dlReader)); 1134 plwInit(&plWriter, &dlWriter, dlrDocid(&dlReader));
1210 match = 1; 1135 match = 1;
1211 } 1136 }
1212 plwAdd(&plWriter, plrColumn(&plReader), plrPosition(&plReader), 1137 plwAdd(&plWriter, plrColumn(&plReader), plrPosition(&plReader),
1213 plrStartOffset(&plReader), plrEndOffset(&plReader)); 1138 plrStartOffset(&plReader), plrEndOffset(&plReader));
1214 } 1139 }
1215 rc = plrStep(&plReader); 1140 plrStep(&plReader);
1216 if( rc!=SQLITE_OK ){
1217 plrDestroy(&plReader);
1218 goto err;
1219 }
1220 } 1141 }
1221 if( match ){ 1142 if( match ){
1222 plwTerminate(&plWriter); 1143 plwTerminate(&plWriter);
1223 plwDestroy(&plWriter); 1144 plwDestroy(&plWriter);
1224 } 1145 }
1225 1146
1226 plrDestroy(&plReader); 1147 plrDestroy(&plReader);
1227 rc = dlrStep(&dlReader); 1148 dlrStep(&dlReader);
1228 if( rc!=SQLITE_OK ) break;
1229 } 1149 }
1230 err:
1231 dlwDestroy(&dlWriter); 1150 dlwDestroy(&dlWriter);
1232 dlrDestroy(&dlReader); 1151 dlrDestroy(&dlReader);
1233 return rc;
1234 } 1152 }
1235 1153
1236 /* Used by docListMerge() to keep doclists in the ascending order by 1154 /* Used by docListMerge() to keep doclists in the ascending order by
1237 ** docid, then ascending order by age (so the newest comes first). 1155 ** docid, then ascending order by age (so the newest comes first).
1238 */ 1156 */
1239 typedef struct OrderedDLReader { 1157 typedef struct OrderedDLReader {
1240 DLReader *pReader; 1158 DLReader *pReader;
1241 1159
1242 /* TODO(shess) If we assume that docListMerge pReaders is ordered by 1160 /* TODO(shess) If we assume that docListMerge pReaders is ordered by
1243 ** age (which we do), then we could use pReader comparisons to break 1161 ** age (which we do), then we could use pReader comparisons to break
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
1280 } 1198 }
1281 1199
1282 /* Given an array of doclist readers, merge their doclist elements 1200 /* Given an array of doclist readers, merge their doclist elements
1283 ** into out in sorted order (by docid), dropping elements from older 1201 ** into out in sorted order (by docid), dropping elements from older
1284 ** readers when there is a duplicate docid. pReaders is assumed to be 1202 ** readers when there is a duplicate docid. pReaders is assumed to be
1285 ** ordered by age, oldest first. 1203 ** ordered by age, oldest first.
1286 */ 1204 */
1287 /* TODO(shess) nReaders must be <= MERGE_COUNT. This should probably 1205 /* TODO(shess) nReaders must be <= MERGE_COUNT. This should probably
1288 ** be fixed. 1206 ** be fixed.
1289 */ 1207 */
1290 static int docListMerge(DataBuffer *out, 1208 static void docListMerge(DataBuffer *out,
1291 DLReader *pReaders, int nReaders){ 1209 DLReader *pReaders, int nReaders){
1292 OrderedDLReader readers[MERGE_COUNT]; 1210 OrderedDLReader readers[MERGE_COUNT];
1293 DLWriter writer; 1211 DLWriter writer;
1294 int i, n; 1212 int i, n;
1295 const char *pStart = 0; 1213 const char *pStart = 0;
1296 int nStart = 0; 1214 int nStart = 0;
1297 sqlite_int64 iFirstDocid = 0, iLastDocid = 0; 1215 sqlite_int64 iFirstDocid = 0, iLastDocid = 0;
1298 int rc = SQLITE_OK;
1299 1216
1300 assert( nReaders>0 ); 1217 assert( nReaders>0 );
1301 if( nReaders==1 ){ 1218 if( nReaders==1 ){
1302 dataBufferAppend(out, dlrDocData(pReaders), dlrAllDataBytes(pReaders)); 1219 dataBufferAppend(out, dlrDocData(pReaders), dlrAllDataBytes(pReaders));
1303 return SQLITE_OK; 1220 return;
1304 } 1221 }
1305 1222
1306 assert( nReaders<=MERGE_COUNT ); 1223 assert( nReaders<=MERGE_COUNT );
1307 n = 0; 1224 n = 0;
1308 for(i=0; i<nReaders; i++){ 1225 for(i=0; i<nReaders; i++){
1309 assert( pReaders[i].iType==pReaders[0].iType ); 1226 assert( pReaders[i].iType==pReaders[0].iType );
1310 readers[i].pReader = pReaders+i; 1227 readers[i].pReader = pReaders+i;
1311 readers[i].idx = i; 1228 readers[i].idx = i;
1312 n += dlrAllDataBytes(&pReaders[i]); 1229 n += dlrAllDataBytes(&pReaders[i]);
1313 } 1230 }
(...skipping 12 matching lines...) Expand all
1326 sqlite_int64 iDocid = dlrDocid(readers[0].pReader); 1243 sqlite_int64 iDocid = dlrDocid(readers[0].pReader);
1327 1244
1328 /* If this is a continuation of the current buffer to copy, extend 1245 /* If this is a continuation of the current buffer to copy, extend
1329 ** that buffer. memcpy() seems to be more efficient if it has a 1246 ** that buffer. memcpy() seems to be more efficient if it has a
1330 ** lots of data to copy. 1247 ** lots of data to copy.
1331 */ 1248 */
1332 if( dlrDocData(readers[0].pReader)==pStart+nStart ){ 1249 if( dlrDocData(readers[0].pReader)==pStart+nStart ){
1333 nStart += dlrDocDataBytes(readers[0].pReader); 1250 nStart += dlrDocDataBytes(readers[0].pReader);
1334 }else{ 1251 }else{
1335 if( pStart!=0 ){ 1252 if( pStart!=0 ){
1336 rc = dlwAppend(&writer, pStart, nStart, iFirstDocid, iLastDocid); 1253 dlwAppend(&writer, pStart, nStart, iFirstDocid, iLastDocid);
1337 if( rc!=SQLITE_OK ) goto err;
1338 } 1254 }
1339 pStart = dlrDocData(readers[0].pReader); 1255 pStart = dlrDocData(readers[0].pReader);
1340 nStart = dlrDocDataBytes(readers[0].pReader); 1256 nStart = dlrDocDataBytes(readers[0].pReader);
1341 iFirstDocid = iDocid; 1257 iFirstDocid = iDocid;
1342 } 1258 }
1343 iLastDocid = iDocid; 1259 iLastDocid = iDocid;
1344 rc = dlrStep(readers[0].pReader); 1260 dlrStep(readers[0].pReader);
1345 if( rc!=SQLITE_OK ) goto err;
1346 1261
1347 /* Drop all of the older elements with the same docid. */ 1262 /* Drop all of the older elements with the same docid. */
1348 for(i=1; i<nReaders && 1263 for(i=1; i<nReaders &&
1349 !dlrAtEnd(readers[i].pReader) && 1264 !dlrAtEnd(readers[i].pReader) &&
1350 dlrDocid(readers[i].pReader)==iDocid; i++){ 1265 dlrDocid(readers[i].pReader)==iDocid; i++){
1351 rc = dlrStep(readers[i].pReader); 1266 dlrStep(readers[i].pReader);
1352 if( rc!=SQLITE_OK ) goto err;
1353 } 1267 }
1354 1268
1355 /* Get the readers back into order. */ 1269 /* Get the readers back into order. */
1356 while( i-->0 ){ 1270 while( i-->0 ){
1357 orderedDLReaderReorder(readers+i, nReaders-i); 1271 orderedDLReaderReorder(readers+i, nReaders-i);
1358 } 1272 }
1359 } 1273 }
1360 1274
1361 /* Copy over any remaining elements. */ 1275 /* Copy over any remaining elements. */
1362 if( nStart>0 ) 1276 if( nStart>0 ) dlwAppend(&writer, pStart, nStart, iFirstDocid, iLastDocid);
1363 rc = dlwAppend(&writer, pStart, nStart, iFirstDocid, iLastDocid);
1364 err:
1365 dlwDestroy(&writer); 1277 dlwDestroy(&writer);
1366 return rc;
1367 } 1278 }
1368 1279
1369 /* Helper function for posListUnion(). Compares the current position 1280 /* Helper function for posListUnion(). Compares the current position
1370 ** between left and right, returning as standard C idiom of <0 if 1281 ** between left and right, returning as standard C idiom of <0 if
1371 ** left<right, >0 if left>right, and 0 if left==right. "End" always 1282 ** left<right, >0 if left>right, and 0 if left==right. "End" always
1372 ** compares greater. 1283 ** compares greater.
1373 */ 1284 */
1374 static int posListCmp(PLReader *pLeft, PLReader *pRight){ 1285 static int posListCmp(PLReader *pLeft, PLReader *pRight){
1375 assert( pLeft->iType==pRight->iType ); 1286 assert( pLeft->iType==pRight->iType );
1376 if( pLeft->iType==DL_DOCIDS ) return 0; 1287 if( pLeft->iType==DL_DOCIDS ) return 0;
(...skipping 15 matching lines...) Expand all
1392 if( plrEndOffset(pLeft)>plrEndOffset(pRight) ) return 1; 1303 if( plrEndOffset(pLeft)>plrEndOffset(pRight) ) return 1;
1393 1304
1394 return 0; 1305 return 0;
1395 } 1306 }
1396 1307
1397 /* Write the union of position lists in pLeft and pRight to pOut. 1308 /* Write the union of position lists in pLeft and pRight to pOut.
1398 ** "Union" in this case meaning "All unique position tuples". Should 1309 ** "Union" in this case meaning "All unique position tuples". Should
1399 ** work with any doclist type, though both inputs and the output 1310 ** work with any doclist type, though both inputs and the output
1400 ** should be the same type. 1311 ** should be the same type.
1401 */ 1312 */
1402 static int posListUnion(DLReader *pLeft, DLReader *pRight, DLWriter *pOut){ 1313 static void posListUnion(DLReader *pLeft, DLReader *pRight, DLWriter *pOut){
1403 PLReader left, right; 1314 PLReader left, right;
1404 PLWriter writer; 1315 PLWriter writer;
1405 int rc;
1406 1316
1407 assert( dlrDocid(pLeft)==dlrDocid(pRight) ); 1317 assert( dlrDocid(pLeft)==dlrDocid(pRight) );
1408 assert( pLeft->iType==pRight->iType ); 1318 assert( pLeft->iType==pRight->iType );
1409 assert( pLeft->iType==pOut->iType ); 1319 assert( pLeft->iType==pOut->iType );
1410 1320
1411 rc = plrInit(&left, pLeft); 1321 plrInit(&left, pLeft);
1412 if( rc != SQLITE_OK ) return rc; 1322 plrInit(&right, pRight);
1413 rc = plrInit(&right, pRight);
1414 if( rc != SQLITE_OK ){
1415 plrDestroy(&left);
1416 return rc;
1417 }
1418 plwInit(&writer, pOut, dlrDocid(pLeft)); 1323 plwInit(&writer, pOut, dlrDocid(pLeft));
1419 1324
1420 while( !plrAtEnd(&left) || !plrAtEnd(&right) ){ 1325 while( !plrAtEnd(&left) || !plrAtEnd(&right) ){
1421 int c = posListCmp(&left, &right); 1326 int c = posListCmp(&left, &right);
1422 if( c<0 ){ 1327 if( c<0 ){
1423 plwCopy(&writer, &left); 1328 plwCopy(&writer, &left);
1424 rc = plrStep(&left); 1329 plrStep(&left);
1425 if( rc != SQLITE_OK ) break;
1426 }else if( c>0 ){ 1330 }else if( c>0 ){
1427 plwCopy(&writer, &right); 1331 plwCopy(&writer, &right);
1428 rc = plrStep(&right); 1332 plrStep(&right);
1429 if( rc != SQLITE_OK ) break;
1430 }else{ 1333 }else{
1431 plwCopy(&writer, &left); 1334 plwCopy(&writer, &left);
1432 rc = plrStep(&left); 1335 plrStep(&left);
1433 if( rc != SQLITE_OK ) break; 1336 plrStep(&right);
1434 rc = plrStep(&right);
1435 if( rc != SQLITE_OK ) break;
1436 } 1337 }
1437 } 1338 }
1438 1339
1439 plwTerminate(&writer); 1340 plwTerminate(&writer);
1440 plwDestroy(&writer); 1341 plwDestroy(&writer);
1441 plrDestroy(&left); 1342 plrDestroy(&left);
1442 plrDestroy(&right); 1343 plrDestroy(&right);
1443 return rc;
1444 } 1344 }
1445 1345
1446 /* Write the union of doclists in pLeft and pRight to pOut. For 1346 /* Write the union of doclists in pLeft and pRight to pOut. For
1447 ** docids in common between the inputs, the union of the position 1347 ** docids in common between the inputs, the union of the position
1448 ** lists is written. Inputs and outputs are always type DL_DEFAULT. 1348 ** lists is written. Inputs and outputs are always type DL_DEFAULT.
1449 */ 1349 */
1450 static int docListUnion( 1350 static void docListUnion(
1451 const char *pLeft, int nLeft, 1351 const char *pLeft, int nLeft,
1452 const char *pRight, int nRight, 1352 const char *pRight, int nRight,
1453 DataBuffer *pOut /* Write the combined doclist here */ 1353 DataBuffer *pOut /* Write the combined doclist here */
1454 ){ 1354 ){
1455 DLReader left, right; 1355 DLReader left, right;
1456 DLWriter writer; 1356 DLWriter writer;
1457 int rc;
1458 1357
1459 if( nLeft==0 ){ 1358 if( nLeft==0 ){
1460 if( nRight!=0) dataBufferAppend(pOut, pRight, nRight); 1359 if( nRight!=0) dataBufferAppend(pOut, pRight, nRight);
1461 return SQLITE_OK; 1360 return;
1462 } 1361 }
1463 if( nRight==0 ){ 1362 if( nRight==0 ){
1464 dataBufferAppend(pOut, pLeft, nLeft); 1363 dataBufferAppend(pOut, pLeft, nLeft);
1465 return SQLITE_OK; 1364 return;
1466 } 1365 }
1467 1366
1468 rc = dlrInit(&left, DL_DEFAULT, pLeft, nLeft); 1367 dlrInit(&left, DL_DEFAULT, pLeft, nLeft);
1469 if( rc!=SQLITE_OK ) return rc; 1368 dlrInit(&right, DL_DEFAULT, pRight, nRight);
1470 rc = dlrInit(&right, DL_DEFAULT, pRight, nRight);
1471 if( rc!=SQLITE_OK ){
1472 dlrDestroy(&left);
1473 return rc;
1474 }
1475 dlwInit(&writer, DL_DEFAULT, pOut); 1369 dlwInit(&writer, DL_DEFAULT, pOut);
1476 1370
1477 while( !dlrAtEnd(&left) || !dlrAtEnd(&right) ){ 1371 while( !dlrAtEnd(&left) || !dlrAtEnd(&right) ){
1478 if( dlrAtEnd(&right) ){ 1372 if( dlrAtEnd(&right) ){
1479 rc = dlwCopy(&writer, &left); 1373 dlwCopy(&writer, &left);
1480 if( rc!=SQLITE_OK ) break; 1374 dlrStep(&left);
1481 rc = dlrStep(&left);
1482 if( rc!=SQLITE_OK ) break;
1483 }else if( dlrAtEnd(&left) ){ 1375 }else if( dlrAtEnd(&left) ){
1484 rc = dlwCopy(&writer, &right); 1376 dlwCopy(&writer, &right);
1485 if( rc!=SQLITE_OK ) break; 1377 dlrStep(&right);
1486 rc = dlrStep(&right);
1487 if( rc!=SQLITE_OK ) break;
1488 }else if( dlrDocid(&left)<dlrDocid(&right) ){ 1378 }else if( dlrDocid(&left)<dlrDocid(&right) ){
1489 rc = dlwCopy(&writer, &left); 1379 dlwCopy(&writer, &left);
1490 if( rc!=SQLITE_OK ) break; 1380 dlrStep(&left);
1491 rc = dlrStep(&left);
1492 if( rc!=SQLITE_OK ) break;
1493 }else if( dlrDocid(&left)>dlrDocid(&right) ){ 1381 }else if( dlrDocid(&left)>dlrDocid(&right) ){
1494 rc = dlwCopy(&writer, &right); 1382 dlwCopy(&writer, &right);
1495 if( rc!=SQLITE_OK ) break; 1383 dlrStep(&right);
1496 rc = dlrStep(&right);
1497 if( rc!=SQLITE_OK ) break;
1498 }else{ 1384 }else{
1499 rc = posListUnion(&left, &right, &writer); 1385 posListUnion(&left, &right, &writer);
1500 if( rc!=SQLITE_OK ) break; 1386 dlrStep(&left);
1501 rc = dlrStep(&left); 1387 dlrStep(&right);
1502 if( rc!=SQLITE_OK ) break;
1503 rc = dlrStep(&right);
1504 if( rc!=SQLITE_OK ) break;
1505 } 1388 }
1506 } 1389 }
1507 1390
1508 dlrDestroy(&left); 1391 dlrDestroy(&left);
1509 dlrDestroy(&right); 1392 dlrDestroy(&right);
1510 dlwDestroy(&writer); 1393 dlwDestroy(&writer);
1511 return rc;
1512 } 1394 }
1513 1395
1514 /* pLeft and pRight are DLReaders positioned to the same docid. 1396 /* pLeft and pRight are DLReaders positioned to the same docid.
1515 ** 1397 **
1516 ** If there are no instances in pLeft or pRight where the position 1398 ** If there are no instances in pLeft or pRight where the position
1517 ** of pLeft is one less than the position of pRight, then this 1399 ** of pLeft is one less than the position of pRight, then this
1518 ** routine adds nothing to pOut. 1400 ** routine adds nothing to pOut.
1519 ** 1401 **
1520 ** If there are one or more instances where positions from pLeft 1402 ** If there are one or more instances where positions from pLeft
1521 ** are exactly one less than positions from pRight, then add a new 1403 ** are exactly one less than positions from pRight, then add a new
1522 ** document record to pOut. If pOut wants to hold positions, then 1404 ** document record to pOut. If pOut wants to hold positions, then
1523 ** include the positions from pRight that are one more than a 1405 ** include the positions from pRight that are one more than a
1524 ** position in pLeft. In other words: pRight.iPos==pLeft.iPos+1. 1406 ** position in pLeft. In other words: pRight.iPos==pLeft.iPos+1.
1525 */ 1407 */
1526 static int posListPhraseMerge(DLReader *pLeft, DLReader *pRight, 1408 static void posListPhraseMerge(DLReader *pLeft, DLReader *pRight,
1527 DLWriter *pOut){ 1409 DLWriter *pOut){
1528 PLReader left, right; 1410 PLReader left, right;
1529 PLWriter writer; 1411 PLWriter writer;
1530 int match = 0; 1412 int match = 0;
1531 int rc;
1532 1413
1533 assert( dlrDocid(pLeft)==dlrDocid(pRight) ); 1414 assert( dlrDocid(pLeft)==dlrDocid(pRight) );
1534 assert( pOut->iType!=DL_POSITIONS_OFFSETS ); 1415 assert( pOut->iType!=DL_POSITIONS_OFFSETS );
1535 1416
1536 rc = plrInit(&left, pLeft); 1417 plrInit(&left, pLeft);
1537 if( rc!=SQLITE_OK ) return rc; 1418 plrInit(&right, pRight);
1538 rc = plrInit(&right, pRight);
1539 if( rc!=SQLITE_OK ){
1540 plrDestroy(&left);
1541 return rc;
1542 }
1543 1419
1544 while( !plrAtEnd(&left) && !plrAtEnd(&right) ){ 1420 while( !plrAtEnd(&left) && !plrAtEnd(&right) ){
1545 if( plrColumn(&left)<plrColumn(&right) ){ 1421 if( plrColumn(&left)<plrColumn(&right) ){
1546 rc = plrStep(&left); 1422 plrStep(&left);
1547 if( rc!=SQLITE_OK ) break;
1548 }else if( plrColumn(&left)>plrColumn(&right) ){ 1423 }else if( plrColumn(&left)>plrColumn(&right) ){
1549 rc = plrStep(&right); 1424 plrStep(&right);
1550 if( rc!=SQLITE_OK ) break;
1551 }else if( plrPosition(&left)+1<plrPosition(&right) ){ 1425 }else if( plrPosition(&left)+1<plrPosition(&right) ){
1552 rc = plrStep(&left); 1426 plrStep(&left);
1553 if( rc!=SQLITE_OK ) break;
1554 }else if( plrPosition(&left)+1>plrPosition(&right) ){ 1427 }else if( plrPosition(&left)+1>plrPosition(&right) ){
1555 rc = plrStep(&right); 1428 plrStep(&right);
1556 if( rc!=SQLITE_OK ) break;
1557 }else{ 1429 }else{
1558 if( !match ){ 1430 if( !match ){
1559 plwInit(&writer, pOut, dlrDocid(pLeft)); 1431 plwInit(&writer, pOut, dlrDocid(pLeft));
1560 match = 1; 1432 match = 1;
1561 } 1433 }
1562 plwAdd(&writer, plrColumn(&right), plrPosition(&right), 0, 0); 1434 plwAdd(&writer, plrColumn(&right), plrPosition(&right), 0, 0);
1563 rc = plrStep(&left); 1435 plrStep(&left);
1564 if( rc!=SQLITE_OK ) break; 1436 plrStep(&right);
1565 rc = plrStep(&right);
1566 if( rc!=SQLITE_OK ) break;
1567 } 1437 }
1568 } 1438 }
1569 1439
1570 if( match ){ 1440 if( match ){
1571 plwTerminate(&writer); 1441 plwTerminate(&writer);
1572 plwDestroy(&writer); 1442 plwDestroy(&writer);
1573 } 1443 }
1574 1444
1575 plrDestroy(&left); 1445 plrDestroy(&left);
1576 plrDestroy(&right); 1446 plrDestroy(&right);
1577 return rc;
1578 } 1447 }
1579 1448
1580 /* We have two doclists with positions: pLeft and pRight. 1449 /* We have two doclists with positions: pLeft and pRight.
1581 ** Write the phrase intersection of these two doclists into pOut. 1450 ** Write the phrase intersection of these two doclists into pOut.
1582 ** 1451 **
1583 ** A phrase intersection means that two documents only match 1452 ** A phrase intersection means that two documents only match
1584 ** if pLeft.iPos+1==pRight.iPos. 1453 ** if pLeft.iPos+1==pRight.iPos.
1585 ** 1454 **
1586 ** iType controls the type of data written to pOut. If iType is 1455 ** iType controls the type of data written to pOut. If iType is
1587 ** DL_POSITIONS, the positions are those from pRight. 1456 ** DL_POSITIONS, the positions are those from pRight.
1588 */ 1457 */
1589 static int docListPhraseMerge( 1458 static void docListPhraseMerge(
1590 const char *pLeft, int nLeft, 1459 const char *pLeft, int nLeft,
1591 const char *pRight, int nRight, 1460 const char *pRight, int nRight,
1592 DocListType iType, 1461 DocListType iType,
1593 DataBuffer *pOut /* Write the combined doclist here */ 1462 DataBuffer *pOut /* Write the combined doclist here */
1594 ){ 1463 ){
1595 DLReader left, right; 1464 DLReader left, right;
1596 DLWriter writer; 1465 DLWriter writer;
1597 int rc;
1598 1466
1599 if( nLeft==0 || nRight==0 ) return SQLITE_OK; 1467 if( nLeft==0 || nRight==0 ) return;
1600 1468
1601 assert( iType!=DL_POSITIONS_OFFSETS ); 1469 assert( iType!=DL_POSITIONS_OFFSETS );
1602 1470
1603 rc = dlrInit(&left, DL_POSITIONS, pLeft, nLeft); 1471 dlrInit(&left, DL_POSITIONS, pLeft, nLeft);
1604 if( rc!=SQLITE_OK ) return rc; 1472 dlrInit(&right, DL_POSITIONS, pRight, nRight);
1605 rc = dlrInit(&right, DL_POSITIONS, pRight, nRight);
1606 if( rc!=SQLITE_OK ){
1607 dlrDestroy(&left);
1608 return rc;
1609 }
1610 dlwInit(&writer, iType, pOut); 1473 dlwInit(&writer, iType, pOut);
1611 1474
1612 while( !dlrAtEnd(&left) && !dlrAtEnd(&right) ){ 1475 while( !dlrAtEnd(&left) && !dlrAtEnd(&right) ){
1613 if( dlrDocid(&left)<dlrDocid(&right) ){ 1476 if( dlrDocid(&left)<dlrDocid(&right) ){
1614 rc = dlrStep(&left); 1477 dlrStep(&left);
1615 if( rc!=SQLITE_OK ) break;
1616 }else if( dlrDocid(&right)<dlrDocid(&left) ){ 1478 }else if( dlrDocid(&right)<dlrDocid(&left) ){
1617 rc = dlrStep(&right); 1479 dlrStep(&right);
1618 if( rc!=SQLITE_OK ) break;
1619 }else{ 1480 }else{
1620 rc = posListPhraseMerge(&left, &right, &writer); 1481 posListPhraseMerge(&left, &right, &writer);
1621 if( rc!=SQLITE_OK ) break; 1482 dlrStep(&left);
1622 rc = dlrStep(&left); 1483 dlrStep(&right);
1623 if( rc!=SQLITE_OK ) break;
1624 rc = dlrStep(&right);
1625 if( rc!=SQLITE_OK ) break;
1626 } 1484 }
1627 } 1485 }
1628 1486
1629 dlrDestroy(&left); 1487 dlrDestroy(&left);
1630 dlrDestroy(&right); 1488 dlrDestroy(&right);
1631 dlwDestroy(&writer); 1489 dlwDestroy(&writer);
1632 return rc;
1633 } 1490 }
1634 1491
1635 /* We have two DL_DOCIDS doclists: pLeft and pRight. 1492 /* We have two DL_DOCIDS doclists: pLeft and pRight.
1636 ** Write the intersection of these two doclists into pOut as a 1493 ** Write the intersection of these two doclists into pOut as a
1637 ** DL_DOCIDS doclist. 1494 ** DL_DOCIDS doclist.
1638 */ 1495 */
1639 static int docListAndMerge( 1496 static void docListAndMerge(
1640 const char *pLeft, int nLeft, 1497 const char *pLeft, int nLeft,
1641 const char *pRight, int nRight, 1498 const char *pRight, int nRight,
1642 DataBuffer *pOut /* Write the combined doclist here */ 1499 DataBuffer *pOut /* Write the combined doclist here */
1643 ){ 1500 ){
1644 DLReader left, right; 1501 DLReader left, right;
1645 DLWriter writer; 1502 DLWriter writer;
1646 int rc;
1647 1503
1648 if( nLeft==0 || nRight==0 ) return SQLITE_OK; 1504 if( nLeft==0 || nRight==0 ) return;
1649 1505
1650 rc = dlrInit(&left, DL_DOCIDS, pLeft, nLeft); 1506 dlrInit(&left, DL_DOCIDS, pLeft, nLeft);
1651 if( rc!=SQLITE_OK ) return rc; 1507 dlrInit(&right, DL_DOCIDS, pRight, nRight);
1652 rc = dlrInit(&right, DL_DOCIDS, pRight, nRight);
1653 if( rc!=SQLITE_OK ){
1654 dlrDestroy(&left);
1655 return rc;
1656 }
1657 dlwInit(&writer, DL_DOCIDS, pOut); 1508 dlwInit(&writer, DL_DOCIDS, pOut);
1658 1509
1659 while( !dlrAtEnd(&left) && !dlrAtEnd(&right) ){ 1510 while( !dlrAtEnd(&left) && !dlrAtEnd(&right) ){
1660 if( dlrDocid(&left)<dlrDocid(&right) ){ 1511 if( dlrDocid(&left)<dlrDocid(&right) ){
1661 rc = dlrStep(&left); 1512 dlrStep(&left);
1662 if( rc!=SQLITE_OK ) break;
1663 }else if( dlrDocid(&right)<dlrDocid(&left) ){ 1513 }else if( dlrDocid(&right)<dlrDocid(&left) ){
1664 rc = dlrStep(&right); 1514 dlrStep(&right);
1665 if( rc!=SQLITE_OK ) break;
1666 }else{ 1515 }else{
1667 dlwAdd(&writer, dlrDocid(&left)); 1516 dlwAdd(&writer, dlrDocid(&left));
1668 rc = dlrStep(&left); 1517 dlrStep(&left);
1669 if( rc!=SQLITE_OK ) break; 1518 dlrStep(&right);
1670 rc = dlrStep(&right);
1671 if( rc!=SQLITE_OK ) break;
1672 } 1519 }
1673 } 1520 }
1674 1521
1675 dlrDestroy(&left); 1522 dlrDestroy(&left);
1676 dlrDestroy(&right); 1523 dlrDestroy(&right);
1677 dlwDestroy(&writer); 1524 dlwDestroy(&writer);
1678 return rc;
1679 } 1525 }
1680 1526
1681 /* We have two DL_DOCIDS doclists: pLeft and pRight. 1527 /* We have two DL_DOCIDS doclists: pLeft and pRight.
1682 ** Write the union of these two doclists into pOut as a 1528 ** Write the union of these two doclists into pOut as a
1683 ** DL_DOCIDS doclist. 1529 ** DL_DOCIDS doclist.
1684 */ 1530 */
1685 static int docListOrMerge( 1531 static void docListOrMerge(
1686 const char *pLeft, int nLeft, 1532 const char *pLeft, int nLeft,
1687 const char *pRight, int nRight, 1533 const char *pRight, int nRight,
1688 DataBuffer *pOut /* Write the combined doclist here */ 1534 DataBuffer *pOut /* Write the combined doclist here */
1689 ){ 1535 ){
1690 DLReader left, right; 1536 DLReader left, right;
1691 DLWriter writer; 1537 DLWriter writer;
1692 int rc;
1693 1538
1694 if( nLeft==0 ){ 1539 if( nLeft==0 ){
1695 if( nRight!=0 ) dataBufferAppend(pOut, pRight, nRight); 1540 if( nRight!=0 ) dataBufferAppend(pOut, pRight, nRight);
1696 return SQLITE_OK; 1541 return;
1697 } 1542 }
1698 if( nRight==0 ){ 1543 if( nRight==0 ){
1699 dataBufferAppend(pOut, pLeft, nLeft); 1544 dataBufferAppend(pOut, pLeft, nLeft);
1700 return SQLITE_OK; 1545 return;
1701 } 1546 }
1702 1547
1703 rc = dlrInit(&left, DL_DOCIDS, pLeft, nLeft); 1548 dlrInit(&left, DL_DOCIDS, pLeft, nLeft);
1704 if( rc!=SQLITE_OK ) return rc; 1549 dlrInit(&right, DL_DOCIDS, pRight, nRight);
1705 rc = dlrInit(&right, DL_DOCIDS, pRight, nRight);
1706 if( rc!=SQLITE_OK ){
1707 dlrDestroy(&left);
1708 return rc;
1709 }
1710 dlwInit(&writer, DL_DOCIDS, pOut); 1550 dlwInit(&writer, DL_DOCIDS, pOut);
1711 1551
1712 while( !dlrAtEnd(&left) || !dlrAtEnd(&right) ){ 1552 while( !dlrAtEnd(&left) || !dlrAtEnd(&right) ){
1713 if( dlrAtEnd(&right) ){ 1553 if( dlrAtEnd(&right) ){
1714 dlwAdd(&writer, dlrDocid(&left)); 1554 dlwAdd(&writer, dlrDocid(&left));
1715 rc = dlrStep(&left); 1555 dlrStep(&left);
1716 if( rc!=SQLITE_OK ) break;
1717 }else if( dlrAtEnd(&left) ){ 1556 }else if( dlrAtEnd(&left) ){
1718 dlwAdd(&writer, dlrDocid(&right)); 1557 dlwAdd(&writer, dlrDocid(&right));
1719 rc = dlrStep(&right); 1558 dlrStep(&right);
1720 if( rc!=SQLITE_OK ) break;
1721 }else if( dlrDocid(&left)<dlrDocid(&right) ){ 1559 }else if( dlrDocid(&left)<dlrDocid(&right) ){
1722 dlwAdd(&writer, dlrDocid(&left)); 1560 dlwAdd(&writer, dlrDocid(&left));
1723 rc = dlrStep(&left); 1561 dlrStep(&left);
1724 if( rc!=SQLITE_OK ) break;
1725 }else if( dlrDocid(&right)<dlrDocid(&left) ){ 1562 }else if( dlrDocid(&right)<dlrDocid(&left) ){
1726 dlwAdd(&writer, dlrDocid(&right)); 1563 dlwAdd(&writer, dlrDocid(&right));
1727 rc = dlrStep(&right); 1564 dlrStep(&right);
1728 if( rc!=SQLITE_OK ) break;
1729 }else{ 1565 }else{
1730 dlwAdd(&writer, dlrDocid(&left)); 1566 dlwAdd(&writer, dlrDocid(&left));
1731 rc = dlrStep(&left); 1567 dlrStep(&left);
1732 if( rc!=SQLITE_OK ) break; 1568 dlrStep(&right);
1733 rc = dlrStep(&right);
1734 if( rc!=SQLITE_OK ) break;
1735 } 1569 }
1736 } 1570 }
1737 1571
1738 dlrDestroy(&left); 1572 dlrDestroy(&left);
1739 dlrDestroy(&right); 1573 dlrDestroy(&right);
1740 dlwDestroy(&writer); 1574 dlwDestroy(&writer);
1741 return rc;
1742 } 1575 }
1743 1576
1744 /* We have two DL_DOCIDS doclists: pLeft and pRight. 1577 /* We have two DL_DOCIDS doclists: pLeft and pRight.
1745 ** Write into pOut as DL_DOCIDS doclist containing all documents that 1578 ** Write into pOut as DL_DOCIDS doclist containing all documents that
1746 ** occur in pLeft but not in pRight. 1579 ** occur in pLeft but not in pRight.
1747 */ 1580 */
1748 static int docListExceptMerge( 1581 static void docListExceptMerge(
1749 const char *pLeft, int nLeft, 1582 const char *pLeft, int nLeft,
1750 const char *pRight, int nRight, 1583 const char *pRight, int nRight,
1751 DataBuffer *pOut /* Write the combined doclist here */ 1584 DataBuffer *pOut /* Write the combined doclist here */
1752 ){ 1585 ){
1753 DLReader left, right; 1586 DLReader left, right;
1754 DLWriter writer; 1587 DLWriter writer;
1755 int rc;
1756 1588
1757 if( nLeft==0 ) return SQLITE_OK; 1589 if( nLeft==0 ) return;
1758 if( nRight==0 ){ 1590 if( nRight==0 ){
1759 dataBufferAppend(pOut, pLeft, nLeft); 1591 dataBufferAppend(pOut, pLeft, nLeft);
1760 return SQLITE_OK; 1592 return;
1761 } 1593 }
1762 1594
1763 rc = dlrInit(&left, DL_DOCIDS, pLeft, nLeft); 1595 dlrInit(&left, DL_DOCIDS, pLeft, nLeft);
1764 if( rc!=SQLITE_OK ) return rc; 1596 dlrInit(&right, DL_DOCIDS, pRight, nRight);
1765 rc = dlrInit(&right, DL_DOCIDS, pRight, nRight);
1766 if( rc!=SQLITE_OK ){
1767 dlrDestroy(&left);
1768 return rc;
1769 }
1770 dlwInit(&writer, DL_DOCIDS, pOut); 1597 dlwInit(&writer, DL_DOCIDS, pOut);
1771 1598
1772 while( !dlrAtEnd(&left) ){ 1599 while( !dlrAtEnd(&left) ){
1773 while( !dlrAtEnd(&right) && dlrDocid(&right)<dlrDocid(&left) ){ 1600 while( !dlrAtEnd(&right) && dlrDocid(&right)<dlrDocid(&left) ){
1774 rc = dlrStep(&right); 1601 dlrStep(&right);
1775 if( rc!=SQLITE_OK ) goto err;
1776 } 1602 }
1777 if( dlrAtEnd(&right) || dlrDocid(&left)<dlrDocid(&right) ){ 1603 if( dlrAtEnd(&right) || dlrDocid(&left)<dlrDocid(&right) ){
1778 dlwAdd(&writer, dlrDocid(&left)); 1604 dlwAdd(&writer, dlrDocid(&left));
1779 } 1605 }
1780 rc = dlrStep(&left); 1606 dlrStep(&left);
1781 if( rc!=SQLITE_OK ) break;
1782 } 1607 }
1783 1608
1784 err:
1785 dlrDestroy(&left); 1609 dlrDestroy(&left);
1786 dlrDestroy(&right); 1610 dlrDestroy(&right);
1787 dlwDestroy(&writer); 1611 dlwDestroy(&writer);
1788 return rc;
1789 } 1612 }
1790 1613
1791 static char *string_dup_n(const char *s, int n){ 1614 static char *string_dup_n(const char *s, int n){
1792 char *str = sqlite3_malloc(n + 1); 1615 char *str = sqlite3_malloc(n + 1);
1793 memcpy(str, s, n); 1616 memcpy(str, s, n);
1794 str[n] = '\0'; 1617 str[n] = '\0';
1795 return str; 1618 return str;
1796 } 1619 }
1797 1620
1798 /* Duplicate a string; the caller must free() the returned string. 1621 /* Duplicate a string; the caller must free() the returned string.
(...skipping 183 matching lines...) Expand 10 before | Expand all | Expand 10 after
1982 /* CONTENT_EXISTS */ "select rowid from %_content limit 1", 1805 /* CONTENT_EXISTS */ "select rowid from %_content limit 1",
1983 1806
1984 /* BLOCK_INSERT */ "insert into %_segments values (?)", 1807 /* BLOCK_INSERT */ "insert into %_segments values (?)",
1985 /* BLOCK_SELECT */ "select block from %_segments where rowid = ?", 1808 /* BLOCK_SELECT */ "select block from %_segments where rowid = ?",
1986 /* BLOCK_DELETE */ "delete from %_segments where rowid between ? and ?", 1809 /* BLOCK_DELETE */ "delete from %_segments where rowid between ? and ?",
1987 /* BLOCK_DELETE_ALL */ "delete from %_segments", 1810 /* BLOCK_DELETE_ALL */ "delete from %_segments",
1988 1811
1989 /* SEGDIR_MAX_INDEX */ "select max(idx) from %_segdir where level = ?", 1812 /* SEGDIR_MAX_INDEX */ "select max(idx) from %_segdir where level = ?",
1990 /* SEGDIR_SET */ "insert into %_segdir values (?, ?, ?, ?, ?, ?)", 1813 /* SEGDIR_SET */ "insert into %_segdir values (?, ?, ?, ?, ?, ?)",
1991 /* SEGDIR_SELECT_LEVEL */ 1814 /* SEGDIR_SELECT_LEVEL */
1992 "select start_block, leaves_end_block, root, idx from %_segdir " 1815 "select start_block, leaves_end_block, root from %_segdir "
1993 " where level = ? order by idx", 1816 " where level = ? order by idx",
1994 /* SEGDIR_SPAN */ 1817 /* SEGDIR_SPAN */
1995 "select min(start_block), max(end_block) from %_segdir " 1818 "select min(start_block), max(end_block) from %_segdir "
1996 " where level = ? and start_block <> 0", 1819 " where level = ? and start_block <> 0",
1997 /* SEGDIR_DELETE */ "delete from %_segdir where level = ?", 1820 /* SEGDIR_DELETE */ "delete from %_segdir where level = ?",
1998 1821
1999 /* NOTE(shess): The first three results of the following two 1822 /* NOTE(shess): The first three results of the following two
2000 ** statements must match. 1823 ** statements must match.
2001 */ 1824 */
2002 /* SEGDIR_SELECT_SEGMENT */ 1825 /* SEGDIR_SELECT_SEGMENT */
(...skipping 1578 matching lines...) Expand 10 before | Expand all | Expand 10 after
3581 } 3404 }
3582 } else { /* full-text query */ 3405 } else { /* full-text query */
3583 rc = sqlite3_reset(c->pStmt); 3406 rc = sqlite3_reset(c->pStmt);
3584 if( rc!=SQLITE_OK ) return rc; 3407 if( rc!=SQLITE_OK ) return rc;
3585 3408
3586 if( c->result.nData==0 || dlrAtEnd(&c->reader) ){ 3409 if( c->result.nData==0 || dlrAtEnd(&c->reader) ){
3587 c->eof = 1; 3410 c->eof = 1;
3588 return SQLITE_OK; 3411 return SQLITE_OK;
3589 } 3412 }
3590 rc = sqlite3_bind_int64(c->pStmt, 1, dlrDocid(&c->reader)); 3413 rc = sqlite3_bind_int64(c->pStmt, 1, dlrDocid(&c->reader));
3591 if( rc!=SQLITE_OK ) return rc; 3414 dlrStep(&c->reader);
3592 rc = dlrStep(&c->reader);
3593 if( rc!=SQLITE_OK ) return rc; 3415 if( rc!=SQLITE_OK ) return rc;
3594 /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */ 3416 /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */
3595 rc = sqlite3_step(c->pStmt); 3417 rc = sqlite3_step(c->pStmt);
3596 if( rc==SQLITE_ROW ){ /* the case we expect */ 3418 if( rc==SQLITE_ROW ){ /* the case we expect */
3597 c->eof = 0; 3419 c->eof = 0;
3598 return SQLITE_OK; 3420 return SQLITE_OK;
3599 } 3421 }
3600 3422 /* an error occurred; abort */
3601 /* Corrupt if the index refers to missing document. */ 3423 return rc==SQLITE_DONE ? SQLITE_ERROR : rc;
3602 if( rc==SQLITE_DONE ) return SQLITE_CORRUPT_BKPT;
3603
3604 return rc;
3605 } 3424 }
3606 } 3425 }
3607 3426
3608 3427
3609 /* TODO(shess) If we pushed LeafReader to the top of the file, or to 3428 /* TODO(shess) If we pushed LeafReader to the top of the file, or to
3610 ** another file, term_select() could be pushed above 3429 ** another file, term_select() could be pushed above
3611 ** docListOfTerm(). 3430 ** docListOfTerm().
3612 */ 3431 */
3613 static int termSelect(fulltext_vtab *v, int iColumn, 3432 static int termSelect(fulltext_vtab *v, int iColumn,
3614 const char *pTerm, int nTerm, int isPrefix, 3433 const char *pTerm, int nTerm, int isPrefix,
(...skipping 27 matching lines...) Expand all
3642 if( rc ) return rc; 3461 if( rc ) return rc;
3643 for(i=1; i<=pQTerm->nPhrase && left.nData>0; i++){ 3462 for(i=1; i<=pQTerm->nPhrase && left.nData>0; i++){
3644 dataBufferInit(&right, 0); 3463 dataBufferInit(&right, 0);
3645 rc = termSelect(v, iColumn, pQTerm[i].pTerm, pQTerm[i].nTerm, 3464 rc = termSelect(v, iColumn, pQTerm[i].pTerm, pQTerm[i].nTerm,
3646 pQTerm[i].isPrefix, DL_POSITIONS, &right); 3465 pQTerm[i].isPrefix, DL_POSITIONS, &right);
3647 if( rc ){ 3466 if( rc ){
3648 dataBufferDestroy(&left); 3467 dataBufferDestroy(&left);
3649 return rc; 3468 return rc;
3650 } 3469 }
3651 dataBufferInit(&new, 0); 3470 dataBufferInit(&new, 0);
3652 rc = docListPhraseMerge(left.pData, left.nData, right.pData, right.nData, 3471 docListPhraseMerge(left.pData, left.nData, right.pData, right.nData,
3653 i<pQTerm->nPhrase ? DL_POSITIONS : DL_DOCIDS, &new); 3472 i<pQTerm->nPhrase ? DL_POSITIONS : DL_DOCIDS, &new);
3654 dataBufferDestroy(&left); 3473 dataBufferDestroy(&left);
3655 dataBufferDestroy(&right); 3474 dataBufferDestroy(&right);
3656 if( rc!=SQLITE_OK ){
3657 dataBufferDestroy(&new);
3658 return rc;
3659 }
3660 left = new; 3475 left = new;
3661 } 3476 }
3662 *pResult = left; 3477 *pResult = left;
3663 return rc; 3478 return SQLITE_OK;
3664 } 3479 }
3665 3480
3666 /* Add a new term pTerm[0..nTerm-1] to the query *q. 3481 /* Add a new term pTerm[0..nTerm-1] to the query *q.
3667 */ 3482 */
3668 static void queryAdd(Query *q, const char *pTerm, int nTerm){ 3483 static void queryAdd(Query *q, const char *pTerm, int nTerm){
3669 QueryTerm *t; 3484 QueryTerm *t;
3670 ++q->nTerms; 3485 ++q->nTerms;
3671 q->pTerms = sqlite3_realloc(q->pTerms, q->nTerms * sizeof(q->pTerms[0])); 3486 q->pTerms = sqlite3_realloc(q->pTerms, q->nTerms * sizeof(q->pTerms[0]));
3672 if( q->pTerms==0 ){ 3487 if( q->pTerms==0 ){
3673 q->nTerms = 0; 3488 q->nTerms = 0;
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after
3720 sqlite3_tokenizer *pTokenizer, /* The tokenizer to use */ 3535 sqlite3_tokenizer *pTokenizer, /* The tokenizer to use */
3721 const char *pSegment, int nSegment, /* Query expression being parsed */ 3536 const char *pSegment, int nSegment, /* Query expression being parsed */
3722 int inPhrase, /* True if within "..." */ 3537 int inPhrase, /* True if within "..." */
3723 Query *pQuery /* Append results here */ 3538 Query *pQuery /* Append results here */
3724 ){ 3539 ){
3725 const sqlite3_tokenizer_module *pModule = pTokenizer->pModule; 3540 const sqlite3_tokenizer_module *pModule = pTokenizer->pModule;
3726 sqlite3_tokenizer_cursor *pCursor; 3541 sqlite3_tokenizer_cursor *pCursor;
3727 int firstIndex = pQuery->nTerms; 3542 int firstIndex = pQuery->nTerms;
3728 int iCol; 3543 int iCol;
3729 int nTerm = 1; 3544 int nTerm = 1;
3730 int iEndLast = -1;
3731 3545
3732 int rc = pModule->xOpen(pTokenizer, pSegment, nSegment, &pCursor); 3546 int rc = pModule->xOpen(pTokenizer, pSegment, nSegment, &pCursor);
3733 if( rc!=SQLITE_OK ) return rc; 3547 if( rc!=SQLITE_OK ) return rc;
3734 pCursor->pTokenizer = pTokenizer; 3548 pCursor->pTokenizer = pTokenizer;
3735 3549
3736 while( 1 ){ 3550 while( 1 ){
3737 const char *pToken; 3551 const char *pToken;
3738 int nToken, iBegin, iEnd, iPos; 3552 int nToken, iBegin, iEnd, iPos;
3739 3553
3740 rc = pModule->xNext(pCursor, 3554 rc = pModule->xNext(pCursor,
3741 &pToken, &nToken, 3555 &pToken, &nToken,
3742 &iBegin, &iEnd, &iPos); 3556 &iBegin, &iEnd, &iPos);
3743 if( rc!=SQLITE_OK ) break; 3557 if( rc!=SQLITE_OK ) break;
3744 if( !inPhrase && 3558 if( !inPhrase &&
3745 pSegment[iEnd]==':' && 3559 pSegment[iEnd]==':' &&
3746 (iCol = checkColumnSpecifier(pQuery->pFts, pToken, nToken))>=0 ){ 3560 (iCol = checkColumnSpecifier(pQuery->pFts, pToken, nToken))>=0 ){
3747 pQuery->nextColumn = iCol; 3561 pQuery->nextColumn = iCol;
3748 continue; 3562 continue;
3749 } 3563 }
3750 if( !inPhrase && pQuery->nTerms>0 && nToken==2 3564 if( !inPhrase && pQuery->nTerms>0 && nToken==2
3751 && pSegment[iBegin]=='O' && pSegment[iBegin+1]=='R' ){ 3565 && pSegment[iBegin]=='O' && pSegment[iBegin+1]=='R' ){
3752 pQuery->nextIsOr = 1; 3566 pQuery->nextIsOr = 1;
3753 continue; 3567 continue;
3754 } 3568 }
3755
3756 /*
3757 * The ICU tokenizer considers '*' a break character, so the code below
3758 * sets isPrefix correctly, but since that code doesn't eat the '*', the
3759 * ICU tokenizer returns it as the next token. So eat it here until a
3760 * better solution presents itself.
3761 */
3762 if( pQuery->nTerms>0 && nToken==1 && pSegment[iBegin]=='*' &&
3763 iEndLast==iBegin){
3764 pQuery->pTerms[pQuery->nTerms-1].isPrefix = 1;
3765 continue;
3766 }
3767 iEndLast = iEnd;
3768
3769 queryAdd(pQuery, pToken, nToken); 3569 queryAdd(pQuery, pToken, nToken);
3770 if( !inPhrase && iBegin>0 && pSegment[iBegin-1]=='-' ){ 3570 if( !inPhrase && iBegin>0 && pSegment[iBegin-1]=='-' ){
3771 pQuery->pTerms[pQuery->nTerms-1].isNot = 1; 3571 pQuery->pTerms[pQuery->nTerms-1].isNot = 1;
3772 } 3572 }
3773 if( iEnd<nSegment && pSegment[iEnd]=='*' ){ 3573 if( iEnd<nSegment && pSegment[iEnd]=='*' ){
3774 pQuery->pTerms[pQuery->nTerms-1].isPrefix = 1; 3574 pQuery->pTerms[pQuery->nTerms-1].isPrefix = 1;
3775 } 3575 }
3776 pQuery->pTerms[pQuery->nTerms-1].iPhrase = nTerm; 3576 pQuery->pTerms[pQuery->nTerms-1].iPhrase = nTerm;
3777 if( inPhrase ){ 3577 if( inPhrase ){
3778 nTerm++; 3578 nTerm++;
(...skipping 119 matching lines...) Expand 10 before | Expand all | Expand 10 after
3898 while( iNext<pQuery->nTerms && aTerm[iNext].isOr ){ 3698 while( iNext<pQuery->nTerms && aTerm[iNext].isOr ){
3899 rc = docListOfTerm(v, aTerm[iNext].iColumn, &aTerm[iNext], &or); 3699 rc = docListOfTerm(v, aTerm[iNext].iColumn, &aTerm[iNext], &or);
3900 iNext += aTerm[iNext].nPhrase + 1; 3700 iNext += aTerm[iNext].nPhrase + 1;
3901 if( rc ){ 3701 if( rc ){
3902 if( i!=nNot ) dataBufferDestroy(&left); 3702 if( i!=nNot ) dataBufferDestroy(&left);
3903 dataBufferDestroy(&right); 3703 dataBufferDestroy(&right);
3904 queryClear(pQuery); 3704 queryClear(pQuery);
3905 return rc; 3705 return rc;
3906 } 3706 }
3907 dataBufferInit(&new, 0); 3707 dataBufferInit(&new, 0);
3908 rc = docListOrMerge(right.pData, right.nData, or.pData, or.nData, &new); 3708 docListOrMerge(right.pData, right.nData, or.pData, or.nData, &new);
3909 dataBufferDestroy(&right); 3709 dataBufferDestroy(&right);
3910 dataBufferDestroy(&or); 3710 dataBufferDestroy(&or);
3911 if( rc!=SQLITE_OK ){
3912 if( i!=nNot ) dataBufferDestroy(&left);
3913 queryClear(pQuery);
3914 dataBufferDestroy(&new);
3915 return rc;
3916 }
3917 right = new; 3711 right = new;
3918 } 3712 }
3919 if( i==nNot ){ /* first term processed. */ 3713 if( i==nNot ){ /* first term processed. */
3920 left = right; 3714 left = right;
3921 }else{ 3715 }else{
3922 dataBufferInit(&new, 0); 3716 dataBufferInit(&new, 0);
3923 rc = docListAndMerge(left.pData, left.nData, 3717 docListAndMerge(left.pData, left.nData, right.pData, right.nData, &new);
3924 right.pData, right.nData, &new);
3925 dataBufferDestroy(&right); 3718 dataBufferDestroy(&right);
3926 dataBufferDestroy(&left); 3719 dataBufferDestroy(&left);
3927 if( rc!=SQLITE_OK ){
3928 queryClear(pQuery);
3929 dataBufferDestroy(&new);
3930 return rc;
3931 }
3932 left = new; 3720 left = new;
3933 } 3721 }
3934 } 3722 }
3935 3723
3936 if( nNot==pQuery->nTerms ){ 3724 if( nNot==pQuery->nTerms ){
3937 /* We do not yet know how to handle a query of only NOT terms */ 3725 /* We do not yet know how to handle a query of only NOT terms */
3938 return SQLITE_ERROR; 3726 return SQLITE_ERROR;
3939 } 3727 }
3940 3728
3941 /* Do the EXCEPT terms */ 3729 /* Do the EXCEPT terms */
3942 for(i=0; i<pQuery->nTerms; i += aTerm[i].nPhrase + 1){ 3730 for(i=0; i<pQuery->nTerms; i += aTerm[i].nPhrase + 1){
3943 if( !aTerm[i].isNot ) continue; 3731 if( !aTerm[i].isNot ) continue;
3944 rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &right); 3732 rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &right);
3945 if( rc ){ 3733 if( rc ){
3946 queryClear(pQuery); 3734 queryClear(pQuery);
3947 dataBufferDestroy(&left); 3735 dataBufferDestroy(&left);
3948 return rc; 3736 return rc;
3949 } 3737 }
3950 dataBufferInit(&new, 0); 3738 dataBufferInit(&new, 0);
3951 rc = docListExceptMerge(left.pData, left.nData, 3739 docListExceptMerge(left.pData, left.nData, right.pData, right.nData, &new);
3952 right.pData, right.nData, &new);
3953 dataBufferDestroy(&right); 3740 dataBufferDestroy(&right);
3954 dataBufferDestroy(&left); 3741 dataBufferDestroy(&left);
3955 if( rc!=SQLITE_OK ){
3956 queryClear(pQuery);
3957 dataBufferDestroy(&new);
3958 return rc;
3959 }
3960 left = new; 3742 left = new;
3961 } 3743 }
3962 3744
3963 *pResult = left; 3745 *pResult = left;
3964 return rc; 3746 return rc;
3965 } 3747 }
3966 3748
3967 /* 3749 /*
3968 ** This is the xFilter interface for the virtual table. See 3750 ** This is the xFilter interface for the virtual table. See
3969 ** the virtual table xFilter method documentation for additional 3751 ** the virtual table xFilter method documentation for additional
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after
4043 if( c->result.nData!=0 ){ 3825 if( c->result.nData!=0 ){
4044 /* This case happens if the same cursor is used repeatedly. */ 3826 /* This case happens if the same cursor is used repeatedly. */
4045 dlrDestroy(&c->reader); 3827 dlrDestroy(&c->reader);
4046 dataBufferReset(&c->result); 3828 dataBufferReset(&c->result);
4047 }else{ 3829 }else{
4048 dataBufferInit(&c->result, 0); 3830 dataBufferInit(&c->result, 0);
4049 } 3831 }
4050 rc = fulltextQuery(v, idxNum-QUERY_FULLTEXT, zQuery, -1, &c->result, &c->q ); 3832 rc = fulltextQuery(v, idxNum-QUERY_FULLTEXT, zQuery, -1, &c->result, &c->q );
4051 if( rc!=SQLITE_OK ) return rc; 3833 if( rc!=SQLITE_OK ) return rc;
4052 if( c->result.nData!=0 ){ 3834 if( c->result.nData!=0 ){
4053 rc = dlrInit(&c->reader, DL_DOCIDS, c->result.pData, c->result.nData); 3835 dlrInit(&c->reader, DL_DOCIDS, c->result.pData, c->result.nData);
4054 if( rc!=SQLITE_OK ) return rc;
4055 } 3836 }
4056 break; 3837 break;
4057 } 3838 }
4058 } 3839 }
4059 3840
4060 return fulltextNext(pCursor); 3841 return fulltextNext(pCursor);
4061 } 3842 }
4062 3843
4063 /* This is the xEof method of the virtual table. The SQLite core 3844 /* This is the xEof method of the virtual table. The SQLite core
4064 ** calls this routine to find out if it has reached the end of 3845 ** calls this routine to find out if it has reached the end of
(...skipping 480 matching lines...) Expand 10 before | Expand all | Expand 10 after
4545 DataBuffer term; /* previous term, for decoding term delta. */ 4326 DataBuffer term; /* previous term, for decoding term delta. */
4546 4327
4547 sqlite_int64 iBlockid; 4328 sqlite_int64 iBlockid;
4548 } InteriorReader; 4329 } InteriorReader;
4549 4330
4550 static void interiorReaderDestroy(InteriorReader *pReader){ 4331 static void interiorReaderDestroy(InteriorReader *pReader){
4551 dataBufferDestroy(&pReader->term); 4332 dataBufferDestroy(&pReader->term);
4552 SCRAMBLE(pReader); 4333 SCRAMBLE(pReader);
4553 } 4334 }
4554 4335
4555 static int interiorReaderInit(const char *pData, int nData, 4336 /* TODO(shess) The assertions are great, but what if we're in NDEBUG
4556 InteriorReader *pReader){ 4337 ** and the blob is empty or otherwise contains suspect data?
4338 */
4339 static void interiorReaderInit(const char *pData, int nData,
4340 InteriorReader *pReader){
4557 int n, nTerm; 4341 int n, nTerm;
4558 4342
4559 /* These conditions are checked and met by the callers. */ 4343 /* Require at least the leading flag byte */
4560 assert( nData>0 ); 4344 assert( nData>0 );
4561 assert( pData[0]!='\0' ); 4345 assert( pData[0]!='\0' );
4562 4346
4563 CLEAR(pReader); 4347 CLEAR(pReader);
4564 4348
4565 /* Decode the base blockid, and set the cursor to the first term. */ 4349 /* Decode the base blockid, and set the cursor to the first term. */
4566 n = getVarintSafe(pData+1, &pReader->iBlockid, nData-1); 4350 n = getVarint(pData+1, &pReader->iBlockid);
4567 if( !n ) return SQLITE_CORRUPT_BKPT; 4351 assert( 1+n<=nData );
4568 pReader->pData = pData+1+n; 4352 pReader->pData = pData+1+n;
4569 pReader->nData = nData-(1+n); 4353 pReader->nData = nData-(1+n);
4570 4354
4571 /* A single-child interior node (such as when a leaf node was too 4355 /* A single-child interior node (such as when a leaf node was too
4572 ** large for the segment directory) won't have any terms. 4356 ** large for the segment directory) won't have any terms.
4573 ** Otherwise, decode the first term. 4357 ** Otherwise, decode the first term.
4574 */ 4358 */
4575 if( pReader->nData==0 ){ 4359 if( pReader->nData==0 ){
4576 dataBufferInit(&pReader->term, 0); 4360 dataBufferInit(&pReader->term, 0);
4577 }else{ 4361 }else{
4578 n = getVarint32Safe(pReader->pData, &nTerm, pReader->nData); 4362 n = getVarint32(pReader->pData, &nTerm);
4579 if( !n || nTerm<0 || nTerm>pReader->nData-n) return SQLITE_CORRUPT_BKPT;
4580 dataBufferInit(&pReader->term, nTerm); 4363 dataBufferInit(&pReader->term, nTerm);
4581 dataBufferReplace(&pReader->term, pReader->pData+n, nTerm); 4364 dataBufferReplace(&pReader->term, pReader->pData+n, nTerm);
4365 assert( n+nTerm<=pReader->nData );
4582 pReader->pData += n+nTerm; 4366 pReader->pData += n+nTerm;
4583 pReader->nData -= n+nTerm; 4367 pReader->nData -= n+nTerm;
4584 } 4368 }
4585 return SQLITE_OK;
4586 } 4369 }
4587 4370
4588 static int interiorReaderAtEnd(InteriorReader *pReader){ 4371 static int interiorReaderAtEnd(InteriorReader *pReader){
4589 return pReader->term.nData<=0; 4372 return pReader->term.nData==0;
4590 } 4373 }
4591 4374
4592 static sqlite_int64 interiorReaderCurrentBlockid(InteriorReader *pReader){ 4375 static sqlite_int64 interiorReaderCurrentBlockid(InteriorReader *pReader){
4593 return pReader->iBlockid; 4376 return pReader->iBlockid;
4594 } 4377 }
4595 4378
4596 static int interiorReaderTermBytes(InteriorReader *pReader){ 4379 static int interiorReaderTermBytes(InteriorReader *pReader){
4597 assert( !interiorReaderAtEnd(pReader) ); 4380 assert( !interiorReaderAtEnd(pReader) );
4598 return pReader->term.nData; 4381 return pReader->term.nData;
4599 } 4382 }
4600 static const char *interiorReaderTerm(InteriorReader *pReader){ 4383 static const char *interiorReaderTerm(InteriorReader *pReader){
4601 assert( !interiorReaderAtEnd(pReader) ); 4384 assert( !interiorReaderAtEnd(pReader) );
4602 return pReader->term.pData; 4385 return pReader->term.pData;
4603 } 4386 }
4604 4387
4605 /* Step forward to the next term in the node. */ 4388 /* Step forward to the next term in the node. */
4606 static int interiorReaderStep(InteriorReader *pReader){ 4389 static void interiorReaderStep(InteriorReader *pReader){
4607 assert( !interiorReaderAtEnd(pReader) ); 4390 assert( !interiorReaderAtEnd(pReader) );
4608 4391
4609 /* If the last term has been read, signal eof, else construct the 4392 /* If the last term has been read, signal eof, else construct the
4610 ** next term. 4393 ** next term.
4611 */ 4394 */
4612 if( pReader->nData==0 ){ 4395 if( pReader->nData==0 ){
4613 dataBufferReset(&pReader->term); 4396 dataBufferReset(&pReader->term);
4614 }else{ 4397 }else{
4615 int n, nPrefix, nSuffix; 4398 int n, nPrefix, nSuffix;
4616 4399
4617 n = getVarint32Safe(pReader->pData, &nPrefix, pReader->nData); 4400 n = getVarint32(pReader->pData, &nPrefix);
4618 if( !n ) return SQLITE_CORRUPT_BKPT; 4401 n += getVarint32(pReader->pData+n, &nSuffix);
4619 pReader->nData -= n;
4620 pReader->pData += n;
4621 n = getVarint32Safe(pReader->pData, &nSuffix, pReader->nData);
4622 if( !n ) return SQLITE_CORRUPT_BKPT;
4623 pReader->nData -= n;
4624 pReader->pData += n;
4625 if( nSuffix<0 || nSuffix>pReader->nData ) return SQLITE_CORRUPT_BKPT;
4626 if( nPrefix<0 || nPrefix>pReader->term.nData ) return SQLITE_CORRUPT_BKPT;
4627 4402
4628 /* Truncate the current term and append suffix data. */ 4403 /* Truncate the current term and append suffix data. */
4629 pReader->term.nData = nPrefix; 4404 pReader->term.nData = nPrefix;
4630 dataBufferAppend(&pReader->term, pReader->pData, nSuffix); 4405 dataBufferAppend(&pReader->term, pReader->pData+n, nSuffix);
4631 4406
4632 pReader->pData += nSuffix; 4407 assert( n+nSuffix<=pReader->nData );
4633 pReader->nData -= nSuffix; 4408 pReader->pData += n+nSuffix;
4409 pReader->nData -= n+nSuffix;
4634 } 4410 }
4635 pReader->iBlockid++; 4411 pReader->iBlockid++;
4636 return SQLITE_OK;
4637 } 4412 }
4638 4413
4639 /* Compare the current term to pTerm[nTerm], returning strcmp-style 4414 /* Compare the current term to pTerm[nTerm], returning strcmp-style
4640 ** results. If isPrefix, equality means equal through nTerm bytes. 4415 ** results. If isPrefix, equality means equal through nTerm bytes.
4641 */ 4416 */
4642 static int interiorReaderTermCmp(InteriorReader *pReader, 4417 static int interiorReaderTermCmp(InteriorReader *pReader,
4643 const char *pTerm, int nTerm, int isPrefix){ 4418 const char *pTerm, int nTerm, int isPrefix){
4644 const char *pReaderTerm = interiorReaderTerm(pReader); 4419 const char *pReaderTerm = interiorReaderTerm(pReader);
4645 int nReaderTerm = interiorReaderTermBytes(pReader); 4420 int nReaderTerm = interiorReaderTermBytes(pReader);
4646 int c, n = nReaderTerm<nTerm ? nReaderTerm : nTerm; 4421 int c, n = nReaderTerm<nTerm ? nReaderTerm : nTerm;
(...skipping 351 matching lines...) Expand 10 before | Expand all | Expand 10 after
4998 4773
4999 /* Estimate the length of the merged doclist so we can leave space 4774 /* Estimate the length of the merged doclist so we can leave space
5000 ** to encode it. 4775 ** to encode it.
5001 */ 4776 */
5002 for(i=0, nData=0; i<nReaders; i++){ 4777 for(i=0, nData=0; i<nReaders; i++){
5003 nData += dlrAllDataBytes(&pReaders[i]); 4778 nData += dlrAllDataBytes(&pReaders[i]);
5004 } 4779 }
5005 n = putVarint(c, nData); 4780 n = putVarint(c, nData);
5006 dataBufferAppend(&pWriter->data, c, n); 4781 dataBufferAppend(&pWriter->data, c, n);
5007 4782
5008 rc = docListMerge(&pWriter->data, pReaders, nReaders); 4783 docListMerge(&pWriter->data, pReaders, nReaders);
5009 if( rc!= SQLITE_OK ) return rc;
5010 ASSERT_VALID_DOCLIST(DL_DEFAULT, 4784 ASSERT_VALID_DOCLIST(DL_DEFAULT,
5011 pWriter->data.pData+iDoclistData+n, 4785 pWriter->data.pData+iDoclistData+n,
5012 pWriter->data.nData-iDoclistData-n, NULL); 4786 pWriter->data.nData-iDoclistData-n, NULL);
5013 4787
5014 /* The actual amount of doclist data at this point could be smaller 4788 /* The actual amount of doclist data at this point could be smaller
5015 ** than the length we encoded. Additionally, the space required to 4789 ** than the length we encoded. Additionally, the space required to
5016 ** encode this length could be smaller. For small doclists, this is 4790 ** encode this length could be smaller. For small doclists, this is
5017 ** not a big deal, we can just use memmove() to adjust things. 4791 ** not a big deal, we can just use memmove() to adjust things.
5018 */ 4792 */
5019 nActualData = pWriter->data.nData-(iDoclistData+n); 4793 nActualData = pWriter->data.nData-(iDoclistData+n);
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after
5109 */ 4883 */
5110 /* TODO(shess) Revise writeZeroSegment() so that doclists are 4884 /* TODO(shess) Revise writeZeroSegment() so that doclists are
5111 ** constructed directly in pWriter->data. 4885 ** constructed directly in pWriter->data.
5112 */ 4886 */
5113 static int leafWriterStep(fulltext_vtab *v, LeafWriter *pWriter, 4887 static int leafWriterStep(fulltext_vtab *v, LeafWriter *pWriter,
5114 const char *pTerm, int nTerm, 4888 const char *pTerm, int nTerm,
5115 const char *pData, int nData){ 4889 const char *pData, int nData){
5116 int rc; 4890 int rc;
5117 DLReader reader; 4891 DLReader reader;
5118 4892
5119 rc = dlrInit(&reader, DL_DEFAULT, pData, nData); 4893 dlrInit(&reader, DL_DEFAULT, pData, nData);
5120 if( rc!=SQLITE_OK ) return rc;
5121 rc = leafWriterStepMerge(v, pWriter, pTerm, nTerm, &reader, 1); 4894 rc = leafWriterStepMerge(v, pWriter, pTerm, nTerm, &reader, 1);
5122 dlrDestroy(&reader); 4895 dlrDestroy(&reader);
5123 4896
5124 return rc; 4897 return rc;
5125 } 4898 }
5126 4899
5127 4900
5128 /****************************************************************/ 4901 /****************************************************************/
5129 /* LeafReader is used to iterate over an individual leaf node. */ 4902 /* LeafReader is used to iterate over an individual leaf node. */
5130 typedef struct LeafReader { 4903 typedef struct LeafReader {
(...skipping 24 matching lines...) Expand all
5155 /* Access the doclist data for the current term. */ 4928 /* Access the doclist data for the current term. */
5156 static int leafReaderDataBytes(LeafReader *pReader){ 4929 static int leafReaderDataBytes(LeafReader *pReader){
5157 int nData; 4930 int nData;
5158 assert( pReader->term.nData>0 ); 4931 assert( pReader->term.nData>0 );
5159 getVarint32(pReader->pData, &nData); 4932 getVarint32(pReader->pData, &nData);
5160 return nData; 4933 return nData;
5161 } 4934 }
5162 static const char *leafReaderData(LeafReader *pReader){ 4935 static const char *leafReaderData(LeafReader *pReader){
5163 int n, nData; 4936 int n, nData;
5164 assert( pReader->term.nData>0 ); 4937 assert( pReader->term.nData>0 );
5165 n = getVarint32Safe(pReader->pData, &nData, pReader->nData); 4938 n = getVarint32(pReader->pData, &nData);
5166 if( !n || nData>pReader->nData-n ) return NULL;
5167 return pReader->pData+n; 4939 return pReader->pData+n;
5168 } 4940 }
5169 4941
5170 static int leafReaderInit(const char *pData, int nData, LeafReader *pReader){ 4942 static void leafReaderInit(const char *pData, int nData,
4943 LeafReader *pReader){
5171 int nTerm, n; 4944 int nTerm, n;
5172 4945
5173 /* All callers check this precondition. */
5174 assert( nData>0 ); 4946 assert( nData>0 );
5175 assert( pData[0]=='\0' ); 4947 assert( pData[0]=='\0' );
5176 4948
5177 CLEAR(pReader); 4949 CLEAR(pReader);
5178 4950
5179 /* Read the first term, skipping the header byte. */ 4951 /* Read the first term, skipping the header byte. */
5180 n = getVarint32Safe(pData+1, &nTerm, nData-1); 4952 n = getVarint32(pData+1, &nTerm);
5181 if( !n || nTerm<0 || nTerm>nData-1-n ) return SQLITE_CORRUPT_BKPT;
5182 dataBufferInit(&pReader->term, nTerm); 4953 dataBufferInit(&pReader->term, nTerm);
5183 dataBufferReplace(&pReader->term, pData+1+n, nTerm); 4954 dataBufferReplace(&pReader->term, pData+1+n, nTerm);
5184 4955
5185 /* Position after the first term. */ 4956 /* Position after the first term. */
4957 assert( 1+n+nTerm<nData );
5186 pReader->pData = pData+1+n+nTerm; 4958 pReader->pData = pData+1+n+nTerm;
5187 pReader->nData = nData-1-n-nTerm; 4959 pReader->nData = nData-1-n-nTerm;
5188 return SQLITE_OK;
5189 } 4960 }
5190 4961
5191 /* Step the reader forward to the next term. */ 4962 /* Step the reader forward to the next term. */
5192 static int leafReaderStep(LeafReader *pReader){ 4963 static void leafReaderStep(LeafReader *pReader){
5193 int n, nData, nPrefix, nSuffix; 4964 int n, nData, nPrefix, nSuffix;
5194 assert( !leafReaderAtEnd(pReader) ); 4965 assert( !leafReaderAtEnd(pReader) );
5195 4966
5196 /* Skip previous entry's data block. */ 4967 /* Skip previous entry's data block. */
5197 n = getVarint32Safe(pReader->pData, &nData, pReader->nData); 4968 n = getVarint32(pReader->pData, &nData);
5198 if( !n || nData<0 || nData>pReader->nData-n ) return SQLITE_CORRUPT_BKPT; 4969 assert( n+nData<=pReader->nData );
5199 pReader->pData += n+nData; 4970 pReader->pData += n+nData;
5200 pReader->nData -= n+nData; 4971 pReader->nData -= n+nData;
5201 4972
5202 if( !leafReaderAtEnd(pReader) ){ 4973 if( !leafReaderAtEnd(pReader) ){
5203 /* Construct the new term using a prefix from the old term plus a 4974 /* Construct the new term using a prefix from the old term plus a
5204 ** suffix from the leaf data. 4975 ** suffix from the leaf data.
5205 */ 4976 */
5206 n = getVarint32Safe(pReader->pData, &nPrefix, pReader->nData); 4977 n = getVarint32(pReader->pData, &nPrefix);
5207 if( !n ) return SQLITE_CORRUPT_BKPT; 4978 n += getVarint32(pReader->pData+n, &nSuffix);
5208 pReader->nData -= n; 4979 assert( n+nSuffix<pReader->nData );
5209 pReader->pData += n;
5210 n = getVarint32Safe(pReader->pData, &nSuffix, pReader->nData);
5211 if( !n ) return SQLITE_CORRUPT_BKPT;
5212 pReader->nData -= n;
5213 pReader->pData += n;
5214 if( nSuffix<0 || nSuffix>pReader->nData ) return SQLITE_CORRUPT_BKPT;
5215 if( nPrefix<0 || nPrefix>pReader->term.nData ) return SQLITE_CORRUPT_BKPT;
5216 pReader->term.nData = nPrefix; 4980 pReader->term.nData = nPrefix;
5217 dataBufferAppend(&pReader->term, pReader->pData, nSuffix); 4981 dataBufferAppend(&pReader->term, pReader->pData+n, nSuffix);
5218 4982
5219 pReader->pData += nSuffix; 4983 pReader->pData += n+nSuffix;
5220 pReader->nData -= nSuffix; 4984 pReader->nData -= n+nSuffix;
5221 } 4985 }
5222 return SQLITE_OK;
5223 } 4986 }
5224 4987
5225 /* strcmp-style comparison of pReader's current term against pTerm. 4988 /* strcmp-style comparison of pReader's current term against pTerm.
5226 ** If isPrefix, equality means equal through nTerm bytes. 4989 ** If isPrefix, equality means equal through nTerm bytes.
5227 */ 4990 */
5228 static int leafReaderTermCmp(LeafReader *pReader, 4991 static int leafReaderTermCmp(LeafReader *pReader,
5229 const char *pTerm, int nTerm, int isPrefix){ 4992 const char *pTerm, int nTerm, int isPrefix){
5230 int c, n = pReader->term.nData<nTerm ? pReader->term.nData : nTerm; 4993 int c, n = pReader->term.nData<nTerm ? pReader->term.nData : nTerm;
5231 if( n==0 ){ 4994 if( n==0 ){
5232 if( pReader->term.nData>0 ) return -1; 4995 if( pReader->term.nData>0 ) return -1;
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after
5305 } 5068 }
5306 leafReaderDestroy(&pReader->leafReader); 5069 leafReaderDestroy(&pReader->leafReader);
5307 dataBufferDestroy(&pReader->rootData); 5070 dataBufferDestroy(&pReader->rootData);
5308 SCRAMBLE(pReader); 5071 SCRAMBLE(pReader);
5309 } 5072 }
5310 5073
5311 /* Initialize pReader with the given root data (if iStartBlockid==0 5074 /* Initialize pReader with the given root data (if iStartBlockid==0
5312 ** the leaf data was entirely contained in the root), or from the 5075 ** the leaf data was entirely contained in the root), or from the
5313 ** stream of blocks between iStartBlockid and iEndBlockid, inclusive. 5076 ** stream of blocks between iStartBlockid and iEndBlockid, inclusive.
5314 */ 5077 */
5315 /* TODO(shess): Figure out a means of indicating how many leaves are
5316 ** expected, for purposes of detecting corruption.
5317 */
5318 static int leavesReaderInit(fulltext_vtab *v, 5078 static int leavesReaderInit(fulltext_vtab *v,
5319 int idx, 5079 int idx,
5320 sqlite_int64 iStartBlockid, 5080 sqlite_int64 iStartBlockid,
5321 sqlite_int64 iEndBlockid, 5081 sqlite_int64 iEndBlockid,
5322 const char *pRootData, int nRootData, 5082 const char *pRootData, int nRootData,
5323 LeavesReader *pReader){ 5083 LeavesReader *pReader){
5324 CLEAR(pReader); 5084 CLEAR(pReader);
5325 pReader->idx = idx; 5085 pReader->idx = idx;
5326 5086
5327 dataBufferInit(&pReader->rootData, 0); 5087 dataBufferInit(&pReader->rootData, 0);
5328 if( iStartBlockid==0 ){ 5088 if( iStartBlockid==0 ){
5329 int rc;
5330 /* Corrupt if this can't be a leaf node. */
5331 if( pRootData==NULL || nRootData<1 || pRootData[0]!='\0' ){
5332 return SQLITE_CORRUPT_BKPT;
5333 }
5334 /* Entire leaf level fit in root data. */ 5089 /* Entire leaf level fit in root data. */
5335 dataBufferReplace(&pReader->rootData, pRootData, nRootData); 5090 dataBufferReplace(&pReader->rootData, pRootData, nRootData);
5336 rc = leafReaderInit(pReader->rootData.pData, pReader->rootData.nData, 5091 leafReaderInit(pReader->rootData.pData, pReader->rootData.nData,
5337 &pReader->leafReader); 5092 &pReader->leafReader);
5338 if( rc!=SQLITE_OK ){
5339 dataBufferDestroy(&pReader->rootData);
5340 return rc;
5341 }
5342 }else{ 5093 }else{
5343 sqlite3_stmt *s; 5094 sqlite3_stmt *s;
5344 int rc = sql_get_leaf_statement(v, idx, &s); 5095 int rc = sql_get_leaf_statement(v, idx, &s);
5345 if( rc!=SQLITE_OK ) return rc; 5096 if( rc!=SQLITE_OK ) return rc;
5346 5097
5347 rc = sqlite3_bind_int64(s, 1, iStartBlockid); 5098 rc = sqlite3_bind_int64(s, 1, iStartBlockid);
5348 if( rc!=SQLITE_OK ) goto err; 5099 if( rc!=SQLITE_OK ) return rc;
5349 5100
5350 rc = sqlite3_bind_int64(s, 2, iEndBlockid); 5101 rc = sqlite3_bind_int64(s, 2, iEndBlockid);
5351 if( rc!=SQLITE_OK ) goto err; 5102 if( rc!=SQLITE_OK ) return rc;
5352 5103
5353 rc = sqlite3_step(s); 5104 rc = sqlite3_step(s);
5354
5355 /* Corrupt if interior node referenced missing leaf node. */
5356 if( rc==SQLITE_DONE ){ 5105 if( rc==SQLITE_DONE ){
5357 rc = SQLITE_CORRUPT_BKPT; 5106 pReader->eof = 1;
5358 goto err; 5107 return SQLITE_OK;
5359 } 5108 }
5360 5109 if( rc!=SQLITE_ROW ) return rc;
5361 if( rc!=SQLITE_ROW ) goto err;
5362 rc = SQLITE_OK;
5363
5364 /* Corrupt if leaf data isn't a blob. */
5365 if( sqlite3_column_type(s, 0)!=SQLITE_BLOB ){
5366 rc = SQLITE_CORRUPT_BKPT;
5367 }else{
5368 const char *pLeafData = sqlite3_column_blob(s, 0);
5369 int nLeafData = sqlite3_column_bytes(s, 0);
5370
5371 /* Corrupt if this can't be a leaf node. */
5372 if( pLeafData==NULL || nLeafData<1 || pLeafData[0]!='\0' ){
5373 rc = SQLITE_CORRUPT_BKPT;
5374 }else{
5375 rc = leafReaderInit(pLeafData, nLeafData, &pReader->leafReader);
5376 }
5377 }
5378
5379 err:
5380 if( rc!=SQLITE_OK ){
5381 if( idx==-1 ){
5382 sqlite3_finalize(s);
5383 }else{
5384 sqlite3_reset(s);
5385 }
5386 return rc;
5387 }
5388 5110
5389 pReader->pStmt = s; 5111 pReader->pStmt = s;
5112 leafReaderInit(sqlite3_column_blob(pReader->pStmt, 0),
5113 sqlite3_column_bytes(pReader->pStmt, 0),
5114 &pReader->leafReader);
5390 } 5115 }
5391 return SQLITE_OK; 5116 return SQLITE_OK;
5392 } 5117 }
5393 5118
5394 /* Step the current leaf forward to the next term. If we reach the 5119 /* Step the current leaf forward to the next term. If we reach the
5395 ** end of the current leaf, step forward to the next leaf block. 5120 ** end of the current leaf, step forward to the next leaf block.
5396 */ 5121 */
5397 static int leavesReaderStep(fulltext_vtab *v, LeavesReader *pReader){ 5122 static int leavesReaderStep(fulltext_vtab *v, LeavesReader *pReader){
5398 int rc;
5399 assert( !leavesReaderAtEnd(pReader) ); 5123 assert( !leavesReaderAtEnd(pReader) );
5400 rc = leafReaderStep(&pReader->leafReader); 5124 leafReaderStep(&pReader->leafReader);
5401 if( rc!=SQLITE_OK ) return rc;
5402 5125
5403 if( leafReaderAtEnd(&pReader->leafReader) ){ 5126 if( leafReaderAtEnd(&pReader->leafReader) ){
5127 int rc;
5404 if( pReader->rootData.pData ){ 5128 if( pReader->rootData.pData ){
5405 pReader->eof = 1; 5129 pReader->eof = 1;
5406 return SQLITE_OK; 5130 return SQLITE_OK;
5407 } 5131 }
5408 rc = sqlite3_step(pReader->pStmt); 5132 rc = sqlite3_step(pReader->pStmt);
5409 if( rc!=SQLITE_ROW ){ 5133 if( rc!=SQLITE_ROW ){
5410 pReader->eof = 1; 5134 pReader->eof = 1;
5411 return rc==SQLITE_DONE ? SQLITE_OK : rc; 5135 return rc==SQLITE_DONE ? SQLITE_OK : rc;
5412 } 5136 }
5413 5137 leafReaderDestroy(&pReader->leafReader);
5414 /* Corrupt if leaf data isn't a blob. */ 5138 leafReaderInit(sqlite3_column_blob(pReader->pStmt, 0),
5415 if( sqlite3_column_type(pReader->pStmt, 0)!=SQLITE_BLOB ){ 5139 sqlite3_column_bytes(pReader->pStmt, 0),
5416 return SQLITE_CORRUPT_BKPT; 5140 &pReader->leafReader);
5417 }else{
5418 LeafReader tmp;
5419 const char *pLeafData = sqlite3_column_blob(pReader->pStmt, 0);
5420 int nLeafData = sqlite3_column_bytes(pReader->pStmt, 0);
5421
5422 /* Corrupt if this can't be a leaf node. */
5423 if( pLeafData==NULL || nLeafData<1 || pLeafData[0]!='\0' ){
5424 return SQLITE_CORRUPT_BKPT;
5425 }
5426
5427 rc = leafReaderInit(pLeafData, nLeafData, &tmp);
5428 if( rc!=SQLITE_OK ) return rc;
5429 leafReaderDestroy(&pReader->leafReader);
5430 pReader->leafReader = tmp;
5431 }
5432 } 5141 }
5433 return SQLITE_OK; 5142 return SQLITE_OK;
5434 } 5143 }
5435 5144
5436 /* Order LeavesReaders by their term, ignoring idx. Readers at eof 5145 /* Order LeavesReaders by their term, ignoring idx. Readers at eof
5437 ** always sort to the end. 5146 ** always sort to the end.
5438 */ 5147 */
5439 static int leavesReaderTermCmp(LeavesReader *lr1, LeavesReader *lr2){ 5148 static int leavesReaderTermCmp(LeavesReader *lr1, LeavesReader *lr2){
5440 if( leavesReaderAtEnd(lr1) ){ 5149 if( leavesReaderAtEnd(lr1) ){
5441 if( leavesReaderAtEnd(lr2) ) return 0; 5150 if( leavesReaderAtEnd(lr2) ) return 0;
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
5482 5191
5483 rc = sqlite3_bind_int(s, 1, iLevel); 5192 rc = sqlite3_bind_int(s, 1, iLevel);
5484 if( rc!=SQLITE_OK ) return rc; 5193 if( rc!=SQLITE_OK ) return rc;
5485 5194
5486 i = 0; 5195 i = 0;
5487 while( (rc = sqlite3_step(s))==SQLITE_ROW ){ 5196 while( (rc = sqlite3_step(s))==SQLITE_ROW ){
5488 sqlite_int64 iStart = sqlite3_column_int64(s, 0); 5197 sqlite_int64 iStart = sqlite3_column_int64(s, 0);
5489 sqlite_int64 iEnd = sqlite3_column_int64(s, 1); 5198 sqlite_int64 iEnd = sqlite3_column_int64(s, 1);
5490 const char *pRootData = sqlite3_column_blob(s, 2); 5199 const char *pRootData = sqlite3_column_blob(s, 2);
5491 int nRootData = sqlite3_column_bytes(s, 2); 5200 int nRootData = sqlite3_column_bytes(s, 2);
5492 sqlite_int64 iIndex = sqlite3_column_int64(s, 3);
5493 5201
5494 /* Corrupt if we get back different types than we stored. */ 5202 assert( i<MERGE_COUNT );
5495 /* Also corrupt if the index is not sequential starting at 0. */
5496 if( sqlite3_column_type(s, 0)!=SQLITE_INTEGER ||
5497 sqlite3_column_type(s, 1)!=SQLITE_INTEGER ||
5498 sqlite3_column_type(s, 2)!=SQLITE_BLOB ||
5499 i!=iIndex ||
5500 i>=MERGE_COUNT ){
5501 rc = SQLITE_CORRUPT_BKPT;
5502 break;
5503 }
5504
5505 rc = leavesReaderInit(v, i, iStart, iEnd, pRootData, nRootData, 5203 rc = leavesReaderInit(v, i, iStart, iEnd, pRootData, nRootData,
5506 &pReaders[i]); 5204 &pReaders[i]);
5507 if( rc!=SQLITE_OK ) break; 5205 if( rc!=SQLITE_OK ) break;
5508 5206
5509 i++; 5207 i++;
5510 } 5208 }
5511 if( rc!=SQLITE_DONE ){ 5209 if( rc!=SQLITE_DONE ){
5512 while( i-->0 ){ 5210 while( i-->0 ){
5513 leavesReaderDestroy(&pReaders[i]); 5211 leavesReaderDestroy(&pReaders[i]);
5514 } 5212 }
5515 sqlite3_reset(s); /* So we don't leave a lock. */
5516 return rc; 5213 return rc;
5517 } 5214 }
5518 5215
5519 *piReaders = i; 5216 *piReaders = i;
5520 5217
5521 /* Leave our results sorted by term, then age. */ 5218 /* Leave our results sorted by term, then age. */
5522 while( i-- ){ 5219 while( i-- ){
5523 leavesReaderReorder(pReaders+i, *piReaders-i); 5220 leavesReaderReorder(pReaders+i, *piReaders-i);
5524 } 5221 }
5525 return SQLITE_OK; 5222 return SQLITE_OK;
5526 } 5223 }
5527 5224
5528 /* Merge doclists from pReaders[nReaders] into a single doclist, which 5225 /* Merge doclists from pReaders[nReaders] into a single doclist, which
5529 ** is written to pWriter. Assumes pReaders is ordered oldest to 5226 ** is written to pWriter. Assumes pReaders is ordered oldest to
5530 ** newest. 5227 ** newest.
5531 */ 5228 */
5532 /* TODO(shess) Consider putting this inline in segmentMerge(). */ 5229 /* TODO(shess) Consider putting this inline in segmentMerge(). */
5533 static int leavesReadersMerge(fulltext_vtab *v, 5230 static int leavesReadersMerge(fulltext_vtab *v,
5534 LeavesReader *pReaders, int nReaders, 5231 LeavesReader *pReaders, int nReaders,
5535 LeafWriter *pWriter){ 5232 LeafWriter *pWriter){
5536 DLReader dlReaders[MERGE_COUNT]; 5233 DLReader dlReaders[MERGE_COUNT];
5537 const char *pTerm = leavesReaderTerm(pReaders); 5234 const char *pTerm = leavesReaderTerm(pReaders);
5538 int i, nTerm = leavesReaderTermBytes(pReaders); 5235 int i, nTerm = leavesReaderTermBytes(pReaders);
5539 int rc;
5540 5236
5541 assert( nReaders<=MERGE_COUNT ); 5237 assert( nReaders<=MERGE_COUNT );
5542 5238
5543 for(i=0; i<nReaders; i++){ 5239 for(i=0; i<nReaders; i++){
5544 const char *pData = leavesReaderData(pReaders+i); 5240 dlrInit(&dlReaders[i], DL_DEFAULT,
5545 if( pData==NULL ){ 5241 leavesReaderData(pReaders+i),
5546 rc = SQLITE_CORRUPT_BKPT; 5242 leavesReaderDataBytes(pReaders+i));
5547 break;
5548 }
5549 rc = dlrInit(&dlReaders[i], DL_DEFAULT,
5550 pData,
5551 leavesReaderDataBytes(pReaders+i));
5552 if( rc!=SQLITE_OK ) break;
5553 }
5554 if( rc!=SQLITE_OK ){
5555 while( i-->0 ){
5556 dlrDestroy(&dlReaders[i]);
5557 }
5558 return rc;
5559 } 5243 }
5560 5244
5561 return leafWriterStepMerge(v, pWriter, pTerm, nTerm, dlReaders, nReaders); 5245 return leafWriterStepMerge(v, pWriter, pTerm, nTerm, dlReaders, nReaders);
5562 } 5246 }
5563 5247
5564 /* Forward ref due to mutual recursion with segdirNextIndex(). */ 5248 /* Forward ref due to mutual recursion with segdirNextIndex(). */
5565 static int segmentMerge(fulltext_vtab *v, int iLevel); 5249 static int segmentMerge(fulltext_vtab *v, int iLevel);
5566 5250
5567 /* Put the next available index at iLevel into *pidx. If iLevel 5251 /* Put the next available index at iLevel into *pidx. If iLevel
5568 ** already has MERGE_COUNT segments, they are merged to a higher 5252 ** already has MERGE_COUNT segments, they are merged to a higher
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
5602 if( rc!=SQLITE_OK ) return rc; 5286 if( rc!=SQLITE_OK ) return rc;
5603 5287
5604 /* TODO(shess) This assumes that we'll always see exactly 5288 /* TODO(shess) This assumes that we'll always see exactly
5605 ** MERGE_COUNT segments to merge at a given level. That will be 5289 ** MERGE_COUNT segments to merge at a given level. That will be
5606 ** broken if we allow the developer to request preemptive or 5290 ** broken if we allow the developer to request preemptive or
5607 ** deferred merging. 5291 ** deferred merging.
5608 */ 5292 */
5609 memset(&lrs, '\0', sizeof(lrs)); 5293 memset(&lrs, '\0', sizeof(lrs));
5610 rc = leavesReadersInit(v, iLevel, lrs, &i); 5294 rc = leavesReadersInit(v, iLevel, lrs, &i);
5611 if( rc!=SQLITE_OK ) return rc; 5295 if( rc!=SQLITE_OK ) return rc;
5296 assert( i==MERGE_COUNT );
5612 5297
5613 leafWriterInit(iLevel+1, idx, &writer); 5298 leafWriterInit(iLevel+1, idx, &writer);
5614 5299
5615 if( i!=MERGE_COUNT ){
5616 rc = SQLITE_CORRUPT_BKPT;
5617 goto err;
5618 }
5619
5620 /* Since leavesReaderReorder() pushes readers at eof to the end, 5300 /* Since leavesReaderReorder() pushes readers at eof to the end,
5621 ** when the first reader is empty, all will be empty. 5301 ** when the first reader is empty, all will be empty.
5622 */ 5302 */
5623 while( !leavesReaderAtEnd(lrs) ){ 5303 while( !leavesReaderAtEnd(lrs) ){
5624 /* Figure out how many readers share their next term. */ 5304 /* Figure out how many readers share their next term. */
5625 for(i=1; i<MERGE_COUNT && !leavesReaderAtEnd(lrs+i); i++){ 5305 for(i=1; i<MERGE_COUNT && !leavesReaderAtEnd(lrs+i); i++){
5626 if( 0!=leavesReaderTermCmp(lrs, lrs+i) ) break; 5306 if( 0!=leavesReaderTermCmp(lrs, lrs+i) ) break;
5627 } 5307 }
5628 5308
5629 rc = leavesReadersMerge(v, lrs, i, &writer); 5309 rc = leavesReadersMerge(v, lrs, i, &writer);
(...skipping 22 matching lines...) Expand all
5652 5332
5653 err: 5333 err:
5654 for(i=0; i<MERGE_COUNT; i++){ 5334 for(i=0; i<MERGE_COUNT; i++){
5655 leavesReaderDestroy(&lrs[i]); 5335 leavesReaderDestroy(&lrs[i]);
5656 } 5336 }
5657 leafWriterDestroy(&writer); 5337 leafWriterDestroy(&writer);
5658 return rc; 5338 return rc;
5659 } 5339 }
5660 5340
5661 /* Accumulate the union of *acc and *pData into *acc. */ 5341 /* Accumulate the union of *acc and *pData into *acc. */
5662 static int docListAccumulateUnion(DataBuffer *acc, 5342 static void docListAccumulateUnion(DataBuffer *acc,
5663 const char *pData, int nData) { 5343 const char *pData, int nData) {
5664 DataBuffer tmp = *acc; 5344 DataBuffer tmp = *acc;
5665 int rc;
5666 dataBufferInit(acc, tmp.nData+nData); 5345 dataBufferInit(acc, tmp.nData+nData);
5667 rc = docListUnion(tmp.pData, tmp.nData, pData, nData, acc); 5346 docListUnion(tmp.pData, tmp.nData, pData, nData, acc);
5668 dataBufferDestroy(&tmp); 5347 dataBufferDestroy(&tmp);
5669 return rc;
5670 } 5348 }
5671 5349
5672 /* TODO(shess) It might be interesting to explore different merge 5350 /* TODO(shess) It might be interesting to explore different merge
5673 ** strategies, here. For instance, since this is a sorted merge, we 5351 ** strategies, here. For instance, since this is a sorted merge, we
5674 ** could easily merge many doclists in parallel. With some 5352 ** could easily merge many doclists in parallel. With some
5675 ** comprehension of the storage format, we could merge all of the 5353 ** comprehension of the storage format, we could merge all of the
5676 ** doclists within a leaf node directly from the leaf node's storage. 5354 ** doclists within a leaf node directly from the leaf node's storage.
5677 ** It may be worthwhile to merge smaller doclists before larger 5355 ** It may be worthwhile to merge smaller doclists before larger
5678 ** doclists, since they can be traversed more quickly - but the 5356 ** doclists, since they can be traversed more quickly - but the
5679 ** results may have less overlap, making them more expensive in a 5357 ** results may have less overlap, making them more expensive in a
(...skipping 21 matching lines...) Expand all
5701 for(rc=SQLITE_OK; rc==SQLITE_OK && !leavesReaderAtEnd(pReader); 5379 for(rc=SQLITE_OK; rc==SQLITE_OK && !leavesReaderAtEnd(pReader);
5702 rc=leavesReaderStep(v, pReader)){ 5380 rc=leavesReaderStep(v, pReader)){
5703 /* TODO(shess) Really want leavesReaderTermCmp(), but that name is 5381 /* TODO(shess) Really want leavesReaderTermCmp(), but that name is
5704 ** already taken to compare the terms of two LeavesReaders. Think 5382 ** already taken to compare the terms of two LeavesReaders. Think
5705 ** on a better name. [Meanwhile, break encapsulation rather than 5383 ** on a better name. [Meanwhile, break encapsulation rather than
5706 ** use a confusing name.] 5384 ** use a confusing name.]
5707 */ 5385 */
5708 int c = leafReaderTermCmp(&pReader->leafReader, pTerm, nTerm, isPrefix); 5386 int c = leafReaderTermCmp(&pReader->leafReader, pTerm, nTerm, isPrefix);
5709 if( c>0 ) break; /* Past any possible matches. */ 5387 if( c>0 ) break; /* Past any possible matches. */
5710 if( c==0 ){ 5388 if( c==0 ){
5711 int iBuffer, nData;
5712 const char *pData = leavesReaderData(pReader); 5389 const char *pData = leavesReaderData(pReader);
5713 if( pData==NULL ){ 5390 int iBuffer, nData = leavesReaderDataBytes(pReader);
5714 rc = SQLITE_CORRUPT_BKPT;
5715 break;
5716 }
5717 nData = leavesReaderDataBytes(pReader);
5718 5391
5719 /* Find the first empty buffer. */ 5392 /* Find the first empty buffer. */
5720 for(iBuffer=0; iBuffer<nBuffers; ++iBuffer){ 5393 for(iBuffer=0; iBuffer<nBuffers; ++iBuffer){
5721 if( 0==pBuffers[iBuffer].nData ) break; 5394 if( 0==pBuffers[iBuffer].nData ) break;
5722 } 5395 }
5723 5396
5724 /* Out of buffers, add an empty one. */ 5397 /* Out of buffers, add an empty one. */
5725 if( iBuffer==nBuffers ){ 5398 if( iBuffer==nBuffers ){
5726 if( nBuffers==nMaxBuffers ){ 5399 if( nBuffers==nMaxBuffers ){
5727 DataBuffer *p; 5400 DataBuffer *p;
(...skipping 25 matching lines...) Expand all
5753 dataBufferReplace(&(pBuffers[0]), pData, nData); 5426 dataBufferReplace(&(pBuffers[0]), pData, nData);
5754 }else{ 5427 }else{
5755 /* pAcc is the empty buffer the merged data will end up in. */ 5428 /* pAcc is the empty buffer the merged data will end up in. */
5756 DataBuffer *pAcc = &(pBuffers[iBuffer]); 5429 DataBuffer *pAcc = &(pBuffers[iBuffer]);
5757 DataBuffer *p = &(pBuffers[0]); 5430 DataBuffer *p = &(pBuffers[0]);
5758 5431
5759 /* Handle position 0 specially to avoid need to prime pAcc 5432 /* Handle position 0 specially to avoid need to prime pAcc
5760 ** with pData/nData. 5433 ** with pData/nData.
5761 */ 5434 */
5762 dataBufferSwap(p, pAcc); 5435 dataBufferSwap(p, pAcc);
5763 rc = docListAccumulateUnion(pAcc, pData, nData); 5436 docListAccumulateUnion(pAcc, pData, nData);
5764 if( rc!=SQLITE_OK ) goto err;
5765 5437
5766 /* Accumulate remaining doclists into pAcc. */ 5438 /* Accumulate remaining doclists into pAcc. */
5767 for(++p; p<pAcc; ++p){ 5439 for(++p; p<pAcc; ++p){
5768 rc = docListAccumulateUnion(pAcc, p->pData, p->nData); 5440 docListAccumulateUnion(pAcc, p->pData, p->nData);
5769 if( rc!=SQLITE_OK ) goto err;
5770 5441
5771 /* dataBufferReset() could allow a large doclist to blow up 5442 /* dataBufferReset() could allow a large doclist to blow up
5772 ** our memory requirements. 5443 ** our memory requirements.
5773 */ 5444 */
5774 if( p->nCapacity<1024 ){ 5445 if( p->nCapacity<1024 ){
5775 dataBufferReset(p); 5446 dataBufferReset(p);
5776 }else{ 5447 }else{
5777 dataBufferDestroy(p); 5448 dataBufferDestroy(p);
5778 dataBufferInit(p, 0); 5449 dataBufferInit(p, 0);
5779 } 5450 }
5780 } 5451 }
5781 } 5452 }
5782 } 5453 }
5783 } 5454 }
5784 5455
5785 /* Union all the doclists together into *out. */ 5456 /* Union all the doclists together into *out. */
5786 /* TODO(shess) What if *out is big? Sigh. */ 5457 /* TODO(shess) What if *out is big? Sigh. */
5787 if( rc==SQLITE_OK && nBuffers>0 ){ 5458 if( rc==SQLITE_OK && nBuffers>0 ){
5788 int iBuffer; 5459 int iBuffer;
5789 for(iBuffer=0; iBuffer<nBuffers; ++iBuffer){ 5460 for(iBuffer=0; iBuffer<nBuffers; ++iBuffer){
5790 if( pBuffers[iBuffer].nData>0 ){ 5461 if( pBuffers[iBuffer].nData>0 ){
5791 if( out->nData==0 ){ 5462 if( out->nData==0 ){
5792 dataBufferSwap(out, &(pBuffers[iBuffer])); 5463 dataBufferSwap(out, &(pBuffers[iBuffer]));
5793 }else{ 5464 }else{
5794 rc = docListAccumulateUnion(out, pBuffers[iBuffer].pData, 5465 docListAccumulateUnion(out, pBuffers[iBuffer].pData,
5795 pBuffers[iBuffer].nData); 5466 pBuffers[iBuffer].nData);
5796 if( rc!=SQLITE_OK ) break;
5797 } 5467 }
5798 } 5468 }
5799 } 5469 }
5800 } 5470 }
5801 5471
5802 err:
5803 while( nBuffers-- ){ 5472 while( nBuffers-- ){
5804 dataBufferDestroy(&(pBuffers[nBuffers])); 5473 dataBufferDestroy(&(pBuffers[nBuffers]));
5805 } 5474 }
5806 if( pBuffers!=NULL ) sqlite3_free(pBuffers); 5475 if( pBuffers!=NULL ) sqlite3_free(pBuffers);
5807 5476
5808 return rc; 5477 return rc;
5809 } 5478 }
5810 5479
5811 /* Call loadSegmentLeavesInt() with pData/nData as input. */ 5480 /* Call loadSegmentLeavesInt() with pData/nData as input. */
5812 static int loadSegmentLeaf(fulltext_vtab *v, const char *pData, int nData, 5481 static int loadSegmentLeaf(fulltext_vtab *v, const char *pData, int nData,
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
5851 ** nodes which could include pTerm/nTerm/isPrefix. Note that the 5520 ** nodes which could include pTerm/nTerm/isPrefix. Note that the
5852 ** interior node terms logically come between the blocks, so there is 5521 ** interior node terms logically come between the blocks, so there is
5853 ** one more blockid than there are terms (that block contains terms >= 5522 ** one more blockid than there are terms (that block contains terms >=
5854 ** the last interior-node term). 5523 ** the last interior-node term).
5855 */ 5524 */
5856 /* TODO(shess) The calling code may already know that the end child is 5525 /* TODO(shess) The calling code may already know that the end child is
5857 ** not worth calculating, because the end may be in a later sibling 5526 ** not worth calculating, because the end may be in a later sibling
5858 ** node. Consider whether breaking symmetry is worthwhile. I suspect 5527 ** node. Consider whether breaking symmetry is worthwhile. I suspect
5859 ** it is not worthwhile. 5528 ** it is not worthwhile.
5860 */ 5529 */
5861 static int getChildrenContaining(const char *pData, int nData, 5530 static void getChildrenContaining(const char *pData, int nData,
5862 const char *pTerm, int nTerm, int isPrefix, 5531 const char *pTerm, int nTerm, int isPrefix,
5863 sqlite_int64 *piStartChild, 5532 sqlite_int64 *piStartChild,
5864 sqlite_int64 *piEndChild){ 5533 sqlite_int64 *piEndChild){
5865 InteriorReader reader; 5534 InteriorReader reader;
5866 int rc;
5867 5535
5868 assert( nData>1 ); 5536 assert( nData>1 );
5869 assert( *pData!='\0' ); 5537 assert( *pData!='\0' );
5870 rc = interiorReaderInit(pData, nData, &reader); 5538 interiorReaderInit(pData, nData, &reader);
5871 if( rc!=SQLITE_OK ) return rc;
5872 5539
5873 /* Scan for the first child which could contain pTerm/nTerm. */ 5540 /* Scan for the first child which could contain pTerm/nTerm. */
5874 while( !interiorReaderAtEnd(&reader) ){ 5541 while( !interiorReaderAtEnd(&reader) ){
5875 if( interiorReaderTermCmp(&reader, pTerm, nTerm, 0)>0 ) break; 5542 if( interiorReaderTermCmp(&reader, pTerm, nTerm, 0)>0 ) break;
5876 rc = interiorReaderStep(&reader); 5543 interiorReaderStep(&reader);
5877 if( rc!=SQLITE_OK ){
5878 interiorReaderDestroy(&reader);
5879 return rc;
5880 }
5881 } 5544 }
5882 *piStartChild = interiorReaderCurrentBlockid(&reader); 5545 *piStartChild = interiorReaderCurrentBlockid(&reader);
5883 5546
5884 /* Keep scanning to find a term greater than our term, using prefix 5547 /* Keep scanning to find a term greater than our term, using prefix
5885 ** comparison if indicated. If isPrefix is false, this will be the 5548 ** comparison if indicated. If isPrefix is false, this will be the
5886 ** same blockid as the starting block. 5549 ** same blockid as the starting block.
5887 */ 5550 */
5888 while( !interiorReaderAtEnd(&reader) ){ 5551 while( !interiorReaderAtEnd(&reader) ){
5889 if( interiorReaderTermCmp(&reader, pTerm, nTerm, isPrefix)>0 ) break; 5552 if( interiorReaderTermCmp(&reader, pTerm, nTerm, isPrefix)>0 ) break;
5890 rc = interiorReaderStep(&reader); 5553 interiorReaderStep(&reader);
5891 if( rc!=SQLITE_OK ){
5892 interiorReaderDestroy(&reader);
5893 return rc;
5894 }
5895 } 5554 }
5896 *piEndChild = interiorReaderCurrentBlockid(&reader); 5555 *piEndChild = interiorReaderCurrentBlockid(&reader);
5897 5556
5898 interiorReaderDestroy(&reader); 5557 interiorReaderDestroy(&reader);
5899 5558
5900 /* Children must ascend, and if !prefix, both must be the same. */ 5559 /* Children must ascend, and if !prefix, both must be the same. */
5901 assert( *piEndChild>=*piStartChild ); 5560 assert( *piEndChild>=*piStartChild );
5902 assert( isPrefix || *piStartChild==*piEndChild ); 5561 assert( isPrefix || *piStartChild==*piEndChild );
5903 return rc;
5904 } 5562 }
5905 5563
5906 /* Read block at iBlockid and pass it with other params to 5564 /* Read block at iBlockid and pass it with other params to
5907 ** getChildrenContaining(). 5565 ** getChildrenContaining().
5908 */ 5566 */
5909 static int loadAndGetChildrenContaining( 5567 static int loadAndGetChildrenContaining(
5910 fulltext_vtab *v, 5568 fulltext_vtab *v,
5911 sqlite_int64 iBlockid, 5569 sqlite_int64 iBlockid,
5912 const char *pTerm, int nTerm, int isPrefix, 5570 const char *pTerm, int nTerm, int isPrefix,
5913 sqlite_int64 *piStartChild, sqlite_int64 *piEndChild 5571 sqlite_int64 *piStartChild, sqlite_int64 *piEndChild
5914 ){ 5572 ){
5915 sqlite3_stmt *s = NULL; 5573 sqlite3_stmt *s = NULL;
5916 int rc; 5574 int rc;
5917 5575
5918 assert( iBlockid!=0 ); 5576 assert( iBlockid!=0 );
5919 assert( pTerm!=NULL ); 5577 assert( pTerm!=NULL );
5920 assert( nTerm!=0 ); /* TODO(shess) Why not allow this? */ 5578 assert( nTerm!=0 ); /* TODO(shess) Why not allow this? */
5921 assert( piStartChild!=NULL ); 5579 assert( piStartChild!=NULL );
5922 assert( piEndChild!=NULL ); 5580 assert( piEndChild!=NULL );
5923 5581
5924 rc = sql_get_statement(v, BLOCK_SELECT_STMT, &s); 5582 rc = sql_get_statement(v, BLOCK_SELECT_STMT, &s);
5925 if( rc!=SQLITE_OK ) return rc; 5583 if( rc!=SQLITE_OK ) return rc;
5926 5584
5927 rc = sqlite3_bind_int64(s, 1, iBlockid); 5585 rc = sqlite3_bind_int64(s, 1, iBlockid);
5928 if( rc!=SQLITE_OK ) return rc; 5586 if( rc!=SQLITE_OK ) return rc;
5929 5587
5930 rc = sqlite3_step(s); 5588 rc = sqlite3_step(s);
5931 /* Corrupt if interior node references missing child node. */ 5589 if( rc==SQLITE_DONE ) return SQLITE_ERROR;
5932 if( rc==SQLITE_DONE ) return SQLITE_CORRUPT_BKPT;
5933 if( rc!=SQLITE_ROW ) return rc; 5590 if( rc!=SQLITE_ROW ) return rc;
5934 5591
5935 /* Corrupt if child node isn't a blob. */ 5592 getChildrenContaining(sqlite3_column_blob(s, 0), sqlite3_column_bytes(s, 0),
5936 if( sqlite3_column_type(s, 0)!=SQLITE_BLOB ){ 5593 pTerm, nTerm, isPrefix, piStartChild, piEndChild);
5937 sqlite3_reset(s); /* So we don't leave a lock. */
5938 return SQLITE_CORRUPT_BKPT;
5939 }else{
5940 const char *pData = sqlite3_column_blob(s, 0);
5941 int nData = sqlite3_column_bytes(s, 0);
5942
5943 /* Corrupt if child is not a valid interior node. */
5944 if( pData==NULL || nData<1 || pData[0]=='\0' ){
5945 sqlite3_reset(s); /* So we don't leave a lock. */
5946 return SQLITE_CORRUPT_BKPT;
5947 }
5948
5949 rc = getChildrenContaining(pData, nData, pTerm, nTerm,
5950 isPrefix, piStartChild, piEndChild);
5951 if( rc!=SQLITE_OK ){
5952 sqlite3_reset(s);
5953 return rc;
5954 }
5955 }
5956 5594
5957 /* We expect only one row. We must execute another sqlite3_step() 5595 /* We expect only one row. We must execute another sqlite3_step()
5958 * to complete the iteration; otherwise the table will remain 5596 * to complete the iteration; otherwise the table will remain
5959 * locked. */ 5597 * locked. */
5960 rc = sqlite3_step(s); 5598 rc = sqlite3_step(s);
5961 if( rc==SQLITE_ROW ) return SQLITE_ERROR; 5599 if( rc==SQLITE_ROW ) return SQLITE_ERROR;
5962 if( rc!=SQLITE_DONE ) return rc; 5600 if( rc!=SQLITE_DONE ) return rc;
5963 5601
5964 return SQLITE_OK; 5602 return SQLITE_OK;
5965 } 5603 }
5966 5604
5967 /* Traverse the tree represented by pData[nData] looking for 5605 /* Traverse the tree represented by pData[nData] looking for
5968 ** pTerm[nTerm], placing its doclist into *out. This is internal to 5606 ** pTerm[nTerm], placing its doclist into *out. This is internal to
5969 ** loadSegment() to make error-handling cleaner. 5607 ** loadSegment() to make error-handling cleaner.
5970 */ 5608 */
5971 static int loadSegmentInt(fulltext_vtab *v, const char *pData, int nData, 5609 static int loadSegmentInt(fulltext_vtab *v, const char *pData, int nData,
5972 sqlite_int64 iLeavesEnd, 5610 sqlite_int64 iLeavesEnd,
5973 const char *pTerm, int nTerm, int isPrefix, 5611 const char *pTerm, int nTerm, int isPrefix,
5974 DataBuffer *out){ 5612 DataBuffer *out){
5975 /* Special case where root is a leaf. */ 5613 /* Special case where root is a leaf. */
5976 if( *pData=='\0' ){ 5614 if( *pData=='\0' ){
5977 return loadSegmentLeaf(v, pData, nData, pTerm, nTerm, isPrefix, out); 5615 return loadSegmentLeaf(v, pData, nData, pTerm, nTerm, isPrefix, out);
5978 }else{ 5616 }else{
5979 int rc; 5617 int rc;
5980 sqlite_int64 iStartChild, iEndChild; 5618 sqlite_int64 iStartChild, iEndChild;
5981 5619
5982 /* Process pData as an interior node, then loop down the tree 5620 /* Process pData as an interior node, then loop down the tree
5983 ** until we find the set of leaf nodes to scan for the term. 5621 ** until we find the set of leaf nodes to scan for the term.
5984 */ 5622 */
5985 rc = getChildrenContaining(pData, nData, pTerm, nTerm, isPrefix, 5623 getChildrenContaining(pData, nData, pTerm, nTerm, isPrefix,
5986 &iStartChild, &iEndChild); 5624 &iStartChild, &iEndChild);
5987 if( rc!=SQLITE_OK ) return rc;
5988 while( iStartChild>iLeavesEnd ){ 5625 while( iStartChild>iLeavesEnd ){
5989 sqlite_int64 iNextStart, iNextEnd; 5626 sqlite_int64 iNextStart, iNextEnd;
5990 rc = loadAndGetChildrenContaining(v, iStartChild, pTerm, nTerm, isPrefix, 5627 rc = loadAndGetChildrenContaining(v, iStartChild, pTerm, nTerm, isPrefix,
5991 &iNextStart, &iNextEnd); 5628 &iNextStart, &iNextEnd);
5992 if( rc!=SQLITE_OK ) return rc; 5629 if( rc!=SQLITE_OK ) return rc;
5993 5630
5994 /* If we've branched, follow the end branch, too. */ 5631 /* If we've branched, follow the end branch, too. */
5995 if( iStartChild!=iEndChild ){ 5632 if( iStartChild!=iEndChild ){
5996 sqlite_int64 iDummy; 5633 sqlite_int64 iDummy;
5997 rc = loadAndGetChildrenContaining(v, iEndChild, pTerm, nTerm, isPrefix, 5634 rc = loadAndGetChildrenContaining(v, iEndChild, pTerm, nTerm, isPrefix,
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
6029 ** within a segment, with pairwise merges across segments, or across 5666 ** within a segment, with pairwise merges across segments, or across
6030 ** all segments at once. 5667 ** all segments at once.
6031 */ 5668 */
6032 static int loadSegment(fulltext_vtab *v, const char *pData, int nData, 5669 static int loadSegment(fulltext_vtab *v, const char *pData, int nData,
6033 sqlite_int64 iLeavesEnd, 5670 sqlite_int64 iLeavesEnd,
6034 const char *pTerm, int nTerm, int isPrefix, 5671 const char *pTerm, int nTerm, int isPrefix,
6035 DataBuffer *out){ 5672 DataBuffer *out){
6036 DataBuffer result; 5673 DataBuffer result;
6037 int rc; 5674 int rc;
6038 5675
6039 /* Corrupt if segment root can't be valid. */ 5676 assert( nData>1 );
6040 if( pData==NULL || nData<1 ) return SQLITE_CORRUPT_BKPT;
6041 5677
6042 /* This code should never be called with buffered updates. */ 5678 /* This code should never be called with buffered updates. */
6043 assert( v->nPendingData<0 ); 5679 assert( v->nPendingData<0 );
6044 5680
6045 dataBufferInit(&result, 0); 5681 dataBufferInit(&result, 0);
6046 rc = loadSegmentInt(v, pData, nData, iLeavesEnd, 5682 rc = loadSegmentInt(v, pData, nData, iLeavesEnd,
6047 pTerm, nTerm, isPrefix, &result); 5683 pTerm, nTerm, isPrefix, &result);
6048 if( rc==SQLITE_OK && result.nData>0 ){ 5684 if( rc==SQLITE_OK && result.nData>0 ){
6049 if( out->nData==0 ){ 5685 if( out->nData==0 ){
6050 DataBuffer tmp = *out; 5686 DataBuffer tmp = *out;
6051 *out = result; 5687 *out = result;
6052 result = tmp; 5688 result = tmp;
6053 }else{ 5689 }else{
6054 DataBuffer merged; 5690 DataBuffer merged;
6055 DLReader readers[2]; 5691 DLReader readers[2];
6056 5692
6057 rc = dlrInit(&readers[0], DL_DEFAULT, out->pData, out->nData); 5693 dlrInit(&readers[0], DL_DEFAULT, out->pData, out->nData);
6058 if( rc==SQLITE_OK ){ 5694 dlrInit(&readers[1], DL_DEFAULT, result.pData, result.nData);
6059 rc = dlrInit(&readers[1], DL_DEFAULT, result.pData, result.nData); 5695 dataBufferInit(&merged, out->nData+result.nData);
6060 if( rc==SQLITE_OK ){ 5696 docListMerge(&merged, readers, 2);
6061 dataBufferInit(&merged, out->nData+result.nData); 5697 dataBufferDestroy(out);
6062 rc = docListMerge(&merged, readers, 2); 5698 *out = merged;
6063 dataBufferDestroy(out); 5699 dlrDestroy(&readers[0]);
6064 *out = merged; 5700 dlrDestroy(&readers[1]);
6065 dlrDestroy(&readers[1]);
6066 }
6067 dlrDestroy(&readers[0]);
6068 }
6069 } 5701 }
6070 } 5702 }
6071
6072 dataBufferDestroy(&result); 5703 dataBufferDestroy(&result);
6073 return rc; 5704 return rc;
6074 } 5705 }
6075 5706
6076 /* Scan the database and merge together the posting lists for the term 5707 /* Scan the database and merge together the posting lists for the term
6077 ** into *out. 5708 ** into *out.
6078 */ 5709 */
6079 static int termSelect(fulltext_vtab *v, int iColumn, 5710 static int termSelect(fulltext_vtab *v, int iColumn,
6080 const char *pTerm, int nTerm, int isPrefix, 5711 const char *pTerm, int nTerm, int isPrefix,
6081 DocListType iType, DataBuffer *out){ 5712 DocListType iType, DataBuffer *out){
6082 DataBuffer doclist; 5713 DataBuffer doclist;
6083 sqlite3_stmt *s; 5714 sqlite3_stmt *s;
6084 int rc = sql_get_statement(v, SEGDIR_SELECT_ALL_STMT, &s); 5715 int rc = sql_get_statement(v, SEGDIR_SELECT_ALL_STMT, &s);
6085 if( rc!=SQLITE_OK ) return rc; 5716 if( rc!=SQLITE_OK ) return rc;
6086 5717
6087 /* This code should never be called with buffered updates. */ 5718 /* This code should never be called with buffered updates. */
6088 assert( v->nPendingData<0 ); 5719 assert( v->nPendingData<0 );
6089 5720
6090 dataBufferInit(&doclist, 0); 5721 dataBufferInit(&doclist, 0);
6091 5722
6092 /* Traverse the segments from oldest to newest so that newer doclist 5723 /* Traverse the segments from oldest to newest so that newer doclist
6093 ** elements for given docids overwrite older elements. 5724 ** elements for given docids overwrite older elements.
6094 */ 5725 */
6095 while( (rc = sqlite3_step(s))==SQLITE_ROW ){ 5726 while( (rc = sqlite3_step(s))==SQLITE_ROW ){
6096 const char *pData = sqlite3_column_blob(s, 2); 5727 const char *pData = sqlite3_column_blob(s, 2);
6097 const int nData = sqlite3_column_bytes(s, 2); 5728 const int nData = sqlite3_column_bytes(s, 2);
6098 const sqlite_int64 iLeavesEnd = sqlite3_column_int64(s, 1); 5729 const sqlite_int64 iLeavesEnd = sqlite3_column_int64(s, 1);
6099
6100 /* Corrupt if we get back different types than we stored. */
6101 if( sqlite3_column_type(s, 1)!=SQLITE_INTEGER ||
6102 sqlite3_column_type(s, 2)!=SQLITE_BLOB ){
6103 rc = SQLITE_CORRUPT_BKPT;
6104 goto err;
6105 }
6106
6107 rc = loadSegment(v, pData, nData, iLeavesEnd, pTerm, nTerm, isPrefix, 5730 rc = loadSegment(v, pData, nData, iLeavesEnd, pTerm, nTerm, isPrefix,
6108 &doclist); 5731 &doclist);
6109 if( rc!=SQLITE_OK ) goto err; 5732 if( rc!=SQLITE_OK ) goto err;
6110 } 5733 }
6111 if( rc==SQLITE_DONE ){ 5734 if( rc==SQLITE_DONE ){
6112 rc = SQLITE_OK;
6113 if( doclist.nData!=0 ){ 5735 if( doclist.nData!=0 ){
6114 /* TODO(shess) The old term_select_all() code applied the column 5736 /* TODO(shess) The old term_select_all() code applied the column
6115 ** restrict as we merged segments, leading to smaller buffers. 5737 ** restrict as we merged segments, leading to smaller buffers.
6116 ** This is probably worthwhile to bring back, once the new storage 5738 ** This is probably worthwhile to bring back, once the new storage
6117 ** system is checked in. 5739 ** system is checked in.
6118 */ 5740 */
6119 if( iColumn==v->nColumn) iColumn = -1; 5741 if( iColumn==v->nColumn) iColumn = -1;
6120 rc = docListTrim(DL_DEFAULT, doclist.pData, doclist.nData, 5742 docListTrim(DL_DEFAULT, doclist.pData, doclist.nData,
6121 iColumn, iType, out); 5743 iColumn, iType, out);
6122 } 5744 }
5745 rc = SQLITE_OK;
6123 } 5746 }
6124 5747
6125 err: 5748 err:
6126 sqlite3_reset(s); /* So we don't leave a lock. */
6127 dataBufferDestroy(&doclist); 5749 dataBufferDestroy(&doclist);
6128 return rc; 5750 return rc;
6129 } 5751 }
6130 5752
6131 /****************************************************************/ 5753 /****************************************************************/
6132 /* Used to hold hashtable data for sorting. */ 5754 /* Used to hold hashtable data for sorting. */
6133 typedef struct TermData { 5755 typedef struct TermData {
6134 const char *pTerm; 5756 const char *pTerm;
6135 int nTerm; 5757 int nTerm;
6136 DLCollector *pCollector; 5758 DLCollector *pCollector;
(...skipping 321 matching lines...) Expand 10 before | Expand all | Expand 10 after
6458 /* optimize() helper function. Put the readers in order and iterate 6080 /* optimize() helper function. Put the readers in order and iterate
6459 ** through them, merging doclists for matching terms into pWriter. 6081 ** through them, merging doclists for matching terms into pWriter.
6460 ** Returns SQLITE_OK on success, or the SQLite error code which 6082 ** Returns SQLITE_OK on success, or the SQLite error code which
6461 ** prevented success. 6083 ** prevented success.
6462 */ 6084 */
6463 static int optimizeInternal(fulltext_vtab *v, 6085 static int optimizeInternal(fulltext_vtab *v,
6464 OptLeavesReader *readers, int nReaders, 6086 OptLeavesReader *readers, int nReaders,
6465 LeafWriter *pWriter){ 6087 LeafWriter *pWriter){
6466 int i, rc = SQLITE_OK; 6088 int i, rc = SQLITE_OK;
6467 DataBuffer doclist, merged, tmp; 6089 DataBuffer doclist, merged, tmp;
6468 const char *pData;
6469 6090
6470 /* Order the readers. */ 6091 /* Order the readers. */
6471 i = nReaders; 6092 i = nReaders;
6472 while( i-- > 0 ){ 6093 while( i-- > 0 ){
6473 optLeavesReaderReorder(&readers[i], nReaders-i); 6094 optLeavesReaderReorder(&readers[i], nReaders-i);
6474 } 6095 }
6475 6096
6476 dataBufferInit(&doclist, LEAF_MAX); 6097 dataBufferInit(&doclist, LEAF_MAX);
6477 dataBufferInit(&merged, LEAF_MAX); 6098 dataBufferInit(&merged, LEAF_MAX);
6478 6099
6479 /* Exhausted readers bubble to the end, so when the first reader is 6100 /* Exhausted readers bubble to the end, so when the first reader is
6480 ** at eof, all are at eof. 6101 ** at eof, all are at eof.
6481 */ 6102 */
6482 while( !optLeavesReaderAtEnd(&readers[0]) ){ 6103 while( !optLeavesReaderAtEnd(&readers[0]) ){
6483 6104
6484 /* Figure out how many readers share the next term. */ 6105 /* Figure out how many readers share the next term. */
6485 for(i=1; i<nReaders && !optLeavesReaderAtEnd(&readers[i]); i++){ 6106 for(i=1; i<nReaders && !optLeavesReaderAtEnd(&readers[i]); i++){
6486 if( 0!=optLeavesReaderTermCmp(&readers[0], &readers[i]) ) break; 6107 if( 0!=optLeavesReaderTermCmp(&readers[0], &readers[i]) ) break;
6487 } 6108 }
6488 6109
6489 pData = optLeavesReaderData(&readers[0]);
6490 if( pData==NULL ){
6491 rc = SQLITE_CORRUPT_BKPT;
6492 break;
6493 }
6494
6495 /* Special-case for no merge. */ 6110 /* Special-case for no merge. */
6496 if( i==1 ){ 6111 if( i==1 ){
6497 /* Trim deletions from the doclist. */ 6112 /* Trim deletions from the doclist. */
6498 dataBufferReset(&merged); 6113 dataBufferReset(&merged);
6499 rc = docListTrim(DL_DEFAULT, 6114 docListTrim(DL_DEFAULT,
6500 pData, 6115 optLeavesReaderData(&readers[0]),
6501 optLeavesReaderDataBytes(&readers[0]), 6116 optLeavesReaderDataBytes(&readers[0]),
6502 -1, DL_DEFAULT, &merged); 6117 -1, DL_DEFAULT, &merged);
6503 if( rc!= SQLITE_OK ) break;
6504 }else{ 6118 }else{
6505 DLReader dlReaders[MERGE_COUNT]; 6119 DLReader dlReaders[MERGE_COUNT];
6506 int iReader, nReaders; 6120 int iReader, nReaders;
6507 6121
6508 /* Prime the pipeline with the first reader's doclist. After 6122 /* Prime the pipeline with the first reader's doclist. After
6509 ** one pass index 0 will reference the accumulated doclist. 6123 ** one pass index 0 will reference the accumulated doclist.
6510 */ 6124 */
6511 rc = dlrInit(&dlReaders[0], DL_DEFAULT, 6125 dlrInit(&dlReaders[0], DL_DEFAULT,
6512 pData, 6126 optLeavesReaderData(&readers[0]),
6513 optLeavesReaderDataBytes(&readers[0])); 6127 optLeavesReaderDataBytes(&readers[0]));
6514 if( rc!=SQLITE_OK ) break;
6515 iReader = 1; 6128 iReader = 1;
6516 6129
6517 assert( iReader<i ); /* Must execute the loop at least once. */ 6130 assert( iReader<i ); /* Must execute the loop at least once. */
6518 while( iReader<i ){ 6131 while( iReader<i ){
6519 /* Merge 16 inputs per pass. */ 6132 /* Merge 16 inputs per pass. */
6520 for( nReaders=1; iReader<i && nReaders<MERGE_COUNT; 6133 for( nReaders=1; iReader<i && nReaders<MERGE_COUNT;
6521 iReader++, nReaders++ ){ 6134 iReader++, nReaders++ ){
6522 pData = optLeavesReaderData(&readers[iReader]); 6135 dlrInit(&dlReaders[nReaders], DL_DEFAULT,
6523 if( pData == NULL ){ 6136 optLeavesReaderData(&readers[iReader]),
6524 rc = SQLITE_CORRUPT_BKPT; 6137 optLeavesReaderDataBytes(&readers[iReader]));
6525 break;
6526 }
6527 rc = dlrInit(&dlReaders[nReaders], DL_DEFAULT,
6528 pData,
6529 optLeavesReaderDataBytes(&readers[iReader]));
6530 if( rc != SQLITE_OK ) break;
6531 } 6138 }
6532 6139
6533 /* Merge doclists and swap result into accumulator. */ 6140 /* Merge doclists and swap result into accumulator. */
6534 if( rc==SQLITE_OK ){ 6141 dataBufferReset(&merged);
6535 dataBufferReset(&merged); 6142 docListMerge(&merged, dlReaders, nReaders);
6536 rc = docListMerge(&merged, dlReaders, nReaders); 6143 tmp = merged;
6537 tmp = merged; 6144 merged = doclist;
6538 merged = doclist; 6145 doclist = tmp;
6539 doclist = tmp;
6540 }
6541 6146
6542 while( nReaders-- > 0 ){ 6147 while( nReaders-- > 0 ){
6543 dlrDestroy(&dlReaders[nReaders]); 6148 dlrDestroy(&dlReaders[nReaders]);
6544 } 6149 }
6545 6150
6546 if( rc!=SQLITE_OK ) goto err;
6547
6548 /* Accumulated doclist to reader 0 for next pass. */ 6151 /* Accumulated doclist to reader 0 for next pass. */
6549 rc = dlrInit(&dlReaders[0], DL_DEFAULT, doclist.pData, doclist.nData); 6152 dlrInit(&dlReaders[0], DL_DEFAULT, doclist.pData, doclist.nData);
6550 if( rc!=SQLITE_OK ) goto err;
6551 } 6153 }
6552 6154
6553 /* Destroy reader that was left in the pipeline. */ 6155 /* Destroy reader that was left in the pipeline. */
6554 dlrDestroy(&dlReaders[0]); 6156 dlrDestroy(&dlReaders[0]);
6555 6157
6556 /* Trim deletions from the doclist. */ 6158 /* Trim deletions from the doclist. */
6557 dataBufferReset(&merged); 6159 dataBufferReset(&merged);
6558 rc = docListTrim(DL_DEFAULT, doclist.pData, doclist.nData, 6160 docListTrim(DL_DEFAULT, doclist.pData, doclist.nData,
6559 -1, DL_DEFAULT, &merged); 6161 -1, DL_DEFAULT, &merged);
6560 if( rc!=SQLITE_OK ) goto err;
6561 } 6162 }
6562 6163
6563 /* Only pass doclists with hits (skip if all hits deleted). */ 6164 /* Only pass doclists with hits (skip if all hits deleted). */
6564 if( merged.nData>0 ){ 6165 if( merged.nData>0 ){
6565 rc = leafWriterStep(v, pWriter, 6166 rc = leafWriterStep(v, pWriter,
6566 optLeavesReaderTerm(&readers[0]), 6167 optLeavesReaderTerm(&readers[0]),
6567 optLeavesReaderTermBytes(&readers[0]), 6168 optLeavesReaderTermBytes(&readers[0]),
6568 merged.pData, merged.nData); 6169 merged.pData, merged.nData);
6569 if( rc!=SQLITE_OK ) goto err; 6170 if( rc!=SQLITE_OK ) goto err;
6570 } 6171 }
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after
6630 */ 6231 */
6631 leafWriterInit(iMaxLevel, 0, &writer); 6232 leafWriterInit(iMaxLevel, 0, &writer);
6632 6233
6633 i = 0; 6234 i = 0;
6634 while( (rc = sqlite3_step(s))==SQLITE_ROW ){ 6235 while( (rc = sqlite3_step(s))==SQLITE_ROW ){
6635 sqlite_int64 iStart = sqlite3_column_int64(s, 0); 6236 sqlite_int64 iStart = sqlite3_column_int64(s, 0);
6636 sqlite_int64 iEnd = sqlite3_column_int64(s, 1); 6237 sqlite_int64 iEnd = sqlite3_column_int64(s, 1);
6637 const char *pRootData = sqlite3_column_blob(s, 2); 6238 const char *pRootData = sqlite3_column_blob(s, 2);
6638 int nRootData = sqlite3_column_bytes(s, 2); 6239 int nRootData = sqlite3_column_bytes(s, 2);
6639 6240
6640 /* Corrupt if we get back different types than we stored. */
6641 if( sqlite3_column_type(s, 0)!=SQLITE_INTEGER ||
6642 sqlite3_column_type(s, 1)!=SQLITE_INTEGER ||
6643 sqlite3_column_type(s, 2)!=SQLITE_BLOB ){
6644 rc = SQLITE_CORRUPT_BKPT;
6645 break;
6646 }
6647
6648 assert( i<nReaders ); 6241 assert( i<nReaders );
6649 rc = leavesReaderInit(v, -1, iStart, iEnd, pRootData, nRootData, 6242 rc = leavesReaderInit(v, -1, iStart, iEnd, pRootData, nRootData,
6650 &readers[i].reader); 6243 &readers[i].reader);
6651 if( rc!=SQLITE_OK ) break; 6244 if( rc!=SQLITE_OK ) break;
6652 6245
6653 readers[i].segment = i; 6246 readers[i].segment = i;
6654 i++; 6247 i++;
6655 } 6248 }
6656 6249
6657 /* If we managed to successfully read them all, optimize them. */ 6250 /* If we managed to successfully read them all, optimize them. */
6658 if( rc==SQLITE_DONE ){ 6251 if( rc==SQLITE_DONE ){
6659 assert( i==nReaders ); 6252 assert( i==nReaders );
6660 rc = optimizeInternal(v, readers, nReaders, &writer); 6253 rc = optimizeInternal(v, readers, nReaders, &writer);
6661 }else{
6662 sqlite3_reset(s); /* So we don't leave a lock. */
6663 } 6254 }
6664 6255
6665 while( i-- > 0 ){ 6256 while( i-- > 0 ){
6666 leavesReaderDestroy(&readers[i].reader); 6257 leavesReaderDestroy(&readers[i].reader);
6667 } 6258 }
6668 sqlite3_free(readers); 6259 sqlite3_free(readers);
6669 6260
6670 /* If we've successfully gotten to here, delete the old segments 6261 /* If we've successfully gotten to here, delete the old segments
6671 ** and flush the interior structure of the new segment. 6262 ** and flush the interior structure of the new segment.
6672 */ 6263 */
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after
6716 ** iStartBlockid and iEndBlockid, inclusive, or by the contents of 6307 ** iStartBlockid and iEndBlockid, inclusive, or by the contents of
6717 ** pRootData if iStartBlockid is 0 (in which case the entire segment 6308 ** pRootData if iStartBlockid is 0 (in which case the entire segment
6718 ** fit in a leaf). 6309 ** fit in a leaf).
6719 */ 6310 */
6720 static int collectSegmentTerms(fulltext_vtab *v, sqlite3_stmt *s, 6311 static int collectSegmentTerms(fulltext_vtab *v, sqlite3_stmt *s,
6721 fts2Hash *pTerms){ 6312 fts2Hash *pTerms){
6722 const sqlite_int64 iStartBlockid = sqlite3_column_int64(s, 0); 6313 const sqlite_int64 iStartBlockid = sqlite3_column_int64(s, 0);
6723 const sqlite_int64 iEndBlockid = sqlite3_column_int64(s, 1); 6314 const sqlite_int64 iEndBlockid = sqlite3_column_int64(s, 1);
6724 const char *pRootData = sqlite3_column_blob(s, 2); 6315 const char *pRootData = sqlite3_column_blob(s, 2);
6725 const int nRootData = sqlite3_column_bytes(s, 2); 6316 const int nRootData = sqlite3_column_bytes(s, 2);
6726 int rc;
6727 LeavesReader reader; 6317 LeavesReader reader;
6728 6318 int rc = leavesReaderInit(v, 0, iStartBlockid, iEndBlockid,
6729 /* Corrupt if we get back different types than we stored. */ 6319 pRootData, nRootData, &reader);
6730 if( sqlite3_column_type(s, 0)!=SQLITE_INTEGER ||
6731 sqlite3_column_type(s, 1)!=SQLITE_INTEGER ||
6732 sqlite3_column_type(s, 2)!=SQLITE_BLOB ){
6733 return SQLITE_CORRUPT_BKPT;
6734 }
6735
6736 rc = leavesReaderInit(v, 0, iStartBlockid, iEndBlockid,
6737 pRootData, nRootData, &reader);
6738 if( rc!=SQLITE_OK ) return rc; 6320 if( rc!=SQLITE_OK ) return rc;
6739 6321
6740 while( rc==SQLITE_OK && !leavesReaderAtEnd(&reader) ){ 6322 while( rc==SQLITE_OK && !leavesReaderAtEnd(&reader) ){
6741 const char *pTerm = leavesReaderTerm(&reader); 6323 const char *pTerm = leavesReaderTerm(&reader);
6742 const int nTerm = leavesReaderTermBytes(&reader); 6324 const int nTerm = leavesReaderTermBytes(&reader);
6743 void *oldValue = sqlite3Fts2HashFind(pTerms, pTerm, nTerm); 6325 void *oldValue = sqlite3Fts2HashFind(pTerms, pTerm, nTerm);
6744 void *newValue = (void *)((char *)oldValue+1); 6326 void *newValue = (void *)((char *)oldValue+1);
6745 6327
6746 /* From the comment before sqlite3Fts2HashInsert in fts2_hash.c, 6328 /* From the comment before sqlite3Fts2HashInsert in fts2_hash.c,
6747 ** the data value passed is returned in case of malloc failure. 6329 ** the data value passed is returned in case of malloc failure.
(...skipping 141 matching lines...) Expand 10 before | Expand all | Expand 10 after
6889 } 6471 }
6890 } 6472 }
6891 6473
6892 /* Expand the DL_DEFAULT doclist in pData into a text result in 6474 /* Expand the DL_DEFAULT doclist in pData into a text result in
6893 ** pContext. 6475 ** pContext.
6894 */ 6476 */
6895 static void createDoclistResult(sqlite3_context *pContext, 6477 static void createDoclistResult(sqlite3_context *pContext,
6896 const char *pData, int nData){ 6478 const char *pData, int nData){
6897 DataBuffer dump; 6479 DataBuffer dump;
6898 DLReader dlReader; 6480 DLReader dlReader;
6899 int rc;
6900 6481
6901 assert( pData!=NULL && nData>0 ); 6482 assert( pData!=NULL && nData>0 );
6902 6483
6903 rc = dlrInit(&dlReader, DL_DEFAULT, pData, nData);
6904 if( rc!=SQLITE_OK ) return;
6905 dataBufferInit(&dump, 0); 6484 dataBufferInit(&dump, 0);
6906 for( ; rc==SQLITE_OK && !dlrAtEnd(&dlReader); rc = dlrStep(&dlReader) ){ 6485 dlrInit(&dlReader, DL_DEFAULT, pData, nData);
6486 for( ; !dlrAtEnd(&dlReader); dlrStep(&dlReader) ){
6907 char buf[256]; 6487 char buf[256];
6908 PLReader plReader; 6488 PLReader plReader;
6909 6489
6910 rc = plrInit(&plReader, &dlReader); 6490 plrInit(&plReader, &dlReader);
6911 if( rc!=SQLITE_OK ) break;
6912 if( DL_DEFAULT==DL_DOCIDS || plrAtEnd(&plReader) ){ 6491 if( DL_DEFAULT==DL_DOCIDS || plrAtEnd(&plReader) ){
6913 sqlite3_snprintf(sizeof(buf), buf, "[%lld] ", dlrDocid(&dlReader)); 6492 sqlite3_snprintf(sizeof(buf), buf, "[%lld] ", dlrDocid(&dlReader));
6914 dataBufferAppend(&dump, buf, strlen(buf)); 6493 dataBufferAppend(&dump, buf, strlen(buf));
6915 }else{ 6494 }else{
6916 int iColumn = plrColumn(&plReader); 6495 int iColumn = plrColumn(&plReader);
6917 6496
6918 sqlite3_snprintf(sizeof(buf), buf, "[%lld %d[", 6497 sqlite3_snprintf(sizeof(buf), buf, "[%lld %d[",
6919 dlrDocid(&dlReader), iColumn); 6498 dlrDocid(&dlReader), iColumn);
6920 dataBufferAppend(&dump, buf, strlen(buf)); 6499 dataBufferAppend(&dump, buf, strlen(buf));
6921 6500
6922 for( ; !plrAtEnd(&plReader); rc = plrStep(&plReader) ){ 6501 for( ; !plrAtEnd(&plReader); plrStep(&plReader) ){
6923 if( rc!=SQLITE_OK ) break;
6924 if( plrColumn(&plReader)!=iColumn ){ 6502 if( plrColumn(&plReader)!=iColumn ){
6925 iColumn = plrColumn(&plReader); 6503 iColumn = plrColumn(&plReader);
6926 sqlite3_snprintf(sizeof(buf), buf, "] %d[", iColumn); 6504 sqlite3_snprintf(sizeof(buf), buf, "] %d[", iColumn);
6927 assert( dump.nData>0 ); 6505 assert( dump.nData>0 );
6928 dump.nData--; /* Overwrite trailing space. */ 6506 dump.nData--; /* Overwrite trailing space. */
6929 assert( dump.pData[dump.nData]==' '); 6507 assert( dump.pData[dump.nData]==' ');
6930 dataBufferAppend(&dump, buf, strlen(buf)); 6508 dataBufferAppend(&dump, buf, strlen(buf));
6931 } 6509 }
6932 if( DL_DEFAULT==DL_POSITIONS_OFFSETS ){ 6510 if( DL_DEFAULT==DL_POSITIONS_OFFSETS ){
6933 sqlite3_snprintf(sizeof(buf), buf, "%d,%d,%d ", 6511 sqlite3_snprintf(sizeof(buf), buf, "%d,%d,%d ",
6934 plrPosition(&plReader), 6512 plrPosition(&plReader),
6935 plrStartOffset(&plReader), plrEndOffset(&plReader)); 6513 plrStartOffset(&plReader), plrEndOffset(&plReader));
6936 }else if( DL_DEFAULT==DL_POSITIONS ){ 6514 }else if( DL_DEFAULT==DL_POSITIONS ){
6937 sqlite3_snprintf(sizeof(buf), buf, "%d ", plrPosition(&plReader)); 6515 sqlite3_snprintf(sizeof(buf), buf, "%d ", plrPosition(&plReader));
6938 }else{ 6516 }else{
6939 assert( NULL=="Unhandled DL_DEFAULT value"); 6517 assert( NULL=="Unhandled DL_DEFAULT value");
6940 } 6518 }
6941 dataBufferAppend(&dump, buf, strlen(buf)); 6519 dataBufferAppend(&dump, buf, strlen(buf));
6942 } 6520 }
6943 plrDestroy(&plReader); 6521 plrDestroy(&plReader);
6944 if( rc!= SQLITE_OK ) break;
6945 6522
6946 assert( dump.nData>0 ); 6523 assert( dump.nData>0 );
6947 dump.nData--; /* Overwrite trailing space. */ 6524 dump.nData--; /* Overwrite trailing space. */
6948 assert( dump.pData[dump.nData]==' '); 6525 assert( dump.pData[dump.nData]==' ');
6949 dataBufferAppend(&dump, "]] ", 3); 6526 dataBufferAppend(&dump, "]] ", 3);
6950 } 6527 }
6951 } 6528 }
6952 dlrDestroy(&dlReader); 6529 dlrDestroy(&dlReader);
6953 if( rc!=SQLITE_OK ){
6954 dataBufferDestroy(&dump);
6955 return;
6956 }
6957 6530
6958 assert( dump.nData>0 ); 6531 assert( dump.nData>0 );
6959 dump.nData--; /* Overwrite trailing space. */ 6532 dump.nData--; /* Overwrite trailing space. */
6960 assert( dump.pData[dump.nData]==' '); 6533 assert( dump.pData[dump.nData]==' ');
6961 dump.pData[dump.nData] = '\0'; 6534 dump.pData[dump.nData] = '\0';
6962 assert( dump.nData>0 ); 6535 assert( dump.nData>0 );
6963 6536
6964 /* Passes ownership of dump's buffer to pContext. */ 6537 /* Passes ownership of dump's buffer to pContext. */
6965 sqlite3_result_text(pContext, dump.pData, dump.nData, sqlite3_free); 6538 sqlite3_result_text(pContext, dump.pData, dump.nData, sqlite3_free);
6966 dump.pData = NULL; 6539 dump.pData = NULL;
(...skipping 273 matching lines...) Expand 10 before | Expand all | Expand 10 after
7240 ){ 6813 ){
7241 rc = SQLITE_NOMEM; 6814 rc = SQLITE_NOMEM;
7242 } 6815 }
7243 } 6816 }
7244 6817
7245 /* Create the virtual table wrapper around the hash-table and overload 6818 /* Create the virtual table wrapper around the hash-table and overload
7246 ** the two scalar functions. If this is successful, register the 6819 ** the two scalar functions. If this is successful, register the
7247 ** module with sqlite. 6820 ** module with sqlite.
7248 */ 6821 */
7249 if( SQLITE_OK==rc 6822 if( SQLITE_OK==rc
7250 #if GEARS_FTS2_CHANGES && !SQLITE_TEST
7251 /* fts2_tokenizer() disabled for security reasons. */
7252 #else
7253 && SQLITE_OK==(rc = sqlite3Fts2InitHashTable(db, pHash, "fts2_tokenizer")) 6823 && SQLITE_OK==(rc = sqlite3Fts2InitHashTable(db, pHash, "fts2_tokenizer"))
7254 #endif
7255 && SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1)) 6824 && SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1))
7256 && SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", -1)) 6825 && SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", -1))
7257 && SQLITE_OK==(rc = sqlite3_overload_function(db, "optimize", -1)) 6826 && SQLITE_OK==(rc = sqlite3_overload_function(db, "optimize", -1))
7258 #ifdef SQLITE_TEST 6827 #ifdef SQLITE_TEST
7259 && SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_terms", -1)) 6828 && SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_terms", -1))
7260 && SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_doclist", -1)) 6829 && SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_doclist", -1))
7261 #endif 6830 #endif
7262 ){ 6831 ){
7263 return sqlite3_create_module_v2( 6832 return sqlite3_create_module_v2(
7264 db, "fts2", &fts2Module, (void *)pHash, hashDestroy 6833 db, "fts2", &fts2Module, (void *)pHash, hashDestroy
(...skipping 17 matching lines...) Expand all
7282 sqlite3 *db, 6851 sqlite3 *db,
7283 char **pzErrMsg, 6852 char **pzErrMsg,
7284 const sqlite3_api_routines *pApi 6853 const sqlite3_api_routines *pApi
7285 ){ 6854 ){
7286 SQLITE_EXTENSION_INIT2(pApi) 6855 SQLITE_EXTENSION_INIT2(pApi)
7287 return sqlite3Fts2Init(db); 6856 return sqlite3Fts2Init(db);
7288 } 6857 }
7289 #endif 6858 #endif
7290 6859
7291 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */ 6860 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */
OLDNEW
« no previous file with comments | « third_party/sqlite/src/Makefile.linux-gcc ('k') | third_party/sqlite/src/ext/fts2/fts2_icu.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698