OLD | NEW |
1 /* fts2 has a design flaw which can lead to database corruption (see | 1 /* fts2 has a design flaw which can lead to database corruption (see |
2 ** below). It is recommended not to use it any longer, instead use | 2 ** below). It is recommended not to use it any longer, instead use |
3 ** fts3 (or higher). If you believe that your use of fts2 is safe, | 3 ** fts3 (or higher). If you believe that your use of fts2 is safe, |
4 ** add -DSQLITE_ENABLE_BROKEN_FTS2=1 to your CFLAGS. | 4 ** add -DSQLITE_ENABLE_BROKEN_FTS2=1 to your CFLAGS. |
5 */ | 5 */ |
6 #if (!defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)) \ | 6 #if (!defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)) \ |
7 && !defined(SQLITE_ENABLE_BROKEN_FTS2) | 7 && !defined(SQLITE_ENABLE_BROKEN_FTS2) |
8 #error fts2 has a design flaw and has been deprecated. | 8 #error fts2 has a design flaw and has been deprecated. |
9 #endif | 9 #endif |
10 /* The flaw is that fts2 uses the content table's unaliased rowid as | 10 /* The flaw is that fts2 uses the content table's unaliased rowid as |
(...skipping 19 matching lines...) Expand all Loading... |
30 ** | 30 ** |
31 ** May you do good and not evil. | 31 ** May you do good and not evil. |
32 ** May you find forgiveness for yourself and forgive others. | 32 ** May you find forgiveness for yourself and forgive others. |
33 ** May you share freely, never taking more than you give. | 33 ** May you share freely, never taking more than you give. |
34 ** | 34 ** |
35 ****************************************************************************** | 35 ****************************************************************************** |
36 ** | 36 ** |
37 ** This is an SQLite module implementing full-text search. | 37 ** This is an SQLite module implementing full-text search. |
38 */ | 38 */ |
39 | 39 |
40 /* TODO(shess): To make it easier to spot changes without groveling | |
41 ** through changelogs, I've defined GEARS_FTS2_CHANGES to call them | |
42 ** out, and I will document them here. On imports, these changes | |
43 ** should be reviewed to make sure they are still present, or are | |
44 ** dropped as appropriate. | |
45 ** | |
46 ** SQLite core adds the custom function fts2_tokenizer() to be used | |
47 ** for defining new tokenizers. The second parameter is a vtable | |
48 ** pointer encoded as a blob. Obviously this cannot be exposed to | |
49 ** Gears callers for security reasons. It could be suppressed in the | |
50 ** authorizer, but for now I have simply commented the definition out. | |
51 */ | |
52 #define GEARS_FTS2_CHANGES 1 | |
53 | |
54 /* | 40 /* |
55 ** The code in this file is only compiled if: | 41 ** The code in this file is only compiled if: |
56 ** | 42 ** |
57 ** * The FTS2 module is being built as an extension | 43 ** * The FTS2 module is being built as an extension |
58 ** (in which case SQLITE_CORE is not defined), or | 44 ** (in which case SQLITE_CORE is not defined), or |
59 ** | 45 ** |
60 ** * The FTS2 module is being built into the core of | 46 ** * The FTS2 module is being built into the core of |
61 ** SQLite (in which case SQLITE_ENABLE_FTS2 is defined). | 47 ** SQLite (in which case SQLITE_ENABLE_FTS2 is defined). |
62 */ | 48 */ |
63 | 49 |
(...skipping 253 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
317 #endif | 303 #endif |
318 | 304 |
319 #include <assert.h> | 305 #include <assert.h> |
320 #include <stdlib.h> | 306 #include <stdlib.h> |
321 #include <stdio.h> | 307 #include <stdio.h> |
322 #include <string.h> | 308 #include <string.h> |
323 #include "fts2.h" | 309 #include "fts2.h" |
324 #include "fts2_hash.h" | 310 #include "fts2_hash.h" |
325 #include "fts2_tokenizer.h" | 311 #include "fts2_tokenizer.h" |
326 #include "sqlite3.h" | 312 #include "sqlite3.h" |
327 #ifndef SQLITE_CORE | 313 #include "sqlite3ext.h" |
328 # include "sqlite3ext.h" | 314 SQLITE_EXTENSION_INIT1 |
329 SQLITE_EXTENSION_INIT1 | |
330 #endif | |
331 | 315 |
332 | 316 |
333 /* TODO(shess) MAN, this thing needs some refactoring. At minimum, it | 317 /* TODO(shess) MAN, this thing needs some refactoring. At minimum, it |
334 ** would be nice to order the file better, perhaps something along the | 318 ** would be nice to order the file better, perhaps something along the |
335 ** lines of: | 319 ** lines of: |
336 ** | 320 ** |
337 ** - utility functions | 321 ** - utility functions |
338 ** - table setup functions | 322 ** - table setup functions |
339 ** - table update functions | 323 ** - table update functions |
340 ** - table query functions | 324 ** - table query functions |
341 ** | 325 ** |
342 ** Put the query functions last because they're likely to reference | 326 ** Put the query functions last because they're likely to reference |
343 ** typedefs or functions from the table update section. | 327 ** typedefs or functions from the table update section. |
344 */ | 328 */ |
345 | 329 |
346 #if 0 | 330 #if 0 |
347 # define TRACE(A) printf A; fflush(stdout) | 331 # define TRACE(A) printf A; fflush(stdout) |
348 #else | 332 #else |
349 # define TRACE(A) | 333 # define TRACE(A) |
350 #endif | 334 #endif |
351 | 335 |
352 #if 0 | |
353 /* Useful to set breakpoints. See main.c sqlite3Corrupt(). */ | |
354 static int fts2Corrupt(void){ | |
355 return SQLITE_CORRUPT; | |
356 } | |
357 # define SQLITE_CORRUPT_BKPT fts2Corrupt() | |
358 #else | |
359 # define SQLITE_CORRUPT_BKPT SQLITE_CORRUPT | |
360 #endif | |
361 | |
362 /* It is not safe to call isspace(), tolower(), or isalnum() on | 336 /* It is not safe to call isspace(), tolower(), or isalnum() on |
363 ** hi-bit-set characters. This is the same solution used in the | 337 ** hi-bit-set characters. This is the same solution used in the |
364 ** tokenizer. | 338 ** tokenizer. |
365 */ | 339 */ |
366 /* TODO(shess) The snippet-generation code should be using the | 340 /* TODO(shess) The snippet-generation code should be using the |
367 ** tokenizer-generated tokens rather than doing its own local | 341 ** tokenizer-generated tokens rather than doing its own local |
368 ** tokenization. | 342 ** tokenization. |
369 */ | 343 */ |
370 /* TODO(shess) Is __isascii() a portable version of (c&0x80)==0? */ | 344 /* TODO(shess) Is __isascii() a portable version of (c&0x80)==0? */ |
371 static int safe_isspace(char c){ | 345 static int safe_isspace(char c){ |
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
440 vu >>= 7; | 414 vu >>= 7; |
441 }while( vu!=0 ); | 415 }while( vu!=0 ); |
442 q[-1] &= 0x7f; /* turn off high bit in final byte */ | 416 q[-1] &= 0x7f; /* turn off high bit in final byte */ |
443 assert( q - (unsigned char *)p <= VARINT_MAX ); | 417 assert( q - (unsigned char *)p <= VARINT_MAX ); |
444 return (int) (q - (unsigned char *)p); | 418 return (int) (q - (unsigned char *)p); |
445 } | 419 } |
446 | 420 |
447 /* Read a 64-bit variable-length integer from memory starting at p[0]. | 421 /* Read a 64-bit variable-length integer from memory starting at p[0]. |
448 * Return the number of bytes read, or 0 on error. | 422 * Return the number of bytes read, or 0 on error. |
449 * The value is stored in *v. */ | 423 * The value is stored in *v. */ |
450 static int getVarintSafe(const char *p, sqlite_int64 *v, int max){ | 424 static int getVarint(const char *p, sqlite_int64 *v){ |
451 const unsigned char *q = (const unsigned char *) p; | 425 const unsigned char *q = (const unsigned char *) p; |
452 sqlite_uint64 x = 0, y = 1; | 426 sqlite_uint64 x = 0, y = 1; |
453 if( max>VARINT_MAX ) max = VARINT_MAX; | 427 while( (*q & 0x80) == 0x80 ){ |
454 while( max && (*q & 0x80) == 0x80 ){ | |
455 max--; | |
456 x += y * (*q++ & 0x7f); | 428 x += y * (*q++ & 0x7f); |
457 y <<= 7; | 429 y <<= 7; |
458 } | 430 if( q - (unsigned char *)p >= VARINT_MAX ){ /* bad data */ |
459 if ( !max ){ | 431 assert( 0 ); |
460 assert( 0 ); | 432 return 0; |
461 return 0; /* tried to read too much; bad data */ | 433 } |
462 } | 434 } |
463 x += y * (*q++); | 435 x += y * (*q++); |
464 *v = (sqlite_int64) x; | 436 *v = (sqlite_int64) x; |
465 return (int) (q - (unsigned char *)p); | 437 return (int) (q - (unsigned char *)p); |
466 } | 438 } |
467 | 439 |
468 static int getVarint(const char *p, sqlite_int64 *v){ | 440 static int getVarint32(const char *p, int *pi){ |
469 return getVarintSafe(p, v, VARINT_MAX); | |
470 } | |
471 | |
472 static int getVarint32Safe(const char *p, int *pi, int max){ | |
473 sqlite_int64 i; | 441 sqlite_int64 i; |
474 int ret = getVarintSafe(p, &i, max); | 442 int ret = getVarint(p, &i); |
475 if( !ret ) return ret; | |
476 *pi = (int) i; | 443 *pi = (int) i; |
477 assert( *pi==i ); | 444 assert( *pi==i ); |
478 return ret; | 445 return ret; |
479 } | 446 } |
480 | 447 |
481 static int getVarint32(const char* p, int *pi){ | |
482 return getVarint32Safe(p, pi, VARINT_MAX); | |
483 } | |
484 | |
485 /*******************************************************************/ | 448 /*******************************************************************/ |
486 /* DataBuffer is used to collect data into a buffer in piecemeal | 449 /* DataBuffer is used to collect data into a buffer in piecemeal |
487 ** fashion. It implements the usual distinction between amount of | 450 ** fashion. It implements the usual distinction between amount of |
488 ** data currently stored (nData) and buffer capacity (nCapacity). | 451 ** data currently stored (nData) and buffer capacity (nCapacity). |
489 ** | 452 ** |
490 ** dataBufferInit - create a buffer with given initial capacity. | 453 ** dataBufferInit - create a buffer with given initial capacity. |
491 ** dataBufferReset - forget buffer's data, retaining capacity. | 454 ** dataBufferReset - forget buffer's data, retaining capacity. |
492 ** dataBufferDestroy - free buffer's data. | 455 ** dataBufferDestroy - free buffer's data. |
493 ** dataBufferSwap - swap contents of two buffers. | 456 ** dataBufferSwap - swap contents of two buffers. |
494 ** dataBufferExpand - expand capacity without adding data. | 457 ** dataBufferExpand - expand capacity without adding data. |
(...skipping 148 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
643 DocListType iType; | 606 DocListType iType; |
644 const char *pData; | 607 const char *pData; |
645 int nData; | 608 int nData; |
646 | 609 |
647 sqlite_int64 iDocid; | 610 sqlite_int64 iDocid; |
648 int nElement; | 611 int nElement; |
649 } DLReader; | 612 } DLReader; |
650 | 613 |
651 static int dlrAtEnd(DLReader *pReader){ | 614 static int dlrAtEnd(DLReader *pReader){ |
652 assert( pReader->nData>=0 ); | 615 assert( pReader->nData>=0 ); |
653 return pReader->nData<=0; | 616 return pReader->nData==0; |
654 } | 617 } |
655 static sqlite_int64 dlrDocid(DLReader *pReader){ | 618 static sqlite_int64 dlrDocid(DLReader *pReader){ |
656 assert( !dlrAtEnd(pReader) ); | 619 assert( !dlrAtEnd(pReader) ); |
657 return pReader->iDocid; | 620 return pReader->iDocid; |
658 } | 621 } |
659 static const char *dlrDocData(DLReader *pReader){ | 622 static const char *dlrDocData(DLReader *pReader){ |
660 assert( !dlrAtEnd(pReader) ); | 623 assert( !dlrAtEnd(pReader) ); |
661 return pReader->pData; | 624 return pReader->pData; |
662 } | 625 } |
663 static int dlrDocDataBytes(DLReader *pReader){ | 626 static int dlrDocDataBytes(DLReader *pReader){ |
664 assert( !dlrAtEnd(pReader) ); | 627 assert( !dlrAtEnd(pReader) ); |
665 return pReader->nElement; | 628 return pReader->nElement; |
666 } | 629 } |
667 static int dlrAllDataBytes(DLReader *pReader){ | 630 static int dlrAllDataBytes(DLReader *pReader){ |
668 assert( !dlrAtEnd(pReader) ); | 631 assert( !dlrAtEnd(pReader) ); |
669 return pReader->nData; | 632 return pReader->nData; |
670 } | 633 } |
671 /* TODO(shess) Consider adding a field to track iDocid varint length | 634 /* TODO(shess) Consider adding a field to track iDocid varint length |
672 ** to make these two functions faster. This might matter (a tiny bit) | 635 ** to make these two functions faster. This might matter (a tiny bit) |
673 ** for queries. | 636 ** for queries. |
674 */ | 637 */ |
675 static const char *dlrPosData(DLReader *pReader){ | 638 static const char *dlrPosData(DLReader *pReader){ |
676 sqlite_int64 iDummy; | 639 sqlite_int64 iDummy; |
677 int n = getVarintSafe(pReader->pData, &iDummy, pReader->nElement); | 640 int n = getVarint(pReader->pData, &iDummy); |
678 if( !n ) return NULL; | |
679 assert( !dlrAtEnd(pReader) ); | 641 assert( !dlrAtEnd(pReader) ); |
680 return pReader->pData+n; | 642 return pReader->pData+n; |
681 } | 643 } |
682 static int dlrPosDataLen(DLReader *pReader){ | 644 static int dlrPosDataLen(DLReader *pReader){ |
683 sqlite_int64 iDummy; | 645 sqlite_int64 iDummy; |
684 int n = getVarint(pReader->pData, &iDummy); | 646 int n = getVarint(pReader->pData, &iDummy); |
685 assert( !dlrAtEnd(pReader) ); | 647 assert( !dlrAtEnd(pReader) ); |
686 return pReader->nElement-n; | 648 return pReader->nElement-n; |
687 } | 649 } |
688 static int dlrStep(DLReader *pReader){ | 650 static void dlrStep(DLReader *pReader){ |
689 assert( !dlrAtEnd(pReader) ); | 651 assert( !dlrAtEnd(pReader) ); |
690 | 652 |
691 /* Skip past current doclist element. */ | 653 /* Skip past current doclist element. */ |
692 assert( pReader->nElement<=pReader->nData ); | 654 assert( pReader->nElement<=pReader->nData ); |
693 pReader->pData += pReader->nElement; | 655 pReader->pData += pReader->nElement; |
694 pReader->nData -= pReader->nElement; | 656 pReader->nData -= pReader->nElement; |
695 | 657 |
696 /* If there is more data, read the next doclist element. */ | 658 /* If there is more data, read the next doclist element. */ |
697 if( pReader->nData>0 ){ | 659 if( pReader->nData!=0 ){ |
698 sqlite_int64 iDocidDelta; | 660 sqlite_int64 iDocidDelta; |
699 int nTotal = 0; | 661 int iDummy, n = getVarint(pReader->pData, &iDocidDelta); |
700 int iDummy, n = getVarintSafe(pReader->pData, &iDocidDelta, pReader->nData); | |
701 if( !n ) return SQLITE_CORRUPT_BKPT; | |
702 nTotal += n; | |
703 pReader->iDocid += iDocidDelta; | 662 pReader->iDocid += iDocidDelta; |
704 if( pReader->iType>=DL_POSITIONS ){ | 663 if( pReader->iType>=DL_POSITIONS ){ |
| 664 assert( n<pReader->nData ); |
705 while( 1 ){ | 665 while( 1 ){ |
706 n = getVarint32Safe(pReader->pData+nTotal, &iDummy, | 666 n += getVarint32(pReader->pData+n, &iDummy); |
707 pReader->nData-nTotal); | 667 assert( n<=pReader->nData ); |
708 if( !n ) return SQLITE_CORRUPT_BKPT; | |
709 nTotal += n; | |
710 if( iDummy==POS_END ) break; | 668 if( iDummy==POS_END ) break; |
711 if( iDummy==POS_COLUMN ){ | 669 if( iDummy==POS_COLUMN ){ |
712 n = getVarint32Safe(pReader->pData+nTotal, &iDummy, | 670 n += getVarint32(pReader->pData+n, &iDummy); |
713 pReader->nData-nTotal); | 671 assert( n<pReader->nData ); |
714 if( !n ) return SQLITE_CORRUPT_BKPT; | |
715 nTotal += n; | |
716 }else if( pReader->iType==DL_POSITIONS_OFFSETS ){ | 672 }else if( pReader->iType==DL_POSITIONS_OFFSETS ){ |
717 n = getVarint32Safe(pReader->pData+nTotal, &iDummy, | 673 n += getVarint32(pReader->pData+n, &iDummy); |
718 pReader->nData-nTotal); | 674 n += getVarint32(pReader->pData+n, &iDummy); |
719 if( !n ) return SQLITE_CORRUPT_BKPT; | 675 assert( n<pReader->nData ); |
720 nTotal += n; | |
721 n = getVarint32Safe(pReader->pData+nTotal, &iDummy, | |
722 pReader->nData-nTotal); | |
723 if( !n ) return SQLITE_CORRUPT_BKPT; | |
724 nTotal += n; | |
725 } | 676 } |
726 } | 677 } |
727 } | 678 } |
728 pReader->nElement = nTotal; | 679 pReader->nElement = n; |
729 assert( pReader->nElement<=pReader->nData ); | 680 assert( pReader->nElement<=pReader->nData ); |
730 } | 681 } |
731 return SQLITE_OK; | |
732 } | 682 } |
733 static void dlrDestroy(DLReader *pReader){ | 683 static void dlrInit(DLReader *pReader, DocListType iType, |
734 SCRAMBLE(pReader); | 684 const char *pData, int nData){ |
735 } | |
736 static int dlrInit(DLReader *pReader, DocListType iType, | |
737 const char *pData, int nData){ | |
738 int rc; | |
739 assert( pData!=NULL && nData!=0 ); | 685 assert( pData!=NULL && nData!=0 ); |
740 pReader->iType = iType; | 686 pReader->iType = iType; |
741 pReader->pData = pData; | 687 pReader->pData = pData; |
742 pReader->nData = nData; | 688 pReader->nData = nData; |
743 pReader->nElement = 0; | 689 pReader->nElement = 0; |
744 pReader->iDocid = 0; | 690 pReader->iDocid = 0; |
745 | 691 |
746 /* Load the first element's data. There must be a first element. */ | 692 /* Load the first element's data. There must be a first element. */ |
747 rc = dlrStep(pReader); | 693 dlrStep(pReader); |
748 if( rc!=SQLITE_OK ) dlrDestroy(pReader); | 694 } |
749 return rc; | 695 static void dlrDestroy(DLReader *pReader){ |
| 696 SCRAMBLE(pReader); |
750 } | 697 } |
751 | 698 |
752 #ifndef NDEBUG | 699 #ifndef NDEBUG |
753 /* Verify that the doclist can be validly decoded. Also returns the | 700 /* Verify that the doclist can be validly decoded. Also returns the |
754 ** last docid found because it is convenient in other assertions for | 701 ** last docid found because it is convenient in other assertions for |
755 ** DLWriter. | 702 ** DLWriter. |
756 */ | 703 */ |
757 static void docListValidate(DocListType iType, const char *pData, int nData, | 704 static void docListValidate(DocListType iType, const char *pData, int nData, |
758 sqlite_int64 *pLastDocid){ | 705 sqlite_int64 *pLastDocid){ |
759 sqlite_int64 iPrevDocid = 0; | 706 sqlite_int64 iPrevDocid = 0; |
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
826 ** | 773 ** |
827 ** iLastDocid is the final docid in the doclist in pData. It is | 774 ** iLastDocid is the final docid in the doclist in pData. It is |
828 ** needed to create the new iPrevDocid for future delta-encoding. The | 775 ** needed to create the new iPrevDocid for future delta-encoding. The |
829 ** code could decode the passed doclist to recreate iLastDocid, but | 776 ** code could decode the passed doclist to recreate iLastDocid, but |
830 ** the only current user (docListMerge) already has decoded this | 777 ** the only current user (docListMerge) already has decoded this |
831 ** information. | 778 ** information. |
832 */ | 779 */ |
833 /* TODO(shess) This has become just a helper for docListMerge. | 780 /* TODO(shess) This has become just a helper for docListMerge. |
834 ** Consider a refactor to make this cleaner. | 781 ** Consider a refactor to make this cleaner. |
835 */ | 782 */ |
836 static int dlwAppend(DLWriter *pWriter, | 783 static void dlwAppend(DLWriter *pWriter, |
837 const char *pData, int nData, | 784 const char *pData, int nData, |
838 sqlite_int64 iFirstDocid, sqlite_int64 iLastDocid){ | 785 sqlite_int64 iFirstDocid, sqlite_int64 iLastDocid){ |
839 sqlite_int64 iDocid = 0; | 786 sqlite_int64 iDocid = 0; |
840 char c[VARINT_MAX]; | 787 char c[VARINT_MAX]; |
841 int nFirstOld, nFirstNew; /* Old and new varint len of first docid. */ | 788 int nFirstOld, nFirstNew; /* Old and new varint len of first docid. */ |
842 #ifndef NDEBUG | 789 #ifndef NDEBUG |
843 sqlite_int64 iLastDocidDelta; | 790 sqlite_int64 iLastDocidDelta; |
844 #endif | 791 #endif |
845 | 792 |
846 /* Recode the initial docid as delta from iPrevDocid. */ | 793 /* Recode the initial docid as delta from iPrevDocid. */ |
847 nFirstOld = getVarintSafe(pData, &iDocid, nData); | 794 nFirstOld = getVarint(pData, &iDocid); |
848 if( !nFirstOld ) return SQLITE_CORRUPT_BKPT; | |
849 assert( nFirstOld<nData || (nFirstOld==nData && pWriter->iType==DL_DOCIDS) ); | 795 assert( nFirstOld<nData || (nFirstOld==nData && pWriter->iType==DL_DOCIDS) ); |
850 nFirstNew = putVarint(c, iFirstDocid-pWriter->iPrevDocid); | 796 nFirstNew = putVarint(c, iFirstDocid-pWriter->iPrevDocid); |
851 | 797 |
852 /* Verify that the incoming doclist is valid AND that it ends with | 798 /* Verify that the incoming doclist is valid AND that it ends with |
853 ** the expected docid. This is essential because we'll trust this | 799 ** the expected docid. This is essential because we'll trust this |
854 ** docid in future delta-encoding. | 800 ** docid in future delta-encoding. |
855 */ | 801 */ |
856 ASSERT_VALID_DOCLIST(pWriter->iType, pData, nData, &iLastDocidDelta); | 802 ASSERT_VALID_DOCLIST(pWriter->iType, pData, nData, &iLastDocidDelta); |
857 assert( iLastDocid==iFirstDocid-iDocid+iLastDocidDelta ); | 803 assert( iLastDocid==iFirstDocid-iDocid+iLastDocidDelta ); |
858 | 804 |
859 /* Append recoded initial docid and everything else. Rest of docids | 805 /* Append recoded initial docid and everything else. Rest of docids |
860 ** should have been delta-encoded from previous initial docid. | 806 ** should have been delta-encoded from previous initial docid. |
861 */ | 807 */ |
862 if( nFirstOld<nData ){ | 808 if( nFirstOld<nData ){ |
863 dataBufferAppend2(pWriter->b, c, nFirstNew, | 809 dataBufferAppend2(pWriter->b, c, nFirstNew, |
864 pData+nFirstOld, nData-nFirstOld); | 810 pData+nFirstOld, nData-nFirstOld); |
865 }else{ | 811 }else{ |
866 dataBufferAppend(pWriter->b, c, nFirstNew); | 812 dataBufferAppend(pWriter->b, c, nFirstNew); |
867 } | 813 } |
868 pWriter->iPrevDocid = iLastDocid; | 814 pWriter->iPrevDocid = iLastDocid; |
869 return SQLITE_OK; | |
870 } | 815 } |
871 static int dlwCopy(DLWriter *pWriter, DLReader *pReader){ | 816 static void dlwCopy(DLWriter *pWriter, DLReader *pReader){ |
872 return dlwAppend(pWriter, dlrDocData(pReader), dlrDocDataBytes(pReader), | 817 dlwAppend(pWriter, dlrDocData(pReader), dlrDocDataBytes(pReader), |
873 dlrDocid(pReader), dlrDocid(pReader)); | 818 dlrDocid(pReader), dlrDocid(pReader)); |
874 } | 819 } |
875 static void dlwAdd(DLWriter *pWriter, sqlite_int64 iDocid){ | 820 static void dlwAdd(DLWriter *pWriter, sqlite_int64 iDocid){ |
876 char c[VARINT_MAX]; | 821 char c[VARINT_MAX]; |
877 int n = putVarint(c, iDocid-pWriter->iPrevDocid); | 822 int n = putVarint(c, iDocid-pWriter->iPrevDocid); |
878 | 823 |
879 /* Docids must ascend. */ | 824 /* Docids must ascend. */ |
880 assert( !pWriter->has_iPrevDocid || iDocid>pWriter->iPrevDocid ); | 825 assert( !pWriter->has_iPrevDocid || iDocid>pWriter->iPrevDocid ); |
881 assert( pWriter->iType==DL_DOCIDS ); | 826 assert( pWriter->iType==DL_DOCIDS ); |
882 | 827 |
883 dataBufferAppend(pWriter->b, c, n); | 828 dataBufferAppend(pWriter->b, c, n); |
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
924 return pReader->iPosition; | 869 return pReader->iPosition; |
925 } | 870 } |
926 static int plrStartOffset(PLReader *pReader){ | 871 static int plrStartOffset(PLReader *pReader){ |
927 assert( !plrAtEnd(pReader) ); | 872 assert( !plrAtEnd(pReader) ); |
928 return pReader->iStartOffset; | 873 return pReader->iStartOffset; |
929 } | 874 } |
930 static int plrEndOffset(PLReader *pReader){ | 875 static int plrEndOffset(PLReader *pReader){ |
931 assert( !plrAtEnd(pReader) ); | 876 assert( !plrAtEnd(pReader) ); |
932 return pReader->iEndOffset; | 877 return pReader->iEndOffset; |
933 } | 878 } |
934 static int plrStep(PLReader *pReader){ | 879 static void plrStep(PLReader *pReader){ |
935 int i, n, nTotal = 0; | 880 int i, n; |
936 | 881 |
937 assert( !plrAtEnd(pReader) ); | 882 assert( !plrAtEnd(pReader) ); |
938 | 883 |
939 if( pReader->nData<=0 ){ | 884 if( pReader->nData==0 ){ |
940 pReader->pData = NULL; | 885 pReader->pData = NULL; |
941 return SQLITE_OK; | 886 return; |
942 } | 887 } |
943 | 888 |
944 n = getVarint32Safe(pReader->pData, &i, pReader->nData); | 889 n = getVarint32(pReader->pData, &i); |
945 if( !n ) return SQLITE_CORRUPT_BKPT; | |
946 nTotal += n; | |
947 if( i==POS_COLUMN ){ | 890 if( i==POS_COLUMN ){ |
948 n = getVarint32Safe(pReader->pData+nTotal, &pReader->iColumn, | 891 n += getVarint32(pReader->pData+n, &pReader->iColumn); |
949 pReader->nData-nTotal); | |
950 if( !n ) return SQLITE_CORRUPT_BKPT; | |
951 nTotal += n; | |
952 pReader->iPosition = 0; | 892 pReader->iPosition = 0; |
953 pReader->iStartOffset = 0; | 893 pReader->iStartOffset = 0; |
954 n = getVarint32Safe(pReader->pData+nTotal, &i, pReader->nData-nTotal); | 894 n += getVarint32(pReader->pData+n, &i); |
955 if( !n ) return SQLITE_CORRUPT_BKPT; | |
956 nTotal += n; | |
957 } | 895 } |
958 /* Should never see adjacent column changes. */ | 896 /* Should never see adjacent column changes. */ |
959 assert( i!=POS_COLUMN ); | 897 assert( i!=POS_COLUMN ); |
960 | 898 |
961 if( i==POS_END ){ | 899 if( i==POS_END ){ |
962 assert( nTotal<=pReader->nData ); | |
963 pReader->nData = 0; | 900 pReader->nData = 0; |
964 pReader->pData = NULL; | 901 pReader->pData = NULL; |
965 return SQLITE_OK; | 902 return; |
966 } | 903 } |
967 | 904 |
968 pReader->iPosition += i-POS_BASE; | 905 pReader->iPosition += i-POS_BASE; |
969 if( pReader->iType==DL_POSITIONS_OFFSETS ){ | 906 if( pReader->iType==DL_POSITIONS_OFFSETS ){ |
970 n = getVarint32Safe(pReader->pData+nTotal, &i, pReader->nData-nTotal); | 907 n += getVarint32(pReader->pData+n, &i); |
971 if( !n ) return SQLITE_CORRUPT_BKPT; | |
972 nTotal += n; | |
973 pReader->iStartOffset += i; | 908 pReader->iStartOffset += i; |
974 n = getVarint32Safe(pReader->pData+nTotal, &i, pReader->nData-nTotal); | 909 n += getVarint32(pReader->pData+n, &i); |
975 if( !n ) return SQLITE_CORRUPT_BKPT; | |
976 nTotal += n; | |
977 pReader->iEndOffset = pReader->iStartOffset+i; | 910 pReader->iEndOffset = pReader->iStartOffset+i; |
978 } | 911 } |
979 assert( nTotal<=pReader->nData ); | 912 assert( n<=pReader->nData ); |
980 pReader->pData += nTotal; | 913 pReader->pData += n; |
981 pReader->nData -= nTotal; | 914 pReader->nData -= n; |
982 return SQLITE_OK; | |
983 } | 915 } |
984 | 916 |
985 static void plrDestroy(PLReader *pReader){ | 917 static void plrInit(PLReader *pReader, DLReader *pDLReader){ |
986 SCRAMBLE(pReader); | |
987 } | |
988 | |
989 static int plrInit(PLReader *pReader, DLReader *pDLReader){ | |
990 int rc; | |
991 pReader->pData = dlrPosData(pDLReader); | 918 pReader->pData = dlrPosData(pDLReader); |
992 pReader->nData = dlrPosDataLen(pDLReader); | 919 pReader->nData = dlrPosDataLen(pDLReader); |
993 pReader->iType = pDLReader->iType; | 920 pReader->iType = pDLReader->iType; |
994 pReader->iColumn = 0; | 921 pReader->iColumn = 0; |
995 pReader->iPosition = 0; | 922 pReader->iPosition = 0; |
996 pReader->iStartOffset = 0; | 923 pReader->iStartOffset = 0; |
997 pReader->iEndOffset = 0; | 924 pReader->iEndOffset = 0; |
998 rc = plrStep(pReader); | 925 plrStep(pReader); |
999 if( rc!=SQLITE_OK ) plrDestroy(pReader); | 926 } |
1000 return rc; | 927 static void plrDestroy(PLReader *pReader){ |
| 928 SCRAMBLE(pReader); |
1001 } | 929 } |
1002 | 930 |
1003 /*******************************************************************/ | 931 /*******************************************************************/ |
1004 /* PLWriter is used in constructing a document's position list. As a | 932 /* PLWriter is used in constructing a document's position list. As a |
1005 ** convenience, if iType is DL_DOCIDS, PLWriter becomes a no-op. | 933 ** convenience, if iType is DL_DOCIDS, PLWriter becomes a no-op. |
1006 ** PLWriter writes to the associated DLWriter's buffer. | 934 ** PLWriter writes to the associated DLWriter's buffer. |
1007 ** | 935 ** |
1008 ** plwInit - init for writing a document's poslist. | 936 ** plwInit - init for writing a document's poslist. |
1009 ** plwDestroy - clear a writer. | 937 ** plwDestroy - clear a writer. |
1010 ** plwAdd - append position and offset information. | 938 ** plwAdd - append position and offset information. |
(...skipping 165 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1176 /* Copy the doclist data of iType in pData/nData into *out, trimming | 1104 /* Copy the doclist data of iType in pData/nData into *out, trimming |
1177 ** unnecessary data as we go. Only columns matching iColumn are | 1105 ** unnecessary data as we go. Only columns matching iColumn are |
1178 ** copied, all columns copied if iColumn is -1. Elements with no | 1106 ** copied, all columns copied if iColumn is -1. Elements with no |
1179 ** matching columns are dropped. The output is an iOutType doclist. | 1107 ** matching columns are dropped. The output is an iOutType doclist. |
1180 */ | 1108 */ |
1181 /* NOTE(shess) This code is only valid after all doclists are merged. | 1109 /* NOTE(shess) This code is only valid after all doclists are merged. |
1182 ** If this is run before merges, then doclist items which represent | 1110 ** If this is run before merges, then doclist items which represent |
1183 ** deletion will be trimmed, and will thus not effect a deletion | 1111 ** deletion will be trimmed, and will thus not effect a deletion |
1184 ** during the merge. | 1112 ** during the merge. |
1185 */ | 1113 */ |
1186 static int docListTrim(DocListType iType, const char *pData, int nData, | 1114 static void docListTrim(DocListType iType, const char *pData, int nData, |
1187 int iColumn, DocListType iOutType, DataBuffer *out){ | 1115 int iColumn, DocListType iOutType, DataBuffer *out){ |
1188 DLReader dlReader; | 1116 DLReader dlReader; |
1189 DLWriter dlWriter; | 1117 DLWriter dlWriter; |
1190 int rc; | |
1191 | 1118 |
1192 assert( iOutType<=iType ); | 1119 assert( iOutType<=iType ); |
1193 | 1120 |
1194 rc = dlrInit(&dlReader, iType, pData, nData); | 1121 dlrInit(&dlReader, iType, pData, nData); |
1195 if( rc!=SQLITE_OK ) return rc; | |
1196 dlwInit(&dlWriter, iOutType, out); | 1122 dlwInit(&dlWriter, iOutType, out); |
1197 | 1123 |
1198 while( !dlrAtEnd(&dlReader) ){ | 1124 while( !dlrAtEnd(&dlReader) ){ |
1199 PLReader plReader; | 1125 PLReader plReader; |
1200 PLWriter plWriter; | 1126 PLWriter plWriter; |
1201 int match = 0; | 1127 int match = 0; |
1202 | 1128 |
1203 rc = plrInit(&plReader, &dlReader); | 1129 plrInit(&plReader, &dlReader); |
1204 if( rc!=SQLITE_OK ) break; | |
1205 | 1130 |
1206 while( !plrAtEnd(&plReader) ){ | 1131 while( !plrAtEnd(&plReader) ){ |
1207 if( iColumn==-1 || plrColumn(&plReader)==iColumn ){ | 1132 if( iColumn==-1 || plrColumn(&plReader)==iColumn ){ |
1208 if( !match ){ | 1133 if( !match ){ |
1209 plwInit(&plWriter, &dlWriter, dlrDocid(&dlReader)); | 1134 plwInit(&plWriter, &dlWriter, dlrDocid(&dlReader)); |
1210 match = 1; | 1135 match = 1; |
1211 } | 1136 } |
1212 plwAdd(&plWriter, plrColumn(&plReader), plrPosition(&plReader), | 1137 plwAdd(&plWriter, plrColumn(&plReader), plrPosition(&plReader), |
1213 plrStartOffset(&plReader), plrEndOffset(&plReader)); | 1138 plrStartOffset(&plReader), plrEndOffset(&plReader)); |
1214 } | 1139 } |
1215 rc = plrStep(&plReader); | 1140 plrStep(&plReader); |
1216 if( rc!=SQLITE_OK ){ | |
1217 plrDestroy(&plReader); | |
1218 goto err; | |
1219 } | |
1220 } | 1141 } |
1221 if( match ){ | 1142 if( match ){ |
1222 plwTerminate(&plWriter); | 1143 plwTerminate(&plWriter); |
1223 plwDestroy(&plWriter); | 1144 plwDestroy(&plWriter); |
1224 } | 1145 } |
1225 | 1146 |
1226 plrDestroy(&plReader); | 1147 plrDestroy(&plReader); |
1227 rc = dlrStep(&dlReader); | 1148 dlrStep(&dlReader); |
1228 if( rc!=SQLITE_OK ) break; | |
1229 } | 1149 } |
1230 err: | |
1231 dlwDestroy(&dlWriter); | 1150 dlwDestroy(&dlWriter); |
1232 dlrDestroy(&dlReader); | 1151 dlrDestroy(&dlReader); |
1233 return rc; | |
1234 } | 1152 } |
1235 | 1153 |
1236 /* Used by docListMerge() to keep doclists in the ascending order by | 1154 /* Used by docListMerge() to keep doclists in the ascending order by |
1237 ** docid, then ascending order by age (so the newest comes first). | 1155 ** docid, then ascending order by age (so the newest comes first). |
1238 */ | 1156 */ |
1239 typedef struct OrderedDLReader { | 1157 typedef struct OrderedDLReader { |
1240 DLReader *pReader; | 1158 DLReader *pReader; |
1241 | 1159 |
1242 /* TODO(shess) If we assume that docListMerge pReaders is ordered by | 1160 /* TODO(shess) If we assume that docListMerge pReaders is ordered by |
1243 ** age (which we do), then we could use pReader comparisons to break | 1161 ** age (which we do), then we could use pReader comparisons to break |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1280 } | 1198 } |
1281 | 1199 |
1282 /* Given an array of doclist readers, merge their doclist elements | 1200 /* Given an array of doclist readers, merge their doclist elements |
1283 ** into out in sorted order (by docid), dropping elements from older | 1201 ** into out in sorted order (by docid), dropping elements from older |
1284 ** readers when there is a duplicate docid. pReaders is assumed to be | 1202 ** readers when there is a duplicate docid. pReaders is assumed to be |
1285 ** ordered by age, oldest first. | 1203 ** ordered by age, oldest first. |
1286 */ | 1204 */ |
1287 /* TODO(shess) nReaders must be <= MERGE_COUNT. This should probably | 1205 /* TODO(shess) nReaders must be <= MERGE_COUNT. This should probably |
1288 ** be fixed. | 1206 ** be fixed. |
1289 */ | 1207 */ |
1290 static int docListMerge(DataBuffer *out, | 1208 static void docListMerge(DataBuffer *out, |
1291 DLReader *pReaders, int nReaders){ | 1209 DLReader *pReaders, int nReaders){ |
1292 OrderedDLReader readers[MERGE_COUNT]; | 1210 OrderedDLReader readers[MERGE_COUNT]; |
1293 DLWriter writer; | 1211 DLWriter writer; |
1294 int i, n; | 1212 int i, n; |
1295 const char *pStart = 0; | 1213 const char *pStart = 0; |
1296 int nStart = 0; | 1214 int nStart = 0; |
1297 sqlite_int64 iFirstDocid = 0, iLastDocid = 0; | 1215 sqlite_int64 iFirstDocid = 0, iLastDocid = 0; |
1298 int rc = SQLITE_OK; | |
1299 | 1216 |
1300 assert( nReaders>0 ); | 1217 assert( nReaders>0 ); |
1301 if( nReaders==1 ){ | 1218 if( nReaders==1 ){ |
1302 dataBufferAppend(out, dlrDocData(pReaders), dlrAllDataBytes(pReaders)); | 1219 dataBufferAppend(out, dlrDocData(pReaders), dlrAllDataBytes(pReaders)); |
1303 return SQLITE_OK; | 1220 return; |
1304 } | 1221 } |
1305 | 1222 |
1306 assert( nReaders<=MERGE_COUNT ); | 1223 assert( nReaders<=MERGE_COUNT ); |
1307 n = 0; | 1224 n = 0; |
1308 for(i=0; i<nReaders; i++){ | 1225 for(i=0; i<nReaders; i++){ |
1309 assert( pReaders[i].iType==pReaders[0].iType ); | 1226 assert( pReaders[i].iType==pReaders[0].iType ); |
1310 readers[i].pReader = pReaders+i; | 1227 readers[i].pReader = pReaders+i; |
1311 readers[i].idx = i; | 1228 readers[i].idx = i; |
1312 n += dlrAllDataBytes(&pReaders[i]); | 1229 n += dlrAllDataBytes(&pReaders[i]); |
1313 } | 1230 } |
(...skipping 12 matching lines...) Expand all Loading... |
1326 sqlite_int64 iDocid = dlrDocid(readers[0].pReader); | 1243 sqlite_int64 iDocid = dlrDocid(readers[0].pReader); |
1327 | 1244 |
1328 /* If this is a continuation of the current buffer to copy, extend | 1245 /* If this is a continuation of the current buffer to copy, extend |
1329 ** that buffer. memcpy() seems to be more efficient if it has a | 1246 ** that buffer. memcpy() seems to be more efficient if it has a |
1330 ** lots of data to copy. | 1247 ** lots of data to copy. |
1331 */ | 1248 */ |
1332 if( dlrDocData(readers[0].pReader)==pStart+nStart ){ | 1249 if( dlrDocData(readers[0].pReader)==pStart+nStart ){ |
1333 nStart += dlrDocDataBytes(readers[0].pReader); | 1250 nStart += dlrDocDataBytes(readers[0].pReader); |
1334 }else{ | 1251 }else{ |
1335 if( pStart!=0 ){ | 1252 if( pStart!=0 ){ |
1336 rc = dlwAppend(&writer, pStart, nStart, iFirstDocid, iLastDocid); | 1253 dlwAppend(&writer, pStart, nStart, iFirstDocid, iLastDocid); |
1337 if( rc!=SQLITE_OK ) goto err; | |
1338 } | 1254 } |
1339 pStart = dlrDocData(readers[0].pReader); | 1255 pStart = dlrDocData(readers[0].pReader); |
1340 nStart = dlrDocDataBytes(readers[0].pReader); | 1256 nStart = dlrDocDataBytes(readers[0].pReader); |
1341 iFirstDocid = iDocid; | 1257 iFirstDocid = iDocid; |
1342 } | 1258 } |
1343 iLastDocid = iDocid; | 1259 iLastDocid = iDocid; |
1344 rc = dlrStep(readers[0].pReader); | 1260 dlrStep(readers[0].pReader); |
1345 if( rc!=SQLITE_OK ) goto err; | |
1346 | 1261 |
1347 /* Drop all of the older elements with the same docid. */ | 1262 /* Drop all of the older elements with the same docid. */ |
1348 for(i=1; i<nReaders && | 1263 for(i=1; i<nReaders && |
1349 !dlrAtEnd(readers[i].pReader) && | 1264 !dlrAtEnd(readers[i].pReader) && |
1350 dlrDocid(readers[i].pReader)==iDocid; i++){ | 1265 dlrDocid(readers[i].pReader)==iDocid; i++){ |
1351 rc = dlrStep(readers[i].pReader); | 1266 dlrStep(readers[i].pReader); |
1352 if( rc!=SQLITE_OK ) goto err; | |
1353 } | 1267 } |
1354 | 1268 |
1355 /* Get the readers back into order. */ | 1269 /* Get the readers back into order. */ |
1356 while( i-->0 ){ | 1270 while( i-->0 ){ |
1357 orderedDLReaderReorder(readers+i, nReaders-i); | 1271 orderedDLReaderReorder(readers+i, nReaders-i); |
1358 } | 1272 } |
1359 } | 1273 } |
1360 | 1274 |
1361 /* Copy over any remaining elements. */ | 1275 /* Copy over any remaining elements. */ |
1362 if( nStart>0 ) | 1276 if( nStart>0 ) dlwAppend(&writer, pStart, nStart, iFirstDocid, iLastDocid); |
1363 rc = dlwAppend(&writer, pStart, nStart, iFirstDocid, iLastDocid); | |
1364 err: | |
1365 dlwDestroy(&writer); | 1277 dlwDestroy(&writer); |
1366 return rc; | |
1367 } | 1278 } |
1368 | 1279 |
1369 /* Helper function for posListUnion(). Compares the current position | 1280 /* Helper function for posListUnion(). Compares the current position |
1370 ** between left and right, returning as standard C idiom of <0 if | 1281 ** between left and right, returning as standard C idiom of <0 if |
1371 ** left<right, >0 if left>right, and 0 if left==right. "End" always | 1282 ** left<right, >0 if left>right, and 0 if left==right. "End" always |
1372 ** compares greater. | 1283 ** compares greater. |
1373 */ | 1284 */ |
1374 static int posListCmp(PLReader *pLeft, PLReader *pRight){ | 1285 static int posListCmp(PLReader *pLeft, PLReader *pRight){ |
1375 assert( pLeft->iType==pRight->iType ); | 1286 assert( pLeft->iType==pRight->iType ); |
1376 if( pLeft->iType==DL_DOCIDS ) return 0; | 1287 if( pLeft->iType==DL_DOCIDS ) return 0; |
(...skipping 15 matching lines...) Expand all Loading... |
1392 if( plrEndOffset(pLeft)>plrEndOffset(pRight) ) return 1; | 1303 if( plrEndOffset(pLeft)>plrEndOffset(pRight) ) return 1; |
1393 | 1304 |
1394 return 0; | 1305 return 0; |
1395 } | 1306 } |
1396 | 1307 |
1397 /* Write the union of position lists in pLeft and pRight to pOut. | 1308 /* Write the union of position lists in pLeft and pRight to pOut. |
1398 ** "Union" in this case meaning "All unique position tuples". Should | 1309 ** "Union" in this case meaning "All unique position tuples". Should |
1399 ** work with any doclist type, though both inputs and the output | 1310 ** work with any doclist type, though both inputs and the output |
1400 ** should be the same type. | 1311 ** should be the same type. |
1401 */ | 1312 */ |
1402 static int posListUnion(DLReader *pLeft, DLReader *pRight, DLWriter *pOut){ | 1313 static void posListUnion(DLReader *pLeft, DLReader *pRight, DLWriter *pOut){ |
1403 PLReader left, right; | 1314 PLReader left, right; |
1404 PLWriter writer; | 1315 PLWriter writer; |
1405 int rc; | |
1406 | 1316 |
1407 assert( dlrDocid(pLeft)==dlrDocid(pRight) ); | 1317 assert( dlrDocid(pLeft)==dlrDocid(pRight) ); |
1408 assert( pLeft->iType==pRight->iType ); | 1318 assert( pLeft->iType==pRight->iType ); |
1409 assert( pLeft->iType==pOut->iType ); | 1319 assert( pLeft->iType==pOut->iType ); |
1410 | 1320 |
1411 rc = plrInit(&left, pLeft); | 1321 plrInit(&left, pLeft); |
1412 if( rc != SQLITE_OK ) return rc; | 1322 plrInit(&right, pRight); |
1413 rc = plrInit(&right, pRight); | |
1414 if( rc != SQLITE_OK ){ | |
1415 plrDestroy(&left); | |
1416 return rc; | |
1417 } | |
1418 plwInit(&writer, pOut, dlrDocid(pLeft)); | 1323 plwInit(&writer, pOut, dlrDocid(pLeft)); |
1419 | 1324 |
1420 while( !plrAtEnd(&left) || !plrAtEnd(&right) ){ | 1325 while( !plrAtEnd(&left) || !plrAtEnd(&right) ){ |
1421 int c = posListCmp(&left, &right); | 1326 int c = posListCmp(&left, &right); |
1422 if( c<0 ){ | 1327 if( c<0 ){ |
1423 plwCopy(&writer, &left); | 1328 plwCopy(&writer, &left); |
1424 rc = plrStep(&left); | 1329 plrStep(&left); |
1425 if( rc != SQLITE_OK ) break; | |
1426 }else if( c>0 ){ | 1330 }else if( c>0 ){ |
1427 plwCopy(&writer, &right); | 1331 plwCopy(&writer, &right); |
1428 rc = plrStep(&right); | 1332 plrStep(&right); |
1429 if( rc != SQLITE_OK ) break; | |
1430 }else{ | 1333 }else{ |
1431 plwCopy(&writer, &left); | 1334 plwCopy(&writer, &left); |
1432 rc = plrStep(&left); | 1335 plrStep(&left); |
1433 if( rc != SQLITE_OK ) break; | 1336 plrStep(&right); |
1434 rc = plrStep(&right); | |
1435 if( rc != SQLITE_OK ) break; | |
1436 } | 1337 } |
1437 } | 1338 } |
1438 | 1339 |
1439 plwTerminate(&writer); | 1340 plwTerminate(&writer); |
1440 plwDestroy(&writer); | 1341 plwDestroy(&writer); |
1441 plrDestroy(&left); | 1342 plrDestroy(&left); |
1442 plrDestroy(&right); | 1343 plrDestroy(&right); |
1443 return rc; | |
1444 } | 1344 } |
1445 | 1345 |
1446 /* Write the union of doclists in pLeft and pRight to pOut. For | 1346 /* Write the union of doclists in pLeft and pRight to pOut. For |
1447 ** docids in common between the inputs, the union of the position | 1347 ** docids in common between the inputs, the union of the position |
1448 ** lists is written. Inputs and outputs are always type DL_DEFAULT. | 1348 ** lists is written. Inputs and outputs are always type DL_DEFAULT. |
1449 */ | 1349 */ |
1450 static int docListUnion( | 1350 static void docListUnion( |
1451 const char *pLeft, int nLeft, | 1351 const char *pLeft, int nLeft, |
1452 const char *pRight, int nRight, | 1352 const char *pRight, int nRight, |
1453 DataBuffer *pOut /* Write the combined doclist here */ | 1353 DataBuffer *pOut /* Write the combined doclist here */ |
1454 ){ | 1354 ){ |
1455 DLReader left, right; | 1355 DLReader left, right; |
1456 DLWriter writer; | 1356 DLWriter writer; |
1457 int rc; | |
1458 | 1357 |
1459 if( nLeft==0 ){ | 1358 if( nLeft==0 ){ |
1460 if( nRight!=0) dataBufferAppend(pOut, pRight, nRight); | 1359 if( nRight!=0) dataBufferAppend(pOut, pRight, nRight); |
1461 return SQLITE_OK; | 1360 return; |
1462 } | 1361 } |
1463 if( nRight==0 ){ | 1362 if( nRight==0 ){ |
1464 dataBufferAppend(pOut, pLeft, nLeft); | 1363 dataBufferAppend(pOut, pLeft, nLeft); |
1465 return SQLITE_OK; | 1364 return; |
1466 } | 1365 } |
1467 | 1366 |
1468 rc = dlrInit(&left, DL_DEFAULT, pLeft, nLeft); | 1367 dlrInit(&left, DL_DEFAULT, pLeft, nLeft); |
1469 if( rc!=SQLITE_OK ) return rc; | 1368 dlrInit(&right, DL_DEFAULT, pRight, nRight); |
1470 rc = dlrInit(&right, DL_DEFAULT, pRight, nRight); | |
1471 if( rc!=SQLITE_OK ){ | |
1472 dlrDestroy(&left); | |
1473 return rc; | |
1474 } | |
1475 dlwInit(&writer, DL_DEFAULT, pOut); | 1369 dlwInit(&writer, DL_DEFAULT, pOut); |
1476 | 1370 |
1477 while( !dlrAtEnd(&left) || !dlrAtEnd(&right) ){ | 1371 while( !dlrAtEnd(&left) || !dlrAtEnd(&right) ){ |
1478 if( dlrAtEnd(&right) ){ | 1372 if( dlrAtEnd(&right) ){ |
1479 rc = dlwCopy(&writer, &left); | 1373 dlwCopy(&writer, &left); |
1480 if( rc!=SQLITE_OK ) break; | 1374 dlrStep(&left); |
1481 rc = dlrStep(&left); | |
1482 if( rc!=SQLITE_OK ) break; | |
1483 }else if( dlrAtEnd(&left) ){ | 1375 }else if( dlrAtEnd(&left) ){ |
1484 rc = dlwCopy(&writer, &right); | 1376 dlwCopy(&writer, &right); |
1485 if( rc!=SQLITE_OK ) break; | 1377 dlrStep(&right); |
1486 rc = dlrStep(&right); | |
1487 if( rc!=SQLITE_OK ) break; | |
1488 }else if( dlrDocid(&left)<dlrDocid(&right) ){ | 1378 }else if( dlrDocid(&left)<dlrDocid(&right) ){ |
1489 rc = dlwCopy(&writer, &left); | 1379 dlwCopy(&writer, &left); |
1490 if( rc!=SQLITE_OK ) break; | 1380 dlrStep(&left); |
1491 rc = dlrStep(&left); | |
1492 if( rc!=SQLITE_OK ) break; | |
1493 }else if( dlrDocid(&left)>dlrDocid(&right) ){ | 1381 }else if( dlrDocid(&left)>dlrDocid(&right) ){ |
1494 rc = dlwCopy(&writer, &right); | 1382 dlwCopy(&writer, &right); |
1495 if( rc!=SQLITE_OK ) break; | 1383 dlrStep(&right); |
1496 rc = dlrStep(&right); | |
1497 if( rc!=SQLITE_OK ) break; | |
1498 }else{ | 1384 }else{ |
1499 rc = posListUnion(&left, &right, &writer); | 1385 posListUnion(&left, &right, &writer); |
1500 if( rc!=SQLITE_OK ) break; | 1386 dlrStep(&left); |
1501 rc = dlrStep(&left); | 1387 dlrStep(&right); |
1502 if( rc!=SQLITE_OK ) break; | |
1503 rc = dlrStep(&right); | |
1504 if( rc!=SQLITE_OK ) break; | |
1505 } | 1388 } |
1506 } | 1389 } |
1507 | 1390 |
1508 dlrDestroy(&left); | 1391 dlrDestroy(&left); |
1509 dlrDestroy(&right); | 1392 dlrDestroy(&right); |
1510 dlwDestroy(&writer); | 1393 dlwDestroy(&writer); |
1511 return rc; | |
1512 } | 1394 } |
1513 | 1395 |
1514 /* pLeft and pRight are DLReaders positioned to the same docid. | 1396 /* pLeft and pRight are DLReaders positioned to the same docid. |
1515 ** | 1397 ** |
1516 ** If there are no instances in pLeft or pRight where the position | 1398 ** If there are no instances in pLeft or pRight where the position |
1517 ** of pLeft is one less than the position of pRight, then this | 1399 ** of pLeft is one less than the position of pRight, then this |
1518 ** routine adds nothing to pOut. | 1400 ** routine adds nothing to pOut. |
1519 ** | 1401 ** |
1520 ** If there are one or more instances where positions from pLeft | 1402 ** If there are one or more instances where positions from pLeft |
1521 ** are exactly one less than positions from pRight, then add a new | 1403 ** are exactly one less than positions from pRight, then add a new |
1522 ** document record to pOut. If pOut wants to hold positions, then | 1404 ** document record to pOut. If pOut wants to hold positions, then |
1523 ** include the positions from pRight that are one more than a | 1405 ** include the positions from pRight that are one more than a |
1524 ** position in pLeft. In other words: pRight.iPos==pLeft.iPos+1. | 1406 ** position in pLeft. In other words: pRight.iPos==pLeft.iPos+1. |
1525 */ | 1407 */ |
1526 static int posListPhraseMerge(DLReader *pLeft, DLReader *pRight, | 1408 static void posListPhraseMerge(DLReader *pLeft, DLReader *pRight, |
1527 DLWriter *pOut){ | 1409 DLWriter *pOut){ |
1528 PLReader left, right; | 1410 PLReader left, right; |
1529 PLWriter writer; | 1411 PLWriter writer; |
1530 int match = 0; | 1412 int match = 0; |
1531 int rc; | |
1532 | 1413 |
1533 assert( dlrDocid(pLeft)==dlrDocid(pRight) ); | 1414 assert( dlrDocid(pLeft)==dlrDocid(pRight) ); |
1534 assert( pOut->iType!=DL_POSITIONS_OFFSETS ); | 1415 assert( pOut->iType!=DL_POSITIONS_OFFSETS ); |
1535 | 1416 |
1536 rc = plrInit(&left, pLeft); | 1417 plrInit(&left, pLeft); |
1537 if( rc!=SQLITE_OK ) return rc; | 1418 plrInit(&right, pRight); |
1538 rc = plrInit(&right, pRight); | |
1539 if( rc!=SQLITE_OK ){ | |
1540 plrDestroy(&left); | |
1541 return rc; | |
1542 } | |
1543 | 1419 |
1544 while( !plrAtEnd(&left) && !plrAtEnd(&right) ){ | 1420 while( !plrAtEnd(&left) && !plrAtEnd(&right) ){ |
1545 if( plrColumn(&left)<plrColumn(&right) ){ | 1421 if( plrColumn(&left)<plrColumn(&right) ){ |
1546 rc = plrStep(&left); | 1422 plrStep(&left); |
1547 if( rc!=SQLITE_OK ) break; | |
1548 }else if( plrColumn(&left)>plrColumn(&right) ){ | 1423 }else if( plrColumn(&left)>plrColumn(&right) ){ |
1549 rc = plrStep(&right); | 1424 plrStep(&right); |
1550 if( rc!=SQLITE_OK ) break; | |
1551 }else if( plrPosition(&left)+1<plrPosition(&right) ){ | 1425 }else if( plrPosition(&left)+1<plrPosition(&right) ){ |
1552 rc = plrStep(&left); | 1426 plrStep(&left); |
1553 if( rc!=SQLITE_OK ) break; | |
1554 }else if( plrPosition(&left)+1>plrPosition(&right) ){ | 1427 }else if( plrPosition(&left)+1>plrPosition(&right) ){ |
1555 rc = plrStep(&right); | 1428 plrStep(&right); |
1556 if( rc!=SQLITE_OK ) break; | |
1557 }else{ | 1429 }else{ |
1558 if( !match ){ | 1430 if( !match ){ |
1559 plwInit(&writer, pOut, dlrDocid(pLeft)); | 1431 plwInit(&writer, pOut, dlrDocid(pLeft)); |
1560 match = 1; | 1432 match = 1; |
1561 } | 1433 } |
1562 plwAdd(&writer, plrColumn(&right), plrPosition(&right), 0, 0); | 1434 plwAdd(&writer, plrColumn(&right), plrPosition(&right), 0, 0); |
1563 rc = plrStep(&left); | 1435 plrStep(&left); |
1564 if( rc!=SQLITE_OK ) break; | 1436 plrStep(&right); |
1565 rc = plrStep(&right); | |
1566 if( rc!=SQLITE_OK ) break; | |
1567 } | 1437 } |
1568 } | 1438 } |
1569 | 1439 |
1570 if( match ){ | 1440 if( match ){ |
1571 plwTerminate(&writer); | 1441 plwTerminate(&writer); |
1572 plwDestroy(&writer); | 1442 plwDestroy(&writer); |
1573 } | 1443 } |
1574 | 1444 |
1575 plrDestroy(&left); | 1445 plrDestroy(&left); |
1576 plrDestroy(&right); | 1446 plrDestroy(&right); |
1577 return rc; | |
1578 } | 1447 } |
1579 | 1448 |
1580 /* We have two doclists with positions: pLeft and pRight. | 1449 /* We have two doclists with positions: pLeft and pRight. |
1581 ** Write the phrase intersection of these two doclists into pOut. | 1450 ** Write the phrase intersection of these two doclists into pOut. |
1582 ** | 1451 ** |
1583 ** A phrase intersection means that two documents only match | 1452 ** A phrase intersection means that two documents only match |
1584 ** if pLeft.iPos+1==pRight.iPos. | 1453 ** if pLeft.iPos+1==pRight.iPos. |
1585 ** | 1454 ** |
1586 ** iType controls the type of data written to pOut. If iType is | 1455 ** iType controls the type of data written to pOut. If iType is |
1587 ** DL_POSITIONS, the positions are those from pRight. | 1456 ** DL_POSITIONS, the positions are those from pRight. |
1588 */ | 1457 */ |
1589 static int docListPhraseMerge( | 1458 static void docListPhraseMerge( |
1590 const char *pLeft, int nLeft, | 1459 const char *pLeft, int nLeft, |
1591 const char *pRight, int nRight, | 1460 const char *pRight, int nRight, |
1592 DocListType iType, | 1461 DocListType iType, |
1593 DataBuffer *pOut /* Write the combined doclist here */ | 1462 DataBuffer *pOut /* Write the combined doclist here */ |
1594 ){ | 1463 ){ |
1595 DLReader left, right; | 1464 DLReader left, right; |
1596 DLWriter writer; | 1465 DLWriter writer; |
1597 int rc; | |
1598 | 1466 |
1599 if( nLeft==0 || nRight==0 ) return SQLITE_OK; | 1467 if( nLeft==0 || nRight==0 ) return; |
1600 | 1468 |
1601 assert( iType!=DL_POSITIONS_OFFSETS ); | 1469 assert( iType!=DL_POSITIONS_OFFSETS ); |
1602 | 1470 |
1603 rc = dlrInit(&left, DL_POSITIONS, pLeft, nLeft); | 1471 dlrInit(&left, DL_POSITIONS, pLeft, nLeft); |
1604 if( rc!=SQLITE_OK ) return rc; | 1472 dlrInit(&right, DL_POSITIONS, pRight, nRight); |
1605 rc = dlrInit(&right, DL_POSITIONS, pRight, nRight); | |
1606 if( rc!=SQLITE_OK ){ | |
1607 dlrDestroy(&left); | |
1608 return rc; | |
1609 } | |
1610 dlwInit(&writer, iType, pOut); | 1473 dlwInit(&writer, iType, pOut); |
1611 | 1474 |
1612 while( !dlrAtEnd(&left) && !dlrAtEnd(&right) ){ | 1475 while( !dlrAtEnd(&left) && !dlrAtEnd(&right) ){ |
1613 if( dlrDocid(&left)<dlrDocid(&right) ){ | 1476 if( dlrDocid(&left)<dlrDocid(&right) ){ |
1614 rc = dlrStep(&left); | 1477 dlrStep(&left); |
1615 if( rc!=SQLITE_OK ) break; | |
1616 }else if( dlrDocid(&right)<dlrDocid(&left) ){ | 1478 }else if( dlrDocid(&right)<dlrDocid(&left) ){ |
1617 rc = dlrStep(&right); | 1479 dlrStep(&right); |
1618 if( rc!=SQLITE_OK ) break; | |
1619 }else{ | 1480 }else{ |
1620 rc = posListPhraseMerge(&left, &right, &writer); | 1481 posListPhraseMerge(&left, &right, &writer); |
1621 if( rc!=SQLITE_OK ) break; | 1482 dlrStep(&left); |
1622 rc = dlrStep(&left); | 1483 dlrStep(&right); |
1623 if( rc!=SQLITE_OK ) break; | |
1624 rc = dlrStep(&right); | |
1625 if( rc!=SQLITE_OK ) break; | |
1626 } | 1484 } |
1627 } | 1485 } |
1628 | 1486 |
1629 dlrDestroy(&left); | 1487 dlrDestroy(&left); |
1630 dlrDestroy(&right); | 1488 dlrDestroy(&right); |
1631 dlwDestroy(&writer); | 1489 dlwDestroy(&writer); |
1632 return rc; | |
1633 } | 1490 } |
1634 | 1491 |
1635 /* We have two DL_DOCIDS doclists: pLeft and pRight. | 1492 /* We have two DL_DOCIDS doclists: pLeft and pRight. |
1636 ** Write the intersection of these two doclists into pOut as a | 1493 ** Write the intersection of these two doclists into pOut as a |
1637 ** DL_DOCIDS doclist. | 1494 ** DL_DOCIDS doclist. |
1638 */ | 1495 */ |
1639 static int docListAndMerge( | 1496 static void docListAndMerge( |
1640 const char *pLeft, int nLeft, | 1497 const char *pLeft, int nLeft, |
1641 const char *pRight, int nRight, | 1498 const char *pRight, int nRight, |
1642 DataBuffer *pOut /* Write the combined doclist here */ | 1499 DataBuffer *pOut /* Write the combined doclist here */ |
1643 ){ | 1500 ){ |
1644 DLReader left, right; | 1501 DLReader left, right; |
1645 DLWriter writer; | 1502 DLWriter writer; |
1646 int rc; | |
1647 | 1503 |
1648 if( nLeft==0 || nRight==0 ) return SQLITE_OK; | 1504 if( nLeft==0 || nRight==0 ) return; |
1649 | 1505 |
1650 rc = dlrInit(&left, DL_DOCIDS, pLeft, nLeft); | 1506 dlrInit(&left, DL_DOCIDS, pLeft, nLeft); |
1651 if( rc!=SQLITE_OK ) return rc; | 1507 dlrInit(&right, DL_DOCIDS, pRight, nRight); |
1652 rc = dlrInit(&right, DL_DOCIDS, pRight, nRight); | |
1653 if( rc!=SQLITE_OK ){ | |
1654 dlrDestroy(&left); | |
1655 return rc; | |
1656 } | |
1657 dlwInit(&writer, DL_DOCIDS, pOut); | 1508 dlwInit(&writer, DL_DOCIDS, pOut); |
1658 | 1509 |
1659 while( !dlrAtEnd(&left) && !dlrAtEnd(&right) ){ | 1510 while( !dlrAtEnd(&left) && !dlrAtEnd(&right) ){ |
1660 if( dlrDocid(&left)<dlrDocid(&right) ){ | 1511 if( dlrDocid(&left)<dlrDocid(&right) ){ |
1661 rc = dlrStep(&left); | 1512 dlrStep(&left); |
1662 if( rc!=SQLITE_OK ) break; | |
1663 }else if( dlrDocid(&right)<dlrDocid(&left) ){ | 1513 }else if( dlrDocid(&right)<dlrDocid(&left) ){ |
1664 rc = dlrStep(&right); | 1514 dlrStep(&right); |
1665 if( rc!=SQLITE_OK ) break; | |
1666 }else{ | 1515 }else{ |
1667 dlwAdd(&writer, dlrDocid(&left)); | 1516 dlwAdd(&writer, dlrDocid(&left)); |
1668 rc = dlrStep(&left); | 1517 dlrStep(&left); |
1669 if( rc!=SQLITE_OK ) break; | 1518 dlrStep(&right); |
1670 rc = dlrStep(&right); | |
1671 if( rc!=SQLITE_OK ) break; | |
1672 } | 1519 } |
1673 } | 1520 } |
1674 | 1521 |
1675 dlrDestroy(&left); | 1522 dlrDestroy(&left); |
1676 dlrDestroy(&right); | 1523 dlrDestroy(&right); |
1677 dlwDestroy(&writer); | 1524 dlwDestroy(&writer); |
1678 return rc; | |
1679 } | 1525 } |
1680 | 1526 |
1681 /* We have two DL_DOCIDS doclists: pLeft and pRight. | 1527 /* We have two DL_DOCIDS doclists: pLeft and pRight. |
1682 ** Write the union of these two doclists into pOut as a | 1528 ** Write the union of these two doclists into pOut as a |
1683 ** DL_DOCIDS doclist. | 1529 ** DL_DOCIDS doclist. |
1684 */ | 1530 */ |
1685 static int docListOrMerge( | 1531 static void docListOrMerge( |
1686 const char *pLeft, int nLeft, | 1532 const char *pLeft, int nLeft, |
1687 const char *pRight, int nRight, | 1533 const char *pRight, int nRight, |
1688 DataBuffer *pOut /* Write the combined doclist here */ | 1534 DataBuffer *pOut /* Write the combined doclist here */ |
1689 ){ | 1535 ){ |
1690 DLReader left, right; | 1536 DLReader left, right; |
1691 DLWriter writer; | 1537 DLWriter writer; |
1692 int rc; | |
1693 | 1538 |
1694 if( nLeft==0 ){ | 1539 if( nLeft==0 ){ |
1695 if( nRight!=0 ) dataBufferAppend(pOut, pRight, nRight); | 1540 if( nRight!=0 ) dataBufferAppend(pOut, pRight, nRight); |
1696 return SQLITE_OK; | 1541 return; |
1697 } | 1542 } |
1698 if( nRight==0 ){ | 1543 if( nRight==0 ){ |
1699 dataBufferAppend(pOut, pLeft, nLeft); | 1544 dataBufferAppend(pOut, pLeft, nLeft); |
1700 return SQLITE_OK; | 1545 return; |
1701 } | 1546 } |
1702 | 1547 |
1703 rc = dlrInit(&left, DL_DOCIDS, pLeft, nLeft); | 1548 dlrInit(&left, DL_DOCIDS, pLeft, nLeft); |
1704 if( rc!=SQLITE_OK ) return rc; | 1549 dlrInit(&right, DL_DOCIDS, pRight, nRight); |
1705 rc = dlrInit(&right, DL_DOCIDS, pRight, nRight); | |
1706 if( rc!=SQLITE_OK ){ | |
1707 dlrDestroy(&left); | |
1708 return rc; | |
1709 } | |
1710 dlwInit(&writer, DL_DOCIDS, pOut); | 1550 dlwInit(&writer, DL_DOCIDS, pOut); |
1711 | 1551 |
1712 while( !dlrAtEnd(&left) || !dlrAtEnd(&right) ){ | 1552 while( !dlrAtEnd(&left) || !dlrAtEnd(&right) ){ |
1713 if( dlrAtEnd(&right) ){ | 1553 if( dlrAtEnd(&right) ){ |
1714 dlwAdd(&writer, dlrDocid(&left)); | 1554 dlwAdd(&writer, dlrDocid(&left)); |
1715 rc = dlrStep(&left); | 1555 dlrStep(&left); |
1716 if( rc!=SQLITE_OK ) break; | |
1717 }else if( dlrAtEnd(&left) ){ | 1556 }else if( dlrAtEnd(&left) ){ |
1718 dlwAdd(&writer, dlrDocid(&right)); | 1557 dlwAdd(&writer, dlrDocid(&right)); |
1719 rc = dlrStep(&right); | 1558 dlrStep(&right); |
1720 if( rc!=SQLITE_OK ) break; | |
1721 }else if( dlrDocid(&left)<dlrDocid(&right) ){ | 1559 }else if( dlrDocid(&left)<dlrDocid(&right) ){ |
1722 dlwAdd(&writer, dlrDocid(&left)); | 1560 dlwAdd(&writer, dlrDocid(&left)); |
1723 rc = dlrStep(&left); | 1561 dlrStep(&left); |
1724 if( rc!=SQLITE_OK ) break; | |
1725 }else if( dlrDocid(&right)<dlrDocid(&left) ){ | 1562 }else if( dlrDocid(&right)<dlrDocid(&left) ){ |
1726 dlwAdd(&writer, dlrDocid(&right)); | 1563 dlwAdd(&writer, dlrDocid(&right)); |
1727 rc = dlrStep(&right); | 1564 dlrStep(&right); |
1728 if( rc!=SQLITE_OK ) break; | |
1729 }else{ | 1565 }else{ |
1730 dlwAdd(&writer, dlrDocid(&left)); | 1566 dlwAdd(&writer, dlrDocid(&left)); |
1731 rc = dlrStep(&left); | 1567 dlrStep(&left); |
1732 if( rc!=SQLITE_OK ) break; | 1568 dlrStep(&right); |
1733 rc = dlrStep(&right); | |
1734 if( rc!=SQLITE_OK ) break; | |
1735 } | 1569 } |
1736 } | 1570 } |
1737 | 1571 |
1738 dlrDestroy(&left); | 1572 dlrDestroy(&left); |
1739 dlrDestroy(&right); | 1573 dlrDestroy(&right); |
1740 dlwDestroy(&writer); | 1574 dlwDestroy(&writer); |
1741 return rc; | |
1742 } | 1575 } |
1743 | 1576 |
1744 /* We have two DL_DOCIDS doclists: pLeft and pRight. | 1577 /* We have two DL_DOCIDS doclists: pLeft and pRight. |
1745 ** Write into pOut as DL_DOCIDS doclist containing all documents that | 1578 ** Write into pOut as DL_DOCIDS doclist containing all documents that |
1746 ** occur in pLeft but not in pRight. | 1579 ** occur in pLeft but not in pRight. |
1747 */ | 1580 */ |
1748 static int docListExceptMerge( | 1581 static void docListExceptMerge( |
1749 const char *pLeft, int nLeft, | 1582 const char *pLeft, int nLeft, |
1750 const char *pRight, int nRight, | 1583 const char *pRight, int nRight, |
1751 DataBuffer *pOut /* Write the combined doclist here */ | 1584 DataBuffer *pOut /* Write the combined doclist here */ |
1752 ){ | 1585 ){ |
1753 DLReader left, right; | 1586 DLReader left, right; |
1754 DLWriter writer; | 1587 DLWriter writer; |
1755 int rc; | |
1756 | 1588 |
1757 if( nLeft==0 ) return SQLITE_OK; | 1589 if( nLeft==0 ) return; |
1758 if( nRight==0 ){ | 1590 if( nRight==0 ){ |
1759 dataBufferAppend(pOut, pLeft, nLeft); | 1591 dataBufferAppend(pOut, pLeft, nLeft); |
1760 return SQLITE_OK; | 1592 return; |
1761 } | 1593 } |
1762 | 1594 |
1763 rc = dlrInit(&left, DL_DOCIDS, pLeft, nLeft); | 1595 dlrInit(&left, DL_DOCIDS, pLeft, nLeft); |
1764 if( rc!=SQLITE_OK ) return rc; | 1596 dlrInit(&right, DL_DOCIDS, pRight, nRight); |
1765 rc = dlrInit(&right, DL_DOCIDS, pRight, nRight); | |
1766 if( rc!=SQLITE_OK ){ | |
1767 dlrDestroy(&left); | |
1768 return rc; | |
1769 } | |
1770 dlwInit(&writer, DL_DOCIDS, pOut); | 1597 dlwInit(&writer, DL_DOCIDS, pOut); |
1771 | 1598 |
1772 while( !dlrAtEnd(&left) ){ | 1599 while( !dlrAtEnd(&left) ){ |
1773 while( !dlrAtEnd(&right) && dlrDocid(&right)<dlrDocid(&left) ){ | 1600 while( !dlrAtEnd(&right) && dlrDocid(&right)<dlrDocid(&left) ){ |
1774 rc = dlrStep(&right); | 1601 dlrStep(&right); |
1775 if( rc!=SQLITE_OK ) goto err; | |
1776 } | 1602 } |
1777 if( dlrAtEnd(&right) || dlrDocid(&left)<dlrDocid(&right) ){ | 1603 if( dlrAtEnd(&right) || dlrDocid(&left)<dlrDocid(&right) ){ |
1778 dlwAdd(&writer, dlrDocid(&left)); | 1604 dlwAdd(&writer, dlrDocid(&left)); |
1779 } | 1605 } |
1780 rc = dlrStep(&left); | 1606 dlrStep(&left); |
1781 if( rc!=SQLITE_OK ) break; | |
1782 } | 1607 } |
1783 | 1608 |
1784 err: | |
1785 dlrDestroy(&left); | 1609 dlrDestroy(&left); |
1786 dlrDestroy(&right); | 1610 dlrDestroy(&right); |
1787 dlwDestroy(&writer); | 1611 dlwDestroy(&writer); |
1788 return rc; | |
1789 } | 1612 } |
1790 | 1613 |
1791 static char *string_dup_n(const char *s, int n){ | 1614 static char *string_dup_n(const char *s, int n){ |
1792 char *str = sqlite3_malloc(n + 1); | 1615 char *str = sqlite3_malloc(n + 1); |
1793 memcpy(str, s, n); | 1616 memcpy(str, s, n); |
1794 str[n] = '\0'; | 1617 str[n] = '\0'; |
1795 return str; | 1618 return str; |
1796 } | 1619 } |
1797 | 1620 |
1798 /* Duplicate a string; the caller must free() the returned string. | 1621 /* Duplicate a string; the caller must free() the returned string. |
(...skipping 183 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1982 /* CONTENT_EXISTS */ "select rowid from %_content limit 1", | 1805 /* CONTENT_EXISTS */ "select rowid from %_content limit 1", |
1983 | 1806 |
1984 /* BLOCK_INSERT */ "insert into %_segments values (?)", | 1807 /* BLOCK_INSERT */ "insert into %_segments values (?)", |
1985 /* BLOCK_SELECT */ "select block from %_segments where rowid = ?", | 1808 /* BLOCK_SELECT */ "select block from %_segments where rowid = ?", |
1986 /* BLOCK_DELETE */ "delete from %_segments where rowid between ? and ?", | 1809 /* BLOCK_DELETE */ "delete from %_segments where rowid between ? and ?", |
1987 /* BLOCK_DELETE_ALL */ "delete from %_segments", | 1810 /* BLOCK_DELETE_ALL */ "delete from %_segments", |
1988 | 1811 |
1989 /* SEGDIR_MAX_INDEX */ "select max(idx) from %_segdir where level = ?", | 1812 /* SEGDIR_MAX_INDEX */ "select max(idx) from %_segdir where level = ?", |
1990 /* SEGDIR_SET */ "insert into %_segdir values (?, ?, ?, ?, ?, ?)", | 1813 /* SEGDIR_SET */ "insert into %_segdir values (?, ?, ?, ?, ?, ?)", |
1991 /* SEGDIR_SELECT_LEVEL */ | 1814 /* SEGDIR_SELECT_LEVEL */ |
1992 "select start_block, leaves_end_block, root, idx from %_segdir " | 1815 "select start_block, leaves_end_block, root from %_segdir " |
1993 " where level = ? order by idx", | 1816 " where level = ? order by idx", |
1994 /* SEGDIR_SPAN */ | 1817 /* SEGDIR_SPAN */ |
1995 "select min(start_block), max(end_block) from %_segdir " | 1818 "select min(start_block), max(end_block) from %_segdir " |
1996 " where level = ? and start_block <> 0", | 1819 " where level = ? and start_block <> 0", |
1997 /* SEGDIR_DELETE */ "delete from %_segdir where level = ?", | 1820 /* SEGDIR_DELETE */ "delete from %_segdir where level = ?", |
1998 | 1821 |
1999 /* NOTE(shess): The first three results of the following two | 1822 /* NOTE(shess): The first three results of the following two |
2000 ** statements must match. | 1823 ** statements must match. |
2001 */ | 1824 */ |
2002 /* SEGDIR_SELECT_SEGMENT */ | 1825 /* SEGDIR_SELECT_SEGMENT */ |
(...skipping 1578 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3581 } | 3404 } |
3582 } else { /* full-text query */ | 3405 } else { /* full-text query */ |
3583 rc = sqlite3_reset(c->pStmt); | 3406 rc = sqlite3_reset(c->pStmt); |
3584 if( rc!=SQLITE_OK ) return rc; | 3407 if( rc!=SQLITE_OK ) return rc; |
3585 | 3408 |
3586 if( c->result.nData==0 || dlrAtEnd(&c->reader) ){ | 3409 if( c->result.nData==0 || dlrAtEnd(&c->reader) ){ |
3587 c->eof = 1; | 3410 c->eof = 1; |
3588 return SQLITE_OK; | 3411 return SQLITE_OK; |
3589 } | 3412 } |
3590 rc = sqlite3_bind_int64(c->pStmt, 1, dlrDocid(&c->reader)); | 3413 rc = sqlite3_bind_int64(c->pStmt, 1, dlrDocid(&c->reader)); |
3591 if( rc!=SQLITE_OK ) return rc; | 3414 dlrStep(&c->reader); |
3592 rc = dlrStep(&c->reader); | |
3593 if( rc!=SQLITE_OK ) return rc; | 3415 if( rc!=SQLITE_OK ) return rc; |
3594 /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */ | 3416 /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */ |
3595 rc = sqlite3_step(c->pStmt); | 3417 rc = sqlite3_step(c->pStmt); |
3596 if( rc==SQLITE_ROW ){ /* the case we expect */ | 3418 if( rc==SQLITE_ROW ){ /* the case we expect */ |
3597 c->eof = 0; | 3419 c->eof = 0; |
3598 return SQLITE_OK; | 3420 return SQLITE_OK; |
3599 } | 3421 } |
3600 | 3422 /* an error occurred; abort */ |
3601 /* Corrupt if the index refers to missing document. */ | 3423 return rc==SQLITE_DONE ? SQLITE_ERROR : rc; |
3602 if( rc==SQLITE_DONE ) return SQLITE_CORRUPT_BKPT; | |
3603 | |
3604 return rc; | |
3605 } | 3424 } |
3606 } | 3425 } |
3607 | 3426 |
3608 | 3427 |
3609 /* TODO(shess) If we pushed LeafReader to the top of the file, or to | 3428 /* TODO(shess) If we pushed LeafReader to the top of the file, or to |
3610 ** another file, term_select() could be pushed above | 3429 ** another file, term_select() could be pushed above |
3611 ** docListOfTerm(). | 3430 ** docListOfTerm(). |
3612 */ | 3431 */ |
3613 static int termSelect(fulltext_vtab *v, int iColumn, | 3432 static int termSelect(fulltext_vtab *v, int iColumn, |
3614 const char *pTerm, int nTerm, int isPrefix, | 3433 const char *pTerm, int nTerm, int isPrefix, |
(...skipping 27 matching lines...) Expand all Loading... |
3642 if( rc ) return rc; | 3461 if( rc ) return rc; |
3643 for(i=1; i<=pQTerm->nPhrase && left.nData>0; i++){ | 3462 for(i=1; i<=pQTerm->nPhrase && left.nData>0; i++){ |
3644 dataBufferInit(&right, 0); | 3463 dataBufferInit(&right, 0); |
3645 rc = termSelect(v, iColumn, pQTerm[i].pTerm, pQTerm[i].nTerm, | 3464 rc = termSelect(v, iColumn, pQTerm[i].pTerm, pQTerm[i].nTerm, |
3646 pQTerm[i].isPrefix, DL_POSITIONS, &right); | 3465 pQTerm[i].isPrefix, DL_POSITIONS, &right); |
3647 if( rc ){ | 3466 if( rc ){ |
3648 dataBufferDestroy(&left); | 3467 dataBufferDestroy(&left); |
3649 return rc; | 3468 return rc; |
3650 } | 3469 } |
3651 dataBufferInit(&new, 0); | 3470 dataBufferInit(&new, 0); |
3652 rc = docListPhraseMerge(left.pData, left.nData, right.pData, right.nData, | 3471 docListPhraseMerge(left.pData, left.nData, right.pData, right.nData, |
3653 i<pQTerm->nPhrase ? DL_POSITIONS : DL_DOCIDS, &new); | 3472 i<pQTerm->nPhrase ? DL_POSITIONS : DL_DOCIDS, &new); |
3654 dataBufferDestroy(&left); | 3473 dataBufferDestroy(&left); |
3655 dataBufferDestroy(&right); | 3474 dataBufferDestroy(&right); |
3656 if( rc!=SQLITE_OK ){ | |
3657 dataBufferDestroy(&new); | |
3658 return rc; | |
3659 } | |
3660 left = new; | 3475 left = new; |
3661 } | 3476 } |
3662 *pResult = left; | 3477 *pResult = left; |
3663 return rc; | 3478 return SQLITE_OK; |
3664 } | 3479 } |
3665 | 3480 |
3666 /* Add a new term pTerm[0..nTerm-1] to the query *q. | 3481 /* Add a new term pTerm[0..nTerm-1] to the query *q. |
3667 */ | 3482 */ |
3668 static void queryAdd(Query *q, const char *pTerm, int nTerm){ | 3483 static void queryAdd(Query *q, const char *pTerm, int nTerm){ |
3669 QueryTerm *t; | 3484 QueryTerm *t; |
3670 ++q->nTerms; | 3485 ++q->nTerms; |
3671 q->pTerms = sqlite3_realloc(q->pTerms, q->nTerms * sizeof(q->pTerms[0])); | 3486 q->pTerms = sqlite3_realloc(q->pTerms, q->nTerms * sizeof(q->pTerms[0])); |
3672 if( q->pTerms==0 ){ | 3487 if( q->pTerms==0 ){ |
3673 q->nTerms = 0; | 3488 q->nTerms = 0; |
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3720 sqlite3_tokenizer *pTokenizer, /* The tokenizer to use */ | 3535 sqlite3_tokenizer *pTokenizer, /* The tokenizer to use */ |
3721 const char *pSegment, int nSegment, /* Query expression being parsed */ | 3536 const char *pSegment, int nSegment, /* Query expression being parsed */ |
3722 int inPhrase, /* True if within "..." */ | 3537 int inPhrase, /* True if within "..." */ |
3723 Query *pQuery /* Append results here */ | 3538 Query *pQuery /* Append results here */ |
3724 ){ | 3539 ){ |
3725 const sqlite3_tokenizer_module *pModule = pTokenizer->pModule; | 3540 const sqlite3_tokenizer_module *pModule = pTokenizer->pModule; |
3726 sqlite3_tokenizer_cursor *pCursor; | 3541 sqlite3_tokenizer_cursor *pCursor; |
3727 int firstIndex = pQuery->nTerms; | 3542 int firstIndex = pQuery->nTerms; |
3728 int iCol; | 3543 int iCol; |
3729 int nTerm = 1; | 3544 int nTerm = 1; |
3730 int iEndLast = -1; | |
3731 | 3545 |
3732 int rc = pModule->xOpen(pTokenizer, pSegment, nSegment, &pCursor); | 3546 int rc = pModule->xOpen(pTokenizer, pSegment, nSegment, &pCursor); |
3733 if( rc!=SQLITE_OK ) return rc; | 3547 if( rc!=SQLITE_OK ) return rc; |
3734 pCursor->pTokenizer = pTokenizer; | 3548 pCursor->pTokenizer = pTokenizer; |
3735 | 3549 |
3736 while( 1 ){ | 3550 while( 1 ){ |
3737 const char *pToken; | 3551 const char *pToken; |
3738 int nToken, iBegin, iEnd, iPos; | 3552 int nToken, iBegin, iEnd, iPos; |
3739 | 3553 |
3740 rc = pModule->xNext(pCursor, | 3554 rc = pModule->xNext(pCursor, |
3741 &pToken, &nToken, | 3555 &pToken, &nToken, |
3742 &iBegin, &iEnd, &iPos); | 3556 &iBegin, &iEnd, &iPos); |
3743 if( rc!=SQLITE_OK ) break; | 3557 if( rc!=SQLITE_OK ) break; |
3744 if( !inPhrase && | 3558 if( !inPhrase && |
3745 pSegment[iEnd]==':' && | 3559 pSegment[iEnd]==':' && |
3746 (iCol = checkColumnSpecifier(pQuery->pFts, pToken, nToken))>=0 ){ | 3560 (iCol = checkColumnSpecifier(pQuery->pFts, pToken, nToken))>=0 ){ |
3747 pQuery->nextColumn = iCol; | 3561 pQuery->nextColumn = iCol; |
3748 continue; | 3562 continue; |
3749 } | 3563 } |
3750 if( !inPhrase && pQuery->nTerms>0 && nToken==2 | 3564 if( !inPhrase && pQuery->nTerms>0 && nToken==2 |
3751 && pSegment[iBegin]=='O' && pSegment[iBegin+1]=='R' ){ | 3565 && pSegment[iBegin]=='O' && pSegment[iBegin+1]=='R' ){ |
3752 pQuery->nextIsOr = 1; | 3566 pQuery->nextIsOr = 1; |
3753 continue; | 3567 continue; |
3754 } | 3568 } |
3755 | |
3756 /* | |
3757 * The ICU tokenizer considers '*' a break character, so the code below | |
3758 * sets isPrefix correctly, but since that code doesn't eat the '*', the | |
3759 * ICU tokenizer returns it as the next token. So eat it here until a | |
3760 * better solution presents itself. | |
3761 */ | |
3762 if( pQuery->nTerms>0 && nToken==1 && pSegment[iBegin]=='*' && | |
3763 iEndLast==iBegin){ | |
3764 pQuery->pTerms[pQuery->nTerms-1].isPrefix = 1; | |
3765 continue; | |
3766 } | |
3767 iEndLast = iEnd; | |
3768 | |
3769 queryAdd(pQuery, pToken, nToken); | 3569 queryAdd(pQuery, pToken, nToken); |
3770 if( !inPhrase && iBegin>0 && pSegment[iBegin-1]=='-' ){ | 3570 if( !inPhrase && iBegin>0 && pSegment[iBegin-1]=='-' ){ |
3771 pQuery->pTerms[pQuery->nTerms-1].isNot = 1; | 3571 pQuery->pTerms[pQuery->nTerms-1].isNot = 1; |
3772 } | 3572 } |
3773 if( iEnd<nSegment && pSegment[iEnd]=='*' ){ | 3573 if( iEnd<nSegment && pSegment[iEnd]=='*' ){ |
3774 pQuery->pTerms[pQuery->nTerms-1].isPrefix = 1; | 3574 pQuery->pTerms[pQuery->nTerms-1].isPrefix = 1; |
3775 } | 3575 } |
3776 pQuery->pTerms[pQuery->nTerms-1].iPhrase = nTerm; | 3576 pQuery->pTerms[pQuery->nTerms-1].iPhrase = nTerm; |
3777 if( inPhrase ){ | 3577 if( inPhrase ){ |
3778 nTerm++; | 3578 nTerm++; |
(...skipping 119 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3898 while( iNext<pQuery->nTerms && aTerm[iNext].isOr ){ | 3698 while( iNext<pQuery->nTerms && aTerm[iNext].isOr ){ |
3899 rc = docListOfTerm(v, aTerm[iNext].iColumn, &aTerm[iNext], &or); | 3699 rc = docListOfTerm(v, aTerm[iNext].iColumn, &aTerm[iNext], &or); |
3900 iNext += aTerm[iNext].nPhrase + 1; | 3700 iNext += aTerm[iNext].nPhrase + 1; |
3901 if( rc ){ | 3701 if( rc ){ |
3902 if( i!=nNot ) dataBufferDestroy(&left); | 3702 if( i!=nNot ) dataBufferDestroy(&left); |
3903 dataBufferDestroy(&right); | 3703 dataBufferDestroy(&right); |
3904 queryClear(pQuery); | 3704 queryClear(pQuery); |
3905 return rc; | 3705 return rc; |
3906 } | 3706 } |
3907 dataBufferInit(&new, 0); | 3707 dataBufferInit(&new, 0); |
3908 rc = docListOrMerge(right.pData, right.nData, or.pData, or.nData, &new); | 3708 docListOrMerge(right.pData, right.nData, or.pData, or.nData, &new); |
3909 dataBufferDestroy(&right); | 3709 dataBufferDestroy(&right); |
3910 dataBufferDestroy(&or); | 3710 dataBufferDestroy(&or); |
3911 if( rc!=SQLITE_OK ){ | |
3912 if( i!=nNot ) dataBufferDestroy(&left); | |
3913 queryClear(pQuery); | |
3914 dataBufferDestroy(&new); | |
3915 return rc; | |
3916 } | |
3917 right = new; | 3711 right = new; |
3918 } | 3712 } |
3919 if( i==nNot ){ /* first term processed. */ | 3713 if( i==nNot ){ /* first term processed. */ |
3920 left = right; | 3714 left = right; |
3921 }else{ | 3715 }else{ |
3922 dataBufferInit(&new, 0); | 3716 dataBufferInit(&new, 0); |
3923 rc = docListAndMerge(left.pData, left.nData, | 3717 docListAndMerge(left.pData, left.nData, right.pData, right.nData, &new); |
3924 right.pData, right.nData, &new); | |
3925 dataBufferDestroy(&right); | 3718 dataBufferDestroy(&right); |
3926 dataBufferDestroy(&left); | 3719 dataBufferDestroy(&left); |
3927 if( rc!=SQLITE_OK ){ | |
3928 queryClear(pQuery); | |
3929 dataBufferDestroy(&new); | |
3930 return rc; | |
3931 } | |
3932 left = new; | 3720 left = new; |
3933 } | 3721 } |
3934 } | 3722 } |
3935 | 3723 |
3936 if( nNot==pQuery->nTerms ){ | 3724 if( nNot==pQuery->nTerms ){ |
3937 /* We do not yet know how to handle a query of only NOT terms */ | 3725 /* We do not yet know how to handle a query of only NOT terms */ |
3938 return SQLITE_ERROR; | 3726 return SQLITE_ERROR; |
3939 } | 3727 } |
3940 | 3728 |
3941 /* Do the EXCEPT terms */ | 3729 /* Do the EXCEPT terms */ |
3942 for(i=0; i<pQuery->nTerms; i += aTerm[i].nPhrase + 1){ | 3730 for(i=0; i<pQuery->nTerms; i += aTerm[i].nPhrase + 1){ |
3943 if( !aTerm[i].isNot ) continue; | 3731 if( !aTerm[i].isNot ) continue; |
3944 rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &right); | 3732 rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &right); |
3945 if( rc ){ | 3733 if( rc ){ |
3946 queryClear(pQuery); | 3734 queryClear(pQuery); |
3947 dataBufferDestroy(&left); | 3735 dataBufferDestroy(&left); |
3948 return rc; | 3736 return rc; |
3949 } | 3737 } |
3950 dataBufferInit(&new, 0); | 3738 dataBufferInit(&new, 0); |
3951 rc = docListExceptMerge(left.pData, left.nData, | 3739 docListExceptMerge(left.pData, left.nData, right.pData, right.nData, &new); |
3952 right.pData, right.nData, &new); | |
3953 dataBufferDestroy(&right); | 3740 dataBufferDestroy(&right); |
3954 dataBufferDestroy(&left); | 3741 dataBufferDestroy(&left); |
3955 if( rc!=SQLITE_OK ){ | |
3956 queryClear(pQuery); | |
3957 dataBufferDestroy(&new); | |
3958 return rc; | |
3959 } | |
3960 left = new; | 3742 left = new; |
3961 } | 3743 } |
3962 | 3744 |
3963 *pResult = left; | 3745 *pResult = left; |
3964 return rc; | 3746 return rc; |
3965 } | 3747 } |
3966 | 3748 |
3967 /* | 3749 /* |
3968 ** This is the xFilter interface for the virtual table. See | 3750 ** This is the xFilter interface for the virtual table. See |
3969 ** the virtual table xFilter method documentation for additional | 3751 ** the virtual table xFilter method documentation for additional |
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4043 if( c->result.nData!=0 ){ | 3825 if( c->result.nData!=0 ){ |
4044 /* This case happens if the same cursor is used repeatedly. */ | 3826 /* This case happens if the same cursor is used repeatedly. */ |
4045 dlrDestroy(&c->reader); | 3827 dlrDestroy(&c->reader); |
4046 dataBufferReset(&c->result); | 3828 dataBufferReset(&c->result); |
4047 }else{ | 3829 }else{ |
4048 dataBufferInit(&c->result, 0); | 3830 dataBufferInit(&c->result, 0); |
4049 } | 3831 } |
4050 rc = fulltextQuery(v, idxNum-QUERY_FULLTEXT, zQuery, -1, &c->result, &c->q
); | 3832 rc = fulltextQuery(v, idxNum-QUERY_FULLTEXT, zQuery, -1, &c->result, &c->q
); |
4051 if( rc!=SQLITE_OK ) return rc; | 3833 if( rc!=SQLITE_OK ) return rc; |
4052 if( c->result.nData!=0 ){ | 3834 if( c->result.nData!=0 ){ |
4053 rc = dlrInit(&c->reader, DL_DOCIDS, c->result.pData, c->result.nData); | 3835 dlrInit(&c->reader, DL_DOCIDS, c->result.pData, c->result.nData); |
4054 if( rc!=SQLITE_OK ) return rc; | |
4055 } | 3836 } |
4056 break; | 3837 break; |
4057 } | 3838 } |
4058 } | 3839 } |
4059 | 3840 |
4060 return fulltextNext(pCursor); | 3841 return fulltextNext(pCursor); |
4061 } | 3842 } |
4062 | 3843 |
4063 /* This is the xEof method of the virtual table. The SQLite core | 3844 /* This is the xEof method of the virtual table. The SQLite core |
4064 ** calls this routine to find out if it has reached the end of | 3845 ** calls this routine to find out if it has reached the end of |
(...skipping 480 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4545 DataBuffer term; /* previous term, for decoding term delta. */ | 4326 DataBuffer term; /* previous term, for decoding term delta. */ |
4546 | 4327 |
4547 sqlite_int64 iBlockid; | 4328 sqlite_int64 iBlockid; |
4548 } InteriorReader; | 4329 } InteriorReader; |
4549 | 4330 |
4550 static void interiorReaderDestroy(InteriorReader *pReader){ | 4331 static void interiorReaderDestroy(InteriorReader *pReader){ |
4551 dataBufferDestroy(&pReader->term); | 4332 dataBufferDestroy(&pReader->term); |
4552 SCRAMBLE(pReader); | 4333 SCRAMBLE(pReader); |
4553 } | 4334 } |
4554 | 4335 |
4555 static int interiorReaderInit(const char *pData, int nData, | 4336 /* TODO(shess) The assertions are great, but what if we're in NDEBUG |
4556 InteriorReader *pReader){ | 4337 ** and the blob is empty or otherwise contains suspect data? |
| 4338 */ |
| 4339 static void interiorReaderInit(const char *pData, int nData, |
| 4340 InteriorReader *pReader){ |
4557 int n, nTerm; | 4341 int n, nTerm; |
4558 | 4342 |
4559 /* These conditions are checked and met by the callers. */ | 4343 /* Require at least the leading flag byte */ |
4560 assert( nData>0 ); | 4344 assert( nData>0 ); |
4561 assert( pData[0]!='\0' ); | 4345 assert( pData[0]!='\0' ); |
4562 | 4346 |
4563 CLEAR(pReader); | 4347 CLEAR(pReader); |
4564 | 4348 |
4565 /* Decode the base blockid, and set the cursor to the first term. */ | 4349 /* Decode the base blockid, and set the cursor to the first term. */ |
4566 n = getVarintSafe(pData+1, &pReader->iBlockid, nData-1); | 4350 n = getVarint(pData+1, &pReader->iBlockid); |
4567 if( !n ) return SQLITE_CORRUPT_BKPT; | 4351 assert( 1+n<=nData ); |
4568 pReader->pData = pData+1+n; | 4352 pReader->pData = pData+1+n; |
4569 pReader->nData = nData-(1+n); | 4353 pReader->nData = nData-(1+n); |
4570 | 4354 |
4571 /* A single-child interior node (such as when a leaf node was too | 4355 /* A single-child interior node (such as when a leaf node was too |
4572 ** large for the segment directory) won't have any terms. | 4356 ** large for the segment directory) won't have any terms. |
4573 ** Otherwise, decode the first term. | 4357 ** Otherwise, decode the first term. |
4574 */ | 4358 */ |
4575 if( pReader->nData==0 ){ | 4359 if( pReader->nData==0 ){ |
4576 dataBufferInit(&pReader->term, 0); | 4360 dataBufferInit(&pReader->term, 0); |
4577 }else{ | 4361 }else{ |
4578 n = getVarint32Safe(pReader->pData, &nTerm, pReader->nData); | 4362 n = getVarint32(pReader->pData, &nTerm); |
4579 if( !n || nTerm<0 || nTerm>pReader->nData-n) return SQLITE_CORRUPT_BKPT; | |
4580 dataBufferInit(&pReader->term, nTerm); | 4363 dataBufferInit(&pReader->term, nTerm); |
4581 dataBufferReplace(&pReader->term, pReader->pData+n, nTerm); | 4364 dataBufferReplace(&pReader->term, pReader->pData+n, nTerm); |
| 4365 assert( n+nTerm<=pReader->nData ); |
4582 pReader->pData += n+nTerm; | 4366 pReader->pData += n+nTerm; |
4583 pReader->nData -= n+nTerm; | 4367 pReader->nData -= n+nTerm; |
4584 } | 4368 } |
4585 return SQLITE_OK; | |
4586 } | 4369 } |
4587 | 4370 |
4588 static int interiorReaderAtEnd(InteriorReader *pReader){ | 4371 static int interiorReaderAtEnd(InteriorReader *pReader){ |
4589 return pReader->term.nData<=0; | 4372 return pReader->term.nData==0; |
4590 } | 4373 } |
4591 | 4374 |
4592 static sqlite_int64 interiorReaderCurrentBlockid(InteriorReader *pReader){ | 4375 static sqlite_int64 interiorReaderCurrentBlockid(InteriorReader *pReader){ |
4593 return pReader->iBlockid; | 4376 return pReader->iBlockid; |
4594 } | 4377 } |
4595 | 4378 |
4596 static int interiorReaderTermBytes(InteriorReader *pReader){ | 4379 static int interiorReaderTermBytes(InteriorReader *pReader){ |
4597 assert( !interiorReaderAtEnd(pReader) ); | 4380 assert( !interiorReaderAtEnd(pReader) ); |
4598 return pReader->term.nData; | 4381 return pReader->term.nData; |
4599 } | 4382 } |
4600 static const char *interiorReaderTerm(InteriorReader *pReader){ | 4383 static const char *interiorReaderTerm(InteriorReader *pReader){ |
4601 assert( !interiorReaderAtEnd(pReader) ); | 4384 assert( !interiorReaderAtEnd(pReader) ); |
4602 return pReader->term.pData; | 4385 return pReader->term.pData; |
4603 } | 4386 } |
4604 | 4387 |
4605 /* Step forward to the next term in the node. */ | 4388 /* Step forward to the next term in the node. */ |
4606 static int interiorReaderStep(InteriorReader *pReader){ | 4389 static void interiorReaderStep(InteriorReader *pReader){ |
4607 assert( !interiorReaderAtEnd(pReader) ); | 4390 assert( !interiorReaderAtEnd(pReader) ); |
4608 | 4391 |
4609 /* If the last term has been read, signal eof, else construct the | 4392 /* If the last term has been read, signal eof, else construct the |
4610 ** next term. | 4393 ** next term. |
4611 */ | 4394 */ |
4612 if( pReader->nData==0 ){ | 4395 if( pReader->nData==0 ){ |
4613 dataBufferReset(&pReader->term); | 4396 dataBufferReset(&pReader->term); |
4614 }else{ | 4397 }else{ |
4615 int n, nPrefix, nSuffix; | 4398 int n, nPrefix, nSuffix; |
4616 | 4399 |
4617 n = getVarint32Safe(pReader->pData, &nPrefix, pReader->nData); | 4400 n = getVarint32(pReader->pData, &nPrefix); |
4618 if( !n ) return SQLITE_CORRUPT_BKPT; | 4401 n += getVarint32(pReader->pData+n, &nSuffix); |
4619 pReader->nData -= n; | |
4620 pReader->pData += n; | |
4621 n = getVarint32Safe(pReader->pData, &nSuffix, pReader->nData); | |
4622 if( !n ) return SQLITE_CORRUPT_BKPT; | |
4623 pReader->nData -= n; | |
4624 pReader->pData += n; | |
4625 if( nSuffix<0 || nSuffix>pReader->nData ) return SQLITE_CORRUPT_BKPT; | |
4626 if( nPrefix<0 || nPrefix>pReader->term.nData ) return SQLITE_CORRUPT_BKPT; | |
4627 | 4402 |
4628 /* Truncate the current term and append suffix data. */ | 4403 /* Truncate the current term and append suffix data. */ |
4629 pReader->term.nData = nPrefix; | 4404 pReader->term.nData = nPrefix; |
4630 dataBufferAppend(&pReader->term, pReader->pData, nSuffix); | 4405 dataBufferAppend(&pReader->term, pReader->pData+n, nSuffix); |
4631 | 4406 |
4632 pReader->pData += nSuffix; | 4407 assert( n+nSuffix<=pReader->nData ); |
4633 pReader->nData -= nSuffix; | 4408 pReader->pData += n+nSuffix; |
| 4409 pReader->nData -= n+nSuffix; |
4634 } | 4410 } |
4635 pReader->iBlockid++; | 4411 pReader->iBlockid++; |
4636 return SQLITE_OK; | |
4637 } | 4412 } |
4638 | 4413 |
4639 /* Compare the current term to pTerm[nTerm], returning strcmp-style | 4414 /* Compare the current term to pTerm[nTerm], returning strcmp-style |
4640 ** results. If isPrefix, equality means equal through nTerm bytes. | 4415 ** results. If isPrefix, equality means equal through nTerm bytes. |
4641 */ | 4416 */ |
4642 static int interiorReaderTermCmp(InteriorReader *pReader, | 4417 static int interiorReaderTermCmp(InteriorReader *pReader, |
4643 const char *pTerm, int nTerm, int isPrefix){ | 4418 const char *pTerm, int nTerm, int isPrefix){ |
4644 const char *pReaderTerm = interiorReaderTerm(pReader); | 4419 const char *pReaderTerm = interiorReaderTerm(pReader); |
4645 int nReaderTerm = interiorReaderTermBytes(pReader); | 4420 int nReaderTerm = interiorReaderTermBytes(pReader); |
4646 int c, n = nReaderTerm<nTerm ? nReaderTerm : nTerm; | 4421 int c, n = nReaderTerm<nTerm ? nReaderTerm : nTerm; |
(...skipping 351 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4998 | 4773 |
4999 /* Estimate the length of the merged doclist so we can leave space | 4774 /* Estimate the length of the merged doclist so we can leave space |
5000 ** to encode it. | 4775 ** to encode it. |
5001 */ | 4776 */ |
5002 for(i=0, nData=0; i<nReaders; i++){ | 4777 for(i=0, nData=0; i<nReaders; i++){ |
5003 nData += dlrAllDataBytes(&pReaders[i]); | 4778 nData += dlrAllDataBytes(&pReaders[i]); |
5004 } | 4779 } |
5005 n = putVarint(c, nData); | 4780 n = putVarint(c, nData); |
5006 dataBufferAppend(&pWriter->data, c, n); | 4781 dataBufferAppend(&pWriter->data, c, n); |
5007 | 4782 |
5008 rc = docListMerge(&pWriter->data, pReaders, nReaders); | 4783 docListMerge(&pWriter->data, pReaders, nReaders); |
5009 if( rc!= SQLITE_OK ) return rc; | |
5010 ASSERT_VALID_DOCLIST(DL_DEFAULT, | 4784 ASSERT_VALID_DOCLIST(DL_DEFAULT, |
5011 pWriter->data.pData+iDoclistData+n, | 4785 pWriter->data.pData+iDoclistData+n, |
5012 pWriter->data.nData-iDoclistData-n, NULL); | 4786 pWriter->data.nData-iDoclistData-n, NULL); |
5013 | 4787 |
5014 /* The actual amount of doclist data at this point could be smaller | 4788 /* The actual amount of doclist data at this point could be smaller |
5015 ** than the length we encoded. Additionally, the space required to | 4789 ** than the length we encoded. Additionally, the space required to |
5016 ** encode this length could be smaller. For small doclists, this is | 4790 ** encode this length could be smaller. For small doclists, this is |
5017 ** not a big deal, we can just use memmove() to adjust things. | 4791 ** not a big deal, we can just use memmove() to adjust things. |
5018 */ | 4792 */ |
5019 nActualData = pWriter->data.nData-(iDoclistData+n); | 4793 nActualData = pWriter->data.nData-(iDoclistData+n); |
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5109 */ | 4883 */ |
5110 /* TODO(shess) Revise writeZeroSegment() so that doclists are | 4884 /* TODO(shess) Revise writeZeroSegment() so that doclists are |
5111 ** constructed directly in pWriter->data. | 4885 ** constructed directly in pWriter->data. |
5112 */ | 4886 */ |
5113 static int leafWriterStep(fulltext_vtab *v, LeafWriter *pWriter, | 4887 static int leafWriterStep(fulltext_vtab *v, LeafWriter *pWriter, |
5114 const char *pTerm, int nTerm, | 4888 const char *pTerm, int nTerm, |
5115 const char *pData, int nData){ | 4889 const char *pData, int nData){ |
5116 int rc; | 4890 int rc; |
5117 DLReader reader; | 4891 DLReader reader; |
5118 | 4892 |
5119 rc = dlrInit(&reader, DL_DEFAULT, pData, nData); | 4893 dlrInit(&reader, DL_DEFAULT, pData, nData); |
5120 if( rc!=SQLITE_OK ) return rc; | |
5121 rc = leafWriterStepMerge(v, pWriter, pTerm, nTerm, &reader, 1); | 4894 rc = leafWriterStepMerge(v, pWriter, pTerm, nTerm, &reader, 1); |
5122 dlrDestroy(&reader); | 4895 dlrDestroy(&reader); |
5123 | 4896 |
5124 return rc; | 4897 return rc; |
5125 } | 4898 } |
5126 | 4899 |
5127 | 4900 |
5128 /****************************************************************/ | 4901 /****************************************************************/ |
5129 /* LeafReader is used to iterate over an individual leaf node. */ | 4902 /* LeafReader is used to iterate over an individual leaf node. */ |
5130 typedef struct LeafReader { | 4903 typedef struct LeafReader { |
(...skipping 24 matching lines...) Expand all Loading... |
5155 /* Access the doclist data for the current term. */ | 4928 /* Access the doclist data for the current term. */ |
5156 static int leafReaderDataBytes(LeafReader *pReader){ | 4929 static int leafReaderDataBytes(LeafReader *pReader){ |
5157 int nData; | 4930 int nData; |
5158 assert( pReader->term.nData>0 ); | 4931 assert( pReader->term.nData>0 ); |
5159 getVarint32(pReader->pData, &nData); | 4932 getVarint32(pReader->pData, &nData); |
5160 return nData; | 4933 return nData; |
5161 } | 4934 } |
5162 static const char *leafReaderData(LeafReader *pReader){ | 4935 static const char *leafReaderData(LeafReader *pReader){ |
5163 int n, nData; | 4936 int n, nData; |
5164 assert( pReader->term.nData>0 ); | 4937 assert( pReader->term.nData>0 ); |
5165 n = getVarint32Safe(pReader->pData, &nData, pReader->nData); | 4938 n = getVarint32(pReader->pData, &nData); |
5166 if( !n || nData>pReader->nData-n ) return NULL; | |
5167 return pReader->pData+n; | 4939 return pReader->pData+n; |
5168 } | 4940 } |
5169 | 4941 |
5170 static int leafReaderInit(const char *pData, int nData, LeafReader *pReader){ | 4942 static void leafReaderInit(const char *pData, int nData, |
| 4943 LeafReader *pReader){ |
5171 int nTerm, n; | 4944 int nTerm, n; |
5172 | 4945 |
5173 /* All callers check this precondition. */ | |
5174 assert( nData>0 ); | 4946 assert( nData>0 ); |
5175 assert( pData[0]=='\0' ); | 4947 assert( pData[0]=='\0' ); |
5176 | 4948 |
5177 CLEAR(pReader); | 4949 CLEAR(pReader); |
5178 | 4950 |
5179 /* Read the first term, skipping the header byte. */ | 4951 /* Read the first term, skipping the header byte. */ |
5180 n = getVarint32Safe(pData+1, &nTerm, nData-1); | 4952 n = getVarint32(pData+1, &nTerm); |
5181 if( !n || nTerm<0 || nTerm>nData-1-n ) return SQLITE_CORRUPT_BKPT; | |
5182 dataBufferInit(&pReader->term, nTerm); | 4953 dataBufferInit(&pReader->term, nTerm); |
5183 dataBufferReplace(&pReader->term, pData+1+n, nTerm); | 4954 dataBufferReplace(&pReader->term, pData+1+n, nTerm); |
5184 | 4955 |
5185 /* Position after the first term. */ | 4956 /* Position after the first term. */ |
| 4957 assert( 1+n+nTerm<nData ); |
5186 pReader->pData = pData+1+n+nTerm; | 4958 pReader->pData = pData+1+n+nTerm; |
5187 pReader->nData = nData-1-n-nTerm; | 4959 pReader->nData = nData-1-n-nTerm; |
5188 return SQLITE_OK; | |
5189 } | 4960 } |
5190 | 4961 |
5191 /* Step the reader forward to the next term. */ | 4962 /* Step the reader forward to the next term. */ |
5192 static int leafReaderStep(LeafReader *pReader){ | 4963 static void leafReaderStep(LeafReader *pReader){ |
5193 int n, nData, nPrefix, nSuffix; | 4964 int n, nData, nPrefix, nSuffix; |
5194 assert( !leafReaderAtEnd(pReader) ); | 4965 assert( !leafReaderAtEnd(pReader) ); |
5195 | 4966 |
5196 /* Skip previous entry's data block. */ | 4967 /* Skip previous entry's data block. */ |
5197 n = getVarint32Safe(pReader->pData, &nData, pReader->nData); | 4968 n = getVarint32(pReader->pData, &nData); |
5198 if( !n || nData<0 || nData>pReader->nData-n ) return SQLITE_CORRUPT_BKPT; | 4969 assert( n+nData<=pReader->nData ); |
5199 pReader->pData += n+nData; | 4970 pReader->pData += n+nData; |
5200 pReader->nData -= n+nData; | 4971 pReader->nData -= n+nData; |
5201 | 4972 |
5202 if( !leafReaderAtEnd(pReader) ){ | 4973 if( !leafReaderAtEnd(pReader) ){ |
5203 /* Construct the new term using a prefix from the old term plus a | 4974 /* Construct the new term using a prefix from the old term plus a |
5204 ** suffix from the leaf data. | 4975 ** suffix from the leaf data. |
5205 */ | 4976 */ |
5206 n = getVarint32Safe(pReader->pData, &nPrefix, pReader->nData); | 4977 n = getVarint32(pReader->pData, &nPrefix); |
5207 if( !n ) return SQLITE_CORRUPT_BKPT; | 4978 n += getVarint32(pReader->pData+n, &nSuffix); |
5208 pReader->nData -= n; | 4979 assert( n+nSuffix<pReader->nData ); |
5209 pReader->pData += n; | |
5210 n = getVarint32Safe(pReader->pData, &nSuffix, pReader->nData); | |
5211 if( !n ) return SQLITE_CORRUPT_BKPT; | |
5212 pReader->nData -= n; | |
5213 pReader->pData += n; | |
5214 if( nSuffix<0 || nSuffix>pReader->nData ) return SQLITE_CORRUPT_BKPT; | |
5215 if( nPrefix<0 || nPrefix>pReader->term.nData ) return SQLITE_CORRUPT_BKPT; | |
5216 pReader->term.nData = nPrefix; | 4980 pReader->term.nData = nPrefix; |
5217 dataBufferAppend(&pReader->term, pReader->pData, nSuffix); | 4981 dataBufferAppend(&pReader->term, pReader->pData+n, nSuffix); |
5218 | 4982 |
5219 pReader->pData += nSuffix; | 4983 pReader->pData += n+nSuffix; |
5220 pReader->nData -= nSuffix; | 4984 pReader->nData -= n+nSuffix; |
5221 } | 4985 } |
5222 return SQLITE_OK; | |
5223 } | 4986 } |
5224 | 4987 |
5225 /* strcmp-style comparison of pReader's current term against pTerm. | 4988 /* strcmp-style comparison of pReader's current term against pTerm. |
5226 ** If isPrefix, equality means equal through nTerm bytes. | 4989 ** If isPrefix, equality means equal through nTerm bytes. |
5227 */ | 4990 */ |
5228 static int leafReaderTermCmp(LeafReader *pReader, | 4991 static int leafReaderTermCmp(LeafReader *pReader, |
5229 const char *pTerm, int nTerm, int isPrefix){ | 4992 const char *pTerm, int nTerm, int isPrefix){ |
5230 int c, n = pReader->term.nData<nTerm ? pReader->term.nData : nTerm; | 4993 int c, n = pReader->term.nData<nTerm ? pReader->term.nData : nTerm; |
5231 if( n==0 ){ | 4994 if( n==0 ){ |
5232 if( pReader->term.nData>0 ) return -1; | 4995 if( pReader->term.nData>0 ) return -1; |
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5305 } | 5068 } |
5306 leafReaderDestroy(&pReader->leafReader); | 5069 leafReaderDestroy(&pReader->leafReader); |
5307 dataBufferDestroy(&pReader->rootData); | 5070 dataBufferDestroy(&pReader->rootData); |
5308 SCRAMBLE(pReader); | 5071 SCRAMBLE(pReader); |
5309 } | 5072 } |
5310 | 5073 |
5311 /* Initialize pReader with the given root data (if iStartBlockid==0 | 5074 /* Initialize pReader with the given root data (if iStartBlockid==0 |
5312 ** the leaf data was entirely contained in the root), or from the | 5075 ** the leaf data was entirely contained in the root), or from the |
5313 ** stream of blocks between iStartBlockid and iEndBlockid, inclusive. | 5076 ** stream of blocks between iStartBlockid and iEndBlockid, inclusive. |
5314 */ | 5077 */ |
5315 /* TODO(shess): Figure out a means of indicating how many leaves are | |
5316 ** expected, for purposes of detecting corruption. | |
5317 */ | |
5318 static int leavesReaderInit(fulltext_vtab *v, | 5078 static int leavesReaderInit(fulltext_vtab *v, |
5319 int idx, | 5079 int idx, |
5320 sqlite_int64 iStartBlockid, | 5080 sqlite_int64 iStartBlockid, |
5321 sqlite_int64 iEndBlockid, | 5081 sqlite_int64 iEndBlockid, |
5322 const char *pRootData, int nRootData, | 5082 const char *pRootData, int nRootData, |
5323 LeavesReader *pReader){ | 5083 LeavesReader *pReader){ |
5324 CLEAR(pReader); | 5084 CLEAR(pReader); |
5325 pReader->idx = idx; | 5085 pReader->idx = idx; |
5326 | 5086 |
5327 dataBufferInit(&pReader->rootData, 0); | 5087 dataBufferInit(&pReader->rootData, 0); |
5328 if( iStartBlockid==0 ){ | 5088 if( iStartBlockid==0 ){ |
5329 int rc; | |
5330 /* Corrupt if this can't be a leaf node. */ | |
5331 if( pRootData==NULL || nRootData<1 || pRootData[0]!='\0' ){ | |
5332 return SQLITE_CORRUPT_BKPT; | |
5333 } | |
5334 /* Entire leaf level fit in root data. */ | 5089 /* Entire leaf level fit in root data. */ |
5335 dataBufferReplace(&pReader->rootData, pRootData, nRootData); | 5090 dataBufferReplace(&pReader->rootData, pRootData, nRootData); |
5336 rc = leafReaderInit(pReader->rootData.pData, pReader->rootData.nData, | 5091 leafReaderInit(pReader->rootData.pData, pReader->rootData.nData, |
5337 &pReader->leafReader); | 5092 &pReader->leafReader); |
5338 if( rc!=SQLITE_OK ){ | |
5339 dataBufferDestroy(&pReader->rootData); | |
5340 return rc; | |
5341 } | |
5342 }else{ | 5093 }else{ |
5343 sqlite3_stmt *s; | 5094 sqlite3_stmt *s; |
5344 int rc = sql_get_leaf_statement(v, idx, &s); | 5095 int rc = sql_get_leaf_statement(v, idx, &s); |
5345 if( rc!=SQLITE_OK ) return rc; | 5096 if( rc!=SQLITE_OK ) return rc; |
5346 | 5097 |
5347 rc = sqlite3_bind_int64(s, 1, iStartBlockid); | 5098 rc = sqlite3_bind_int64(s, 1, iStartBlockid); |
5348 if( rc!=SQLITE_OK ) goto err; | 5099 if( rc!=SQLITE_OK ) return rc; |
5349 | 5100 |
5350 rc = sqlite3_bind_int64(s, 2, iEndBlockid); | 5101 rc = sqlite3_bind_int64(s, 2, iEndBlockid); |
5351 if( rc!=SQLITE_OK ) goto err; | 5102 if( rc!=SQLITE_OK ) return rc; |
5352 | 5103 |
5353 rc = sqlite3_step(s); | 5104 rc = sqlite3_step(s); |
5354 | |
5355 /* Corrupt if interior node referenced missing leaf node. */ | |
5356 if( rc==SQLITE_DONE ){ | 5105 if( rc==SQLITE_DONE ){ |
5357 rc = SQLITE_CORRUPT_BKPT; | 5106 pReader->eof = 1; |
5358 goto err; | 5107 return SQLITE_OK; |
5359 } | 5108 } |
5360 | 5109 if( rc!=SQLITE_ROW ) return rc; |
5361 if( rc!=SQLITE_ROW ) goto err; | |
5362 rc = SQLITE_OK; | |
5363 | |
5364 /* Corrupt if leaf data isn't a blob. */ | |
5365 if( sqlite3_column_type(s, 0)!=SQLITE_BLOB ){ | |
5366 rc = SQLITE_CORRUPT_BKPT; | |
5367 }else{ | |
5368 const char *pLeafData = sqlite3_column_blob(s, 0); | |
5369 int nLeafData = sqlite3_column_bytes(s, 0); | |
5370 | |
5371 /* Corrupt if this can't be a leaf node. */ | |
5372 if( pLeafData==NULL || nLeafData<1 || pLeafData[0]!='\0' ){ | |
5373 rc = SQLITE_CORRUPT_BKPT; | |
5374 }else{ | |
5375 rc = leafReaderInit(pLeafData, nLeafData, &pReader->leafReader); | |
5376 } | |
5377 } | |
5378 | |
5379 err: | |
5380 if( rc!=SQLITE_OK ){ | |
5381 if( idx==-1 ){ | |
5382 sqlite3_finalize(s); | |
5383 }else{ | |
5384 sqlite3_reset(s); | |
5385 } | |
5386 return rc; | |
5387 } | |
5388 | 5110 |
5389 pReader->pStmt = s; | 5111 pReader->pStmt = s; |
| 5112 leafReaderInit(sqlite3_column_blob(pReader->pStmt, 0), |
| 5113 sqlite3_column_bytes(pReader->pStmt, 0), |
| 5114 &pReader->leafReader); |
5390 } | 5115 } |
5391 return SQLITE_OK; | 5116 return SQLITE_OK; |
5392 } | 5117 } |
5393 | 5118 |
5394 /* Step the current leaf forward to the next term. If we reach the | 5119 /* Step the current leaf forward to the next term. If we reach the |
5395 ** end of the current leaf, step forward to the next leaf block. | 5120 ** end of the current leaf, step forward to the next leaf block. |
5396 */ | 5121 */ |
5397 static int leavesReaderStep(fulltext_vtab *v, LeavesReader *pReader){ | 5122 static int leavesReaderStep(fulltext_vtab *v, LeavesReader *pReader){ |
5398 int rc; | |
5399 assert( !leavesReaderAtEnd(pReader) ); | 5123 assert( !leavesReaderAtEnd(pReader) ); |
5400 rc = leafReaderStep(&pReader->leafReader); | 5124 leafReaderStep(&pReader->leafReader); |
5401 if( rc!=SQLITE_OK ) return rc; | |
5402 | 5125 |
5403 if( leafReaderAtEnd(&pReader->leafReader) ){ | 5126 if( leafReaderAtEnd(&pReader->leafReader) ){ |
| 5127 int rc; |
5404 if( pReader->rootData.pData ){ | 5128 if( pReader->rootData.pData ){ |
5405 pReader->eof = 1; | 5129 pReader->eof = 1; |
5406 return SQLITE_OK; | 5130 return SQLITE_OK; |
5407 } | 5131 } |
5408 rc = sqlite3_step(pReader->pStmt); | 5132 rc = sqlite3_step(pReader->pStmt); |
5409 if( rc!=SQLITE_ROW ){ | 5133 if( rc!=SQLITE_ROW ){ |
5410 pReader->eof = 1; | 5134 pReader->eof = 1; |
5411 return rc==SQLITE_DONE ? SQLITE_OK : rc; | 5135 return rc==SQLITE_DONE ? SQLITE_OK : rc; |
5412 } | 5136 } |
5413 | 5137 leafReaderDestroy(&pReader->leafReader); |
5414 /* Corrupt if leaf data isn't a blob. */ | 5138 leafReaderInit(sqlite3_column_blob(pReader->pStmt, 0), |
5415 if( sqlite3_column_type(pReader->pStmt, 0)!=SQLITE_BLOB ){ | 5139 sqlite3_column_bytes(pReader->pStmt, 0), |
5416 return SQLITE_CORRUPT_BKPT; | 5140 &pReader->leafReader); |
5417 }else{ | |
5418 LeafReader tmp; | |
5419 const char *pLeafData = sqlite3_column_blob(pReader->pStmt, 0); | |
5420 int nLeafData = sqlite3_column_bytes(pReader->pStmt, 0); | |
5421 | |
5422 /* Corrupt if this can't be a leaf node. */ | |
5423 if( pLeafData==NULL || nLeafData<1 || pLeafData[0]!='\0' ){ | |
5424 return SQLITE_CORRUPT_BKPT; | |
5425 } | |
5426 | |
5427 rc = leafReaderInit(pLeafData, nLeafData, &tmp); | |
5428 if( rc!=SQLITE_OK ) return rc; | |
5429 leafReaderDestroy(&pReader->leafReader); | |
5430 pReader->leafReader = tmp; | |
5431 } | |
5432 } | 5141 } |
5433 return SQLITE_OK; | 5142 return SQLITE_OK; |
5434 } | 5143 } |
5435 | 5144 |
5436 /* Order LeavesReaders by their term, ignoring idx. Readers at eof | 5145 /* Order LeavesReaders by their term, ignoring idx. Readers at eof |
5437 ** always sort to the end. | 5146 ** always sort to the end. |
5438 */ | 5147 */ |
5439 static int leavesReaderTermCmp(LeavesReader *lr1, LeavesReader *lr2){ | 5148 static int leavesReaderTermCmp(LeavesReader *lr1, LeavesReader *lr2){ |
5440 if( leavesReaderAtEnd(lr1) ){ | 5149 if( leavesReaderAtEnd(lr1) ){ |
5441 if( leavesReaderAtEnd(lr2) ) return 0; | 5150 if( leavesReaderAtEnd(lr2) ) return 0; |
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5482 | 5191 |
5483 rc = sqlite3_bind_int(s, 1, iLevel); | 5192 rc = sqlite3_bind_int(s, 1, iLevel); |
5484 if( rc!=SQLITE_OK ) return rc; | 5193 if( rc!=SQLITE_OK ) return rc; |
5485 | 5194 |
5486 i = 0; | 5195 i = 0; |
5487 while( (rc = sqlite3_step(s))==SQLITE_ROW ){ | 5196 while( (rc = sqlite3_step(s))==SQLITE_ROW ){ |
5488 sqlite_int64 iStart = sqlite3_column_int64(s, 0); | 5197 sqlite_int64 iStart = sqlite3_column_int64(s, 0); |
5489 sqlite_int64 iEnd = sqlite3_column_int64(s, 1); | 5198 sqlite_int64 iEnd = sqlite3_column_int64(s, 1); |
5490 const char *pRootData = sqlite3_column_blob(s, 2); | 5199 const char *pRootData = sqlite3_column_blob(s, 2); |
5491 int nRootData = sqlite3_column_bytes(s, 2); | 5200 int nRootData = sqlite3_column_bytes(s, 2); |
5492 sqlite_int64 iIndex = sqlite3_column_int64(s, 3); | |
5493 | 5201 |
5494 /* Corrupt if we get back different types than we stored. */ | 5202 assert( i<MERGE_COUNT ); |
5495 /* Also corrupt if the index is not sequential starting at 0. */ | |
5496 if( sqlite3_column_type(s, 0)!=SQLITE_INTEGER || | |
5497 sqlite3_column_type(s, 1)!=SQLITE_INTEGER || | |
5498 sqlite3_column_type(s, 2)!=SQLITE_BLOB || | |
5499 i!=iIndex || | |
5500 i>=MERGE_COUNT ){ | |
5501 rc = SQLITE_CORRUPT_BKPT; | |
5502 break; | |
5503 } | |
5504 | |
5505 rc = leavesReaderInit(v, i, iStart, iEnd, pRootData, nRootData, | 5203 rc = leavesReaderInit(v, i, iStart, iEnd, pRootData, nRootData, |
5506 &pReaders[i]); | 5204 &pReaders[i]); |
5507 if( rc!=SQLITE_OK ) break; | 5205 if( rc!=SQLITE_OK ) break; |
5508 | 5206 |
5509 i++; | 5207 i++; |
5510 } | 5208 } |
5511 if( rc!=SQLITE_DONE ){ | 5209 if( rc!=SQLITE_DONE ){ |
5512 while( i-->0 ){ | 5210 while( i-->0 ){ |
5513 leavesReaderDestroy(&pReaders[i]); | 5211 leavesReaderDestroy(&pReaders[i]); |
5514 } | 5212 } |
5515 sqlite3_reset(s); /* So we don't leave a lock. */ | |
5516 return rc; | 5213 return rc; |
5517 } | 5214 } |
5518 | 5215 |
5519 *piReaders = i; | 5216 *piReaders = i; |
5520 | 5217 |
5521 /* Leave our results sorted by term, then age. */ | 5218 /* Leave our results sorted by term, then age. */ |
5522 while( i-- ){ | 5219 while( i-- ){ |
5523 leavesReaderReorder(pReaders+i, *piReaders-i); | 5220 leavesReaderReorder(pReaders+i, *piReaders-i); |
5524 } | 5221 } |
5525 return SQLITE_OK; | 5222 return SQLITE_OK; |
5526 } | 5223 } |
5527 | 5224 |
5528 /* Merge doclists from pReaders[nReaders] into a single doclist, which | 5225 /* Merge doclists from pReaders[nReaders] into a single doclist, which |
5529 ** is written to pWriter. Assumes pReaders is ordered oldest to | 5226 ** is written to pWriter. Assumes pReaders is ordered oldest to |
5530 ** newest. | 5227 ** newest. |
5531 */ | 5228 */ |
5532 /* TODO(shess) Consider putting this inline in segmentMerge(). */ | 5229 /* TODO(shess) Consider putting this inline in segmentMerge(). */ |
5533 static int leavesReadersMerge(fulltext_vtab *v, | 5230 static int leavesReadersMerge(fulltext_vtab *v, |
5534 LeavesReader *pReaders, int nReaders, | 5231 LeavesReader *pReaders, int nReaders, |
5535 LeafWriter *pWriter){ | 5232 LeafWriter *pWriter){ |
5536 DLReader dlReaders[MERGE_COUNT]; | 5233 DLReader dlReaders[MERGE_COUNT]; |
5537 const char *pTerm = leavesReaderTerm(pReaders); | 5234 const char *pTerm = leavesReaderTerm(pReaders); |
5538 int i, nTerm = leavesReaderTermBytes(pReaders); | 5235 int i, nTerm = leavesReaderTermBytes(pReaders); |
5539 int rc; | |
5540 | 5236 |
5541 assert( nReaders<=MERGE_COUNT ); | 5237 assert( nReaders<=MERGE_COUNT ); |
5542 | 5238 |
5543 for(i=0; i<nReaders; i++){ | 5239 for(i=0; i<nReaders; i++){ |
5544 const char *pData = leavesReaderData(pReaders+i); | 5240 dlrInit(&dlReaders[i], DL_DEFAULT, |
5545 if( pData==NULL ){ | 5241 leavesReaderData(pReaders+i), |
5546 rc = SQLITE_CORRUPT_BKPT; | 5242 leavesReaderDataBytes(pReaders+i)); |
5547 break; | |
5548 } | |
5549 rc = dlrInit(&dlReaders[i], DL_DEFAULT, | |
5550 pData, | |
5551 leavesReaderDataBytes(pReaders+i)); | |
5552 if( rc!=SQLITE_OK ) break; | |
5553 } | |
5554 if( rc!=SQLITE_OK ){ | |
5555 while( i-->0 ){ | |
5556 dlrDestroy(&dlReaders[i]); | |
5557 } | |
5558 return rc; | |
5559 } | 5243 } |
5560 | 5244 |
5561 return leafWriterStepMerge(v, pWriter, pTerm, nTerm, dlReaders, nReaders); | 5245 return leafWriterStepMerge(v, pWriter, pTerm, nTerm, dlReaders, nReaders); |
5562 } | 5246 } |
5563 | 5247 |
5564 /* Forward ref due to mutual recursion with segdirNextIndex(). */ | 5248 /* Forward ref due to mutual recursion with segdirNextIndex(). */ |
5565 static int segmentMerge(fulltext_vtab *v, int iLevel); | 5249 static int segmentMerge(fulltext_vtab *v, int iLevel); |
5566 | 5250 |
5567 /* Put the next available index at iLevel into *pidx. If iLevel | 5251 /* Put the next available index at iLevel into *pidx. If iLevel |
5568 ** already has MERGE_COUNT segments, they are merged to a higher | 5252 ** already has MERGE_COUNT segments, they are merged to a higher |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5602 if( rc!=SQLITE_OK ) return rc; | 5286 if( rc!=SQLITE_OK ) return rc; |
5603 | 5287 |
5604 /* TODO(shess) This assumes that we'll always see exactly | 5288 /* TODO(shess) This assumes that we'll always see exactly |
5605 ** MERGE_COUNT segments to merge at a given level. That will be | 5289 ** MERGE_COUNT segments to merge at a given level. That will be |
5606 ** broken if we allow the developer to request preemptive or | 5290 ** broken if we allow the developer to request preemptive or |
5607 ** deferred merging. | 5291 ** deferred merging. |
5608 */ | 5292 */ |
5609 memset(&lrs, '\0', sizeof(lrs)); | 5293 memset(&lrs, '\0', sizeof(lrs)); |
5610 rc = leavesReadersInit(v, iLevel, lrs, &i); | 5294 rc = leavesReadersInit(v, iLevel, lrs, &i); |
5611 if( rc!=SQLITE_OK ) return rc; | 5295 if( rc!=SQLITE_OK ) return rc; |
| 5296 assert( i==MERGE_COUNT ); |
5612 | 5297 |
5613 leafWriterInit(iLevel+1, idx, &writer); | 5298 leafWriterInit(iLevel+1, idx, &writer); |
5614 | 5299 |
5615 if( i!=MERGE_COUNT ){ | |
5616 rc = SQLITE_CORRUPT_BKPT; | |
5617 goto err; | |
5618 } | |
5619 | |
5620 /* Since leavesReaderReorder() pushes readers at eof to the end, | 5300 /* Since leavesReaderReorder() pushes readers at eof to the end, |
5621 ** when the first reader is empty, all will be empty. | 5301 ** when the first reader is empty, all will be empty. |
5622 */ | 5302 */ |
5623 while( !leavesReaderAtEnd(lrs) ){ | 5303 while( !leavesReaderAtEnd(lrs) ){ |
5624 /* Figure out how many readers share their next term. */ | 5304 /* Figure out how many readers share their next term. */ |
5625 for(i=1; i<MERGE_COUNT && !leavesReaderAtEnd(lrs+i); i++){ | 5305 for(i=1; i<MERGE_COUNT && !leavesReaderAtEnd(lrs+i); i++){ |
5626 if( 0!=leavesReaderTermCmp(lrs, lrs+i) ) break; | 5306 if( 0!=leavesReaderTermCmp(lrs, lrs+i) ) break; |
5627 } | 5307 } |
5628 | 5308 |
5629 rc = leavesReadersMerge(v, lrs, i, &writer); | 5309 rc = leavesReadersMerge(v, lrs, i, &writer); |
(...skipping 22 matching lines...) Expand all Loading... |
5652 | 5332 |
5653 err: | 5333 err: |
5654 for(i=0; i<MERGE_COUNT; i++){ | 5334 for(i=0; i<MERGE_COUNT; i++){ |
5655 leavesReaderDestroy(&lrs[i]); | 5335 leavesReaderDestroy(&lrs[i]); |
5656 } | 5336 } |
5657 leafWriterDestroy(&writer); | 5337 leafWriterDestroy(&writer); |
5658 return rc; | 5338 return rc; |
5659 } | 5339 } |
5660 | 5340 |
5661 /* Accumulate the union of *acc and *pData into *acc. */ | 5341 /* Accumulate the union of *acc and *pData into *acc. */ |
5662 static int docListAccumulateUnion(DataBuffer *acc, | 5342 static void docListAccumulateUnion(DataBuffer *acc, |
5663 const char *pData, int nData) { | 5343 const char *pData, int nData) { |
5664 DataBuffer tmp = *acc; | 5344 DataBuffer tmp = *acc; |
5665 int rc; | |
5666 dataBufferInit(acc, tmp.nData+nData); | 5345 dataBufferInit(acc, tmp.nData+nData); |
5667 rc = docListUnion(tmp.pData, tmp.nData, pData, nData, acc); | 5346 docListUnion(tmp.pData, tmp.nData, pData, nData, acc); |
5668 dataBufferDestroy(&tmp); | 5347 dataBufferDestroy(&tmp); |
5669 return rc; | |
5670 } | 5348 } |
5671 | 5349 |
5672 /* TODO(shess) It might be interesting to explore different merge | 5350 /* TODO(shess) It might be interesting to explore different merge |
5673 ** strategies, here. For instance, since this is a sorted merge, we | 5351 ** strategies, here. For instance, since this is a sorted merge, we |
5674 ** could easily merge many doclists in parallel. With some | 5352 ** could easily merge many doclists in parallel. With some |
5675 ** comprehension of the storage format, we could merge all of the | 5353 ** comprehension of the storage format, we could merge all of the |
5676 ** doclists within a leaf node directly from the leaf node's storage. | 5354 ** doclists within a leaf node directly from the leaf node's storage. |
5677 ** It may be worthwhile to merge smaller doclists before larger | 5355 ** It may be worthwhile to merge smaller doclists before larger |
5678 ** doclists, since they can be traversed more quickly - but the | 5356 ** doclists, since they can be traversed more quickly - but the |
5679 ** results may have less overlap, making them more expensive in a | 5357 ** results may have less overlap, making them more expensive in a |
(...skipping 21 matching lines...) Expand all Loading... |
5701 for(rc=SQLITE_OK; rc==SQLITE_OK && !leavesReaderAtEnd(pReader); | 5379 for(rc=SQLITE_OK; rc==SQLITE_OK && !leavesReaderAtEnd(pReader); |
5702 rc=leavesReaderStep(v, pReader)){ | 5380 rc=leavesReaderStep(v, pReader)){ |
5703 /* TODO(shess) Really want leavesReaderTermCmp(), but that name is | 5381 /* TODO(shess) Really want leavesReaderTermCmp(), but that name is |
5704 ** already taken to compare the terms of two LeavesReaders. Think | 5382 ** already taken to compare the terms of two LeavesReaders. Think |
5705 ** on a better name. [Meanwhile, break encapsulation rather than | 5383 ** on a better name. [Meanwhile, break encapsulation rather than |
5706 ** use a confusing name.] | 5384 ** use a confusing name.] |
5707 */ | 5385 */ |
5708 int c = leafReaderTermCmp(&pReader->leafReader, pTerm, nTerm, isPrefix); | 5386 int c = leafReaderTermCmp(&pReader->leafReader, pTerm, nTerm, isPrefix); |
5709 if( c>0 ) break; /* Past any possible matches. */ | 5387 if( c>0 ) break; /* Past any possible matches. */ |
5710 if( c==0 ){ | 5388 if( c==0 ){ |
5711 int iBuffer, nData; | |
5712 const char *pData = leavesReaderData(pReader); | 5389 const char *pData = leavesReaderData(pReader); |
5713 if( pData==NULL ){ | 5390 int iBuffer, nData = leavesReaderDataBytes(pReader); |
5714 rc = SQLITE_CORRUPT_BKPT; | |
5715 break; | |
5716 } | |
5717 nData = leavesReaderDataBytes(pReader); | |
5718 | 5391 |
5719 /* Find the first empty buffer. */ | 5392 /* Find the first empty buffer. */ |
5720 for(iBuffer=0; iBuffer<nBuffers; ++iBuffer){ | 5393 for(iBuffer=0; iBuffer<nBuffers; ++iBuffer){ |
5721 if( 0==pBuffers[iBuffer].nData ) break; | 5394 if( 0==pBuffers[iBuffer].nData ) break; |
5722 } | 5395 } |
5723 | 5396 |
5724 /* Out of buffers, add an empty one. */ | 5397 /* Out of buffers, add an empty one. */ |
5725 if( iBuffer==nBuffers ){ | 5398 if( iBuffer==nBuffers ){ |
5726 if( nBuffers==nMaxBuffers ){ | 5399 if( nBuffers==nMaxBuffers ){ |
5727 DataBuffer *p; | 5400 DataBuffer *p; |
(...skipping 25 matching lines...) Expand all Loading... |
5753 dataBufferReplace(&(pBuffers[0]), pData, nData); | 5426 dataBufferReplace(&(pBuffers[0]), pData, nData); |
5754 }else{ | 5427 }else{ |
5755 /* pAcc is the empty buffer the merged data will end up in. */ | 5428 /* pAcc is the empty buffer the merged data will end up in. */ |
5756 DataBuffer *pAcc = &(pBuffers[iBuffer]); | 5429 DataBuffer *pAcc = &(pBuffers[iBuffer]); |
5757 DataBuffer *p = &(pBuffers[0]); | 5430 DataBuffer *p = &(pBuffers[0]); |
5758 | 5431 |
5759 /* Handle position 0 specially to avoid need to prime pAcc | 5432 /* Handle position 0 specially to avoid need to prime pAcc |
5760 ** with pData/nData. | 5433 ** with pData/nData. |
5761 */ | 5434 */ |
5762 dataBufferSwap(p, pAcc); | 5435 dataBufferSwap(p, pAcc); |
5763 rc = docListAccumulateUnion(pAcc, pData, nData); | 5436 docListAccumulateUnion(pAcc, pData, nData); |
5764 if( rc!=SQLITE_OK ) goto err; | |
5765 | 5437 |
5766 /* Accumulate remaining doclists into pAcc. */ | 5438 /* Accumulate remaining doclists into pAcc. */ |
5767 for(++p; p<pAcc; ++p){ | 5439 for(++p; p<pAcc; ++p){ |
5768 rc = docListAccumulateUnion(pAcc, p->pData, p->nData); | 5440 docListAccumulateUnion(pAcc, p->pData, p->nData); |
5769 if( rc!=SQLITE_OK ) goto err; | |
5770 | 5441 |
5771 /* dataBufferReset() could allow a large doclist to blow up | 5442 /* dataBufferReset() could allow a large doclist to blow up |
5772 ** our memory requirements. | 5443 ** our memory requirements. |
5773 */ | 5444 */ |
5774 if( p->nCapacity<1024 ){ | 5445 if( p->nCapacity<1024 ){ |
5775 dataBufferReset(p); | 5446 dataBufferReset(p); |
5776 }else{ | 5447 }else{ |
5777 dataBufferDestroy(p); | 5448 dataBufferDestroy(p); |
5778 dataBufferInit(p, 0); | 5449 dataBufferInit(p, 0); |
5779 } | 5450 } |
5780 } | 5451 } |
5781 } | 5452 } |
5782 } | 5453 } |
5783 } | 5454 } |
5784 | 5455 |
5785 /* Union all the doclists together into *out. */ | 5456 /* Union all the doclists together into *out. */ |
5786 /* TODO(shess) What if *out is big? Sigh. */ | 5457 /* TODO(shess) What if *out is big? Sigh. */ |
5787 if( rc==SQLITE_OK && nBuffers>0 ){ | 5458 if( rc==SQLITE_OK && nBuffers>0 ){ |
5788 int iBuffer; | 5459 int iBuffer; |
5789 for(iBuffer=0; iBuffer<nBuffers; ++iBuffer){ | 5460 for(iBuffer=0; iBuffer<nBuffers; ++iBuffer){ |
5790 if( pBuffers[iBuffer].nData>0 ){ | 5461 if( pBuffers[iBuffer].nData>0 ){ |
5791 if( out->nData==0 ){ | 5462 if( out->nData==0 ){ |
5792 dataBufferSwap(out, &(pBuffers[iBuffer])); | 5463 dataBufferSwap(out, &(pBuffers[iBuffer])); |
5793 }else{ | 5464 }else{ |
5794 rc = docListAccumulateUnion(out, pBuffers[iBuffer].pData, | 5465 docListAccumulateUnion(out, pBuffers[iBuffer].pData, |
5795 pBuffers[iBuffer].nData); | 5466 pBuffers[iBuffer].nData); |
5796 if( rc!=SQLITE_OK ) break; | |
5797 } | 5467 } |
5798 } | 5468 } |
5799 } | 5469 } |
5800 } | 5470 } |
5801 | 5471 |
5802 err: | |
5803 while( nBuffers-- ){ | 5472 while( nBuffers-- ){ |
5804 dataBufferDestroy(&(pBuffers[nBuffers])); | 5473 dataBufferDestroy(&(pBuffers[nBuffers])); |
5805 } | 5474 } |
5806 if( pBuffers!=NULL ) sqlite3_free(pBuffers); | 5475 if( pBuffers!=NULL ) sqlite3_free(pBuffers); |
5807 | 5476 |
5808 return rc; | 5477 return rc; |
5809 } | 5478 } |
5810 | 5479 |
5811 /* Call loadSegmentLeavesInt() with pData/nData as input. */ | 5480 /* Call loadSegmentLeavesInt() with pData/nData as input. */ |
5812 static int loadSegmentLeaf(fulltext_vtab *v, const char *pData, int nData, | 5481 static int loadSegmentLeaf(fulltext_vtab *v, const char *pData, int nData, |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5851 ** nodes which could include pTerm/nTerm/isPrefix. Note that the | 5520 ** nodes which could include pTerm/nTerm/isPrefix. Note that the |
5852 ** interior node terms logically come between the blocks, so there is | 5521 ** interior node terms logically come between the blocks, so there is |
5853 ** one more blockid than there are terms (that block contains terms >= | 5522 ** one more blockid than there are terms (that block contains terms >= |
5854 ** the last interior-node term). | 5523 ** the last interior-node term). |
5855 */ | 5524 */ |
5856 /* TODO(shess) The calling code may already know that the end child is | 5525 /* TODO(shess) The calling code may already know that the end child is |
5857 ** not worth calculating, because the end may be in a later sibling | 5526 ** not worth calculating, because the end may be in a later sibling |
5858 ** node. Consider whether breaking symmetry is worthwhile. I suspect | 5527 ** node. Consider whether breaking symmetry is worthwhile. I suspect |
5859 ** it is not worthwhile. | 5528 ** it is not worthwhile. |
5860 */ | 5529 */ |
5861 static int getChildrenContaining(const char *pData, int nData, | 5530 static void getChildrenContaining(const char *pData, int nData, |
5862 const char *pTerm, int nTerm, int isPrefix, | 5531 const char *pTerm, int nTerm, int isPrefix, |
5863 sqlite_int64 *piStartChild, | 5532 sqlite_int64 *piStartChild, |
5864 sqlite_int64 *piEndChild){ | 5533 sqlite_int64 *piEndChild){ |
5865 InteriorReader reader; | 5534 InteriorReader reader; |
5866 int rc; | |
5867 | 5535 |
5868 assert( nData>1 ); | 5536 assert( nData>1 ); |
5869 assert( *pData!='\0' ); | 5537 assert( *pData!='\0' ); |
5870 rc = interiorReaderInit(pData, nData, &reader); | 5538 interiorReaderInit(pData, nData, &reader); |
5871 if( rc!=SQLITE_OK ) return rc; | |
5872 | 5539 |
5873 /* Scan for the first child which could contain pTerm/nTerm. */ | 5540 /* Scan for the first child which could contain pTerm/nTerm. */ |
5874 while( !interiorReaderAtEnd(&reader) ){ | 5541 while( !interiorReaderAtEnd(&reader) ){ |
5875 if( interiorReaderTermCmp(&reader, pTerm, nTerm, 0)>0 ) break; | 5542 if( interiorReaderTermCmp(&reader, pTerm, nTerm, 0)>0 ) break; |
5876 rc = interiorReaderStep(&reader); | 5543 interiorReaderStep(&reader); |
5877 if( rc!=SQLITE_OK ){ | |
5878 interiorReaderDestroy(&reader); | |
5879 return rc; | |
5880 } | |
5881 } | 5544 } |
5882 *piStartChild = interiorReaderCurrentBlockid(&reader); | 5545 *piStartChild = interiorReaderCurrentBlockid(&reader); |
5883 | 5546 |
5884 /* Keep scanning to find a term greater than our term, using prefix | 5547 /* Keep scanning to find a term greater than our term, using prefix |
5885 ** comparison if indicated. If isPrefix is false, this will be the | 5548 ** comparison if indicated. If isPrefix is false, this will be the |
5886 ** same blockid as the starting block. | 5549 ** same blockid as the starting block. |
5887 */ | 5550 */ |
5888 while( !interiorReaderAtEnd(&reader) ){ | 5551 while( !interiorReaderAtEnd(&reader) ){ |
5889 if( interiorReaderTermCmp(&reader, pTerm, nTerm, isPrefix)>0 ) break; | 5552 if( interiorReaderTermCmp(&reader, pTerm, nTerm, isPrefix)>0 ) break; |
5890 rc = interiorReaderStep(&reader); | 5553 interiorReaderStep(&reader); |
5891 if( rc!=SQLITE_OK ){ | |
5892 interiorReaderDestroy(&reader); | |
5893 return rc; | |
5894 } | |
5895 } | 5554 } |
5896 *piEndChild = interiorReaderCurrentBlockid(&reader); | 5555 *piEndChild = interiorReaderCurrentBlockid(&reader); |
5897 | 5556 |
5898 interiorReaderDestroy(&reader); | 5557 interiorReaderDestroy(&reader); |
5899 | 5558 |
5900 /* Children must ascend, and if !prefix, both must be the same. */ | 5559 /* Children must ascend, and if !prefix, both must be the same. */ |
5901 assert( *piEndChild>=*piStartChild ); | 5560 assert( *piEndChild>=*piStartChild ); |
5902 assert( isPrefix || *piStartChild==*piEndChild ); | 5561 assert( isPrefix || *piStartChild==*piEndChild ); |
5903 return rc; | |
5904 } | 5562 } |
5905 | 5563 |
5906 /* Read block at iBlockid and pass it with other params to | 5564 /* Read block at iBlockid and pass it with other params to |
5907 ** getChildrenContaining(). | 5565 ** getChildrenContaining(). |
5908 */ | 5566 */ |
5909 static int loadAndGetChildrenContaining( | 5567 static int loadAndGetChildrenContaining( |
5910 fulltext_vtab *v, | 5568 fulltext_vtab *v, |
5911 sqlite_int64 iBlockid, | 5569 sqlite_int64 iBlockid, |
5912 const char *pTerm, int nTerm, int isPrefix, | 5570 const char *pTerm, int nTerm, int isPrefix, |
5913 sqlite_int64 *piStartChild, sqlite_int64 *piEndChild | 5571 sqlite_int64 *piStartChild, sqlite_int64 *piEndChild |
5914 ){ | 5572 ){ |
5915 sqlite3_stmt *s = NULL; | 5573 sqlite3_stmt *s = NULL; |
5916 int rc; | 5574 int rc; |
5917 | 5575 |
5918 assert( iBlockid!=0 ); | 5576 assert( iBlockid!=0 ); |
5919 assert( pTerm!=NULL ); | 5577 assert( pTerm!=NULL ); |
5920 assert( nTerm!=0 ); /* TODO(shess) Why not allow this? */ | 5578 assert( nTerm!=0 ); /* TODO(shess) Why not allow this? */ |
5921 assert( piStartChild!=NULL ); | 5579 assert( piStartChild!=NULL ); |
5922 assert( piEndChild!=NULL ); | 5580 assert( piEndChild!=NULL ); |
5923 | 5581 |
5924 rc = sql_get_statement(v, BLOCK_SELECT_STMT, &s); | 5582 rc = sql_get_statement(v, BLOCK_SELECT_STMT, &s); |
5925 if( rc!=SQLITE_OK ) return rc; | 5583 if( rc!=SQLITE_OK ) return rc; |
5926 | 5584 |
5927 rc = sqlite3_bind_int64(s, 1, iBlockid); | 5585 rc = sqlite3_bind_int64(s, 1, iBlockid); |
5928 if( rc!=SQLITE_OK ) return rc; | 5586 if( rc!=SQLITE_OK ) return rc; |
5929 | 5587 |
5930 rc = sqlite3_step(s); | 5588 rc = sqlite3_step(s); |
5931 /* Corrupt if interior node references missing child node. */ | 5589 if( rc==SQLITE_DONE ) return SQLITE_ERROR; |
5932 if( rc==SQLITE_DONE ) return SQLITE_CORRUPT_BKPT; | |
5933 if( rc!=SQLITE_ROW ) return rc; | 5590 if( rc!=SQLITE_ROW ) return rc; |
5934 | 5591 |
5935 /* Corrupt if child node isn't a blob. */ | 5592 getChildrenContaining(sqlite3_column_blob(s, 0), sqlite3_column_bytes(s, 0), |
5936 if( sqlite3_column_type(s, 0)!=SQLITE_BLOB ){ | 5593 pTerm, nTerm, isPrefix, piStartChild, piEndChild); |
5937 sqlite3_reset(s); /* So we don't leave a lock. */ | |
5938 return SQLITE_CORRUPT_BKPT; | |
5939 }else{ | |
5940 const char *pData = sqlite3_column_blob(s, 0); | |
5941 int nData = sqlite3_column_bytes(s, 0); | |
5942 | |
5943 /* Corrupt if child is not a valid interior node. */ | |
5944 if( pData==NULL || nData<1 || pData[0]=='\0' ){ | |
5945 sqlite3_reset(s); /* So we don't leave a lock. */ | |
5946 return SQLITE_CORRUPT_BKPT; | |
5947 } | |
5948 | |
5949 rc = getChildrenContaining(pData, nData, pTerm, nTerm, | |
5950 isPrefix, piStartChild, piEndChild); | |
5951 if( rc!=SQLITE_OK ){ | |
5952 sqlite3_reset(s); | |
5953 return rc; | |
5954 } | |
5955 } | |
5956 | 5594 |
5957 /* We expect only one row. We must execute another sqlite3_step() | 5595 /* We expect only one row. We must execute another sqlite3_step() |
5958 * to complete the iteration; otherwise the table will remain | 5596 * to complete the iteration; otherwise the table will remain |
5959 * locked. */ | 5597 * locked. */ |
5960 rc = sqlite3_step(s); | 5598 rc = sqlite3_step(s); |
5961 if( rc==SQLITE_ROW ) return SQLITE_ERROR; | 5599 if( rc==SQLITE_ROW ) return SQLITE_ERROR; |
5962 if( rc!=SQLITE_DONE ) return rc; | 5600 if( rc!=SQLITE_DONE ) return rc; |
5963 | 5601 |
5964 return SQLITE_OK; | 5602 return SQLITE_OK; |
5965 } | 5603 } |
5966 | 5604 |
5967 /* Traverse the tree represented by pData[nData] looking for | 5605 /* Traverse the tree represented by pData[nData] looking for |
5968 ** pTerm[nTerm], placing its doclist into *out. This is internal to | 5606 ** pTerm[nTerm], placing its doclist into *out. This is internal to |
5969 ** loadSegment() to make error-handling cleaner. | 5607 ** loadSegment() to make error-handling cleaner. |
5970 */ | 5608 */ |
5971 static int loadSegmentInt(fulltext_vtab *v, const char *pData, int nData, | 5609 static int loadSegmentInt(fulltext_vtab *v, const char *pData, int nData, |
5972 sqlite_int64 iLeavesEnd, | 5610 sqlite_int64 iLeavesEnd, |
5973 const char *pTerm, int nTerm, int isPrefix, | 5611 const char *pTerm, int nTerm, int isPrefix, |
5974 DataBuffer *out){ | 5612 DataBuffer *out){ |
5975 /* Special case where root is a leaf. */ | 5613 /* Special case where root is a leaf. */ |
5976 if( *pData=='\0' ){ | 5614 if( *pData=='\0' ){ |
5977 return loadSegmentLeaf(v, pData, nData, pTerm, nTerm, isPrefix, out); | 5615 return loadSegmentLeaf(v, pData, nData, pTerm, nTerm, isPrefix, out); |
5978 }else{ | 5616 }else{ |
5979 int rc; | 5617 int rc; |
5980 sqlite_int64 iStartChild, iEndChild; | 5618 sqlite_int64 iStartChild, iEndChild; |
5981 | 5619 |
5982 /* Process pData as an interior node, then loop down the tree | 5620 /* Process pData as an interior node, then loop down the tree |
5983 ** until we find the set of leaf nodes to scan for the term. | 5621 ** until we find the set of leaf nodes to scan for the term. |
5984 */ | 5622 */ |
5985 rc = getChildrenContaining(pData, nData, pTerm, nTerm, isPrefix, | 5623 getChildrenContaining(pData, nData, pTerm, nTerm, isPrefix, |
5986 &iStartChild, &iEndChild); | 5624 &iStartChild, &iEndChild); |
5987 if( rc!=SQLITE_OK ) return rc; | |
5988 while( iStartChild>iLeavesEnd ){ | 5625 while( iStartChild>iLeavesEnd ){ |
5989 sqlite_int64 iNextStart, iNextEnd; | 5626 sqlite_int64 iNextStart, iNextEnd; |
5990 rc = loadAndGetChildrenContaining(v, iStartChild, pTerm, nTerm, isPrefix, | 5627 rc = loadAndGetChildrenContaining(v, iStartChild, pTerm, nTerm, isPrefix, |
5991 &iNextStart, &iNextEnd); | 5628 &iNextStart, &iNextEnd); |
5992 if( rc!=SQLITE_OK ) return rc; | 5629 if( rc!=SQLITE_OK ) return rc; |
5993 | 5630 |
5994 /* If we've branched, follow the end branch, too. */ | 5631 /* If we've branched, follow the end branch, too. */ |
5995 if( iStartChild!=iEndChild ){ | 5632 if( iStartChild!=iEndChild ){ |
5996 sqlite_int64 iDummy; | 5633 sqlite_int64 iDummy; |
5997 rc = loadAndGetChildrenContaining(v, iEndChild, pTerm, nTerm, isPrefix, | 5634 rc = loadAndGetChildrenContaining(v, iEndChild, pTerm, nTerm, isPrefix, |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6029 ** within a segment, with pairwise merges across segments, or across | 5666 ** within a segment, with pairwise merges across segments, or across |
6030 ** all segments at once. | 5667 ** all segments at once. |
6031 */ | 5668 */ |
6032 static int loadSegment(fulltext_vtab *v, const char *pData, int nData, | 5669 static int loadSegment(fulltext_vtab *v, const char *pData, int nData, |
6033 sqlite_int64 iLeavesEnd, | 5670 sqlite_int64 iLeavesEnd, |
6034 const char *pTerm, int nTerm, int isPrefix, | 5671 const char *pTerm, int nTerm, int isPrefix, |
6035 DataBuffer *out){ | 5672 DataBuffer *out){ |
6036 DataBuffer result; | 5673 DataBuffer result; |
6037 int rc; | 5674 int rc; |
6038 | 5675 |
6039 /* Corrupt if segment root can't be valid. */ | 5676 assert( nData>1 ); |
6040 if( pData==NULL || nData<1 ) return SQLITE_CORRUPT_BKPT; | |
6041 | 5677 |
6042 /* This code should never be called with buffered updates. */ | 5678 /* This code should never be called with buffered updates. */ |
6043 assert( v->nPendingData<0 ); | 5679 assert( v->nPendingData<0 ); |
6044 | 5680 |
6045 dataBufferInit(&result, 0); | 5681 dataBufferInit(&result, 0); |
6046 rc = loadSegmentInt(v, pData, nData, iLeavesEnd, | 5682 rc = loadSegmentInt(v, pData, nData, iLeavesEnd, |
6047 pTerm, nTerm, isPrefix, &result); | 5683 pTerm, nTerm, isPrefix, &result); |
6048 if( rc==SQLITE_OK && result.nData>0 ){ | 5684 if( rc==SQLITE_OK && result.nData>0 ){ |
6049 if( out->nData==0 ){ | 5685 if( out->nData==0 ){ |
6050 DataBuffer tmp = *out; | 5686 DataBuffer tmp = *out; |
6051 *out = result; | 5687 *out = result; |
6052 result = tmp; | 5688 result = tmp; |
6053 }else{ | 5689 }else{ |
6054 DataBuffer merged; | 5690 DataBuffer merged; |
6055 DLReader readers[2]; | 5691 DLReader readers[2]; |
6056 | 5692 |
6057 rc = dlrInit(&readers[0], DL_DEFAULT, out->pData, out->nData); | 5693 dlrInit(&readers[0], DL_DEFAULT, out->pData, out->nData); |
6058 if( rc==SQLITE_OK ){ | 5694 dlrInit(&readers[1], DL_DEFAULT, result.pData, result.nData); |
6059 rc = dlrInit(&readers[1], DL_DEFAULT, result.pData, result.nData); | 5695 dataBufferInit(&merged, out->nData+result.nData); |
6060 if( rc==SQLITE_OK ){ | 5696 docListMerge(&merged, readers, 2); |
6061 dataBufferInit(&merged, out->nData+result.nData); | 5697 dataBufferDestroy(out); |
6062 rc = docListMerge(&merged, readers, 2); | 5698 *out = merged; |
6063 dataBufferDestroy(out); | 5699 dlrDestroy(&readers[0]); |
6064 *out = merged; | 5700 dlrDestroy(&readers[1]); |
6065 dlrDestroy(&readers[1]); | |
6066 } | |
6067 dlrDestroy(&readers[0]); | |
6068 } | |
6069 } | 5701 } |
6070 } | 5702 } |
6071 | |
6072 dataBufferDestroy(&result); | 5703 dataBufferDestroy(&result); |
6073 return rc; | 5704 return rc; |
6074 } | 5705 } |
6075 | 5706 |
6076 /* Scan the database and merge together the posting lists for the term | 5707 /* Scan the database and merge together the posting lists for the term |
6077 ** into *out. | 5708 ** into *out. |
6078 */ | 5709 */ |
6079 static int termSelect(fulltext_vtab *v, int iColumn, | 5710 static int termSelect(fulltext_vtab *v, int iColumn, |
6080 const char *pTerm, int nTerm, int isPrefix, | 5711 const char *pTerm, int nTerm, int isPrefix, |
6081 DocListType iType, DataBuffer *out){ | 5712 DocListType iType, DataBuffer *out){ |
6082 DataBuffer doclist; | 5713 DataBuffer doclist; |
6083 sqlite3_stmt *s; | 5714 sqlite3_stmt *s; |
6084 int rc = sql_get_statement(v, SEGDIR_SELECT_ALL_STMT, &s); | 5715 int rc = sql_get_statement(v, SEGDIR_SELECT_ALL_STMT, &s); |
6085 if( rc!=SQLITE_OK ) return rc; | 5716 if( rc!=SQLITE_OK ) return rc; |
6086 | 5717 |
6087 /* This code should never be called with buffered updates. */ | 5718 /* This code should never be called with buffered updates. */ |
6088 assert( v->nPendingData<0 ); | 5719 assert( v->nPendingData<0 ); |
6089 | 5720 |
6090 dataBufferInit(&doclist, 0); | 5721 dataBufferInit(&doclist, 0); |
6091 | 5722 |
6092 /* Traverse the segments from oldest to newest so that newer doclist | 5723 /* Traverse the segments from oldest to newest so that newer doclist |
6093 ** elements for given docids overwrite older elements. | 5724 ** elements for given docids overwrite older elements. |
6094 */ | 5725 */ |
6095 while( (rc = sqlite3_step(s))==SQLITE_ROW ){ | 5726 while( (rc = sqlite3_step(s))==SQLITE_ROW ){ |
6096 const char *pData = sqlite3_column_blob(s, 2); | 5727 const char *pData = sqlite3_column_blob(s, 2); |
6097 const int nData = sqlite3_column_bytes(s, 2); | 5728 const int nData = sqlite3_column_bytes(s, 2); |
6098 const sqlite_int64 iLeavesEnd = sqlite3_column_int64(s, 1); | 5729 const sqlite_int64 iLeavesEnd = sqlite3_column_int64(s, 1); |
6099 | |
6100 /* Corrupt if we get back different types than we stored. */ | |
6101 if( sqlite3_column_type(s, 1)!=SQLITE_INTEGER || | |
6102 sqlite3_column_type(s, 2)!=SQLITE_BLOB ){ | |
6103 rc = SQLITE_CORRUPT_BKPT; | |
6104 goto err; | |
6105 } | |
6106 | |
6107 rc = loadSegment(v, pData, nData, iLeavesEnd, pTerm, nTerm, isPrefix, | 5730 rc = loadSegment(v, pData, nData, iLeavesEnd, pTerm, nTerm, isPrefix, |
6108 &doclist); | 5731 &doclist); |
6109 if( rc!=SQLITE_OK ) goto err; | 5732 if( rc!=SQLITE_OK ) goto err; |
6110 } | 5733 } |
6111 if( rc==SQLITE_DONE ){ | 5734 if( rc==SQLITE_DONE ){ |
6112 rc = SQLITE_OK; | |
6113 if( doclist.nData!=0 ){ | 5735 if( doclist.nData!=0 ){ |
6114 /* TODO(shess) The old term_select_all() code applied the column | 5736 /* TODO(shess) The old term_select_all() code applied the column |
6115 ** restrict as we merged segments, leading to smaller buffers. | 5737 ** restrict as we merged segments, leading to smaller buffers. |
6116 ** This is probably worthwhile to bring back, once the new storage | 5738 ** This is probably worthwhile to bring back, once the new storage |
6117 ** system is checked in. | 5739 ** system is checked in. |
6118 */ | 5740 */ |
6119 if( iColumn==v->nColumn) iColumn = -1; | 5741 if( iColumn==v->nColumn) iColumn = -1; |
6120 rc = docListTrim(DL_DEFAULT, doclist.pData, doclist.nData, | 5742 docListTrim(DL_DEFAULT, doclist.pData, doclist.nData, |
6121 iColumn, iType, out); | 5743 iColumn, iType, out); |
6122 } | 5744 } |
| 5745 rc = SQLITE_OK; |
6123 } | 5746 } |
6124 | 5747 |
6125 err: | 5748 err: |
6126 sqlite3_reset(s); /* So we don't leave a lock. */ | |
6127 dataBufferDestroy(&doclist); | 5749 dataBufferDestroy(&doclist); |
6128 return rc; | 5750 return rc; |
6129 } | 5751 } |
6130 | 5752 |
6131 /****************************************************************/ | 5753 /****************************************************************/ |
6132 /* Used to hold hashtable data for sorting. */ | 5754 /* Used to hold hashtable data for sorting. */ |
6133 typedef struct TermData { | 5755 typedef struct TermData { |
6134 const char *pTerm; | 5756 const char *pTerm; |
6135 int nTerm; | 5757 int nTerm; |
6136 DLCollector *pCollector; | 5758 DLCollector *pCollector; |
(...skipping 321 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6458 /* optimize() helper function. Put the readers in order and iterate | 6080 /* optimize() helper function. Put the readers in order and iterate |
6459 ** through them, merging doclists for matching terms into pWriter. | 6081 ** through them, merging doclists for matching terms into pWriter. |
6460 ** Returns SQLITE_OK on success, or the SQLite error code which | 6082 ** Returns SQLITE_OK on success, or the SQLite error code which |
6461 ** prevented success. | 6083 ** prevented success. |
6462 */ | 6084 */ |
6463 static int optimizeInternal(fulltext_vtab *v, | 6085 static int optimizeInternal(fulltext_vtab *v, |
6464 OptLeavesReader *readers, int nReaders, | 6086 OptLeavesReader *readers, int nReaders, |
6465 LeafWriter *pWriter){ | 6087 LeafWriter *pWriter){ |
6466 int i, rc = SQLITE_OK; | 6088 int i, rc = SQLITE_OK; |
6467 DataBuffer doclist, merged, tmp; | 6089 DataBuffer doclist, merged, tmp; |
6468 const char *pData; | |
6469 | 6090 |
6470 /* Order the readers. */ | 6091 /* Order the readers. */ |
6471 i = nReaders; | 6092 i = nReaders; |
6472 while( i-- > 0 ){ | 6093 while( i-- > 0 ){ |
6473 optLeavesReaderReorder(&readers[i], nReaders-i); | 6094 optLeavesReaderReorder(&readers[i], nReaders-i); |
6474 } | 6095 } |
6475 | 6096 |
6476 dataBufferInit(&doclist, LEAF_MAX); | 6097 dataBufferInit(&doclist, LEAF_MAX); |
6477 dataBufferInit(&merged, LEAF_MAX); | 6098 dataBufferInit(&merged, LEAF_MAX); |
6478 | 6099 |
6479 /* Exhausted readers bubble to the end, so when the first reader is | 6100 /* Exhausted readers bubble to the end, so when the first reader is |
6480 ** at eof, all are at eof. | 6101 ** at eof, all are at eof. |
6481 */ | 6102 */ |
6482 while( !optLeavesReaderAtEnd(&readers[0]) ){ | 6103 while( !optLeavesReaderAtEnd(&readers[0]) ){ |
6483 | 6104 |
6484 /* Figure out how many readers share the next term. */ | 6105 /* Figure out how many readers share the next term. */ |
6485 for(i=1; i<nReaders && !optLeavesReaderAtEnd(&readers[i]); i++){ | 6106 for(i=1; i<nReaders && !optLeavesReaderAtEnd(&readers[i]); i++){ |
6486 if( 0!=optLeavesReaderTermCmp(&readers[0], &readers[i]) ) break; | 6107 if( 0!=optLeavesReaderTermCmp(&readers[0], &readers[i]) ) break; |
6487 } | 6108 } |
6488 | 6109 |
6489 pData = optLeavesReaderData(&readers[0]); | |
6490 if( pData==NULL ){ | |
6491 rc = SQLITE_CORRUPT_BKPT; | |
6492 break; | |
6493 } | |
6494 | |
6495 /* Special-case for no merge. */ | 6110 /* Special-case for no merge. */ |
6496 if( i==1 ){ | 6111 if( i==1 ){ |
6497 /* Trim deletions from the doclist. */ | 6112 /* Trim deletions from the doclist. */ |
6498 dataBufferReset(&merged); | 6113 dataBufferReset(&merged); |
6499 rc = docListTrim(DL_DEFAULT, | 6114 docListTrim(DL_DEFAULT, |
6500 pData, | 6115 optLeavesReaderData(&readers[0]), |
6501 optLeavesReaderDataBytes(&readers[0]), | 6116 optLeavesReaderDataBytes(&readers[0]), |
6502 -1, DL_DEFAULT, &merged); | 6117 -1, DL_DEFAULT, &merged); |
6503 if( rc!= SQLITE_OK ) break; | |
6504 }else{ | 6118 }else{ |
6505 DLReader dlReaders[MERGE_COUNT]; | 6119 DLReader dlReaders[MERGE_COUNT]; |
6506 int iReader, nReaders; | 6120 int iReader, nReaders; |
6507 | 6121 |
6508 /* Prime the pipeline with the first reader's doclist. After | 6122 /* Prime the pipeline with the first reader's doclist. After |
6509 ** one pass index 0 will reference the accumulated doclist. | 6123 ** one pass index 0 will reference the accumulated doclist. |
6510 */ | 6124 */ |
6511 rc = dlrInit(&dlReaders[0], DL_DEFAULT, | 6125 dlrInit(&dlReaders[0], DL_DEFAULT, |
6512 pData, | 6126 optLeavesReaderData(&readers[0]), |
6513 optLeavesReaderDataBytes(&readers[0])); | 6127 optLeavesReaderDataBytes(&readers[0])); |
6514 if( rc!=SQLITE_OK ) break; | |
6515 iReader = 1; | 6128 iReader = 1; |
6516 | 6129 |
6517 assert( iReader<i ); /* Must execute the loop at least once. */ | 6130 assert( iReader<i ); /* Must execute the loop at least once. */ |
6518 while( iReader<i ){ | 6131 while( iReader<i ){ |
6519 /* Merge 16 inputs per pass. */ | 6132 /* Merge 16 inputs per pass. */ |
6520 for( nReaders=1; iReader<i && nReaders<MERGE_COUNT; | 6133 for( nReaders=1; iReader<i && nReaders<MERGE_COUNT; |
6521 iReader++, nReaders++ ){ | 6134 iReader++, nReaders++ ){ |
6522 pData = optLeavesReaderData(&readers[iReader]); | 6135 dlrInit(&dlReaders[nReaders], DL_DEFAULT, |
6523 if( pData == NULL ){ | 6136 optLeavesReaderData(&readers[iReader]), |
6524 rc = SQLITE_CORRUPT_BKPT; | 6137 optLeavesReaderDataBytes(&readers[iReader])); |
6525 break; | |
6526 } | |
6527 rc = dlrInit(&dlReaders[nReaders], DL_DEFAULT, | |
6528 pData, | |
6529 optLeavesReaderDataBytes(&readers[iReader])); | |
6530 if( rc != SQLITE_OK ) break; | |
6531 } | 6138 } |
6532 | 6139 |
6533 /* Merge doclists and swap result into accumulator. */ | 6140 /* Merge doclists and swap result into accumulator. */ |
6534 if( rc==SQLITE_OK ){ | 6141 dataBufferReset(&merged); |
6535 dataBufferReset(&merged); | 6142 docListMerge(&merged, dlReaders, nReaders); |
6536 rc = docListMerge(&merged, dlReaders, nReaders); | 6143 tmp = merged; |
6537 tmp = merged; | 6144 merged = doclist; |
6538 merged = doclist; | 6145 doclist = tmp; |
6539 doclist = tmp; | |
6540 } | |
6541 | 6146 |
6542 while( nReaders-- > 0 ){ | 6147 while( nReaders-- > 0 ){ |
6543 dlrDestroy(&dlReaders[nReaders]); | 6148 dlrDestroy(&dlReaders[nReaders]); |
6544 } | 6149 } |
6545 | 6150 |
6546 if( rc!=SQLITE_OK ) goto err; | |
6547 | |
6548 /* Accumulated doclist to reader 0 for next pass. */ | 6151 /* Accumulated doclist to reader 0 for next pass. */ |
6549 rc = dlrInit(&dlReaders[0], DL_DEFAULT, doclist.pData, doclist.nData); | 6152 dlrInit(&dlReaders[0], DL_DEFAULT, doclist.pData, doclist.nData); |
6550 if( rc!=SQLITE_OK ) goto err; | |
6551 } | 6153 } |
6552 | 6154 |
6553 /* Destroy reader that was left in the pipeline. */ | 6155 /* Destroy reader that was left in the pipeline. */ |
6554 dlrDestroy(&dlReaders[0]); | 6156 dlrDestroy(&dlReaders[0]); |
6555 | 6157 |
6556 /* Trim deletions from the doclist. */ | 6158 /* Trim deletions from the doclist. */ |
6557 dataBufferReset(&merged); | 6159 dataBufferReset(&merged); |
6558 rc = docListTrim(DL_DEFAULT, doclist.pData, doclist.nData, | 6160 docListTrim(DL_DEFAULT, doclist.pData, doclist.nData, |
6559 -1, DL_DEFAULT, &merged); | 6161 -1, DL_DEFAULT, &merged); |
6560 if( rc!=SQLITE_OK ) goto err; | |
6561 } | 6162 } |
6562 | 6163 |
6563 /* Only pass doclists with hits (skip if all hits deleted). */ | 6164 /* Only pass doclists with hits (skip if all hits deleted). */ |
6564 if( merged.nData>0 ){ | 6165 if( merged.nData>0 ){ |
6565 rc = leafWriterStep(v, pWriter, | 6166 rc = leafWriterStep(v, pWriter, |
6566 optLeavesReaderTerm(&readers[0]), | 6167 optLeavesReaderTerm(&readers[0]), |
6567 optLeavesReaderTermBytes(&readers[0]), | 6168 optLeavesReaderTermBytes(&readers[0]), |
6568 merged.pData, merged.nData); | 6169 merged.pData, merged.nData); |
6569 if( rc!=SQLITE_OK ) goto err; | 6170 if( rc!=SQLITE_OK ) goto err; |
6570 } | 6171 } |
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6630 */ | 6231 */ |
6631 leafWriterInit(iMaxLevel, 0, &writer); | 6232 leafWriterInit(iMaxLevel, 0, &writer); |
6632 | 6233 |
6633 i = 0; | 6234 i = 0; |
6634 while( (rc = sqlite3_step(s))==SQLITE_ROW ){ | 6235 while( (rc = sqlite3_step(s))==SQLITE_ROW ){ |
6635 sqlite_int64 iStart = sqlite3_column_int64(s, 0); | 6236 sqlite_int64 iStart = sqlite3_column_int64(s, 0); |
6636 sqlite_int64 iEnd = sqlite3_column_int64(s, 1); | 6237 sqlite_int64 iEnd = sqlite3_column_int64(s, 1); |
6637 const char *pRootData = sqlite3_column_blob(s, 2); | 6238 const char *pRootData = sqlite3_column_blob(s, 2); |
6638 int nRootData = sqlite3_column_bytes(s, 2); | 6239 int nRootData = sqlite3_column_bytes(s, 2); |
6639 | 6240 |
6640 /* Corrupt if we get back different types than we stored. */ | |
6641 if( sqlite3_column_type(s, 0)!=SQLITE_INTEGER || | |
6642 sqlite3_column_type(s, 1)!=SQLITE_INTEGER || | |
6643 sqlite3_column_type(s, 2)!=SQLITE_BLOB ){ | |
6644 rc = SQLITE_CORRUPT_BKPT; | |
6645 break; | |
6646 } | |
6647 | |
6648 assert( i<nReaders ); | 6241 assert( i<nReaders ); |
6649 rc = leavesReaderInit(v, -1, iStart, iEnd, pRootData, nRootData, | 6242 rc = leavesReaderInit(v, -1, iStart, iEnd, pRootData, nRootData, |
6650 &readers[i].reader); | 6243 &readers[i].reader); |
6651 if( rc!=SQLITE_OK ) break; | 6244 if( rc!=SQLITE_OK ) break; |
6652 | 6245 |
6653 readers[i].segment = i; | 6246 readers[i].segment = i; |
6654 i++; | 6247 i++; |
6655 } | 6248 } |
6656 | 6249 |
6657 /* If we managed to successfully read them all, optimize them. */ | 6250 /* If we managed to successfully read them all, optimize them. */ |
6658 if( rc==SQLITE_DONE ){ | 6251 if( rc==SQLITE_DONE ){ |
6659 assert( i==nReaders ); | 6252 assert( i==nReaders ); |
6660 rc = optimizeInternal(v, readers, nReaders, &writer); | 6253 rc = optimizeInternal(v, readers, nReaders, &writer); |
6661 }else{ | |
6662 sqlite3_reset(s); /* So we don't leave a lock. */ | |
6663 } | 6254 } |
6664 | 6255 |
6665 while( i-- > 0 ){ | 6256 while( i-- > 0 ){ |
6666 leavesReaderDestroy(&readers[i].reader); | 6257 leavesReaderDestroy(&readers[i].reader); |
6667 } | 6258 } |
6668 sqlite3_free(readers); | 6259 sqlite3_free(readers); |
6669 | 6260 |
6670 /* If we've successfully gotten to here, delete the old segments | 6261 /* If we've successfully gotten to here, delete the old segments |
6671 ** and flush the interior structure of the new segment. | 6262 ** and flush the interior structure of the new segment. |
6672 */ | 6263 */ |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6716 ** iStartBlockid and iEndBlockid, inclusive, or by the contents of | 6307 ** iStartBlockid and iEndBlockid, inclusive, or by the contents of |
6717 ** pRootData if iStartBlockid is 0 (in which case the entire segment | 6308 ** pRootData if iStartBlockid is 0 (in which case the entire segment |
6718 ** fit in a leaf). | 6309 ** fit in a leaf). |
6719 */ | 6310 */ |
6720 static int collectSegmentTerms(fulltext_vtab *v, sqlite3_stmt *s, | 6311 static int collectSegmentTerms(fulltext_vtab *v, sqlite3_stmt *s, |
6721 fts2Hash *pTerms){ | 6312 fts2Hash *pTerms){ |
6722 const sqlite_int64 iStartBlockid = sqlite3_column_int64(s, 0); | 6313 const sqlite_int64 iStartBlockid = sqlite3_column_int64(s, 0); |
6723 const sqlite_int64 iEndBlockid = sqlite3_column_int64(s, 1); | 6314 const sqlite_int64 iEndBlockid = sqlite3_column_int64(s, 1); |
6724 const char *pRootData = sqlite3_column_blob(s, 2); | 6315 const char *pRootData = sqlite3_column_blob(s, 2); |
6725 const int nRootData = sqlite3_column_bytes(s, 2); | 6316 const int nRootData = sqlite3_column_bytes(s, 2); |
6726 int rc; | |
6727 LeavesReader reader; | 6317 LeavesReader reader; |
6728 | 6318 int rc = leavesReaderInit(v, 0, iStartBlockid, iEndBlockid, |
6729 /* Corrupt if we get back different types than we stored. */ | 6319 pRootData, nRootData, &reader); |
6730 if( sqlite3_column_type(s, 0)!=SQLITE_INTEGER || | |
6731 sqlite3_column_type(s, 1)!=SQLITE_INTEGER || | |
6732 sqlite3_column_type(s, 2)!=SQLITE_BLOB ){ | |
6733 return SQLITE_CORRUPT_BKPT; | |
6734 } | |
6735 | |
6736 rc = leavesReaderInit(v, 0, iStartBlockid, iEndBlockid, | |
6737 pRootData, nRootData, &reader); | |
6738 if( rc!=SQLITE_OK ) return rc; | 6320 if( rc!=SQLITE_OK ) return rc; |
6739 | 6321 |
6740 while( rc==SQLITE_OK && !leavesReaderAtEnd(&reader) ){ | 6322 while( rc==SQLITE_OK && !leavesReaderAtEnd(&reader) ){ |
6741 const char *pTerm = leavesReaderTerm(&reader); | 6323 const char *pTerm = leavesReaderTerm(&reader); |
6742 const int nTerm = leavesReaderTermBytes(&reader); | 6324 const int nTerm = leavesReaderTermBytes(&reader); |
6743 void *oldValue = sqlite3Fts2HashFind(pTerms, pTerm, nTerm); | 6325 void *oldValue = sqlite3Fts2HashFind(pTerms, pTerm, nTerm); |
6744 void *newValue = (void *)((char *)oldValue+1); | 6326 void *newValue = (void *)((char *)oldValue+1); |
6745 | 6327 |
6746 /* From the comment before sqlite3Fts2HashInsert in fts2_hash.c, | 6328 /* From the comment before sqlite3Fts2HashInsert in fts2_hash.c, |
6747 ** the data value passed is returned in case of malloc failure. | 6329 ** the data value passed is returned in case of malloc failure. |
(...skipping 141 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6889 } | 6471 } |
6890 } | 6472 } |
6891 | 6473 |
6892 /* Expand the DL_DEFAULT doclist in pData into a text result in | 6474 /* Expand the DL_DEFAULT doclist in pData into a text result in |
6893 ** pContext. | 6475 ** pContext. |
6894 */ | 6476 */ |
6895 static void createDoclistResult(sqlite3_context *pContext, | 6477 static void createDoclistResult(sqlite3_context *pContext, |
6896 const char *pData, int nData){ | 6478 const char *pData, int nData){ |
6897 DataBuffer dump; | 6479 DataBuffer dump; |
6898 DLReader dlReader; | 6480 DLReader dlReader; |
6899 int rc; | |
6900 | 6481 |
6901 assert( pData!=NULL && nData>0 ); | 6482 assert( pData!=NULL && nData>0 ); |
6902 | 6483 |
6903 rc = dlrInit(&dlReader, DL_DEFAULT, pData, nData); | |
6904 if( rc!=SQLITE_OK ) return; | |
6905 dataBufferInit(&dump, 0); | 6484 dataBufferInit(&dump, 0); |
6906 for( ; rc==SQLITE_OK && !dlrAtEnd(&dlReader); rc = dlrStep(&dlReader) ){ | 6485 dlrInit(&dlReader, DL_DEFAULT, pData, nData); |
| 6486 for( ; !dlrAtEnd(&dlReader); dlrStep(&dlReader) ){ |
6907 char buf[256]; | 6487 char buf[256]; |
6908 PLReader plReader; | 6488 PLReader plReader; |
6909 | 6489 |
6910 rc = plrInit(&plReader, &dlReader); | 6490 plrInit(&plReader, &dlReader); |
6911 if( rc!=SQLITE_OK ) break; | |
6912 if( DL_DEFAULT==DL_DOCIDS || plrAtEnd(&plReader) ){ | 6491 if( DL_DEFAULT==DL_DOCIDS || plrAtEnd(&plReader) ){ |
6913 sqlite3_snprintf(sizeof(buf), buf, "[%lld] ", dlrDocid(&dlReader)); | 6492 sqlite3_snprintf(sizeof(buf), buf, "[%lld] ", dlrDocid(&dlReader)); |
6914 dataBufferAppend(&dump, buf, strlen(buf)); | 6493 dataBufferAppend(&dump, buf, strlen(buf)); |
6915 }else{ | 6494 }else{ |
6916 int iColumn = plrColumn(&plReader); | 6495 int iColumn = plrColumn(&plReader); |
6917 | 6496 |
6918 sqlite3_snprintf(sizeof(buf), buf, "[%lld %d[", | 6497 sqlite3_snprintf(sizeof(buf), buf, "[%lld %d[", |
6919 dlrDocid(&dlReader), iColumn); | 6498 dlrDocid(&dlReader), iColumn); |
6920 dataBufferAppend(&dump, buf, strlen(buf)); | 6499 dataBufferAppend(&dump, buf, strlen(buf)); |
6921 | 6500 |
6922 for( ; !plrAtEnd(&plReader); rc = plrStep(&plReader) ){ | 6501 for( ; !plrAtEnd(&plReader); plrStep(&plReader) ){ |
6923 if( rc!=SQLITE_OK ) break; | |
6924 if( plrColumn(&plReader)!=iColumn ){ | 6502 if( plrColumn(&plReader)!=iColumn ){ |
6925 iColumn = plrColumn(&plReader); | 6503 iColumn = plrColumn(&plReader); |
6926 sqlite3_snprintf(sizeof(buf), buf, "] %d[", iColumn); | 6504 sqlite3_snprintf(sizeof(buf), buf, "] %d[", iColumn); |
6927 assert( dump.nData>0 ); | 6505 assert( dump.nData>0 ); |
6928 dump.nData--; /* Overwrite trailing space. */ | 6506 dump.nData--; /* Overwrite trailing space. */ |
6929 assert( dump.pData[dump.nData]==' '); | 6507 assert( dump.pData[dump.nData]==' '); |
6930 dataBufferAppend(&dump, buf, strlen(buf)); | 6508 dataBufferAppend(&dump, buf, strlen(buf)); |
6931 } | 6509 } |
6932 if( DL_DEFAULT==DL_POSITIONS_OFFSETS ){ | 6510 if( DL_DEFAULT==DL_POSITIONS_OFFSETS ){ |
6933 sqlite3_snprintf(sizeof(buf), buf, "%d,%d,%d ", | 6511 sqlite3_snprintf(sizeof(buf), buf, "%d,%d,%d ", |
6934 plrPosition(&plReader), | 6512 plrPosition(&plReader), |
6935 plrStartOffset(&plReader), plrEndOffset(&plReader)); | 6513 plrStartOffset(&plReader), plrEndOffset(&plReader)); |
6936 }else if( DL_DEFAULT==DL_POSITIONS ){ | 6514 }else if( DL_DEFAULT==DL_POSITIONS ){ |
6937 sqlite3_snprintf(sizeof(buf), buf, "%d ", plrPosition(&plReader)); | 6515 sqlite3_snprintf(sizeof(buf), buf, "%d ", plrPosition(&plReader)); |
6938 }else{ | 6516 }else{ |
6939 assert( NULL=="Unhandled DL_DEFAULT value"); | 6517 assert( NULL=="Unhandled DL_DEFAULT value"); |
6940 } | 6518 } |
6941 dataBufferAppend(&dump, buf, strlen(buf)); | 6519 dataBufferAppend(&dump, buf, strlen(buf)); |
6942 } | 6520 } |
6943 plrDestroy(&plReader); | 6521 plrDestroy(&plReader); |
6944 if( rc!= SQLITE_OK ) break; | |
6945 | 6522 |
6946 assert( dump.nData>0 ); | 6523 assert( dump.nData>0 ); |
6947 dump.nData--; /* Overwrite trailing space. */ | 6524 dump.nData--; /* Overwrite trailing space. */ |
6948 assert( dump.pData[dump.nData]==' '); | 6525 assert( dump.pData[dump.nData]==' '); |
6949 dataBufferAppend(&dump, "]] ", 3); | 6526 dataBufferAppend(&dump, "]] ", 3); |
6950 } | 6527 } |
6951 } | 6528 } |
6952 dlrDestroy(&dlReader); | 6529 dlrDestroy(&dlReader); |
6953 if( rc!=SQLITE_OK ){ | |
6954 dataBufferDestroy(&dump); | |
6955 return; | |
6956 } | |
6957 | 6530 |
6958 assert( dump.nData>0 ); | 6531 assert( dump.nData>0 ); |
6959 dump.nData--; /* Overwrite trailing space. */ | 6532 dump.nData--; /* Overwrite trailing space. */ |
6960 assert( dump.pData[dump.nData]==' '); | 6533 assert( dump.pData[dump.nData]==' '); |
6961 dump.pData[dump.nData] = '\0'; | 6534 dump.pData[dump.nData] = '\0'; |
6962 assert( dump.nData>0 ); | 6535 assert( dump.nData>0 ); |
6963 | 6536 |
6964 /* Passes ownership of dump's buffer to pContext. */ | 6537 /* Passes ownership of dump's buffer to pContext. */ |
6965 sqlite3_result_text(pContext, dump.pData, dump.nData, sqlite3_free); | 6538 sqlite3_result_text(pContext, dump.pData, dump.nData, sqlite3_free); |
6966 dump.pData = NULL; | 6539 dump.pData = NULL; |
(...skipping 273 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
7240 ){ | 6813 ){ |
7241 rc = SQLITE_NOMEM; | 6814 rc = SQLITE_NOMEM; |
7242 } | 6815 } |
7243 } | 6816 } |
7244 | 6817 |
7245 /* Create the virtual table wrapper around the hash-table and overload | 6818 /* Create the virtual table wrapper around the hash-table and overload |
7246 ** the two scalar functions. If this is successful, register the | 6819 ** the two scalar functions. If this is successful, register the |
7247 ** module with sqlite. | 6820 ** module with sqlite. |
7248 */ | 6821 */ |
7249 if( SQLITE_OK==rc | 6822 if( SQLITE_OK==rc |
7250 #if GEARS_FTS2_CHANGES && !SQLITE_TEST | |
7251 /* fts2_tokenizer() disabled for security reasons. */ | |
7252 #else | |
7253 && SQLITE_OK==(rc = sqlite3Fts2InitHashTable(db, pHash, "fts2_tokenizer")) | 6823 && SQLITE_OK==(rc = sqlite3Fts2InitHashTable(db, pHash, "fts2_tokenizer")) |
7254 #endif | |
7255 && SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1)) | 6824 && SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1)) |
7256 && SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", -1)) | 6825 && SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", -1)) |
7257 && SQLITE_OK==(rc = sqlite3_overload_function(db, "optimize", -1)) | 6826 && SQLITE_OK==(rc = sqlite3_overload_function(db, "optimize", -1)) |
7258 #ifdef SQLITE_TEST | 6827 #ifdef SQLITE_TEST |
7259 && SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_terms", -1)) | 6828 && SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_terms", -1)) |
7260 && SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_doclist", -1)) | 6829 && SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_doclist", -1)) |
7261 #endif | 6830 #endif |
7262 ){ | 6831 ){ |
7263 return sqlite3_create_module_v2( | 6832 return sqlite3_create_module_v2( |
7264 db, "fts2", &fts2Module, (void *)pHash, hashDestroy | 6833 db, "fts2", &fts2Module, (void *)pHash, hashDestroy |
(...skipping 17 matching lines...) Expand all Loading... |
7282 sqlite3 *db, | 6851 sqlite3 *db, |
7283 char **pzErrMsg, | 6852 char **pzErrMsg, |
7284 const sqlite3_api_routines *pApi | 6853 const sqlite3_api_routines *pApi |
7285 ){ | 6854 ){ |
7286 SQLITE_EXTENSION_INIT2(pApi) | 6855 SQLITE_EXTENSION_INIT2(pApi) |
7287 return sqlite3Fts2Init(db); | 6856 return sqlite3Fts2Init(db); |
7288 } | 6857 } |
7289 #endif | 6858 #endif |
7290 | 6859 |
7291 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */ | 6860 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */ |
OLD | NEW |