| OLD | NEW |
| 1 /* fts2 has a design flaw which can lead to database corruption (see | 1 /* fts2 has a design flaw which can lead to database corruption (see |
| 2 ** below). It is recommended not to use it any longer, instead use | 2 ** below). It is recommended not to use it any longer, instead use |
| 3 ** fts3 (or higher). If you believe that your use of fts2 is safe, | 3 ** fts3 (or higher). If you believe that your use of fts2 is safe, |
| 4 ** add -DSQLITE_ENABLE_BROKEN_FTS2=1 to your CFLAGS. | 4 ** add -DSQLITE_ENABLE_BROKEN_FTS2=1 to your CFLAGS. |
| 5 */ | 5 */ |
| 6 #if (!defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)) \ | 6 #if (!defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)) \ |
| 7 && !defined(SQLITE_ENABLE_BROKEN_FTS2) | 7 && !defined(SQLITE_ENABLE_BROKEN_FTS2) |
| 8 #error fts2 has a design flaw and has been deprecated. | 8 #error fts2 has a design flaw and has been deprecated. |
| 9 #endif | 9 #endif |
| 10 /* The flaw is that fts2 uses the content table's unaliased rowid as | 10 /* The flaw is that fts2 uses the content table's unaliased rowid as |
| (...skipping 19 matching lines...) Expand all Loading... |
| 30 ** | 30 ** |
| 31 ** May you do good and not evil. | 31 ** May you do good and not evil. |
| 32 ** May you find forgiveness for yourself and forgive others. | 32 ** May you find forgiveness for yourself and forgive others. |
| 33 ** May you share freely, never taking more than you give. | 33 ** May you share freely, never taking more than you give. |
| 34 ** | 34 ** |
| 35 ****************************************************************************** | 35 ****************************************************************************** |
| 36 ** | 36 ** |
| 37 ** This is an SQLite module implementing full-text search. | 37 ** This is an SQLite module implementing full-text search. |
| 38 */ | 38 */ |
| 39 | 39 |
| 40 /* TODO(shess): To make it easier to spot changes without groveling | |
| 41 ** through changelogs, I've defined GEARS_FTS2_CHANGES to call them | |
| 42 ** out, and I will document them here. On imports, these changes | |
| 43 ** should be reviewed to make sure they are still present, or are | |
| 44 ** dropped as appropriate. | |
| 45 ** | |
| 46 ** SQLite core adds the custom function fts2_tokenizer() to be used | |
| 47 ** for defining new tokenizers. The second parameter is a vtable | |
| 48 ** pointer encoded as a blob. Obviously this cannot be exposed to | |
| 49 ** Gears callers for security reasons. It could be suppressed in the | |
| 50 ** authorizer, but for now I have simply commented the definition out. | |
| 51 */ | |
| 52 #define GEARS_FTS2_CHANGES 1 | |
| 53 | |
| 54 /* | 40 /* |
| 55 ** The code in this file is only compiled if: | 41 ** The code in this file is only compiled if: |
| 56 ** | 42 ** |
| 57 ** * The FTS2 module is being built as an extension | 43 ** * The FTS2 module is being built as an extension |
| 58 ** (in which case SQLITE_CORE is not defined), or | 44 ** (in which case SQLITE_CORE is not defined), or |
| 59 ** | 45 ** |
| 60 ** * The FTS2 module is being built into the core of | 46 ** * The FTS2 module is being built into the core of |
| 61 ** SQLite (in which case SQLITE_ENABLE_FTS2 is defined). | 47 ** SQLite (in which case SQLITE_ENABLE_FTS2 is defined). |
| 62 */ | 48 */ |
| 63 | 49 |
| (...skipping 253 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 317 #endif | 303 #endif |
| 318 | 304 |
| 319 #include <assert.h> | 305 #include <assert.h> |
| 320 #include <stdlib.h> | 306 #include <stdlib.h> |
| 321 #include <stdio.h> | 307 #include <stdio.h> |
| 322 #include <string.h> | 308 #include <string.h> |
| 323 #include "fts2.h" | 309 #include "fts2.h" |
| 324 #include "fts2_hash.h" | 310 #include "fts2_hash.h" |
| 325 #include "fts2_tokenizer.h" | 311 #include "fts2_tokenizer.h" |
| 326 #include "sqlite3.h" | 312 #include "sqlite3.h" |
| 327 #ifndef SQLITE_CORE | 313 #include "sqlite3ext.h" |
| 328 # include "sqlite3ext.h" | 314 SQLITE_EXTENSION_INIT1 |
| 329 SQLITE_EXTENSION_INIT1 | |
| 330 #endif | |
| 331 | 315 |
| 332 | 316 |
| 333 /* TODO(shess) MAN, this thing needs some refactoring. At minimum, it | 317 /* TODO(shess) MAN, this thing needs some refactoring. At minimum, it |
| 334 ** would be nice to order the file better, perhaps something along the | 318 ** would be nice to order the file better, perhaps something along the |
| 335 ** lines of: | 319 ** lines of: |
| 336 ** | 320 ** |
| 337 ** - utility functions | 321 ** - utility functions |
| 338 ** - table setup functions | 322 ** - table setup functions |
| 339 ** - table update functions | 323 ** - table update functions |
| 340 ** - table query functions | 324 ** - table query functions |
| 341 ** | 325 ** |
| 342 ** Put the query functions last because they're likely to reference | 326 ** Put the query functions last because they're likely to reference |
| 343 ** typedefs or functions from the table update section. | 327 ** typedefs or functions from the table update section. |
| 344 */ | 328 */ |
| 345 | 329 |
| 346 #if 0 | 330 #if 0 |
| 347 # define TRACE(A) printf A; fflush(stdout) | 331 # define TRACE(A) printf A; fflush(stdout) |
| 348 #else | 332 #else |
| 349 # define TRACE(A) | 333 # define TRACE(A) |
| 350 #endif | 334 #endif |
| 351 | 335 |
| 352 #if 0 | |
| 353 /* Useful to set breakpoints. See main.c sqlite3Corrupt(). */ | |
| 354 static int fts2Corrupt(void){ | |
| 355 return SQLITE_CORRUPT; | |
| 356 } | |
| 357 # define SQLITE_CORRUPT_BKPT fts2Corrupt() | |
| 358 #else | |
| 359 # define SQLITE_CORRUPT_BKPT SQLITE_CORRUPT | |
| 360 #endif | |
| 361 | |
| 362 /* It is not safe to call isspace(), tolower(), or isalnum() on | 336 /* It is not safe to call isspace(), tolower(), or isalnum() on |
| 363 ** hi-bit-set characters. This is the same solution used in the | 337 ** hi-bit-set characters. This is the same solution used in the |
| 364 ** tokenizer. | 338 ** tokenizer. |
| 365 */ | 339 */ |
| 366 /* TODO(shess) The snippet-generation code should be using the | 340 /* TODO(shess) The snippet-generation code should be using the |
| 367 ** tokenizer-generated tokens rather than doing its own local | 341 ** tokenizer-generated tokens rather than doing its own local |
| 368 ** tokenization. | 342 ** tokenization. |
| 369 */ | 343 */ |
| 370 /* TODO(shess) Is __isascii() a portable version of (c&0x80)==0? */ | 344 /* TODO(shess) Is __isascii() a portable version of (c&0x80)==0? */ |
| 371 static int safe_isspace(char c){ | 345 static int safe_isspace(char c){ |
| (...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 440 vu >>= 7; | 414 vu >>= 7; |
| 441 }while( vu!=0 ); | 415 }while( vu!=0 ); |
| 442 q[-1] &= 0x7f; /* turn off high bit in final byte */ | 416 q[-1] &= 0x7f; /* turn off high bit in final byte */ |
| 443 assert( q - (unsigned char *)p <= VARINT_MAX ); | 417 assert( q - (unsigned char *)p <= VARINT_MAX ); |
| 444 return (int) (q - (unsigned char *)p); | 418 return (int) (q - (unsigned char *)p); |
| 445 } | 419 } |
| 446 | 420 |
| 447 /* Read a 64-bit variable-length integer from memory starting at p[0]. | 421 /* Read a 64-bit variable-length integer from memory starting at p[0]. |
| 448 * Return the number of bytes read, or 0 on error. | 422 * Return the number of bytes read, or 0 on error. |
| 449 * The value is stored in *v. */ | 423 * The value is stored in *v. */ |
| 450 static int getVarintSafe(const char *p, sqlite_int64 *v, int max){ | 424 static int getVarint(const char *p, sqlite_int64 *v){ |
| 451 const unsigned char *q = (const unsigned char *) p; | 425 const unsigned char *q = (const unsigned char *) p; |
| 452 sqlite_uint64 x = 0, y = 1; | 426 sqlite_uint64 x = 0, y = 1; |
| 453 if( max>VARINT_MAX ) max = VARINT_MAX; | 427 while( (*q & 0x80) == 0x80 ){ |
| 454 while( max && (*q & 0x80) == 0x80 ){ | |
| 455 max--; | |
| 456 x += y * (*q++ & 0x7f); | 428 x += y * (*q++ & 0x7f); |
| 457 y <<= 7; | 429 y <<= 7; |
| 458 } | 430 if( q - (unsigned char *)p >= VARINT_MAX ){ /* bad data */ |
| 459 if ( !max ){ | 431 assert( 0 ); |
| 460 assert( 0 ); | 432 return 0; |
| 461 return 0; /* tried to read too much; bad data */ | 433 } |
| 462 } | 434 } |
| 463 x += y * (*q++); | 435 x += y * (*q++); |
| 464 *v = (sqlite_int64) x; | 436 *v = (sqlite_int64) x; |
| 465 return (int) (q - (unsigned char *)p); | 437 return (int) (q - (unsigned char *)p); |
| 466 } | 438 } |
| 467 | 439 |
| 468 static int getVarint(const char *p, sqlite_int64 *v){ | 440 static int getVarint32(const char *p, int *pi){ |
| 469 return getVarintSafe(p, v, VARINT_MAX); | |
| 470 } | |
| 471 | |
| 472 static int getVarint32Safe(const char *p, int *pi, int max){ | |
| 473 sqlite_int64 i; | 441 sqlite_int64 i; |
| 474 int ret = getVarintSafe(p, &i, max); | 442 int ret = getVarint(p, &i); |
| 475 if( !ret ) return ret; | |
| 476 *pi = (int) i; | 443 *pi = (int) i; |
| 477 assert( *pi==i ); | 444 assert( *pi==i ); |
| 478 return ret; | 445 return ret; |
| 479 } | 446 } |
| 480 | 447 |
| 481 static int getVarint32(const char* p, int *pi){ | |
| 482 return getVarint32Safe(p, pi, VARINT_MAX); | |
| 483 } | |
| 484 | |
| 485 /*******************************************************************/ | 448 /*******************************************************************/ |
| 486 /* DataBuffer is used to collect data into a buffer in piecemeal | 449 /* DataBuffer is used to collect data into a buffer in piecemeal |
| 487 ** fashion. It implements the usual distinction between amount of | 450 ** fashion. It implements the usual distinction between amount of |
| 488 ** data currently stored (nData) and buffer capacity (nCapacity). | 451 ** data currently stored (nData) and buffer capacity (nCapacity). |
| 489 ** | 452 ** |
| 490 ** dataBufferInit - create a buffer with given initial capacity. | 453 ** dataBufferInit - create a buffer with given initial capacity. |
| 491 ** dataBufferReset - forget buffer's data, retaining capacity. | 454 ** dataBufferReset - forget buffer's data, retaining capacity. |
| 492 ** dataBufferDestroy - free buffer's data. | 455 ** dataBufferDestroy - free buffer's data. |
| 493 ** dataBufferSwap - swap contents of two buffers. | 456 ** dataBufferSwap - swap contents of two buffers. |
| 494 ** dataBufferExpand - expand capacity without adding data. | 457 ** dataBufferExpand - expand capacity without adding data. |
| (...skipping 148 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 643 DocListType iType; | 606 DocListType iType; |
| 644 const char *pData; | 607 const char *pData; |
| 645 int nData; | 608 int nData; |
| 646 | 609 |
| 647 sqlite_int64 iDocid; | 610 sqlite_int64 iDocid; |
| 648 int nElement; | 611 int nElement; |
| 649 } DLReader; | 612 } DLReader; |
| 650 | 613 |
| 651 static int dlrAtEnd(DLReader *pReader){ | 614 static int dlrAtEnd(DLReader *pReader){ |
| 652 assert( pReader->nData>=0 ); | 615 assert( pReader->nData>=0 ); |
| 653 return pReader->nData<=0; | 616 return pReader->nData==0; |
| 654 } | 617 } |
| 655 static sqlite_int64 dlrDocid(DLReader *pReader){ | 618 static sqlite_int64 dlrDocid(DLReader *pReader){ |
| 656 assert( !dlrAtEnd(pReader) ); | 619 assert( !dlrAtEnd(pReader) ); |
| 657 return pReader->iDocid; | 620 return pReader->iDocid; |
| 658 } | 621 } |
| 659 static const char *dlrDocData(DLReader *pReader){ | 622 static const char *dlrDocData(DLReader *pReader){ |
| 660 assert( !dlrAtEnd(pReader) ); | 623 assert( !dlrAtEnd(pReader) ); |
| 661 return pReader->pData; | 624 return pReader->pData; |
| 662 } | 625 } |
| 663 static int dlrDocDataBytes(DLReader *pReader){ | 626 static int dlrDocDataBytes(DLReader *pReader){ |
| 664 assert( !dlrAtEnd(pReader) ); | 627 assert( !dlrAtEnd(pReader) ); |
| 665 return pReader->nElement; | 628 return pReader->nElement; |
| 666 } | 629 } |
| 667 static int dlrAllDataBytes(DLReader *pReader){ | 630 static int dlrAllDataBytes(DLReader *pReader){ |
| 668 assert( !dlrAtEnd(pReader) ); | 631 assert( !dlrAtEnd(pReader) ); |
| 669 return pReader->nData; | 632 return pReader->nData; |
| 670 } | 633 } |
| 671 /* TODO(shess) Consider adding a field to track iDocid varint length | 634 /* TODO(shess) Consider adding a field to track iDocid varint length |
| 672 ** to make these two functions faster. This might matter (a tiny bit) | 635 ** to make these two functions faster. This might matter (a tiny bit) |
| 673 ** for queries. | 636 ** for queries. |
| 674 */ | 637 */ |
| 675 static const char *dlrPosData(DLReader *pReader){ | 638 static const char *dlrPosData(DLReader *pReader){ |
| 676 sqlite_int64 iDummy; | 639 sqlite_int64 iDummy; |
| 677 int n = getVarintSafe(pReader->pData, &iDummy, pReader->nElement); | 640 int n = getVarint(pReader->pData, &iDummy); |
| 678 if( !n ) return NULL; | |
| 679 assert( !dlrAtEnd(pReader) ); | 641 assert( !dlrAtEnd(pReader) ); |
| 680 return pReader->pData+n; | 642 return pReader->pData+n; |
| 681 } | 643 } |
| 682 static int dlrPosDataLen(DLReader *pReader){ | 644 static int dlrPosDataLen(DLReader *pReader){ |
| 683 sqlite_int64 iDummy; | 645 sqlite_int64 iDummy; |
| 684 int n = getVarint(pReader->pData, &iDummy); | 646 int n = getVarint(pReader->pData, &iDummy); |
| 685 assert( !dlrAtEnd(pReader) ); | 647 assert( !dlrAtEnd(pReader) ); |
| 686 return pReader->nElement-n; | 648 return pReader->nElement-n; |
| 687 } | 649 } |
| 688 static int dlrStep(DLReader *pReader){ | 650 static void dlrStep(DLReader *pReader){ |
| 689 assert( !dlrAtEnd(pReader) ); | 651 assert( !dlrAtEnd(pReader) ); |
| 690 | 652 |
| 691 /* Skip past current doclist element. */ | 653 /* Skip past current doclist element. */ |
| 692 assert( pReader->nElement<=pReader->nData ); | 654 assert( pReader->nElement<=pReader->nData ); |
| 693 pReader->pData += pReader->nElement; | 655 pReader->pData += pReader->nElement; |
| 694 pReader->nData -= pReader->nElement; | 656 pReader->nData -= pReader->nElement; |
| 695 | 657 |
| 696 /* If there is more data, read the next doclist element. */ | 658 /* If there is more data, read the next doclist element. */ |
| 697 if( pReader->nData>0 ){ | 659 if( pReader->nData!=0 ){ |
| 698 sqlite_int64 iDocidDelta; | 660 sqlite_int64 iDocidDelta; |
| 699 int nTotal = 0; | 661 int iDummy, n = getVarint(pReader->pData, &iDocidDelta); |
| 700 int iDummy, n = getVarintSafe(pReader->pData, &iDocidDelta, pReader->nData); | |
| 701 if( !n ) return SQLITE_CORRUPT_BKPT; | |
| 702 nTotal += n; | |
| 703 pReader->iDocid += iDocidDelta; | 662 pReader->iDocid += iDocidDelta; |
| 704 if( pReader->iType>=DL_POSITIONS ){ | 663 if( pReader->iType>=DL_POSITIONS ){ |
| 664 assert( n<pReader->nData ); |
| 705 while( 1 ){ | 665 while( 1 ){ |
| 706 n = getVarint32Safe(pReader->pData+nTotal, &iDummy, | 666 n += getVarint32(pReader->pData+n, &iDummy); |
| 707 pReader->nData-nTotal); | 667 assert( n<=pReader->nData ); |
| 708 if( !n ) return SQLITE_CORRUPT_BKPT; | |
| 709 nTotal += n; | |
| 710 if( iDummy==POS_END ) break; | 668 if( iDummy==POS_END ) break; |
| 711 if( iDummy==POS_COLUMN ){ | 669 if( iDummy==POS_COLUMN ){ |
| 712 n = getVarint32Safe(pReader->pData+nTotal, &iDummy, | 670 n += getVarint32(pReader->pData+n, &iDummy); |
| 713 pReader->nData-nTotal); | 671 assert( n<pReader->nData ); |
| 714 if( !n ) return SQLITE_CORRUPT_BKPT; | |
| 715 nTotal += n; | |
| 716 }else if( pReader->iType==DL_POSITIONS_OFFSETS ){ | 672 }else if( pReader->iType==DL_POSITIONS_OFFSETS ){ |
| 717 n = getVarint32Safe(pReader->pData+nTotal, &iDummy, | 673 n += getVarint32(pReader->pData+n, &iDummy); |
| 718 pReader->nData-nTotal); | 674 n += getVarint32(pReader->pData+n, &iDummy); |
| 719 if( !n ) return SQLITE_CORRUPT_BKPT; | 675 assert( n<pReader->nData ); |
| 720 nTotal += n; | |
| 721 n = getVarint32Safe(pReader->pData+nTotal, &iDummy, | |
| 722 pReader->nData-nTotal); | |
| 723 if( !n ) return SQLITE_CORRUPT_BKPT; | |
| 724 nTotal += n; | |
| 725 } | 676 } |
| 726 } | 677 } |
| 727 } | 678 } |
| 728 pReader->nElement = nTotal; | 679 pReader->nElement = n; |
| 729 assert( pReader->nElement<=pReader->nData ); | 680 assert( pReader->nElement<=pReader->nData ); |
| 730 } | 681 } |
| 731 return SQLITE_OK; | |
| 732 } | 682 } |
| 733 static void dlrDestroy(DLReader *pReader){ | 683 static void dlrInit(DLReader *pReader, DocListType iType, |
| 734 SCRAMBLE(pReader); | 684 const char *pData, int nData){ |
| 735 } | |
| 736 static int dlrInit(DLReader *pReader, DocListType iType, | |
| 737 const char *pData, int nData){ | |
| 738 int rc; | |
| 739 assert( pData!=NULL && nData!=0 ); | 685 assert( pData!=NULL && nData!=0 ); |
| 740 pReader->iType = iType; | 686 pReader->iType = iType; |
| 741 pReader->pData = pData; | 687 pReader->pData = pData; |
| 742 pReader->nData = nData; | 688 pReader->nData = nData; |
| 743 pReader->nElement = 0; | 689 pReader->nElement = 0; |
| 744 pReader->iDocid = 0; | 690 pReader->iDocid = 0; |
| 745 | 691 |
| 746 /* Load the first element's data. There must be a first element. */ | 692 /* Load the first element's data. There must be a first element. */ |
| 747 rc = dlrStep(pReader); | 693 dlrStep(pReader); |
| 748 if( rc!=SQLITE_OK ) dlrDestroy(pReader); | 694 } |
| 749 return rc; | 695 static void dlrDestroy(DLReader *pReader){ |
| 696 SCRAMBLE(pReader); |
| 750 } | 697 } |
| 751 | 698 |
| 752 #ifndef NDEBUG | 699 #ifndef NDEBUG |
| 753 /* Verify that the doclist can be validly decoded. Also returns the | 700 /* Verify that the doclist can be validly decoded. Also returns the |
| 754 ** last docid found because it is convenient in other assertions for | 701 ** last docid found because it is convenient in other assertions for |
| 755 ** DLWriter. | 702 ** DLWriter. |
| 756 */ | 703 */ |
| 757 static void docListValidate(DocListType iType, const char *pData, int nData, | 704 static void docListValidate(DocListType iType, const char *pData, int nData, |
| 758 sqlite_int64 *pLastDocid){ | 705 sqlite_int64 *pLastDocid){ |
| 759 sqlite_int64 iPrevDocid = 0; | 706 sqlite_int64 iPrevDocid = 0; |
| (...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 826 ** | 773 ** |
| 827 ** iLastDocid is the final docid in the doclist in pData. It is | 774 ** iLastDocid is the final docid in the doclist in pData. It is |
| 828 ** needed to create the new iPrevDocid for future delta-encoding. The | 775 ** needed to create the new iPrevDocid for future delta-encoding. The |
| 829 ** code could decode the passed doclist to recreate iLastDocid, but | 776 ** code could decode the passed doclist to recreate iLastDocid, but |
| 830 ** the only current user (docListMerge) already has decoded this | 777 ** the only current user (docListMerge) already has decoded this |
| 831 ** information. | 778 ** information. |
| 832 */ | 779 */ |
| 833 /* TODO(shess) This has become just a helper for docListMerge. | 780 /* TODO(shess) This has become just a helper for docListMerge. |
| 834 ** Consider a refactor to make this cleaner. | 781 ** Consider a refactor to make this cleaner. |
| 835 */ | 782 */ |
| 836 static int dlwAppend(DLWriter *pWriter, | 783 static void dlwAppend(DLWriter *pWriter, |
| 837 const char *pData, int nData, | 784 const char *pData, int nData, |
| 838 sqlite_int64 iFirstDocid, sqlite_int64 iLastDocid){ | 785 sqlite_int64 iFirstDocid, sqlite_int64 iLastDocid){ |
| 839 sqlite_int64 iDocid = 0; | 786 sqlite_int64 iDocid = 0; |
| 840 char c[VARINT_MAX]; | 787 char c[VARINT_MAX]; |
| 841 int nFirstOld, nFirstNew; /* Old and new varint len of first docid. */ | 788 int nFirstOld, nFirstNew; /* Old and new varint len of first docid. */ |
| 842 #ifndef NDEBUG | 789 #ifndef NDEBUG |
| 843 sqlite_int64 iLastDocidDelta; | 790 sqlite_int64 iLastDocidDelta; |
| 844 #endif | 791 #endif |
| 845 | 792 |
| 846 /* Recode the initial docid as delta from iPrevDocid. */ | 793 /* Recode the initial docid as delta from iPrevDocid. */ |
| 847 nFirstOld = getVarintSafe(pData, &iDocid, nData); | 794 nFirstOld = getVarint(pData, &iDocid); |
| 848 if( !nFirstOld ) return SQLITE_CORRUPT_BKPT; | |
| 849 assert( nFirstOld<nData || (nFirstOld==nData && pWriter->iType==DL_DOCIDS) ); | 795 assert( nFirstOld<nData || (nFirstOld==nData && pWriter->iType==DL_DOCIDS) ); |
| 850 nFirstNew = putVarint(c, iFirstDocid-pWriter->iPrevDocid); | 796 nFirstNew = putVarint(c, iFirstDocid-pWriter->iPrevDocid); |
| 851 | 797 |
| 852 /* Verify that the incoming doclist is valid AND that it ends with | 798 /* Verify that the incoming doclist is valid AND that it ends with |
| 853 ** the expected docid. This is essential because we'll trust this | 799 ** the expected docid. This is essential because we'll trust this |
| 854 ** docid in future delta-encoding. | 800 ** docid in future delta-encoding. |
| 855 */ | 801 */ |
| 856 ASSERT_VALID_DOCLIST(pWriter->iType, pData, nData, &iLastDocidDelta); | 802 ASSERT_VALID_DOCLIST(pWriter->iType, pData, nData, &iLastDocidDelta); |
| 857 assert( iLastDocid==iFirstDocid-iDocid+iLastDocidDelta ); | 803 assert( iLastDocid==iFirstDocid-iDocid+iLastDocidDelta ); |
| 858 | 804 |
| 859 /* Append recoded initial docid and everything else. Rest of docids | 805 /* Append recoded initial docid and everything else. Rest of docids |
| 860 ** should have been delta-encoded from previous initial docid. | 806 ** should have been delta-encoded from previous initial docid. |
| 861 */ | 807 */ |
| 862 if( nFirstOld<nData ){ | 808 if( nFirstOld<nData ){ |
| 863 dataBufferAppend2(pWriter->b, c, nFirstNew, | 809 dataBufferAppend2(pWriter->b, c, nFirstNew, |
| 864 pData+nFirstOld, nData-nFirstOld); | 810 pData+nFirstOld, nData-nFirstOld); |
| 865 }else{ | 811 }else{ |
| 866 dataBufferAppend(pWriter->b, c, nFirstNew); | 812 dataBufferAppend(pWriter->b, c, nFirstNew); |
| 867 } | 813 } |
| 868 pWriter->iPrevDocid = iLastDocid; | 814 pWriter->iPrevDocid = iLastDocid; |
| 869 return SQLITE_OK; | |
| 870 } | 815 } |
| 871 static int dlwCopy(DLWriter *pWriter, DLReader *pReader){ | 816 static void dlwCopy(DLWriter *pWriter, DLReader *pReader){ |
| 872 return dlwAppend(pWriter, dlrDocData(pReader), dlrDocDataBytes(pReader), | 817 dlwAppend(pWriter, dlrDocData(pReader), dlrDocDataBytes(pReader), |
| 873 dlrDocid(pReader), dlrDocid(pReader)); | 818 dlrDocid(pReader), dlrDocid(pReader)); |
| 874 } | 819 } |
| 875 static void dlwAdd(DLWriter *pWriter, sqlite_int64 iDocid){ | 820 static void dlwAdd(DLWriter *pWriter, sqlite_int64 iDocid){ |
| 876 char c[VARINT_MAX]; | 821 char c[VARINT_MAX]; |
| 877 int n = putVarint(c, iDocid-pWriter->iPrevDocid); | 822 int n = putVarint(c, iDocid-pWriter->iPrevDocid); |
| 878 | 823 |
| 879 /* Docids must ascend. */ | 824 /* Docids must ascend. */ |
| 880 assert( !pWriter->has_iPrevDocid || iDocid>pWriter->iPrevDocid ); | 825 assert( !pWriter->has_iPrevDocid || iDocid>pWriter->iPrevDocid ); |
| 881 assert( pWriter->iType==DL_DOCIDS ); | 826 assert( pWriter->iType==DL_DOCIDS ); |
| 882 | 827 |
| 883 dataBufferAppend(pWriter->b, c, n); | 828 dataBufferAppend(pWriter->b, c, n); |
| (...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 924 return pReader->iPosition; | 869 return pReader->iPosition; |
| 925 } | 870 } |
| 926 static int plrStartOffset(PLReader *pReader){ | 871 static int plrStartOffset(PLReader *pReader){ |
| 927 assert( !plrAtEnd(pReader) ); | 872 assert( !plrAtEnd(pReader) ); |
| 928 return pReader->iStartOffset; | 873 return pReader->iStartOffset; |
| 929 } | 874 } |
| 930 static int plrEndOffset(PLReader *pReader){ | 875 static int plrEndOffset(PLReader *pReader){ |
| 931 assert( !plrAtEnd(pReader) ); | 876 assert( !plrAtEnd(pReader) ); |
| 932 return pReader->iEndOffset; | 877 return pReader->iEndOffset; |
| 933 } | 878 } |
| 934 static int plrStep(PLReader *pReader){ | 879 static void plrStep(PLReader *pReader){ |
| 935 int i, n, nTotal = 0; | 880 int i, n; |
| 936 | 881 |
| 937 assert( !plrAtEnd(pReader) ); | 882 assert( !plrAtEnd(pReader) ); |
| 938 | 883 |
| 939 if( pReader->nData<=0 ){ | 884 if( pReader->nData==0 ){ |
| 940 pReader->pData = NULL; | 885 pReader->pData = NULL; |
| 941 return SQLITE_OK; | 886 return; |
| 942 } | 887 } |
| 943 | 888 |
| 944 n = getVarint32Safe(pReader->pData, &i, pReader->nData); | 889 n = getVarint32(pReader->pData, &i); |
| 945 if( !n ) return SQLITE_CORRUPT_BKPT; | |
| 946 nTotal += n; | |
| 947 if( i==POS_COLUMN ){ | 890 if( i==POS_COLUMN ){ |
| 948 n = getVarint32Safe(pReader->pData+nTotal, &pReader->iColumn, | 891 n += getVarint32(pReader->pData+n, &pReader->iColumn); |
| 949 pReader->nData-nTotal); | |
| 950 if( !n ) return SQLITE_CORRUPT_BKPT; | |
| 951 nTotal += n; | |
| 952 pReader->iPosition = 0; | 892 pReader->iPosition = 0; |
| 953 pReader->iStartOffset = 0; | 893 pReader->iStartOffset = 0; |
| 954 n = getVarint32Safe(pReader->pData+nTotal, &i, pReader->nData-nTotal); | 894 n += getVarint32(pReader->pData+n, &i); |
| 955 if( !n ) return SQLITE_CORRUPT_BKPT; | |
| 956 nTotal += n; | |
| 957 } | 895 } |
| 958 /* Should never see adjacent column changes. */ | 896 /* Should never see adjacent column changes. */ |
| 959 assert( i!=POS_COLUMN ); | 897 assert( i!=POS_COLUMN ); |
| 960 | 898 |
| 961 if( i==POS_END ){ | 899 if( i==POS_END ){ |
| 962 assert( nTotal<=pReader->nData ); | |
| 963 pReader->nData = 0; | 900 pReader->nData = 0; |
| 964 pReader->pData = NULL; | 901 pReader->pData = NULL; |
| 965 return SQLITE_OK; | 902 return; |
| 966 } | 903 } |
| 967 | 904 |
| 968 pReader->iPosition += i-POS_BASE; | 905 pReader->iPosition += i-POS_BASE; |
| 969 if( pReader->iType==DL_POSITIONS_OFFSETS ){ | 906 if( pReader->iType==DL_POSITIONS_OFFSETS ){ |
| 970 n = getVarint32Safe(pReader->pData+nTotal, &i, pReader->nData-nTotal); | 907 n += getVarint32(pReader->pData+n, &i); |
| 971 if( !n ) return SQLITE_CORRUPT_BKPT; | |
| 972 nTotal += n; | |
| 973 pReader->iStartOffset += i; | 908 pReader->iStartOffset += i; |
| 974 n = getVarint32Safe(pReader->pData+nTotal, &i, pReader->nData-nTotal); | 909 n += getVarint32(pReader->pData+n, &i); |
| 975 if( !n ) return SQLITE_CORRUPT_BKPT; | |
| 976 nTotal += n; | |
| 977 pReader->iEndOffset = pReader->iStartOffset+i; | 910 pReader->iEndOffset = pReader->iStartOffset+i; |
| 978 } | 911 } |
| 979 assert( nTotal<=pReader->nData ); | 912 assert( n<=pReader->nData ); |
| 980 pReader->pData += nTotal; | 913 pReader->pData += n; |
| 981 pReader->nData -= nTotal; | 914 pReader->nData -= n; |
| 982 return SQLITE_OK; | |
| 983 } | 915 } |
| 984 | 916 |
| 985 static void plrDestroy(PLReader *pReader){ | 917 static void plrInit(PLReader *pReader, DLReader *pDLReader){ |
| 986 SCRAMBLE(pReader); | |
| 987 } | |
| 988 | |
| 989 static int plrInit(PLReader *pReader, DLReader *pDLReader){ | |
| 990 int rc; | |
| 991 pReader->pData = dlrPosData(pDLReader); | 918 pReader->pData = dlrPosData(pDLReader); |
| 992 pReader->nData = dlrPosDataLen(pDLReader); | 919 pReader->nData = dlrPosDataLen(pDLReader); |
| 993 pReader->iType = pDLReader->iType; | 920 pReader->iType = pDLReader->iType; |
| 994 pReader->iColumn = 0; | 921 pReader->iColumn = 0; |
| 995 pReader->iPosition = 0; | 922 pReader->iPosition = 0; |
| 996 pReader->iStartOffset = 0; | 923 pReader->iStartOffset = 0; |
| 997 pReader->iEndOffset = 0; | 924 pReader->iEndOffset = 0; |
| 998 rc = plrStep(pReader); | 925 plrStep(pReader); |
| 999 if( rc!=SQLITE_OK ) plrDestroy(pReader); | 926 } |
| 1000 return rc; | 927 static void plrDestroy(PLReader *pReader){ |
| 928 SCRAMBLE(pReader); |
| 1001 } | 929 } |
| 1002 | 930 |
| 1003 /*******************************************************************/ | 931 /*******************************************************************/ |
| 1004 /* PLWriter is used in constructing a document's position list. As a | 932 /* PLWriter is used in constructing a document's position list. As a |
| 1005 ** convenience, if iType is DL_DOCIDS, PLWriter becomes a no-op. | 933 ** convenience, if iType is DL_DOCIDS, PLWriter becomes a no-op. |
| 1006 ** PLWriter writes to the associated DLWriter's buffer. | 934 ** PLWriter writes to the associated DLWriter's buffer. |
| 1007 ** | 935 ** |
| 1008 ** plwInit - init for writing a document's poslist. | 936 ** plwInit - init for writing a document's poslist. |
| 1009 ** plwDestroy - clear a writer. | 937 ** plwDestroy - clear a writer. |
| 1010 ** plwAdd - append position and offset information. | 938 ** plwAdd - append position and offset information. |
| (...skipping 165 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1176 /* Copy the doclist data of iType in pData/nData into *out, trimming | 1104 /* Copy the doclist data of iType in pData/nData into *out, trimming |
| 1177 ** unnecessary data as we go. Only columns matching iColumn are | 1105 ** unnecessary data as we go. Only columns matching iColumn are |
| 1178 ** copied, all columns copied if iColumn is -1. Elements with no | 1106 ** copied, all columns copied if iColumn is -1. Elements with no |
| 1179 ** matching columns are dropped. The output is an iOutType doclist. | 1107 ** matching columns are dropped. The output is an iOutType doclist. |
| 1180 */ | 1108 */ |
| 1181 /* NOTE(shess) This code is only valid after all doclists are merged. | 1109 /* NOTE(shess) This code is only valid after all doclists are merged. |
| 1182 ** If this is run before merges, then doclist items which represent | 1110 ** If this is run before merges, then doclist items which represent |
| 1183 ** deletion will be trimmed, and will thus not effect a deletion | 1111 ** deletion will be trimmed, and will thus not effect a deletion |
| 1184 ** during the merge. | 1112 ** during the merge. |
| 1185 */ | 1113 */ |
| 1186 static int docListTrim(DocListType iType, const char *pData, int nData, | 1114 static void docListTrim(DocListType iType, const char *pData, int nData, |
| 1187 int iColumn, DocListType iOutType, DataBuffer *out){ | 1115 int iColumn, DocListType iOutType, DataBuffer *out){ |
| 1188 DLReader dlReader; | 1116 DLReader dlReader; |
| 1189 DLWriter dlWriter; | 1117 DLWriter dlWriter; |
| 1190 int rc; | |
| 1191 | 1118 |
| 1192 assert( iOutType<=iType ); | 1119 assert( iOutType<=iType ); |
| 1193 | 1120 |
| 1194 rc = dlrInit(&dlReader, iType, pData, nData); | 1121 dlrInit(&dlReader, iType, pData, nData); |
| 1195 if( rc!=SQLITE_OK ) return rc; | |
| 1196 dlwInit(&dlWriter, iOutType, out); | 1122 dlwInit(&dlWriter, iOutType, out); |
| 1197 | 1123 |
| 1198 while( !dlrAtEnd(&dlReader) ){ | 1124 while( !dlrAtEnd(&dlReader) ){ |
| 1199 PLReader plReader; | 1125 PLReader plReader; |
| 1200 PLWriter plWriter; | 1126 PLWriter plWriter; |
| 1201 int match = 0; | 1127 int match = 0; |
| 1202 | 1128 |
| 1203 rc = plrInit(&plReader, &dlReader); | 1129 plrInit(&plReader, &dlReader); |
| 1204 if( rc!=SQLITE_OK ) break; | |
| 1205 | 1130 |
| 1206 while( !plrAtEnd(&plReader) ){ | 1131 while( !plrAtEnd(&plReader) ){ |
| 1207 if( iColumn==-1 || plrColumn(&plReader)==iColumn ){ | 1132 if( iColumn==-1 || plrColumn(&plReader)==iColumn ){ |
| 1208 if( !match ){ | 1133 if( !match ){ |
| 1209 plwInit(&plWriter, &dlWriter, dlrDocid(&dlReader)); | 1134 plwInit(&plWriter, &dlWriter, dlrDocid(&dlReader)); |
| 1210 match = 1; | 1135 match = 1; |
| 1211 } | 1136 } |
| 1212 plwAdd(&plWriter, plrColumn(&plReader), plrPosition(&plReader), | 1137 plwAdd(&plWriter, plrColumn(&plReader), plrPosition(&plReader), |
| 1213 plrStartOffset(&plReader), plrEndOffset(&plReader)); | 1138 plrStartOffset(&plReader), plrEndOffset(&plReader)); |
| 1214 } | 1139 } |
| 1215 rc = plrStep(&plReader); | 1140 plrStep(&plReader); |
| 1216 if( rc!=SQLITE_OK ){ | |
| 1217 plrDestroy(&plReader); | |
| 1218 goto err; | |
| 1219 } | |
| 1220 } | 1141 } |
| 1221 if( match ){ | 1142 if( match ){ |
| 1222 plwTerminate(&plWriter); | 1143 plwTerminate(&plWriter); |
| 1223 plwDestroy(&plWriter); | 1144 plwDestroy(&plWriter); |
| 1224 } | 1145 } |
| 1225 | 1146 |
| 1226 plrDestroy(&plReader); | 1147 plrDestroy(&plReader); |
| 1227 rc = dlrStep(&dlReader); | 1148 dlrStep(&dlReader); |
| 1228 if( rc!=SQLITE_OK ) break; | |
| 1229 } | 1149 } |
| 1230 err: | |
| 1231 dlwDestroy(&dlWriter); | 1150 dlwDestroy(&dlWriter); |
| 1232 dlrDestroy(&dlReader); | 1151 dlrDestroy(&dlReader); |
| 1233 return rc; | |
| 1234 } | 1152 } |
| 1235 | 1153 |
| 1236 /* Used by docListMerge() to keep doclists in the ascending order by | 1154 /* Used by docListMerge() to keep doclists in the ascending order by |
| 1237 ** docid, then ascending order by age (so the newest comes first). | 1155 ** docid, then ascending order by age (so the newest comes first). |
| 1238 */ | 1156 */ |
| 1239 typedef struct OrderedDLReader { | 1157 typedef struct OrderedDLReader { |
| 1240 DLReader *pReader; | 1158 DLReader *pReader; |
| 1241 | 1159 |
| 1242 /* TODO(shess) If we assume that docListMerge pReaders is ordered by | 1160 /* TODO(shess) If we assume that docListMerge pReaders is ordered by |
| 1243 ** age (which we do), then we could use pReader comparisons to break | 1161 ** age (which we do), then we could use pReader comparisons to break |
| (...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1280 } | 1198 } |
| 1281 | 1199 |
| 1282 /* Given an array of doclist readers, merge their doclist elements | 1200 /* Given an array of doclist readers, merge their doclist elements |
| 1283 ** into out in sorted order (by docid), dropping elements from older | 1201 ** into out in sorted order (by docid), dropping elements from older |
| 1284 ** readers when there is a duplicate docid. pReaders is assumed to be | 1202 ** readers when there is a duplicate docid. pReaders is assumed to be |
| 1285 ** ordered by age, oldest first. | 1203 ** ordered by age, oldest first. |
| 1286 */ | 1204 */ |
| 1287 /* TODO(shess) nReaders must be <= MERGE_COUNT. This should probably | 1205 /* TODO(shess) nReaders must be <= MERGE_COUNT. This should probably |
| 1288 ** be fixed. | 1206 ** be fixed. |
| 1289 */ | 1207 */ |
| 1290 static int docListMerge(DataBuffer *out, | 1208 static void docListMerge(DataBuffer *out, |
| 1291 DLReader *pReaders, int nReaders){ | 1209 DLReader *pReaders, int nReaders){ |
| 1292 OrderedDLReader readers[MERGE_COUNT]; | 1210 OrderedDLReader readers[MERGE_COUNT]; |
| 1293 DLWriter writer; | 1211 DLWriter writer; |
| 1294 int i, n; | 1212 int i, n; |
| 1295 const char *pStart = 0; | 1213 const char *pStart = 0; |
| 1296 int nStart = 0; | 1214 int nStart = 0; |
| 1297 sqlite_int64 iFirstDocid = 0, iLastDocid = 0; | 1215 sqlite_int64 iFirstDocid = 0, iLastDocid = 0; |
| 1298 int rc = SQLITE_OK; | |
| 1299 | 1216 |
| 1300 assert( nReaders>0 ); | 1217 assert( nReaders>0 ); |
| 1301 if( nReaders==1 ){ | 1218 if( nReaders==1 ){ |
| 1302 dataBufferAppend(out, dlrDocData(pReaders), dlrAllDataBytes(pReaders)); | 1219 dataBufferAppend(out, dlrDocData(pReaders), dlrAllDataBytes(pReaders)); |
| 1303 return SQLITE_OK; | 1220 return; |
| 1304 } | 1221 } |
| 1305 | 1222 |
| 1306 assert( nReaders<=MERGE_COUNT ); | 1223 assert( nReaders<=MERGE_COUNT ); |
| 1307 n = 0; | 1224 n = 0; |
| 1308 for(i=0; i<nReaders; i++){ | 1225 for(i=0; i<nReaders; i++){ |
| 1309 assert( pReaders[i].iType==pReaders[0].iType ); | 1226 assert( pReaders[i].iType==pReaders[0].iType ); |
| 1310 readers[i].pReader = pReaders+i; | 1227 readers[i].pReader = pReaders+i; |
| 1311 readers[i].idx = i; | 1228 readers[i].idx = i; |
| 1312 n += dlrAllDataBytes(&pReaders[i]); | 1229 n += dlrAllDataBytes(&pReaders[i]); |
| 1313 } | 1230 } |
| (...skipping 12 matching lines...) Expand all Loading... |
| 1326 sqlite_int64 iDocid = dlrDocid(readers[0].pReader); | 1243 sqlite_int64 iDocid = dlrDocid(readers[0].pReader); |
| 1327 | 1244 |
| 1328 /* If this is a continuation of the current buffer to copy, extend | 1245 /* If this is a continuation of the current buffer to copy, extend |
| 1329 ** that buffer. memcpy() seems to be more efficient if it has a | 1246 ** that buffer. memcpy() seems to be more efficient if it has a |
| 1330 ** lots of data to copy. | 1247 ** lots of data to copy. |
| 1331 */ | 1248 */ |
| 1332 if( dlrDocData(readers[0].pReader)==pStart+nStart ){ | 1249 if( dlrDocData(readers[0].pReader)==pStart+nStart ){ |
| 1333 nStart += dlrDocDataBytes(readers[0].pReader); | 1250 nStart += dlrDocDataBytes(readers[0].pReader); |
| 1334 }else{ | 1251 }else{ |
| 1335 if( pStart!=0 ){ | 1252 if( pStart!=0 ){ |
| 1336 rc = dlwAppend(&writer, pStart, nStart, iFirstDocid, iLastDocid); | 1253 dlwAppend(&writer, pStart, nStart, iFirstDocid, iLastDocid); |
| 1337 if( rc!=SQLITE_OK ) goto err; | |
| 1338 } | 1254 } |
| 1339 pStart = dlrDocData(readers[0].pReader); | 1255 pStart = dlrDocData(readers[0].pReader); |
| 1340 nStart = dlrDocDataBytes(readers[0].pReader); | 1256 nStart = dlrDocDataBytes(readers[0].pReader); |
| 1341 iFirstDocid = iDocid; | 1257 iFirstDocid = iDocid; |
| 1342 } | 1258 } |
| 1343 iLastDocid = iDocid; | 1259 iLastDocid = iDocid; |
| 1344 rc = dlrStep(readers[0].pReader); | 1260 dlrStep(readers[0].pReader); |
| 1345 if( rc!=SQLITE_OK ) goto err; | |
| 1346 | 1261 |
| 1347 /* Drop all of the older elements with the same docid. */ | 1262 /* Drop all of the older elements with the same docid. */ |
| 1348 for(i=1; i<nReaders && | 1263 for(i=1; i<nReaders && |
| 1349 !dlrAtEnd(readers[i].pReader) && | 1264 !dlrAtEnd(readers[i].pReader) && |
| 1350 dlrDocid(readers[i].pReader)==iDocid; i++){ | 1265 dlrDocid(readers[i].pReader)==iDocid; i++){ |
| 1351 rc = dlrStep(readers[i].pReader); | 1266 dlrStep(readers[i].pReader); |
| 1352 if( rc!=SQLITE_OK ) goto err; | |
| 1353 } | 1267 } |
| 1354 | 1268 |
| 1355 /* Get the readers back into order. */ | 1269 /* Get the readers back into order. */ |
| 1356 while( i-->0 ){ | 1270 while( i-->0 ){ |
| 1357 orderedDLReaderReorder(readers+i, nReaders-i); | 1271 orderedDLReaderReorder(readers+i, nReaders-i); |
| 1358 } | 1272 } |
| 1359 } | 1273 } |
| 1360 | 1274 |
| 1361 /* Copy over any remaining elements. */ | 1275 /* Copy over any remaining elements. */ |
| 1362 if( nStart>0 ) | 1276 if( nStart>0 ) dlwAppend(&writer, pStart, nStart, iFirstDocid, iLastDocid); |
| 1363 rc = dlwAppend(&writer, pStart, nStart, iFirstDocid, iLastDocid); | |
| 1364 err: | |
| 1365 dlwDestroy(&writer); | 1277 dlwDestroy(&writer); |
| 1366 return rc; | |
| 1367 } | 1278 } |
| 1368 | 1279 |
| 1369 /* Helper function for posListUnion(). Compares the current position | 1280 /* Helper function for posListUnion(). Compares the current position |
| 1370 ** between left and right, returning as standard C idiom of <0 if | 1281 ** between left and right, returning as standard C idiom of <0 if |
| 1371 ** left<right, >0 if left>right, and 0 if left==right. "End" always | 1282 ** left<right, >0 if left>right, and 0 if left==right. "End" always |
| 1372 ** compares greater. | 1283 ** compares greater. |
| 1373 */ | 1284 */ |
| 1374 static int posListCmp(PLReader *pLeft, PLReader *pRight){ | 1285 static int posListCmp(PLReader *pLeft, PLReader *pRight){ |
| 1375 assert( pLeft->iType==pRight->iType ); | 1286 assert( pLeft->iType==pRight->iType ); |
| 1376 if( pLeft->iType==DL_DOCIDS ) return 0; | 1287 if( pLeft->iType==DL_DOCIDS ) return 0; |
| (...skipping 15 matching lines...) Expand all Loading... |
| 1392 if( plrEndOffset(pLeft)>plrEndOffset(pRight) ) return 1; | 1303 if( plrEndOffset(pLeft)>plrEndOffset(pRight) ) return 1; |
| 1393 | 1304 |
| 1394 return 0; | 1305 return 0; |
| 1395 } | 1306 } |
| 1396 | 1307 |
| 1397 /* Write the union of position lists in pLeft and pRight to pOut. | 1308 /* Write the union of position lists in pLeft and pRight to pOut. |
| 1398 ** "Union" in this case meaning "All unique position tuples". Should | 1309 ** "Union" in this case meaning "All unique position tuples". Should |
| 1399 ** work with any doclist type, though both inputs and the output | 1310 ** work with any doclist type, though both inputs and the output |
| 1400 ** should be the same type. | 1311 ** should be the same type. |
| 1401 */ | 1312 */ |
| 1402 static int posListUnion(DLReader *pLeft, DLReader *pRight, DLWriter *pOut){ | 1313 static void posListUnion(DLReader *pLeft, DLReader *pRight, DLWriter *pOut){ |
| 1403 PLReader left, right; | 1314 PLReader left, right; |
| 1404 PLWriter writer; | 1315 PLWriter writer; |
| 1405 int rc; | |
| 1406 | 1316 |
| 1407 assert( dlrDocid(pLeft)==dlrDocid(pRight) ); | 1317 assert( dlrDocid(pLeft)==dlrDocid(pRight) ); |
| 1408 assert( pLeft->iType==pRight->iType ); | 1318 assert( pLeft->iType==pRight->iType ); |
| 1409 assert( pLeft->iType==pOut->iType ); | 1319 assert( pLeft->iType==pOut->iType ); |
| 1410 | 1320 |
| 1411 rc = plrInit(&left, pLeft); | 1321 plrInit(&left, pLeft); |
| 1412 if( rc != SQLITE_OK ) return rc; | 1322 plrInit(&right, pRight); |
| 1413 rc = plrInit(&right, pRight); | |
| 1414 if( rc != SQLITE_OK ){ | |
| 1415 plrDestroy(&left); | |
| 1416 return rc; | |
| 1417 } | |
| 1418 plwInit(&writer, pOut, dlrDocid(pLeft)); | 1323 plwInit(&writer, pOut, dlrDocid(pLeft)); |
| 1419 | 1324 |
| 1420 while( !plrAtEnd(&left) || !plrAtEnd(&right) ){ | 1325 while( !plrAtEnd(&left) || !plrAtEnd(&right) ){ |
| 1421 int c = posListCmp(&left, &right); | 1326 int c = posListCmp(&left, &right); |
| 1422 if( c<0 ){ | 1327 if( c<0 ){ |
| 1423 plwCopy(&writer, &left); | 1328 plwCopy(&writer, &left); |
| 1424 rc = plrStep(&left); | 1329 plrStep(&left); |
| 1425 if( rc != SQLITE_OK ) break; | |
| 1426 }else if( c>0 ){ | 1330 }else if( c>0 ){ |
| 1427 plwCopy(&writer, &right); | 1331 plwCopy(&writer, &right); |
| 1428 rc = plrStep(&right); | 1332 plrStep(&right); |
| 1429 if( rc != SQLITE_OK ) break; | |
| 1430 }else{ | 1333 }else{ |
| 1431 plwCopy(&writer, &left); | 1334 plwCopy(&writer, &left); |
| 1432 rc = plrStep(&left); | 1335 plrStep(&left); |
| 1433 if( rc != SQLITE_OK ) break; | 1336 plrStep(&right); |
| 1434 rc = plrStep(&right); | |
| 1435 if( rc != SQLITE_OK ) break; | |
| 1436 } | 1337 } |
| 1437 } | 1338 } |
| 1438 | 1339 |
| 1439 plwTerminate(&writer); | 1340 plwTerminate(&writer); |
| 1440 plwDestroy(&writer); | 1341 plwDestroy(&writer); |
| 1441 plrDestroy(&left); | 1342 plrDestroy(&left); |
| 1442 plrDestroy(&right); | 1343 plrDestroy(&right); |
| 1443 return rc; | |
| 1444 } | 1344 } |
| 1445 | 1345 |
| 1446 /* Write the union of doclists in pLeft and pRight to pOut. For | 1346 /* Write the union of doclists in pLeft and pRight to pOut. For |
| 1447 ** docids in common between the inputs, the union of the position | 1347 ** docids in common between the inputs, the union of the position |
| 1448 ** lists is written. Inputs and outputs are always type DL_DEFAULT. | 1348 ** lists is written. Inputs and outputs are always type DL_DEFAULT. |
| 1449 */ | 1349 */ |
| 1450 static int docListUnion( | 1350 static void docListUnion( |
| 1451 const char *pLeft, int nLeft, | 1351 const char *pLeft, int nLeft, |
| 1452 const char *pRight, int nRight, | 1352 const char *pRight, int nRight, |
| 1453 DataBuffer *pOut /* Write the combined doclist here */ | 1353 DataBuffer *pOut /* Write the combined doclist here */ |
| 1454 ){ | 1354 ){ |
| 1455 DLReader left, right; | 1355 DLReader left, right; |
| 1456 DLWriter writer; | 1356 DLWriter writer; |
| 1457 int rc; | |
| 1458 | 1357 |
| 1459 if( nLeft==0 ){ | 1358 if( nLeft==0 ){ |
| 1460 if( nRight!=0) dataBufferAppend(pOut, pRight, nRight); | 1359 if( nRight!=0) dataBufferAppend(pOut, pRight, nRight); |
| 1461 return SQLITE_OK; | 1360 return; |
| 1462 } | 1361 } |
| 1463 if( nRight==0 ){ | 1362 if( nRight==0 ){ |
| 1464 dataBufferAppend(pOut, pLeft, nLeft); | 1363 dataBufferAppend(pOut, pLeft, nLeft); |
| 1465 return SQLITE_OK; | 1364 return; |
| 1466 } | 1365 } |
| 1467 | 1366 |
| 1468 rc = dlrInit(&left, DL_DEFAULT, pLeft, nLeft); | 1367 dlrInit(&left, DL_DEFAULT, pLeft, nLeft); |
| 1469 if( rc!=SQLITE_OK ) return rc; | 1368 dlrInit(&right, DL_DEFAULT, pRight, nRight); |
| 1470 rc = dlrInit(&right, DL_DEFAULT, pRight, nRight); | |
| 1471 if( rc!=SQLITE_OK ){ | |
| 1472 dlrDestroy(&left); | |
| 1473 return rc; | |
| 1474 } | |
| 1475 dlwInit(&writer, DL_DEFAULT, pOut); | 1369 dlwInit(&writer, DL_DEFAULT, pOut); |
| 1476 | 1370 |
| 1477 while( !dlrAtEnd(&left) || !dlrAtEnd(&right) ){ | 1371 while( !dlrAtEnd(&left) || !dlrAtEnd(&right) ){ |
| 1478 if( dlrAtEnd(&right) ){ | 1372 if( dlrAtEnd(&right) ){ |
| 1479 rc = dlwCopy(&writer, &left); | 1373 dlwCopy(&writer, &left); |
| 1480 if( rc!=SQLITE_OK ) break; | 1374 dlrStep(&left); |
| 1481 rc = dlrStep(&left); | |
| 1482 if( rc!=SQLITE_OK ) break; | |
| 1483 }else if( dlrAtEnd(&left) ){ | 1375 }else if( dlrAtEnd(&left) ){ |
| 1484 rc = dlwCopy(&writer, &right); | 1376 dlwCopy(&writer, &right); |
| 1485 if( rc!=SQLITE_OK ) break; | 1377 dlrStep(&right); |
| 1486 rc = dlrStep(&right); | |
| 1487 if( rc!=SQLITE_OK ) break; | |
| 1488 }else if( dlrDocid(&left)<dlrDocid(&right) ){ | 1378 }else if( dlrDocid(&left)<dlrDocid(&right) ){ |
| 1489 rc = dlwCopy(&writer, &left); | 1379 dlwCopy(&writer, &left); |
| 1490 if( rc!=SQLITE_OK ) break; | 1380 dlrStep(&left); |
| 1491 rc = dlrStep(&left); | |
| 1492 if( rc!=SQLITE_OK ) break; | |
| 1493 }else if( dlrDocid(&left)>dlrDocid(&right) ){ | 1381 }else if( dlrDocid(&left)>dlrDocid(&right) ){ |
| 1494 rc = dlwCopy(&writer, &right); | 1382 dlwCopy(&writer, &right); |
| 1495 if( rc!=SQLITE_OK ) break; | 1383 dlrStep(&right); |
| 1496 rc = dlrStep(&right); | |
| 1497 if( rc!=SQLITE_OK ) break; | |
| 1498 }else{ | 1384 }else{ |
| 1499 rc = posListUnion(&left, &right, &writer); | 1385 posListUnion(&left, &right, &writer); |
| 1500 if( rc!=SQLITE_OK ) break; | 1386 dlrStep(&left); |
| 1501 rc = dlrStep(&left); | 1387 dlrStep(&right); |
| 1502 if( rc!=SQLITE_OK ) break; | |
| 1503 rc = dlrStep(&right); | |
| 1504 if( rc!=SQLITE_OK ) break; | |
| 1505 } | 1388 } |
| 1506 } | 1389 } |
| 1507 | 1390 |
| 1508 dlrDestroy(&left); | 1391 dlrDestroy(&left); |
| 1509 dlrDestroy(&right); | 1392 dlrDestroy(&right); |
| 1510 dlwDestroy(&writer); | 1393 dlwDestroy(&writer); |
| 1511 return rc; | |
| 1512 } | 1394 } |
| 1513 | 1395 |
| 1514 /* pLeft and pRight are DLReaders positioned to the same docid. | 1396 /* pLeft and pRight are DLReaders positioned to the same docid. |
| 1515 ** | 1397 ** |
| 1516 ** If there are no instances in pLeft or pRight where the position | 1398 ** If there are no instances in pLeft or pRight where the position |
| 1517 ** of pLeft is one less than the position of pRight, then this | 1399 ** of pLeft is one less than the position of pRight, then this |
| 1518 ** routine adds nothing to pOut. | 1400 ** routine adds nothing to pOut. |
| 1519 ** | 1401 ** |
| 1520 ** If there are one or more instances where positions from pLeft | 1402 ** If there are one or more instances where positions from pLeft |
| 1521 ** are exactly one less than positions from pRight, then add a new | 1403 ** are exactly one less than positions from pRight, then add a new |
| 1522 ** document record to pOut. If pOut wants to hold positions, then | 1404 ** document record to pOut. If pOut wants to hold positions, then |
| 1523 ** include the positions from pRight that are one more than a | 1405 ** include the positions from pRight that are one more than a |
| 1524 ** position in pLeft. In other words: pRight.iPos==pLeft.iPos+1. | 1406 ** position in pLeft. In other words: pRight.iPos==pLeft.iPos+1. |
| 1525 */ | 1407 */ |
| 1526 static int posListPhraseMerge(DLReader *pLeft, DLReader *pRight, | 1408 static void posListPhraseMerge(DLReader *pLeft, DLReader *pRight, |
| 1527 DLWriter *pOut){ | 1409 DLWriter *pOut){ |
| 1528 PLReader left, right; | 1410 PLReader left, right; |
| 1529 PLWriter writer; | 1411 PLWriter writer; |
| 1530 int match = 0; | 1412 int match = 0; |
| 1531 int rc; | |
| 1532 | 1413 |
| 1533 assert( dlrDocid(pLeft)==dlrDocid(pRight) ); | 1414 assert( dlrDocid(pLeft)==dlrDocid(pRight) ); |
| 1534 assert( pOut->iType!=DL_POSITIONS_OFFSETS ); | 1415 assert( pOut->iType!=DL_POSITIONS_OFFSETS ); |
| 1535 | 1416 |
| 1536 rc = plrInit(&left, pLeft); | 1417 plrInit(&left, pLeft); |
| 1537 if( rc!=SQLITE_OK ) return rc; | 1418 plrInit(&right, pRight); |
| 1538 rc = plrInit(&right, pRight); | |
| 1539 if( rc!=SQLITE_OK ){ | |
| 1540 plrDestroy(&left); | |
| 1541 return rc; | |
| 1542 } | |
| 1543 | 1419 |
| 1544 while( !plrAtEnd(&left) && !plrAtEnd(&right) ){ | 1420 while( !plrAtEnd(&left) && !plrAtEnd(&right) ){ |
| 1545 if( plrColumn(&left)<plrColumn(&right) ){ | 1421 if( plrColumn(&left)<plrColumn(&right) ){ |
| 1546 rc = plrStep(&left); | 1422 plrStep(&left); |
| 1547 if( rc!=SQLITE_OK ) break; | |
| 1548 }else if( plrColumn(&left)>plrColumn(&right) ){ | 1423 }else if( plrColumn(&left)>plrColumn(&right) ){ |
| 1549 rc = plrStep(&right); | 1424 plrStep(&right); |
| 1550 if( rc!=SQLITE_OK ) break; | |
| 1551 }else if( plrPosition(&left)+1<plrPosition(&right) ){ | 1425 }else if( plrPosition(&left)+1<plrPosition(&right) ){ |
| 1552 rc = plrStep(&left); | 1426 plrStep(&left); |
| 1553 if( rc!=SQLITE_OK ) break; | |
| 1554 }else if( plrPosition(&left)+1>plrPosition(&right) ){ | 1427 }else if( plrPosition(&left)+1>plrPosition(&right) ){ |
| 1555 rc = plrStep(&right); | 1428 plrStep(&right); |
| 1556 if( rc!=SQLITE_OK ) break; | |
| 1557 }else{ | 1429 }else{ |
| 1558 if( !match ){ | 1430 if( !match ){ |
| 1559 plwInit(&writer, pOut, dlrDocid(pLeft)); | 1431 plwInit(&writer, pOut, dlrDocid(pLeft)); |
| 1560 match = 1; | 1432 match = 1; |
| 1561 } | 1433 } |
| 1562 plwAdd(&writer, plrColumn(&right), plrPosition(&right), 0, 0); | 1434 plwAdd(&writer, plrColumn(&right), plrPosition(&right), 0, 0); |
| 1563 rc = plrStep(&left); | 1435 plrStep(&left); |
| 1564 if( rc!=SQLITE_OK ) break; | 1436 plrStep(&right); |
| 1565 rc = plrStep(&right); | |
| 1566 if( rc!=SQLITE_OK ) break; | |
| 1567 } | 1437 } |
| 1568 } | 1438 } |
| 1569 | 1439 |
| 1570 if( match ){ | 1440 if( match ){ |
| 1571 plwTerminate(&writer); | 1441 plwTerminate(&writer); |
| 1572 plwDestroy(&writer); | 1442 plwDestroy(&writer); |
| 1573 } | 1443 } |
| 1574 | 1444 |
| 1575 plrDestroy(&left); | 1445 plrDestroy(&left); |
| 1576 plrDestroy(&right); | 1446 plrDestroy(&right); |
| 1577 return rc; | |
| 1578 } | 1447 } |
| 1579 | 1448 |
| 1580 /* We have two doclists with positions: pLeft and pRight. | 1449 /* We have two doclists with positions: pLeft and pRight. |
| 1581 ** Write the phrase intersection of these two doclists into pOut. | 1450 ** Write the phrase intersection of these two doclists into pOut. |
| 1582 ** | 1451 ** |
| 1583 ** A phrase intersection means that two documents only match | 1452 ** A phrase intersection means that two documents only match |
| 1584 ** if pLeft.iPos+1==pRight.iPos. | 1453 ** if pLeft.iPos+1==pRight.iPos. |
| 1585 ** | 1454 ** |
| 1586 ** iType controls the type of data written to pOut. If iType is | 1455 ** iType controls the type of data written to pOut. If iType is |
| 1587 ** DL_POSITIONS, the positions are those from pRight. | 1456 ** DL_POSITIONS, the positions are those from pRight. |
| 1588 */ | 1457 */ |
| 1589 static int docListPhraseMerge( | 1458 static void docListPhraseMerge( |
| 1590 const char *pLeft, int nLeft, | 1459 const char *pLeft, int nLeft, |
| 1591 const char *pRight, int nRight, | 1460 const char *pRight, int nRight, |
| 1592 DocListType iType, | 1461 DocListType iType, |
| 1593 DataBuffer *pOut /* Write the combined doclist here */ | 1462 DataBuffer *pOut /* Write the combined doclist here */ |
| 1594 ){ | 1463 ){ |
| 1595 DLReader left, right; | 1464 DLReader left, right; |
| 1596 DLWriter writer; | 1465 DLWriter writer; |
| 1597 int rc; | |
| 1598 | 1466 |
| 1599 if( nLeft==0 || nRight==0 ) return SQLITE_OK; | 1467 if( nLeft==0 || nRight==0 ) return; |
| 1600 | 1468 |
| 1601 assert( iType!=DL_POSITIONS_OFFSETS ); | 1469 assert( iType!=DL_POSITIONS_OFFSETS ); |
| 1602 | 1470 |
| 1603 rc = dlrInit(&left, DL_POSITIONS, pLeft, nLeft); | 1471 dlrInit(&left, DL_POSITIONS, pLeft, nLeft); |
| 1604 if( rc!=SQLITE_OK ) return rc; | 1472 dlrInit(&right, DL_POSITIONS, pRight, nRight); |
| 1605 rc = dlrInit(&right, DL_POSITIONS, pRight, nRight); | |
| 1606 if( rc!=SQLITE_OK ){ | |
| 1607 dlrDestroy(&left); | |
| 1608 return rc; | |
| 1609 } | |
| 1610 dlwInit(&writer, iType, pOut); | 1473 dlwInit(&writer, iType, pOut); |
| 1611 | 1474 |
| 1612 while( !dlrAtEnd(&left) && !dlrAtEnd(&right) ){ | 1475 while( !dlrAtEnd(&left) && !dlrAtEnd(&right) ){ |
| 1613 if( dlrDocid(&left)<dlrDocid(&right) ){ | 1476 if( dlrDocid(&left)<dlrDocid(&right) ){ |
| 1614 rc = dlrStep(&left); | 1477 dlrStep(&left); |
| 1615 if( rc!=SQLITE_OK ) break; | |
| 1616 }else if( dlrDocid(&right)<dlrDocid(&left) ){ | 1478 }else if( dlrDocid(&right)<dlrDocid(&left) ){ |
| 1617 rc = dlrStep(&right); | 1479 dlrStep(&right); |
| 1618 if( rc!=SQLITE_OK ) break; | |
| 1619 }else{ | 1480 }else{ |
| 1620 rc = posListPhraseMerge(&left, &right, &writer); | 1481 posListPhraseMerge(&left, &right, &writer); |
| 1621 if( rc!=SQLITE_OK ) break; | 1482 dlrStep(&left); |
| 1622 rc = dlrStep(&left); | 1483 dlrStep(&right); |
| 1623 if( rc!=SQLITE_OK ) break; | |
| 1624 rc = dlrStep(&right); | |
| 1625 if( rc!=SQLITE_OK ) break; | |
| 1626 } | 1484 } |
| 1627 } | 1485 } |
| 1628 | 1486 |
| 1629 dlrDestroy(&left); | 1487 dlrDestroy(&left); |
| 1630 dlrDestroy(&right); | 1488 dlrDestroy(&right); |
| 1631 dlwDestroy(&writer); | 1489 dlwDestroy(&writer); |
| 1632 return rc; | |
| 1633 } | 1490 } |
| 1634 | 1491 |
| 1635 /* We have two DL_DOCIDS doclists: pLeft and pRight. | 1492 /* We have two DL_DOCIDS doclists: pLeft and pRight. |
| 1636 ** Write the intersection of these two doclists into pOut as a | 1493 ** Write the intersection of these two doclists into pOut as a |
| 1637 ** DL_DOCIDS doclist. | 1494 ** DL_DOCIDS doclist. |
| 1638 */ | 1495 */ |
| 1639 static int docListAndMerge( | 1496 static void docListAndMerge( |
| 1640 const char *pLeft, int nLeft, | 1497 const char *pLeft, int nLeft, |
| 1641 const char *pRight, int nRight, | 1498 const char *pRight, int nRight, |
| 1642 DataBuffer *pOut /* Write the combined doclist here */ | 1499 DataBuffer *pOut /* Write the combined doclist here */ |
| 1643 ){ | 1500 ){ |
| 1644 DLReader left, right; | 1501 DLReader left, right; |
| 1645 DLWriter writer; | 1502 DLWriter writer; |
| 1646 int rc; | |
| 1647 | 1503 |
| 1648 if( nLeft==0 || nRight==0 ) return SQLITE_OK; | 1504 if( nLeft==0 || nRight==0 ) return; |
| 1649 | 1505 |
| 1650 rc = dlrInit(&left, DL_DOCIDS, pLeft, nLeft); | 1506 dlrInit(&left, DL_DOCIDS, pLeft, nLeft); |
| 1651 if( rc!=SQLITE_OK ) return rc; | 1507 dlrInit(&right, DL_DOCIDS, pRight, nRight); |
| 1652 rc = dlrInit(&right, DL_DOCIDS, pRight, nRight); | |
| 1653 if( rc!=SQLITE_OK ){ | |
| 1654 dlrDestroy(&left); | |
| 1655 return rc; | |
| 1656 } | |
| 1657 dlwInit(&writer, DL_DOCIDS, pOut); | 1508 dlwInit(&writer, DL_DOCIDS, pOut); |
| 1658 | 1509 |
| 1659 while( !dlrAtEnd(&left) && !dlrAtEnd(&right) ){ | 1510 while( !dlrAtEnd(&left) && !dlrAtEnd(&right) ){ |
| 1660 if( dlrDocid(&left)<dlrDocid(&right) ){ | 1511 if( dlrDocid(&left)<dlrDocid(&right) ){ |
| 1661 rc = dlrStep(&left); | 1512 dlrStep(&left); |
| 1662 if( rc!=SQLITE_OK ) break; | |
| 1663 }else if( dlrDocid(&right)<dlrDocid(&left) ){ | 1513 }else if( dlrDocid(&right)<dlrDocid(&left) ){ |
| 1664 rc = dlrStep(&right); | 1514 dlrStep(&right); |
| 1665 if( rc!=SQLITE_OK ) break; | |
| 1666 }else{ | 1515 }else{ |
| 1667 dlwAdd(&writer, dlrDocid(&left)); | 1516 dlwAdd(&writer, dlrDocid(&left)); |
| 1668 rc = dlrStep(&left); | 1517 dlrStep(&left); |
| 1669 if( rc!=SQLITE_OK ) break; | 1518 dlrStep(&right); |
| 1670 rc = dlrStep(&right); | |
| 1671 if( rc!=SQLITE_OK ) break; | |
| 1672 } | 1519 } |
| 1673 } | 1520 } |
| 1674 | 1521 |
| 1675 dlrDestroy(&left); | 1522 dlrDestroy(&left); |
| 1676 dlrDestroy(&right); | 1523 dlrDestroy(&right); |
| 1677 dlwDestroy(&writer); | 1524 dlwDestroy(&writer); |
| 1678 return rc; | |
| 1679 } | 1525 } |
| 1680 | 1526 |
| 1681 /* We have two DL_DOCIDS doclists: pLeft and pRight. | 1527 /* We have two DL_DOCIDS doclists: pLeft and pRight. |
| 1682 ** Write the union of these two doclists into pOut as a | 1528 ** Write the union of these two doclists into pOut as a |
| 1683 ** DL_DOCIDS doclist. | 1529 ** DL_DOCIDS doclist. |
| 1684 */ | 1530 */ |
| 1685 static int docListOrMerge( | 1531 static void docListOrMerge( |
| 1686 const char *pLeft, int nLeft, | 1532 const char *pLeft, int nLeft, |
| 1687 const char *pRight, int nRight, | 1533 const char *pRight, int nRight, |
| 1688 DataBuffer *pOut /* Write the combined doclist here */ | 1534 DataBuffer *pOut /* Write the combined doclist here */ |
| 1689 ){ | 1535 ){ |
| 1690 DLReader left, right; | 1536 DLReader left, right; |
| 1691 DLWriter writer; | 1537 DLWriter writer; |
| 1692 int rc; | |
| 1693 | 1538 |
| 1694 if( nLeft==0 ){ | 1539 if( nLeft==0 ){ |
| 1695 if( nRight!=0 ) dataBufferAppend(pOut, pRight, nRight); | 1540 if( nRight!=0 ) dataBufferAppend(pOut, pRight, nRight); |
| 1696 return SQLITE_OK; | 1541 return; |
| 1697 } | 1542 } |
| 1698 if( nRight==0 ){ | 1543 if( nRight==0 ){ |
| 1699 dataBufferAppend(pOut, pLeft, nLeft); | 1544 dataBufferAppend(pOut, pLeft, nLeft); |
| 1700 return SQLITE_OK; | 1545 return; |
| 1701 } | 1546 } |
| 1702 | 1547 |
| 1703 rc = dlrInit(&left, DL_DOCIDS, pLeft, nLeft); | 1548 dlrInit(&left, DL_DOCIDS, pLeft, nLeft); |
| 1704 if( rc!=SQLITE_OK ) return rc; | 1549 dlrInit(&right, DL_DOCIDS, pRight, nRight); |
| 1705 rc = dlrInit(&right, DL_DOCIDS, pRight, nRight); | |
| 1706 if( rc!=SQLITE_OK ){ | |
| 1707 dlrDestroy(&left); | |
| 1708 return rc; | |
| 1709 } | |
| 1710 dlwInit(&writer, DL_DOCIDS, pOut); | 1550 dlwInit(&writer, DL_DOCIDS, pOut); |
| 1711 | 1551 |
| 1712 while( !dlrAtEnd(&left) || !dlrAtEnd(&right) ){ | 1552 while( !dlrAtEnd(&left) || !dlrAtEnd(&right) ){ |
| 1713 if( dlrAtEnd(&right) ){ | 1553 if( dlrAtEnd(&right) ){ |
| 1714 dlwAdd(&writer, dlrDocid(&left)); | 1554 dlwAdd(&writer, dlrDocid(&left)); |
| 1715 rc = dlrStep(&left); | 1555 dlrStep(&left); |
| 1716 if( rc!=SQLITE_OK ) break; | |
| 1717 }else if( dlrAtEnd(&left) ){ | 1556 }else if( dlrAtEnd(&left) ){ |
| 1718 dlwAdd(&writer, dlrDocid(&right)); | 1557 dlwAdd(&writer, dlrDocid(&right)); |
| 1719 rc = dlrStep(&right); | 1558 dlrStep(&right); |
| 1720 if( rc!=SQLITE_OK ) break; | |
| 1721 }else if( dlrDocid(&left)<dlrDocid(&right) ){ | 1559 }else if( dlrDocid(&left)<dlrDocid(&right) ){ |
| 1722 dlwAdd(&writer, dlrDocid(&left)); | 1560 dlwAdd(&writer, dlrDocid(&left)); |
| 1723 rc = dlrStep(&left); | 1561 dlrStep(&left); |
| 1724 if( rc!=SQLITE_OK ) break; | |
| 1725 }else if( dlrDocid(&right)<dlrDocid(&left) ){ | 1562 }else if( dlrDocid(&right)<dlrDocid(&left) ){ |
| 1726 dlwAdd(&writer, dlrDocid(&right)); | 1563 dlwAdd(&writer, dlrDocid(&right)); |
| 1727 rc = dlrStep(&right); | 1564 dlrStep(&right); |
| 1728 if( rc!=SQLITE_OK ) break; | |
| 1729 }else{ | 1565 }else{ |
| 1730 dlwAdd(&writer, dlrDocid(&left)); | 1566 dlwAdd(&writer, dlrDocid(&left)); |
| 1731 rc = dlrStep(&left); | 1567 dlrStep(&left); |
| 1732 if( rc!=SQLITE_OK ) break; | 1568 dlrStep(&right); |
| 1733 rc = dlrStep(&right); | |
| 1734 if( rc!=SQLITE_OK ) break; | |
| 1735 } | 1569 } |
| 1736 } | 1570 } |
| 1737 | 1571 |
| 1738 dlrDestroy(&left); | 1572 dlrDestroy(&left); |
| 1739 dlrDestroy(&right); | 1573 dlrDestroy(&right); |
| 1740 dlwDestroy(&writer); | 1574 dlwDestroy(&writer); |
| 1741 return rc; | |
| 1742 } | 1575 } |
| 1743 | 1576 |
| 1744 /* We have two DL_DOCIDS doclists: pLeft and pRight. | 1577 /* We have two DL_DOCIDS doclists: pLeft and pRight. |
| 1745 ** Write into pOut as DL_DOCIDS doclist containing all documents that | 1578 ** Write into pOut as DL_DOCIDS doclist containing all documents that |
| 1746 ** occur in pLeft but not in pRight. | 1579 ** occur in pLeft but not in pRight. |
| 1747 */ | 1580 */ |
| 1748 static int docListExceptMerge( | 1581 static void docListExceptMerge( |
| 1749 const char *pLeft, int nLeft, | 1582 const char *pLeft, int nLeft, |
| 1750 const char *pRight, int nRight, | 1583 const char *pRight, int nRight, |
| 1751 DataBuffer *pOut /* Write the combined doclist here */ | 1584 DataBuffer *pOut /* Write the combined doclist here */ |
| 1752 ){ | 1585 ){ |
| 1753 DLReader left, right; | 1586 DLReader left, right; |
| 1754 DLWriter writer; | 1587 DLWriter writer; |
| 1755 int rc; | |
| 1756 | 1588 |
| 1757 if( nLeft==0 ) return SQLITE_OK; | 1589 if( nLeft==0 ) return; |
| 1758 if( nRight==0 ){ | 1590 if( nRight==0 ){ |
| 1759 dataBufferAppend(pOut, pLeft, nLeft); | 1591 dataBufferAppend(pOut, pLeft, nLeft); |
| 1760 return SQLITE_OK; | 1592 return; |
| 1761 } | 1593 } |
| 1762 | 1594 |
| 1763 rc = dlrInit(&left, DL_DOCIDS, pLeft, nLeft); | 1595 dlrInit(&left, DL_DOCIDS, pLeft, nLeft); |
| 1764 if( rc!=SQLITE_OK ) return rc; | 1596 dlrInit(&right, DL_DOCIDS, pRight, nRight); |
| 1765 rc = dlrInit(&right, DL_DOCIDS, pRight, nRight); | |
| 1766 if( rc!=SQLITE_OK ){ | |
| 1767 dlrDestroy(&left); | |
| 1768 return rc; | |
| 1769 } | |
| 1770 dlwInit(&writer, DL_DOCIDS, pOut); | 1597 dlwInit(&writer, DL_DOCIDS, pOut); |
| 1771 | 1598 |
| 1772 while( !dlrAtEnd(&left) ){ | 1599 while( !dlrAtEnd(&left) ){ |
| 1773 while( !dlrAtEnd(&right) && dlrDocid(&right)<dlrDocid(&left) ){ | 1600 while( !dlrAtEnd(&right) && dlrDocid(&right)<dlrDocid(&left) ){ |
| 1774 rc = dlrStep(&right); | 1601 dlrStep(&right); |
| 1775 if( rc!=SQLITE_OK ) goto err; | |
| 1776 } | 1602 } |
| 1777 if( dlrAtEnd(&right) || dlrDocid(&left)<dlrDocid(&right) ){ | 1603 if( dlrAtEnd(&right) || dlrDocid(&left)<dlrDocid(&right) ){ |
| 1778 dlwAdd(&writer, dlrDocid(&left)); | 1604 dlwAdd(&writer, dlrDocid(&left)); |
| 1779 } | 1605 } |
| 1780 rc = dlrStep(&left); | 1606 dlrStep(&left); |
| 1781 if( rc!=SQLITE_OK ) break; | |
| 1782 } | 1607 } |
| 1783 | 1608 |
| 1784 err: | |
| 1785 dlrDestroy(&left); | 1609 dlrDestroy(&left); |
| 1786 dlrDestroy(&right); | 1610 dlrDestroy(&right); |
| 1787 dlwDestroy(&writer); | 1611 dlwDestroy(&writer); |
| 1788 return rc; | |
| 1789 } | 1612 } |
| 1790 | 1613 |
| 1791 static char *string_dup_n(const char *s, int n){ | 1614 static char *string_dup_n(const char *s, int n){ |
| 1792 char *str = sqlite3_malloc(n + 1); | 1615 char *str = sqlite3_malloc(n + 1); |
| 1793 memcpy(str, s, n); | 1616 memcpy(str, s, n); |
| 1794 str[n] = '\0'; | 1617 str[n] = '\0'; |
| 1795 return str; | 1618 return str; |
| 1796 } | 1619 } |
| 1797 | 1620 |
| 1798 /* Duplicate a string; the caller must free() the returned string. | 1621 /* Duplicate a string; the caller must free() the returned string. |
| (...skipping 183 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1982 /* CONTENT_EXISTS */ "select rowid from %_content limit 1", | 1805 /* CONTENT_EXISTS */ "select rowid from %_content limit 1", |
| 1983 | 1806 |
| 1984 /* BLOCK_INSERT */ "insert into %_segments values (?)", | 1807 /* BLOCK_INSERT */ "insert into %_segments values (?)", |
| 1985 /* BLOCK_SELECT */ "select block from %_segments where rowid = ?", | 1808 /* BLOCK_SELECT */ "select block from %_segments where rowid = ?", |
| 1986 /* BLOCK_DELETE */ "delete from %_segments where rowid between ? and ?", | 1809 /* BLOCK_DELETE */ "delete from %_segments where rowid between ? and ?", |
| 1987 /* BLOCK_DELETE_ALL */ "delete from %_segments", | 1810 /* BLOCK_DELETE_ALL */ "delete from %_segments", |
| 1988 | 1811 |
| 1989 /* SEGDIR_MAX_INDEX */ "select max(idx) from %_segdir where level = ?", | 1812 /* SEGDIR_MAX_INDEX */ "select max(idx) from %_segdir where level = ?", |
| 1990 /* SEGDIR_SET */ "insert into %_segdir values (?, ?, ?, ?, ?, ?)", | 1813 /* SEGDIR_SET */ "insert into %_segdir values (?, ?, ?, ?, ?, ?)", |
| 1991 /* SEGDIR_SELECT_LEVEL */ | 1814 /* SEGDIR_SELECT_LEVEL */ |
| 1992 "select start_block, leaves_end_block, root, idx from %_segdir " | 1815 "select start_block, leaves_end_block, root from %_segdir " |
| 1993 " where level = ? order by idx", | 1816 " where level = ? order by idx", |
| 1994 /* SEGDIR_SPAN */ | 1817 /* SEGDIR_SPAN */ |
| 1995 "select min(start_block), max(end_block) from %_segdir " | 1818 "select min(start_block), max(end_block) from %_segdir " |
| 1996 " where level = ? and start_block <> 0", | 1819 " where level = ? and start_block <> 0", |
| 1997 /* SEGDIR_DELETE */ "delete from %_segdir where level = ?", | 1820 /* SEGDIR_DELETE */ "delete from %_segdir where level = ?", |
| 1998 | 1821 |
| 1999 /* NOTE(shess): The first three results of the following two | 1822 /* NOTE(shess): The first three results of the following two |
| 2000 ** statements must match. | 1823 ** statements must match. |
| 2001 */ | 1824 */ |
| 2002 /* SEGDIR_SELECT_SEGMENT */ | 1825 /* SEGDIR_SELECT_SEGMENT */ |
| (...skipping 1578 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3581 } | 3404 } |
| 3582 } else { /* full-text query */ | 3405 } else { /* full-text query */ |
| 3583 rc = sqlite3_reset(c->pStmt); | 3406 rc = sqlite3_reset(c->pStmt); |
| 3584 if( rc!=SQLITE_OK ) return rc; | 3407 if( rc!=SQLITE_OK ) return rc; |
| 3585 | 3408 |
| 3586 if( c->result.nData==0 || dlrAtEnd(&c->reader) ){ | 3409 if( c->result.nData==0 || dlrAtEnd(&c->reader) ){ |
| 3587 c->eof = 1; | 3410 c->eof = 1; |
| 3588 return SQLITE_OK; | 3411 return SQLITE_OK; |
| 3589 } | 3412 } |
| 3590 rc = sqlite3_bind_int64(c->pStmt, 1, dlrDocid(&c->reader)); | 3413 rc = sqlite3_bind_int64(c->pStmt, 1, dlrDocid(&c->reader)); |
| 3591 if( rc!=SQLITE_OK ) return rc; | 3414 dlrStep(&c->reader); |
| 3592 rc = dlrStep(&c->reader); | |
| 3593 if( rc!=SQLITE_OK ) return rc; | 3415 if( rc!=SQLITE_OK ) return rc; |
| 3594 /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */ | 3416 /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */ |
| 3595 rc = sqlite3_step(c->pStmt); | 3417 rc = sqlite3_step(c->pStmt); |
| 3596 if( rc==SQLITE_ROW ){ /* the case we expect */ | 3418 if( rc==SQLITE_ROW ){ /* the case we expect */ |
| 3597 c->eof = 0; | 3419 c->eof = 0; |
| 3598 return SQLITE_OK; | 3420 return SQLITE_OK; |
| 3599 } | 3421 } |
| 3600 | 3422 /* an error occurred; abort */ |
| 3601 /* Corrupt if the index refers to missing document. */ | 3423 return rc==SQLITE_DONE ? SQLITE_ERROR : rc; |
| 3602 if( rc==SQLITE_DONE ) return SQLITE_CORRUPT_BKPT; | |
| 3603 | |
| 3604 return rc; | |
| 3605 } | 3424 } |
| 3606 } | 3425 } |
| 3607 | 3426 |
| 3608 | 3427 |
| 3609 /* TODO(shess) If we pushed LeafReader to the top of the file, or to | 3428 /* TODO(shess) If we pushed LeafReader to the top of the file, or to |
| 3610 ** another file, term_select() could be pushed above | 3429 ** another file, term_select() could be pushed above |
| 3611 ** docListOfTerm(). | 3430 ** docListOfTerm(). |
| 3612 */ | 3431 */ |
| 3613 static int termSelect(fulltext_vtab *v, int iColumn, | 3432 static int termSelect(fulltext_vtab *v, int iColumn, |
| 3614 const char *pTerm, int nTerm, int isPrefix, | 3433 const char *pTerm, int nTerm, int isPrefix, |
| (...skipping 27 matching lines...) Expand all Loading... |
| 3642 if( rc ) return rc; | 3461 if( rc ) return rc; |
| 3643 for(i=1; i<=pQTerm->nPhrase && left.nData>0; i++){ | 3462 for(i=1; i<=pQTerm->nPhrase && left.nData>0; i++){ |
| 3644 dataBufferInit(&right, 0); | 3463 dataBufferInit(&right, 0); |
| 3645 rc = termSelect(v, iColumn, pQTerm[i].pTerm, pQTerm[i].nTerm, | 3464 rc = termSelect(v, iColumn, pQTerm[i].pTerm, pQTerm[i].nTerm, |
| 3646 pQTerm[i].isPrefix, DL_POSITIONS, &right); | 3465 pQTerm[i].isPrefix, DL_POSITIONS, &right); |
| 3647 if( rc ){ | 3466 if( rc ){ |
| 3648 dataBufferDestroy(&left); | 3467 dataBufferDestroy(&left); |
| 3649 return rc; | 3468 return rc; |
| 3650 } | 3469 } |
| 3651 dataBufferInit(&new, 0); | 3470 dataBufferInit(&new, 0); |
| 3652 rc = docListPhraseMerge(left.pData, left.nData, right.pData, right.nData, | 3471 docListPhraseMerge(left.pData, left.nData, right.pData, right.nData, |
| 3653 i<pQTerm->nPhrase ? DL_POSITIONS : DL_DOCIDS, &new); | 3472 i<pQTerm->nPhrase ? DL_POSITIONS : DL_DOCIDS, &new); |
| 3654 dataBufferDestroy(&left); | 3473 dataBufferDestroy(&left); |
| 3655 dataBufferDestroy(&right); | 3474 dataBufferDestroy(&right); |
| 3656 if( rc!=SQLITE_OK ){ | |
| 3657 dataBufferDestroy(&new); | |
| 3658 return rc; | |
| 3659 } | |
| 3660 left = new; | 3475 left = new; |
| 3661 } | 3476 } |
| 3662 *pResult = left; | 3477 *pResult = left; |
| 3663 return rc; | 3478 return SQLITE_OK; |
| 3664 } | 3479 } |
| 3665 | 3480 |
| 3666 /* Add a new term pTerm[0..nTerm-1] to the query *q. | 3481 /* Add a new term pTerm[0..nTerm-1] to the query *q. |
| 3667 */ | 3482 */ |
| 3668 static void queryAdd(Query *q, const char *pTerm, int nTerm){ | 3483 static void queryAdd(Query *q, const char *pTerm, int nTerm){ |
| 3669 QueryTerm *t; | 3484 QueryTerm *t; |
| 3670 ++q->nTerms; | 3485 ++q->nTerms; |
| 3671 q->pTerms = sqlite3_realloc(q->pTerms, q->nTerms * sizeof(q->pTerms[0])); | 3486 q->pTerms = sqlite3_realloc(q->pTerms, q->nTerms * sizeof(q->pTerms[0])); |
| 3672 if( q->pTerms==0 ){ | 3487 if( q->pTerms==0 ){ |
| 3673 q->nTerms = 0; | 3488 q->nTerms = 0; |
| (...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3720 sqlite3_tokenizer *pTokenizer, /* The tokenizer to use */ | 3535 sqlite3_tokenizer *pTokenizer, /* The tokenizer to use */ |
| 3721 const char *pSegment, int nSegment, /* Query expression being parsed */ | 3536 const char *pSegment, int nSegment, /* Query expression being parsed */ |
| 3722 int inPhrase, /* True if within "..." */ | 3537 int inPhrase, /* True if within "..." */ |
| 3723 Query *pQuery /* Append results here */ | 3538 Query *pQuery /* Append results here */ |
| 3724 ){ | 3539 ){ |
| 3725 const sqlite3_tokenizer_module *pModule = pTokenizer->pModule; | 3540 const sqlite3_tokenizer_module *pModule = pTokenizer->pModule; |
| 3726 sqlite3_tokenizer_cursor *pCursor; | 3541 sqlite3_tokenizer_cursor *pCursor; |
| 3727 int firstIndex = pQuery->nTerms; | 3542 int firstIndex = pQuery->nTerms; |
| 3728 int iCol; | 3543 int iCol; |
| 3729 int nTerm = 1; | 3544 int nTerm = 1; |
| 3730 int iEndLast = -1; | |
| 3731 | 3545 |
| 3732 int rc = pModule->xOpen(pTokenizer, pSegment, nSegment, &pCursor); | 3546 int rc = pModule->xOpen(pTokenizer, pSegment, nSegment, &pCursor); |
| 3733 if( rc!=SQLITE_OK ) return rc; | 3547 if( rc!=SQLITE_OK ) return rc; |
| 3734 pCursor->pTokenizer = pTokenizer; | 3548 pCursor->pTokenizer = pTokenizer; |
| 3735 | 3549 |
| 3736 while( 1 ){ | 3550 while( 1 ){ |
| 3737 const char *pToken; | 3551 const char *pToken; |
| 3738 int nToken, iBegin, iEnd, iPos; | 3552 int nToken, iBegin, iEnd, iPos; |
| 3739 | 3553 |
| 3740 rc = pModule->xNext(pCursor, | 3554 rc = pModule->xNext(pCursor, |
| 3741 &pToken, &nToken, | 3555 &pToken, &nToken, |
| 3742 &iBegin, &iEnd, &iPos); | 3556 &iBegin, &iEnd, &iPos); |
| 3743 if( rc!=SQLITE_OK ) break; | 3557 if( rc!=SQLITE_OK ) break; |
| 3744 if( !inPhrase && | 3558 if( !inPhrase && |
| 3745 pSegment[iEnd]==':' && | 3559 pSegment[iEnd]==':' && |
| 3746 (iCol = checkColumnSpecifier(pQuery->pFts, pToken, nToken))>=0 ){ | 3560 (iCol = checkColumnSpecifier(pQuery->pFts, pToken, nToken))>=0 ){ |
| 3747 pQuery->nextColumn = iCol; | 3561 pQuery->nextColumn = iCol; |
| 3748 continue; | 3562 continue; |
| 3749 } | 3563 } |
| 3750 if( !inPhrase && pQuery->nTerms>0 && nToken==2 | 3564 if( !inPhrase && pQuery->nTerms>0 && nToken==2 |
| 3751 && pSegment[iBegin]=='O' && pSegment[iBegin+1]=='R' ){ | 3565 && pSegment[iBegin]=='O' && pSegment[iBegin+1]=='R' ){ |
| 3752 pQuery->nextIsOr = 1; | 3566 pQuery->nextIsOr = 1; |
| 3753 continue; | 3567 continue; |
| 3754 } | 3568 } |
| 3755 | |
| 3756 /* | |
| 3757 * The ICU tokenizer considers '*' a break character, so the code below | |
| 3758 * sets isPrefix correctly, but since that code doesn't eat the '*', the | |
| 3759 * ICU tokenizer returns it as the next token. So eat it here until a | |
| 3760 * better solution presents itself. | |
| 3761 */ | |
| 3762 if( pQuery->nTerms>0 && nToken==1 && pSegment[iBegin]=='*' && | |
| 3763 iEndLast==iBegin){ | |
| 3764 pQuery->pTerms[pQuery->nTerms-1].isPrefix = 1; | |
| 3765 continue; | |
| 3766 } | |
| 3767 iEndLast = iEnd; | |
| 3768 | |
| 3769 queryAdd(pQuery, pToken, nToken); | 3569 queryAdd(pQuery, pToken, nToken); |
| 3770 if( !inPhrase && iBegin>0 && pSegment[iBegin-1]=='-' ){ | 3570 if( !inPhrase && iBegin>0 && pSegment[iBegin-1]=='-' ){ |
| 3771 pQuery->pTerms[pQuery->nTerms-1].isNot = 1; | 3571 pQuery->pTerms[pQuery->nTerms-1].isNot = 1; |
| 3772 } | 3572 } |
| 3773 if( iEnd<nSegment && pSegment[iEnd]=='*' ){ | 3573 if( iEnd<nSegment && pSegment[iEnd]=='*' ){ |
| 3774 pQuery->pTerms[pQuery->nTerms-1].isPrefix = 1; | 3574 pQuery->pTerms[pQuery->nTerms-1].isPrefix = 1; |
| 3775 } | 3575 } |
| 3776 pQuery->pTerms[pQuery->nTerms-1].iPhrase = nTerm; | 3576 pQuery->pTerms[pQuery->nTerms-1].iPhrase = nTerm; |
| 3777 if( inPhrase ){ | 3577 if( inPhrase ){ |
| 3778 nTerm++; | 3578 nTerm++; |
| (...skipping 119 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3898 while( iNext<pQuery->nTerms && aTerm[iNext].isOr ){ | 3698 while( iNext<pQuery->nTerms && aTerm[iNext].isOr ){ |
| 3899 rc = docListOfTerm(v, aTerm[iNext].iColumn, &aTerm[iNext], &or); | 3699 rc = docListOfTerm(v, aTerm[iNext].iColumn, &aTerm[iNext], &or); |
| 3900 iNext += aTerm[iNext].nPhrase + 1; | 3700 iNext += aTerm[iNext].nPhrase + 1; |
| 3901 if( rc ){ | 3701 if( rc ){ |
| 3902 if( i!=nNot ) dataBufferDestroy(&left); | 3702 if( i!=nNot ) dataBufferDestroy(&left); |
| 3903 dataBufferDestroy(&right); | 3703 dataBufferDestroy(&right); |
| 3904 queryClear(pQuery); | 3704 queryClear(pQuery); |
| 3905 return rc; | 3705 return rc; |
| 3906 } | 3706 } |
| 3907 dataBufferInit(&new, 0); | 3707 dataBufferInit(&new, 0); |
| 3908 rc = docListOrMerge(right.pData, right.nData, or.pData, or.nData, &new); | 3708 docListOrMerge(right.pData, right.nData, or.pData, or.nData, &new); |
| 3909 dataBufferDestroy(&right); | 3709 dataBufferDestroy(&right); |
| 3910 dataBufferDestroy(&or); | 3710 dataBufferDestroy(&or); |
| 3911 if( rc!=SQLITE_OK ){ | |
| 3912 if( i!=nNot ) dataBufferDestroy(&left); | |
| 3913 queryClear(pQuery); | |
| 3914 dataBufferDestroy(&new); | |
| 3915 return rc; | |
| 3916 } | |
| 3917 right = new; | 3711 right = new; |
| 3918 } | 3712 } |
| 3919 if( i==nNot ){ /* first term processed. */ | 3713 if( i==nNot ){ /* first term processed. */ |
| 3920 left = right; | 3714 left = right; |
| 3921 }else{ | 3715 }else{ |
| 3922 dataBufferInit(&new, 0); | 3716 dataBufferInit(&new, 0); |
| 3923 rc = docListAndMerge(left.pData, left.nData, | 3717 docListAndMerge(left.pData, left.nData, right.pData, right.nData, &new); |
| 3924 right.pData, right.nData, &new); | |
| 3925 dataBufferDestroy(&right); | 3718 dataBufferDestroy(&right); |
| 3926 dataBufferDestroy(&left); | 3719 dataBufferDestroy(&left); |
| 3927 if( rc!=SQLITE_OK ){ | |
| 3928 queryClear(pQuery); | |
| 3929 dataBufferDestroy(&new); | |
| 3930 return rc; | |
| 3931 } | |
| 3932 left = new; | 3720 left = new; |
| 3933 } | 3721 } |
| 3934 } | 3722 } |
| 3935 | 3723 |
| 3936 if( nNot==pQuery->nTerms ){ | 3724 if( nNot==pQuery->nTerms ){ |
| 3937 /* We do not yet know how to handle a query of only NOT terms */ | 3725 /* We do not yet know how to handle a query of only NOT terms */ |
| 3938 return SQLITE_ERROR; | 3726 return SQLITE_ERROR; |
| 3939 } | 3727 } |
| 3940 | 3728 |
| 3941 /* Do the EXCEPT terms */ | 3729 /* Do the EXCEPT terms */ |
| 3942 for(i=0; i<pQuery->nTerms; i += aTerm[i].nPhrase + 1){ | 3730 for(i=0; i<pQuery->nTerms; i += aTerm[i].nPhrase + 1){ |
| 3943 if( !aTerm[i].isNot ) continue; | 3731 if( !aTerm[i].isNot ) continue; |
| 3944 rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &right); | 3732 rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &right); |
| 3945 if( rc ){ | 3733 if( rc ){ |
| 3946 queryClear(pQuery); | 3734 queryClear(pQuery); |
| 3947 dataBufferDestroy(&left); | 3735 dataBufferDestroy(&left); |
| 3948 return rc; | 3736 return rc; |
| 3949 } | 3737 } |
| 3950 dataBufferInit(&new, 0); | 3738 dataBufferInit(&new, 0); |
| 3951 rc = docListExceptMerge(left.pData, left.nData, | 3739 docListExceptMerge(left.pData, left.nData, right.pData, right.nData, &new); |
| 3952 right.pData, right.nData, &new); | |
| 3953 dataBufferDestroy(&right); | 3740 dataBufferDestroy(&right); |
| 3954 dataBufferDestroy(&left); | 3741 dataBufferDestroy(&left); |
| 3955 if( rc!=SQLITE_OK ){ | |
| 3956 queryClear(pQuery); | |
| 3957 dataBufferDestroy(&new); | |
| 3958 return rc; | |
| 3959 } | |
| 3960 left = new; | 3742 left = new; |
| 3961 } | 3743 } |
| 3962 | 3744 |
| 3963 *pResult = left; | 3745 *pResult = left; |
| 3964 return rc; | 3746 return rc; |
| 3965 } | 3747 } |
| 3966 | 3748 |
| 3967 /* | 3749 /* |
| 3968 ** This is the xFilter interface for the virtual table. See | 3750 ** This is the xFilter interface for the virtual table. See |
| 3969 ** the virtual table xFilter method documentation for additional | 3751 ** the virtual table xFilter method documentation for additional |
| (...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4043 if( c->result.nData!=0 ){ | 3825 if( c->result.nData!=0 ){ |
| 4044 /* This case happens if the same cursor is used repeatedly. */ | 3826 /* This case happens if the same cursor is used repeatedly. */ |
| 4045 dlrDestroy(&c->reader); | 3827 dlrDestroy(&c->reader); |
| 4046 dataBufferReset(&c->result); | 3828 dataBufferReset(&c->result); |
| 4047 }else{ | 3829 }else{ |
| 4048 dataBufferInit(&c->result, 0); | 3830 dataBufferInit(&c->result, 0); |
| 4049 } | 3831 } |
| 4050 rc = fulltextQuery(v, idxNum-QUERY_FULLTEXT, zQuery, -1, &c->result, &c->q
); | 3832 rc = fulltextQuery(v, idxNum-QUERY_FULLTEXT, zQuery, -1, &c->result, &c->q
); |
| 4051 if( rc!=SQLITE_OK ) return rc; | 3833 if( rc!=SQLITE_OK ) return rc; |
| 4052 if( c->result.nData!=0 ){ | 3834 if( c->result.nData!=0 ){ |
| 4053 rc = dlrInit(&c->reader, DL_DOCIDS, c->result.pData, c->result.nData); | 3835 dlrInit(&c->reader, DL_DOCIDS, c->result.pData, c->result.nData); |
| 4054 if( rc!=SQLITE_OK ) return rc; | |
| 4055 } | 3836 } |
| 4056 break; | 3837 break; |
| 4057 } | 3838 } |
| 4058 } | 3839 } |
| 4059 | 3840 |
| 4060 return fulltextNext(pCursor); | 3841 return fulltextNext(pCursor); |
| 4061 } | 3842 } |
| 4062 | 3843 |
| 4063 /* This is the xEof method of the virtual table. The SQLite core | 3844 /* This is the xEof method of the virtual table. The SQLite core |
| 4064 ** calls this routine to find out if it has reached the end of | 3845 ** calls this routine to find out if it has reached the end of |
| (...skipping 480 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4545 DataBuffer term; /* previous term, for decoding term delta. */ | 4326 DataBuffer term; /* previous term, for decoding term delta. */ |
| 4546 | 4327 |
| 4547 sqlite_int64 iBlockid; | 4328 sqlite_int64 iBlockid; |
| 4548 } InteriorReader; | 4329 } InteriorReader; |
| 4549 | 4330 |
| 4550 static void interiorReaderDestroy(InteriorReader *pReader){ | 4331 static void interiorReaderDestroy(InteriorReader *pReader){ |
| 4551 dataBufferDestroy(&pReader->term); | 4332 dataBufferDestroy(&pReader->term); |
| 4552 SCRAMBLE(pReader); | 4333 SCRAMBLE(pReader); |
| 4553 } | 4334 } |
| 4554 | 4335 |
| 4555 static int interiorReaderInit(const char *pData, int nData, | 4336 /* TODO(shess) The assertions are great, but what if we're in NDEBUG |
| 4556 InteriorReader *pReader){ | 4337 ** and the blob is empty or otherwise contains suspect data? |
| 4338 */ |
| 4339 static void interiorReaderInit(const char *pData, int nData, |
| 4340 InteriorReader *pReader){ |
| 4557 int n, nTerm; | 4341 int n, nTerm; |
| 4558 | 4342 |
| 4559 /* These conditions are checked and met by the callers. */ | 4343 /* Require at least the leading flag byte */ |
| 4560 assert( nData>0 ); | 4344 assert( nData>0 ); |
| 4561 assert( pData[0]!='\0' ); | 4345 assert( pData[0]!='\0' ); |
| 4562 | 4346 |
| 4563 CLEAR(pReader); | 4347 CLEAR(pReader); |
| 4564 | 4348 |
| 4565 /* Decode the base blockid, and set the cursor to the first term. */ | 4349 /* Decode the base blockid, and set the cursor to the first term. */ |
| 4566 n = getVarintSafe(pData+1, &pReader->iBlockid, nData-1); | 4350 n = getVarint(pData+1, &pReader->iBlockid); |
| 4567 if( !n ) return SQLITE_CORRUPT_BKPT; | 4351 assert( 1+n<=nData ); |
| 4568 pReader->pData = pData+1+n; | 4352 pReader->pData = pData+1+n; |
| 4569 pReader->nData = nData-(1+n); | 4353 pReader->nData = nData-(1+n); |
| 4570 | 4354 |
| 4571 /* A single-child interior node (such as when a leaf node was too | 4355 /* A single-child interior node (such as when a leaf node was too |
| 4572 ** large for the segment directory) won't have any terms. | 4356 ** large for the segment directory) won't have any terms. |
| 4573 ** Otherwise, decode the first term. | 4357 ** Otherwise, decode the first term. |
| 4574 */ | 4358 */ |
| 4575 if( pReader->nData==0 ){ | 4359 if( pReader->nData==0 ){ |
| 4576 dataBufferInit(&pReader->term, 0); | 4360 dataBufferInit(&pReader->term, 0); |
| 4577 }else{ | 4361 }else{ |
| 4578 n = getVarint32Safe(pReader->pData, &nTerm, pReader->nData); | 4362 n = getVarint32(pReader->pData, &nTerm); |
| 4579 if( !n || nTerm<0 || nTerm>pReader->nData-n) return SQLITE_CORRUPT_BKPT; | |
| 4580 dataBufferInit(&pReader->term, nTerm); | 4363 dataBufferInit(&pReader->term, nTerm); |
| 4581 dataBufferReplace(&pReader->term, pReader->pData+n, nTerm); | 4364 dataBufferReplace(&pReader->term, pReader->pData+n, nTerm); |
| 4365 assert( n+nTerm<=pReader->nData ); |
| 4582 pReader->pData += n+nTerm; | 4366 pReader->pData += n+nTerm; |
| 4583 pReader->nData -= n+nTerm; | 4367 pReader->nData -= n+nTerm; |
| 4584 } | 4368 } |
| 4585 return SQLITE_OK; | |
| 4586 } | 4369 } |
| 4587 | 4370 |
| 4588 static int interiorReaderAtEnd(InteriorReader *pReader){ | 4371 static int interiorReaderAtEnd(InteriorReader *pReader){ |
| 4589 return pReader->term.nData<=0; | 4372 return pReader->term.nData==0; |
| 4590 } | 4373 } |
| 4591 | 4374 |
| 4592 static sqlite_int64 interiorReaderCurrentBlockid(InteriorReader *pReader){ | 4375 static sqlite_int64 interiorReaderCurrentBlockid(InteriorReader *pReader){ |
| 4593 return pReader->iBlockid; | 4376 return pReader->iBlockid; |
| 4594 } | 4377 } |
| 4595 | 4378 |
| 4596 static int interiorReaderTermBytes(InteriorReader *pReader){ | 4379 static int interiorReaderTermBytes(InteriorReader *pReader){ |
| 4597 assert( !interiorReaderAtEnd(pReader) ); | 4380 assert( !interiorReaderAtEnd(pReader) ); |
| 4598 return pReader->term.nData; | 4381 return pReader->term.nData; |
| 4599 } | 4382 } |
| 4600 static const char *interiorReaderTerm(InteriorReader *pReader){ | 4383 static const char *interiorReaderTerm(InteriorReader *pReader){ |
| 4601 assert( !interiorReaderAtEnd(pReader) ); | 4384 assert( !interiorReaderAtEnd(pReader) ); |
| 4602 return pReader->term.pData; | 4385 return pReader->term.pData; |
| 4603 } | 4386 } |
| 4604 | 4387 |
| 4605 /* Step forward to the next term in the node. */ | 4388 /* Step forward to the next term in the node. */ |
| 4606 static int interiorReaderStep(InteriorReader *pReader){ | 4389 static void interiorReaderStep(InteriorReader *pReader){ |
| 4607 assert( !interiorReaderAtEnd(pReader) ); | 4390 assert( !interiorReaderAtEnd(pReader) ); |
| 4608 | 4391 |
| 4609 /* If the last term has been read, signal eof, else construct the | 4392 /* If the last term has been read, signal eof, else construct the |
| 4610 ** next term. | 4393 ** next term. |
| 4611 */ | 4394 */ |
| 4612 if( pReader->nData==0 ){ | 4395 if( pReader->nData==0 ){ |
| 4613 dataBufferReset(&pReader->term); | 4396 dataBufferReset(&pReader->term); |
| 4614 }else{ | 4397 }else{ |
| 4615 int n, nPrefix, nSuffix; | 4398 int n, nPrefix, nSuffix; |
| 4616 | 4399 |
| 4617 n = getVarint32Safe(pReader->pData, &nPrefix, pReader->nData); | 4400 n = getVarint32(pReader->pData, &nPrefix); |
| 4618 if( !n ) return SQLITE_CORRUPT_BKPT; | 4401 n += getVarint32(pReader->pData+n, &nSuffix); |
| 4619 pReader->nData -= n; | |
| 4620 pReader->pData += n; | |
| 4621 n = getVarint32Safe(pReader->pData, &nSuffix, pReader->nData); | |
| 4622 if( !n ) return SQLITE_CORRUPT_BKPT; | |
| 4623 pReader->nData -= n; | |
| 4624 pReader->pData += n; | |
| 4625 if( nSuffix<0 || nSuffix>pReader->nData ) return SQLITE_CORRUPT_BKPT; | |
| 4626 if( nPrefix<0 || nPrefix>pReader->term.nData ) return SQLITE_CORRUPT_BKPT; | |
| 4627 | 4402 |
| 4628 /* Truncate the current term and append suffix data. */ | 4403 /* Truncate the current term and append suffix data. */ |
| 4629 pReader->term.nData = nPrefix; | 4404 pReader->term.nData = nPrefix; |
| 4630 dataBufferAppend(&pReader->term, pReader->pData, nSuffix); | 4405 dataBufferAppend(&pReader->term, pReader->pData+n, nSuffix); |
| 4631 | 4406 |
| 4632 pReader->pData += nSuffix; | 4407 assert( n+nSuffix<=pReader->nData ); |
| 4633 pReader->nData -= nSuffix; | 4408 pReader->pData += n+nSuffix; |
| 4409 pReader->nData -= n+nSuffix; |
| 4634 } | 4410 } |
| 4635 pReader->iBlockid++; | 4411 pReader->iBlockid++; |
| 4636 return SQLITE_OK; | |
| 4637 } | 4412 } |
| 4638 | 4413 |
| 4639 /* Compare the current term to pTerm[nTerm], returning strcmp-style | 4414 /* Compare the current term to pTerm[nTerm], returning strcmp-style |
| 4640 ** results. If isPrefix, equality means equal through nTerm bytes. | 4415 ** results. If isPrefix, equality means equal through nTerm bytes. |
| 4641 */ | 4416 */ |
| 4642 static int interiorReaderTermCmp(InteriorReader *pReader, | 4417 static int interiorReaderTermCmp(InteriorReader *pReader, |
| 4643 const char *pTerm, int nTerm, int isPrefix){ | 4418 const char *pTerm, int nTerm, int isPrefix){ |
| 4644 const char *pReaderTerm = interiorReaderTerm(pReader); | 4419 const char *pReaderTerm = interiorReaderTerm(pReader); |
| 4645 int nReaderTerm = interiorReaderTermBytes(pReader); | 4420 int nReaderTerm = interiorReaderTermBytes(pReader); |
| 4646 int c, n = nReaderTerm<nTerm ? nReaderTerm : nTerm; | 4421 int c, n = nReaderTerm<nTerm ? nReaderTerm : nTerm; |
| (...skipping 351 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4998 | 4773 |
| 4999 /* Estimate the length of the merged doclist so we can leave space | 4774 /* Estimate the length of the merged doclist so we can leave space |
| 5000 ** to encode it. | 4775 ** to encode it. |
| 5001 */ | 4776 */ |
| 5002 for(i=0, nData=0; i<nReaders; i++){ | 4777 for(i=0, nData=0; i<nReaders; i++){ |
| 5003 nData += dlrAllDataBytes(&pReaders[i]); | 4778 nData += dlrAllDataBytes(&pReaders[i]); |
| 5004 } | 4779 } |
| 5005 n = putVarint(c, nData); | 4780 n = putVarint(c, nData); |
| 5006 dataBufferAppend(&pWriter->data, c, n); | 4781 dataBufferAppend(&pWriter->data, c, n); |
| 5007 | 4782 |
| 5008 rc = docListMerge(&pWriter->data, pReaders, nReaders); | 4783 docListMerge(&pWriter->data, pReaders, nReaders); |
| 5009 if( rc!= SQLITE_OK ) return rc; | |
| 5010 ASSERT_VALID_DOCLIST(DL_DEFAULT, | 4784 ASSERT_VALID_DOCLIST(DL_DEFAULT, |
| 5011 pWriter->data.pData+iDoclistData+n, | 4785 pWriter->data.pData+iDoclistData+n, |
| 5012 pWriter->data.nData-iDoclistData-n, NULL); | 4786 pWriter->data.nData-iDoclistData-n, NULL); |
| 5013 | 4787 |
| 5014 /* The actual amount of doclist data at this point could be smaller | 4788 /* The actual amount of doclist data at this point could be smaller |
| 5015 ** than the length we encoded. Additionally, the space required to | 4789 ** than the length we encoded. Additionally, the space required to |
| 5016 ** encode this length could be smaller. For small doclists, this is | 4790 ** encode this length could be smaller. For small doclists, this is |
| 5017 ** not a big deal, we can just use memmove() to adjust things. | 4791 ** not a big deal, we can just use memmove() to adjust things. |
| 5018 */ | 4792 */ |
| 5019 nActualData = pWriter->data.nData-(iDoclistData+n); | 4793 nActualData = pWriter->data.nData-(iDoclistData+n); |
| (...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5109 */ | 4883 */ |
| 5110 /* TODO(shess) Revise writeZeroSegment() so that doclists are | 4884 /* TODO(shess) Revise writeZeroSegment() so that doclists are |
| 5111 ** constructed directly in pWriter->data. | 4885 ** constructed directly in pWriter->data. |
| 5112 */ | 4886 */ |
| 5113 static int leafWriterStep(fulltext_vtab *v, LeafWriter *pWriter, | 4887 static int leafWriterStep(fulltext_vtab *v, LeafWriter *pWriter, |
| 5114 const char *pTerm, int nTerm, | 4888 const char *pTerm, int nTerm, |
| 5115 const char *pData, int nData){ | 4889 const char *pData, int nData){ |
| 5116 int rc; | 4890 int rc; |
| 5117 DLReader reader; | 4891 DLReader reader; |
| 5118 | 4892 |
| 5119 rc = dlrInit(&reader, DL_DEFAULT, pData, nData); | 4893 dlrInit(&reader, DL_DEFAULT, pData, nData); |
| 5120 if( rc!=SQLITE_OK ) return rc; | |
| 5121 rc = leafWriterStepMerge(v, pWriter, pTerm, nTerm, &reader, 1); | 4894 rc = leafWriterStepMerge(v, pWriter, pTerm, nTerm, &reader, 1); |
| 5122 dlrDestroy(&reader); | 4895 dlrDestroy(&reader); |
| 5123 | 4896 |
| 5124 return rc; | 4897 return rc; |
| 5125 } | 4898 } |
| 5126 | 4899 |
| 5127 | 4900 |
| 5128 /****************************************************************/ | 4901 /****************************************************************/ |
| 5129 /* LeafReader is used to iterate over an individual leaf node. */ | 4902 /* LeafReader is used to iterate over an individual leaf node. */ |
| 5130 typedef struct LeafReader { | 4903 typedef struct LeafReader { |
| (...skipping 24 matching lines...) Expand all Loading... |
| 5155 /* Access the doclist data for the current term. */ | 4928 /* Access the doclist data for the current term. */ |
| 5156 static int leafReaderDataBytes(LeafReader *pReader){ | 4929 static int leafReaderDataBytes(LeafReader *pReader){ |
| 5157 int nData; | 4930 int nData; |
| 5158 assert( pReader->term.nData>0 ); | 4931 assert( pReader->term.nData>0 ); |
| 5159 getVarint32(pReader->pData, &nData); | 4932 getVarint32(pReader->pData, &nData); |
| 5160 return nData; | 4933 return nData; |
| 5161 } | 4934 } |
| 5162 static const char *leafReaderData(LeafReader *pReader){ | 4935 static const char *leafReaderData(LeafReader *pReader){ |
| 5163 int n, nData; | 4936 int n, nData; |
| 5164 assert( pReader->term.nData>0 ); | 4937 assert( pReader->term.nData>0 ); |
| 5165 n = getVarint32Safe(pReader->pData, &nData, pReader->nData); | 4938 n = getVarint32(pReader->pData, &nData); |
| 5166 if( !n || nData>pReader->nData-n ) return NULL; | |
| 5167 return pReader->pData+n; | 4939 return pReader->pData+n; |
| 5168 } | 4940 } |
| 5169 | 4941 |
| 5170 static int leafReaderInit(const char *pData, int nData, LeafReader *pReader){ | 4942 static void leafReaderInit(const char *pData, int nData, |
| 4943 LeafReader *pReader){ |
| 5171 int nTerm, n; | 4944 int nTerm, n; |
| 5172 | 4945 |
| 5173 /* All callers check this precondition. */ | |
| 5174 assert( nData>0 ); | 4946 assert( nData>0 ); |
| 5175 assert( pData[0]=='\0' ); | 4947 assert( pData[0]=='\0' ); |
| 5176 | 4948 |
| 5177 CLEAR(pReader); | 4949 CLEAR(pReader); |
| 5178 | 4950 |
| 5179 /* Read the first term, skipping the header byte. */ | 4951 /* Read the first term, skipping the header byte. */ |
| 5180 n = getVarint32Safe(pData+1, &nTerm, nData-1); | 4952 n = getVarint32(pData+1, &nTerm); |
| 5181 if( !n || nTerm<0 || nTerm>nData-1-n ) return SQLITE_CORRUPT_BKPT; | |
| 5182 dataBufferInit(&pReader->term, nTerm); | 4953 dataBufferInit(&pReader->term, nTerm); |
| 5183 dataBufferReplace(&pReader->term, pData+1+n, nTerm); | 4954 dataBufferReplace(&pReader->term, pData+1+n, nTerm); |
| 5184 | 4955 |
| 5185 /* Position after the first term. */ | 4956 /* Position after the first term. */ |
| 4957 assert( 1+n+nTerm<nData ); |
| 5186 pReader->pData = pData+1+n+nTerm; | 4958 pReader->pData = pData+1+n+nTerm; |
| 5187 pReader->nData = nData-1-n-nTerm; | 4959 pReader->nData = nData-1-n-nTerm; |
| 5188 return SQLITE_OK; | |
| 5189 } | 4960 } |
| 5190 | 4961 |
| 5191 /* Step the reader forward to the next term. */ | 4962 /* Step the reader forward to the next term. */ |
| 5192 static int leafReaderStep(LeafReader *pReader){ | 4963 static void leafReaderStep(LeafReader *pReader){ |
| 5193 int n, nData, nPrefix, nSuffix; | 4964 int n, nData, nPrefix, nSuffix; |
| 5194 assert( !leafReaderAtEnd(pReader) ); | 4965 assert( !leafReaderAtEnd(pReader) ); |
| 5195 | 4966 |
| 5196 /* Skip previous entry's data block. */ | 4967 /* Skip previous entry's data block. */ |
| 5197 n = getVarint32Safe(pReader->pData, &nData, pReader->nData); | 4968 n = getVarint32(pReader->pData, &nData); |
| 5198 if( !n || nData<0 || nData>pReader->nData-n ) return SQLITE_CORRUPT_BKPT; | 4969 assert( n+nData<=pReader->nData ); |
| 5199 pReader->pData += n+nData; | 4970 pReader->pData += n+nData; |
| 5200 pReader->nData -= n+nData; | 4971 pReader->nData -= n+nData; |
| 5201 | 4972 |
| 5202 if( !leafReaderAtEnd(pReader) ){ | 4973 if( !leafReaderAtEnd(pReader) ){ |
| 5203 /* Construct the new term using a prefix from the old term plus a | 4974 /* Construct the new term using a prefix from the old term plus a |
| 5204 ** suffix from the leaf data. | 4975 ** suffix from the leaf data. |
| 5205 */ | 4976 */ |
| 5206 n = getVarint32Safe(pReader->pData, &nPrefix, pReader->nData); | 4977 n = getVarint32(pReader->pData, &nPrefix); |
| 5207 if( !n ) return SQLITE_CORRUPT_BKPT; | 4978 n += getVarint32(pReader->pData+n, &nSuffix); |
| 5208 pReader->nData -= n; | 4979 assert( n+nSuffix<pReader->nData ); |
| 5209 pReader->pData += n; | |
| 5210 n = getVarint32Safe(pReader->pData, &nSuffix, pReader->nData); | |
| 5211 if( !n ) return SQLITE_CORRUPT_BKPT; | |
| 5212 pReader->nData -= n; | |
| 5213 pReader->pData += n; | |
| 5214 if( nSuffix<0 || nSuffix>pReader->nData ) return SQLITE_CORRUPT_BKPT; | |
| 5215 if( nPrefix<0 || nPrefix>pReader->term.nData ) return SQLITE_CORRUPT_BKPT; | |
| 5216 pReader->term.nData = nPrefix; | 4980 pReader->term.nData = nPrefix; |
| 5217 dataBufferAppend(&pReader->term, pReader->pData, nSuffix); | 4981 dataBufferAppend(&pReader->term, pReader->pData+n, nSuffix); |
| 5218 | 4982 |
| 5219 pReader->pData += nSuffix; | 4983 pReader->pData += n+nSuffix; |
| 5220 pReader->nData -= nSuffix; | 4984 pReader->nData -= n+nSuffix; |
| 5221 } | 4985 } |
| 5222 return SQLITE_OK; | |
| 5223 } | 4986 } |
| 5224 | 4987 |
| 5225 /* strcmp-style comparison of pReader's current term against pTerm. | 4988 /* strcmp-style comparison of pReader's current term against pTerm. |
| 5226 ** If isPrefix, equality means equal through nTerm bytes. | 4989 ** If isPrefix, equality means equal through nTerm bytes. |
| 5227 */ | 4990 */ |
| 5228 static int leafReaderTermCmp(LeafReader *pReader, | 4991 static int leafReaderTermCmp(LeafReader *pReader, |
| 5229 const char *pTerm, int nTerm, int isPrefix){ | 4992 const char *pTerm, int nTerm, int isPrefix){ |
| 5230 int c, n = pReader->term.nData<nTerm ? pReader->term.nData : nTerm; | 4993 int c, n = pReader->term.nData<nTerm ? pReader->term.nData : nTerm; |
| 5231 if( n==0 ){ | 4994 if( n==0 ){ |
| 5232 if( pReader->term.nData>0 ) return -1; | 4995 if( pReader->term.nData>0 ) return -1; |
| (...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5305 } | 5068 } |
| 5306 leafReaderDestroy(&pReader->leafReader); | 5069 leafReaderDestroy(&pReader->leafReader); |
| 5307 dataBufferDestroy(&pReader->rootData); | 5070 dataBufferDestroy(&pReader->rootData); |
| 5308 SCRAMBLE(pReader); | 5071 SCRAMBLE(pReader); |
| 5309 } | 5072 } |
| 5310 | 5073 |
| 5311 /* Initialize pReader with the given root data (if iStartBlockid==0 | 5074 /* Initialize pReader with the given root data (if iStartBlockid==0 |
| 5312 ** the leaf data was entirely contained in the root), or from the | 5075 ** the leaf data was entirely contained in the root), or from the |
| 5313 ** stream of blocks between iStartBlockid and iEndBlockid, inclusive. | 5076 ** stream of blocks between iStartBlockid and iEndBlockid, inclusive. |
| 5314 */ | 5077 */ |
| 5315 /* TODO(shess): Figure out a means of indicating how many leaves are | |
| 5316 ** expected, for purposes of detecting corruption. | |
| 5317 */ | |
| 5318 static int leavesReaderInit(fulltext_vtab *v, | 5078 static int leavesReaderInit(fulltext_vtab *v, |
| 5319 int idx, | 5079 int idx, |
| 5320 sqlite_int64 iStartBlockid, | 5080 sqlite_int64 iStartBlockid, |
| 5321 sqlite_int64 iEndBlockid, | 5081 sqlite_int64 iEndBlockid, |
| 5322 const char *pRootData, int nRootData, | 5082 const char *pRootData, int nRootData, |
| 5323 LeavesReader *pReader){ | 5083 LeavesReader *pReader){ |
| 5324 CLEAR(pReader); | 5084 CLEAR(pReader); |
| 5325 pReader->idx = idx; | 5085 pReader->idx = idx; |
| 5326 | 5086 |
| 5327 dataBufferInit(&pReader->rootData, 0); | 5087 dataBufferInit(&pReader->rootData, 0); |
| 5328 if( iStartBlockid==0 ){ | 5088 if( iStartBlockid==0 ){ |
| 5329 int rc; | |
| 5330 /* Corrupt if this can't be a leaf node. */ | |
| 5331 if( pRootData==NULL || nRootData<1 || pRootData[0]!='\0' ){ | |
| 5332 return SQLITE_CORRUPT_BKPT; | |
| 5333 } | |
| 5334 /* Entire leaf level fit in root data. */ | 5089 /* Entire leaf level fit in root data. */ |
| 5335 dataBufferReplace(&pReader->rootData, pRootData, nRootData); | 5090 dataBufferReplace(&pReader->rootData, pRootData, nRootData); |
| 5336 rc = leafReaderInit(pReader->rootData.pData, pReader->rootData.nData, | 5091 leafReaderInit(pReader->rootData.pData, pReader->rootData.nData, |
| 5337 &pReader->leafReader); | 5092 &pReader->leafReader); |
| 5338 if( rc!=SQLITE_OK ){ | |
| 5339 dataBufferDestroy(&pReader->rootData); | |
| 5340 return rc; | |
| 5341 } | |
| 5342 }else{ | 5093 }else{ |
| 5343 sqlite3_stmt *s; | 5094 sqlite3_stmt *s; |
| 5344 int rc = sql_get_leaf_statement(v, idx, &s); | 5095 int rc = sql_get_leaf_statement(v, idx, &s); |
| 5345 if( rc!=SQLITE_OK ) return rc; | 5096 if( rc!=SQLITE_OK ) return rc; |
| 5346 | 5097 |
| 5347 rc = sqlite3_bind_int64(s, 1, iStartBlockid); | 5098 rc = sqlite3_bind_int64(s, 1, iStartBlockid); |
| 5348 if( rc!=SQLITE_OK ) goto err; | 5099 if( rc!=SQLITE_OK ) return rc; |
| 5349 | 5100 |
| 5350 rc = sqlite3_bind_int64(s, 2, iEndBlockid); | 5101 rc = sqlite3_bind_int64(s, 2, iEndBlockid); |
| 5351 if( rc!=SQLITE_OK ) goto err; | 5102 if( rc!=SQLITE_OK ) return rc; |
| 5352 | 5103 |
| 5353 rc = sqlite3_step(s); | 5104 rc = sqlite3_step(s); |
| 5354 | |
| 5355 /* Corrupt if interior node referenced missing leaf node. */ | |
| 5356 if( rc==SQLITE_DONE ){ | 5105 if( rc==SQLITE_DONE ){ |
| 5357 rc = SQLITE_CORRUPT_BKPT; | 5106 pReader->eof = 1; |
| 5358 goto err; | 5107 return SQLITE_OK; |
| 5359 } | 5108 } |
| 5360 | 5109 if( rc!=SQLITE_ROW ) return rc; |
| 5361 if( rc!=SQLITE_ROW ) goto err; | |
| 5362 rc = SQLITE_OK; | |
| 5363 | |
| 5364 /* Corrupt if leaf data isn't a blob. */ | |
| 5365 if( sqlite3_column_type(s, 0)!=SQLITE_BLOB ){ | |
| 5366 rc = SQLITE_CORRUPT_BKPT; | |
| 5367 }else{ | |
| 5368 const char *pLeafData = sqlite3_column_blob(s, 0); | |
| 5369 int nLeafData = sqlite3_column_bytes(s, 0); | |
| 5370 | |
| 5371 /* Corrupt if this can't be a leaf node. */ | |
| 5372 if( pLeafData==NULL || nLeafData<1 || pLeafData[0]!='\0' ){ | |
| 5373 rc = SQLITE_CORRUPT_BKPT; | |
| 5374 }else{ | |
| 5375 rc = leafReaderInit(pLeafData, nLeafData, &pReader->leafReader); | |
| 5376 } | |
| 5377 } | |
| 5378 | |
| 5379 err: | |
| 5380 if( rc!=SQLITE_OK ){ | |
| 5381 if( idx==-1 ){ | |
| 5382 sqlite3_finalize(s); | |
| 5383 }else{ | |
| 5384 sqlite3_reset(s); | |
| 5385 } | |
| 5386 return rc; | |
| 5387 } | |
| 5388 | 5110 |
| 5389 pReader->pStmt = s; | 5111 pReader->pStmt = s; |
| 5112 leafReaderInit(sqlite3_column_blob(pReader->pStmt, 0), |
| 5113 sqlite3_column_bytes(pReader->pStmt, 0), |
| 5114 &pReader->leafReader); |
| 5390 } | 5115 } |
| 5391 return SQLITE_OK; | 5116 return SQLITE_OK; |
| 5392 } | 5117 } |
| 5393 | 5118 |
| 5394 /* Step the current leaf forward to the next term. If we reach the | 5119 /* Step the current leaf forward to the next term. If we reach the |
| 5395 ** end of the current leaf, step forward to the next leaf block. | 5120 ** end of the current leaf, step forward to the next leaf block. |
| 5396 */ | 5121 */ |
| 5397 static int leavesReaderStep(fulltext_vtab *v, LeavesReader *pReader){ | 5122 static int leavesReaderStep(fulltext_vtab *v, LeavesReader *pReader){ |
| 5398 int rc; | |
| 5399 assert( !leavesReaderAtEnd(pReader) ); | 5123 assert( !leavesReaderAtEnd(pReader) ); |
| 5400 rc = leafReaderStep(&pReader->leafReader); | 5124 leafReaderStep(&pReader->leafReader); |
| 5401 if( rc!=SQLITE_OK ) return rc; | |
| 5402 | 5125 |
| 5403 if( leafReaderAtEnd(&pReader->leafReader) ){ | 5126 if( leafReaderAtEnd(&pReader->leafReader) ){ |
| 5127 int rc; |
| 5404 if( pReader->rootData.pData ){ | 5128 if( pReader->rootData.pData ){ |
| 5405 pReader->eof = 1; | 5129 pReader->eof = 1; |
| 5406 return SQLITE_OK; | 5130 return SQLITE_OK; |
| 5407 } | 5131 } |
| 5408 rc = sqlite3_step(pReader->pStmt); | 5132 rc = sqlite3_step(pReader->pStmt); |
| 5409 if( rc!=SQLITE_ROW ){ | 5133 if( rc!=SQLITE_ROW ){ |
| 5410 pReader->eof = 1; | 5134 pReader->eof = 1; |
| 5411 return rc==SQLITE_DONE ? SQLITE_OK : rc; | 5135 return rc==SQLITE_DONE ? SQLITE_OK : rc; |
| 5412 } | 5136 } |
| 5413 | 5137 leafReaderDestroy(&pReader->leafReader); |
| 5414 /* Corrupt if leaf data isn't a blob. */ | 5138 leafReaderInit(sqlite3_column_blob(pReader->pStmt, 0), |
| 5415 if( sqlite3_column_type(pReader->pStmt, 0)!=SQLITE_BLOB ){ | 5139 sqlite3_column_bytes(pReader->pStmt, 0), |
| 5416 return SQLITE_CORRUPT_BKPT; | 5140 &pReader->leafReader); |
| 5417 }else{ | |
| 5418 LeafReader tmp; | |
| 5419 const char *pLeafData = sqlite3_column_blob(pReader->pStmt, 0); | |
| 5420 int nLeafData = sqlite3_column_bytes(pReader->pStmt, 0); | |
| 5421 | |
| 5422 /* Corrupt if this can't be a leaf node. */ | |
| 5423 if( pLeafData==NULL || nLeafData<1 || pLeafData[0]!='\0' ){ | |
| 5424 return SQLITE_CORRUPT_BKPT; | |
| 5425 } | |
| 5426 | |
| 5427 rc = leafReaderInit(pLeafData, nLeafData, &tmp); | |
| 5428 if( rc!=SQLITE_OK ) return rc; | |
| 5429 leafReaderDestroy(&pReader->leafReader); | |
| 5430 pReader->leafReader = tmp; | |
| 5431 } | |
| 5432 } | 5141 } |
| 5433 return SQLITE_OK; | 5142 return SQLITE_OK; |
| 5434 } | 5143 } |
| 5435 | 5144 |
| 5436 /* Order LeavesReaders by their term, ignoring idx. Readers at eof | 5145 /* Order LeavesReaders by their term, ignoring idx. Readers at eof |
| 5437 ** always sort to the end. | 5146 ** always sort to the end. |
| 5438 */ | 5147 */ |
| 5439 static int leavesReaderTermCmp(LeavesReader *lr1, LeavesReader *lr2){ | 5148 static int leavesReaderTermCmp(LeavesReader *lr1, LeavesReader *lr2){ |
| 5440 if( leavesReaderAtEnd(lr1) ){ | 5149 if( leavesReaderAtEnd(lr1) ){ |
| 5441 if( leavesReaderAtEnd(lr2) ) return 0; | 5150 if( leavesReaderAtEnd(lr2) ) return 0; |
| (...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5482 | 5191 |
| 5483 rc = sqlite3_bind_int(s, 1, iLevel); | 5192 rc = sqlite3_bind_int(s, 1, iLevel); |
| 5484 if( rc!=SQLITE_OK ) return rc; | 5193 if( rc!=SQLITE_OK ) return rc; |
| 5485 | 5194 |
| 5486 i = 0; | 5195 i = 0; |
| 5487 while( (rc = sqlite3_step(s))==SQLITE_ROW ){ | 5196 while( (rc = sqlite3_step(s))==SQLITE_ROW ){ |
| 5488 sqlite_int64 iStart = sqlite3_column_int64(s, 0); | 5197 sqlite_int64 iStart = sqlite3_column_int64(s, 0); |
| 5489 sqlite_int64 iEnd = sqlite3_column_int64(s, 1); | 5198 sqlite_int64 iEnd = sqlite3_column_int64(s, 1); |
| 5490 const char *pRootData = sqlite3_column_blob(s, 2); | 5199 const char *pRootData = sqlite3_column_blob(s, 2); |
| 5491 int nRootData = sqlite3_column_bytes(s, 2); | 5200 int nRootData = sqlite3_column_bytes(s, 2); |
| 5492 sqlite_int64 iIndex = sqlite3_column_int64(s, 3); | |
| 5493 | 5201 |
| 5494 /* Corrupt if we get back different types than we stored. */ | 5202 assert( i<MERGE_COUNT ); |
| 5495 /* Also corrupt if the index is not sequential starting at 0. */ | |
| 5496 if( sqlite3_column_type(s, 0)!=SQLITE_INTEGER || | |
| 5497 sqlite3_column_type(s, 1)!=SQLITE_INTEGER || | |
| 5498 sqlite3_column_type(s, 2)!=SQLITE_BLOB || | |
| 5499 i!=iIndex || | |
| 5500 i>=MERGE_COUNT ){ | |
| 5501 rc = SQLITE_CORRUPT_BKPT; | |
| 5502 break; | |
| 5503 } | |
| 5504 | |
| 5505 rc = leavesReaderInit(v, i, iStart, iEnd, pRootData, nRootData, | 5203 rc = leavesReaderInit(v, i, iStart, iEnd, pRootData, nRootData, |
| 5506 &pReaders[i]); | 5204 &pReaders[i]); |
| 5507 if( rc!=SQLITE_OK ) break; | 5205 if( rc!=SQLITE_OK ) break; |
| 5508 | 5206 |
| 5509 i++; | 5207 i++; |
| 5510 } | 5208 } |
| 5511 if( rc!=SQLITE_DONE ){ | 5209 if( rc!=SQLITE_DONE ){ |
| 5512 while( i-->0 ){ | 5210 while( i-->0 ){ |
| 5513 leavesReaderDestroy(&pReaders[i]); | 5211 leavesReaderDestroy(&pReaders[i]); |
| 5514 } | 5212 } |
| 5515 sqlite3_reset(s); /* So we don't leave a lock. */ | |
| 5516 return rc; | 5213 return rc; |
| 5517 } | 5214 } |
| 5518 | 5215 |
| 5519 *piReaders = i; | 5216 *piReaders = i; |
| 5520 | 5217 |
| 5521 /* Leave our results sorted by term, then age. */ | 5218 /* Leave our results sorted by term, then age. */ |
| 5522 while( i-- ){ | 5219 while( i-- ){ |
| 5523 leavesReaderReorder(pReaders+i, *piReaders-i); | 5220 leavesReaderReorder(pReaders+i, *piReaders-i); |
| 5524 } | 5221 } |
| 5525 return SQLITE_OK; | 5222 return SQLITE_OK; |
| 5526 } | 5223 } |
| 5527 | 5224 |
| 5528 /* Merge doclists from pReaders[nReaders] into a single doclist, which | 5225 /* Merge doclists from pReaders[nReaders] into a single doclist, which |
| 5529 ** is written to pWriter. Assumes pReaders is ordered oldest to | 5226 ** is written to pWriter. Assumes pReaders is ordered oldest to |
| 5530 ** newest. | 5227 ** newest. |
| 5531 */ | 5228 */ |
| 5532 /* TODO(shess) Consider putting this inline in segmentMerge(). */ | 5229 /* TODO(shess) Consider putting this inline in segmentMerge(). */ |
| 5533 static int leavesReadersMerge(fulltext_vtab *v, | 5230 static int leavesReadersMerge(fulltext_vtab *v, |
| 5534 LeavesReader *pReaders, int nReaders, | 5231 LeavesReader *pReaders, int nReaders, |
| 5535 LeafWriter *pWriter){ | 5232 LeafWriter *pWriter){ |
| 5536 DLReader dlReaders[MERGE_COUNT]; | 5233 DLReader dlReaders[MERGE_COUNT]; |
| 5537 const char *pTerm = leavesReaderTerm(pReaders); | 5234 const char *pTerm = leavesReaderTerm(pReaders); |
| 5538 int i, nTerm = leavesReaderTermBytes(pReaders); | 5235 int i, nTerm = leavesReaderTermBytes(pReaders); |
| 5539 int rc; | |
| 5540 | 5236 |
| 5541 assert( nReaders<=MERGE_COUNT ); | 5237 assert( nReaders<=MERGE_COUNT ); |
| 5542 | 5238 |
| 5543 for(i=0; i<nReaders; i++){ | 5239 for(i=0; i<nReaders; i++){ |
| 5544 const char *pData = leavesReaderData(pReaders+i); | 5240 dlrInit(&dlReaders[i], DL_DEFAULT, |
| 5545 if( pData==NULL ){ | 5241 leavesReaderData(pReaders+i), |
| 5546 rc = SQLITE_CORRUPT_BKPT; | 5242 leavesReaderDataBytes(pReaders+i)); |
| 5547 break; | |
| 5548 } | |
| 5549 rc = dlrInit(&dlReaders[i], DL_DEFAULT, | |
| 5550 pData, | |
| 5551 leavesReaderDataBytes(pReaders+i)); | |
| 5552 if( rc!=SQLITE_OK ) break; | |
| 5553 } | |
| 5554 if( rc!=SQLITE_OK ){ | |
| 5555 while( i-->0 ){ | |
| 5556 dlrDestroy(&dlReaders[i]); | |
| 5557 } | |
| 5558 return rc; | |
| 5559 } | 5243 } |
| 5560 | 5244 |
| 5561 return leafWriterStepMerge(v, pWriter, pTerm, nTerm, dlReaders, nReaders); | 5245 return leafWriterStepMerge(v, pWriter, pTerm, nTerm, dlReaders, nReaders); |
| 5562 } | 5246 } |
| 5563 | 5247 |
| 5564 /* Forward ref due to mutual recursion with segdirNextIndex(). */ | 5248 /* Forward ref due to mutual recursion with segdirNextIndex(). */ |
| 5565 static int segmentMerge(fulltext_vtab *v, int iLevel); | 5249 static int segmentMerge(fulltext_vtab *v, int iLevel); |
| 5566 | 5250 |
| 5567 /* Put the next available index at iLevel into *pidx. If iLevel | 5251 /* Put the next available index at iLevel into *pidx. If iLevel |
| 5568 ** already has MERGE_COUNT segments, they are merged to a higher | 5252 ** already has MERGE_COUNT segments, they are merged to a higher |
| (...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5602 if( rc!=SQLITE_OK ) return rc; | 5286 if( rc!=SQLITE_OK ) return rc; |
| 5603 | 5287 |
| 5604 /* TODO(shess) This assumes that we'll always see exactly | 5288 /* TODO(shess) This assumes that we'll always see exactly |
| 5605 ** MERGE_COUNT segments to merge at a given level. That will be | 5289 ** MERGE_COUNT segments to merge at a given level. That will be |
| 5606 ** broken if we allow the developer to request preemptive or | 5290 ** broken if we allow the developer to request preemptive or |
| 5607 ** deferred merging. | 5291 ** deferred merging. |
| 5608 */ | 5292 */ |
| 5609 memset(&lrs, '\0', sizeof(lrs)); | 5293 memset(&lrs, '\0', sizeof(lrs)); |
| 5610 rc = leavesReadersInit(v, iLevel, lrs, &i); | 5294 rc = leavesReadersInit(v, iLevel, lrs, &i); |
| 5611 if( rc!=SQLITE_OK ) return rc; | 5295 if( rc!=SQLITE_OK ) return rc; |
| 5296 assert( i==MERGE_COUNT ); |
| 5612 | 5297 |
| 5613 leafWriterInit(iLevel+1, idx, &writer); | 5298 leafWriterInit(iLevel+1, idx, &writer); |
| 5614 | 5299 |
| 5615 if( i!=MERGE_COUNT ){ | |
| 5616 rc = SQLITE_CORRUPT_BKPT; | |
| 5617 goto err; | |
| 5618 } | |
| 5619 | |
| 5620 /* Since leavesReaderReorder() pushes readers at eof to the end, | 5300 /* Since leavesReaderReorder() pushes readers at eof to the end, |
| 5621 ** when the first reader is empty, all will be empty. | 5301 ** when the first reader is empty, all will be empty. |
| 5622 */ | 5302 */ |
| 5623 while( !leavesReaderAtEnd(lrs) ){ | 5303 while( !leavesReaderAtEnd(lrs) ){ |
| 5624 /* Figure out how many readers share their next term. */ | 5304 /* Figure out how many readers share their next term. */ |
| 5625 for(i=1; i<MERGE_COUNT && !leavesReaderAtEnd(lrs+i); i++){ | 5305 for(i=1; i<MERGE_COUNT && !leavesReaderAtEnd(lrs+i); i++){ |
| 5626 if( 0!=leavesReaderTermCmp(lrs, lrs+i) ) break; | 5306 if( 0!=leavesReaderTermCmp(lrs, lrs+i) ) break; |
| 5627 } | 5307 } |
| 5628 | 5308 |
| 5629 rc = leavesReadersMerge(v, lrs, i, &writer); | 5309 rc = leavesReadersMerge(v, lrs, i, &writer); |
| (...skipping 22 matching lines...) Expand all Loading... |
| 5652 | 5332 |
| 5653 err: | 5333 err: |
| 5654 for(i=0; i<MERGE_COUNT; i++){ | 5334 for(i=0; i<MERGE_COUNT; i++){ |
| 5655 leavesReaderDestroy(&lrs[i]); | 5335 leavesReaderDestroy(&lrs[i]); |
| 5656 } | 5336 } |
| 5657 leafWriterDestroy(&writer); | 5337 leafWriterDestroy(&writer); |
| 5658 return rc; | 5338 return rc; |
| 5659 } | 5339 } |
| 5660 | 5340 |
| 5661 /* Accumulate the union of *acc and *pData into *acc. */ | 5341 /* Accumulate the union of *acc and *pData into *acc. */ |
| 5662 static int docListAccumulateUnion(DataBuffer *acc, | 5342 static void docListAccumulateUnion(DataBuffer *acc, |
| 5663 const char *pData, int nData) { | 5343 const char *pData, int nData) { |
| 5664 DataBuffer tmp = *acc; | 5344 DataBuffer tmp = *acc; |
| 5665 int rc; | |
| 5666 dataBufferInit(acc, tmp.nData+nData); | 5345 dataBufferInit(acc, tmp.nData+nData); |
| 5667 rc = docListUnion(tmp.pData, tmp.nData, pData, nData, acc); | 5346 docListUnion(tmp.pData, tmp.nData, pData, nData, acc); |
| 5668 dataBufferDestroy(&tmp); | 5347 dataBufferDestroy(&tmp); |
| 5669 return rc; | |
| 5670 } | 5348 } |
| 5671 | 5349 |
| 5672 /* TODO(shess) It might be interesting to explore different merge | 5350 /* TODO(shess) It might be interesting to explore different merge |
| 5673 ** strategies, here. For instance, since this is a sorted merge, we | 5351 ** strategies, here. For instance, since this is a sorted merge, we |
| 5674 ** could easily merge many doclists in parallel. With some | 5352 ** could easily merge many doclists in parallel. With some |
| 5675 ** comprehension of the storage format, we could merge all of the | 5353 ** comprehension of the storage format, we could merge all of the |
| 5676 ** doclists within a leaf node directly from the leaf node's storage. | 5354 ** doclists within a leaf node directly from the leaf node's storage. |
| 5677 ** It may be worthwhile to merge smaller doclists before larger | 5355 ** It may be worthwhile to merge smaller doclists before larger |
| 5678 ** doclists, since they can be traversed more quickly - but the | 5356 ** doclists, since they can be traversed more quickly - but the |
| 5679 ** results may have less overlap, making them more expensive in a | 5357 ** results may have less overlap, making them more expensive in a |
| (...skipping 21 matching lines...) Expand all Loading... |
| 5701 for(rc=SQLITE_OK; rc==SQLITE_OK && !leavesReaderAtEnd(pReader); | 5379 for(rc=SQLITE_OK; rc==SQLITE_OK && !leavesReaderAtEnd(pReader); |
| 5702 rc=leavesReaderStep(v, pReader)){ | 5380 rc=leavesReaderStep(v, pReader)){ |
| 5703 /* TODO(shess) Really want leavesReaderTermCmp(), but that name is | 5381 /* TODO(shess) Really want leavesReaderTermCmp(), but that name is |
| 5704 ** already taken to compare the terms of two LeavesReaders. Think | 5382 ** already taken to compare the terms of two LeavesReaders. Think |
| 5705 ** on a better name. [Meanwhile, break encapsulation rather than | 5383 ** on a better name. [Meanwhile, break encapsulation rather than |
| 5706 ** use a confusing name.] | 5384 ** use a confusing name.] |
| 5707 */ | 5385 */ |
| 5708 int c = leafReaderTermCmp(&pReader->leafReader, pTerm, nTerm, isPrefix); | 5386 int c = leafReaderTermCmp(&pReader->leafReader, pTerm, nTerm, isPrefix); |
| 5709 if( c>0 ) break; /* Past any possible matches. */ | 5387 if( c>0 ) break; /* Past any possible matches. */ |
| 5710 if( c==0 ){ | 5388 if( c==0 ){ |
| 5711 int iBuffer, nData; | |
| 5712 const char *pData = leavesReaderData(pReader); | 5389 const char *pData = leavesReaderData(pReader); |
| 5713 if( pData==NULL ){ | 5390 int iBuffer, nData = leavesReaderDataBytes(pReader); |
| 5714 rc = SQLITE_CORRUPT_BKPT; | |
| 5715 break; | |
| 5716 } | |
| 5717 nData = leavesReaderDataBytes(pReader); | |
| 5718 | 5391 |
| 5719 /* Find the first empty buffer. */ | 5392 /* Find the first empty buffer. */ |
| 5720 for(iBuffer=0; iBuffer<nBuffers; ++iBuffer){ | 5393 for(iBuffer=0; iBuffer<nBuffers; ++iBuffer){ |
| 5721 if( 0==pBuffers[iBuffer].nData ) break; | 5394 if( 0==pBuffers[iBuffer].nData ) break; |
| 5722 } | 5395 } |
| 5723 | 5396 |
| 5724 /* Out of buffers, add an empty one. */ | 5397 /* Out of buffers, add an empty one. */ |
| 5725 if( iBuffer==nBuffers ){ | 5398 if( iBuffer==nBuffers ){ |
| 5726 if( nBuffers==nMaxBuffers ){ | 5399 if( nBuffers==nMaxBuffers ){ |
| 5727 DataBuffer *p; | 5400 DataBuffer *p; |
| (...skipping 25 matching lines...) Expand all Loading... |
| 5753 dataBufferReplace(&(pBuffers[0]), pData, nData); | 5426 dataBufferReplace(&(pBuffers[0]), pData, nData); |
| 5754 }else{ | 5427 }else{ |
| 5755 /* pAcc is the empty buffer the merged data will end up in. */ | 5428 /* pAcc is the empty buffer the merged data will end up in. */ |
| 5756 DataBuffer *pAcc = &(pBuffers[iBuffer]); | 5429 DataBuffer *pAcc = &(pBuffers[iBuffer]); |
| 5757 DataBuffer *p = &(pBuffers[0]); | 5430 DataBuffer *p = &(pBuffers[0]); |
| 5758 | 5431 |
| 5759 /* Handle position 0 specially to avoid need to prime pAcc | 5432 /* Handle position 0 specially to avoid need to prime pAcc |
| 5760 ** with pData/nData. | 5433 ** with pData/nData. |
| 5761 */ | 5434 */ |
| 5762 dataBufferSwap(p, pAcc); | 5435 dataBufferSwap(p, pAcc); |
| 5763 rc = docListAccumulateUnion(pAcc, pData, nData); | 5436 docListAccumulateUnion(pAcc, pData, nData); |
| 5764 if( rc!=SQLITE_OK ) goto err; | |
| 5765 | 5437 |
| 5766 /* Accumulate remaining doclists into pAcc. */ | 5438 /* Accumulate remaining doclists into pAcc. */ |
| 5767 for(++p; p<pAcc; ++p){ | 5439 for(++p; p<pAcc; ++p){ |
| 5768 rc = docListAccumulateUnion(pAcc, p->pData, p->nData); | 5440 docListAccumulateUnion(pAcc, p->pData, p->nData); |
| 5769 if( rc!=SQLITE_OK ) goto err; | |
| 5770 | 5441 |
| 5771 /* dataBufferReset() could allow a large doclist to blow up | 5442 /* dataBufferReset() could allow a large doclist to blow up |
| 5772 ** our memory requirements. | 5443 ** our memory requirements. |
| 5773 */ | 5444 */ |
| 5774 if( p->nCapacity<1024 ){ | 5445 if( p->nCapacity<1024 ){ |
| 5775 dataBufferReset(p); | 5446 dataBufferReset(p); |
| 5776 }else{ | 5447 }else{ |
| 5777 dataBufferDestroy(p); | 5448 dataBufferDestroy(p); |
| 5778 dataBufferInit(p, 0); | 5449 dataBufferInit(p, 0); |
| 5779 } | 5450 } |
| 5780 } | 5451 } |
| 5781 } | 5452 } |
| 5782 } | 5453 } |
| 5783 } | 5454 } |
| 5784 | 5455 |
| 5785 /* Union all the doclists together into *out. */ | 5456 /* Union all the doclists together into *out. */ |
| 5786 /* TODO(shess) What if *out is big? Sigh. */ | 5457 /* TODO(shess) What if *out is big? Sigh. */ |
| 5787 if( rc==SQLITE_OK && nBuffers>0 ){ | 5458 if( rc==SQLITE_OK && nBuffers>0 ){ |
| 5788 int iBuffer; | 5459 int iBuffer; |
| 5789 for(iBuffer=0; iBuffer<nBuffers; ++iBuffer){ | 5460 for(iBuffer=0; iBuffer<nBuffers; ++iBuffer){ |
| 5790 if( pBuffers[iBuffer].nData>0 ){ | 5461 if( pBuffers[iBuffer].nData>0 ){ |
| 5791 if( out->nData==0 ){ | 5462 if( out->nData==0 ){ |
| 5792 dataBufferSwap(out, &(pBuffers[iBuffer])); | 5463 dataBufferSwap(out, &(pBuffers[iBuffer])); |
| 5793 }else{ | 5464 }else{ |
| 5794 rc = docListAccumulateUnion(out, pBuffers[iBuffer].pData, | 5465 docListAccumulateUnion(out, pBuffers[iBuffer].pData, |
| 5795 pBuffers[iBuffer].nData); | 5466 pBuffers[iBuffer].nData); |
| 5796 if( rc!=SQLITE_OK ) break; | |
| 5797 } | 5467 } |
| 5798 } | 5468 } |
| 5799 } | 5469 } |
| 5800 } | 5470 } |
| 5801 | 5471 |
| 5802 err: | |
| 5803 while( nBuffers-- ){ | 5472 while( nBuffers-- ){ |
| 5804 dataBufferDestroy(&(pBuffers[nBuffers])); | 5473 dataBufferDestroy(&(pBuffers[nBuffers])); |
| 5805 } | 5474 } |
| 5806 if( pBuffers!=NULL ) sqlite3_free(pBuffers); | 5475 if( pBuffers!=NULL ) sqlite3_free(pBuffers); |
| 5807 | 5476 |
| 5808 return rc; | 5477 return rc; |
| 5809 } | 5478 } |
| 5810 | 5479 |
| 5811 /* Call loadSegmentLeavesInt() with pData/nData as input. */ | 5480 /* Call loadSegmentLeavesInt() with pData/nData as input. */ |
| 5812 static int loadSegmentLeaf(fulltext_vtab *v, const char *pData, int nData, | 5481 static int loadSegmentLeaf(fulltext_vtab *v, const char *pData, int nData, |
| (...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5851 ** nodes which could include pTerm/nTerm/isPrefix. Note that the | 5520 ** nodes which could include pTerm/nTerm/isPrefix. Note that the |
| 5852 ** interior node terms logically come between the blocks, so there is | 5521 ** interior node terms logically come between the blocks, so there is |
| 5853 ** one more blockid than there are terms (that block contains terms >= | 5522 ** one more blockid than there are terms (that block contains terms >= |
| 5854 ** the last interior-node term). | 5523 ** the last interior-node term). |
| 5855 */ | 5524 */ |
| 5856 /* TODO(shess) The calling code may already know that the end child is | 5525 /* TODO(shess) The calling code may already know that the end child is |
| 5857 ** not worth calculating, because the end may be in a later sibling | 5526 ** not worth calculating, because the end may be in a later sibling |
| 5858 ** node. Consider whether breaking symmetry is worthwhile. I suspect | 5527 ** node. Consider whether breaking symmetry is worthwhile. I suspect |
| 5859 ** it is not worthwhile. | 5528 ** it is not worthwhile. |
| 5860 */ | 5529 */ |
| 5861 static int getChildrenContaining(const char *pData, int nData, | 5530 static void getChildrenContaining(const char *pData, int nData, |
| 5862 const char *pTerm, int nTerm, int isPrefix, | 5531 const char *pTerm, int nTerm, int isPrefix, |
| 5863 sqlite_int64 *piStartChild, | 5532 sqlite_int64 *piStartChild, |
| 5864 sqlite_int64 *piEndChild){ | 5533 sqlite_int64 *piEndChild){ |
| 5865 InteriorReader reader; | 5534 InteriorReader reader; |
| 5866 int rc; | |
| 5867 | 5535 |
| 5868 assert( nData>1 ); | 5536 assert( nData>1 ); |
| 5869 assert( *pData!='\0' ); | 5537 assert( *pData!='\0' ); |
| 5870 rc = interiorReaderInit(pData, nData, &reader); | 5538 interiorReaderInit(pData, nData, &reader); |
| 5871 if( rc!=SQLITE_OK ) return rc; | |
| 5872 | 5539 |
| 5873 /* Scan for the first child which could contain pTerm/nTerm. */ | 5540 /* Scan for the first child which could contain pTerm/nTerm. */ |
| 5874 while( !interiorReaderAtEnd(&reader) ){ | 5541 while( !interiorReaderAtEnd(&reader) ){ |
| 5875 if( interiorReaderTermCmp(&reader, pTerm, nTerm, 0)>0 ) break; | 5542 if( interiorReaderTermCmp(&reader, pTerm, nTerm, 0)>0 ) break; |
| 5876 rc = interiorReaderStep(&reader); | 5543 interiorReaderStep(&reader); |
| 5877 if( rc!=SQLITE_OK ){ | |
| 5878 interiorReaderDestroy(&reader); | |
| 5879 return rc; | |
| 5880 } | |
| 5881 } | 5544 } |
| 5882 *piStartChild = interiorReaderCurrentBlockid(&reader); | 5545 *piStartChild = interiorReaderCurrentBlockid(&reader); |
| 5883 | 5546 |
| 5884 /* Keep scanning to find a term greater than our term, using prefix | 5547 /* Keep scanning to find a term greater than our term, using prefix |
| 5885 ** comparison if indicated. If isPrefix is false, this will be the | 5548 ** comparison if indicated. If isPrefix is false, this will be the |
| 5886 ** same blockid as the starting block. | 5549 ** same blockid as the starting block. |
| 5887 */ | 5550 */ |
| 5888 while( !interiorReaderAtEnd(&reader) ){ | 5551 while( !interiorReaderAtEnd(&reader) ){ |
| 5889 if( interiorReaderTermCmp(&reader, pTerm, nTerm, isPrefix)>0 ) break; | 5552 if( interiorReaderTermCmp(&reader, pTerm, nTerm, isPrefix)>0 ) break; |
| 5890 rc = interiorReaderStep(&reader); | 5553 interiorReaderStep(&reader); |
| 5891 if( rc!=SQLITE_OK ){ | |
| 5892 interiorReaderDestroy(&reader); | |
| 5893 return rc; | |
| 5894 } | |
| 5895 } | 5554 } |
| 5896 *piEndChild = interiorReaderCurrentBlockid(&reader); | 5555 *piEndChild = interiorReaderCurrentBlockid(&reader); |
| 5897 | 5556 |
| 5898 interiorReaderDestroy(&reader); | 5557 interiorReaderDestroy(&reader); |
| 5899 | 5558 |
| 5900 /* Children must ascend, and if !prefix, both must be the same. */ | 5559 /* Children must ascend, and if !prefix, both must be the same. */ |
| 5901 assert( *piEndChild>=*piStartChild ); | 5560 assert( *piEndChild>=*piStartChild ); |
| 5902 assert( isPrefix || *piStartChild==*piEndChild ); | 5561 assert( isPrefix || *piStartChild==*piEndChild ); |
| 5903 return rc; | |
| 5904 } | 5562 } |
| 5905 | 5563 |
| 5906 /* Read block at iBlockid and pass it with other params to | 5564 /* Read block at iBlockid and pass it with other params to |
| 5907 ** getChildrenContaining(). | 5565 ** getChildrenContaining(). |
| 5908 */ | 5566 */ |
| 5909 static int loadAndGetChildrenContaining( | 5567 static int loadAndGetChildrenContaining( |
| 5910 fulltext_vtab *v, | 5568 fulltext_vtab *v, |
| 5911 sqlite_int64 iBlockid, | 5569 sqlite_int64 iBlockid, |
| 5912 const char *pTerm, int nTerm, int isPrefix, | 5570 const char *pTerm, int nTerm, int isPrefix, |
| 5913 sqlite_int64 *piStartChild, sqlite_int64 *piEndChild | 5571 sqlite_int64 *piStartChild, sqlite_int64 *piEndChild |
| 5914 ){ | 5572 ){ |
| 5915 sqlite3_stmt *s = NULL; | 5573 sqlite3_stmt *s = NULL; |
| 5916 int rc; | 5574 int rc; |
| 5917 | 5575 |
| 5918 assert( iBlockid!=0 ); | 5576 assert( iBlockid!=0 ); |
| 5919 assert( pTerm!=NULL ); | 5577 assert( pTerm!=NULL ); |
| 5920 assert( nTerm!=0 ); /* TODO(shess) Why not allow this? */ | 5578 assert( nTerm!=0 ); /* TODO(shess) Why not allow this? */ |
| 5921 assert( piStartChild!=NULL ); | 5579 assert( piStartChild!=NULL ); |
| 5922 assert( piEndChild!=NULL ); | 5580 assert( piEndChild!=NULL ); |
| 5923 | 5581 |
| 5924 rc = sql_get_statement(v, BLOCK_SELECT_STMT, &s); | 5582 rc = sql_get_statement(v, BLOCK_SELECT_STMT, &s); |
| 5925 if( rc!=SQLITE_OK ) return rc; | 5583 if( rc!=SQLITE_OK ) return rc; |
| 5926 | 5584 |
| 5927 rc = sqlite3_bind_int64(s, 1, iBlockid); | 5585 rc = sqlite3_bind_int64(s, 1, iBlockid); |
| 5928 if( rc!=SQLITE_OK ) return rc; | 5586 if( rc!=SQLITE_OK ) return rc; |
| 5929 | 5587 |
| 5930 rc = sqlite3_step(s); | 5588 rc = sqlite3_step(s); |
| 5931 /* Corrupt if interior node references missing child node. */ | 5589 if( rc==SQLITE_DONE ) return SQLITE_ERROR; |
| 5932 if( rc==SQLITE_DONE ) return SQLITE_CORRUPT_BKPT; | |
| 5933 if( rc!=SQLITE_ROW ) return rc; | 5590 if( rc!=SQLITE_ROW ) return rc; |
| 5934 | 5591 |
| 5935 /* Corrupt if child node isn't a blob. */ | 5592 getChildrenContaining(sqlite3_column_blob(s, 0), sqlite3_column_bytes(s, 0), |
| 5936 if( sqlite3_column_type(s, 0)!=SQLITE_BLOB ){ | 5593 pTerm, nTerm, isPrefix, piStartChild, piEndChild); |
| 5937 sqlite3_reset(s); /* So we don't leave a lock. */ | |
| 5938 return SQLITE_CORRUPT_BKPT; | |
| 5939 }else{ | |
| 5940 const char *pData = sqlite3_column_blob(s, 0); | |
| 5941 int nData = sqlite3_column_bytes(s, 0); | |
| 5942 | |
| 5943 /* Corrupt if child is not a valid interior node. */ | |
| 5944 if( pData==NULL || nData<1 || pData[0]=='\0' ){ | |
| 5945 sqlite3_reset(s); /* So we don't leave a lock. */ | |
| 5946 return SQLITE_CORRUPT_BKPT; | |
| 5947 } | |
| 5948 | |
| 5949 rc = getChildrenContaining(pData, nData, pTerm, nTerm, | |
| 5950 isPrefix, piStartChild, piEndChild); | |
| 5951 if( rc!=SQLITE_OK ){ | |
| 5952 sqlite3_reset(s); | |
| 5953 return rc; | |
| 5954 } | |
| 5955 } | |
| 5956 | 5594 |
| 5957 /* We expect only one row. We must execute another sqlite3_step() | 5595 /* We expect only one row. We must execute another sqlite3_step() |
| 5958 * to complete the iteration; otherwise the table will remain | 5596 * to complete the iteration; otherwise the table will remain |
| 5959 * locked. */ | 5597 * locked. */ |
| 5960 rc = sqlite3_step(s); | 5598 rc = sqlite3_step(s); |
| 5961 if( rc==SQLITE_ROW ) return SQLITE_ERROR; | 5599 if( rc==SQLITE_ROW ) return SQLITE_ERROR; |
| 5962 if( rc!=SQLITE_DONE ) return rc; | 5600 if( rc!=SQLITE_DONE ) return rc; |
| 5963 | 5601 |
| 5964 return SQLITE_OK; | 5602 return SQLITE_OK; |
| 5965 } | 5603 } |
| 5966 | 5604 |
| 5967 /* Traverse the tree represented by pData[nData] looking for | 5605 /* Traverse the tree represented by pData[nData] looking for |
| 5968 ** pTerm[nTerm], placing its doclist into *out. This is internal to | 5606 ** pTerm[nTerm], placing its doclist into *out. This is internal to |
| 5969 ** loadSegment() to make error-handling cleaner. | 5607 ** loadSegment() to make error-handling cleaner. |
| 5970 */ | 5608 */ |
| 5971 static int loadSegmentInt(fulltext_vtab *v, const char *pData, int nData, | 5609 static int loadSegmentInt(fulltext_vtab *v, const char *pData, int nData, |
| 5972 sqlite_int64 iLeavesEnd, | 5610 sqlite_int64 iLeavesEnd, |
| 5973 const char *pTerm, int nTerm, int isPrefix, | 5611 const char *pTerm, int nTerm, int isPrefix, |
| 5974 DataBuffer *out){ | 5612 DataBuffer *out){ |
| 5975 /* Special case where root is a leaf. */ | 5613 /* Special case where root is a leaf. */ |
| 5976 if( *pData=='\0' ){ | 5614 if( *pData=='\0' ){ |
| 5977 return loadSegmentLeaf(v, pData, nData, pTerm, nTerm, isPrefix, out); | 5615 return loadSegmentLeaf(v, pData, nData, pTerm, nTerm, isPrefix, out); |
| 5978 }else{ | 5616 }else{ |
| 5979 int rc; | 5617 int rc; |
| 5980 sqlite_int64 iStartChild, iEndChild; | 5618 sqlite_int64 iStartChild, iEndChild; |
| 5981 | 5619 |
| 5982 /* Process pData as an interior node, then loop down the tree | 5620 /* Process pData as an interior node, then loop down the tree |
| 5983 ** until we find the set of leaf nodes to scan for the term. | 5621 ** until we find the set of leaf nodes to scan for the term. |
| 5984 */ | 5622 */ |
| 5985 rc = getChildrenContaining(pData, nData, pTerm, nTerm, isPrefix, | 5623 getChildrenContaining(pData, nData, pTerm, nTerm, isPrefix, |
| 5986 &iStartChild, &iEndChild); | 5624 &iStartChild, &iEndChild); |
| 5987 if( rc!=SQLITE_OK ) return rc; | |
| 5988 while( iStartChild>iLeavesEnd ){ | 5625 while( iStartChild>iLeavesEnd ){ |
| 5989 sqlite_int64 iNextStart, iNextEnd; | 5626 sqlite_int64 iNextStart, iNextEnd; |
| 5990 rc = loadAndGetChildrenContaining(v, iStartChild, pTerm, nTerm, isPrefix, | 5627 rc = loadAndGetChildrenContaining(v, iStartChild, pTerm, nTerm, isPrefix, |
| 5991 &iNextStart, &iNextEnd); | 5628 &iNextStart, &iNextEnd); |
| 5992 if( rc!=SQLITE_OK ) return rc; | 5629 if( rc!=SQLITE_OK ) return rc; |
| 5993 | 5630 |
| 5994 /* If we've branched, follow the end branch, too. */ | 5631 /* If we've branched, follow the end branch, too. */ |
| 5995 if( iStartChild!=iEndChild ){ | 5632 if( iStartChild!=iEndChild ){ |
| 5996 sqlite_int64 iDummy; | 5633 sqlite_int64 iDummy; |
| 5997 rc = loadAndGetChildrenContaining(v, iEndChild, pTerm, nTerm, isPrefix, | 5634 rc = loadAndGetChildrenContaining(v, iEndChild, pTerm, nTerm, isPrefix, |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6029 ** within a segment, with pairwise merges across segments, or across | 5666 ** within a segment, with pairwise merges across segments, or across |
| 6030 ** all segments at once. | 5667 ** all segments at once. |
| 6031 */ | 5668 */ |
| 6032 static int loadSegment(fulltext_vtab *v, const char *pData, int nData, | 5669 static int loadSegment(fulltext_vtab *v, const char *pData, int nData, |
| 6033 sqlite_int64 iLeavesEnd, | 5670 sqlite_int64 iLeavesEnd, |
| 6034 const char *pTerm, int nTerm, int isPrefix, | 5671 const char *pTerm, int nTerm, int isPrefix, |
| 6035 DataBuffer *out){ | 5672 DataBuffer *out){ |
| 6036 DataBuffer result; | 5673 DataBuffer result; |
| 6037 int rc; | 5674 int rc; |
| 6038 | 5675 |
| 6039 /* Corrupt if segment root can't be valid. */ | 5676 assert( nData>1 ); |
| 6040 if( pData==NULL || nData<1 ) return SQLITE_CORRUPT_BKPT; | |
| 6041 | 5677 |
| 6042 /* This code should never be called with buffered updates. */ | 5678 /* This code should never be called with buffered updates. */ |
| 6043 assert( v->nPendingData<0 ); | 5679 assert( v->nPendingData<0 ); |
| 6044 | 5680 |
| 6045 dataBufferInit(&result, 0); | 5681 dataBufferInit(&result, 0); |
| 6046 rc = loadSegmentInt(v, pData, nData, iLeavesEnd, | 5682 rc = loadSegmentInt(v, pData, nData, iLeavesEnd, |
| 6047 pTerm, nTerm, isPrefix, &result); | 5683 pTerm, nTerm, isPrefix, &result); |
| 6048 if( rc==SQLITE_OK && result.nData>0 ){ | 5684 if( rc==SQLITE_OK && result.nData>0 ){ |
| 6049 if( out->nData==0 ){ | 5685 if( out->nData==0 ){ |
| 6050 DataBuffer tmp = *out; | 5686 DataBuffer tmp = *out; |
| 6051 *out = result; | 5687 *out = result; |
| 6052 result = tmp; | 5688 result = tmp; |
| 6053 }else{ | 5689 }else{ |
| 6054 DataBuffer merged; | 5690 DataBuffer merged; |
| 6055 DLReader readers[2]; | 5691 DLReader readers[2]; |
| 6056 | 5692 |
| 6057 rc = dlrInit(&readers[0], DL_DEFAULT, out->pData, out->nData); | 5693 dlrInit(&readers[0], DL_DEFAULT, out->pData, out->nData); |
| 6058 if( rc==SQLITE_OK ){ | 5694 dlrInit(&readers[1], DL_DEFAULT, result.pData, result.nData); |
| 6059 rc = dlrInit(&readers[1], DL_DEFAULT, result.pData, result.nData); | 5695 dataBufferInit(&merged, out->nData+result.nData); |
| 6060 if( rc==SQLITE_OK ){ | 5696 docListMerge(&merged, readers, 2); |
| 6061 dataBufferInit(&merged, out->nData+result.nData); | 5697 dataBufferDestroy(out); |
| 6062 rc = docListMerge(&merged, readers, 2); | 5698 *out = merged; |
| 6063 dataBufferDestroy(out); | 5699 dlrDestroy(&readers[0]); |
| 6064 *out = merged; | 5700 dlrDestroy(&readers[1]); |
| 6065 dlrDestroy(&readers[1]); | |
| 6066 } | |
| 6067 dlrDestroy(&readers[0]); | |
| 6068 } | |
| 6069 } | 5701 } |
| 6070 } | 5702 } |
| 6071 | |
| 6072 dataBufferDestroy(&result); | 5703 dataBufferDestroy(&result); |
| 6073 return rc; | 5704 return rc; |
| 6074 } | 5705 } |
| 6075 | 5706 |
| 6076 /* Scan the database and merge together the posting lists for the term | 5707 /* Scan the database and merge together the posting lists for the term |
| 6077 ** into *out. | 5708 ** into *out. |
| 6078 */ | 5709 */ |
| 6079 static int termSelect(fulltext_vtab *v, int iColumn, | 5710 static int termSelect(fulltext_vtab *v, int iColumn, |
| 6080 const char *pTerm, int nTerm, int isPrefix, | 5711 const char *pTerm, int nTerm, int isPrefix, |
| 6081 DocListType iType, DataBuffer *out){ | 5712 DocListType iType, DataBuffer *out){ |
| 6082 DataBuffer doclist; | 5713 DataBuffer doclist; |
| 6083 sqlite3_stmt *s; | 5714 sqlite3_stmt *s; |
| 6084 int rc = sql_get_statement(v, SEGDIR_SELECT_ALL_STMT, &s); | 5715 int rc = sql_get_statement(v, SEGDIR_SELECT_ALL_STMT, &s); |
| 6085 if( rc!=SQLITE_OK ) return rc; | 5716 if( rc!=SQLITE_OK ) return rc; |
| 6086 | 5717 |
| 6087 /* This code should never be called with buffered updates. */ | 5718 /* This code should never be called with buffered updates. */ |
| 6088 assert( v->nPendingData<0 ); | 5719 assert( v->nPendingData<0 ); |
| 6089 | 5720 |
| 6090 dataBufferInit(&doclist, 0); | 5721 dataBufferInit(&doclist, 0); |
| 6091 | 5722 |
| 6092 /* Traverse the segments from oldest to newest so that newer doclist | 5723 /* Traverse the segments from oldest to newest so that newer doclist |
| 6093 ** elements for given docids overwrite older elements. | 5724 ** elements for given docids overwrite older elements. |
| 6094 */ | 5725 */ |
| 6095 while( (rc = sqlite3_step(s))==SQLITE_ROW ){ | 5726 while( (rc = sqlite3_step(s))==SQLITE_ROW ){ |
| 6096 const char *pData = sqlite3_column_blob(s, 2); | 5727 const char *pData = sqlite3_column_blob(s, 2); |
| 6097 const int nData = sqlite3_column_bytes(s, 2); | 5728 const int nData = sqlite3_column_bytes(s, 2); |
| 6098 const sqlite_int64 iLeavesEnd = sqlite3_column_int64(s, 1); | 5729 const sqlite_int64 iLeavesEnd = sqlite3_column_int64(s, 1); |
| 6099 | |
| 6100 /* Corrupt if we get back different types than we stored. */ | |
| 6101 if( sqlite3_column_type(s, 1)!=SQLITE_INTEGER || | |
| 6102 sqlite3_column_type(s, 2)!=SQLITE_BLOB ){ | |
| 6103 rc = SQLITE_CORRUPT_BKPT; | |
| 6104 goto err; | |
| 6105 } | |
| 6106 | |
| 6107 rc = loadSegment(v, pData, nData, iLeavesEnd, pTerm, nTerm, isPrefix, | 5730 rc = loadSegment(v, pData, nData, iLeavesEnd, pTerm, nTerm, isPrefix, |
| 6108 &doclist); | 5731 &doclist); |
| 6109 if( rc!=SQLITE_OK ) goto err; | 5732 if( rc!=SQLITE_OK ) goto err; |
| 6110 } | 5733 } |
| 6111 if( rc==SQLITE_DONE ){ | 5734 if( rc==SQLITE_DONE ){ |
| 6112 rc = SQLITE_OK; | |
| 6113 if( doclist.nData!=0 ){ | 5735 if( doclist.nData!=0 ){ |
| 6114 /* TODO(shess) The old term_select_all() code applied the column | 5736 /* TODO(shess) The old term_select_all() code applied the column |
| 6115 ** restrict as we merged segments, leading to smaller buffers. | 5737 ** restrict as we merged segments, leading to smaller buffers. |
| 6116 ** This is probably worthwhile to bring back, once the new storage | 5738 ** This is probably worthwhile to bring back, once the new storage |
| 6117 ** system is checked in. | 5739 ** system is checked in. |
| 6118 */ | 5740 */ |
| 6119 if( iColumn==v->nColumn) iColumn = -1; | 5741 if( iColumn==v->nColumn) iColumn = -1; |
| 6120 rc = docListTrim(DL_DEFAULT, doclist.pData, doclist.nData, | 5742 docListTrim(DL_DEFAULT, doclist.pData, doclist.nData, |
| 6121 iColumn, iType, out); | 5743 iColumn, iType, out); |
| 6122 } | 5744 } |
| 5745 rc = SQLITE_OK; |
| 6123 } | 5746 } |
| 6124 | 5747 |
| 6125 err: | 5748 err: |
| 6126 sqlite3_reset(s); /* So we don't leave a lock. */ | |
| 6127 dataBufferDestroy(&doclist); | 5749 dataBufferDestroy(&doclist); |
| 6128 return rc; | 5750 return rc; |
| 6129 } | 5751 } |
| 6130 | 5752 |
| 6131 /****************************************************************/ | 5753 /****************************************************************/ |
| 6132 /* Used to hold hashtable data for sorting. */ | 5754 /* Used to hold hashtable data for sorting. */ |
| 6133 typedef struct TermData { | 5755 typedef struct TermData { |
| 6134 const char *pTerm; | 5756 const char *pTerm; |
| 6135 int nTerm; | 5757 int nTerm; |
| 6136 DLCollector *pCollector; | 5758 DLCollector *pCollector; |
| (...skipping 321 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6458 /* optimize() helper function. Put the readers in order and iterate | 6080 /* optimize() helper function. Put the readers in order and iterate |
| 6459 ** through them, merging doclists for matching terms into pWriter. | 6081 ** through them, merging doclists for matching terms into pWriter. |
| 6460 ** Returns SQLITE_OK on success, or the SQLite error code which | 6082 ** Returns SQLITE_OK on success, or the SQLite error code which |
| 6461 ** prevented success. | 6083 ** prevented success. |
| 6462 */ | 6084 */ |
| 6463 static int optimizeInternal(fulltext_vtab *v, | 6085 static int optimizeInternal(fulltext_vtab *v, |
| 6464 OptLeavesReader *readers, int nReaders, | 6086 OptLeavesReader *readers, int nReaders, |
| 6465 LeafWriter *pWriter){ | 6087 LeafWriter *pWriter){ |
| 6466 int i, rc = SQLITE_OK; | 6088 int i, rc = SQLITE_OK; |
| 6467 DataBuffer doclist, merged, tmp; | 6089 DataBuffer doclist, merged, tmp; |
| 6468 const char *pData; | |
| 6469 | 6090 |
| 6470 /* Order the readers. */ | 6091 /* Order the readers. */ |
| 6471 i = nReaders; | 6092 i = nReaders; |
| 6472 while( i-- > 0 ){ | 6093 while( i-- > 0 ){ |
| 6473 optLeavesReaderReorder(&readers[i], nReaders-i); | 6094 optLeavesReaderReorder(&readers[i], nReaders-i); |
| 6474 } | 6095 } |
| 6475 | 6096 |
| 6476 dataBufferInit(&doclist, LEAF_MAX); | 6097 dataBufferInit(&doclist, LEAF_MAX); |
| 6477 dataBufferInit(&merged, LEAF_MAX); | 6098 dataBufferInit(&merged, LEAF_MAX); |
| 6478 | 6099 |
| 6479 /* Exhausted readers bubble to the end, so when the first reader is | 6100 /* Exhausted readers bubble to the end, so when the first reader is |
| 6480 ** at eof, all are at eof. | 6101 ** at eof, all are at eof. |
| 6481 */ | 6102 */ |
| 6482 while( !optLeavesReaderAtEnd(&readers[0]) ){ | 6103 while( !optLeavesReaderAtEnd(&readers[0]) ){ |
| 6483 | 6104 |
| 6484 /* Figure out how many readers share the next term. */ | 6105 /* Figure out how many readers share the next term. */ |
| 6485 for(i=1; i<nReaders && !optLeavesReaderAtEnd(&readers[i]); i++){ | 6106 for(i=1; i<nReaders && !optLeavesReaderAtEnd(&readers[i]); i++){ |
| 6486 if( 0!=optLeavesReaderTermCmp(&readers[0], &readers[i]) ) break; | 6107 if( 0!=optLeavesReaderTermCmp(&readers[0], &readers[i]) ) break; |
| 6487 } | 6108 } |
| 6488 | 6109 |
| 6489 pData = optLeavesReaderData(&readers[0]); | |
| 6490 if( pData==NULL ){ | |
| 6491 rc = SQLITE_CORRUPT_BKPT; | |
| 6492 break; | |
| 6493 } | |
| 6494 | |
| 6495 /* Special-case for no merge. */ | 6110 /* Special-case for no merge. */ |
| 6496 if( i==1 ){ | 6111 if( i==1 ){ |
| 6497 /* Trim deletions from the doclist. */ | 6112 /* Trim deletions from the doclist. */ |
| 6498 dataBufferReset(&merged); | 6113 dataBufferReset(&merged); |
| 6499 rc = docListTrim(DL_DEFAULT, | 6114 docListTrim(DL_DEFAULT, |
| 6500 pData, | 6115 optLeavesReaderData(&readers[0]), |
| 6501 optLeavesReaderDataBytes(&readers[0]), | 6116 optLeavesReaderDataBytes(&readers[0]), |
| 6502 -1, DL_DEFAULT, &merged); | 6117 -1, DL_DEFAULT, &merged); |
| 6503 if( rc!= SQLITE_OK ) break; | |
| 6504 }else{ | 6118 }else{ |
| 6505 DLReader dlReaders[MERGE_COUNT]; | 6119 DLReader dlReaders[MERGE_COUNT]; |
| 6506 int iReader, nReaders; | 6120 int iReader, nReaders; |
| 6507 | 6121 |
| 6508 /* Prime the pipeline with the first reader's doclist. After | 6122 /* Prime the pipeline with the first reader's doclist. After |
| 6509 ** one pass index 0 will reference the accumulated doclist. | 6123 ** one pass index 0 will reference the accumulated doclist. |
| 6510 */ | 6124 */ |
| 6511 rc = dlrInit(&dlReaders[0], DL_DEFAULT, | 6125 dlrInit(&dlReaders[0], DL_DEFAULT, |
| 6512 pData, | 6126 optLeavesReaderData(&readers[0]), |
| 6513 optLeavesReaderDataBytes(&readers[0])); | 6127 optLeavesReaderDataBytes(&readers[0])); |
| 6514 if( rc!=SQLITE_OK ) break; | |
| 6515 iReader = 1; | 6128 iReader = 1; |
| 6516 | 6129 |
| 6517 assert( iReader<i ); /* Must execute the loop at least once. */ | 6130 assert( iReader<i ); /* Must execute the loop at least once. */ |
| 6518 while( iReader<i ){ | 6131 while( iReader<i ){ |
| 6519 /* Merge 16 inputs per pass. */ | 6132 /* Merge 16 inputs per pass. */ |
| 6520 for( nReaders=1; iReader<i && nReaders<MERGE_COUNT; | 6133 for( nReaders=1; iReader<i && nReaders<MERGE_COUNT; |
| 6521 iReader++, nReaders++ ){ | 6134 iReader++, nReaders++ ){ |
| 6522 pData = optLeavesReaderData(&readers[iReader]); | 6135 dlrInit(&dlReaders[nReaders], DL_DEFAULT, |
| 6523 if( pData == NULL ){ | 6136 optLeavesReaderData(&readers[iReader]), |
| 6524 rc = SQLITE_CORRUPT_BKPT; | 6137 optLeavesReaderDataBytes(&readers[iReader])); |
| 6525 break; | |
| 6526 } | |
| 6527 rc = dlrInit(&dlReaders[nReaders], DL_DEFAULT, | |
| 6528 pData, | |
| 6529 optLeavesReaderDataBytes(&readers[iReader])); | |
| 6530 if( rc != SQLITE_OK ) break; | |
| 6531 } | 6138 } |
| 6532 | 6139 |
| 6533 /* Merge doclists and swap result into accumulator. */ | 6140 /* Merge doclists and swap result into accumulator. */ |
| 6534 if( rc==SQLITE_OK ){ | 6141 dataBufferReset(&merged); |
| 6535 dataBufferReset(&merged); | 6142 docListMerge(&merged, dlReaders, nReaders); |
| 6536 rc = docListMerge(&merged, dlReaders, nReaders); | 6143 tmp = merged; |
| 6537 tmp = merged; | 6144 merged = doclist; |
| 6538 merged = doclist; | 6145 doclist = tmp; |
| 6539 doclist = tmp; | |
| 6540 } | |
| 6541 | 6146 |
| 6542 while( nReaders-- > 0 ){ | 6147 while( nReaders-- > 0 ){ |
| 6543 dlrDestroy(&dlReaders[nReaders]); | 6148 dlrDestroy(&dlReaders[nReaders]); |
| 6544 } | 6149 } |
| 6545 | 6150 |
| 6546 if( rc!=SQLITE_OK ) goto err; | |
| 6547 | |
| 6548 /* Accumulated doclist to reader 0 for next pass. */ | 6151 /* Accumulated doclist to reader 0 for next pass. */ |
| 6549 rc = dlrInit(&dlReaders[0], DL_DEFAULT, doclist.pData, doclist.nData); | 6152 dlrInit(&dlReaders[0], DL_DEFAULT, doclist.pData, doclist.nData); |
| 6550 if( rc!=SQLITE_OK ) goto err; | |
| 6551 } | 6153 } |
| 6552 | 6154 |
| 6553 /* Destroy reader that was left in the pipeline. */ | 6155 /* Destroy reader that was left in the pipeline. */ |
| 6554 dlrDestroy(&dlReaders[0]); | 6156 dlrDestroy(&dlReaders[0]); |
| 6555 | 6157 |
| 6556 /* Trim deletions from the doclist. */ | 6158 /* Trim deletions from the doclist. */ |
| 6557 dataBufferReset(&merged); | 6159 dataBufferReset(&merged); |
| 6558 rc = docListTrim(DL_DEFAULT, doclist.pData, doclist.nData, | 6160 docListTrim(DL_DEFAULT, doclist.pData, doclist.nData, |
| 6559 -1, DL_DEFAULT, &merged); | 6161 -1, DL_DEFAULT, &merged); |
| 6560 if( rc!=SQLITE_OK ) goto err; | |
| 6561 } | 6162 } |
| 6562 | 6163 |
| 6563 /* Only pass doclists with hits (skip if all hits deleted). */ | 6164 /* Only pass doclists with hits (skip if all hits deleted). */ |
| 6564 if( merged.nData>0 ){ | 6165 if( merged.nData>0 ){ |
| 6565 rc = leafWriterStep(v, pWriter, | 6166 rc = leafWriterStep(v, pWriter, |
| 6566 optLeavesReaderTerm(&readers[0]), | 6167 optLeavesReaderTerm(&readers[0]), |
| 6567 optLeavesReaderTermBytes(&readers[0]), | 6168 optLeavesReaderTermBytes(&readers[0]), |
| 6568 merged.pData, merged.nData); | 6169 merged.pData, merged.nData); |
| 6569 if( rc!=SQLITE_OK ) goto err; | 6170 if( rc!=SQLITE_OK ) goto err; |
| 6570 } | 6171 } |
| (...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6630 */ | 6231 */ |
| 6631 leafWriterInit(iMaxLevel, 0, &writer); | 6232 leafWriterInit(iMaxLevel, 0, &writer); |
| 6632 | 6233 |
| 6633 i = 0; | 6234 i = 0; |
| 6634 while( (rc = sqlite3_step(s))==SQLITE_ROW ){ | 6235 while( (rc = sqlite3_step(s))==SQLITE_ROW ){ |
| 6635 sqlite_int64 iStart = sqlite3_column_int64(s, 0); | 6236 sqlite_int64 iStart = sqlite3_column_int64(s, 0); |
| 6636 sqlite_int64 iEnd = sqlite3_column_int64(s, 1); | 6237 sqlite_int64 iEnd = sqlite3_column_int64(s, 1); |
| 6637 const char *pRootData = sqlite3_column_blob(s, 2); | 6238 const char *pRootData = sqlite3_column_blob(s, 2); |
| 6638 int nRootData = sqlite3_column_bytes(s, 2); | 6239 int nRootData = sqlite3_column_bytes(s, 2); |
| 6639 | 6240 |
| 6640 /* Corrupt if we get back different types than we stored. */ | |
| 6641 if( sqlite3_column_type(s, 0)!=SQLITE_INTEGER || | |
| 6642 sqlite3_column_type(s, 1)!=SQLITE_INTEGER || | |
| 6643 sqlite3_column_type(s, 2)!=SQLITE_BLOB ){ | |
| 6644 rc = SQLITE_CORRUPT_BKPT; | |
| 6645 break; | |
| 6646 } | |
| 6647 | |
| 6648 assert( i<nReaders ); | 6241 assert( i<nReaders ); |
| 6649 rc = leavesReaderInit(v, -1, iStart, iEnd, pRootData, nRootData, | 6242 rc = leavesReaderInit(v, -1, iStart, iEnd, pRootData, nRootData, |
| 6650 &readers[i].reader); | 6243 &readers[i].reader); |
| 6651 if( rc!=SQLITE_OK ) break; | 6244 if( rc!=SQLITE_OK ) break; |
| 6652 | 6245 |
| 6653 readers[i].segment = i; | 6246 readers[i].segment = i; |
| 6654 i++; | 6247 i++; |
| 6655 } | 6248 } |
| 6656 | 6249 |
| 6657 /* If we managed to successfully read them all, optimize them. */ | 6250 /* If we managed to successfully read them all, optimize them. */ |
| 6658 if( rc==SQLITE_DONE ){ | 6251 if( rc==SQLITE_DONE ){ |
| 6659 assert( i==nReaders ); | 6252 assert( i==nReaders ); |
| 6660 rc = optimizeInternal(v, readers, nReaders, &writer); | 6253 rc = optimizeInternal(v, readers, nReaders, &writer); |
| 6661 }else{ | |
| 6662 sqlite3_reset(s); /* So we don't leave a lock. */ | |
| 6663 } | 6254 } |
| 6664 | 6255 |
| 6665 while( i-- > 0 ){ | 6256 while( i-- > 0 ){ |
| 6666 leavesReaderDestroy(&readers[i].reader); | 6257 leavesReaderDestroy(&readers[i].reader); |
| 6667 } | 6258 } |
| 6668 sqlite3_free(readers); | 6259 sqlite3_free(readers); |
| 6669 | 6260 |
| 6670 /* If we've successfully gotten to here, delete the old segments | 6261 /* If we've successfully gotten to here, delete the old segments |
| 6671 ** and flush the interior structure of the new segment. | 6262 ** and flush the interior structure of the new segment. |
| 6672 */ | 6263 */ |
| (...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6716 ** iStartBlockid and iEndBlockid, inclusive, or by the contents of | 6307 ** iStartBlockid and iEndBlockid, inclusive, or by the contents of |
| 6717 ** pRootData if iStartBlockid is 0 (in which case the entire segment | 6308 ** pRootData if iStartBlockid is 0 (in which case the entire segment |
| 6718 ** fit in a leaf). | 6309 ** fit in a leaf). |
| 6719 */ | 6310 */ |
| 6720 static int collectSegmentTerms(fulltext_vtab *v, sqlite3_stmt *s, | 6311 static int collectSegmentTerms(fulltext_vtab *v, sqlite3_stmt *s, |
| 6721 fts2Hash *pTerms){ | 6312 fts2Hash *pTerms){ |
| 6722 const sqlite_int64 iStartBlockid = sqlite3_column_int64(s, 0); | 6313 const sqlite_int64 iStartBlockid = sqlite3_column_int64(s, 0); |
| 6723 const sqlite_int64 iEndBlockid = sqlite3_column_int64(s, 1); | 6314 const sqlite_int64 iEndBlockid = sqlite3_column_int64(s, 1); |
| 6724 const char *pRootData = sqlite3_column_blob(s, 2); | 6315 const char *pRootData = sqlite3_column_blob(s, 2); |
| 6725 const int nRootData = sqlite3_column_bytes(s, 2); | 6316 const int nRootData = sqlite3_column_bytes(s, 2); |
| 6726 int rc; | |
| 6727 LeavesReader reader; | 6317 LeavesReader reader; |
| 6728 | 6318 int rc = leavesReaderInit(v, 0, iStartBlockid, iEndBlockid, |
| 6729 /* Corrupt if we get back different types than we stored. */ | 6319 pRootData, nRootData, &reader); |
| 6730 if( sqlite3_column_type(s, 0)!=SQLITE_INTEGER || | |
| 6731 sqlite3_column_type(s, 1)!=SQLITE_INTEGER || | |
| 6732 sqlite3_column_type(s, 2)!=SQLITE_BLOB ){ | |
| 6733 return SQLITE_CORRUPT_BKPT; | |
| 6734 } | |
| 6735 | |
| 6736 rc = leavesReaderInit(v, 0, iStartBlockid, iEndBlockid, | |
| 6737 pRootData, nRootData, &reader); | |
| 6738 if( rc!=SQLITE_OK ) return rc; | 6320 if( rc!=SQLITE_OK ) return rc; |
| 6739 | 6321 |
| 6740 while( rc==SQLITE_OK && !leavesReaderAtEnd(&reader) ){ | 6322 while( rc==SQLITE_OK && !leavesReaderAtEnd(&reader) ){ |
| 6741 const char *pTerm = leavesReaderTerm(&reader); | 6323 const char *pTerm = leavesReaderTerm(&reader); |
| 6742 const int nTerm = leavesReaderTermBytes(&reader); | 6324 const int nTerm = leavesReaderTermBytes(&reader); |
| 6743 void *oldValue = sqlite3Fts2HashFind(pTerms, pTerm, nTerm); | 6325 void *oldValue = sqlite3Fts2HashFind(pTerms, pTerm, nTerm); |
| 6744 void *newValue = (void *)((char *)oldValue+1); | 6326 void *newValue = (void *)((char *)oldValue+1); |
| 6745 | 6327 |
| 6746 /* From the comment before sqlite3Fts2HashInsert in fts2_hash.c, | 6328 /* From the comment before sqlite3Fts2HashInsert in fts2_hash.c, |
| 6747 ** the data value passed is returned in case of malloc failure. | 6329 ** the data value passed is returned in case of malloc failure. |
| (...skipping 141 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6889 } | 6471 } |
| 6890 } | 6472 } |
| 6891 | 6473 |
| 6892 /* Expand the DL_DEFAULT doclist in pData into a text result in | 6474 /* Expand the DL_DEFAULT doclist in pData into a text result in |
| 6893 ** pContext. | 6475 ** pContext. |
| 6894 */ | 6476 */ |
| 6895 static void createDoclistResult(sqlite3_context *pContext, | 6477 static void createDoclistResult(sqlite3_context *pContext, |
| 6896 const char *pData, int nData){ | 6478 const char *pData, int nData){ |
| 6897 DataBuffer dump; | 6479 DataBuffer dump; |
| 6898 DLReader dlReader; | 6480 DLReader dlReader; |
| 6899 int rc; | |
| 6900 | 6481 |
| 6901 assert( pData!=NULL && nData>0 ); | 6482 assert( pData!=NULL && nData>0 ); |
| 6902 | 6483 |
| 6903 rc = dlrInit(&dlReader, DL_DEFAULT, pData, nData); | |
| 6904 if( rc!=SQLITE_OK ) return; | |
| 6905 dataBufferInit(&dump, 0); | 6484 dataBufferInit(&dump, 0); |
| 6906 for( ; rc==SQLITE_OK && !dlrAtEnd(&dlReader); rc = dlrStep(&dlReader) ){ | 6485 dlrInit(&dlReader, DL_DEFAULT, pData, nData); |
| 6486 for( ; !dlrAtEnd(&dlReader); dlrStep(&dlReader) ){ |
| 6907 char buf[256]; | 6487 char buf[256]; |
| 6908 PLReader plReader; | 6488 PLReader plReader; |
| 6909 | 6489 |
| 6910 rc = plrInit(&plReader, &dlReader); | 6490 plrInit(&plReader, &dlReader); |
| 6911 if( rc!=SQLITE_OK ) break; | |
| 6912 if( DL_DEFAULT==DL_DOCIDS || plrAtEnd(&plReader) ){ | 6491 if( DL_DEFAULT==DL_DOCIDS || plrAtEnd(&plReader) ){ |
| 6913 sqlite3_snprintf(sizeof(buf), buf, "[%lld] ", dlrDocid(&dlReader)); | 6492 sqlite3_snprintf(sizeof(buf), buf, "[%lld] ", dlrDocid(&dlReader)); |
| 6914 dataBufferAppend(&dump, buf, strlen(buf)); | 6493 dataBufferAppend(&dump, buf, strlen(buf)); |
| 6915 }else{ | 6494 }else{ |
| 6916 int iColumn = plrColumn(&plReader); | 6495 int iColumn = plrColumn(&plReader); |
| 6917 | 6496 |
| 6918 sqlite3_snprintf(sizeof(buf), buf, "[%lld %d[", | 6497 sqlite3_snprintf(sizeof(buf), buf, "[%lld %d[", |
| 6919 dlrDocid(&dlReader), iColumn); | 6498 dlrDocid(&dlReader), iColumn); |
| 6920 dataBufferAppend(&dump, buf, strlen(buf)); | 6499 dataBufferAppend(&dump, buf, strlen(buf)); |
| 6921 | 6500 |
| 6922 for( ; !plrAtEnd(&plReader); rc = plrStep(&plReader) ){ | 6501 for( ; !plrAtEnd(&plReader); plrStep(&plReader) ){ |
| 6923 if( rc!=SQLITE_OK ) break; | |
| 6924 if( plrColumn(&plReader)!=iColumn ){ | 6502 if( plrColumn(&plReader)!=iColumn ){ |
| 6925 iColumn = plrColumn(&plReader); | 6503 iColumn = plrColumn(&plReader); |
| 6926 sqlite3_snprintf(sizeof(buf), buf, "] %d[", iColumn); | 6504 sqlite3_snprintf(sizeof(buf), buf, "] %d[", iColumn); |
| 6927 assert( dump.nData>0 ); | 6505 assert( dump.nData>0 ); |
| 6928 dump.nData--; /* Overwrite trailing space. */ | 6506 dump.nData--; /* Overwrite trailing space. */ |
| 6929 assert( dump.pData[dump.nData]==' '); | 6507 assert( dump.pData[dump.nData]==' '); |
| 6930 dataBufferAppend(&dump, buf, strlen(buf)); | 6508 dataBufferAppend(&dump, buf, strlen(buf)); |
| 6931 } | 6509 } |
| 6932 if( DL_DEFAULT==DL_POSITIONS_OFFSETS ){ | 6510 if( DL_DEFAULT==DL_POSITIONS_OFFSETS ){ |
| 6933 sqlite3_snprintf(sizeof(buf), buf, "%d,%d,%d ", | 6511 sqlite3_snprintf(sizeof(buf), buf, "%d,%d,%d ", |
| 6934 plrPosition(&plReader), | 6512 plrPosition(&plReader), |
| 6935 plrStartOffset(&plReader), plrEndOffset(&plReader)); | 6513 plrStartOffset(&plReader), plrEndOffset(&plReader)); |
| 6936 }else if( DL_DEFAULT==DL_POSITIONS ){ | 6514 }else if( DL_DEFAULT==DL_POSITIONS ){ |
| 6937 sqlite3_snprintf(sizeof(buf), buf, "%d ", plrPosition(&plReader)); | 6515 sqlite3_snprintf(sizeof(buf), buf, "%d ", plrPosition(&plReader)); |
| 6938 }else{ | 6516 }else{ |
| 6939 assert( NULL=="Unhandled DL_DEFAULT value"); | 6517 assert( NULL=="Unhandled DL_DEFAULT value"); |
| 6940 } | 6518 } |
| 6941 dataBufferAppend(&dump, buf, strlen(buf)); | 6519 dataBufferAppend(&dump, buf, strlen(buf)); |
| 6942 } | 6520 } |
| 6943 plrDestroy(&plReader); | 6521 plrDestroy(&plReader); |
| 6944 if( rc!= SQLITE_OK ) break; | |
| 6945 | 6522 |
| 6946 assert( dump.nData>0 ); | 6523 assert( dump.nData>0 ); |
| 6947 dump.nData--; /* Overwrite trailing space. */ | 6524 dump.nData--; /* Overwrite trailing space. */ |
| 6948 assert( dump.pData[dump.nData]==' '); | 6525 assert( dump.pData[dump.nData]==' '); |
| 6949 dataBufferAppend(&dump, "]] ", 3); | 6526 dataBufferAppend(&dump, "]] ", 3); |
| 6950 } | 6527 } |
| 6951 } | 6528 } |
| 6952 dlrDestroy(&dlReader); | 6529 dlrDestroy(&dlReader); |
| 6953 if( rc!=SQLITE_OK ){ | |
| 6954 dataBufferDestroy(&dump); | |
| 6955 return; | |
| 6956 } | |
| 6957 | 6530 |
| 6958 assert( dump.nData>0 ); | 6531 assert( dump.nData>0 ); |
| 6959 dump.nData--; /* Overwrite trailing space. */ | 6532 dump.nData--; /* Overwrite trailing space. */ |
| 6960 assert( dump.pData[dump.nData]==' '); | 6533 assert( dump.pData[dump.nData]==' '); |
| 6961 dump.pData[dump.nData] = '\0'; | 6534 dump.pData[dump.nData] = '\0'; |
| 6962 assert( dump.nData>0 ); | 6535 assert( dump.nData>0 ); |
| 6963 | 6536 |
| 6964 /* Passes ownership of dump's buffer to pContext. */ | 6537 /* Passes ownership of dump's buffer to pContext. */ |
| 6965 sqlite3_result_text(pContext, dump.pData, dump.nData, sqlite3_free); | 6538 sqlite3_result_text(pContext, dump.pData, dump.nData, sqlite3_free); |
| 6966 dump.pData = NULL; | 6539 dump.pData = NULL; |
| (...skipping 273 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 7240 ){ | 6813 ){ |
| 7241 rc = SQLITE_NOMEM; | 6814 rc = SQLITE_NOMEM; |
| 7242 } | 6815 } |
| 7243 } | 6816 } |
| 7244 | 6817 |
| 7245 /* Create the virtual table wrapper around the hash-table and overload | 6818 /* Create the virtual table wrapper around the hash-table and overload |
| 7246 ** the two scalar functions. If this is successful, register the | 6819 ** the two scalar functions. If this is successful, register the |
| 7247 ** module with sqlite. | 6820 ** module with sqlite. |
| 7248 */ | 6821 */ |
| 7249 if( SQLITE_OK==rc | 6822 if( SQLITE_OK==rc |
| 7250 #if GEARS_FTS2_CHANGES && !SQLITE_TEST | |
| 7251 /* fts2_tokenizer() disabled for security reasons. */ | |
| 7252 #else | |
| 7253 && SQLITE_OK==(rc = sqlite3Fts2InitHashTable(db, pHash, "fts2_tokenizer")) | 6823 && SQLITE_OK==(rc = sqlite3Fts2InitHashTable(db, pHash, "fts2_tokenizer")) |
| 7254 #endif | |
| 7255 && SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1)) | 6824 && SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1)) |
| 7256 && SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", -1)) | 6825 && SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", -1)) |
| 7257 && SQLITE_OK==(rc = sqlite3_overload_function(db, "optimize", -1)) | 6826 && SQLITE_OK==(rc = sqlite3_overload_function(db, "optimize", -1)) |
| 7258 #ifdef SQLITE_TEST | 6827 #ifdef SQLITE_TEST |
| 7259 && SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_terms", -1)) | 6828 && SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_terms", -1)) |
| 7260 && SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_doclist", -1)) | 6829 && SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_doclist", -1)) |
| 7261 #endif | 6830 #endif |
| 7262 ){ | 6831 ){ |
| 7263 return sqlite3_create_module_v2( | 6832 return sqlite3_create_module_v2( |
| 7264 db, "fts2", &fts2Module, (void *)pHash, hashDestroy | 6833 db, "fts2", &fts2Module, (void *)pHash, hashDestroy |
| (...skipping 17 matching lines...) Expand all Loading... |
| 7282 sqlite3 *db, | 6851 sqlite3 *db, |
| 7283 char **pzErrMsg, | 6852 char **pzErrMsg, |
| 7284 const sqlite3_api_routines *pApi | 6853 const sqlite3_api_routines *pApi |
| 7285 ){ | 6854 ){ |
| 7286 SQLITE_EXTENSION_INIT2(pApi) | 6855 SQLITE_EXTENSION_INIT2(pApi) |
| 7287 return sqlite3Fts2Init(db); | 6856 return sqlite3Fts2Init(db); |
| 7288 } | 6857 } |
| 7289 #endif | 6858 #endif |
| 7290 | 6859 |
| 7291 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */ | 6860 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */ |
| OLD | NEW |