OLD | NEW |
1 /* fts2 has a design flaw which can lead to database corruption (see | 1 /* fts2 has a design flaw which can lead to database corruption (see |
2 ** below). It is recommended not to use it any longer, instead use | 2 ** below). It is recommended not to use it any longer, instead use |
3 ** fts3 (or higher). If you believe that your use of fts2 is safe, | 3 ** fts3 (or higher). If you believe that your use of fts2 is safe, |
4 ** add -DSQLITE_ENABLE_BROKEN_FTS2=1 to your CFLAGS. | 4 ** add -DSQLITE_ENABLE_BROKEN_FTS2=1 to your CFLAGS. |
5 */ | 5 */ |
6 #if (!defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)) \ | 6 #if (!defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)) \ |
7 && !defined(SQLITE_ENABLE_BROKEN_FTS2) | 7 && !defined(SQLITE_ENABLE_BROKEN_FTS2) |
8 #error fts2 has a design flaw and has been deprecated. | 8 #error fts2 has a design flaw and has been deprecated. |
9 #endif | 9 #endif |
10 /* The flaw is that fts2 uses the content table's unaliased rowid as | 10 /* The flaw is that fts2 uses the content table's unaliased rowid as |
(...skipping 306 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
317 #endif | 317 #endif |
318 | 318 |
319 #include <assert.h> | 319 #include <assert.h> |
320 #include <stdlib.h> | 320 #include <stdlib.h> |
321 #include <stdio.h> | 321 #include <stdio.h> |
322 #include <string.h> | 322 #include <string.h> |
323 #include "fts2.h" | 323 #include "fts2.h" |
324 #include "fts2_hash.h" | 324 #include "fts2_hash.h" |
325 #include "fts2_tokenizer.h" | 325 #include "fts2_tokenizer.h" |
326 #include "sqlite3.h" | 326 #include "sqlite3.h" |
327 #ifndef SQLITE_CORE | 327 #ifndef SQLITE_CORE |
328 # include "sqlite3ext.h" | 328 # include "sqlite3ext.h" |
329 SQLITE_EXTENSION_INIT1 | 329 SQLITE_EXTENSION_INIT1 |
330 #endif | 330 #endif |
331 | 331 |
332 | 332 |
333 /* TODO(shess) MAN, this thing needs some refactoring. At minimum, it | 333 /* TODO(shess) MAN, this thing needs some refactoring. At minimum, it |
334 ** would be nice to order the file better, perhaps something along the | 334 ** would be nice to order the file better, perhaps something along the |
335 ** lines of: | 335 ** lines of: |
336 ** | 336 ** |
337 ** - utility functions | 337 ** - utility functions |
(...skipping 3420 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3758 * sets isPrefix correctly, but since that code doesn't eat the '*', the | 3758 * sets isPrefix correctly, but since that code doesn't eat the '*', the |
3759 * ICU tokenizer returns it as the next token. So eat it here until a | 3759 * ICU tokenizer returns it as the next token. So eat it here until a |
3760 * better solution presents itself. | 3760 * better solution presents itself. |
3761 */ | 3761 */ |
3762 if( pQuery->nTerms>0 && nToken==1 && pSegment[iBegin]=='*' && | 3762 if( pQuery->nTerms>0 && nToken==1 && pSegment[iBegin]=='*' && |
3763 iEndLast==iBegin){ | 3763 iEndLast==iBegin){ |
3764 pQuery->pTerms[pQuery->nTerms-1].isPrefix = 1; | 3764 pQuery->pTerms[pQuery->nTerms-1].isPrefix = 1; |
3765 continue; | 3765 continue; |
3766 } | 3766 } |
3767 iEndLast = iEnd; | 3767 iEndLast = iEnd; |
3768 | 3768 |
3769 queryAdd(pQuery, pToken, nToken); | 3769 queryAdd(pQuery, pToken, nToken); |
3770 if( !inPhrase && iBegin>0 && pSegment[iBegin-1]=='-' ){ | 3770 if( !inPhrase && iBegin>0 && pSegment[iBegin-1]=='-' ){ |
3771 pQuery->pTerms[pQuery->nTerms-1].isNot = 1; | 3771 pQuery->pTerms[pQuery->nTerms-1].isNot = 1; |
3772 } | 3772 } |
3773 if( iEnd<nSegment && pSegment[iEnd]=='*' ){ | 3773 if( iEnd<nSegment && pSegment[iEnd]=='*' ){ |
3774 pQuery->pTerms[pQuery->nTerms-1].isPrefix = 1; | 3774 pQuery->pTerms[pQuery->nTerms-1].isPrefix = 1; |
3775 } | 3775 } |
3776 pQuery->pTerms[pQuery->nTerms-1].iPhrase = nTerm; | 3776 pQuery->pTerms[pQuery->nTerms-1].iPhrase = nTerm; |
3777 if( inPhrase ){ | 3777 if( inPhrase ){ |
3778 nTerm++; | 3778 nTerm++; |
(...skipping 1502 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5281 | 5281 |
5282 /* loadSegmentLeaves() may not read all the way to SQLITE_DONE, thus | 5282 /* loadSegmentLeaves() may not read all the way to SQLITE_DONE, thus |
5283 ** leaving the statement handle open, which locks the table. | 5283 ** leaving the statement handle open, which locks the table. |
5284 */ | 5284 */ |
5285 /* TODO(shess) This "solution" is not satisfactory. Really, there | 5285 /* TODO(shess) This "solution" is not satisfactory. Really, there |
5286 ** should be a check-in function for all statement handles which | 5286 ** should be a check-in function for all statement handles which |
5287 ** arranges to call sqlite3_reset(). This most likely will require | 5287 ** arranges to call sqlite3_reset(). This most likely will require |
5288 ** modification to control flow all over the place, though, so for now | 5288 ** modification to control flow all over the place, though, so for now |
5289 ** just punt. | 5289 ** just punt. |
5290 ** | 5290 ** |
5291 ** Note the the current system assumes that segment merges will run to | 5291 ** Note the current system assumes that segment merges will run to |
5292 ** completion, which is why this particular problem probably hasn't arisen in | 5292 ** completion, which is why this particular problem probably hasn't arisen in |
5293 ** this case. Probably a brittle assumption. | 5293 ** this case. Probably a brittle assumption. |
5294 */ | 5294 */ |
5295 static int leavesReaderReset(LeavesReader *pReader){ | 5295 static int leavesReaderReset(LeavesReader *pReader){ |
5296 return sqlite3_reset(pReader->pStmt); | 5296 return sqlite3_reset(pReader->pStmt); |
5297 } | 5297 } |
5298 | 5298 |
5299 static void leavesReaderDestroy(LeavesReader *pReader){ | 5299 static void leavesReaderDestroy(LeavesReader *pReader){ |
5300 /* If idx is -1, that means we're using a non-cached statement | 5300 /* If idx is -1, that means we're using a non-cached statement |
5301 ** handle in the optimize() case, so we need to release it. | 5301 ** handle in the optimize() case, so we need to release it. |
(...skipping 243 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5545 if( pData==NULL ){ | 5545 if( pData==NULL ){ |
5546 rc = SQLITE_CORRUPT_BKPT; | 5546 rc = SQLITE_CORRUPT_BKPT; |
5547 break; | 5547 break; |
5548 } | 5548 } |
5549 rc = dlrInit(&dlReaders[i], DL_DEFAULT, | 5549 rc = dlrInit(&dlReaders[i], DL_DEFAULT, |
5550 pData, | 5550 pData, |
5551 leavesReaderDataBytes(pReaders+i)); | 5551 leavesReaderDataBytes(pReaders+i)); |
5552 if( rc!=SQLITE_OK ) break; | 5552 if( rc!=SQLITE_OK ) break; |
5553 } | 5553 } |
5554 if( rc!=SQLITE_OK ){ | 5554 if( rc!=SQLITE_OK ){ |
5555 while( i-->0 ){ | 5555 while( i-->0 ){ |
5556 dlrDestroy(&dlReaders[i]); | 5556 dlrDestroy(&dlReaders[i]); |
5557 } | 5557 } |
5558 return rc; | 5558 return rc; |
5559 } | 5559 } |
5560 | 5560 |
5561 return leafWriterStepMerge(v, pWriter, pTerm, nTerm, dlReaders, nReaders); | 5561 return leafWriterStepMerge(v, pWriter, pTerm, nTerm, dlReaders, nReaders); |
5562 } | 5562 } |
5563 | 5563 |
5564 /* Forward ref due to mutual recursion with segdirNextIndex(). */ | 5564 /* Forward ref due to mutual recursion with segdirNextIndex(). */ |
5565 static int segmentMerge(fulltext_vtab *v, int iLevel); | 5565 static int segmentMerge(fulltext_vtab *v, int iLevel); |
(...skipping 1328 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6894 */ | 6894 */ |
6895 static void createDoclistResult(sqlite3_context *pContext, | 6895 static void createDoclistResult(sqlite3_context *pContext, |
6896 const char *pData, int nData){ | 6896 const char *pData, int nData){ |
6897 DataBuffer dump; | 6897 DataBuffer dump; |
6898 DLReader dlReader; | 6898 DLReader dlReader; |
6899 int rc; | 6899 int rc; |
6900 | 6900 |
6901 assert( pData!=NULL && nData>0 ); | 6901 assert( pData!=NULL && nData>0 ); |
6902 | 6902 |
6903 rc = dlrInit(&dlReader, DL_DEFAULT, pData, nData); | 6903 rc = dlrInit(&dlReader, DL_DEFAULT, pData, nData); |
6904 if( rc!=SQLITE_OK ) return rc; | 6904 if( rc!=SQLITE_OK ) return; |
6905 dataBufferInit(&dump, 0); | 6905 dataBufferInit(&dump, 0); |
6906 for( ; rc==SQLITE_OK && !dlrAtEnd(&dlReader); rc = dlrStep(&dlReader) ){ | 6906 for( ; rc==SQLITE_OK && !dlrAtEnd(&dlReader); rc = dlrStep(&dlReader) ){ |
6907 char buf[256]; | 6907 char buf[256]; |
6908 PLReader plReader; | 6908 PLReader plReader; |
6909 | 6909 |
6910 rc = plrInit(&plReader, &dlReader); | 6910 rc = plrInit(&plReader, &dlReader); |
6911 if( rc!=SQLITE_OK ) break; | 6911 if( rc!=SQLITE_OK ) break; |
6912 if( DL_DEFAULT==DL_DOCIDS || plrAtEnd(&plReader) ){ | 6912 if( DL_DEFAULT==DL_DOCIDS || plrAtEnd(&plReader) ){ |
6913 sqlite3_snprintf(sizeof(buf), buf, "[%lld] ", dlrDocid(&dlReader)); | 6913 sqlite3_snprintf(sizeof(buf), buf, "[%lld] ", dlrDocid(&dlReader)); |
6914 dataBufferAppend(&dump, buf, strlen(buf)); | 6914 dataBufferAppend(&dump, buf, strlen(buf)); |
(...skipping 30 matching lines...) Expand all Loading... |
6945 | 6945 |
6946 assert( dump.nData>0 ); | 6946 assert( dump.nData>0 ); |
6947 dump.nData--; /* Overwrite trailing space. */ | 6947 dump.nData--; /* Overwrite trailing space. */ |
6948 assert( dump.pData[dump.nData]==' '); | 6948 assert( dump.pData[dump.nData]==' '); |
6949 dataBufferAppend(&dump, "]] ", 3); | 6949 dataBufferAppend(&dump, "]] ", 3); |
6950 } | 6950 } |
6951 } | 6951 } |
6952 dlrDestroy(&dlReader); | 6952 dlrDestroy(&dlReader); |
6953 if( rc!=SQLITE_OK ){ | 6953 if( rc!=SQLITE_OK ){ |
6954 dataBufferDestroy(&dump); | 6954 dataBufferDestroy(&dump); |
6955 return rc; | 6955 return; |
6956 } | 6956 } |
6957 | 6957 |
6958 assert( dump.nData>0 ); | 6958 assert( dump.nData>0 ); |
6959 dump.nData--; /* Overwrite trailing space. */ | 6959 dump.nData--; /* Overwrite trailing space. */ |
6960 assert( dump.pData[dump.nData]==' '); | 6960 assert( dump.pData[dump.nData]==' '); |
6961 dump.pData[dump.nData] = '\0'; | 6961 dump.pData[dump.nData] = '\0'; |
6962 assert( dump.nData>0 ); | 6962 assert( dump.nData>0 ); |
6963 | 6963 |
6964 /* Passes ownership of dump's buffer to pContext. */ | 6964 /* Passes ownership of dump's buffer to pContext. */ |
6965 sqlite3_result_text(pContext, dump.pData, dump.nData, sqlite3_free); | 6965 sqlite3_result_text(pContext, dump.pData, dump.nData, sqlite3_free); |
6966 dump.pData = NULL; | 6966 dump.pData = NULL; |
6967 dump.nData = dump.nCapacity = 0; | 6967 dump.nData = dump.nCapacity = 0; |
6968 return SQLITE_OK; | |
6969 } | 6968 } |
6970 | 6969 |
6971 /* Implements dump_doclist() for use in inspecting the fts2 index from | 6970 /* Implements dump_doclist() for use in inspecting the fts2 index from |
6972 ** tests. TEXT result containing a string representation of the | 6971 ** tests. TEXT result containing a string representation of the |
6973 ** doclist for the indicated term. dump_doclist(t, term, level, idx) | 6972 ** doclist for the indicated term. dump_doclist(t, term, level, idx) |
6974 ** dumps the doclist for term from the segment specified by level, idx | 6973 ** dumps the doclist for term from the segment specified by level, idx |
6975 ** (in %_segdir), while dump_doclist(t, term) dumps the logical | 6974 ** (in %_segdir), while dump_doclist(t, term) dumps the logical |
6976 ** doclist for the term across all segments. The per-segment doclist | 6975 ** doclist for the term across all segments. The per-segment doclist |
6977 ** can contain deletions, while the full-index doclist will not | 6976 ** can contain deletions, while the full-index doclist will not |
6978 ** (deletions are omitted). | 6977 ** (deletions are omitted). |
(...skipping 221 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
7200 ** Function ...PorterTokenizerModule() sets *pModule to point to the | 7199 ** Function ...PorterTokenizerModule() sets *pModule to point to the |
7201 ** porter tokenizer/stemmer implementation. | 7200 ** porter tokenizer/stemmer implementation. |
7202 */ | 7201 */ |
7203 void sqlite3Fts2SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule); | 7202 void sqlite3Fts2SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule); |
7204 void sqlite3Fts2PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule); | 7203 void sqlite3Fts2PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule); |
7205 void sqlite3Fts2IcuTokenizerModule(sqlite3_tokenizer_module const**ppModule); | 7204 void sqlite3Fts2IcuTokenizerModule(sqlite3_tokenizer_module const**ppModule); |
7206 | 7205 |
7207 int sqlite3Fts2InitHashTable(sqlite3 *, fts2Hash *, const char *); | 7206 int sqlite3Fts2InitHashTable(sqlite3 *, fts2Hash *, const char *); |
7208 | 7207 |
7209 /* | 7208 /* |
7210 ** Initialise the fts2 extension. If this extension is built as part | 7209 ** Initialize the fts2 extension. If this extension is built as part |
7211 ** of the sqlite library, then this function is called directly by | 7210 ** of the sqlite library, then this function is called directly by |
7212 ** SQLite. If fts2 is built as a dynamically loadable extension, this | 7211 ** SQLite. If fts2 is built as a dynamically loadable extension, this |
7213 ** function is called by the sqlite3_extension_init() entry point. | 7212 ** function is called by the sqlite3_extension_init() entry point. |
7214 */ | 7213 */ |
7215 int sqlite3Fts2Init(sqlite3 *db){ | 7214 int sqlite3Fts2Init(sqlite3 *db){ |
7216 int rc = SQLITE_OK; | 7215 int rc = SQLITE_OK; |
7217 fts2Hash *pHash = 0; | 7216 fts2Hash *pHash = 0; |
7218 const sqlite3_tokenizer_module *pSimple = 0; | 7217 const sqlite3_tokenizer_module *pSimple = 0; |
7219 const sqlite3_tokenizer_module *pPorter = 0; | 7218 const sqlite3_tokenizer_module *pPorter = 0; |
7220 const sqlite3_tokenizer_module *pIcu = 0; | 7219 const sqlite3_tokenizer_module *pIcu = 0; |
7221 | 7220 |
7222 sqlite3Fts2SimpleTokenizerModule(&pSimple); | 7221 sqlite3Fts2SimpleTokenizerModule(&pSimple); |
7223 sqlite3Fts2PorterTokenizerModule(&pPorter); | 7222 sqlite3Fts2PorterTokenizerModule(&pPorter); |
7224 #ifdef SQLITE_ENABLE_ICU | 7223 #ifdef SQLITE_ENABLE_ICU |
7225 sqlite3Fts2IcuTokenizerModule(&pIcu); | 7224 sqlite3Fts2IcuTokenizerModule(&pIcu); |
7226 #endif | 7225 #endif |
7227 | 7226 |
7228 /* Allocate and initialise the hash-table used to store tokenizers. */ | 7227 /* Allocate and initialize the hash-table used to store tokenizers. */ |
7229 pHash = sqlite3_malloc(sizeof(fts2Hash)); | 7228 pHash = sqlite3_malloc(sizeof(fts2Hash)); |
7230 if( !pHash ){ | 7229 if( !pHash ){ |
7231 rc = SQLITE_NOMEM; | 7230 rc = SQLITE_NOMEM; |
7232 }else{ | 7231 }else{ |
7233 sqlite3Fts2HashInit(pHash, FTS2_HASH_STRING, 1); | 7232 sqlite3Fts2HashInit(pHash, FTS2_HASH_STRING, 1); |
7234 } | 7233 } |
7235 | 7234 |
7236 /* Load the built-in tokenizers into the hash table */ | 7235 /* Load the built-in tokenizers into the hash table */ |
7237 if( rc==SQLITE_OK ){ | 7236 if( rc==SQLITE_OK ){ |
7238 if( sqlite3Fts2HashInsert(pHash, "simple", 7, (void *)pSimple) | 7237 if( sqlite3Fts2HashInsert(pHash, "simple", 7, (void *)pSimple) |
(...skipping 30 matching lines...) Expand all Loading... |
7269 /* An error has occurred. Delete the hash table and return the error code. */ | 7268 /* An error has occurred. Delete the hash table and return the error code. */ |
7270 assert( rc!=SQLITE_OK ); | 7269 assert( rc!=SQLITE_OK ); |
7271 if( pHash ){ | 7270 if( pHash ){ |
7272 sqlite3Fts2HashClear(pHash); | 7271 sqlite3Fts2HashClear(pHash); |
7273 sqlite3_free(pHash); | 7272 sqlite3_free(pHash); |
7274 } | 7273 } |
7275 return rc; | 7274 return rc; |
7276 } | 7275 } |
7277 | 7276 |
7278 #if !SQLITE_CORE | 7277 #if !SQLITE_CORE |
7279 int sqlite3_extension_init( | 7278 #ifdef _WIN32 |
| 7279 __declspec(dllexport) |
| 7280 #endif |
| 7281 int sqlite3_fts2_init( |
7280 sqlite3 *db, | 7282 sqlite3 *db, |
7281 char **pzErrMsg, | 7283 char **pzErrMsg, |
7282 const sqlite3_api_routines *pApi | 7284 const sqlite3_api_routines *pApi |
7283 ){ | 7285 ){ |
7284 SQLITE_EXTENSION_INIT2(pApi) | 7286 SQLITE_EXTENSION_INIT2(pApi) |
7285 return sqlite3Fts2Init(db); | 7287 return sqlite3Fts2Init(db); |
7286 } | 7288 } |
7287 #endif | 7289 #endif |
7288 | 7290 |
7289 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */ | 7291 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */ |
OLD | NEW |