OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ** 2009 Nov 12 |
| 3 ** |
| 4 ** The author disclaims copyright to this source code. In place of |
| 5 ** a legal notice, here is a blessing: |
| 6 ** |
| 7 ** May you do good and not evil. |
| 8 ** May you find forgiveness for yourself and forgive others. |
| 9 ** May you share freely, never taking more than you give. |
| 10 ** |
| 11 ****************************************************************************** |
| 12 ** |
| 13 */ |
| 14 |
| 15 #ifndef _FTSINT_H |
| 16 #define _FTSINT_H |
| 17 |
| 18 #if !defined(NDEBUG) && !defined(SQLITE_DEBUG) |
| 19 # define NDEBUG 1 |
| 20 #endif |
| 21 |
| 22 #include "sqlite3.h" |
| 23 #include "fts3_tokenizer.h" |
| 24 #include "fts3_hash.h" |
| 25 |
| 26 /* |
| 27 ** This constant controls how often segments are merged. Once there are |
| 28 ** FTS3_MERGE_COUNT segments of level N, they are merged into a single |
| 29 ** segment of level N+1. |
| 30 */ |
| 31 #define FTS3_MERGE_COUNT 16 |
| 32 |
| 33 /* |
| 34 ** This is the maximum amount of data (in bytes) to store in the |
| 35 ** Fts3Table.pendingTerms hash table. Normally, the hash table is |
| 36 ** populated as documents are inserted/updated/deleted in a transaction |
| 37 ** and used to create a new segment when the transaction is committed. |
| 38 ** However if this limit is reached midway through a transaction, a new |
| 39 ** segment is created and the hash table cleared immediately. |
| 40 */ |
| 41 #define FTS3_MAX_PENDING_DATA (1*1024*1024) |
| 42 |
| 43 /* |
| 44 ** Macro to return the number of elements in an array. SQLite has a |
| 45 ** similar macro called ArraySize(). Use a different name to avoid |
| 46 ** a collision when building an amalgamation with built-in FTS3. |
| 47 */ |
| 48 #define SizeofArray(X) ((int)(sizeof(X)/sizeof(X[0]))) |
| 49 |
| 50 /* |
| 51 ** Maximum length of a varint encoded integer. The varint format is different |
| 52 ** from that used by SQLite, so the maximum length is 10, not 9. |
| 53 */ |
| 54 #define FTS3_VARINT_MAX 10 |
| 55 |
| 56 /* |
| 57 ** The testcase() macro is only used by the amalgamation. If undefined, |
| 58 ** make it a no-op. |
| 59 */ |
| 60 #ifndef testcase |
| 61 # define testcase(X) |
| 62 #endif |
| 63 |
| 64 /* |
| 65 ** Terminator values for position-lists and column-lists. |
| 66 */ |
| 67 #define POS_COLUMN (1) /* Column-list terminator */ |
| 68 #define POS_END (0) /* Position-list terminator */ |
| 69 |
| 70 /* |
| 71 ** This section provides definitions to allow the |
| 72 ** FTS3 extension to be compiled outside of the |
| 73 ** amalgamation. |
| 74 */ |
| 75 #ifndef SQLITE_AMALGAMATION |
| 76 /* |
| 77 ** Macros indicating that conditional expressions are always true or |
| 78 ** false. |
| 79 */ |
| 80 #ifdef SQLITE_COVERAGE_TEST |
| 81 # define ALWAYS(x) (1) |
| 82 # define NEVER(X) (0) |
| 83 #else |
| 84 # define ALWAYS(x) (x) |
| 85 # define NEVER(X) (x) |
| 86 #endif |
| 87 |
| 88 /* |
| 89 ** Internal types used by SQLite. |
| 90 */ |
| 91 typedef unsigned char u8; /* 1-byte (or larger) unsigned integer */ |
| 92 typedef short int i16; /* 2-byte (or larger) signed integer */ |
| 93 typedef unsigned int u32; /* 4-byte unsigned integer */ |
| 94 typedef sqlite3_uint64 u64; /* 8-byte unsigned integer */ |
| 95 /* |
| 96 ** Macro used to suppress compiler warnings for unused parameters. |
| 97 */ |
| 98 #define UNUSED_PARAMETER(x) (void)(x) |
| 99 #endif |
| 100 |
| 101 typedef struct Fts3Table Fts3Table; |
| 102 typedef struct Fts3Cursor Fts3Cursor; |
| 103 typedef struct Fts3Expr Fts3Expr; |
| 104 typedef struct Fts3Phrase Fts3Phrase; |
| 105 typedef struct Fts3PhraseToken Fts3PhraseToken; |
| 106 |
| 107 typedef struct Fts3SegFilter Fts3SegFilter; |
| 108 typedef struct Fts3DeferredToken Fts3DeferredToken; |
| 109 typedef struct Fts3SegReader Fts3SegReader; |
| 110 typedef struct Fts3SegReaderCursor Fts3SegReaderCursor; |
| 111 |
| 112 /* |
| 113 ** A connection to a fulltext index is an instance of the following |
| 114 ** structure. The xCreate and xConnect methods create an instance |
| 115 ** of this structure and xDestroy and xDisconnect free that instance. |
| 116 ** All other methods receive a pointer to the structure as one of their |
| 117 ** arguments. |
| 118 */ |
| 119 struct Fts3Table { |
| 120 sqlite3_vtab base; /* Base class used by SQLite core */ |
| 121 sqlite3 *db; /* The database connection */ |
| 122 const char *zDb; /* logical database name */ |
| 123 const char *zName; /* virtual table name */ |
| 124 int nColumn; /* number of named columns in virtual table */ |
| 125 char **azColumn; /* column names. malloced */ |
| 126 sqlite3_tokenizer *pTokenizer; /* tokenizer for inserts and queries */ |
| 127 |
| 128 /* Precompiled statements used by the implementation. Each of these |
| 129 ** statements is run and reset within a single virtual table API call. |
| 130 */ |
| 131 sqlite3_stmt *aStmt[24]; |
| 132 |
| 133 char *zReadExprlist; |
| 134 char *zWriteExprlist; |
| 135 |
| 136 int nNodeSize; /* Soft limit for node size */ |
| 137 u8 bHasStat; /* True if %_stat table exists */ |
| 138 u8 bHasDocsize; /* True if %_docsize table exists */ |
| 139 int nPgsz; /* Page size for host database */ |
| 140 char *zSegmentsTbl; /* Name of %_segments table */ |
| 141 sqlite3_blob *pSegments; /* Blob handle open on %_segments table */ |
| 142 |
| 143 /* The following hash table is used to buffer pending index updates during |
| 144 ** transactions. Variable nPendingData estimates the memory size of the |
| 145 ** pending data, including hash table overhead, but not malloc overhead. |
| 146 ** When nPendingData exceeds nMaxPendingData, the buffer is flushed |
| 147 ** automatically. Variable iPrevDocid is the docid of the most recently |
| 148 ** inserted record. |
| 149 */ |
| 150 int nMaxPendingData; |
| 151 int nPendingData; |
| 152 sqlite_int64 iPrevDocid; |
| 153 Fts3Hash pendingTerms; |
| 154 }; |
| 155 |
| 156 /* |
| 157 ** When the core wants to read from the virtual table, it creates a |
| 158 ** virtual table cursor (an instance of the following structure) using |
| 159 ** the xOpen method. Cursors are destroyed using the xClose method. |
| 160 */ |
| 161 struct Fts3Cursor { |
| 162 sqlite3_vtab_cursor base; /* Base class used by SQLite core */ |
| 163 i16 eSearch; /* Search strategy (see below) */ |
| 164 u8 isEof; /* True if at End Of Results */ |
| 165 u8 isRequireSeek; /* True if must seek pStmt to %_content row */ |
| 166 sqlite3_stmt *pStmt; /* Prepared statement in use by the cursor */ |
| 167 Fts3Expr *pExpr; /* Parsed MATCH query string */ |
| 168 int nPhrase; /* Number of matchable phrases in query */ |
| 169 Fts3DeferredToken *pDeferred; /* Deferred search tokens, if any */ |
| 170 sqlite3_int64 iPrevId; /* Previous id read from aDoclist */ |
| 171 char *pNextId; /* Pointer into the body of aDoclist */ |
| 172 char *aDoclist; /* List of docids for full-text queries */ |
| 173 int nDoclist; /* Size of buffer at aDoclist */ |
| 174 int eEvalmode; /* An FTS3_EVAL_XX constant */ |
| 175 int nRowAvg; /* Average size of database rows, in pages */ |
| 176 |
| 177 int isMatchinfoNeeded; /* True when aMatchinfo[] needs filling in */ |
| 178 u32 *aMatchinfo; /* Information about most recent match */ |
| 179 int nMatchinfo; /* Number of elements in aMatchinfo[] */ |
| 180 char *zMatchinfo; /* Matchinfo specification */ |
| 181 }; |
| 182 |
| 183 #define FTS3_EVAL_FILTER 0 |
| 184 #define FTS3_EVAL_NEXT 1 |
| 185 #define FTS3_EVAL_MATCHINFO 2 |
| 186 |
| 187 /* |
| 188 ** The Fts3Cursor.eSearch member is always set to one of the following. |
| 189 ** Actualy, Fts3Cursor.eSearch can be greater than or equal to |
| 190 ** FTS3_FULLTEXT_SEARCH. If so, then Fts3Cursor.eSearch - 2 is the index |
| 191 ** of the column to be searched. For example, in |
| 192 ** |
| 193 ** CREATE VIRTUAL TABLE ex1 USING fts3(a,b,c,d); |
| 194 ** SELECT docid FROM ex1 WHERE b MATCH 'one two three'; |
| 195 ** |
| 196 ** Because the LHS of the MATCH operator is 2nd column "b", |
| 197 ** Fts3Cursor.eSearch will be set to FTS3_FULLTEXT_SEARCH+1. (+0 for a, |
| 198 ** +1 for b, +2 for c, +3 for d.) If the LHS of MATCH were "ex1" |
| 199 ** indicating that all columns should be searched, |
| 200 ** then eSearch would be set to FTS3_FULLTEXT_SEARCH+4. |
| 201 */ |
| 202 #define FTS3_FULLSCAN_SEARCH 0 /* Linear scan of %_content table */ |
| 203 #define FTS3_DOCID_SEARCH 1 /* Lookup by rowid on %_content table */ |
| 204 #define FTS3_FULLTEXT_SEARCH 2 /* Full-text index search */ |
| 205 |
| 206 /* |
| 207 ** A "phrase" is a sequence of one or more tokens that must match in |
| 208 ** sequence. A single token is the base case and the most common case. |
| 209 ** For a sequence of tokens contained in double-quotes (i.e. "one two three") |
| 210 ** nToken will be the number of tokens in the string. |
| 211 ** |
| 212 ** The nDocMatch and nMatch variables contain data that may be used by the |
| 213 ** matchinfo() function. They are populated when the full-text index is |
| 214 ** queried for hits on the phrase. If one or more tokens in the phrase |
| 215 ** are deferred, the nDocMatch and nMatch variables are populated based |
| 216 ** on the assumption that the |
| 217 */ |
| 218 struct Fts3PhraseToken { |
| 219 char *z; /* Text of the token */ |
| 220 int n; /* Number of bytes in buffer z */ |
| 221 int isPrefix; /* True if token ends with a "*" character */ |
| 222 int bFulltext; /* True if full-text index was used */ |
| 223 Fts3SegReaderCursor *pSegcsr; /* Segment-reader for this token */ |
| 224 Fts3DeferredToken *pDeferred; /* Deferred token object for this token */ |
| 225 }; |
| 226 |
| 227 struct Fts3Phrase { |
| 228 /* Variables populated by fts3_expr.c when parsing a MATCH expression */ |
| 229 int nToken; /* Number of tokens in the phrase */ |
| 230 int iColumn; /* Index of column this phrase must match */ |
| 231 int isNot; /* Phrase prefixed by unary not (-) operator */ |
| 232 Fts3PhraseToken aToken[1]; /* One entry for each token in the phrase */ |
| 233 }; |
| 234 |
| 235 /* |
| 236 ** A tree of these objects forms the RHS of a MATCH operator. |
| 237 ** |
| 238 ** If Fts3Expr.eType is either FTSQUERY_NEAR or FTSQUERY_PHRASE and isLoaded |
| 239 ** is true, then aDoclist points to a malloced buffer, size nDoclist bytes, |
| 240 ** containing the results of the NEAR or phrase query in FTS3 doclist |
| 241 ** format. As usual, the initial "Length" field found in doclists stored |
| 242 ** on disk is omitted from this buffer. |
| 243 ** |
| 244 ** Variable pCurrent always points to the start of a docid field within |
| 245 ** aDoclist. Since the doclist is usually scanned in docid order, this can |
| 246 ** be used to accelerate seeking to the required docid within the doclist. |
| 247 */ |
| 248 struct Fts3Expr { |
| 249 int eType; /* One of the FTSQUERY_XXX values defined below */ |
| 250 int nNear; /* Valid if eType==FTSQUERY_NEAR */ |
| 251 Fts3Expr *pParent; /* pParent->pLeft==this or pParent->pRight==this */ |
| 252 Fts3Expr *pLeft; /* Left operand */ |
| 253 Fts3Expr *pRight; /* Right operand */ |
| 254 Fts3Phrase *pPhrase; /* Valid if eType==FTSQUERY_PHRASE */ |
| 255 |
| 256 int isLoaded; /* True if aDoclist/nDoclist are initialized. */ |
| 257 char *aDoclist; /* Buffer containing doclist */ |
| 258 int nDoclist; /* Size of aDoclist in bytes */ |
| 259 |
| 260 sqlite3_int64 iCurrent; |
| 261 char *pCurrent; |
| 262 }; |
| 263 |
| 264 /* |
| 265 ** Candidate values for Fts3Query.eType. Note that the order of the first |
| 266 ** four values is in order of precedence when parsing expressions. For |
| 267 ** example, the following: |
| 268 ** |
| 269 ** "a OR b AND c NOT d NEAR e" |
| 270 ** |
| 271 ** is equivalent to: |
| 272 ** |
| 273 ** "a OR (b AND (c NOT (d NEAR e)))" |
| 274 */ |
| 275 #define FTSQUERY_NEAR 1 |
| 276 #define FTSQUERY_NOT 2 |
| 277 #define FTSQUERY_AND 3 |
| 278 #define FTSQUERY_OR 4 |
| 279 #define FTSQUERY_PHRASE 5 |
| 280 |
| 281 |
| 282 /* fts3_write.c */ |
| 283 int sqlite3Fts3UpdateMethod(sqlite3_vtab*,int,sqlite3_value**,sqlite3_int64*); |
| 284 int sqlite3Fts3PendingTermsFlush(Fts3Table *); |
| 285 void sqlite3Fts3PendingTermsClear(Fts3Table *); |
| 286 int sqlite3Fts3Optimize(Fts3Table *); |
| 287 int sqlite3Fts3SegReaderNew(int, sqlite3_int64, |
| 288 sqlite3_int64, sqlite3_int64, const char *, int, Fts3SegReader**); |
| 289 int sqlite3Fts3SegReaderPending(Fts3Table*,const char*,int,int,Fts3SegReader**); |
| 290 void sqlite3Fts3SegReaderFree(Fts3SegReader *); |
| 291 int sqlite3Fts3SegReaderCost(Fts3Cursor *, Fts3SegReader *, int *); |
| 292 int sqlite3Fts3AllSegdirs(Fts3Table*, int, sqlite3_stmt **); |
| 293 int sqlite3Fts3ReadLock(Fts3Table *); |
| 294 int sqlite3Fts3ReadBlock(Fts3Table*, sqlite3_int64, char **, int*); |
| 295 |
| 296 int sqlite3Fts3SelectDoctotal(Fts3Table *, sqlite3_stmt **); |
| 297 int sqlite3Fts3SelectDocsize(Fts3Table *, sqlite3_int64, sqlite3_stmt **); |
| 298 |
| 299 void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *); |
| 300 int sqlite3Fts3DeferToken(Fts3Cursor *, Fts3PhraseToken *, int); |
| 301 int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *); |
| 302 void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *); |
| 303 char *sqlite3Fts3DeferredDoclist(Fts3DeferredToken *, int *); |
| 304 void sqlite3Fts3SegmentsClose(Fts3Table *); |
| 305 |
| 306 #define FTS3_SEGCURSOR_PENDING -1 |
| 307 #define FTS3_SEGCURSOR_ALL -2 |
| 308 |
| 309 int sqlite3Fts3SegReaderStart(Fts3Table*, Fts3SegReaderCursor*, Fts3SegFilter*); |
| 310 int sqlite3Fts3SegReaderStep(Fts3Table *, Fts3SegReaderCursor *); |
| 311 void sqlite3Fts3SegReaderFinish(Fts3SegReaderCursor *); |
| 312 int sqlite3Fts3SegReaderCursor( |
| 313 Fts3Table *, int, const char *, int, int, int, Fts3SegReaderCursor *); |
| 314 |
| 315 /* Flags allowed as part of the 4th argument to SegmentReaderIterate() */ |
| 316 #define FTS3_SEGMENT_REQUIRE_POS 0x00000001 |
| 317 #define FTS3_SEGMENT_IGNORE_EMPTY 0x00000002 |
| 318 #define FTS3_SEGMENT_COLUMN_FILTER 0x00000004 |
| 319 #define FTS3_SEGMENT_PREFIX 0x00000008 |
| 320 #define FTS3_SEGMENT_SCAN 0x00000010 |
| 321 |
| 322 /* Type passed as 4th argument to SegmentReaderIterate() */ |
| 323 struct Fts3SegFilter { |
| 324 const char *zTerm; |
| 325 int nTerm; |
| 326 int iCol; |
| 327 int flags; |
| 328 }; |
| 329 |
| 330 struct Fts3SegReaderCursor { |
| 331 /* Used internally by sqlite3Fts3SegReaderXXX() calls */ |
| 332 Fts3SegReader **apSegment; /* Array of Fts3SegReader objects */ |
| 333 int nSegment; /* Size of apSegment array */ |
| 334 int nAdvance; /* How many seg-readers to advance */ |
| 335 Fts3SegFilter *pFilter; /* Pointer to filter object */ |
| 336 char *aBuffer; /* Buffer to merge doclists in */ |
| 337 int nBuffer; /* Allocated size of aBuffer[] in bytes */ |
| 338 |
| 339 /* Cost of running this iterator. Used by fts3.c only. */ |
| 340 int nCost; |
| 341 |
| 342 /* Output values. Valid only after Fts3SegReaderStep() returns SQLITE_ROW. */ |
| 343 char *zTerm; /* Pointer to term buffer */ |
| 344 int nTerm; /* Size of zTerm in bytes */ |
| 345 char *aDoclist; /* Pointer to doclist buffer */ |
| 346 int nDoclist; /* Size of aDoclist[] in bytes */ |
| 347 }; |
| 348 |
| 349 /* fts3.c */ |
| 350 int sqlite3Fts3PutVarint(char *, sqlite3_int64); |
| 351 int sqlite3Fts3GetVarint(const char *, sqlite_int64 *); |
| 352 int sqlite3Fts3GetVarint32(const char *, int *); |
| 353 int sqlite3Fts3VarintLen(sqlite3_uint64); |
| 354 void sqlite3Fts3Dequote(char *); |
| 355 |
| 356 char *sqlite3Fts3FindPositions(Fts3Expr *, sqlite3_int64, int); |
| 357 int sqlite3Fts3ExprLoadDoclist(Fts3Cursor *, Fts3Expr *); |
| 358 int sqlite3Fts3ExprLoadFtDoclist(Fts3Cursor *, Fts3Expr *, char **, int *); |
| 359 int sqlite3Fts3ExprNearTrim(Fts3Expr *, Fts3Expr *, int); |
| 360 |
| 361 /* fts3_tokenizer.c */ |
| 362 const char *sqlite3Fts3NextToken(const char *, int *); |
| 363 int sqlite3Fts3InitHashTable(sqlite3 *, Fts3Hash *, const char *); |
| 364 int sqlite3Fts3InitTokenizer(Fts3Hash *pHash, const char *, |
| 365 sqlite3_tokenizer **, char ** |
| 366 ); |
| 367 int sqlite3Fts3IsIdChar(char); |
| 368 |
| 369 /* fts3_snippet.c */ |
| 370 void sqlite3Fts3Offsets(sqlite3_context*, Fts3Cursor*); |
| 371 void sqlite3Fts3Snippet(sqlite3_context *, Fts3Cursor *, const char *, |
| 372 const char *, const char *, int, int |
| 373 ); |
| 374 void sqlite3Fts3Matchinfo(sqlite3_context *, Fts3Cursor *, const char *); |
| 375 |
| 376 /* fts3_expr.c */ |
| 377 int sqlite3Fts3ExprParse(sqlite3_tokenizer *, |
| 378 char **, int, int, const char *, int, Fts3Expr ** |
| 379 ); |
| 380 void sqlite3Fts3ExprFree(Fts3Expr *); |
| 381 #ifdef SQLITE_TEST |
| 382 int sqlite3Fts3ExprInitTestInterface(sqlite3 *db); |
| 383 #endif |
| 384 |
| 385 /* fts3_aux.c */ |
| 386 int sqlite3Fts3InitAux(sqlite3 *db); |
| 387 |
| 388 #endif /* _FTSINT_H */ |
OLD | NEW |