| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 ** 2009 Oct 23 | |
| 3 ** | |
| 4 ** The author disclaims copyright to this source code. In place of | |
| 5 ** a legal notice, here is a blessing: | |
| 6 ** | |
| 7 ** May you do good and not evil. | |
| 8 ** May you find forgiveness for yourself and forgive others. | |
| 9 ** May you share freely, never taking more than you give. | |
| 10 ** | |
| 11 ****************************************************************************** | |
| 12 */ | |
| 13 | |
| 14 #include "fts3Int.h" | |
| 15 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) | |
| 16 | |
| 17 #include <string.h> | |
| 18 #include <assert.h> | |
| 19 | |
/*
** Characters that may appear in the second argument to matchinfo().
**
** Each character requests one element of the matchinfo output. The comment
** following each definition gives the number of integers that the element
** contributes, where nCol is the number of columns in the table and nPhrase
** is the number of matchable phrases in the query.
**
** fts3MatchinfoCheck() accepts 'n' and 'a' only if Fts3Table.bFts4 is set,
** and 'l' only if Fts3Table.bHasDocsize is set. All other characters listed
** here are always accepted.
*/
#define FTS3_MATCHINFO_NPHRASE   'p'        /* 1 value */
#define FTS3_MATCHINFO_NCOL      'c'        /* 1 value */
#define FTS3_MATCHINFO_NDOC      'n'        /* 1 value */
#define FTS3_MATCHINFO_AVGLENGTH 'a'        /* nCol values */
#define FTS3_MATCHINFO_LENGTH    'l'        /* nCol values */
#define FTS3_MATCHINFO_LCS       's'        /* nCol values */
#define FTS3_MATCHINFO_HITS      'x'        /* 3*nCol*nPhrase values */

/*
** The default value for the second argument to matchinfo(): the phrase
** count, the column count and the hits array, in that order.
*/
#define FTS3_MATCHINFO_DEFAULT   "pcx"
| 35 | |
| 36 | |
/*
** Context object passed to fts3ExprIterate() by fts3ExprLoadDoclists().
** The callback fts3ExprLoadDoclistsCb() increments nPhrase once per phrase
** visited and adds the token count of each visited phrase to nToken.
*/
typedef struct LoadDoclistCtx LoadDoclistCtx;
struct LoadDoclistCtx {
  Fts3Cursor *pCsr;               /* FTS3 Cursor */
  int nPhrase;                    /* Number of phrases seen so far */
  int nToken;                     /* Number of tokens seen so far */
};
| 47 | |
/*
** The following types are used as part of the implementation of the
** fts3BestSnippet() routine.
*/
typedef struct SnippetIter SnippetIter;
typedef struct SnippetPhrase SnippetPhrase;
typedef struct SnippetFragment SnippetFragment;

/*
** Iterator over the candidate snippets of a single column. It is set up by
** fts3BestSnippet() and stepped with fts3SnippetNextCandidate(). iCurrent
** is initialized to -1, which fts3SnippetNextCandidate() treats as "no
** candidate visited yet".
*/
struct SnippetIter {
  Fts3Cursor *pCsr;               /* Cursor snippet is being generated from */
  int iCol;                       /* Extract snippet from this column */
  int nSnippet;                   /* Requested snippet length (in tokens) */
  int nPhrase;                    /* Number of phrases in query */
  SnippetPhrase *aPhrase;         /* Array of size nPhrase */
  int iCurrent;                   /* First token of current snippet */
};
| 64 | |
/*
** Per-phrase state of a SnippetIter. The structure holds two independent
** cursors into the same position list: "head" and "tail". Each cursor is a
** (value, pointer) pair as used by fts3SnippetAdvance(). Once a cursor
** runs off the end of the list, fts3SnippetAdvance() sets its pointer to
** NULL and its value to -1.
*/
struct SnippetPhrase {
  int nToken;                     /* Number of tokens in phrase */
  char *pList;                    /* Pointer to start of phrase position list */
  int iHead;                      /* Next value in position list */
  char *pHead;                    /* Position list data following iHead */
  int iTail;                      /* Next value in trailing position list */
  char *pTail;                    /* Position list data following iTail */
};
| 73 | |
/*
** Description of one selected snippet, as filled in by fts3BestSnippet().
** Bit i of "covered" corresponds to phrase i of the query. Bit k of
** "hlmask" corresponds to the token at position (iPos+k) of the column.
*/
struct SnippetFragment {
  int iCol;                       /* Column snippet is extracted from */
  int iPos;                       /* Index of first token in snippet */
  u64 covered;                    /* Mask of query phrases covered */
  u64 hlmask;                     /* Mask of snippet terms to highlight */
};
| 80 | |
/*
** This type is used as an fts3ExprIterate() context object while
** accumulating the data returned by the matchinfo() function. The
** callbacks fts3ExprGlobalHitsCb() and fts3ExprLocalHitsCb() write into
** aMatchinfo[], starting at offset (3 * iPhrase * nCol) for phrase iPhrase.
*/
typedef struct MatchInfo MatchInfo;
struct MatchInfo {
  Fts3Cursor *pCursor;            /* FTS3 Cursor */
  int nCol;                       /* Number of columns in table */
  int nPhrase;                    /* Number of matchable phrases in query */
  sqlite3_int64 nDoc;             /* Number of docs in database */
  u32 *aMatchinfo;                /* Pre-allocated buffer */
};
| 93 | |
| 94 | |
| 95 | |
/*
** The snippet() and offsets() functions both return text values. An instance
** of the following structure is used to accumulate those values while the
** functions are running. See fts3StringAppend() for details.
**
** fts3StringAppend() grows z with sqlite3_realloc() as required and keeps
** z[n] set to a nul-terminator after every successful append.
*/
typedef struct StrBuffer StrBuffer;
struct StrBuffer {
  char *z;                        /* Pointer to buffer containing string */
  int n;                          /* Length of z in bytes (excl. nul-term) */
  int nAlloc;                     /* Allocated size of buffer z in bytes */
};
| 107 | |
| 108 | |
/*
** Decode a single entry of a position-list.
**
** A position-list is a strictly increasing sequence of integers. Each entry
** is stored as an FTS3 varint holding (current - previous + 2), where
** "previous" is taken to be zero for the first entry. So the list:
**
**     4 9 113
**
** is stored as the varints:
**
**     6 7 106
**
** On entry *pp points at the varint to decode and *piPos holds the value of
** the preceding list entry. On return *piPos holds the decoded entry and
** *pp points at the byte following the varint just read.
*/
static void fts3GetDeltaPosition(char **pp, int *piPos){
  char *pEntry = *pp;             /* Varint to decode */
  int iDelta;                     /* Raw varint value (difference plus 2) */

  pEntry += fts3GetVarint32(pEntry, &iDelta);
  *pp = pEntry;
  *piPos += (iDelta - 2);
}
| 134 | |
| 135 /* | |
| 136 ** Helper function for fts3ExprIterate() (see below). | |
| 137 */ | |
| 138 static int fts3ExprIterate2( | |
| 139 Fts3Expr *pExpr, /* Expression to iterate phrases of */ | |
| 140 int *piPhrase, /* Pointer to phrase counter */ | |
| 141 int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */ | |
| 142 void *pCtx /* Second argument to pass to callback */ | |
| 143 ){ | |
| 144 int rc; /* Return code */ | |
| 145 int eType = pExpr->eType; /* Type of expression node pExpr */ | |
| 146 | |
| 147 if( eType!=FTSQUERY_PHRASE ){ | |
| 148 assert( pExpr->pLeft && pExpr->pRight ); | |
| 149 rc = fts3ExprIterate2(pExpr->pLeft, piPhrase, x, pCtx); | |
| 150 if( rc==SQLITE_OK && eType!=FTSQUERY_NOT ){ | |
| 151 rc = fts3ExprIterate2(pExpr->pRight, piPhrase, x, pCtx); | |
| 152 } | |
| 153 }else{ | |
| 154 rc = x(pExpr, *piPhrase, pCtx); | |
| 155 (*piPhrase)++; | |
| 156 } | |
| 157 return rc; | |
| 158 } | |
| 159 | |
| 160 /* | |
| 161 ** Iterate through all phrase nodes in an FTS3 query, except those that | |
| 162 ** are part of a sub-tree that is the right-hand-side of a NOT operator. | |
| 163 ** For each phrase node found, the supplied callback function is invoked. | |
| 164 ** | |
| 165 ** If the callback function returns anything other than SQLITE_OK, | |
| 166 ** the iteration is abandoned and the error code returned immediately. | |
| 167 ** Otherwise, SQLITE_OK is returned after a callback has been made for | |
| 168 ** all eligible phrase nodes. | |
| 169 */ | |
| 170 static int fts3ExprIterate( | |
| 171 Fts3Expr *pExpr, /* Expression to iterate phrases of */ | |
| 172 int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */ | |
| 173 void *pCtx /* Second argument to pass to callback */ | |
| 174 ){ | |
| 175 int iPhrase = 0; /* Variable used as the phrase counter */ | |
| 176 return fts3ExprIterate2(pExpr, &iPhrase, x, pCtx); | |
| 177 } | |
| 178 | |
| 179 /* | |
| 180 ** This is an fts3ExprIterate() callback used while loading the doclists | |
| 181 ** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also | |
| 182 ** fts3ExprLoadDoclists(). | |
| 183 */ | |
| 184 static int fts3ExprLoadDoclistsCb(Fts3Expr *pExpr, int iPhrase, void *ctx){ | |
| 185 int rc = SQLITE_OK; | |
| 186 Fts3Phrase *pPhrase = pExpr->pPhrase; | |
| 187 LoadDoclistCtx *p = (LoadDoclistCtx *)ctx; | |
| 188 | |
| 189 UNUSED_PARAMETER(iPhrase); | |
| 190 | |
| 191 p->nPhrase++; | |
| 192 p->nToken += pPhrase->nToken; | |
| 193 | |
| 194 return rc; | |
| 195 } | |
| 196 | |
| 197 /* | |
| 198 ** Load the doclists for each phrase in the query associated with FTS3 cursor | |
| 199 ** pCsr. | |
| 200 ** | |
| 201 ** If pnPhrase is not NULL, then *pnPhrase is set to the number of matchable | |
| 202 ** phrases in the expression (all phrases except those directly or | |
| 203 ** indirectly descended from the right-hand-side of a NOT operator). If | |
| 204 ** pnToken is not NULL, then it is set to the number of tokens in all | |
| 205 ** matchable phrases of the expression. | |
| 206 */ | |
| 207 static int fts3ExprLoadDoclists( | |
| 208 Fts3Cursor *pCsr, /* Fts3 cursor for current query */ | |
| 209 int *pnPhrase, /* OUT: Number of phrases in query */ | |
| 210 int *pnToken /* OUT: Number of tokens in query */ | |
| 211 ){ | |
| 212 int rc; /* Return Code */ | |
| 213 LoadDoclistCtx sCtx = {0,0,0}; /* Context for fts3ExprIterate() */ | |
| 214 sCtx.pCsr = pCsr; | |
| 215 rc = fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb, (void *)&sCtx); | |
| 216 if( pnPhrase ) *pnPhrase = sCtx.nPhrase; | |
| 217 if( pnToken ) *pnToken = sCtx.nToken; | |
| 218 return rc; | |
| 219 } | |
| 220 | |
| 221 static int fts3ExprPhraseCountCb(Fts3Expr *pExpr, int iPhrase, void *ctx){ | |
| 222 (*(int *)ctx)++; | |
| 223 UNUSED_PARAMETER(pExpr); | |
| 224 UNUSED_PARAMETER(iPhrase); | |
| 225 return SQLITE_OK; | |
| 226 } | |
| 227 static int fts3ExprPhraseCount(Fts3Expr *pExpr){ | |
| 228 int nPhrase = 0; | |
| 229 (void)fts3ExprIterate(pExpr, fts3ExprPhraseCountCb, (void *)&nPhrase); | |
| 230 return nPhrase; | |
| 231 } | |
| 232 | |
/*
** Move a position-list cursor forward to the first entry whose value is
** greater than or equal to iNext.
**
** The cursor is the pair (*piIter, *ppIter): the current value and a pointer
** to the encoded data that follows it. If *ppIter is NULL the cursor is
** already exhausted and nothing is done. If the end of the list (a byte
** with no bits other than bit 0 set) is reached before a large enough value
** is found, *ppIter is set to NULL and *piIter to -1.
*/
static void fts3SnippetAdvance(char **ppIter, int *piIter, int iNext){
  char *pData = *ppIter;          /* Encoded data following current value */
  int iValue;                     /* Current value of the cursor */

  if( pData==0 ) return;

  iValue = *piIter;
  while( iValue<iNext ){
    if( (*pData & 0xFE)==0 ){
      /* End of list reached. Mark the cursor as exhausted. */
      *piIter = -1;
      *ppIter = 0;
      return;
    }
    fts3GetDeltaPosition(&pData, &iValue);
  }

  *piIter = iValue;
  *ppIter = pData;
}
| 256 | |
| 257 /* | |
| 258 ** Advance the snippet iterator to the next candidate snippet. | |
| 259 */ | |
| 260 static int fts3SnippetNextCandidate(SnippetIter *pIter){ | |
| 261 int i; /* Loop counter */ | |
| 262 | |
| 263 if( pIter->iCurrent<0 ){ | |
| 264 /* The SnippetIter object has just been initialized. The first snippet | |
| 265 ** candidate always starts at offset 0 (even if this candidate has a | |
| 266 ** score of 0.0). | |
| 267 */ | |
| 268 pIter->iCurrent = 0; | |
| 269 | |
| 270 /* Advance the 'head' iterator of each phrase to the first offset that | |
| 271 ** is greater than or equal to (iNext+nSnippet). | |
| 272 */ | |
| 273 for(i=0; i<pIter->nPhrase; i++){ | |
| 274 SnippetPhrase *pPhrase = &pIter->aPhrase[i]; | |
| 275 fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, pIter->nSnippet); | |
| 276 } | |
| 277 }else{ | |
| 278 int iStart; | |
| 279 int iEnd = 0x7FFFFFFF; | |
| 280 | |
| 281 for(i=0; i<pIter->nPhrase; i++){ | |
| 282 SnippetPhrase *pPhrase = &pIter->aPhrase[i]; | |
| 283 if( pPhrase->pHead && pPhrase->iHead<iEnd ){ | |
| 284 iEnd = pPhrase->iHead; | |
| 285 } | |
| 286 } | |
| 287 if( iEnd==0x7FFFFFFF ){ | |
| 288 return 1; | |
| 289 } | |
| 290 | |
| 291 pIter->iCurrent = iStart = iEnd - pIter->nSnippet + 1; | |
| 292 for(i=0; i<pIter->nPhrase; i++){ | |
| 293 SnippetPhrase *pPhrase = &pIter->aPhrase[i]; | |
| 294 fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, iEnd+1); | |
| 295 fts3SnippetAdvance(&pPhrase->pTail, &pPhrase->iTail, iStart); | |
| 296 } | |
| 297 } | |
| 298 | |
| 299 return 0; | |
| 300 } | |
| 301 | |
| 302 /* | |
| 303 ** Retrieve information about the current candidate snippet of snippet | |
| 304 ** iterator pIter. | |
| 305 */ | |
| 306 static void fts3SnippetDetails( | |
| 307 SnippetIter *pIter, /* Snippet iterator */ | |
| 308 u64 mCovered, /* Bitmask of phrases already covered */ | |
| 309 int *piToken, /* OUT: First token of proposed snippet */ | |
| 310 int *piScore, /* OUT: "Score" for this snippet */ | |
| 311 u64 *pmCover, /* OUT: Bitmask of phrases covered */ | |
| 312 u64 *pmHighlight /* OUT: Bitmask of terms to highlight */ | |
| 313 ){ | |
| 314 int iStart = pIter->iCurrent; /* First token of snippet */ | |
| 315 int iScore = 0; /* Score of this snippet */ | |
| 316 int i; /* Loop counter */ | |
| 317 u64 mCover = 0; /* Mask of phrases covered by this snippet */ | |
| 318 u64 mHighlight = 0; /* Mask of tokens to highlight in snippet */ | |
| 319 | |
| 320 for(i=0; i<pIter->nPhrase; i++){ | |
| 321 SnippetPhrase *pPhrase = &pIter->aPhrase[i]; | |
| 322 if( pPhrase->pTail ){ | |
| 323 char *pCsr = pPhrase->pTail; | |
| 324 int iCsr = pPhrase->iTail; | |
| 325 | |
| 326 while( iCsr<(iStart+pIter->nSnippet) ){ | |
| 327 int j; | |
| 328 u64 mPhrase = (u64)1 << i; | |
| 329 u64 mPos = (u64)1 << (iCsr - iStart); | |
| 330 assert( iCsr>=iStart ); | |
| 331 if( (mCover|mCovered)&mPhrase ){ | |
| 332 iScore++; | |
| 333 }else{ | |
| 334 iScore += 1000; | |
| 335 } | |
| 336 mCover |= mPhrase; | |
| 337 | |
| 338 for(j=0; j<pPhrase->nToken; j++){ | |
| 339 mHighlight |= (mPos>>j); | |
| 340 } | |
| 341 | |
| 342 if( 0==(*pCsr & 0x0FE) ) break; | |
| 343 fts3GetDeltaPosition(&pCsr, &iCsr); | |
| 344 } | |
| 345 } | |
| 346 } | |
| 347 | |
| 348 /* Set the output variables before returning. */ | |
| 349 *piToken = iStart; | |
| 350 *piScore = iScore; | |
| 351 *pmCover = mCover; | |
| 352 *pmHighlight = mHighlight; | |
| 353 } | |
| 354 | |
| 355 /* | |
| 356 ** This function is an fts3ExprIterate() callback used by fts3BestSnippet(). | |
| 357 ** Each invocation populates an element of the SnippetIter.aPhrase[] array. | |
| 358 */ | |
| 359 static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){ | |
| 360 SnippetIter *p = (SnippetIter *)ctx; | |
| 361 SnippetPhrase *pPhrase = &p->aPhrase[iPhrase]; | |
| 362 char *pCsr; | |
| 363 int rc; | |
| 364 | |
| 365 pPhrase->nToken = pExpr->pPhrase->nToken; | |
| 366 rc = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol, &pCsr); | |
| 367 assert( rc==SQLITE_OK || pCsr==0 ); | |
| 368 if( pCsr ){ | |
| 369 int iFirst = 0; | |
| 370 pPhrase->pList = pCsr; | |
| 371 fts3GetDeltaPosition(&pCsr, &iFirst); | |
| 372 assert( iFirst>=0 ); | |
| 373 pPhrase->pHead = pCsr; | |
| 374 pPhrase->pTail = pCsr; | |
| 375 pPhrase->iHead = iFirst; | |
| 376 pPhrase->iTail = iFirst; | |
| 377 }else{ | |
| 378 assert( rc!=SQLITE_OK || ( | |
| 379 pPhrase->pList==0 && pPhrase->pHead==0 && pPhrase->pTail==0 | |
| 380 )); | |
| 381 } | |
| 382 | |
| 383 return rc; | |
| 384 } | |
| 385 | |
| 386 /* | |
| 387 ** Select the fragment of text consisting of nFragment contiguous tokens | |
| 388 ** from column iCol that represent the "best" snippet. The best snippet | |
| 389 ** is the snippet with the highest score, where scores are calculated | |
| 390 ** by adding: | |
| 391 ** | |
| 392 ** (a) +1 point for each occurrence of a matchable phrase in the snippet. | |
| 393 ** | |
| 394 ** (b) +1000 points for the first occurrence of each matchable phrase in | |
| 395 ** the snippet for which the corresponding mCovered bit is not set. | |
| 396 ** | |
| 397 ** The selected snippet parameters are stored in structure *pFragment before | |
| 398 ** returning. The score of the selected snippet is stored in *piScore | |
| 399 ** before returning. | |
| 400 */ | |
| 401 static int fts3BestSnippet( | |
| 402 int nSnippet, /* Desired snippet length */ | |
| 403 Fts3Cursor *pCsr, /* Cursor to create snippet for */ | |
| 404 int iCol, /* Index of column to create snippet from */ | |
| 405 u64 mCovered, /* Mask of phrases already covered */ | |
| 406 u64 *pmSeen, /* IN/OUT: Mask of phrases seen */ | |
| 407 SnippetFragment *pFragment, /* OUT: Best snippet found */ | |
| 408 int *piScore /* OUT: Score of snippet pFragment */ | |
| 409 ){ | |
| 410 int rc; /* Return Code */ | |
| 411 int nList; /* Number of phrases in expression */ | |
| 412 SnippetIter sIter; /* Iterates through snippet candidates */ | |
| 413 int nByte; /* Number of bytes of space to allocate */ | |
| 414 int iBestScore = -1; /* Best snippet score found so far */ | |
| 415 int i; /* Loop counter */ | |
| 416 | |
| 417 memset(&sIter, 0, sizeof(sIter)); | |
| 418 | |
| 419 /* Iterate through the phrases in the expression to count them. The same | |
| 420 ** callback makes sure the doclists are loaded for each phrase. | |
| 421 */ | |
| 422 rc = fts3ExprLoadDoclists(pCsr, &nList, 0); | |
| 423 if( rc!=SQLITE_OK ){ | |
| 424 return rc; | |
| 425 } | |
| 426 | |
| 427 /* Now that it is known how many phrases there are, allocate and zero | |
| 428 ** the required space using malloc(). | |
| 429 */ | |
| 430 nByte = sizeof(SnippetPhrase) * nList; | |
| 431 sIter.aPhrase = (SnippetPhrase *)sqlite3_malloc(nByte); | |
| 432 if( !sIter.aPhrase ){ | |
| 433 return SQLITE_NOMEM; | |
| 434 } | |
| 435 memset(sIter.aPhrase, 0, nByte); | |
| 436 | |
| 437 /* Initialize the contents of the SnippetIter object. Then iterate through | |
| 438 ** the set of phrases in the expression to populate the aPhrase[] array. | |
| 439 */ | |
| 440 sIter.pCsr = pCsr; | |
| 441 sIter.iCol = iCol; | |
| 442 sIter.nSnippet = nSnippet; | |
| 443 sIter.nPhrase = nList; | |
| 444 sIter.iCurrent = -1; | |
| 445 (void)fts3ExprIterate(pCsr->pExpr, fts3SnippetFindPositions, (void *)&sIter); | |
| 446 | |
| 447 /* Set the *pmSeen output variable. */ | |
| 448 for(i=0; i<nList; i++){ | |
| 449 if( sIter.aPhrase[i].pHead ){ | |
| 450 *pmSeen |= (u64)1 << i; | |
| 451 } | |
| 452 } | |
| 453 | |
| 454 /* Loop through all candidate snippets. Store the best snippet in | |
| 455 ** *pFragment. Store its associated 'score' in iBestScore. | |
| 456 */ | |
| 457 pFragment->iCol = iCol; | |
| 458 while( !fts3SnippetNextCandidate(&sIter) ){ | |
| 459 int iPos; | |
| 460 int iScore; | |
| 461 u64 mCover; | |
| 462 u64 mHighlight; | |
| 463 fts3SnippetDetails(&sIter, mCovered, &iPos, &iScore, &mCover, &mHighlight); | |
| 464 assert( iScore>=0 ); | |
| 465 if( iScore>iBestScore ){ | |
| 466 pFragment->iPos = iPos; | |
| 467 pFragment->hlmask = mHighlight; | |
| 468 pFragment->covered = mCover; | |
| 469 iBestScore = iScore; | |
| 470 } | |
| 471 } | |
| 472 | |
| 473 sqlite3_free(sIter.aPhrase); | |
| 474 *piScore = iBestScore; | |
| 475 return SQLITE_OK; | |
| 476 } | |
| 477 | |
| 478 | |
| 479 /* | |
| 480 ** Append a string to the string-buffer passed as the first argument. | |
| 481 ** | |
| 482 ** If nAppend is negative, then the length of the string zAppend is | |
| 483 ** determined using strlen(). | |
| 484 */ | |
| 485 static int fts3StringAppend( | |
| 486 StrBuffer *pStr, /* Buffer to append to */ | |
| 487 const char *zAppend, /* Pointer to data to append to buffer */ | |
| 488 int nAppend /* Size of zAppend in bytes (or -1) */ | |
| 489 ){ | |
| 490 if( nAppend<0 ){ | |
| 491 nAppend = (int)strlen(zAppend); | |
| 492 } | |
| 493 | |
| 494 /* If there is insufficient space allocated at StrBuffer.z, use realloc() | |
| 495 ** to grow the buffer until so that it is big enough to accomadate the | |
| 496 ** appended data. | |
| 497 */ | |
| 498 if( pStr->n+nAppend+1>=pStr->nAlloc ){ | |
| 499 int nAlloc = pStr->nAlloc+nAppend+100; | |
| 500 char *zNew = sqlite3_realloc(pStr->z, nAlloc); | |
| 501 if( !zNew ){ | |
| 502 return SQLITE_NOMEM; | |
| 503 } | |
| 504 pStr->z = zNew; | |
| 505 pStr->nAlloc = nAlloc; | |
| 506 } | |
| 507 assert( pStr->z!=0 && (pStr->nAlloc >= pStr->n+nAppend+1) ); | |
| 508 | |
| 509 /* Append the data to the string buffer. */ | |
| 510 memcpy(&pStr->z[pStr->n], zAppend, nAppend); | |
| 511 pStr->n += nAppend; | |
| 512 pStr->z[pStr->n] = '\0'; | |
| 513 | |
| 514 return SQLITE_OK; | |
| 515 } | |
| 516 | |
| 517 /* | |
| 518 ** The fts3BestSnippet() function often selects snippets that end with a | |
| 519 ** query term. That is, the final term of the snippet is always a term | |
| 520 ** that requires highlighting. For example, if 'X' is a highlighted term | |
| 521 ** and '.' is a non-highlighted term, BestSnippet() may select: | |
| 522 ** | |
| 523 ** ........X.....X | |
| 524 ** | |
| 525 ** This function "shifts" the beginning of the snippet forward in the | |
| 526 ** document so that there are approximately the same number of | |
| 527 ** non-highlighted terms to the right of the final highlighted term as there | |
| 528 ** are to the left of the first highlighted term. For example, to this: | |
| 529 ** | |
| 530 ** ....X.....X.... | |
| 531 ** | |
| 532 ** This is done as part of extracting the snippet text, not when selecting | |
| 533 ** the snippet. Snippet selection is done based on doclists only, so there | |
| 534 ** is no way for fts3BestSnippet() to know whether or not the document | |
| 535 ** actually contains terms that follow the final highlighted term. | |
| 536 */ | |
| 537 static int fts3SnippetShift( | |
| 538 Fts3Table *pTab, /* FTS3 table snippet comes from */ | |
| 539 int iLangid, /* Language id to use in tokenizing */ | |
| 540 int nSnippet, /* Number of tokens desired for snippet */ | |
| 541 const char *zDoc, /* Document text to extract snippet from */ | |
| 542 int nDoc, /* Size of buffer zDoc in bytes */ | |
| 543 int *piPos, /* IN/OUT: First token of snippet */ | |
| 544 u64 *pHlmask /* IN/OUT: Mask of tokens to highlight */ | |
| 545 ){ | |
| 546 u64 hlmask = *pHlmask; /* Local copy of initial highlight-mask */ | |
| 547 | |
| 548 if( hlmask ){ | |
| 549 int nLeft; /* Tokens to the left of first highlight */ | |
| 550 int nRight; /* Tokens to the right of last highlight */ | |
| 551 int nDesired; /* Ideal number of tokens to shift forward */ | |
| 552 | |
| 553 for(nLeft=0; !(hlmask & ((u64)1 << nLeft)); nLeft++); | |
| 554 for(nRight=0; !(hlmask & ((u64)1 << (nSnippet-1-nRight))); nRight++); | |
| 555 nDesired = (nLeft-nRight)/2; | |
| 556 | |
| 557 /* Ideally, the start of the snippet should be pushed forward in the | |
| 558 ** document nDesired tokens. This block checks if there are actually | |
| 559 ** nDesired tokens to the right of the snippet. If so, *piPos and | |
| 560 ** *pHlMask are updated to shift the snippet nDesired tokens to the | |
| 561 ** right. Otherwise, the snippet is shifted by the number of tokens | |
| 562 ** available. | |
| 563 */ | |
| 564 if( nDesired>0 ){ | |
| 565 int nShift; /* Number of tokens to shift snippet by */ | |
| 566 int iCurrent = 0; /* Token counter */ | |
| 567 int rc; /* Return Code */ | |
| 568 sqlite3_tokenizer_module *pMod; | |
| 569 sqlite3_tokenizer_cursor *pC; | |
| 570 pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule; | |
| 571 | |
| 572 /* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired) | |
| 573 ** or more tokens in zDoc/nDoc. | |
| 574 */ | |
| 575 rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, iLangid, zDoc, nDoc, &pC); | |
| 576 if( rc!=SQLITE_OK ){ | |
| 577 return rc; | |
| 578 } | |
| 579 while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){ | |
| 580 const char *ZDUMMY; int DUMMY1 = 0, DUMMY2 = 0, DUMMY3 = 0; | |
| 581 rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent); | |
| 582 } | |
| 583 pMod->xClose(pC); | |
| 584 if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ return rc; } | |
| 585 | |
| 586 nShift = (rc==SQLITE_DONE)+iCurrent-nSnippet; | |
| 587 assert( nShift<=nDesired ); | |
| 588 if( nShift>0 ){ | |
| 589 *piPos += nShift; | |
| 590 *pHlmask = hlmask >> nShift; | |
| 591 } | |
| 592 } | |
| 593 } | |
| 594 return SQLITE_OK; | |
| 595 } | |
| 596 | |
| 597 /* | |
| 598 ** Extract the snippet text for fragment pFragment from cursor pCsr and | |
| 599 ** append it to string buffer pOut. | |
| 600 */ | |
| 601 static int fts3SnippetText( | |
| 602 Fts3Cursor *pCsr, /* FTS3 Cursor */ | |
| 603 SnippetFragment *pFragment, /* Snippet to extract */ | |
| 604 int iFragment, /* Fragment number */ | |
| 605 int isLast, /* True for final fragment in snippet */ | |
| 606 int nSnippet, /* Number of tokens in extracted snippet */ | |
| 607 const char *zOpen, /* String inserted before highlighted term */ | |
| 608 const char *zClose, /* String inserted after highlighted term */ | |
| 609 const char *zEllipsis, /* String inserted between snippets */ | |
| 610 StrBuffer *pOut /* Write output here */ | |
| 611 ){ | |
| 612 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; | |
| 613 int rc; /* Return code */ | |
| 614 const char *zDoc; /* Document text to extract snippet from */ | |
| 615 int nDoc; /* Size of zDoc in bytes */ | |
| 616 int iCurrent = 0; /* Current token number of document */ | |
| 617 int iEnd = 0; /* Byte offset of end of current token */ | |
| 618 int isShiftDone = 0; /* True after snippet is shifted */ | |
| 619 int iPos = pFragment->iPos; /* First token of snippet */ | |
| 620 u64 hlmask = pFragment->hlmask; /* Highlight-mask for snippet */ | |
| 621 int iCol = pFragment->iCol+1; /* Query column to extract text from */ | |
| 622 sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */ | |
| 623 sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor open on zDoc/nDoc */ | |
| 624 | |
| 625 zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol); | |
| 626 if( zDoc==0 ){ | |
| 627 if( sqlite3_column_type(pCsr->pStmt, iCol)!=SQLITE_NULL ){ | |
| 628 return SQLITE_NOMEM; | |
| 629 } | |
| 630 return SQLITE_OK; | |
| 631 } | |
| 632 nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol); | |
| 633 | |
| 634 /* Open a token cursor on the document. */ | |
| 635 pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule; | |
| 636 rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid, zDoc,nDoc,&pC); | |
| 637 if( rc!=SQLITE_OK ){ | |
| 638 return rc; | |
| 639 } | |
| 640 | |
| 641 while( rc==SQLITE_OK ){ | |
| 642 const char *ZDUMMY; /* Dummy argument used with tokenizer */ | |
| 643 int DUMMY1 = -1; /* Dummy argument used with tokenizer */ | |
| 644 int iBegin = 0; /* Offset in zDoc of start of token */ | |
| 645 int iFin = 0; /* Offset in zDoc of end of token */ | |
| 646 int isHighlight = 0; /* True for highlighted terms */ | |
| 647 | |
| 648 /* Variable DUMMY1 is initialized to a negative value above. Elsewhere | |
| 649 ** in the FTS code the variable that the third argument to xNext points to | |
| 650 ** is initialized to zero before the first (*but not necessarily | |
| 651 ** subsequent*) call to xNext(). This is done for a particular application | |
| 652 ** that needs to know whether or not the tokenizer is being used for | |
| 653 ** snippet generation or for some other purpose. | |
| 654 ** | |
| 655 ** Extreme care is required when writing code to depend on this | |
| 656 ** initialization. It is not a documented part of the tokenizer interface. | |
| 657 ** If a tokenizer is used directly by any code outside of FTS, this | |
| 658 ** convention might not be respected. */ | |
| 659 rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent); | |
| 660 if( rc!=SQLITE_OK ){ | |
| 661 if( rc==SQLITE_DONE ){ | |
| 662 /* Special case - the last token of the snippet is also the last token | |
| 663 ** of the column. Append any punctuation that occurred between the end | |
| 664 ** of the previous token and the end of the document to the output. | |
| 665 ** Then break out of the loop. */ | |
| 666 rc = fts3StringAppend(pOut, &zDoc[iEnd], -1); | |
| 667 } | |
| 668 break; | |
| 669 } | |
| 670 if( iCurrent<iPos ){ continue; } | |
| 671 | |
| 672 if( !isShiftDone ){ | |
| 673 int n = nDoc - iBegin; | |
| 674 rc = fts3SnippetShift( | |
| 675 pTab, pCsr->iLangid, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask | |
| 676 ); | |
| 677 isShiftDone = 1; | |
| 678 | |
| 679 /* Now that the shift has been done, check if the initial "..." are | |
| 680 ** required. They are required if (a) this is not the first fragment, | |
| 681 ** or (b) this fragment does not begin at position 0 of its column. | |
| 682 */ | |
| 683 if( rc==SQLITE_OK && (iPos>0 || iFragment>0) ){ | |
| 684 rc = fts3StringAppend(pOut, zEllipsis, -1); | |
| 685 } | |
| 686 if( rc!=SQLITE_OK || iCurrent<iPos ) continue; | |
| 687 } | |
| 688 | |
| 689 if( iCurrent>=(iPos+nSnippet) ){ | |
| 690 if( isLast ){ | |
| 691 rc = fts3StringAppend(pOut, zEllipsis, -1); | |
| 692 } | |
| 693 break; | |
| 694 } | |
| 695 | |
| 696 /* Set isHighlight to true if this term should be highlighted. */ | |
| 697 isHighlight = (hlmask & ((u64)1 << (iCurrent-iPos)))!=0; | |
| 698 | |
| 699 if( iCurrent>iPos ) rc = fts3StringAppend(pOut, &zDoc[iEnd], iBegin-iEnd); | |
| 700 if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zOpen, -1); | |
| 701 if( rc==SQLITE_OK ) rc = fts3StringAppend(pOut, &zDoc[iBegin], iFin-iBegin); | |
| 702 if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zClose, -1); | |
| 703 | |
| 704 iEnd = iFin; | |
| 705 } | |
| 706 | |
| 707 pMod->xClose(pC); | |
| 708 return rc; | |
| 709 } | |
| 710 | |
| 711 | |
/*
** Count the entries in a column-list (a delta-encoded list of term offsets
** within a single column of a single row).
**
** On entry *ppCollist points to the first byte of the first varint in the
** column-list. On return it points to the first byte after the last varint,
** which is either the 0x00 that ends the position-list or the 0x01 that
** precedes the column number of the next column in the position-list.
**
** The number of varints in the column-list is returned. A byte ends a
** varint if its 0x80 bit is clear.
*/
static int fts3ColumnlistCount(char **ppCollist){
  char *p = *ppCollist;           /* Current byte */
  char isCont = 0;                /* 0x80 bit of the previous byte */
  int nVarint = 0;                /* Number of complete varints seen */

  /* A column-list is terminated by either a 0x01 or 0x00. Such a byte only
  ** counts as a terminator if it is not the final byte of a multi-byte
  ** varint, which is why isCont takes part in the test. */
  for(;;){
    if( (0xFE & (*p | isCont))==0 ) break;
    isCont = *p & 0x80;
    p++;
    if( !isCont ){
      nVarint++;
    }
  }

  *ppCollist = p;
  return nVarint;
}
| 739 | |
| 740 /* | |
| 741 ** fts3ExprIterate() callback used to collect the "global" matchinfo stats | |
| 742 ** for a single query. | |
| 743 ** | |
| 744 ** fts3ExprIterate() callback to load the 'global' elements of a | |
| 745 ** FTS3_MATCHINFO_HITS matchinfo array. The global stats are those elements | |
| 746 ** of the matchinfo array that are constant for all rows returned by the | |
| 747 ** current query. | |
| 748 ** | |
| 749 ** Argument pCtx is actually a pointer to a struct of type MatchInfo. This | |
| 750 ** function populates Matchinfo.aMatchinfo[] as follows: | |
| 751 ** | |
| 752 ** for(iCol=0; iCol<nCol; iCol++){ | |
| 753 ** aMatchinfo[3*iPhrase*nCol + 3*iCol + 1] = X; | |
| 754 ** aMatchinfo[3*iPhrase*nCol + 3*iCol + 2] = Y; | |
| 755 ** } | |
| 756 ** | |
| 757 ** where X is the number of matches for phrase iPhrase is column iCol of all | |
| 758 ** rows of the table. Y is the number of rows for which column iCol contains | |
| 759 ** at least one instance of phrase iPhrase. | |
| 760 ** | |
| 761 ** If the phrase pExpr consists entirely of deferred tokens, then all X and | |
| 762 ** Y values are set to nDoc, where nDoc is the number of documents in the | |
| 763 ** file system. This is done because the full-text index doclist is required | |
| 764 ** to calculate these values properly, and the full-text index doclist is | |
| 765 ** not available for deferred tokens. | |
| 766 */ | |
| 767 static int fts3ExprGlobalHitsCb( | |
| 768 Fts3Expr *pExpr, /* Phrase expression node */ | |
| 769 int iPhrase, /* Phrase number (numbered from zero) */ | |
| 770 void *pCtx /* Pointer to MatchInfo structure */ | |
| 771 ){ | |
| 772 MatchInfo *p = (MatchInfo *)pCtx; | |
| 773 return sqlite3Fts3EvalPhraseStats( | |
| 774 p->pCursor, pExpr, &p->aMatchinfo[3*iPhrase*p->nCol] | |
| 775 ); | |
| 776 } | |
| 777 | |
| 778 /* | |
| 779 ** fts3ExprIterate() callback used to collect the "local" part of the | |
| 780 ** FTS3_MATCHINFO_HITS array. The local stats are those elements of the | |
| 781 ** array that are different for each row returned by the query. | |
| 782 */ | |
| 783 static int fts3ExprLocalHitsCb( | |
| 784 Fts3Expr *pExpr, /* Phrase expression node */ | |
| 785 int iPhrase, /* Phrase number */ | |
| 786 void *pCtx /* Pointer to MatchInfo structure */ | |
| 787 ){ | |
| 788 int rc = SQLITE_OK; | |
| 789 MatchInfo *p = (MatchInfo *)pCtx; | |
| 790 int iStart = iPhrase * p->nCol * 3; | |
| 791 int i; | |
| 792 | |
| 793 for(i=0; i<p->nCol && rc==SQLITE_OK; i++){ | |
| 794 char *pCsr; | |
| 795 rc = sqlite3Fts3EvalPhrasePoslist(p->pCursor, pExpr, i, &pCsr); | |
| 796 if( pCsr ){ | |
| 797 p->aMatchinfo[iStart+i*3] = fts3ColumnlistCount(&pCsr); | |
| 798 }else{ | |
| 799 p->aMatchinfo[iStart+i*3] = 0; | |
| 800 } | |
| 801 } | |
| 802 | |
| 803 return rc; | |
| 804 } | |
| 805 | |
| 806 static int fts3MatchinfoCheck( | |
| 807 Fts3Table *pTab, | |
| 808 char cArg, | |
| 809 char **pzErr | |
| 810 ){ | |
| 811 if( (cArg==FTS3_MATCHINFO_NPHRASE) | |
| 812 || (cArg==FTS3_MATCHINFO_NCOL) | |
| 813 || (cArg==FTS3_MATCHINFO_NDOC && pTab->bFts4) | |
| 814 || (cArg==FTS3_MATCHINFO_AVGLENGTH && pTab->bFts4) | |
| 815 || (cArg==FTS3_MATCHINFO_LENGTH && pTab->bHasDocsize) | |
| 816 || (cArg==FTS3_MATCHINFO_LCS) | |
| 817 || (cArg==FTS3_MATCHINFO_HITS) | |
| 818 ){ | |
| 819 return SQLITE_OK; | |
| 820 } | |
| 821 *pzErr = sqlite3_mprintf("unrecognized matchinfo request: %c", cArg); | |
| 822 return SQLITE_ERROR; | |
| 823 } | |
| 824 | |
| 825 static int fts3MatchinfoSize(MatchInfo *pInfo, char cArg){ | |
| 826 int nVal; /* Number of integers output by cArg */ | |
| 827 | |
| 828 switch( cArg ){ | |
| 829 case FTS3_MATCHINFO_NDOC: | |
| 830 case FTS3_MATCHINFO_NPHRASE: | |
| 831 case FTS3_MATCHINFO_NCOL: | |
| 832 nVal = 1; | |
| 833 break; | |
| 834 | |
| 835 case FTS3_MATCHINFO_AVGLENGTH: | |
| 836 case FTS3_MATCHINFO_LENGTH: | |
| 837 case FTS3_MATCHINFO_LCS: | |
| 838 nVal = pInfo->nCol; | |
| 839 break; | |
| 840 | |
| 841 default: | |
| 842 assert( cArg==FTS3_MATCHINFO_HITS ); | |
| 843 nVal = pInfo->nCol * pInfo->nPhrase * 3; | |
| 844 break; | |
| 845 } | |
| 846 | |
| 847 return nVal; | |
| 848 } | |
| 849 | |
| 850 static int fts3MatchinfoSelectDoctotal( | |
| 851 Fts3Table *pTab, | |
| 852 sqlite3_stmt **ppStmt, | |
| 853 sqlite3_int64 *pnDoc, | |
| 854 const char **paLen | |
| 855 ){ | |
| 856 sqlite3_stmt *pStmt; | |
| 857 const char *a; | |
| 858 sqlite3_int64 nDoc; | |
| 859 | |
| 860 if( !*ppStmt ){ | |
| 861 int rc = sqlite3Fts3SelectDoctotal(pTab, ppStmt); | |
| 862 if( rc!=SQLITE_OK ) return rc; | |
| 863 } | |
| 864 pStmt = *ppStmt; | |
| 865 assert( sqlite3_data_count(pStmt)==1 ); | |
| 866 | |
| 867 a = sqlite3_column_blob(pStmt, 0); | |
| 868 a += sqlite3Fts3GetVarint(a, &nDoc); | |
| 869 if( nDoc==0 ) return FTS_CORRUPT_VTAB; | |
| 870 *pnDoc = (u32)nDoc; | |
| 871 | |
| 872 if( paLen ) *paLen = a; | |
| 873 return SQLITE_OK; | |
| 874 } | |
| 875 | |
| 876 /* | |
| 877 ** An instance of the following structure is used to store state while | |
| 878 ** iterating through a multi-column position-list corresponding to the | |
| 879 ** hits for a single phrase on a single row in order to calculate the | |
| 880 ** values for a matchinfo() FTS3_MATCHINFO_LCS request. | |
| 881 */ | |
| 882 typedef struct LcsIterator LcsIterator; | |
| 883 struct LcsIterator { | |
| 884 Fts3Expr *pExpr; /* Pointer to phrase expression */ | |
| 885 int iPosOffset; /* Tokens count up to end of this phrase */ | |
| 886 char *pRead; /* Cursor used to iterate through aDoclist */ | |
| 887 int iPos; /* Current position */ | |
| 888 }; | |
| 889 | |
/*
** Sentinel value meaning that an LCS iterator has finished iterating
** through all offsets for all columns.
**
** The definition deliberately carries no trailing semicolon, so the macro
** can be used inside expressions and comparisons as well as on the
** right-hand side of an assignment.
*/
#define LCS_ITERATOR_FINISHED 0x7FFFFFFF
| 895 | |
| 896 static int fts3MatchinfoLcsCb( | |
| 897 Fts3Expr *pExpr, /* Phrase expression node */ | |
| 898 int iPhrase, /* Phrase number (numbered from zero) */ | |
| 899 void *pCtx /* Pointer to MatchInfo structure */ | |
| 900 ){ | |
| 901 LcsIterator *aIter = (LcsIterator *)pCtx; | |
| 902 aIter[iPhrase].pExpr = pExpr; | |
| 903 return SQLITE_OK; | |
| 904 } | |
| 905 | |
| 906 /* | |
| 907 ** Advance the iterator passed as an argument to the next position. Return | |
| 908 ** 1 if the iterator is at EOF or if it now points to the start of the | |
| 909 ** position list for the next column. | |
| 910 */ | |
| 911 static int fts3LcsIteratorAdvance(LcsIterator *pIter){ | |
| 912 char *pRead = pIter->pRead; | |
| 913 sqlite3_int64 iRead; | |
| 914 int rc = 0; | |
| 915 | |
| 916 pRead += sqlite3Fts3GetVarint(pRead, &iRead); | |
| 917 if( iRead==0 || iRead==1 ){ | |
| 918 pRead = 0; | |
| 919 rc = 1; | |
| 920 }else{ | |
| 921 pIter->iPos += (int)(iRead-2); | |
| 922 } | |
| 923 | |
| 924 pIter->pRead = pRead; | |
| 925 return rc; | |
| 926 } | |
| 927 | |
| 928 /* | |
| 929 ** This function implements the FTS3_MATCHINFO_LCS matchinfo() flag. | |
| 930 ** | |
| 931 ** If the call is successful, the longest-common-substring lengths for each | |
| 932 ** column are written into the first nCol elements of the pInfo->aMatchinfo[] | |
| 933 ** array before returning. SQLITE_OK is returned in this case. | |
| 934 ** | |
| 935 ** Otherwise, if an error occurs, an SQLite error code is returned and the | |
| 936 ** data written to the first nCol elements of pInfo->aMatchinfo[] is | |
| 937 ** undefined. | |
| 938 */ | |
| 939 static int fts3MatchinfoLcs(Fts3Cursor *pCsr, MatchInfo *pInfo){ | |
| 940 LcsIterator *aIter; | |
| 941 int i; | |
| 942 int iCol; | |
| 943 int nToken = 0; | |
| 944 | |
| 945 /* Allocate and populate the array of LcsIterator objects. The array | |
| 946 ** contains one element for each matchable phrase in the query. | |
| 947 **/ | |
| 948 aIter = sqlite3_malloc(sizeof(LcsIterator) * pCsr->nPhrase); | |
| 949 if( !aIter ) return SQLITE_NOMEM; | |
| 950 memset(aIter, 0, sizeof(LcsIterator) * pCsr->nPhrase); | |
| 951 (void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter); | |
| 952 | |
| 953 for(i=0; i<pInfo->nPhrase; i++){ | |
| 954 LcsIterator *pIter = &aIter[i]; | |
| 955 nToken -= pIter->pExpr->pPhrase->nToken; | |
| 956 pIter->iPosOffset = nToken; | |
| 957 } | |
| 958 | |
| 959 for(iCol=0; iCol<pInfo->nCol; iCol++){ | |
| 960 int nLcs = 0; /* LCS value for this column */ | |
| 961 int nLive = 0; /* Number of iterators in aIter not at EOF */ | |
| 962 | |
| 963 for(i=0; i<pInfo->nPhrase; i++){ | |
| 964 int rc; | |
| 965 LcsIterator *pIt = &aIter[i]; | |
| 966 rc = sqlite3Fts3EvalPhrasePoslist(pCsr, pIt->pExpr, iCol, &pIt->pRead); | |
| 967 if( rc!=SQLITE_OK ) return rc; | |
| 968 if( pIt->pRead ){ | |
| 969 pIt->iPos = pIt->iPosOffset; | |
| 970 fts3LcsIteratorAdvance(&aIter[i]); | |
| 971 nLive++; | |
| 972 } | |
| 973 } | |
| 974 | |
| 975 while( nLive>0 ){ | |
| 976 LcsIterator *pAdv = 0; /* The iterator to advance by one position */ | |
| 977 int nThisLcs = 0; /* LCS for the current iterator positions */ | |
| 978 | |
| 979 for(i=0; i<pInfo->nPhrase; i++){ | |
| 980 LcsIterator *pIter = &aIter[i]; | |
| 981 if( pIter->pRead==0 ){ | |
| 982 /* This iterator is already at EOF for this column. */ | |
| 983 nThisLcs = 0; | |
| 984 }else{ | |
| 985 if( pAdv==0 || pIter->iPos<pAdv->iPos ){ | |
| 986 pAdv = pIter; | |
| 987 } | |
| 988 if( nThisLcs==0 || pIter->iPos==pIter[-1].iPos ){ | |
| 989 nThisLcs++; | |
| 990 }else{ | |
| 991 nThisLcs = 1; | |
| 992 } | |
| 993 if( nThisLcs>nLcs ) nLcs = nThisLcs; | |
| 994 } | |
| 995 } | |
| 996 if( fts3LcsIteratorAdvance(pAdv) ) nLive--; | |
| 997 } | |
| 998 | |
| 999 pInfo->aMatchinfo[iCol] = nLcs; | |
| 1000 } | |
| 1001 | |
| 1002 sqlite3_free(aIter); | |
| 1003 return SQLITE_OK; | |
| 1004 } | |
| 1005 | |
| 1006 /* | |
| 1007 ** Populate the buffer pInfo->aMatchinfo[] with an array of integers to | |
| 1008 ** be returned by the matchinfo() function. Argument zArg contains the | |
| 1009 ** format string passed as the second argument to matchinfo (or the | |
| 1010 ** default value "pcx" if no second argument was specified). The format | |
| 1011 ** string has already been validated and the pInfo->aMatchinfo[] array | |
| 1012 ** is guaranteed to be large enough for the output. | |
| 1013 ** | |
| 1014 ** If bGlobal is true, then populate all fields of the matchinfo() output. | |
| 1015 ** If it is false, then assume that those fields that do not change between | |
| 1016 ** rows (i.e. FTS3_MATCHINFO_NPHRASE, NCOL, NDOC, AVGLENGTH and part of HITS) | |
| 1017 ** have already been populated. | |
| 1018 ** | |
| 1019 ** Return SQLITE_OK if successful, or an SQLite error code if an error | |
| 1020 ** occurs. If a value other than SQLITE_OK is returned, the state the | |
| 1021 ** pInfo->aMatchinfo[] buffer is left in is undefined. | |
| 1022 */ | |
| 1023 static int fts3MatchinfoValues( | |
| 1024 Fts3Cursor *pCsr, /* FTS3 cursor object */ | |
| 1025 int bGlobal, /* True to grab the global stats */ | |
| 1026 MatchInfo *pInfo, /* Matchinfo context object */ | |
| 1027 const char *zArg /* Matchinfo format string */ | |
| 1028 ){ | |
| 1029 int rc = SQLITE_OK; | |
| 1030 int i; | |
| 1031 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; | |
| 1032 sqlite3_stmt *pSelect = 0; | |
| 1033 | |
| 1034 for(i=0; rc==SQLITE_OK && zArg[i]; i++){ | |
| 1035 | |
| 1036 switch( zArg[i] ){ | |
| 1037 case FTS3_MATCHINFO_NPHRASE: | |
| 1038 if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nPhrase; | |
| 1039 break; | |
| 1040 | |
| 1041 case FTS3_MATCHINFO_NCOL: | |
| 1042 if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nCol; | |
| 1043 break; | |
| 1044 | |
| 1045 case FTS3_MATCHINFO_NDOC: | |
| 1046 if( bGlobal ){ | |
| 1047 sqlite3_int64 nDoc = 0; | |
| 1048 rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, 0); | |
| 1049 pInfo->aMatchinfo[0] = (u32)nDoc; | |
| 1050 } | |
| 1051 break; | |
| 1052 | |
| 1053 case FTS3_MATCHINFO_AVGLENGTH: | |
| 1054 if( bGlobal ){ | |
| 1055 sqlite3_int64 nDoc; /* Number of rows in table */ | |
| 1056 const char *a; /* Aggregate column length array */ | |
| 1057 | |
| 1058 rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, &a); | |
| 1059 if( rc==SQLITE_OK ){ | |
| 1060 int iCol; | |
| 1061 for(iCol=0; iCol<pInfo->nCol; iCol++){ | |
| 1062 u32 iVal; | |
| 1063 sqlite3_int64 nToken; | |
| 1064 a += sqlite3Fts3GetVarint(a, &nToken); | |
| 1065 iVal = (u32)(((u32)(nToken&0xffffffff)+nDoc/2)/nDoc); | |
| 1066 pInfo->aMatchinfo[iCol] = iVal; | |
| 1067 } | |
| 1068 } | |
| 1069 } | |
| 1070 break; | |
| 1071 | |
| 1072 case FTS3_MATCHINFO_LENGTH: { | |
| 1073 sqlite3_stmt *pSelectDocsize = 0; | |
| 1074 rc = sqlite3Fts3SelectDocsize(pTab, pCsr->iPrevId, &pSelectDocsize); | |
| 1075 if( rc==SQLITE_OK ){ | |
| 1076 int iCol; | |
| 1077 const char *a = sqlite3_column_blob(pSelectDocsize, 0); | |
| 1078 for(iCol=0; iCol<pInfo->nCol; iCol++){ | |
| 1079 sqlite3_int64 nToken; | |
| 1080 a += sqlite3Fts3GetVarint(a, &nToken); | |
| 1081 pInfo->aMatchinfo[iCol] = (u32)nToken; | |
| 1082 } | |
| 1083 } | |
| 1084 sqlite3_reset(pSelectDocsize); | |
| 1085 break; | |
| 1086 } | |
| 1087 | |
| 1088 case FTS3_MATCHINFO_LCS: | |
| 1089 rc = fts3ExprLoadDoclists(pCsr, 0, 0); | |
| 1090 if( rc==SQLITE_OK ){ | |
| 1091 rc = fts3MatchinfoLcs(pCsr, pInfo); | |
| 1092 } | |
| 1093 break; | |
| 1094 | |
| 1095 default: { | |
| 1096 Fts3Expr *pExpr; | |
| 1097 assert( zArg[i]==FTS3_MATCHINFO_HITS ); | |
| 1098 pExpr = pCsr->pExpr; | |
| 1099 rc = fts3ExprLoadDoclists(pCsr, 0, 0); | |
| 1100 if( rc!=SQLITE_OK ) break; | |
| 1101 if( bGlobal ){ | |
| 1102 if( pCsr->pDeferred ){ | |
| 1103 rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &pInfo->nDoc, 0); | |
| 1104 if( rc!=SQLITE_OK ) break; | |
| 1105 } | |
| 1106 rc = fts3ExprIterate(pExpr, fts3ExprGlobalHitsCb,(void*)pInfo); | |
| 1107 if( rc!=SQLITE_OK ) break; | |
| 1108 } | |
| 1109 (void)fts3ExprIterate(pExpr, fts3ExprLocalHitsCb,(void*)pInfo); | |
| 1110 break; | |
| 1111 } | |
| 1112 } | |
| 1113 | |
| 1114 pInfo->aMatchinfo += fts3MatchinfoSize(pInfo, zArg[i]); | |
| 1115 } | |
| 1116 | |
| 1117 sqlite3_reset(pSelect); | |
| 1118 return rc; | |
| 1119 } | |
| 1120 | |
| 1121 | |
| 1122 /* | |
| 1123 ** Populate pCsr->aMatchinfo[] with data for the current row. The | |
| 1124 ** 'matchinfo' data is an array of 32-bit unsigned integers (C type u32). | |
| 1125 */ | |
| 1126 static int fts3GetMatchinfo( | |
| 1127 Fts3Cursor *pCsr, /* FTS3 Cursor object */ | |
| 1128 const char *zArg /* Second argument to matchinfo() function */ | |
| 1129 ){ | |
| 1130 MatchInfo sInfo; | |
| 1131 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; | |
| 1132 int rc = SQLITE_OK; | |
| 1133 int bGlobal = 0; /* Collect 'global' stats as well as local */ | |
| 1134 | |
| 1135 memset(&sInfo, 0, sizeof(MatchInfo)); | |
| 1136 sInfo.pCursor = pCsr; | |
| 1137 sInfo.nCol = pTab->nColumn; | |
| 1138 | |
| 1139 /* If there is cached matchinfo() data, but the format string for the | |
| 1140 ** cache does not match the format string for this request, discard | |
| 1141 ** the cached data. */ | |
| 1142 if( pCsr->zMatchinfo && strcmp(pCsr->zMatchinfo, zArg) ){ | |
| 1143 assert( pCsr->aMatchinfo ); | |
| 1144 sqlite3_free(pCsr->aMatchinfo); | |
| 1145 pCsr->zMatchinfo = 0; | |
| 1146 pCsr->aMatchinfo = 0; | |
| 1147 } | |
| 1148 | |
| 1149 /* If Fts3Cursor.aMatchinfo[] is NULL, then this is the first time the | |
| 1150 ** matchinfo function has been called for this query. In this case | |
| 1151 ** allocate the array used to accumulate the matchinfo data and | |
| 1152 ** initialize those elements that are constant for every row. | |
| 1153 */ | |
| 1154 if( pCsr->aMatchinfo==0 ){ | |
| 1155 int nMatchinfo = 0; /* Number of u32 elements in match-info */ | |
| 1156 int nArg; /* Bytes in zArg */ | |
| 1157 int i; /* Used to iterate through zArg */ | |
| 1158 | |
| 1159 /* Determine the number of phrases in the query */ | |
| 1160 pCsr->nPhrase = fts3ExprPhraseCount(pCsr->pExpr); | |
| 1161 sInfo.nPhrase = pCsr->nPhrase; | |
| 1162 | |
| 1163 /* Determine the number of integers in the buffer returned by this call. */ | |
| 1164 for(i=0; zArg[i]; i++){ | |
| 1165 nMatchinfo += fts3MatchinfoSize(&sInfo, zArg[i]); | |
| 1166 } | |
| 1167 | |
| 1168 /* Allocate space for Fts3Cursor.aMatchinfo[] and Fts3Cursor.zMatchinfo. */ | |
| 1169 nArg = (int)strlen(zArg); | |
| 1170 pCsr->aMatchinfo = (u32 *)sqlite3_malloc(sizeof(u32)*nMatchinfo + nArg + 1); | |
| 1171 if( !pCsr->aMatchinfo ) return SQLITE_NOMEM; | |
| 1172 | |
| 1173 pCsr->zMatchinfo = (char *)&pCsr->aMatchinfo[nMatchinfo]; | |
| 1174 pCsr->nMatchinfo = nMatchinfo; | |
| 1175 memcpy(pCsr->zMatchinfo, zArg, nArg+1); | |
| 1176 memset(pCsr->aMatchinfo, 0, sizeof(u32)*nMatchinfo); | |
| 1177 pCsr->isMatchinfoNeeded = 1; | |
| 1178 bGlobal = 1; | |
| 1179 } | |
| 1180 | |
| 1181 sInfo.aMatchinfo = pCsr->aMatchinfo; | |
| 1182 sInfo.nPhrase = pCsr->nPhrase; | |
| 1183 if( pCsr->isMatchinfoNeeded ){ | |
| 1184 rc = fts3MatchinfoValues(pCsr, bGlobal, &sInfo, zArg); | |
| 1185 pCsr->isMatchinfoNeeded = 0; | |
| 1186 } | |
| 1187 | |
| 1188 return rc; | |
| 1189 } | |
| 1190 | |
| 1191 /* | |
| 1192 ** Implementation of snippet() function. | |
| 1193 */ | |
| 1194 void sqlite3Fts3Snippet( | |
| 1195 sqlite3_context *pCtx, /* SQLite function call context */ | |
| 1196 Fts3Cursor *pCsr, /* Cursor object */ | |
| 1197 const char *zStart, /* Snippet start text - "<b>" */ | |
| 1198 const char *zEnd, /* Snippet end text - "</b>" */ | |
| 1199 const char *zEllipsis, /* Snippet ellipsis text - "<b>...</b>" */ | |
| 1200 int iCol, /* Extract snippet from this column */ | |
| 1201 int nToken /* Approximate number of tokens in snippet */ | |
| 1202 ){ | |
| 1203 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; | |
| 1204 int rc = SQLITE_OK; | |
| 1205 int i; | |
| 1206 StrBuffer res = {0, 0, 0}; | |
| 1207 | |
| 1208 /* The returned text includes up to four fragments of text extracted from | |
| 1209 ** the data in the current row. The first iteration of the for(...) loop | |
| 1210 ** below attempts to locate a single fragment of text nToken tokens in | |
| 1211 ** size that contains at least one instance of all phrases in the query | |
| 1212 ** expression that appear in the current row. If such a fragment of text | |
| 1213 ** cannot be found, the second iteration of the loop attempts to locate | |
| 1214 ** a pair of fragments, and so on. | |
| 1215 */ | |
| 1216 int nSnippet = 0; /* Number of fragments in this snippet */ | |
| 1217 SnippetFragment aSnippet[4]; /* Maximum of 4 fragments per snippet */ | |
| 1218 int nFToken = -1; /* Number of tokens in each fragment */ | |
| 1219 | |
| 1220 if( !pCsr->pExpr ){ | |
| 1221 sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC); | |
| 1222 return; | |
| 1223 } | |
| 1224 | |
| 1225 for(nSnippet=1; 1; nSnippet++){ | |
| 1226 | |
| 1227 int iSnip; /* Loop counter 0..nSnippet-1 */ | |
| 1228 u64 mCovered = 0; /* Bitmask of phrases covered by snippet */ | |
| 1229 u64 mSeen = 0; /* Bitmask of phrases seen by BestSnippet() */ | |
| 1230 | |
| 1231 if( nToken>=0 ){ | |
| 1232 nFToken = (nToken+nSnippet-1) / nSnippet; | |
| 1233 }else{ | |
| 1234 nFToken = -1 * nToken; | |
| 1235 } | |
| 1236 | |
| 1237 for(iSnip=0; iSnip<nSnippet; iSnip++){ | |
| 1238 int iBestScore = -1; /* Best score of columns checked so far */ | |
| 1239 int iRead; /* Used to iterate through columns */ | |
| 1240 SnippetFragment *pFragment = &aSnippet[iSnip]; | |
| 1241 | |
| 1242 memset(pFragment, 0, sizeof(*pFragment)); | |
| 1243 | |
| 1244 /* Loop through all columns of the table being considered for snippets. | |
| 1245 ** If the iCol argument to this function was negative, this means all | |
| 1246 ** columns of the FTS3 table. Otherwise, only column iCol is considered. | |
| 1247 */ | |
| 1248 for(iRead=0; iRead<pTab->nColumn; iRead++){ | |
| 1249 SnippetFragment sF = {0, 0, 0, 0}; | |
| 1250 int iS; | |
| 1251 if( iCol>=0 && iRead!=iCol ) continue; | |
| 1252 | |
| 1253 /* Find the best snippet of nFToken tokens in column iRead. */ | |
| 1254 rc = fts3BestSnippet(nFToken, pCsr, iRead, mCovered, &mSeen, &sF, &iS); | |
| 1255 if( rc!=SQLITE_OK ){ | |
| 1256 goto snippet_out; | |
| 1257 } | |
| 1258 if( iS>iBestScore ){ | |
| 1259 *pFragment = sF; | |
| 1260 iBestScore = iS; | |
| 1261 } | |
| 1262 } | |
| 1263 | |
| 1264 mCovered |= pFragment->covered; | |
| 1265 } | |
| 1266 | |
| 1267 /* If all query phrases seen by fts3BestSnippet() are present in at least | |
| 1268 ** one of the nSnippet snippet fragments, break out of the loop. | |
| 1269 */ | |
| 1270 assert( (mCovered&mSeen)==mCovered ); | |
| 1271 if( mSeen==mCovered || nSnippet==SizeofArray(aSnippet) ) break; | |
| 1272 } | |
| 1273 | |
| 1274 assert( nFToken>0 ); | |
| 1275 | |
| 1276 for(i=0; i<nSnippet && rc==SQLITE_OK; i++){ | |
| 1277 rc = fts3SnippetText(pCsr, &aSnippet[i], | |
| 1278 i, (i==nSnippet-1), nFToken, zStart, zEnd, zEllipsis, &res | |
| 1279 ); | |
| 1280 } | |
| 1281 | |
| 1282 snippet_out: | |
| 1283 sqlite3Fts3SegmentsClose(pTab); | |
| 1284 if( rc!=SQLITE_OK ){ | |
| 1285 sqlite3_result_error_code(pCtx, rc); | |
| 1286 sqlite3_free(res.z); | |
| 1287 }else{ | |
| 1288 sqlite3_result_text(pCtx, res.z, -1, sqlite3_free); | |
| 1289 } | |
| 1290 } | |
| 1291 | |
| 1292 | |
| 1293 typedef struct TermOffset TermOffset; | |
| 1294 typedef struct TermOffsetCtx TermOffsetCtx; | |
| 1295 | |
| 1296 struct TermOffset { | |
| 1297 char *pList; /* Position-list */ | |
| 1298 int iPos; /* Position just read from pList */ | |
| 1299 int iOff; /* Offset of this term from read positions */ | |
| 1300 }; | |
| 1301 | |
| 1302 struct TermOffsetCtx { | |
| 1303 Fts3Cursor *pCsr; | |
| 1304 int iCol; /* Column of table to populate aTerm for */ | |
| 1305 int iTerm; | |
| 1306 sqlite3_int64 iDocid; | |
| 1307 TermOffset *aTerm; | |
| 1308 }; | |
| 1309 | |
| 1310 /* | |
| 1311 ** This function is an fts3ExprIterate() callback used by sqlite3Fts3Offsets(). | |
| 1312 */ | |
| 1313 static int fts3ExprTermOffsetInit(Fts3Expr *pExpr, int iPhrase, void *ctx){ | |
| 1314 TermOffsetCtx *p = (TermOffsetCtx *)ctx; | |
| 1315 int nTerm; /* Number of tokens in phrase */ | |
| 1316 int iTerm; /* For looping through nTerm phrase terms */ | |
| 1317 char *pList; /* Pointer to position list for phrase */ | |
| 1318 int iPos = 0; /* First position in position-list */ | |
| 1319 int rc; | |
| 1320 | |
| 1321 UNUSED_PARAMETER(iPhrase); | |
| 1322 rc = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol, &pList); | |
| 1323 nTerm = pExpr->pPhrase->nToken; | |
| 1324 if( pList ){ | |
| 1325 fts3GetDeltaPosition(&pList, &iPos); | |
| 1326 assert( iPos>=0 ); | |
| 1327 } | |
| 1328 | |
| 1329 for(iTerm=0; iTerm<nTerm; iTerm++){ | |
| 1330 TermOffset *pT = &p->aTerm[p->iTerm++]; | |
| 1331 pT->iOff = nTerm-iTerm-1; | |
| 1332 pT->pList = pList; | |
| 1333 pT->iPos = iPos; | |
| 1334 } | |
| 1335 | |
| 1336 return rc; | |
| 1337 } | |
| 1338 | |
| 1339 /* | |
| 1340 ** Implementation of offsets() function. | |
| 1341 */ | |
| 1342 void sqlite3Fts3Offsets( | |
| 1343 sqlite3_context *pCtx, /* SQLite function call context */ | |
| 1344 Fts3Cursor *pCsr /* Cursor object */ | |
| 1345 ){ | |
| 1346 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; | |
| 1347 sqlite3_tokenizer_module const *pMod = pTab->pTokenizer->pModule; | |
| 1348 int rc; /* Return Code */ | |
| 1349 int nToken; /* Number of tokens in query */ | |
| 1350 int iCol; /* Column currently being processed */ | |
| 1351 StrBuffer res = {0, 0, 0}; /* Result string */ | |
| 1352 TermOffsetCtx sCtx; /* Context for fts3ExprTermOffsetInit() */ | |
| 1353 | |
| 1354 if( !pCsr->pExpr ){ | |
| 1355 sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC); | |
| 1356 return; | |
| 1357 } | |
| 1358 | |
| 1359 memset(&sCtx, 0, sizeof(sCtx)); | |
| 1360 assert( pCsr->isRequireSeek==0 ); | |
| 1361 | |
| 1362 /* Count the number of terms in the query */ | |
| 1363 rc = fts3ExprLoadDoclists(pCsr, 0, &nToken); | |
| 1364 if( rc!=SQLITE_OK ) goto offsets_out; | |
| 1365 | |
| 1366 /* Allocate the array of TermOffset iterators. */ | |
| 1367 sCtx.aTerm = (TermOffset *)sqlite3_malloc(sizeof(TermOffset)*nToken); | |
| 1368 if( 0==sCtx.aTerm ){ | |
| 1369 rc = SQLITE_NOMEM; | |
| 1370 goto offsets_out; | |
| 1371 } | |
| 1372 sCtx.iDocid = pCsr->iPrevId; | |
| 1373 sCtx.pCsr = pCsr; | |
| 1374 | |
| 1375 /* Loop through the table columns, appending offset information to | |
| 1376 ** string-buffer res for each column. | |
| 1377 */ | |
| 1378 for(iCol=0; iCol<pTab->nColumn; iCol++){ | |
| 1379 sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor */ | |
| 1380 const char *ZDUMMY; /* Dummy argument used with xNext() */ | |
| 1381 int NDUMMY = 0; /* Dummy argument used with xNext() */ | |
| 1382 int iStart = 0; | |
| 1383 int iEnd = 0; | |
| 1384 int iCurrent = 0; | |
| 1385 const char *zDoc; | |
| 1386 int nDoc; | |
| 1387 | |
| 1388 /* Initialize the contents of sCtx.aTerm[] for column iCol. There is | |
| 1389 ** no way that this operation can fail, so the return code from | |
| 1390 ** fts3ExprIterate() can be discarded. | |
| 1391 */ | |
| 1392 sCtx.iCol = iCol; | |
| 1393 sCtx.iTerm = 0; | |
| 1394 (void)fts3ExprIterate(pCsr->pExpr, fts3ExprTermOffsetInit, (void *)&sCtx); | |
| 1395 | |
| 1396 /* Retreive the text stored in column iCol. If an SQL NULL is stored | |
| 1397 ** in column iCol, jump immediately to the next iteration of the loop. | |
| 1398 ** If an OOM occurs while retrieving the data (this can happen if SQLite | |
| 1399 ** needs to transform the data from utf-16 to utf-8), return SQLITE_NOMEM | |
| 1400 ** to the caller. | |
| 1401 */ | |
| 1402 zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol+1); | |
| 1403 nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol+1); | |
| 1404 if( zDoc==0 ){ | |
| 1405 if( sqlite3_column_type(pCsr->pStmt, iCol+1)==SQLITE_NULL ){ | |
| 1406 continue; | |
| 1407 } | |
| 1408 rc = SQLITE_NOMEM; | |
| 1409 goto offsets_out; | |
| 1410 } | |
| 1411 | |
| 1412 /* Initialize a tokenizer iterator to iterate through column iCol. */ | |
| 1413 rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid, | |
| 1414 zDoc, nDoc, &pC | |
| 1415 ); | |
| 1416 if( rc!=SQLITE_OK ) goto offsets_out; | |
| 1417 | |
| 1418 rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent); | |
| 1419 while( rc==SQLITE_OK ){ | |
| 1420 int i; /* Used to loop through terms */ | |
| 1421 int iMinPos = 0x7FFFFFFF; /* Position of next token */ | |
| 1422 TermOffset *pTerm = 0; /* TermOffset associated with next token */ | |
| 1423 | |
| 1424 for(i=0; i<nToken; i++){ | |
| 1425 TermOffset *pT = &sCtx.aTerm[i]; | |
| 1426 if( pT->pList && (pT->iPos-pT->iOff)<iMinPos ){ | |
| 1427 iMinPos = pT->iPos-pT->iOff; | |
| 1428 pTerm = pT; | |
| 1429 } | |
| 1430 } | |
| 1431 | |
| 1432 if( !pTerm ){ | |
| 1433 /* All offsets for this column have been gathered. */ | |
| 1434 rc = SQLITE_DONE; | |
| 1435 }else{ | |
| 1436 assert( iCurrent<=iMinPos ); | |
| 1437 if( 0==(0xFE&*pTerm->pList) ){ | |
| 1438 pTerm->pList = 0; | |
| 1439 }else{ | |
| 1440 fts3GetDeltaPosition(&pTerm->pList, &pTerm->iPos); | |
| 1441 } | |
| 1442 while( rc==SQLITE_OK && iCurrent<iMinPos ){ | |
| 1443 rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent); | |
| 1444 } | |
| 1445 if( rc==SQLITE_OK ){ | |
| 1446 char aBuffer[64]; | |
| 1447 sqlite3_snprintf(sizeof(aBuffer), aBuffer, | |
| 1448 "%d %d %d %d ", iCol, pTerm-sCtx.aTerm, iStart, iEnd-iStart | |
| 1449 ); | |
| 1450 rc = fts3StringAppend(&res, aBuffer, -1); | |
| 1451 }else if( rc==SQLITE_DONE && pTab->zContentTbl==0 ){ | |
| 1452 rc = FTS_CORRUPT_VTAB; | |
| 1453 } | |
| 1454 } | |
| 1455 } | |
| 1456 if( rc==SQLITE_DONE ){ | |
| 1457 rc = SQLITE_OK; | |
| 1458 } | |
| 1459 | |
| 1460 pMod->xClose(pC); | |
| 1461 if( rc!=SQLITE_OK ) goto offsets_out; | |
| 1462 } | |
| 1463 | |
| 1464 offsets_out: | |
| 1465 sqlite3_free(sCtx.aTerm); | |
| 1466 assert( rc!=SQLITE_DONE ); | |
| 1467 sqlite3Fts3SegmentsClose(pTab); | |
| 1468 if( rc!=SQLITE_OK ){ | |
| 1469 sqlite3_result_error_code(pCtx, rc); | |
| 1470 sqlite3_free(res.z); | |
| 1471 }else{ | |
| 1472 sqlite3_result_text(pCtx, res.z, res.n-1, sqlite3_free); | |
| 1473 } | |
| 1474 return; | |
| 1475 } | |
| 1476 | |
| 1477 /* | |
| 1478 ** Implementation of matchinfo() function. | |
| 1479 */ | |
| 1480 void sqlite3Fts3Matchinfo( | |
| 1481 sqlite3_context *pContext, /* Function call context */ | |
| 1482 Fts3Cursor *pCsr, /* FTS3 table cursor */ | |
| 1483 const char *zArg /* Second arg to matchinfo() function */ | |
| 1484 ){ | |
| 1485 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; | |
| 1486 int rc; | |
| 1487 int i; | |
| 1488 const char *zFormat; | |
| 1489 | |
| 1490 if( zArg ){ | |
| 1491 for(i=0; zArg[i]; i++){ | |
| 1492 char *zErr = 0; | |
| 1493 if( fts3MatchinfoCheck(pTab, zArg[i], &zErr) ){ | |
| 1494 sqlite3_result_error(pContext, zErr, -1); | |
| 1495 sqlite3_free(zErr); | |
| 1496 return; | |
| 1497 } | |
| 1498 } | |
| 1499 zFormat = zArg; | |
| 1500 }else{ | |
| 1501 zFormat = FTS3_MATCHINFO_DEFAULT; | |
| 1502 } | |
| 1503 | |
| 1504 if( !pCsr->pExpr ){ | |
| 1505 sqlite3_result_blob(pContext, "", 0, SQLITE_STATIC); | |
| 1506 return; | |
| 1507 } | |
| 1508 | |
| 1509 /* Retrieve matchinfo() data. */ | |
| 1510 rc = fts3GetMatchinfo(pCsr, zFormat); | |
| 1511 sqlite3Fts3SegmentsClose(pTab); | |
| 1512 | |
| 1513 if( rc!=SQLITE_OK ){ | |
| 1514 sqlite3_result_error_code(pContext, rc); | |
| 1515 }else{ | |
| 1516 int n = pCsr->nMatchinfo * sizeof(u32); | |
| 1517 sqlite3_result_blob(pContext, pCsr->aMatchinfo, n, SQLITE_TRANSIENT); | |
| 1518 } | |
| 1519 } | |
| 1520 | |
| 1521 #endif | |
| OLD | NEW |