OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ** 2009 Oct 23 |
| 3 ** |
| 4 ** The author disclaims copyright to this source code. In place of |
| 5 ** a legal notice, here is a blessing: |
| 6 ** |
| 7 ** May you do good and not evil. |
| 8 ** May you find forgiveness for yourself and forgive others. |
| 9 ** May you share freely, never taking more than you give. |
| 10 ** |
| 11 ****************************************************************************** |
| 12 */ |
| 13 |
| 14 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) |
| 15 |
| 16 #include "fts3Int.h" |
| 17 #include <string.h> |
| 18 #include <assert.h> |
| 19 |
/*
** Format characters recognised in the second argument to matchinfo().
** The comment beside each one states how many values that character
** contributes to the matchinfo array.
*/
#define FTS3_MATCHINFO_NPHRASE    'p'   /* 1 value */
#define FTS3_MATCHINFO_NCOL       'c'   /* 1 value */
#define FTS3_MATCHINFO_NDOC       'n'   /* 1 value */
#define FTS3_MATCHINFO_AVGLENGTH  'a'   /* nCol values */
#define FTS3_MATCHINFO_LENGTH     'l'   /* nCol values */
#define FTS3_MATCHINFO_LCS        's'   /* nCol values */
#define FTS3_MATCHINFO_HITS       'x'   /* 3*nCol*nPhrase values */

/*
** Format string used when matchinfo() is not given a second argument.
*/
#define FTS3_MATCHINFO_DEFAULT    "pcx"
| 35 |
| 36 |
| 37 /* |
| 38 ** Used as an fts3ExprIterate() context when loading phrase doclists to |
| 39 ** Fts3Expr.aDoclist[]/nDoclist. |
| 40 */ |
| 41 typedef struct LoadDoclistCtx LoadDoclistCtx; |
| 42 struct LoadDoclistCtx { |
| 43 Fts3Cursor *pCsr; /* FTS3 Cursor */ |
| 44 int nPhrase; /* Number of phrases seen so far */ |
| 45 int nToken; /* Number of tokens seen so far */ |
| 46 }; |
| 47 |
| 48 /* |
| 49 ** The following types are used as part of the implementation of the |
| 50 ** fts3BestSnippet() routine. |
| 51 */ |
| 52 typedef struct SnippetIter SnippetIter; |
| 53 typedef struct SnippetPhrase SnippetPhrase; |
| 54 typedef struct SnippetFragment SnippetFragment; |
| 55 |
| 56 struct SnippetIter { |
| 57 Fts3Cursor *pCsr; /* Cursor snippet is being generated from */ |
| 58 int iCol; /* Extract snippet from this column */ |
| 59 int nSnippet; /* Requested snippet length (in tokens) */ |
| 60 int nPhrase; /* Number of phrases in query */ |
| 61 SnippetPhrase *aPhrase; /* Array of size nPhrase */ |
| 62 int iCurrent; /* First token of current snippet */ |
| 63 }; |
| 64 |
| 65 struct SnippetPhrase { |
| 66 int nToken; /* Number of tokens in phrase */ |
| 67 char *pList; /* Pointer to start of phrase position list */ |
| 68 int iHead; /* Next value in position list */ |
| 69 char *pHead; /* Position list data following iHead */ |
| 70 int iTail; /* Next value in trailing position list */ |
| 71 char *pTail; /* Position list data following iTail */ |
| 72 }; |
| 73 |
| 74 struct SnippetFragment { |
| 75 int iCol; /* Column snippet is extracted from */ |
| 76 int iPos; /* Index of first token in snippet */ |
| 77 u64 covered; /* Mask of query phrases covered */ |
| 78 u64 hlmask; /* Mask of snippet terms to highlight */ |
| 79 }; |
| 80 |
| 81 /* |
| 82 ** This type is used as an fts3ExprIterate() context object while |
| 83 ** accumulating the data returned by the matchinfo() function. |
| 84 */ |
| 85 typedef struct MatchInfo MatchInfo; |
| 86 struct MatchInfo { |
| 87 Fts3Cursor *pCursor; /* FTS3 Cursor */ |
| 88 int nCol; /* Number of columns in table */ |
| 89 int nPhrase; /* Number of matchable phrases in query */ |
| 90 sqlite3_int64 nDoc; /* Number of docs in database */ |
| 91 u32 *aMatchinfo; /* Pre-allocated buffer */ |
| 92 }; |
| 93 |
| 94 |
| 95 |
/*
** Growable text buffer. snippet() and offsets() both produce text results
** and build them up in one of these as they run; fts3StringAppend() is the
** routine that adds data to it.
*/
typedef struct StrBuffer {
  char *z;                        /* Buffer holding the string */
  int n;                          /* Bytes used in z (excl. nul-term) */
  int nAlloc;                     /* Bytes allocated at z */
} StrBuffer;
| 107 |
| 108 |
/*
** Step a position-list iterator forward by one element.
**
** A position list is a strictly ascending list of integers. Each element
** is stored as an FTS3 varint holding (element - previous element + 2).
** So the position list:
**
**     4 9 113
**
** is stored as the three varints:
**
**     6 7 106
**
** On entry *pp points at the varint of the element to read and *piPos
** holds the value of the preceding element. On return *piPos holds the
** value just decoded and *pp points at the varint that follows it.
*/
static void fts3GetDeltaPosition(char **pp, int *piPos){
  int iDelta;                     /* Raw varint value (delta + 2) */
  int nByte;                      /* Bytes consumed by the varint */

  nByte = sqlite3Fts3GetVarint32(*pp, &iDelta);
  *pp += nByte;
  *piPos += (iDelta - 2);
}
| 134 |
| 135 /* |
| 136 ** Helper function for fts3ExprIterate() (see below). |
| 137 */ |
| 138 static int fts3ExprIterate2( |
| 139 Fts3Expr *pExpr, /* Expression to iterate phrases of */ |
| 140 int *piPhrase, /* Pointer to phrase counter */ |
| 141 int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */ |
| 142 void *pCtx /* Second argument to pass to callback */ |
| 143 ){ |
| 144 int rc; /* Return code */ |
| 145 int eType = pExpr->eType; /* Type of expression node pExpr */ |
| 146 |
| 147 if( eType!=FTSQUERY_PHRASE ){ |
| 148 assert( pExpr->pLeft && pExpr->pRight ); |
| 149 rc = fts3ExprIterate2(pExpr->pLeft, piPhrase, x, pCtx); |
| 150 if( rc==SQLITE_OK && eType!=FTSQUERY_NOT ){ |
| 151 rc = fts3ExprIterate2(pExpr->pRight, piPhrase, x, pCtx); |
| 152 } |
| 153 }else{ |
| 154 rc = x(pExpr, *piPhrase, pCtx); |
| 155 (*piPhrase)++; |
| 156 } |
| 157 return rc; |
| 158 } |
| 159 |
| 160 /* |
| 161 ** Iterate through all phrase nodes in an FTS3 query, except those that |
| 162 ** are part of a sub-tree that is the right-hand-side of a NOT operator. |
| 163 ** For each phrase node found, the supplied callback function is invoked. |
| 164 ** |
| 165 ** If the callback function returns anything other than SQLITE_OK, |
| 166 ** the iteration is abandoned and the error code returned immediately. |
| 167 ** Otherwise, SQLITE_OK is returned after a callback has been made for |
| 168 ** all eligible phrase nodes. |
| 169 */ |
| 170 static int fts3ExprIterate( |
| 171 Fts3Expr *pExpr, /* Expression to iterate phrases of */ |
| 172 int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */ |
| 173 void *pCtx /* Second argument to pass to callback */ |
| 174 ){ |
| 175 int iPhrase = 0; /* Variable used as the phrase counter */ |
| 176 return fts3ExprIterate2(pExpr, &iPhrase, x, pCtx); |
| 177 } |
| 178 |
| 179 /* |
| 180 ** The argument to this function is always a phrase node. Its doclist |
| 181 ** (Fts3Expr.aDoclist[]) and the doclists associated with all phrase nodes |
| 182 ** to the left of this one in the query tree have already been loaded. |
| 183 ** |
| 184 ** If this phrase node is part of a series of phrase nodes joined by |
| 185 ** NEAR operators (and is not the left-most of said series), then elements are |
| 186 ** removed from the phrases doclist consistent with the NEAR restriction. If |
| 187 ** required, elements may be removed from the doclists of phrases to the |
| 188 ** left of this one that are part of the same series of NEAR operator |
| 189 ** connected phrases. |
| 190 ** |
| 191 ** If an OOM error occurs, SQLITE_NOMEM is returned. Otherwise, SQLITE_OK. |
| 192 */ |
| 193 static int fts3ExprNearTrim(Fts3Expr *pExpr){ |
| 194 int rc = SQLITE_OK; |
| 195 Fts3Expr *pParent = pExpr->pParent; |
| 196 |
| 197 assert( pExpr->eType==FTSQUERY_PHRASE ); |
| 198 while( rc==SQLITE_OK |
| 199 && pParent |
| 200 && pParent->eType==FTSQUERY_NEAR |
| 201 && pParent->pRight==pExpr |
| 202 ){ |
| 203 /* This expression (pExpr) is the right-hand-side of a NEAR operator. |
| 204 ** Find the expression to the left of the same operator. |
| 205 */ |
| 206 int nNear = pParent->nNear; |
| 207 Fts3Expr *pLeft = pParent->pLeft; |
| 208 |
| 209 if( pLeft->eType!=FTSQUERY_PHRASE ){ |
| 210 assert( pLeft->eType==FTSQUERY_NEAR ); |
| 211 assert( pLeft->pRight->eType==FTSQUERY_PHRASE ); |
| 212 pLeft = pLeft->pRight; |
| 213 } |
| 214 |
| 215 rc = sqlite3Fts3ExprNearTrim(pLeft, pExpr, nNear); |
| 216 |
| 217 pExpr = pLeft; |
| 218 pParent = pExpr->pParent; |
| 219 } |
| 220 |
| 221 return rc; |
| 222 } |
| 223 |
| 224 /* |
| 225 ** This is an fts3ExprIterate() callback used while loading the doclists |
| 226 ** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also |
| 227 ** fts3ExprLoadDoclists(). |
| 228 */ |
| 229 static int fts3ExprLoadDoclistsCb(Fts3Expr *pExpr, int iPhrase, void *ctx){ |
| 230 int rc = SQLITE_OK; |
| 231 LoadDoclistCtx *p = (LoadDoclistCtx *)ctx; |
| 232 |
| 233 UNUSED_PARAMETER(iPhrase); |
| 234 |
| 235 p->nPhrase++; |
| 236 p->nToken += pExpr->pPhrase->nToken; |
| 237 |
| 238 if( pExpr->isLoaded==0 ){ |
| 239 rc = sqlite3Fts3ExprLoadDoclist(p->pCsr, pExpr); |
| 240 pExpr->isLoaded = 1; |
| 241 if( rc==SQLITE_OK ){ |
| 242 rc = fts3ExprNearTrim(pExpr); |
| 243 } |
| 244 } |
| 245 |
| 246 return rc; |
| 247 } |
| 248 |
| 249 /* |
| 250 ** Load the doclists for each phrase in the query associated with FTS3 cursor |
| 251 ** pCsr. |
| 252 ** |
| 253 ** If pnPhrase is not NULL, then *pnPhrase is set to the number of matchable |
| 254 ** phrases in the expression (all phrases except those directly or |
| 255 ** indirectly descended from the right-hand-side of a NOT operator). If |
| 256 ** pnToken is not NULL, then it is set to the number of tokens in all |
| 257 ** matchable phrases of the expression. |
| 258 */ |
| 259 static int fts3ExprLoadDoclists( |
| 260 Fts3Cursor *pCsr, /* Fts3 cursor for current query */ |
| 261 int *pnPhrase, /* OUT: Number of phrases in query */ |
| 262 int *pnToken /* OUT: Number of tokens in query */ |
| 263 ){ |
| 264 int rc; /* Return Code */ |
| 265 LoadDoclistCtx sCtx = {0,0,0}; /* Context for fts3ExprIterate() */ |
| 266 sCtx.pCsr = pCsr; |
| 267 rc = fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb, (void *)&sCtx); |
| 268 if( pnPhrase ) *pnPhrase = sCtx.nPhrase; |
| 269 if( pnToken ) *pnToken = sCtx.nToken; |
| 270 return rc; |
| 271 } |
| 272 |
| 273 static int fts3ExprPhraseCountCb(Fts3Expr *pExpr, int iPhrase, void *ctx){ |
| 274 (*(int *)ctx)++; |
| 275 UNUSED_PARAMETER(pExpr); |
| 276 UNUSED_PARAMETER(iPhrase); |
| 277 return SQLITE_OK; |
| 278 } |
| 279 static int fts3ExprPhraseCount(Fts3Expr *pExpr){ |
| 280 int nPhrase = 0; |
| 281 (void)fts3ExprIterate(pExpr, fts3ExprPhraseCountCb, (void *)&nPhrase); |
| 282 return nPhrase; |
| 283 } |
| 284 |
/*
** Move the position-list iterator described by (*ppIter, *piIter) forward
** until its current value is greater than or equal to iNext.
**
** If the end of the list is reached first, *ppIter is set to NULL and
** *piIter to -1. If *ppIter is already NULL on entry this is a no-op.
*/
static void fts3SnippetAdvance(char **ppIter, int *piIter, int iNext){
  char *pList = *ppIter;
  int iPos;

  if( pList==0 ){
    return;
  }

  iPos = *piIter;
  while( iPos<iNext ){
    /* A byte of 0x00 or 0x01 marks the end of the list. */
    if( (*pList & 0xFE)==0 ){
      *piIter = -1;
      *ppIter = 0;
      return;
    }
    fts3GetDeltaPosition(&pList, &iPos);
  }

  *piIter = iPos;
  *ppIter = pList;
}
| 308 |
| 309 /* |
| 310 ** Advance the snippet iterator to the next candidate snippet. |
| 311 */ |
| 312 static int fts3SnippetNextCandidate(SnippetIter *pIter){ |
| 313 int i; /* Loop counter */ |
| 314 |
| 315 if( pIter->iCurrent<0 ){ |
| 316 /* The SnippetIter object has just been initialized. The first snippet |
| 317 ** candidate always starts at offset 0 (even if this candidate has a |
| 318 ** score of 0.0). |
| 319 */ |
| 320 pIter->iCurrent = 0; |
| 321 |
| 322 /* Advance the 'head' iterator of each phrase to the first offset that |
| 323 ** is greater than or equal to (iNext+nSnippet). |
| 324 */ |
| 325 for(i=0; i<pIter->nPhrase; i++){ |
| 326 SnippetPhrase *pPhrase = &pIter->aPhrase[i]; |
| 327 fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, pIter->nSnippet); |
| 328 } |
| 329 }else{ |
| 330 int iStart; |
| 331 int iEnd = 0x7FFFFFFF; |
| 332 |
| 333 for(i=0; i<pIter->nPhrase; i++){ |
| 334 SnippetPhrase *pPhrase = &pIter->aPhrase[i]; |
| 335 if( pPhrase->pHead && pPhrase->iHead<iEnd ){ |
| 336 iEnd = pPhrase->iHead; |
| 337 } |
| 338 } |
| 339 if( iEnd==0x7FFFFFFF ){ |
| 340 return 1; |
| 341 } |
| 342 |
| 343 pIter->iCurrent = iStart = iEnd - pIter->nSnippet + 1; |
| 344 for(i=0; i<pIter->nPhrase; i++){ |
| 345 SnippetPhrase *pPhrase = &pIter->aPhrase[i]; |
| 346 fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, iEnd+1); |
| 347 fts3SnippetAdvance(&pPhrase->pTail, &pPhrase->iTail, iStart); |
| 348 } |
| 349 } |
| 350 |
| 351 return 0; |
| 352 } |
| 353 |
| 354 /* |
| 355 ** Retrieve information about the current candidate snippet of snippet |
| 356 ** iterator pIter. |
| 357 */ |
| 358 static void fts3SnippetDetails( |
| 359 SnippetIter *pIter, /* Snippet iterator */ |
| 360 u64 mCovered, /* Bitmask of phrases already covered */ |
| 361 int *piToken, /* OUT: First token of proposed snippet */ |
| 362 int *piScore, /* OUT: "Score" for this snippet */ |
| 363 u64 *pmCover, /* OUT: Bitmask of phrases covered */ |
| 364 u64 *pmHighlight /* OUT: Bitmask of terms to highlight */ |
| 365 ){ |
| 366 int iStart = pIter->iCurrent; /* First token of snippet */ |
| 367 int iScore = 0; /* Score of this snippet */ |
| 368 int i; /* Loop counter */ |
| 369 u64 mCover = 0; /* Mask of phrases covered by this snippet */ |
| 370 u64 mHighlight = 0; /* Mask of tokens to highlight in snippet */ |
| 371 |
| 372 for(i=0; i<pIter->nPhrase; i++){ |
| 373 SnippetPhrase *pPhrase = &pIter->aPhrase[i]; |
| 374 if( pPhrase->pTail ){ |
| 375 char *pCsr = pPhrase->pTail; |
| 376 int iCsr = pPhrase->iTail; |
| 377 |
| 378 while( iCsr<(iStart+pIter->nSnippet) ){ |
| 379 int j; |
| 380 u64 mPhrase = (u64)1 << i; |
| 381 u64 mPos = (u64)1 << (iCsr - iStart); |
| 382 assert( iCsr>=iStart ); |
| 383 if( (mCover|mCovered)&mPhrase ){ |
| 384 iScore++; |
| 385 }else{ |
| 386 iScore += 1000; |
| 387 } |
| 388 mCover |= mPhrase; |
| 389 |
| 390 for(j=0; j<pPhrase->nToken; j++){ |
| 391 mHighlight |= (mPos>>j); |
| 392 } |
| 393 |
| 394 if( 0==(*pCsr & 0x0FE) ) break; |
| 395 fts3GetDeltaPosition(&pCsr, &iCsr); |
| 396 } |
| 397 } |
| 398 } |
| 399 |
| 400 /* Set the output variables before returning. */ |
| 401 *piToken = iStart; |
| 402 *piScore = iScore; |
| 403 *pmCover = mCover; |
| 404 *pmHighlight = mHighlight; |
| 405 } |
| 406 |
| 407 /* |
| 408 ** This function is an fts3ExprIterate() callback used by fts3BestSnippet(). |
| 409 ** Each invocation populates an element of the SnippetIter.aPhrase[] array. |
| 410 */ |
| 411 static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){ |
| 412 SnippetIter *p = (SnippetIter *)ctx; |
| 413 SnippetPhrase *pPhrase = &p->aPhrase[iPhrase]; |
| 414 char *pCsr; |
| 415 |
| 416 pPhrase->nToken = pExpr->pPhrase->nToken; |
| 417 |
| 418 pCsr = sqlite3Fts3FindPositions(pExpr, p->pCsr->iPrevId, p->iCol); |
| 419 if( pCsr ){ |
| 420 int iFirst = 0; |
| 421 pPhrase->pList = pCsr; |
| 422 fts3GetDeltaPosition(&pCsr, &iFirst); |
| 423 pPhrase->pHead = pCsr; |
| 424 pPhrase->pTail = pCsr; |
| 425 pPhrase->iHead = iFirst; |
| 426 pPhrase->iTail = iFirst; |
| 427 }else{ |
| 428 assert( pPhrase->pList==0 && pPhrase->pHead==0 && pPhrase->pTail==0 ); |
| 429 } |
| 430 |
| 431 return SQLITE_OK; |
| 432 } |
| 433 |
| 434 /* |
| 435 ** Select the fragment of text consisting of nFragment contiguous tokens |
| 436 ** from column iCol that represent the "best" snippet. The best snippet |
| 437 ** is the snippet with the highest score, where scores are calculated |
| 438 ** by adding: |
| 439 ** |
| 440 ** (a) +1 point for each occurence of a matchable phrase in the snippet. |
| 441 ** |
| 442 ** (b) +1000 points for the first occurence of each matchable phrase in |
| 443 ** the snippet for which the corresponding mCovered bit is not set. |
| 444 ** |
| 445 ** The selected snippet parameters are stored in structure *pFragment before |
| 446 ** returning. The score of the selected snippet is stored in *piScore |
| 447 ** before returning. |
| 448 */ |
| 449 static int fts3BestSnippet( |
| 450 int nSnippet, /* Desired snippet length */ |
| 451 Fts3Cursor *pCsr, /* Cursor to create snippet for */ |
| 452 int iCol, /* Index of column to create snippet from */ |
| 453 u64 mCovered, /* Mask of phrases already covered */ |
| 454 u64 *pmSeen, /* IN/OUT: Mask of phrases seen */ |
| 455 SnippetFragment *pFragment, /* OUT: Best snippet found */ |
| 456 int *piScore /* OUT: Score of snippet pFragment */ |
| 457 ){ |
| 458 int rc; /* Return Code */ |
| 459 int nList; /* Number of phrases in expression */ |
| 460 SnippetIter sIter; /* Iterates through snippet candidates */ |
| 461 int nByte; /* Number of bytes of space to allocate */ |
| 462 int iBestScore = -1; /* Best snippet score found so far */ |
| 463 int i; /* Loop counter */ |
| 464 |
| 465 memset(&sIter, 0, sizeof(sIter)); |
| 466 |
| 467 /* Iterate through the phrases in the expression to count them. The same |
| 468 ** callback makes sure the doclists are loaded for each phrase. |
| 469 */ |
| 470 rc = fts3ExprLoadDoclists(pCsr, &nList, 0); |
| 471 if( rc!=SQLITE_OK ){ |
| 472 return rc; |
| 473 } |
| 474 |
| 475 /* Now that it is known how many phrases there are, allocate and zero |
| 476 ** the required space using malloc(). |
| 477 */ |
| 478 nByte = sizeof(SnippetPhrase) * nList; |
| 479 sIter.aPhrase = (SnippetPhrase *)sqlite3_malloc(nByte); |
| 480 if( !sIter.aPhrase ){ |
| 481 return SQLITE_NOMEM; |
| 482 } |
| 483 memset(sIter.aPhrase, 0, nByte); |
| 484 |
| 485 /* Initialize the contents of the SnippetIter object. Then iterate through |
| 486 ** the set of phrases in the expression to populate the aPhrase[] array. |
| 487 */ |
| 488 sIter.pCsr = pCsr; |
| 489 sIter.iCol = iCol; |
| 490 sIter.nSnippet = nSnippet; |
| 491 sIter.nPhrase = nList; |
| 492 sIter.iCurrent = -1; |
| 493 (void)fts3ExprIterate(pCsr->pExpr, fts3SnippetFindPositions, (void *)&sIter); |
| 494 |
| 495 /* Set the *pmSeen output variable. */ |
| 496 for(i=0; i<nList; i++){ |
| 497 if( sIter.aPhrase[i].pHead ){ |
| 498 *pmSeen |= (u64)1 << i; |
| 499 } |
| 500 } |
| 501 |
| 502 /* Loop through all candidate snippets. Store the best snippet in |
| 503 ** *pFragment. Store its associated 'score' in iBestScore. |
| 504 */ |
| 505 pFragment->iCol = iCol; |
| 506 while( !fts3SnippetNextCandidate(&sIter) ){ |
| 507 int iPos; |
| 508 int iScore; |
| 509 u64 mCover; |
| 510 u64 mHighlight; |
| 511 fts3SnippetDetails(&sIter, mCovered, &iPos, &iScore, &mCover, &mHighlight); |
| 512 assert( iScore>=0 ); |
| 513 if( iScore>iBestScore ){ |
| 514 pFragment->iPos = iPos; |
| 515 pFragment->hlmask = mHighlight; |
| 516 pFragment->covered = mCover; |
| 517 iBestScore = iScore; |
| 518 } |
| 519 } |
| 520 |
| 521 sqlite3_free(sIter.aPhrase); |
| 522 *piScore = iBestScore; |
| 523 return SQLITE_OK; |
| 524 } |
| 525 |
| 526 |
| 527 /* |
| 528 ** Append a string to the string-buffer passed as the first argument. |
| 529 ** |
| 530 ** If nAppend is negative, then the length of the string zAppend is |
| 531 ** determined using strlen(). |
| 532 */ |
| 533 static int fts3StringAppend( |
| 534 StrBuffer *pStr, /* Buffer to append to */ |
| 535 const char *zAppend, /* Pointer to data to append to buffer */ |
| 536 int nAppend /* Size of zAppend in bytes (or -1) */ |
| 537 ){ |
| 538 if( nAppend<0 ){ |
| 539 nAppend = (int)strlen(zAppend); |
| 540 } |
| 541 |
| 542 /* If there is insufficient space allocated at StrBuffer.z, use realloc() |
| 543 ** to grow the buffer until so that it is big enough to accomadate the |
| 544 ** appended data. |
| 545 */ |
| 546 if( pStr->n+nAppend+1>=pStr->nAlloc ){ |
| 547 int nAlloc = pStr->nAlloc+nAppend+100; |
| 548 char *zNew = sqlite3_realloc(pStr->z, nAlloc); |
| 549 if( !zNew ){ |
| 550 return SQLITE_NOMEM; |
| 551 } |
| 552 pStr->z = zNew; |
| 553 pStr->nAlloc = nAlloc; |
| 554 } |
| 555 |
| 556 /* Append the data to the string buffer. */ |
| 557 memcpy(&pStr->z[pStr->n], zAppend, nAppend); |
| 558 pStr->n += nAppend; |
| 559 pStr->z[pStr->n] = '\0'; |
| 560 |
| 561 return SQLITE_OK; |
| 562 } |
| 563 |
| 564 /* |
| 565 ** The fts3BestSnippet() function often selects snippets that end with a |
| 566 ** query term. That is, the final term of the snippet is always a term |
| 567 ** that requires highlighting. For example, if 'X' is a highlighted term |
| 568 ** and '.' is a non-highlighted term, BestSnippet() may select: |
| 569 ** |
| 570 ** ........X.....X |
| 571 ** |
| 572 ** This function "shifts" the beginning of the snippet forward in the |
| 573 ** document so that there are approximately the same number of |
| 574 ** non-highlighted terms to the right of the final highlighted term as there |
| 575 ** are to the left of the first highlighted term. For example, to this: |
| 576 ** |
| 577 ** ....X.....X.... |
| 578 ** |
| 579 ** This is done as part of extracting the snippet text, not when selecting |
| 580 ** the snippet. Snippet selection is done based on doclists only, so there |
| 581 ** is no way for fts3BestSnippet() to know whether or not the document |
| 582 ** actually contains terms that follow the final highlighted term. |
| 583 */ |
| 584 static int fts3SnippetShift( |
| 585 Fts3Table *pTab, /* FTS3 table snippet comes from */ |
| 586 int nSnippet, /* Number of tokens desired for snippet */ |
| 587 const char *zDoc, /* Document text to extract snippet from */ |
| 588 int nDoc, /* Size of buffer zDoc in bytes */ |
| 589 int *piPos, /* IN/OUT: First token of snippet */ |
| 590 u64 *pHlmask /* IN/OUT: Mask of tokens to highlight */ |
| 591 ){ |
| 592 u64 hlmask = *pHlmask; /* Local copy of initial highlight-mask */ |
| 593 |
| 594 if( hlmask ){ |
| 595 int nLeft; /* Tokens to the left of first highlight */ |
| 596 int nRight; /* Tokens to the right of last highlight */ |
| 597 int nDesired; /* Ideal number of tokens to shift forward */ |
| 598 |
| 599 for(nLeft=0; !(hlmask & ((u64)1 << nLeft)); nLeft++); |
| 600 for(nRight=0; !(hlmask & ((u64)1 << (nSnippet-1-nRight))); nRight++); |
| 601 nDesired = (nLeft-nRight)/2; |
| 602 |
| 603 /* Ideally, the start of the snippet should be pushed forward in the |
| 604 ** document nDesired tokens. This block checks if there are actually |
| 605 ** nDesired tokens to the right of the snippet. If so, *piPos and |
| 606 ** *pHlMask are updated to shift the snippet nDesired tokens to the |
| 607 ** right. Otherwise, the snippet is shifted by the number of tokens |
| 608 ** available. |
| 609 */ |
| 610 if( nDesired>0 ){ |
| 611 int nShift; /* Number of tokens to shift snippet by */ |
| 612 int iCurrent = 0; /* Token counter */ |
| 613 int rc; /* Return Code */ |
| 614 sqlite3_tokenizer_module *pMod; |
| 615 sqlite3_tokenizer_cursor *pC; |
| 616 pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule; |
| 617 |
| 618 /* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired) |
| 619 ** or more tokens in zDoc/nDoc. |
| 620 */ |
| 621 rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC); |
| 622 if( rc!=SQLITE_OK ){ |
| 623 return rc; |
| 624 } |
| 625 pC->pTokenizer = pTab->pTokenizer; |
| 626 while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){ |
| 627 const char *ZDUMMY; int DUMMY1, DUMMY2, DUMMY3; |
| 628 rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent); |
| 629 } |
| 630 pMod->xClose(pC); |
| 631 if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ return rc; } |
| 632 |
| 633 nShift = (rc==SQLITE_DONE)+iCurrent-nSnippet; |
| 634 assert( nShift<=nDesired ); |
| 635 if( nShift>0 ){ |
| 636 *piPos += nShift; |
| 637 *pHlmask = hlmask >> nShift; |
| 638 } |
| 639 } |
| 640 } |
| 641 return SQLITE_OK; |
| 642 } |
| 643 |
| 644 /* |
| 645 ** Extract the snippet text for fragment pFragment from cursor pCsr and |
| 646 ** append it to string buffer pOut. |
| 647 */ |
| 648 static int fts3SnippetText( |
| 649 Fts3Cursor *pCsr, /* FTS3 Cursor */ |
| 650 SnippetFragment *pFragment, /* Snippet to extract */ |
| 651 int iFragment, /* Fragment number */ |
| 652 int isLast, /* True for final fragment in snippet */ |
| 653 int nSnippet, /* Number of tokens in extracted snippet */ |
| 654 const char *zOpen, /* String inserted before highlighted term */ |
| 655 const char *zClose, /* String inserted after highlighted term */ |
| 656 const char *zEllipsis, /* String inserted between snippets */ |
| 657 StrBuffer *pOut /* Write output here */ |
| 658 ){ |
| 659 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; |
| 660 int rc; /* Return code */ |
| 661 const char *zDoc; /* Document text to extract snippet from */ |
| 662 int nDoc; /* Size of zDoc in bytes */ |
| 663 int iCurrent = 0; /* Current token number of document */ |
| 664 int iEnd = 0; /* Byte offset of end of current token */ |
| 665 int isShiftDone = 0; /* True after snippet is shifted */ |
| 666 int iPos = pFragment->iPos; /* First token of snippet */ |
| 667 u64 hlmask = pFragment->hlmask; /* Highlight-mask for snippet */ |
| 668 int iCol = pFragment->iCol+1; /* Query column to extract text from */ |
| 669 sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */ |
| 670 sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor open on zDoc/nDoc */ |
| 671 const char *ZDUMMY; /* Dummy argument used with tokenizer */ |
| 672 int DUMMY1; /* Dummy argument used with tokenizer */ |
| 673 |
| 674 zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol); |
| 675 if( zDoc==0 ){ |
| 676 if( sqlite3_column_type(pCsr->pStmt, iCol)!=SQLITE_NULL ){ |
| 677 return SQLITE_NOMEM; |
| 678 } |
| 679 return SQLITE_OK; |
| 680 } |
| 681 nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol); |
| 682 |
| 683 /* Open a token cursor on the document. */ |
| 684 pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule; |
| 685 rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC); |
| 686 if( rc!=SQLITE_OK ){ |
| 687 return rc; |
| 688 } |
| 689 pC->pTokenizer = pTab->pTokenizer; |
| 690 |
| 691 while( rc==SQLITE_OK ){ |
| 692 int iBegin; /* Offset in zDoc of start of token */ |
| 693 int iFin; /* Offset in zDoc of end of token */ |
| 694 int isHighlight; /* True for highlighted terms */ |
| 695 |
| 696 rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent); |
| 697 if( rc!=SQLITE_OK ){ |
| 698 if( rc==SQLITE_DONE ){ |
| 699 /* Special case - the last token of the snippet is also the last token |
| 700 ** of the column. Append any punctuation that occurred between the end |
| 701 ** of the previous token and the end of the document to the output. |
| 702 ** Then break out of the loop. */ |
| 703 rc = fts3StringAppend(pOut, &zDoc[iEnd], -1); |
| 704 } |
| 705 break; |
| 706 } |
| 707 if( iCurrent<iPos ){ continue; } |
| 708 |
| 709 if( !isShiftDone ){ |
| 710 int n = nDoc - iBegin; |
| 711 rc = fts3SnippetShift(pTab, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask); |
| 712 isShiftDone = 1; |
| 713 |
| 714 /* Now that the shift has been done, check if the initial "..." are |
| 715 ** required. They are required if (a) this is not the first fragment, |
| 716 ** or (b) this fragment does not begin at position 0 of its column. |
| 717 */ |
| 718 if( rc==SQLITE_OK && (iPos>0 || iFragment>0) ){ |
| 719 rc = fts3StringAppend(pOut, zEllipsis, -1); |
| 720 } |
| 721 if( rc!=SQLITE_OK || iCurrent<iPos ) continue; |
| 722 } |
| 723 |
| 724 if( iCurrent>=(iPos+nSnippet) ){ |
| 725 if( isLast ){ |
| 726 rc = fts3StringAppend(pOut, zEllipsis, -1); |
| 727 } |
| 728 break; |
| 729 } |
| 730 |
| 731 /* Set isHighlight to true if this term should be highlighted. */ |
| 732 isHighlight = (hlmask & ((u64)1 << (iCurrent-iPos)))!=0; |
| 733 |
| 734 if( iCurrent>iPos ) rc = fts3StringAppend(pOut, &zDoc[iEnd], iBegin-iEnd); |
| 735 if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zOpen, -1); |
| 736 if( rc==SQLITE_OK ) rc = fts3StringAppend(pOut, &zDoc[iBegin], iFin-iBegin); |
| 737 if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zClose, -1); |
| 738 |
| 739 iEnd = iFin; |
| 740 } |
| 741 |
| 742 pMod->xClose(pC); |
| 743 return rc; |
| 744 } |
| 745 |
| 746 |
/*
** Count the entries in a column-list, which is a delta-encoded list of
** term offsets within a single column of a single row.
**
** On entry *ppCollist must point to the first varint of the column-list
** (the one holding the position of the first matching term in the column
** data). On return it points to the first byte after the last varint of
** the column-list: either the 0x00 that ends the position-list or the 0x01
** that precedes the column number of the next column in the position-list.
**
** The number of elements in the column-list is returned.
*/
static int fts3ColumnlistCount(char **ppCollist){
  char *p = *ppCollist;           /* Scan pointer */
  char bMore = 0;                 /* 0x80 bit of the preceding byte */
  int nEntry = 0;                 /* Varints completed so far */

  /* A column-list is terminated by either a 0x01 or 0x00 byte, but only
  ** when that byte is not the continuation of a multi-byte varint. bMore
  ** carries the high bit of the previous byte for that purpose. A varint
  ** is counted when its final byte (high bit clear) is consumed.
  */
  for(; 0xFE & (*p | bMore); p++){
    bMore = *p & 0x80;
    if( !bMore ){
      nEntry++;
    }
  }

  *ppCollist = p;
  return nEntry;
}
| 774 |
| 775 static void fts3LoadColumnlistCounts(char **pp, u32 *aOut, int isGlobal){ |
| 776 char *pCsr = *pp; |
| 777 while( *pCsr ){ |
| 778 int nHit; |
| 779 sqlite3_int64 iCol = 0; |
| 780 if( *pCsr==0x01 ){ |
| 781 pCsr++; |
| 782 pCsr += sqlite3Fts3GetVarint(pCsr, &iCol); |
| 783 } |
| 784 nHit = fts3ColumnlistCount(&pCsr); |
| 785 assert( nHit>0 ); |
| 786 if( isGlobal ){ |
| 787 aOut[iCol*3+1]++; |
| 788 } |
| 789 aOut[iCol*3] += nHit; |
| 790 } |
| 791 pCsr++; |
| 792 *pp = pCsr; |
| 793 } |
| 794 |
| 795 /* |
| 796 ** fts3ExprIterate() callback used to collect the "global" matchinfo stats |
| 797 ** for a single query. |
| 798 ** |
| 799 ** fts3ExprIterate() callback to load the 'global' elements of a |
| 800 ** FTS3_MATCHINFO_HITS matchinfo array. The global stats are those elements |
| 801 ** of the matchinfo array that are constant for all rows returned by the |
| 802 ** current query. |
| 803 ** |
| 804 ** Argument pCtx is actually a pointer to a struct of type MatchInfo. This |
| 805 ** function populates Matchinfo.aMatchinfo[] as follows: |
| 806 ** |
| 807 ** for(iCol=0; iCol<nCol; iCol++){ |
| 808 ** aMatchinfo[3*iPhrase*nCol + 3*iCol + 1] = X; |
| 809 ** aMatchinfo[3*iPhrase*nCol + 3*iCol + 2] = Y; |
| 810 ** } |
| 811 ** |
| 812 ** where X is the number of matches for phrase iPhrase is column iCol of all |
| 813 ** rows of the table. Y is the number of rows for which column iCol contains |
| 814 ** at least one instance of phrase iPhrase. |
| 815 ** |
| 816 ** If the phrase pExpr consists entirely of deferred tokens, then all X and |
| 817 ** Y values are set to nDoc, where nDoc is the number of documents in the |
| 818 ** file system. This is done because the full-text index doclist is required |
| 819 ** to calculate these values properly, and the full-text index doclist is |
| 820 ** not available for deferred tokens. |
| 821 */ |
| 822 static int fts3ExprGlobalHitsCb( |
| 823 Fts3Expr *pExpr, /* Phrase expression node */ |
| 824 int iPhrase, /* Phrase number (numbered from zero) */ |
| 825 void *pCtx /* Pointer to MatchInfo structure */ |
| 826 ){ |
| 827 MatchInfo *p = (MatchInfo *)pCtx; |
| 828 Fts3Cursor *pCsr = p->pCursor; |
| 829 char *pIter; |
| 830 char *pEnd; |
| 831 char *pFree = 0; |
| 832 u32 *aOut = &p->aMatchinfo[3*iPhrase*p->nCol]; |
| 833 |
| 834 assert( pExpr->isLoaded ); |
| 835 assert( pExpr->eType==FTSQUERY_PHRASE ); |
| 836 |
| 837 if( pCsr->pDeferred ){ |
| 838 Fts3Phrase *pPhrase = pExpr->pPhrase; |
| 839 int ii; |
| 840 for(ii=0; ii<pPhrase->nToken; ii++){ |
| 841 if( pPhrase->aToken[ii].bFulltext ) break; |
| 842 } |
| 843 if( ii<pPhrase->nToken ){ |
| 844 int nFree = 0; |
| 845 int rc = sqlite3Fts3ExprLoadFtDoclist(pCsr, pExpr, &pFree, &nFree); |
| 846 if( rc!=SQLITE_OK ) return rc; |
| 847 pIter = pFree; |
| 848 pEnd = &pFree[nFree]; |
| 849 }else{ |
| 850 int iCol; /* Column index */ |
| 851 for(iCol=0; iCol<p->nCol; iCol++){ |
| 852 aOut[iCol*3 + 1] = (u32)p->nDoc; |
| 853 aOut[iCol*3 + 2] = (u32)p->nDoc; |
| 854 } |
| 855 return SQLITE_OK; |
| 856 } |
| 857 }else{ |
| 858 pIter = pExpr->aDoclist; |
| 859 pEnd = &pExpr->aDoclist[pExpr->nDoclist]; |
| 860 } |
| 861 |
| 862 /* Fill in the global hit count matrix row for this phrase. */ |
| 863 while( pIter<pEnd ){ |
| 864 while( *pIter++ & 0x80 ); /* Skip past docid. */ |
| 865 fts3LoadColumnlistCounts(&pIter, &aOut[1], 1); |
| 866 } |
| 867 |
| 868 sqlite3_free(pFree); |
| 869 return SQLITE_OK; |
| 870 } |
| 871 |
| 872 /* |
| 873 ** fts3ExprIterate() callback used to collect the "local" part of the |
| 874 ** FTS3_MATCHINFO_HITS array. The local stats are those elements of the |
| 875 ** array that are different for each row returned by the query. |
| 876 */ |
| 877 static int fts3ExprLocalHitsCb( |
| 878 Fts3Expr *pExpr, /* Phrase expression node */ |
| 879 int iPhrase, /* Phrase number */ |
| 880 void *pCtx /* Pointer to MatchInfo structure */ |
| 881 ){ |
| 882 MatchInfo *p = (MatchInfo *)pCtx; |
| 883 int iStart = iPhrase * p->nCol * 3; |
| 884 int i; |
| 885 |
| 886 for(i=0; i<p->nCol; i++) p->aMatchinfo[iStart+i*3] = 0; |
| 887 |
| 888 if( pExpr->aDoclist ){ |
| 889 char *pCsr; |
| 890 |
| 891 pCsr = sqlite3Fts3FindPositions(pExpr, p->pCursor->iPrevId, -1); |
| 892 if( pCsr ){ |
| 893 fts3LoadColumnlistCounts(&pCsr, &p->aMatchinfo[iStart], 0); |
| 894 } |
| 895 } |
| 896 |
| 897 return SQLITE_OK; |
| 898 } |
| 899 |
| 900 static int fts3MatchinfoCheck( |
| 901 Fts3Table *pTab, |
| 902 char cArg, |
| 903 char **pzErr |
| 904 ){ |
| 905 if( (cArg==FTS3_MATCHINFO_NPHRASE) |
| 906 || (cArg==FTS3_MATCHINFO_NCOL) |
| 907 || (cArg==FTS3_MATCHINFO_NDOC && pTab->bHasStat) |
| 908 || (cArg==FTS3_MATCHINFO_AVGLENGTH && pTab->bHasStat) |
| 909 || (cArg==FTS3_MATCHINFO_LENGTH && pTab->bHasDocsize) |
| 910 || (cArg==FTS3_MATCHINFO_LCS) |
| 911 || (cArg==FTS3_MATCHINFO_HITS) |
| 912 ){ |
| 913 return SQLITE_OK; |
| 914 } |
| 915 *pzErr = sqlite3_mprintf("unrecognized matchinfo request: %c", cArg); |
| 916 return SQLITE_ERROR; |
| 917 } |
| 918 |
| 919 static int fts3MatchinfoSize(MatchInfo *pInfo, char cArg){ |
| 920 int nVal; /* Number of integers output by cArg */ |
| 921 |
| 922 switch( cArg ){ |
| 923 case FTS3_MATCHINFO_NDOC: |
| 924 case FTS3_MATCHINFO_NPHRASE: |
| 925 case FTS3_MATCHINFO_NCOL: |
| 926 nVal = 1; |
| 927 break; |
| 928 |
| 929 case FTS3_MATCHINFO_AVGLENGTH: |
| 930 case FTS3_MATCHINFO_LENGTH: |
| 931 case FTS3_MATCHINFO_LCS: |
| 932 nVal = pInfo->nCol; |
| 933 break; |
| 934 |
| 935 default: |
| 936 assert( cArg==FTS3_MATCHINFO_HITS ); |
| 937 nVal = pInfo->nCol * pInfo->nPhrase * 3; |
| 938 break; |
| 939 } |
| 940 |
| 941 return nVal; |
| 942 } |
| 943 |
| 944 static int fts3MatchinfoSelectDoctotal( |
| 945 Fts3Table *pTab, |
| 946 sqlite3_stmt **ppStmt, |
| 947 sqlite3_int64 *pnDoc, |
| 948 const char **paLen |
| 949 ){ |
| 950 sqlite3_stmt *pStmt; |
| 951 const char *a; |
| 952 sqlite3_int64 nDoc; |
| 953 |
| 954 if( !*ppStmt ){ |
| 955 int rc = sqlite3Fts3SelectDoctotal(pTab, ppStmt); |
| 956 if( rc!=SQLITE_OK ) return rc; |
| 957 } |
| 958 pStmt = *ppStmt; |
| 959 assert( sqlite3_data_count(pStmt)==1 ); |
| 960 |
| 961 a = sqlite3_column_blob(pStmt, 0); |
| 962 a += sqlite3Fts3GetVarint(a, &nDoc); |
| 963 if( nDoc==0 ) return SQLITE_CORRUPT; |
| 964 *pnDoc = (u32)nDoc; |
| 965 |
| 966 if( paLen ) *paLen = a; |
| 967 return SQLITE_OK; |
| 968 } |
| 969 |
| 970 /* |
| 971 ** An instance of the following structure is used to store state while |
| 972 ** iterating through a multi-column position-list corresponding to the |
| 973 ** hits for a single phrase on a single row in order to calculate the |
| 974 ** values for a matchinfo() FTS3_MATCHINFO_LCS request. |
| 975 */ |
| 976 typedef struct LcsIterator LcsIterator; |
| 977 struct LcsIterator { |
| 978 Fts3Expr *pExpr; /* Pointer to phrase expression */ |
| 979 char *pRead; /* Cursor used to iterate through aDoclist */ |
| 980 int iPosOffset; /* Tokens count up to end of this phrase */ |
| 981 int iCol; /* Current column number */ |
| 982 int iPos; /* Current position */ |
| 983 }; |
| 984 |
/*
** If LcsIterator.iCol is set to the following value, the iterator has
** finished iterating through all offsets for all columns.
**
** The expansion carries no trailing semicolon, so the macro may be used
** inside expressions (comparisons, assert() arguments) as well as on the
** right-hand side of an assignment statement.
*/
#define LCS_ITERATOR_FINISHED 0x7FFFFFFF
| 990 |
| 991 static int fts3MatchinfoLcsCb( |
| 992 Fts3Expr *pExpr, /* Phrase expression node */ |
| 993 int iPhrase, /* Phrase number (numbered from zero) */ |
| 994 void *pCtx /* Pointer to MatchInfo structure */ |
| 995 ){ |
| 996 LcsIterator *aIter = (LcsIterator *)pCtx; |
| 997 aIter[iPhrase].pExpr = pExpr; |
| 998 return SQLITE_OK; |
| 999 } |
| 1000 |
| 1001 /* |
| 1002 ** Advance the iterator passed as an argument to the next position. Return |
| 1003 ** 1 if the iterator is at EOF or if it now points to the start of the |
| 1004 ** position list for the next column. |
| 1005 */ |
| 1006 static int fts3LcsIteratorAdvance(LcsIterator *pIter){ |
| 1007 char *pRead = pIter->pRead; |
| 1008 sqlite3_int64 iRead; |
| 1009 int rc = 0; |
| 1010 |
| 1011 pRead += sqlite3Fts3GetVarint(pRead, &iRead); |
| 1012 if( iRead==0 ){ |
| 1013 pIter->iCol = LCS_ITERATOR_FINISHED; |
| 1014 rc = 1; |
| 1015 }else{ |
| 1016 if( iRead==1 ){ |
| 1017 pRead += sqlite3Fts3GetVarint(pRead, &iRead); |
| 1018 pIter->iCol = (int)iRead; |
| 1019 pIter->iPos = pIter->iPosOffset; |
| 1020 pRead += sqlite3Fts3GetVarint(pRead, &iRead); |
| 1021 rc = 1; |
| 1022 } |
| 1023 pIter->iPos += (int)(iRead-2); |
| 1024 } |
| 1025 |
| 1026 pIter->pRead = pRead; |
| 1027 return rc; |
| 1028 } |
| 1029 |
| 1030 /* |
| 1031 ** This function implements the FTS3_MATCHINFO_LCS matchinfo() flag. |
| 1032 ** |
| 1033 ** If the call is successful, the longest-common-substring lengths for each |
| 1034 ** column are written into the first nCol elements of the pInfo->aMatchinfo[] |
| 1035 ** array before returning. SQLITE_OK is returned in this case. |
| 1036 ** |
| 1037 ** Otherwise, if an error occurs, an SQLite error code is returned and the |
| 1038 ** data written to the first nCol elements of pInfo->aMatchinfo[] is |
| 1039 ** undefined. |
| 1040 */ |
| 1041 static int fts3MatchinfoLcs(Fts3Cursor *pCsr, MatchInfo *pInfo){ |
| 1042 LcsIterator *aIter; |
| 1043 int i; |
| 1044 int iCol; |
| 1045 int nToken = 0; |
| 1046 |
| 1047 /* Allocate and populate the array of LcsIterator objects. The array |
| 1048 ** contains one element for each matchable phrase in the query. |
| 1049 **/ |
| 1050 aIter = sqlite3_malloc(sizeof(LcsIterator) * pCsr->nPhrase); |
| 1051 if( !aIter ) return SQLITE_NOMEM; |
| 1052 memset(aIter, 0, sizeof(LcsIterator) * pCsr->nPhrase); |
| 1053 (void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter); |
| 1054 for(i=0; i<pInfo->nPhrase; i++){ |
| 1055 LcsIterator *pIter = &aIter[i]; |
| 1056 nToken -= pIter->pExpr->pPhrase->nToken; |
| 1057 pIter->iPosOffset = nToken; |
| 1058 pIter->pRead = sqlite3Fts3FindPositions(pIter->pExpr, pCsr->iPrevId, -1); |
| 1059 if( pIter->pRead ){ |
| 1060 pIter->iPos = pIter->iPosOffset; |
| 1061 fts3LcsIteratorAdvance(&aIter[i]); |
| 1062 }else{ |
| 1063 pIter->iCol = LCS_ITERATOR_FINISHED; |
| 1064 } |
| 1065 } |
| 1066 |
| 1067 for(iCol=0; iCol<pInfo->nCol; iCol++){ |
| 1068 int nLcs = 0; /* LCS value for this column */ |
| 1069 int nLive = 0; /* Number of iterators in aIter not at EOF */ |
| 1070 |
| 1071 /* Loop through the iterators in aIter[]. Set nLive to the number of |
| 1072 ** iterators that point to a position-list corresponding to column iCol. |
| 1073 */ |
| 1074 for(i=0; i<pInfo->nPhrase; i++){ |
| 1075 assert( aIter[i].iCol>=iCol ); |
| 1076 if( aIter[i].iCol==iCol ) nLive++; |
| 1077 } |
| 1078 |
| 1079 /* The following loop runs until all iterators in aIter[] have finished |
| 1080 ** iterating through positions in column iCol. Exactly one of the |
| 1081 ** iterators is advanced each time the body of the loop is run. |
| 1082 */ |
| 1083 while( nLive>0 ){ |
| 1084 LcsIterator *pAdv = 0; /* The iterator to advance by one position */ |
| 1085 int nThisLcs = 0; /* LCS for the current iterator positions */ |
| 1086 |
| 1087 for(i=0; i<pInfo->nPhrase; i++){ |
| 1088 LcsIterator *pIter = &aIter[i]; |
| 1089 if( iCol!=pIter->iCol ){ |
| 1090 /* This iterator is already at EOF for this column. */ |
| 1091 nThisLcs = 0; |
| 1092 }else{ |
| 1093 if( pAdv==0 || pIter->iPos<pAdv->iPos ){ |
| 1094 pAdv = pIter; |
| 1095 } |
| 1096 if( nThisLcs==0 || pIter->iPos==pIter[-1].iPos ){ |
| 1097 nThisLcs++; |
| 1098 }else{ |
| 1099 nThisLcs = 1; |
| 1100 } |
| 1101 if( nThisLcs>nLcs ) nLcs = nThisLcs; |
| 1102 } |
| 1103 } |
| 1104 if( fts3LcsIteratorAdvance(pAdv) ) nLive--; |
| 1105 } |
| 1106 |
| 1107 pInfo->aMatchinfo[iCol] = nLcs; |
| 1108 } |
| 1109 |
| 1110 sqlite3_free(aIter); |
| 1111 return SQLITE_OK; |
| 1112 } |
| 1113 |
| 1114 /* |
| 1115 ** Populate the buffer pInfo->aMatchinfo[] with an array of integers to |
| 1116 ** be returned by the matchinfo() function. Argument zArg contains the |
| 1117 ** format string passed as the second argument to matchinfo (or the |
| 1118 ** default value "pcx" if no second argument was specified). The format |
| 1119 ** string has already been validated and the pInfo->aMatchinfo[] array |
| 1120 ** is guaranteed to be large enough for the output. |
| 1121 ** |
| 1122 ** If bGlobal is true, then populate all fields of the matchinfo() output. |
| 1123 ** If it is false, then assume that those fields that do not change between |
| 1124 ** rows (i.e. FTS3_MATCHINFO_NPHRASE, NCOL, NDOC, AVGLENGTH and part of HITS) |
| 1125 ** have already been populated. |
| 1126 ** |
| 1127 ** Return SQLITE_OK if successful, or an SQLite error code if an error |
| 1128 ** occurs. If a value other than SQLITE_OK is returned, the state the |
| 1129 ** pInfo->aMatchinfo[] buffer is left in is undefined. |
| 1130 */ |
| 1131 static int fts3MatchinfoValues( |
| 1132 Fts3Cursor *pCsr, /* FTS3 cursor object */ |
| 1133 int bGlobal, /* True to grab the global stats */ |
| 1134 MatchInfo *pInfo, /* Matchinfo context object */ |
| 1135 const char *zArg /* Matchinfo format string */ |
| 1136 ){ |
| 1137 int rc = SQLITE_OK; |
| 1138 int i; |
| 1139 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; |
| 1140 sqlite3_stmt *pSelect = 0; |
| 1141 |
| 1142 for(i=0; rc==SQLITE_OK && zArg[i]; i++){ |
| 1143 |
| 1144 switch( zArg[i] ){ |
| 1145 case FTS3_MATCHINFO_NPHRASE: |
| 1146 if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nPhrase; |
| 1147 break; |
| 1148 |
| 1149 case FTS3_MATCHINFO_NCOL: |
| 1150 if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nCol; |
| 1151 break; |
| 1152 |
| 1153 case FTS3_MATCHINFO_NDOC: |
| 1154 if( bGlobal ){ |
| 1155 sqlite3_int64 nDoc; |
| 1156 rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, 0); |
| 1157 pInfo->aMatchinfo[0] = (u32)nDoc; |
| 1158 } |
| 1159 break; |
| 1160 |
| 1161 case FTS3_MATCHINFO_AVGLENGTH: |
| 1162 if( bGlobal ){ |
| 1163 sqlite3_int64 nDoc; /* Number of rows in table */ |
| 1164 const char *a; /* Aggregate column length array */ |
| 1165 |
| 1166 rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, &a); |
| 1167 if( rc==SQLITE_OK ){ |
| 1168 int iCol; |
| 1169 for(iCol=0; iCol<pInfo->nCol; iCol++){ |
| 1170 u32 iVal; |
| 1171 sqlite3_int64 nToken; |
| 1172 a += sqlite3Fts3GetVarint(a, &nToken); |
| 1173 iVal = (u32)(((u32)(nToken&0xffffffff)+nDoc/2)/nDoc); |
| 1174 pInfo->aMatchinfo[iCol] = iVal; |
| 1175 } |
| 1176 } |
| 1177 } |
| 1178 break; |
| 1179 |
| 1180 case FTS3_MATCHINFO_LENGTH: { |
| 1181 sqlite3_stmt *pSelectDocsize = 0; |
| 1182 rc = sqlite3Fts3SelectDocsize(pTab, pCsr->iPrevId, &pSelectDocsize); |
| 1183 if( rc==SQLITE_OK ){ |
| 1184 int iCol; |
| 1185 const char *a = sqlite3_column_blob(pSelectDocsize, 0); |
| 1186 for(iCol=0; iCol<pInfo->nCol; iCol++){ |
| 1187 sqlite3_int64 nToken; |
| 1188 a += sqlite3Fts3GetVarint(a, &nToken); |
| 1189 pInfo->aMatchinfo[iCol] = (u32)nToken; |
| 1190 } |
| 1191 } |
| 1192 sqlite3_reset(pSelectDocsize); |
| 1193 break; |
| 1194 } |
| 1195 |
| 1196 case FTS3_MATCHINFO_LCS: |
| 1197 rc = fts3ExprLoadDoclists(pCsr, 0, 0); |
| 1198 if( rc==SQLITE_OK ){ |
| 1199 rc = fts3MatchinfoLcs(pCsr, pInfo); |
| 1200 } |
| 1201 break; |
| 1202 |
| 1203 default: { |
| 1204 Fts3Expr *pExpr; |
| 1205 assert( zArg[i]==FTS3_MATCHINFO_HITS ); |
| 1206 pExpr = pCsr->pExpr; |
| 1207 rc = fts3ExprLoadDoclists(pCsr, 0, 0); |
| 1208 if( rc!=SQLITE_OK ) break; |
| 1209 if( bGlobal ){ |
| 1210 if( pCsr->pDeferred ){ |
| 1211 rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &pInfo->nDoc, 0); |
| 1212 if( rc!=SQLITE_OK ) break; |
| 1213 } |
| 1214 rc = fts3ExprIterate(pExpr, fts3ExprGlobalHitsCb,(void*)pInfo); |
| 1215 if( rc!=SQLITE_OK ) break; |
| 1216 } |
| 1217 (void)fts3ExprIterate(pExpr, fts3ExprLocalHitsCb,(void*)pInfo); |
| 1218 break; |
| 1219 } |
| 1220 } |
| 1221 |
| 1222 pInfo->aMatchinfo += fts3MatchinfoSize(pInfo, zArg[i]); |
| 1223 } |
| 1224 |
| 1225 sqlite3_reset(pSelect); |
| 1226 return rc; |
| 1227 } |
| 1228 |
| 1229 |
| 1230 /* |
| 1231 ** Populate pCsr->aMatchinfo[] with data for the current row. The |
| 1232 ** 'matchinfo' data is an array of 32-bit unsigned integers (C type u32). |
| 1233 */ |
| 1234 static int fts3GetMatchinfo( |
| 1235 Fts3Cursor *pCsr, /* FTS3 Cursor object */ |
| 1236 const char *zArg /* Second argument to matchinfo() function */ |
| 1237 ){ |
| 1238 MatchInfo sInfo; |
| 1239 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; |
| 1240 int rc = SQLITE_OK; |
| 1241 int bGlobal = 0; /* Collect 'global' stats as well as local */ |
| 1242 |
| 1243 memset(&sInfo, 0, sizeof(MatchInfo)); |
| 1244 sInfo.pCursor = pCsr; |
| 1245 sInfo.nCol = pTab->nColumn; |
| 1246 |
| 1247 /* If there is cached matchinfo() data, but the format string for the |
| 1248 ** cache does not match the format string for this request, discard |
| 1249 ** the cached data. */ |
| 1250 if( pCsr->zMatchinfo && strcmp(pCsr->zMatchinfo, zArg) ){ |
| 1251 assert( pCsr->aMatchinfo ); |
| 1252 sqlite3_free(pCsr->aMatchinfo); |
| 1253 pCsr->zMatchinfo = 0; |
| 1254 pCsr->aMatchinfo = 0; |
| 1255 } |
| 1256 |
| 1257 /* If Fts3Cursor.aMatchinfo[] is NULL, then this is the first time the |
| 1258 ** matchinfo function has been called for this query. In this case |
| 1259 ** allocate the array used to accumulate the matchinfo data and |
| 1260 ** initialize those elements that are constant for every row. |
| 1261 */ |
| 1262 if( pCsr->aMatchinfo==0 ){ |
| 1263 int nMatchinfo = 0; /* Number of u32 elements in match-info */ |
| 1264 int nArg; /* Bytes in zArg */ |
| 1265 int i; /* Used to iterate through zArg */ |
| 1266 |
| 1267 /* Determine the number of phrases in the query */ |
| 1268 pCsr->nPhrase = fts3ExprPhraseCount(pCsr->pExpr); |
| 1269 sInfo.nPhrase = pCsr->nPhrase; |
| 1270 |
| 1271 /* Determine the number of integers in the buffer returned by this call. */ |
| 1272 for(i=0; zArg[i]; i++){ |
| 1273 nMatchinfo += fts3MatchinfoSize(&sInfo, zArg[i]); |
| 1274 } |
| 1275 |
| 1276 /* Allocate space for Fts3Cursor.aMatchinfo[] and Fts3Cursor.zMatchinfo. */ |
| 1277 nArg = (int)strlen(zArg); |
| 1278 pCsr->aMatchinfo = (u32 *)sqlite3_malloc(sizeof(u32)*nMatchinfo + nArg + 1); |
| 1279 if( !pCsr->aMatchinfo ) return SQLITE_NOMEM; |
| 1280 |
| 1281 pCsr->zMatchinfo = (char *)&pCsr->aMatchinfo[nMatchinfo]; |
| 1282 pCsr->nMatchinfo = nMatchinfo; |
| 1283 memcpy(pCsr->zMatchinfo, zArg, nArg+1); |
| 1284 memset(pCsr->aMatchinfo, 0, sizeof(u32)*nMatchinfo); |
| 1285 pCsr->isMatchinfoNeeded = 1; |
| 1286 bGlobal = 1; |
| 1287 } |
| 1288 |
| 1289 sInfo.aMatchinfo = pCsr->aMatchinfo; |
| 1290 sInfo.nPhrase = pCsr->nPhrase; |
| 1291 if( pCsr->isMatchinfoNeeded ){ |
| 1292 rc = fts3MatchinfoValues(pCsr, bGlobal, &sInfo, zArg); |
| 1293 pCsr->isMatchinfoNeeded = 0; |
| 1294 } |
| 1295 |
| 1296 return rc; |
| 1297 } |
| 1298 |
| 1299 /* |
| 1300 ** Implementation of snippet() function. |
| 1301 */ |
| 1302 void sqlite3Fts3Snippet( |
| 1303 sqlite3_context *pCtx, /* SQLite function call context */ |
| 1304 Fts3Cursor *pCsr, /* Cursor object */ |
| 1305 const char *zStart, /* Snippet start text - "<b>" */ |
| 1306 const char *zEnd, /* Snippet end text - "</b>" */ |
| 1307 const char *zEllipsis, /* Snippet ellipsis text - "<b>...</b>" */ |
| 1308 int iCol, /* Extract snippet from this column */ |
| 1309 int nToken /* Approximate number of tokens in snippet */ |
| 1310 ){ |
| 1311 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; |
| 1312 int rc = SQLITE_OK; |
| 1313 int i; |
| 1314 StrBuffer res = {0, 0, 0}; |
| 1315 |
| 1316 /* The returned text includes up to four fragments of text extracted from |
| 1317 ** the data in the current row. The first iteration of the for(...) loop |
| 1318 ** below attempts to locate a single fragment of text nToken tokens in |
| 1319 ** size that contains at least one instance of all phrases in the query |
| 1320 ** expression that appear in the current row. If such a fragment of text |
| 1321 ** cannot be found, the second iteration of the loop attempts to locate |
| 1322 ** a pair of fragments, and so on. |
| 1323 */ |
| 1324 int nSnippet = 0; /* Number of fragments in this snippet */ |
| 1325 SnippetFragment aSnippet[4]; /* Maximum of 4 fragments per snippet */ |
| 1326 int nFToken = -1; /* Number of tokens in each fragment */ |
| 1327 |
| 1328 if( !pCsr->pExpr ){ |
| 1329 sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC); |
| 1330 return; |
| 1331 } |
| 1332 |
| 1333 for(nSnippet=1; 1; nSnippet++){ |
| 1334 |
| 1335 int iSnip; /* Loop counter 0..nSnippet-1 */ |
| 1336 u64 mCovered = 0; /* Bitmask of phrases covered by snippet */ |
| 1337 u64 mSeen = 0; /* Bitmask of phrases seen by BestSnippet() */ |
| 1338 |
| 1339 if( nToken>=0 ){ |
| 1340 nFToken = (nToken+nSnippet-1) / nSnippet; |
| 1341 }else{ |
| 1342 nFToken = -1 * nToken; |
| 1343 } |
| 1344 |
| 1345 for(iSnip=0; iSnip<nSnippet; iSnip++){ |
| 1346 int iBestScore = -1; /* Best score of columns checked so far */ |
| 1347 int iRead; /* Used to iterate through columns */ |
| 1348 SnippetFragment *pFragment = &aSnippet[iSnip]; |
| 1349 |
| 1350 memset(pFragment, 0, sizeof(*pFragment)); |
| 1351 |
| 1352 /* Loop through all columns of the table being considered for snippets. |
| 1353 ** If the iCol argument to this function was negative, this means all |
| 1354 ** columns of the FTS3 table. Otherwise, only column iCol is considered. |
| 1355 */ |
| 1356 for(iRead=0; iRead<pTab->nColumn; iRead++){ |
| 1357 SnippetFragment sF = {0, 0, 0, 0}; |
| 1358 int iS; |
| 1359 if( iCol>=0 && iRead!=iCol ) continue; |
| 1360 |
| 1361 /* Find the best snippet of nFToken tokens in column iRead. */ |
| 1362 rc = fts3BestSnippet(nFToken, pCsr, iRead, mCovered, &mSeen, &sF, &iS); |
| 1363 if( rc!=SQLITE_OK ){ |
| 1364 goto snippet_out; |
| 1365 } |
| 1366 if( iS>iBestScore ){ |
| 1367 *pFragment = sF; |
| 1368 iBestScore = iS; |
| 1369 } |
| 1370 } |
| 1371 |
| 1372 mCovered |= pFragment->covered; |
| 1373 } |
| 1374 |
| 1375 /* If all query phrases seen by fts3BestSnippet() are present in at least |
| 1376 ** one of the nSnippet snippet fragments, break out of the loop. |
| 1377 */ |
| 1378 assert( (mCovered&mSeen)==mCovered ); |
| 1379 if( mSeen==mCovered || nSnippet==SizeofArray(aSnippet) ) break; |
| 1380 } |
| 1381 |
| 1382 assert( nFToken>0 ); |
| 1383 |
| 1384 for(i=0; i<nSnippet && rc==SQLITE_OK; i++){ |
| 1385 rc = fts3SnippetText(pCsr, &aSnippet[i], |
| 1386 i, (i==nSnippet-1), nFToken, zStart, zEnd, zEllipsis, &res |
| 1387 ); |
| 1388 } |
| 1389 |
| 1390 snippet_out: |
| 1391 sqlite3Fts3SegmentsClose(pTab); |
| 1392 if( rc!=SQLITE_OK ){ |
| 1393 sqlite3_result_error_code(pCtx, rc); |
| 1394 sqlite3_free(res.z); |
| 1395 }else{ |
| 1396 sqlite3_result_text(pCtx, res.z, -1, sqlite3_free); |
| 1397 } |
| 1398 } |
| 1399 |
| 1400 |
| 1401 typedef struct TermOffset TermOffset; |
| 1402 typedef struct TermOffsetCtx TermOffsetCtx; |
| 1403 |
| 1404 struct TermOffset { |
| 1405 char *pList; /* Position-list */ |
| 1406 int iPos; /* Position just read from pList */ |
| 1407 int iOff; /* Offset of this term from read positions */ |
| 1408 }; |
| 1409 |
| 1410 struct TermOffsetCtx { |
| 1411 int iCol; /* Column of table to populate aTerm for */ |
| 1412 int iTerm; |
| 1413 sqlite3_int64 iDocid; |
| 1414 TermOffset *aTerm; |
| 1415 }; |
| 1416 |
| 1417 /* |
| 1418 ** This function is an fts3ExprIterate() callback used by sqlite3Fts3Offsets(). |
| 1419 */ |
| 1420 static int fts3ExprTermOffsetInit(Fts3Expr *pExpr, int iPhrase, void *ctx){ |
| 1421 TermOffsetCtx *p = (TermOffsetCtx *)ctx; |
| 1422 int nTerm; /* Number of tokens in phrase */ |
| 1423 int iTerm; /* For looping through nTerm phrase terms */ |
| 1424 char *pList; /* Pointer to position list for phrase */ |
| 1425 int iPos = 0; /* First position in position-list */ |
| 1426 |
| 1427 UNUSED_PARAMETER(iPhrase); |
| 1428 pList = sqlite3Fts3FindPositions(pExpr, p->iDocid, p->iCol); |
| 1429 nTerm = pExpr->pPhrase->nToken; |
| 1430 if( pList ){ |
| 1431 fts3GetDeltaPosition(&pList, &iPos); |
| 1432 assert( iPos>=0 ); |
| 1433 } |
| 1434 |
| 1435 for(iTerm=0; iTerm<nTerm; iTerm++){ |
| 1436 TermOffset *pT = &p->aTerm[p->iTerm++]; |
| 1437 pT->iOff = nTerm-iTerm-1; |
| 1438 pT->pList = pList; |
| 1439 pT->iPos = iPos; |
| 1440 } |
| 1441 |
| 1442 return SQLITE_OK; |
| 1443 } |
| 1444 |
| 1445 /* |
| 1446 ** Implementation of offsets() function. |
| 1447 */ |
| 1448 void sqlite3Fts3Offsets( |
| 1449 sqlite3_context *pCtx, /* SQLite function call context */ |
| 1450 Fts3Cursor *pCsr /* Cursor object */ |
| 1451 ){ |
| 1452 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; |
| 1453 sqlite3_tokenizer_module const *pMod = pTab->pTokenizer->pModule; |
| 1454 const char *ZDUMMY; /* Dummy argument used with xNext() */ |
| 1455 int NDUMMY; /* Dummy argument used with xNext() */ |
| 1456 int rc; /* Return Code */ |
| 1457 int nToken; /* Number of tokens in query */ |
| 1458 int iCol; /* Column currently being processed */ |
| 1459 StrBuffer res = {0, 0, 0}; /* Result string */ |
| 1460 TermOffsetCtx sCtx; /* Context for fts3ExprTermOffsetInit() */ |
| 1461 |
| 1462 if( !pCsr->pExpr ){ |
| 1463 sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC); |
| 1464 return; |
| 1465 } |
| 1466 |
| 1467 memset(&sCtx, 0, sizeof(sCtx)); |
| 1468 assert( pCsr->isRequireSeek==0 ); |
| 1469 |
| 1470 /* Count the number of terms in the query */ |
| 1471 rc = fts3ExprLoadDoclists(pCsr, 0, &nToken); |
| 1472 if( rc!=SQLITE_OK ) goto offsets_out; |
| 1473 |
| 1474 /* Allocate the array of TermOffset iterators. */ |
| 1475 sCtx.aTerm = (TermOffset *)sqlite3_malloc(sizeof(TermOffset)*nToken); |
| 1476 if( 0==sCtx.aTerm ){ |
| 1477 rc = SQLITE_NOMEM; |
| 1478 goto offsets_out; |
| 1479 } |
| 1480 sCtx.iDocid = pCsr->iPrevId; |
| 1481 |
| 1482 /* Loop through the table columns, appending offset information to |
| 1483 ** string-buffer res for each column. |
| 1484 */ |
| 1485 for(iCol=0; iCol<pTab->nColumn; iCol++){ |
| 1486 sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor */ |
| 1487 int iStart; |
| 1488 int iEnd; |
| 1489 int iCurrent; |
| 1490 const char *zDoc; |
| 1491 int nDoc; |
| 1492 |
| 1493 /* Initialize the contents of sCtx.aTerm[] for column iCol. There is |
| 1494 ** no way that this operation can fail, so the return code from |
| 1495 ** fts3ExprIterate() can be discarded. |
| 1496 */ |
| 1497 sCtx.iCol = iCol; |
| 1498 sCtx.iTerm = 0; |
| 1499 (void)fts3ExprIterate(pCsr->pExpr, fts3ExprTermOffsetInit, (void *)&sCtx); |
| 1500 |
| 1501 /* Retreive the text stored in column iCol. If an SQL NULL is stored |
| 1502 ** in column iCol, jump immediately to the next iteration of the loop. |
| 1503 ** If an OOM occurs while retrieving the data (this can happen if SQLite |
| 1504 ** needs to transform the data from utf-16 to utf-8), return SQLITE_NOMEM |
| 1505 ** to the caller. |
| 1506 */ |
| 1507 zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol+1); |
| 1508 nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol+1); |
| 1509 if( zDoc==0 ){ |
| 1510 if( sqlite3_column_type(pCsr->pStmt, iCol+1)==SQLITE_NULL ){ |
| 1511 continue; |
| 1512 } |
| 1513 rc = SQLITE_NOMEM; |
| 1514 goto offsets_out; |
| 1515 } |
| 1516 |
| 1517 /* Initialize a tokenizer iterator to iterate through column iCol. */ |
| 1518 rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC); |
| 1519 if( rc!=SQLITE_OK ) goto offsets_out; |
| 1520 pC->pTokenizer = pTab->pTokenizer; |
| 1521 |
| 1522 rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent); |
| 1523 while( rc==SQLITE_OK ){ |
| 1524 int i; /* Used to loop through terms */ |
| 1525 int iMinPos = 0x7FFFFFFF; /* Position of next token */ |
| 1526 TermOffset *pTerm = 0; /* TermOffset associated with next token */ |
| 1527 |
| 1528 for(i=0; i<nToken; i++){ |
| 1529 TermOffset *pT = &sCtx.aTerm[i]; |
| 1530 if( pT->pList && (pT->iPos-pT->iOff)<iMinPos ){ |
| 1531 iMinPos = pT->iPos-pT->iOff; |
| 1532 pTerm = pT; |
| 1533 } |
| 1534 } |
| 1535 |
| 1536 if( !pTerm ){ |
| 1537 /* All offsets for this column have been gathered. */ |
| 1538 break; |
| 1539 }else{ |
| 1540 assert( iCurrent<=iMinPos ); |
| 1541 if( 0==(0xFE&*pTerm->pList) ){ |
| 1542 pTerm->pList = 0; |
| 1543 }else{ |
| 1544 fts3GetDeltaPosition(&pTerm->pList, &pTerm->iPos); |
| 1545 } |
| 1546 while( rc==SQLITE_OK && iCurrent<iMinPos ){ |
| 1547 rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent); |
| 1548 } |
| 1549 if( rc==SQLITE_OK ){ |
| 1550 char aBuffer[64]; |
| 1551 sqlite3_snprintf(sizeof(aBuffer), aBuffer, |
| 1552 "%d %d %d %d ", iCol, pTerm-sCtx.aTerm, iStart, iEnd-iStart |
| 1553 ); |
| 1554 rc = fts3StringAppend(&res, aBuffer, -1); |
| 1555 }else if( rc==SQLITE_DONE ){ |
| 1556 rc = SQLITE_CORRUPT; |
| 1557 } |
| 1558 } |
| 1559 } |
| 1560 if( rc==SQLITE_DONE ){ |
| 1561 rc = SQLITE_OK; |
| 1562 } |
| 1563 |
| 1564 pMod->xClose(pC); |
| 1565 if( rc!=SQLITE_OK ) goto offsets_out; |
| 1566 } |
| 1567 |
| 1568 offsets_out: |
| 1569 sqlite3_free(sCtx.aTerm); |
| 1570 assert( rc!=SQLITE_DONE ); |
| 1571 sqlite3Fts3SegmentsClose(pTab); |
| 1572 if( rc!=SQLITE_OK ){ |
| 1573 sqlite3_result_error_code(pCtx, rc); |
| 1574 sqlite3_free(res.z); |
| 1575 }else{ |
| 1576 sqlite3_result_text(pCtx, res.z, res.n-1, sqlite3_free); |
| 1577 } |
| 1578 return; |
| 1579 } |
| 1580 |
| 1581 /* |
| 1582 ** Implementation of matchinfo() function. |
| 1583 */ |
| 1584 void sqlite3Fts3Matchinfo( |
| 1585 sqlite3_context *pContext, /* Function call context */ |
| 1586 Fts3Cursor *pCsr, /* FTS3 table cursor */ |
| 1587 const char *zArg /* Second arg to matchinfo() function */ |
| 1588 ){ |
| 1589 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; |
| 1590 int rc; |
| 1591 int i; |
| 1592 const char *zFormat; |
| 1593 |
| 1594 if( zArg ){ |
| 1595 for(i=0; zArg[i]; i++){ |
| 1596 char *zErr = 0; |
| 1597 if( fts3MatchinfoCheck(pTab, zArg[i], &zErr) ){ |
| 1598 sqlite3_result_error(pContext, zErr, -1); |
| 1599 sqlite3_free(zErr); |
| 1600 return; |
| 1601 } |
| 1602 } |
| 1603 zFormat = zArg; |
| 1604 }else{ |
| 1605 zFormat = FTS3_MATCHINFO_DEFAULT; |
| 1606 } |
| 1607 |
| 1608 if( !pCsr->pExpr ){ |
| 1609 sqlite3_result_blob(pContext, "", 0, SQLITE_STATIC); |
| 1610 return; |
| 1611 } |
| 1612 |
| 1613 /* Retrieve matchinfo() data. */ |
| 1614 rc = fts3GetMatchinfo(pCsr, zFormat); |
| 1615 sqlite3Fts3SegmentsClose(pTab); |
| 1616 |
| 1617 if( rc!=SQLITE_OK ){ |
| 1618 sqlite3_result_error_code(pContext, rc); |
| 1619 }else{ |
| 1620 int n = pCsr->nMatchinfo * sizeof(u32); |
| 1621 sqlite3_result_blob(pContext, pCsr->aMatchinfo, n, SQLITE_TRANSIENT); |
| 1622 } |
| 1623 } |
| 1624 |
| 1625 #endif |
OLD | NEW |