OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ** 2014 May 31 |
| 3 ** |
| 4 ** The author disclaims copyright to this source code. In place of |
| 5 ** a legal notice, here is a blessing: |
| 6 ** |
| 7 ** May you do good and not evil. |
| 8 ** May you find forgiveness for yourself and forgive others. |
| 9 ** May you share freely, never taking more than you give. |
| 10 ** |
| 11 ****************************************************************************** |
| 12 */ |
| 13 |
| 14 |
| 15 #include "fts5Int.h" |
| 16 #include <math.h> /* amalgamator: keep */ |
| 17 |
| 18 /* |
| 19 ** Object used to iterate through all "coalesced phrase instances" in |
| 20 ** a single column of the current row. If the phrase instances in the |
| 21 ** column being considered do not overlap, this object simply iterates |
| 22 ** through them. Or, if they do overlap (share one or more tokens in |
| 23 ** common), each set of overlapping instances is treated as a single |
| 24 ** match. See documentation for the highlight() auxiliary function for |
| 25 ** details. |
| 26 ** |
| 27 ** Usage is: |
| 28 ** |
| 29 ** for(rc = fts5CInstIterInit(pApi, pFts, iCol, &iter); |
| 30 ** rc==SQLITE_OK && iter.iStart>=0; |
| 31 ** rc = fts5CInstIterNext(&iter) |
| 32 ** ){ |
| 33 ** printf("instance starts at %d, ends at %d\n", iter.iStart, iter.iEnd); |
| 34 ** } |
| 35 ** |
| 36 */ |
| 37 typedef struct CInstIter CInstIter; |
| 38 struct CInstIter { |
| 39 const Fts5ExtensionApi *pApi; /* API offered by current FTS version */ |
| 40 Fts5Context *pFts; /* First arg to pass to pApi functions */ |
| 41 int iCol; /* Column to search */ |
| 42 int iInst; /* Next phrase instance index */ |
| 43 int nInst; /* Total number of phrase instances */ |
| 44 |
| 45 /* Output variables */ |
| 46 int iStart; /* First token in coalesced phrase instance */ |
| 47 int iEnd; /* Last token in coalesced phrase instance */ |
| 48 }; |
| 49 |
| 50 /* |
| 51 ** Advance the iterator to the next coalesced phrase instance. Return |
| 52 ** an SQLite error code if an error occurs, or SQLITE_OK otherwise. |
| 53 */ |
| 54 static int fts5CInstIterNext(CInstIter *pIter){ |
| 55 int rc = SQLITE_OK; |
| 56 pIter->iStart = -1; |
| 57 pIter->iEnd = -1; |
| 58 |
| 59 while( rc==SQLITE_OK && pIter->iInst<pIter->nInst ){ |
| 60 int ip; int ic; int io; |
| 61 rc = pIter->pApi->xInst(pIter->pFts, pIter->iInst, &ip, &ic, &io); |
| 62 if( rc==SQLITE_OK ){ |
| 63 if( ic==pIter->iCol ){ |
| 64 int iEnd = io - 1 + pIter->pApi->xPhraseSize(pIter->pFts, ip); |
| 65 if( pIter->iStart<0 ){ |
| 66 pIter->iStart = io; |
| 67 pIter->iEnd = iEnd; |
| 68 }else if( io<=pIter->iEnd ){ |
| 69 if( iEnd>pIter->iEnd ) pIter->iEnd = iEnd; |
| 70 }else{ |
| 71 break; |
| 72 } |
| 73 } |
| 74 pIter->iInst++; |
| 75 } |
| 76 } |
| 77 |
| 78 return rc; |
| 79 } |
| 80 |
| 81 /* |
| 82 ** Initialize the iterator object indicated by the final parameter to |
| 83 ** iterate through coalesced phrase instances in column iCol. |
| 84 */ |
| 85 static int fts5CInstIterInit( |
| 86 const Fts5ExtensionApi *pApi, |
| 87 Fts5Context *pFts, |
| 88 int iCol, |
| 89 CInstIter *pIter |
| 90 ){ |
| 91 int rc; |
| 92 |
| 93 memset(pIter, 0, sizeof(CInstIter)); |
| 94 pIter->pApi = pApi; |
| 95 pIter->pFts = pFts; |
| 96 pIter->iCol = iCol; |
| 97 rc = pApi->xInstCount(pFts, &pIter->nInst); |
| 98 |
| 99 if( rc==SQLITE_OK ){ |
| 100 rc = fts5CInstIterNext(pIter); |
| 101 } |
| 102 |
| 103 return rc; |
| 104 } |
| 105 |
| 106 |
| 107 |
| 108 /************************************************************************* |
| 109 ** Start of highlight() implementation. |
| 110 */ |
| 111 typedef struct HighlightContext HighlightContext; |
| 112 struct HighlightContext { |
| 113 CInstIter iter; /* Coalesced Instance Iterator */ |
| 114 int iPos; /* Current token offset in zIn[] */ |
| 115 int iRangeStart; /* First token to include */ |
| 116 int iRangeEnd; /* If non-zero, last token to include */ |
| 117 const char *zOpen; /* Opening highlight */ |
| 118 const char *zClose; /* Closing highlight */ |
| 119 const char *zIn; /* Input text */ |
| 120 int nIn; /* Size of input text in bytes */ |
| 121 int iOff; /* Current offset within zIn[] */ |
| 122 char *zOut; /* Output value */ |
| 123 }; |
| 124 |
| 125 /* |
| 126 ** Append text to the HighlightContext output string - p->zOut. Argument |
| 127 ** z points to a buffer containing n bytes of text to append. If n is |
| 128 ** negative, everything up until the first '\0' is appended to the output. |
| 129 ** |
| 130 ** If *pRc is set to any value other than SQLITE_OK when this function is |
| 131 ** called, it is a no-op. If an error (i.e. an OOM condition) is encountered, |
| 132 ** *pRc is set to an error code before returning. |
| 133 */ |
| 134 static void fts5HighlightAppend( |
| 135 int *pRc, |
| 136 HighlightContext *p, |
| 137 const char *z, int n |
| 138 ){ |
| 139 if( *pRc==SQLITE_OK ){ |
| 140 if( n<0 ) n = (int)strlen(z); |
| 141 p->zOut = sqlite3_mprintf("%z%.*s", p->zOut, n, z); |
| 142 if( p->zOut==0 ) *pRc = SQLITE_NOMEM; |
| 143 } |
| 144 } |
| 145 |
| 146 /* |
| 147 ** Tokenizer callback used by implementation of highlight() function. |
| 148 */ |
| 149 static int fts5HighlightCb( |
| 150 void *pContext, /* Pointer to HighlightContext object */ |
| 151 int tflags, /* Mask of FTS5_TOKEN_* flags */ |
| 152 const char *pToken, /* Buffer containing token */ |
| 153 int nToken, /* Size of token in bytes */ |
| 154 int iStartOff, /* Start offset of token */ |
| 155 int iEndOff /* End offset of token */ |
| 156 ){ |
| 157 HighlightContext *p = (HighlightContext*)pContext; |
| 158 int rc = SQLITE_OK; |
| 159 int iPos; |
| 160 |
| 161 if( tflags & FTS5_TOKEN_COLOCATED ) return SQLITE_OK; |
| 162 iPos = p->iPos++; |
| 163 |
| 164 if( p->iRangeEnd>0 ){ |
| 165 if( iPos<p->iRangeStart || iPos>p->iRangeEnd ) return SQLITE_OK; |
| 166 if( p->iRangeStart && iPos==p->iRangeStart ) p->iOff = iStartOff; |
| 167 } |
| 168 |
| 169 if( iPos==p->iter.iStart ){ |
| 170 fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iStartOff - p->iOff); |
| 171 fts5HighlightAppend(&rc, p, p->zOpen, -1); |
| 172 p->iOff = iStartOff; |
| 173 } |
| 174 |
| 175 if( iPos==p->iter.iEnd ){ |
| 176 if( p->iRangeEnd && p->iter.iStart<p->iRangeStart ){ |
| 177 fts5HighlightAppend(&rc, p, p->zOpen, -1); |
| 178 } |
| 179 fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff); |
| 180 fts5HighlightAppend(&rc, p, p->zClose, -1); |
| 181 p->iOff = iEndOff; |
| 182 if( rc==SQLITE_OK ){ |
| 183 rc = fts5CInstIterNext(&p->iter); |
| 184 } |
| 185 } |
| 186 |
| 187 if( p->iRangeEnd>0 && iPos==p->iRangeEnd ){ |
| 188 fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff); |
| 189 p->iOff = iEndOff; |
| 190 if( iPos<p->iter.iEnd ){ |
| 191 fts5HighlightAppend(&rc, p, p->zClose, -1); |
| 192 } |
| 193 } |
| 194 |
| 195 return rc; |
| 196 } |
| 197 |
| 198 /* |
| 199 ** Implementation of highlight() function. |
| 200 */ |
| 201 static void fts5HighlightFunction( |
| 202 const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ |
| 203 Fts5Context *pFts, /* First arg to pass to pApi functions */ |
| 204 sqlite3_context *pCtx, /* Context for returning result/error */ |
| 205 int nVal, /* Number of values in apVal[] array */ |
| 206 sqlite3_value **apVal /* Array of trailing arguments */ |
| 207 ){ |
| 208 HighlightContext ctx; |
| 209 int rc; |
| 210 int iCol; |
| 211 |
| 212 if( nVal!=3 ){ |
| 213 const char *zErr = "wrong number of arguments to function highlight()"; |
| 214 sqlite3_result_error(pCtx, zErr, -1); |
| 215 return; |
| 216 } |
| 217 |
| 218 iCol = sqlite3_value_int(apVal[0]); |
| 219 memset(&ctx, 0, sizeof(HighlightContext)); |
| 220 ctx.zOpen = (const char*)sqlite3_value_text(apVal[1]); |
| 221 ctx.zClose = (const char*)sqlite3_value_text(apVal[2]); |
| 222 rc = pApi->xColumnText(pFts, iCol, &ctx.zIn, &ctx.nIn); |
| 223 |
| 224 if( ctx.zIn ){ |
| 225 if( rc==SQLITE_OK ){ |
| 226 rc = fts5CInstIterInit(pApi, pFts, iCol, &ctx.iter); |
| 227 } |
| 228 |
| 229 if( rc==SQLITE_OK ){ |
| 230 rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx,fts5HighlightCb); |
| 231 } |
| 232 fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff); |
| 233 |
| 234 if( rc==SQLITE_OK ){ |
| 235 sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT); |
| 236 } |
| 237 sqlite3_free(ctx.zOut); |
| 238 } |
| 239 if( rc!=SQLITE_OK ){ |
| 240 sqlite3_result_error_code(pCtx, rc); |
| 241 } |
| 242 } |
| 243 /* |
| 244 ** End of highlight() implementation. |
| 245 **************************************************************************/ |
| 246 |
| 247 /* |
| 248 ** Implementation of snippet() function. |
| 249 */ |
| 250 static void fts5SnippetFunction( |
| 251 const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ |
| 252 Fts5Context *pFts, /* First arg to pass to pApi functions */ |
| 253 sqlite3_context *pCtx, /* Context for returning result/error */ |
| 254 int nVal, /* Number of values in apVal[] array */ |
| 255 sqlite3_value **apVal /* Array of trailing arguments */ |
| 256 ){ |
| 257 HighlightContext ctx; |
| 258 int rc = SQLITE_OK; /* Return code */ |
| 259 int iCol; /* 1st argument to snippet() */ |
| 260 const char *zEllips; /* 4th argument to snippet() */ |
| 261 int nToken; /* 5th argument to snippet() */ |
| 262 int nInst = 0; /* Number of instance matches this row */ |
| 263 int i; /* Used to iterate through instances */ |
| 264 int nPhrase; /* Number of phrases in query */ |
| 265 unsigned char *aSeen; /* Array of "seen instance" flags */ |
| 266 int iBestCol; /* Column containing best snippet */ |
| 267 int iBestStart = 0; /* First token of best snippet */ |
| 268 int iBestLast; /* Last token of best snippet */ |
| 269 int nBestScore = 0; /* Score of best snippet */ |
| 270 int nColSize = 0; /* Total size of iBestCol in tokens */ |
| 271 |
| 272 if( nVal!=5 ){ |
| 273 const char *zErr = "wrong number of arguments to function snippet()"; |
| 274 sqlite3_result_error(pCtx, zErr, -1); |
| 275 return; |
| 276 } |
| 277 |
| 278 memset(&ctx, 0, sizeof(HighlightContext)); |
| 279 iCol = sqlite3_value_int(apVal[0]); |
| 280 ctx.zOpen = (const char*)sqlite3_value_text(apVal[1]); |
| 281 ctx.zClose = (const char*)sqlite3_value_text(apVal[2]); |
| 282 zEllips = (const char*)sqlite3_value_text(apVal[3]); |
| 283 nToken = sqlite3_value_int(apVal[4]); |
| 284 iBestLast = nToken-1; |
| 285 |
| 286 iBestCol = (iCol>=0 ? iCol : 0); |
| 287 nPhrase = pApi->xPhraseCount(pFts); |
| 288 aSeen = sqlite3_malloc(nPhrase); |
| 289 if( aSeen==0 ){ |
| 290 rc = SQLITE_NOMEM; |
| 291 } |
| 292 |
| 293 if( rc==SQLITE_OK ){ |
| 294 rc = pApi->xInstCount(pFts, &nInst); |
| 295 } |
| 296 for(i=0; rc==SQLITE_OK && i<nInst; i++){ |
| 297 int ip, iSnippetCol, iStart; |
| 298 memset(aSeen, 0, nPhrase); |
| 299 rc = pApi->xInst(pFts, i, &ip, &iSnippetCol, &iStart); |
| 300 if( rc==SQLITE_OK && (iCol<0 || iSnippetCol==iCol) ){ |
| 301 int nScore = 1000; |
| 302 int iLast = iStart - 1 + pApi->xPhraseSize(pFts, ip); |
| 303 int j; |
| 304 aSeen[ip] = 1; |
| 305 |
| 306 for(j=i+1; rc==SQLITE_OK && j<nInst; j++){ |
| 307 int ic; int io; int iFinal; |
| 308 rc = pApi->xInst(pFts, j, &ip, &ic, &io); |
| 309 iFinal = io + pApi->xPhraseSize(pFts, ip) - 1; |
| 310 if( rc==SQLITE_OK && ic==iSnippetCol && iLast<iStart+nToken ){ |
| 311 nScore += aSeen[ip] ? 1000 : 1; |
| 312 aSeen[ip] = 1; |
| 313 if( iFinal>iLast ) iLast = iFinal; |
| 314 } |
| 315 } |
| 316 |
| 317 if( rc==SQLITE_OK && nScore>nBestScore ){ |
| 318 iBestCol = iSnippetCol; |
| 319 iBestStart = iStart; |
| 320 iBestLast = iLast; |
| 321 nBestScore = nScore; |
| 322 } |
| 323 } |
| 324 } |
| 325 |
| 326 if( rc==SQLITE_OK ){ |
| 327 rc = pApi->xColumnSize(pFts, iBestCol, &nColSize); |
| 328 } |
| 329 if( rc==SQLITE_OK ){ |
| 330 rc = pApi->xColumnText(pFts, iBestCol, &ctx.zIn, &ctx.nIn); |
| 331 } |
| 332 if( ctx.zIn ){ |
| 333 if( rc==SQLITE_OK ){ |
| 334 rc = fts5CInstIterInit(pApi, pFts, iBestCol, &ctx.iter); |
| 335 } |
| 336 |
| 337 if( (iBestStart+nToken-1)>iBestLast ){ |
| 338 iBestStart -= (iBestStart+nToken-1-iBestLast) / 2; |
| 339 } |
| 340 if( iBestStart+nToken>nColSize ){ |
| 341 iBestStart = nColSize - nToken; |
| 342 } |
| 343 if( iBestStart<0 ) iBestStart = 0; |
| 344 |
| 345 ctx.iRangeStart = iBestStart; |
| 346 ctx.iRangeEnd = iBestStart + nToken - 1; |
| 347 |
| 348 if( iBestStart>0 ){ |
| 349 fts5HighlightAppend(&rc, &ctx, zEllips, -1); |
| 350 } |
| 351 if( rc==SQLITE_OK ){ |
| 352 rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx,fts5HighlightCb); |
| 353 } |
| 354 if( ctx.iRangeEnd>=(nColSize-1) ){ |
| 355 fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff); |
| 356 }else{ |
| 357 fts5HighlightAppend(&rc, &ctx, zEllips, -1); |
| 358 } |
| 359 |
| 360 if( rc==SQLITE_OK ){ |
| 361 sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT); |
| 362 }else{ |
| 363 sqlite3_result_error_code(pCtx, rc); |
| 364 } |
| 365 sqlite3_free(ctx.zOut); |
| 366 } |
| 367 sqlite3_free(aSeen); |
| 368 } |
| 369 |
| 370 /************************************************************************/ |
| 371 |
/*
** Per-query data used by the bm25() function. An instance is allocated
** and populated by fts5Bm25GetData() the first time it is required for
** a query. Arrays aIDF[] and aFreq[] each contain nPhrase entries.
*/
typedef struct Fts5Bm25Data {
  int nPhrase;                    /* Number of phrases in query */
  double avgdl;                   /* Average number of tokens in each row */
  double *aIDF;                   /* IDF for each phrase */
  double *aFreq;                  /* Scratch array for phrase frequencies */
} Fts5Bm25Data;
| 383 |
| 384 /* |
| 385 ** Callback used by fts5Bm25GetData() to count the number of rows in the |
| 386 ** table matched by each individual phrase within the query. |
| 387 */ |
| 388 static int fts5CountCb( |
| 389 const Fts5ExtensionApi *pApi, |
| 390 Fts5Context *pFts, |
| 391 void *pUserData /* Pointer to sqlite3_int64 variable */ |
| 392 ){ |
| 393 sqlite3_int64 *pn = (sqlite3_int64*)pUserData; |
| 394 (*pn)++; |
| 395 return SQLITE_OK; |
| 396 } |
| 397 |
| 398 /* |
| 399 ** Set *ppData to point to the Fts5Bm25Data object for the current query. |
| 400 ** If the object has not already been allocated, allocate and populate it |
| 401 ** now. |
| 402 */ |
| 403 static int fts5Bm25GetData( |
| 404 const Fts5ExtensionApi *pApi, |
| 405 Fts5Context *pFts, |
| 406 Fts5Bm25Data **ppData /* OUT: bm25-data object for this query */ |
| 407 ){ |
| 408 int rc = SQLITE_OK; /* Return code */ |
| 409 Fts5Bm25Data *p; /* Object to return */ |
| 410 |
| 411 p = pApi->xGetAuxdata(pFts, 0); |
| 412 if( p==0 ){ |
| 413 int nPhrase; /* Number of phrases in query */ |
| 414 sqlite3_int64 nRow = 0; /* Number of rows in table */ |
| 415 sqlite3_int64 nToken = 0; /* Number of tokens in table */ |
| 416 int nByte; /* Bytes of space to allocate */ |
| 417 int i; |
| 418 |
| 419 /* Allocate the Fts5Bm25Data object */ |
| 420 nPhrase = pApi->xPhraseCount(pFts); |
| 421 nByte = sizeof(Fts5Bm25Data) + nPhrase*2*sizeof(double); |
| 422 p = (Fts5Bm25Data*)sqlite3_malloc(nByte); |
| 423 if( p==0 ){ |
| 424 rc = SQLITE_NOMEM; |
| 425 }else{ |
| 426 memset(p, 0, nByte); |
| 427 p->nPhrase = nPhrase; |
| 428 p->aIDF = (double*)&p[1]; |
| 429 p->aFreq = &p->aIDF[nPhrase]; |
| 430 } |
| 431 |
| 432 /* Calculate the average document length for this FTS5 table */ |
| 433 if( rc==SQLITE_OK ) rc = pApi->xRowCount(pFts, &nRow); |
| 434 if( rc==SQLITE_OK ) rc = pApi->xColumnTotalSize(pFts, -1, &nToken); |
| 435 if( rc==SQLITE_OK ) p->avgdl = (double)nToken / (double)nRow; |
| 436 |
| 437 /* Calculate an IDF for each phrase in the query */ |
| 438 for(i=0; rc==SQLITE_OK && i<nPhrase; i++){ |
| 439 sqlite3_int64 nHit = 0; |
| 440 rc = pApi->xQueryPhrase(pFts, i, (void*)&nHit, fts5CountCb); |
| 441 if( rc==SQLITE_OK ){ |
| 442 /* Calculate the IDF (Inverse Document Frequency) for phrase i. |
| 443 ** This is done using the standard BM25 formula as found on wikipedia: |
| 444 ** |
| 445 ** IDF = log( (N - nHit + 0.5) / (nHit + 0.5) ) |
| 446 ** |
| 447 ** where "N" is the total number of documents in the set and nHit |
| 448 ** is the number that contain at least one instance of the phrase |
| 449 ** under consideration. |
| 450 ** |
| 451 ** The problem with this is that if (N < 2*nHit), the IDF is |
| 452 ** negative. Which is undesirable. So the mimimum allowable IDF is |
| 453 ** (1e-6) - roughly the same as a term that appears in just over |
| 454 ** half of set of 5,000,000 documents. */ |
| 455 double idf = log( (nRow - nHit + 0.5) / (nHit + 0.5) ); |
| 456 if( idf<=0.0 ) idf = 1e-6; |
| 457 p->aIDF[i] = idf; |
| 458 } |
| 459 } |
| 460 |
| 461 if( rc!=SQLITE_OK ){ |
| 462 sqlite3_free(p); |
| 463 }else{ |
| 464 rc = pApi->xSetAuxdata(pFts, p, sqlite3_free); |
| 465 } |
| 466 if( rc!=SQLITE_OK ) p = 0; |
| 467 } |
| 468 *ppData = p; |
| 469 return rc; |
| 470 } |
| 471 |
| 472 /* |
| 473 ** Implementation of bm25() function. |
| 474 */ |
| 475 static void fts5Bm25Function( |
| 476 const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ |
| 477 Fts5Context *pFts, /* First arg to pass to pApi functions */ |
| 478 sqlite3_context *pCtx, /* Context for returning result/error */ |
| 479 int nVal, /* Number of values in apVal[] array */ |
| 480 sqlite3_value **apVal /* Array of trailing arguments */ |
| 481 ){ |
| 482 const double k1 = 1.2; /* Constant "k1" from BM25 formula */ |
| 483 const double b = 0.75; /* Constant "b" from BM25 formula */ |
| 484 int rc = SQLITE_OK; /* Error code */ |
| 485 double score = 0.0; /* SQL function return value */ |
| 486 Fts5Bm25Data *pData; /* Values allocated/calculated once only */ |
| 487 int i; /* Iterator variable */ |
| 488 int nInst = 0; /* Value returned by xInstCount() */ |
| 489 double D = 0.0; /* Total number of tokens in row */ |
| 490 double *aFreq = 0; /* Array of phrase freq. for current row */ |
| 491 |
| 492 /* Calculate the phrase frequency (symbol "f(qi,D)" in the documentation) |
| 493 ** for each phrase in the query for the current row. */ |
| 494 rc = fts5Bm25GetData(pApi, pFts, &pData); |
| 495 if( rc==SQLITE_OK ){ |
| 496 aFreq = pData->aFreq; |
| 497 memset(aFreq, 0, sizeof(double) * pData->nPhrase); |
| 498 rc = pApi->xInstCount(pFts, &nInst); |
| 499 } |
| 500 for(i=0; rc==SQLITE_OK && i<nInst; i++){ |
| 501 int ip; int ic; int io; |
| 502 rc = pApi->xInst(pFts, i, &ip, &ic, &io); |
| 503 if( rc==SQLITE_OK ){ |
| 504 double w = (nVal > ic) ? sqlite3_value_double(apVal[ic]) : 1.0; |
| 505 aFreq[ip] += w; |
| 506 } |
| 507 } |
| 508 |
| 509 /* Figure out the total size of the current row in tokens. */ |
| 510 if( rc==SQLITE_OK ){ |
| 511 int nTok; |
| 512 rc = pApi->xColumnSize(pFts, -1, &nTok); |
| 513 D = (double)nTok; |
| 514 } |
| 515 |
| 516 /* Determine the BM25 score for the current row. */ |
| 517 for(i=0; rc==SQLITE_OK && i<pData->nPhrase; i++){ |
| 518 score += pData->aIDF[i] * ( |
| 519 ( aFreq[i] * (k1 + 1.0) ) / |
| 520 ( aFreq[i] + k1 * (1 - b + b * D / pData->avgdl) ) |
| 521 ); |
| 522 } |
| 523 |
| 524 /* If no error has occurred, return the calculated score. Otherwise, |
| 525 ** throw an SQL exception. */ |
| 526 if( rc==SQLITE_OK ){ |
| 527 sqlite3_result_double(pCtx, -1.0 * score); |
| 528 }else{ |
| 529 sqlite3_result_error_code(pCtx, rc); |
| 530 } |
| 531 } |
| 532 |
| 533 int sqlite3Fts5AuxInit(fts5_api *pApi){ |
| 534 struct Builtin { |
| 535 const char *zFunc; /* Function name (nul-terminated) */ |
| 536 void *pUserData; /* User-data pointer */ |
| 537 fts5_extension_function xFunc;/* Callback function */ |
| 538 void (*xDestroy)(void*); /* Destructor function */ |
| 539 } aBuiltin [] = { |
| 540 { "snippet", 0, fts5SnippetFunction, 0 }, |
| 541 { "highlight", 0, fts5HighlightFunction, 0 }, |
| 542 { "bm25", 0, fts5Bm25Function, 0 }, |
| 543 }; |
| 544 int rc = SQLITE_OK; /* Return code */ |
| 545 int i; /* To iterate through builtin functions */ |
| 546 |
| 547 for(i=0; rc==SQLITE_OK && i<(int)ArraySize(aBuiltin); i++){ |
| 548 rc = pApi->xCreateFunction(pApi, |
| 549 aBuiltin[i].zFunc, |
| 550 aBuiltin[i].pUserData, |
| 551 aBuiltin[i].xFunc, |
| 552 aBuiltin[i].xDestroy |
| 553 ); |
| 554 } |
| 555 |
| 556 return rc; |
| 557 } |
| 558 |
| 559 |
OLD | NEW |