| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 ** 2014 May 31 | |
| 3 ** | |
| 4 ** The author disclaims copyright to this source code. In place of | |
| 5 ** a legal notice, here is a blessing: | |
| 6 ** | |
| 7 ** May you do good and not evil. | |
| 8 ** May you find forgiveness for yourself and forgive others. | |
| 9 ** May you share freely, never taking more than you give. | |
| 10 ** | |
| 11 ****************************************************************************** | |
| 12 */ | |
| 13 | |
| 14 | |
| 15 #include "fts5Int.h" | |
| 16 #include <math.h> /* amalgamator: keep */ | |
| 17 | |
| 18 /* | |
| 19 ** Object used to iterate through all "coalesced phrase instances" in | |
| 20 ** a single column of the current row. If the phrase instances in the | |
| 21 ** column being considered do not overlap, this object simply iterates | |
| 22 ** through them. Or, if they do overlap (share one or more tokens in | |
| 23 ** common), each set of overlapping instances is treated as a single | |
| 24 ** match. See documentation for the highlight() auxiliary function for | |
| 25 ** details. | |
| 26 ** | |
| 27 ** Usage is: | |
| 28 ** | |
| 29 ** for(rc = fts5CInstIterNext(pApi, pFts, iCol, &iter); | |
| 30 ** (rc==SQLITE_OK && 0==fts5CInstIterEof(&iter); | |
| 31 ** rc = fts5CInstIterNext(&iter) | |
| 32 ** ){ | |
| 33 ** printf("instance starts at %d, ends at %d\n", iter.iStart, iter.iEnd); | |
| 34 ** } | |
| 35 ** | |
| 36 */ | |
| 37 typedef struct CInstIter CInstIter; | |
| 38 struct CInstIter { | |
| 39 const Fts5ExtensionApi *pApi; /* API offered by current FTS version */ | |
| 40 Fts5Context *pFts; /* First arg to pass to pApi functions */ | |
| 41 int iCol; /* Column to search */ | |
| 42 int iInst; /* Next phrase instance index */ | |
| 43 int nInst; /* Total number of phrase instances */ | |
| 44 | |
| 45 /* Output variables */ | |
| 46 int iStart; /* First token in coalesced phrase instance */ | |
| 47 int iEnd; /* Last token in coalesced phrase instance */ | |
| 48 }; | |
| 49 | |
| 50 /* | |
| 51 ** Advance the iterator to the next coalesced phrase instance. Return | |
| 52 ** an SQLite error code if an error occurs, or SQLITE_OK otherwise. | |
| 53 */ | |
| 54 static int fts5CInstIterNext(CInstIter *pIter){ | |
| 55 int rc = SQLITE_OK; | |
| 56 pIter->iStart = -1; | |
| 57 pIter->iEnd = -1; | |
| 58 | |
| 59 while( rc==SQLITE_OK && pIter->iInst<pIter->nInst ){ | |
| 60 int ip; int ic; int io; | |
| 61 rc = pIter->pApi->xInst(pIter->pFts, pIter->iInst, &ip, &ic, &io); | |
| 62 if( rc==SQLITE_OK ){ | |
| 63 if( ic==pIter->iCol ){ | |
| 64 int iEnd = io - 1 + pIter->pApi->xPhraseSize(pIter->pFts, ip); | |
| 65 if( pIter->iStart<0 ){ | |
| 66 pIter->iStart = io; | |
| 67 pIter->iEnd = iEnd; | |
| 68 }else if( io<=pIter->iEnd ){ | |
| 69 if( iEnd>pIter->iEnd ) pIter->iEnd = iEnd; | |
| 70 }else{ | |
| 71 break; | |
| 72 } | |
| 73 } | |
| 74 pIter->iInst++; | |
| 75 } | |
| 76 } | |
| 77 | |
| 78 return rc; | |
| 79 } | |
| 80 | |
| 81 /* | |
| 82 ** Initialize the iterator object indicated by the final parameter to | |
| 83 ** iterate through coalesced phrase instances in column iCol. | |
| 84 */ | |
| 85 static int fts5CInstIterInit( | |
| 86 const Fts5ExtensionApi *pApi, | |
| 87 Fts5Context *pFts, | |
| 88 int iCol, | |
| 89 CInstIter *pIter | |
| 90 ){ | |
| 91 int rc; | |
| 92 | |
| 93 memset(pIter, 0, sizeof(CInstIter)); | |
| 94 pIter->pApi = pApi; | |
| 95 pIter->pFts = pFts; | |
| 96 pIter->iCol = iCol; | |
| 97 rc = pApi->xInstCount(pFts, &pIter->nInst); | |
| 98 | |
| 99 if( rc==SQLITE_OK ){ | |
| 100 rc = fts5CInstIterNext(pIter); | |
| 101 } | |
| 102 | |
| 103 return rc; | |
| 104 } | |
| 105 | |
| 106 | |
| 107 | |
| 108 /************************************************************************* | |
| 109 ** Start of highlight() implementation. | |
| 110 */ | |
| 111 typedef struct HighlightContext HighlightContext; | |
| 112 struct HighlightContext { | |
| 113 CInstIter iter; /* Coalesced Instance Iterator */ | |
| 114 int iPos; /* Current token offset in zIn[] */ | |
| 115 int iRangeStart; /* First token to include */ | |
| 116 int iRangeEnd; /* If non-zero, last token to include */ | |
| 117 const char *zOpen; /* Opening highlight */ | |
| 118 const char *zClose; /* Closing highlight */ | |
| 119 const char *zIn; /* Input text */ | |
| 120 int nIn; /* Size of input text in bytes */ | |
| 121 int iOff; /* Current offset within zIn[] */ | |
| 122 char *zOut; /* Output value */ | |
| 123 }; | |
| 124 | |
| 125 /* | |
| 126 ** Append text to the HighlightContext output string - p->zOut. Argument | |
| 127 ** z points to a buffer containing n bytes of text to append. If n is | |
| 128 ** negative, everything up until the first '\0' is appended to the output. | |
| 129 ** | |
| 130 ** If *pRc is set to any value other than SQLITE_OK when this function is | |
| 131 ** called, it is a no-op. If an error (i.e. an OOM condition) is encountered, | |
| 132 ** *pRc is set to an error code before returning. | |
| 133 */ | |
| 134 static void fts5HighlightAppend( | |
| 135 int *pRc, | |
| 136 HighlightContext *p, | |
| 137 const char *z, int n | |
| 138 ){ | |
| 139 if( *pRc==SQLITE_OK ){ | |
| 140 if( n<0 ) n = (int)strlen(z); | |
| 141 p->zOut = sqlite3_mprintf("%z%.*s", p->zOut, n, z); | |
| 142 if( p->zOut==0 ) *pRc = SQLITE_NOMEM; | |
| 143 } | |
| 144 } | |
| 145 | |
| 146 /* | |
| 147 ** Tokenizer callback used by implementation of highlight() function. | |
| 148 */ | |
| 149 static int fts5HighlightCb( | |
| 150 void *pContext, /* Pointer to HighlightContext object */ | |
| 151 int tflags, /* Mask of FTS5_TOKEN_* flags */ | |
| 152 const char *pToken, /* Buffer containing token */ | |
| 153 int nToken, /* Size of token in bytes */ | |
| 154 int iStartOff, /* Start offset of token */ | |
| 155 int iEndOff /* End offset of token */ | |
| 156 ){ | |
| 157 HighlightContext *p = (HighlightContext*)pContext; | |
| 158 int rc = SQLITE_OK; | |
| 159 int iPos; | |
| 160 | |
| 161 if( tflags & FTS5_TOKEN_COLOCATED ) return SQLITE_OK; | |
| 162 iPos = p->iPos++; | |
| 163 | |
| 164 if( p->iRangeEnd>0 ){ | |
| 165 if( iPos<p->iRangeStart || iPos>p->iRangeEnd ) return SQLITE_OK; | |
| 166 if( p->iRangeStart && iPos==p->iRangeStart ) p->iOff = iStartOff; | |
| 167 } | |
| 168 | |
| 169 if( iPos==p->iter.iStart ){ | |
| 170 fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iStartOff - p->iOff); | |
| 171 fts5HighlightAppend(&rc, p, p->zOpen, -1); | |
| 172 p->iOff = iStartOff; | |
| 173 } | |
| 174 | |
| 175 if( iPos==p->iter.iEnd ){ | |
| 176 if( p->iRangeEnd && p->iter.iStart<p->iRangeStart ){ | |
| 177 fts5HighlightAppend(&rc, p, p->zOpen, -1); | |
| 178 } | |
| 179 fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff); | |
| 180 fts5HighlightAppend(&rc, p, p->zClose, -1); | |
| 181 p->iOff = iEndOff; | |
| 182 if( rc==SQLITE_OK ){ | |
| 183 rc = fts5CInstIterNext(&p->iter); | |
| 184 } | |
| 185 } | |
| 186 | |
| 187 if( p->iRangeEnd>0 && iPos==p->iRangeEnd ){ | |
| 188 fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff); | |
| 189 p->iOff = iEndOff; | |
| 190 if( iPos<p->iter.iEnd ){ | |
| 191 fts5HighlightAppend(&rc, p, p->zClose, -1); | |
| 192 } | |
| 193 } | |
| 194 | |
| 195 return rc; | |
| 196 } | |
| 197 | |
| 198 /* | |
| 199 ** Implementation of highlight() function. | |
| 200 */ | |
| 201 static void fts5HighlightFunction( | |
| 202 const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ | |
| 203 Fts5Context *pFts, /* First arg to pass to pApi functions */ | |
| 204 sqlite3_context *pCtx, /* Context for returning result/error */ | |
| 205 int nVal, /* Number of values in apVal[] array */ | |
| 206 sqlite3_value **apVal /* Array of trailing arguments */ | |
| 207 ){ | |
| 208 HighlightContext ctx; | |
| 209 int rc; | |
| 210 int iCol; | |
| 211 | |
| 212 if( nVal!=3 ){ | |
| 213 const char *zErr = "wrong number of arguments to function highlight()"; | |
| 214 sqlite3_result_error(pCtx, zErr, -1); | |
| 215 return; | |
| 216 } | |
| 217 | |
| 218 iCol = sqlite3_value_int(apVal[0]); | |
| 219 memset(&ctx, 0, sizeof(HighlightContext)); | |
| 220 ctx.zOpen = (const char*)sqlite3_value_text(apVal[1]); | |
| 221 ctx.zClose = (const char*)sqlite3_value_text(apVal[2]); | |
| 222 rc = pApi->xColumnText(pFts, iCol, &ctx.zIn, &ctx.nIn); | |
| 223 | |
| 224 if( ctx.zIn ){ | |
| 225 if( rc==SQLITE_OK ){ | |
| 226 rc = fts5CInstIterInit(pApi, pFts, iCol, &ctx.iter); | |
| 227 } | |
| 228 | |
| 229 if( rc==SQLITE_OK ){ | |
| 230 rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx,fts5HighlightCb); | |
| 231 } | |
| 232 fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff); | |
| 233 | |
| 234 if( rc==SQLITE_OK ){ | |
| 235 sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT); | |
| 236 } | |
| 237 sqlite3_free(ctx.zOut); | |
| 238 } | |
| 239 if( rc!=SQLITE_OK ){ | |
| 240 sqlite3_result_error_code(pCtx, rc); | |
| 241 } | |
| 242 } | |
| 243 /* | |
| 244 ** End of highlight() implementation. | |
| 245 **************************************************************************/ | |
| 246 | |
| 247 /* | |
| 248 ** Implementation of snippet() function. | |
| 249 */ | |
| 250 static void fts5SnippetFunction( | |
| 251 const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ | |
| 252 Fts5Context *pFts, /* First arg to pass to pApi functions */ | |
| 253 sqlite3_context *pCtx, /* Context for returning result/error */ | |
| 254 int nVal, /* Number of values in apVal[] array */ | |
| 255 sqlite3_value **apVal /* Array of trailing arguments */ | |
| 256 ){ | |
| 257 HighlightContext ctx; | |
| 258 int rc = SQLITE_OK; /* Return code */ | |
| 259 int iCol; /* 1st argument to snippet() */ | |
| 260 const char *zEllips; /* 4th argument to snippet() */ | |
| 261 int nToken; /* 5th argument to snippet() */ | |
| 262 int nInst = 0; /* Number of instance matches this row */ | |
| 263 int i; /* Used to iterate through instances */ | |
| 264 int nPhrase; /* Number of phrases in query */ | |
| 265 unsigned char *aSeen; /* Array of "seen instance" flags */ | |
| 266 int iBestCol; /* Column containing best snippet */ | |
| 267 int iBestStart = 0; /* First token of best snippet */ | |
| 268 int iBestLast; /* Last token of best snippet */ | |
| 269 int nBestScore = 0; /* Score of best snippet */ | |
| 270 int nColSize = 0; /* Total size of iBestCol in tokens */ | |
| 271 | |
| 272 if( nVal!=5 ){ | |
| 273 const char *zErr = "wrong number of arguments to function snippet()"; | |
| 274 sqlite3_result_error(pCtx, zErr, -1); | |
| 275 return; | |
| 276 } | |
| 277 | |
| 278 memset(&ctx, 0, sizeof(HighlightContext)); | |
| 279 iCol = sqlite3_value_int(apVal[0]); | |
| 280 ctx.zOpen = (const char*)sqlite3_value_text(apVal[1]); | |
| 281 ctx.zClose = (const char*)sqlite3_value_text(apVal[2]); | |
| 282 zEllips = (const char*)sqlite3_value_text(apVal[3]); | |
| 283 nToken = sqlite3_value_int(apVal[4]); | |
| 284 iBestLast = nToken-1; | |
| 285 | |
| 286 iBestCol = (iCol>=0 ? iCol : 0); | |
| 287 nPhrase = pApi->xPhraseCount(pFts); | |
| 288 aSeen = sqlite3_malloc(nPhrase); | |
| 289 if( aSeen==0 ){ | |
| 290 rc = SQLITE_NOMEM; | |
| 291 } | |
| 292 | |
| 293 if( rc==SQLITE_OK ){ | |
| 294 rc = pApi->xInstCount(pFts, &nInst); | |
| 295 } | |
| 296 for(i=0; rc==SQLITE_OK && i<nInst; i++){ | |
| 297 int ip, iSnippetCol, iStart; | |
| 298 memset(aSeen, 0, nPhrase); | |
| 299 rc = pApi->xInst(pFts, i, &ip, &iSnippetCol, &iStart); | |
| 300 if( rc==SQLITE_OK && (iCol<0 || iSnippetCol==iCol) ){ | |
| 301 int nScore = 1000; | |
| 302 int iLast = iStart - 1 + pApi->xPhraseSize(pFts, ip); | |
| 303 int j; | |
| 304 aSeen[ip] = 1; | |
| 305 | |
| 306 for(j=i+1; rc==SQLITE_OK && j<nInst; j++){ | |
| 307 int ic; int io; int iFinal; | |
| 308 rc = pApi->xInst(pFts, j, &ip, &ic, &io); | |
| 309 iFinal = io + pApi->xPhraseSize(pFts, ip) - 1; | |
| 310 if( rc==SQLITE_OK && ic==iSnippetCol && iLast<iStart+nToken ){ | |
| 311 nScore += aSeen[ip] ? 1000 : 1; | |
| 312 aSeen[ip] = 1; | |
| 313 if( iFinal>iLast ) iLast = iFinal; | |
| 314 } | |
| 315 } | |
| 316 | |
| 317 if( rc==SQLITE_OK && nScore>nBestScore ){ | |
| 318 iBestCol = iSnippetCol; | |
| 319 iBestStart = iStart; | |
| 320 iBestLast = iLast; | |
| 321 nBestScore = nScore; | |
| 322 } | |
| 323 } | |
| 324 } | |
| 325 | |
| 326 if( rc==SQLITE_OK ){ | |
| 327 rc = pApi->xColumnSize(pFts, iBestCol, &nColSize); | |
| 328 } | |
| 329 if( rc==SQLITE_OK ){ | |
| 330 rc = pApi->xColumnText(pFts, iBestCol, &ctx.zIn, &ctx.nIn); | |
| 331 } | |
| 332 if( ctx.zIn ){ | |
| 333 if( rc==SQLITE_OK ){ | |
| 334 rc = fts5CInstIterInit(pApi, pFts, iBestCol, &ctx.iter); | |
| 335 } | |
| 336 | |
| 337 if( (iBestStart+nToken-1)>iBestLast ){ | |
| 338 iBestStart -= (iBestStart+nToken-1-iBestLast) / 2; | |
| 339 } | |
| 340 if( iBestStart+nToken>nColSize ){ | |
| 341 iBestStart = nColSize - nToken; | |
| 342 } | |
| 343 if( iBestStart<0 ) iBestStart = 0; | |
| 344 | |
| 345 ctx.iRangeStart = iBestStart; | |
| 346 ctx.iRangeEnd = iBestStart + nToken - 1; | |
| 347 | |
| 348 if( iBestStart>0 ){ | |
| 349 fts5HighlightAppend(&rc, &ctx, zEllips, -1); | |
| 350 } | |
| 351 if( rc==SQLITE_OK ){ | |
| 352 rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx,fts5HighlightCb); | |
| 353 } | |
| 354 if( ctx.iRangeEnd>=(nColSize-1) ){ | |
| 355 fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff); | |
| 356 }else{ | |
| 357 fts5HighlightAppend(&rc, &ctx, zEllips, -1); | |
| 358 } | |
| 359 | |
| 360 if( rc==SQLITE_OK ){ | |
| 361 sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT); | |
| 362 }else{ | |
| 363 sqlite3_result_error_code(pCtx, rc); | |
| 364 } | |
| 365 sqlite3_free(ctx.zOut); | |
| 366 } | |
| 367 sqlite3_free(aSeen); | |
| 368 } | |
| 369 | |
| 370 /************************************************************************/ | |
| 371 | |
| 372 /* | |
| 373 ** The first time the bm25() function is called for a query, an instance | |
| 374 ** of the following structure is allocated and populated. | |
| 375 */ | |
| 376 typedef struct Fts5Bm25Data Fts5Bm25Data; | |
| 377 struct Fts5Bm25Data { | |
| 378 int nPhrase; /* Number of phrases in query */ | |
| 379 double avgdl; /* Average number of tokens in each row */ | |
| 380 double *aIDF; /* IDF for each phrase */ | |
| 381 double *aFreq; /* Array used to calculate phrase freq. */ | |
| 382 }; | |
| 383 | |
| 384 /* | |
| 385 ** Callback used by fts5Bm25GetData() to count the number of rows in the | |
| 386 ** table matched by each individual phrase within the query. | |
| 387 */ | |
| 388 static int fts5CountCb( | |
| 389 const Fts5ExtensionApi *pApi, | |
| 390 Fts5Context *pFts, | |
| 391 void *pUserData /* Pointer to sqlite3_int64 variable */ | |
| 392 ){ | |
| 393 sqlite3_int64 *pn = (sqlite3_int64*)pUserData; | |
| 394 (*pn)++; | |
| 395 return SQLITE_OK; | |
| 396 } | |
| 397 | |
| 398 /* | |
| 399 ** Set *ppData to point to the Fts5Bm25Data object for the current query. | |
| 400 ** If the object has not already been allocated, allocate and populate it | |
| 401 ** now. | |
| 402 */ | |
| 403 static int fts5Bm25GetData( | |
| 404 const Fts5ExtensionApi *pApi, | |
| 405 Fts5Context *pFts, | |
| 406 Fts5Bm25Data **ppData /* OUT: bm25-data object for this query */ | |
| 407 ){ | |
| 408 int rc = SQLITE_OK; /* Return code */ | |
| 409 Fts5Bm25Data *p; /* Object to return */ | |
| 410 | |
| 411 p = pApi->xGetAuxdata(pFts, 0); | |
| 412 if( p==0 ){ | |
| 413 int nPhrase; /* Number of phrases in query */ | |
| 414 sqlite3_int64 nRow = 0; /* Number of rows in table */ | |
| 415 sqlite3_int64 nToken = 0; /* Number of tokens in table */ | |
| 416 int nByte; /* Bytes of space to allocate */ | |
| 417 int i; | |
| 418 | |
| 419 /* Allocate the Fts5Bm25Data object */ | |
| 420 nPhrase = pApi->xPhraseCount(pFts); | |
| 421 nByte = sizeof(Fts5Bm25Data) + nPhrase*2*sizeof(double); | |
| 422 p = (Fts5Bm25Data*)sqlite3_malloc(nByte); | |
| 423 if( p==0 ){ | |
| 424 rc = SQLITE_NOMEM; | |
| 425 }else{ | |
| 426 memset(p, 0, nByte); | |
| 427 p->nPhrase = nPhrase; | |
| 428 p->aIDF = (double*)&p[1]; | |
| 429 p->aFreq = &p->aIDF[nPhrase]; | |
| 430 } | |
| 431 | |
| 432 /* Calculate the average document length for this FTS5 table */ | |
| 433 if( rc==SQLITE_OK ) rc = pApi->xRowCount(pFts, &nRow); | |
| 434 if( rc==SQLITE_OK ) rc = pApi->xColumnTotalSize(pFts, -1, &nToken); | |
| 435 if( rc==SQLITE_OK ) p->avgdl = (double)nToken / (double)nRow; | |
| 436 | |
| 437 /* Calculate an IDF for each phrase in the query */ | |
| 438 for(i=0; rc==SQLITE_OK && i<nPhrase; i++){ | |
| 439 sqlite3_int64 nHit = 0; | |
| 440 rc = pApi->xQueryPhrase(pFts, i, (void*)&nHit, fts5CountCb); | |
| 441 if( rc==SQLITE_OK ){ | |
| 442 /* Calculate the IDF (Inverse Document Frequency) for phrase i. | |
| 443 ** This is done using the standard BM25 formula as found on wikipedia: | |
| 444 ** | |
| 445 ** IDF = log( (N - nHit + 0.5) / (nHit + 0.5) ) | |
| 446 ** | |
| 447 ** where "N" is the total number of documents in the set and nHit | |
| 448 ** is the number that contain at least one instance of the phrase | |
| 449 ** under consideration. | |
| 450 ** | |
| 451 ** The problem with this is that if (N < 2*nHit), the IDF is | |
| 452 ** negative. Which is undesirable. So the mimimum allowable IDF is | |
| 453 ** (1e-6) - roughly the same as a term that appears in just over | |
| 454 ** half of set of 5,000,000 documents. */ | |
| 455 double idf = log( (nRow - nHit + 0.5) / (nHit + 0.5) ); | |
| 456 if( idf<=0.0 ) idf = 1e-6; | |
| 457 p->aIDF[i] = idf; | |
| 458 } | |
| 459 } | |
| 460 | |
| 461 if( rc!=SQLITE_OK ){ | |
| 462 sqlite3_free(p); | |
| 463 }else{ | |
| 464 rc = pApi->xSetAuxdata(pFts, p, sqlite3_free); | |
| 465 } | |
| 466 if( rc!=SQLITE_OK ) p = 0; | |
| 467 } | |
| 468 *ppData = p; | |
| 469 return rc; | |
| 470 } | |
| 471 | |
| 472 /* | |
| 473 ** Implementation of bm25() function. | |
| 474 */ | |
| 475 static void fts5Bm25Function( | |
| 476 const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ | |
| 477 Fts5Context *pFts, /* First arg to pass to pApi functions */ | |
| 478 sqlite3_context *pCtx, /* Context for returning result/error */ | |
| 479 int nVal, /* Number of values in apVal[] array */ | |
| 480 sqlite3_value **apVal /* Array of trailing arguments */ | |
| 481 ){ | |
| 482 const double k1 = 1.2; /* Constant "k1" from BM25 formula */ | |
| 483 const double b = 0.75; /* Constant "b" from BM25 formula */ | |
| 484 int rc = SQLITE_OK; /* Error code */ | |
| 485 double score = 0.0; /* SQL function return value */ | |
| 486 Fts5Bm25Data *pData; /* Values allocated/calculated once only */ | |
| 487 int i; /* Iterator variable */ | |
| 488 int nInst = 0; /* Value returned by xInstCount() */ | |
| 489 double D = 0.0; /* Total number of tokens in row */ | |
| 490 double *aFreq = 0; /* Array of phrase freq. for current row */ | |
| 491 | |
| 492 /* Calculate the phrase frequency (symbol "f(qi,D)" in the documentation) | |
| 493 ** for each phrase in the query for the current row. */ | |
| 494 rc = fts5Bm25GetData(pApi, pFts, &pData); | |
| 495 if( rc==SQLITE_OK ){ | |
| 496 aFreq = pData->aFreq; | |
| 497 memset(aFreq, 0, sizeof(double) * pData->nPhrase); | |
| 498 rc = pApi->xInstCount(pFts, &nInst); | |
| 499 } | |
| 500 for(i=0; rc==SQLITE_OK && i<nInst; i++){ | |
| 501 int ip; int ic; int io; | |
| 502 rc = pApi->xInst(pFts, i, &ip, &ic, &io); | |
| 503 if( rc==SQLITE_OK ){ | |
| 504 double w = (nVal > ic) ? sqlite3_value_double(apVal[ic]) : 1.0; | |
| 505 aFreq[ip] += w; | |
| 506 } | |
| 507 } | |
| 508 | |
| 509 /* Figure out the total size of the current row in tokens. */ | |
| 510 if( rc==SQLITE_OK ){ | |
| 511 int nTok; | |
| 512 rc = pApi->xColumnSize(pFts, -1, &nTok); | |
| 513 D = (double)nTok; | |
| 514 } | |
| 515 | |
| 516 /* Determine the BM25 score for the current row. */ | |
| 517 for(i=0; rc==SQLITE_OK && i<pData->nPhrase; i++){ | |
| 518 score += pData->aIDF[i] * ( | |
| 519 ( aFreq[i] * (k1 + 1.0) ) / | |
| 520 ( aFreq[i] + k1 * (1 - b + b * D / pData->avgdl) ) | |
| 521 ); | |
| 522 } | |
| 523 | |
| 524 /* If no error has occurred, return the calculated score. Otherwise, | |
| 525 ** throw an SQL exception. */ | |
| 526 if( rc==SQLITE_OK ){ | |
| 527 sqlite3_result_double(pCtx, -1.0 * score); | |
| 528 }else{ | |
| 529 sqlite3_result_error_code(pCtx, rc); | |
| 530 } | |
| 531 } | |
| 532 | |
| 533 int sqlite3Fts5AuxInit(fts5_api *pApi){ | |
| 534 struct Builtin { | |
| 535 const char *zFunc; /* Function name (nul-terminated) */ | |
| 536 void *pUserData; /* User-data pointer */ | |
| 537 fts5_extension_function xFunc;/* Callback function */ | |
| 538 void (*xDestroy)(void*); /* Destructor function */ | |
| 539 } aBuiltin [] = { | |
| 540 { "snippet", 0, fts5SnippetFunction, 0 }, | |
| 541 { "highlight", 0, fts5HighlightFunction, 0 }, | |
| 542 { "bm25", 0, fts5Bm25Function, 0 }, | |
| 543 }; | |
| 544 int rc = SQLITE_OK; /* Return code */ | |
| 545 int i; /* To iterate through builtin functions */ | |
| 546 | |
| 547 for(i=0; rc==SQLITE_OK && i<(int)ArraySize(aBuiltin); i++){ | |
| 548 rc = pApi->xCreateFunction(pApi, | |
| 549 aBuiltin[i].zFunc, | |
| 550 aBuiltin[i].pUserData, | |
| 551 aBuiltin[i].xFunc, | |
| 552 aBuiltin[i].xDestroy | |
| 553 ); | |
| 554 } | |
| 555 | |
| 556 return rc; | |
| 557 } | |
| 558 | |
| 559 | |
| OLD | NEW |