Index: third_party/sqlite/src/ext/fts5/fts5_aux.c |
diff --git a/third_party/sqlite/src/ext/fts5/fts5_aux.c b/third_party/sqlite/src/ext/fts5/fts5_aux.c |
index 011064d4058ee7fca7744424bdb7f06295b4bf45..219ea6fff8f504798f684900967a584f2e1983e7 100644 |
--- a/third_party/sqlite/src/ext/fts5/fts5_aux.c |
+++ b/third_party/sqlite/src/ext/fts5/fts5_aux.c |
@@ -158,6 +158,8 @@ static int fts5HighlightCb( |
int rc = SQLITE_OK; |
int iPos; |
+ UNUSED_PARAM2(pToken, nToken); |
+ |
if( tflags & FTS5_TOKEN_COLOCATED ) return SQLITE_OK; |
iPos = p->iPos++; |
@@ -187,7 +189,7 @@ static int fts5HighlightCb( |
if( p->iRangeEnd>0 && iPos==p->iRangeEnd ){ |
fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff); |
p->iOff = iEndOff; |
- if( iPos<p->iter.iEnd ){ |
+ if( iPos>=p->iter.iStart && iPos<p->iter.iEnd ){ |
fts5HighlightAppend(&rc, p, p->zClose, -1); |
} |
} |
@@ -245,6 +247,118 @@ static void fts5HighlightFunction( |
**************************************************************************/ |
/* |
+** Context object passed to the fts5SentenceFinderCb() function. |
+*/ |
+typedef struct Fts5SFinder Fts5SFinder; |
+struct Fts5SFinder { |
+ int iPos; /* Position of current token (colocated tokens not counted) */ |
+ int nFirstAlloc; /* Allocated size of aFirst[], in entries */ |
+ int nFirst; /* Number of entries of aFirst[] in use */ |
+ int *aFirst; /* Token position of the first token of each sentence */ |
+ const char *zDoc; /* Text of the document being tokenized */ |
+}; |
+ |
+/* |
+** Append token position iAdd to the Fts5SFinder.aFirst[] array, doubling the |
+** allocation (64 entries at first) if it is full. Return SQLITE_OK, or |
+** SQLITE_NOMEM if the allocation fails (p->aFirst is then left unchanged). |
+*/ |
+static int fts5SentenceFinderAdd(Fts5SFinder *p, int iAdd){ |
+  if( p->nFirstAlloc==p->nFirst ){ |
+    int nNew = p->nFirstAlloc ? p->nFirstAlloc*2 : 64; |
+    /* Size the request in 64 bits: narrowed to the int argument of |
+    ** sqlite3_realloc(), a size of 2^31 bytes or more would free aFirst[]. */ |
+    int *aNew = (int*)sqlite3_realloc64(p->aFirst, (sqlite3_uint64)nNew*sizeof(int)); |
+    if( aNew==0 ) return SQLITE_NOMEM; |
+    p->aFirst = aNew; |
+    p->nFirstAlloc = nNew; |
+  } |
+  p->aFirst[p->nFirst++] = iAdd; |
+  return SQLITE_OK; |
+} |
+ |
+/* |
+** xTokenize() callback used by the auxiliary snippet() function to locate |
+** sentence starts. pContext is the Fts5SFinder being filled in: the position |
+** of each token judged to begin a sentence is appended to its aFirst[] array. |
+*/ |
+static int fts5SentenceFinderCb( |
+  void *pContext,                 /* Pointer to Fts5SFinder object */ |
+  int tflags,                     /* Mask of FTS5_TOKEN_* flags */ |
+  const char *pToken,             /* Buffer containing token */ |
+  int nToken,                     /* Size of token in bytes */ |
+  int iStartOff,                  /* Start offset of token */ |
+  int iEndOff                     /* End offset of token */ |
+){ |
+  Fts5SFinder *pFinder = (Fts5SFinder*)pContext; |
+  int rc = SQLITE_OK; |
+  UNUSED_PARAM2(pToken, nToken); |
+  UNUSED_PARAM(iEndOff); |
+  /* Colocated tokens are skipped: iPos is not advanced for them. */ |
+  if( tflags & FTS5_TOKEN_COLOCATED ) return SQLITE_OK; |
+  if( pFinder->iPos>0 ){ |
+    /* Sentence start: whitespace before the token, preceded by '.' or ':'. */ |
+    int iScan = iStartOff; |
+    char cPrev = 0; |
+    while( iScan>0 ){ |
+      cPrev = pFinder->zDoc[iScan-1]; |
+      if( cPrev!=' ' && cPrev!='\t' && cPrev!='\n' && cPrev!='\r' ) break; |
+      iScan--; |
+    } |
+    if( iScan!=iStartOff && (cPrev=='.' || cPrev==':') ){ |
+      rc = fts5SentenceFinderAdd(pFinder, pFinder->iPos); |
+    } |
+  }else{ |
+    rc = fts5SentenceFinderAdd(pFinder, 0); |
+  } |
+  pFinder->iPos++; |
+  return rc; |
+} |
+ |
+/* |
+** Score the snippet of at most nToken tokens starting at token iPos of column |
+** iCol. A phrase instance in range adds 1000 if aSeen[] for its phrase is |
+** clear (the flag is then set), or 1 otherwise. If piPos is not NULL it gets |
+** a start offset centring the instances, lowered if needed so the snippet ends |
+** within nDocsize tokens, but never below 0. 64-bit math avoids int overflow. |
+*/ |
+static int fts5SnippetScore( |
+  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */ |
+  Fts5Context *pFts,              /* First arg to pass to pApi functions */ |
+  int nDocsize,                   /* Size of column in tokens */ |
+  unsigned char *aSeen,           /* Array with one element per query phrase */ |
+  int iCol,                       /* Column to score */ |
+  int iPos,                       /* Starting offset to score */ |
+  int nToken,                     /* Max tokens per snippet */ |
+  int *pnScore,                   /* OUT: Score */ |
+  int *piPos                      /* OUT: Adjusted offset */ |
+){ |
+  int rc, i, nInst; |
+  int ip = 0, ic = 0, iOff = 0;   /* Outputs of xInst() */ |
+  int iFirst = -1, iLast = 0;     /* First instance start; end of latest one */ |
+  int nScore = 0; |
+  sqlite3_int64 iEnd = (sqlite3_int64)iPos + nToken;  /* One past the window */ |
+  rc = pApi->xInstCount(pFts, &nInst); |
+  for(i=0; i<nInst && rc==SQLITE_OK; i++){ |
+    rc = pApi->xInst(pFts, i, &ip, &ic, &iOff); |
+    if( rc==SQLITE_OK && ic==iCol && iOff>=iPos && iOff<iEnd ){ |
+      nScore += (aSeen[ip] ? 1 : 1000); |
+      aSeen[ip] = 1; |
+      if( iFirst<0 ) iFirst = iOff; |
+      iLast = iOff + pApi->xPhraseSize(pFts, ip); |
+    } |
+  } |
+  *pnScore = nScore; |
+  if( piPos ){ |
+    sqlite3_int64 iAdj = iFirst - ((sqlite3_int64)nToken - (iLast-iFirst)) / 2; |
+    if( (iAdj+nToken)>nDocsize ) iAdj = (sqlite3_int64)nDocsize - nToken; |
+    if( iAdj<0 ) iAdj = 0; |
+    *piPos = (int)iAdj; |
+  } |
+  return rc; |
+} |
+ |
+/* |
** Implementation of snippet() function. |
*/ |
static void fts5SnippetFunction( |
@@ -265,9 +379,10 @@ static void fts5SnippetFunction( |
unsigned char *aSeen; /* Array of "seen instance" flags */ |
int iBestCol; /* Column containing best snippet */ |
int iBestStart = 0; /* First token of best snippet */ |
- int iBestLast; /* Last token of best snippet */ |
int nBestScore = 0; /* Score of best snippet */ |
int nColSize = 0; /* Total size of iBestCol in tokens */ |
+ Fts5SFinder sFinder; /* Used to find the beginnings of sentences */ |
+ int nCol; |
if( nVal!=5 ){ |
const char *zErr = "wrong number of arguments to function snippet()"; |
@@ -275,13 +390,13 @@ static void fts5SnippetFunction( |
return; |
} |
+ nCol = pApi->xColumnCount(pFts); |
memset(&ctx, 0, sizeof(HighlightContext)); |
iCol = sqlite3_value_int(apVal[0]); |
ctx.zOpen = (const char*)sqlite3_value_text(apVal[1]); |
ctx.zClose = (const char*)sqlite3_value_text(apVal[2]); |
zEllips = (const char*)sqlite3_value_text(apVal[3]); |
nToken = sqlite3_value_int(apVal[4]); |
- iBestLast = nToken-1; |
iBestCol = (iCol>=0 ? iCol : 0); |
nPhrase = pApi->xPhraseCount(pFts); |
@@ -289,65 +404,94 @@ static void fts5SnippetFunction( |
if( aSeen==0 ){ |
rc = SQLITE_NOMEM; |
} |
- |
if( rc==SQLITE_OK ){ |
rc = pApi->xInstCount(pFts, &nInst); |
} |
- for(i=0; rc==SQLITE_OK && i<nInst; i++){ |
- int ip, iSnippetCol, iStart; |
- memset(aSeen, 0, nPhrase); |
- rc = pApi->xInst(pFts, i, &ip, &iSnippetCol, &iStart); |
- if( rc==SQLITE_OK && (iCol<0 || iSnippetCol==iCol) ){ |
- int nScore = 1000; |
- int iLast = iStart - 1 + pApi->xPhraseSize(pFts, ip); |
- int j; |
- aSeen[ip] = 1; |
- for(j=i+1; rc==SQLITE_OK && j<nInst; j++){ |
- int ic; int io; int iFinal; |
- rc = pApi->xInst(pFts, j, &ip, &ic, &io); |
- iFinal = io + pApi->xPhraseSize(pFts, ip) - 1; |
- if( rc==SQLITE_OK && ic==iSnippetCol && iLast<iStart+nToken ){ |
- nScore += aSeen[ip] ? 1000 : 1; |
- aSeen[ip] = 1; |
- if( iFinal>iLast ) iLast = iFinal; |
+ memset(&sFinder, 0, sizeof(Fts5SFinder)); |
+ for(i=0; i<nCol; i++){ |
+ if( iCol<0 || iCol==i ){ |
+ int nDoc; |
+ int nDocsize; |
+ int ii; |
+ sFinder.iPos = 0; |
+ sFinder.nFirst = 0; |
+ rc = pApi->xColumnText(pFts, i, &sFinder.zDoc, &nDoc); |
+ if( rc!=SQLITE_OK ) break; |
+ rc = pApi->xTokenize(pFts, |
+ sFinder.zDoc, nDoc, (void*)&sFinder,fts5SentenceFinderCb |
+ ); |
+ if( rc!=SQLITE_OK ) break; |
+ rc = pApi->xColumnSize(pFts, i, &nDocsize); |
+ if( rc!=SQLITE_OK ) break; |
+ |
+ for(ii=0; rc==SQLITE_OK && ii<nInst; ii++){ |
+ int ip, ic, io; |
+ int iAdj; |
+ int nScore; |
+ int jj; |
+ |
+ rc = pApi->xInst(pFts, ii, &ip, &ic, &io); |
+ if( ic!=i || rc!=SQLITE_OK ) continue; |
+ memset(aSeen, 0, nPhrase); |
+ rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i, |
+ io, nToken, &nScore, &iAdj |
+ ); |
+ if( rc==SQLITE_OK && nScore>nBestScore ){ |
+ nBestScore = nScore; |
+ iBestCol = i; |
+ iBestStart = iAdj; |
+ nColSize = nDocsize; |
} |
- } |
- if( rc==SQLITE_OK && nScore>nBestScore ){ |
- iBestCol = iSnippetCol; |
- iBestStart = iStart; |
- iBestLast = iLast; |
- nBestScore = nScore; |
+ if( rc==SQLITE_OK && sFinder.nFirst && nDocsize>nToken ){ |
+ for(jj=0; jj<(sFinder.nFirst-1); jj++){ |
+ if( sFinder.aFirst[jj+1]>io ) break; |
+ } |
+ |
+ if( sFinder.aFirst[jj]<io ){ |
+ memset(aSeen, 0, nPhrase); |
+ rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i, |
+ sFinder.aFirst[jj], nToken, &nScore, 0 |
+ ); |
+ |
+ nScore += (sFinder.aFirst[jj]==0 ? 120 : 100); |
+ if( rc==SQLITE_OK && nScore>nBestScore ){ |
+ nBestScore = nScore; |
+ iBestCol = i; |
+ iBestStart = sFinder.aFirst[jj]; |
+ nColSize = nDocsize; |
+ } |
+ } |
+ } |
} |
} |
} |
if( rc==SQLITE_OK ){ |
- rc = pApi->xColumnSize(pFts, iBestCol, &nColSize); |
- } |
- if( rc==SQLITE_OK ){ |
rc = pApi->xColumnText(pFts, iBestCol, &ctx.zIn, &ctx.nIn); |
} |
+ if( rc==SQLITE_OK && nColSize==0 ){ |
+ rc = pApi->xColumnSize(pFts, iBestCol, &nColSize); |
+ } |
if( ctx.zIn ){ |
if( rc==SQLITE_OK ){ |
rc = fts5CInstIterInit(pApi, pFts, iBestCol, &ctx.iter); |
} |
- if( (iBestStart+nToken-1)>iBestLast ){ |
- iBestStart -= (iBestStart+nToken-1-iBestLast) / 2; |
- } |
- if( iBestStart+nToken>nColSize ){ |
- iBestStart = nColSize - nToken; |
- } |
- if( iBestStart<0 ) iBestStart = 0; |
- |
ctx.iRangeStart = iBestStart; |
ctx.iRangeEnd = iBestStart + nToken - 1; |
if( iBestStart>0 ){ |
fts5HighlightAppend(&rc, &ctx, zEllips, -1); |
} |
+ |
+ /* Advance iterator ctx.iter so that it points to the first coalesced |
+ ** phrase instance at or following position iBestStart. */ |
+ while( ctx.iter.iStart>=0 && ctx.iter.iStart<iBestStart && rc==SQLITE_OK ){ |
+ rc = fts5CInstIterNext(&ctx.iter); |
+ } |
+ |
if( rc==SQLITE_OK ){ |
rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx,fts5HighlightCb); |
} |
@@ -356,15 +500,15 @@ static void fts5SnippetFunction( |
}else{ |
fts5HighlightAppend(&rc, &ctx, zEllips, -1); |
} |
- |
- if( rc==SQLITE_OK ){ |
- sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT); |
- }else{ |
- sqlite3_result_error_code(pCtx, rc); |
- } |
- sqlite3_free(ctx.zOut); |
} |
+ if( rc==SQLITE_OK ){ |
+ sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT); |
+ }else{ |
+ sqlite3_result_error_code(pCtx, rc); |
+ } |
+ sqlite3_free(ctx.zOut); |
sqlite3_free(aSeen); |
+ sqlite3_free(sFinder.aFirst); |
} |
/************************************************************************/ |
@@ -391,6 +535,7 @@ static int fts5CountCb( |
void *pUserData /* Pointer to sqlite3_int64 variable */ |
){ |
sqlite3_int64 *pn = (sqlite3_int64*)pUserData; |
+ UNUSED_PARAM2(pApi, pFts); |
(*pn)++; |
return SQLITE_OK; |
} |
@@ -544,7 +689,7 @@ int sqlite3Fts5AuxInit(fts5_api *pApi){ |
int rc = SQLITE_OK; /* Return code */ |
int i; /* To iterate through builtin functions */ |
- for(i=0; rc==SQLITE_OK && i<(int)ArraySize(aBuiltin); i++){ |
+ for(i=0; rc==SQLITE_OK && i<ArraySize(aBuiltin); i++){ |
rc = pApi->xCreateFunction(pApi, |
aBuiltin[i].zFunc, |
aBuiltin[i].pUserData, |