| Index: third_party/sqlite/src/ext/fts5/fts5_aux.c
|
| diff --git a/third_party/sqlite/src/ext/fts5/fts5_aux.c b/third_party/sqlite/src/ext/fts5/fts5_aux.c
|
| index 011064d4058ee7fca7744424bdb7f06295b4bf45..219ea6fff8f504798f684900967a584f2e1983e7 100644
|
| --- a/third_party/sqlite/src/ext/fts5/fts5_aux.c
|
| +++ b/third_party/sqlite/src/ext/fts5/fts5_aux.c
|
| @@ -158,6 +158,8 @@ static int fts5HighlightCb(
|
| int rc = SQLITE_OK;
|
| int iPos;
|
|
|
| + UNUSED_PARAM2(pToken, nToken);
|
| +
|
| if( tflags & FTS5_TOKEN_COLOCATED ) return SQLITE_OK;
|
| iPos = p->iPos++;
|
|
|
| @@ -187,7 +189,7 @@ static int fts5HighlightCb(
|
| if( p->iRangeEnd>0 && iPos==p->iRangeEnd ){
|
| fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff);
|
| p->iOff = iEndOff;
|
| - if( iPos<p->iter.iEnd ){
|
| + if( iPos>=p->iter.iStart && iPos<p->iter.iEnd ){
|
| fts5HighlightAppend(&rc, p, p->zClose, -1);
|
| }
|
| }
|
| @@ -245,6 +247,118 @@ static void fts5HighlightFunction(
|
| **************************************************************************/
|
|
|
| /*
|
| +** Context object passed to the fts5SentenceFinderCb() function.
|
| +*/
|
| +typedef struct Fts5SFinder Fts5SFinder;
|
| +struct Fts5SFinder {
|
| + int iPos; /* Current token position */
|
| + int nFirstAlloc; /* Allocated size of aFirst[] */
|
| + int nFirst; /* Number of entries in aFirst[] */
|
| + int *aFirst; /* Array of first token in each sentence */
|
| + const char *zDoc; /* Document being tokenized */
|
| +};
|
| +
|
| +/*
|
| +** Add an entry to the Fts5SFinder.aFirst[] array. Grow the array if
|
| +** necessary. Return SQLITE_OK if successful, or SQLITE_NOMEM if an
|
| +** error occurs.
|
| +*/
|
| +static int fts5SentenceFinderAdd(Fts5SFinder *p, int iAdd){
|
| + if( p->nFirstAlloc==p->nFirst ){
|
| + int nNew = p->nFirstAlloc ? p->nFirstAlloc*2 : 64;
|
| + int *aNew;
|
| +
|
| + aNew = (int*)sqlite3_realloc(p->aFirst, nNew*sizeof(int));
|
| + if( aNew==0 ) return SQLITE_NOMEM;
|
| + p->aFirst = aNew;
|
| + p->nFirstAlloc = nNew;
|
| + }
|
| + p->aFirst[p->nFirst++] = iAdd;
|
| + return SQLITE_OK;
|
| +}
|
| +
|
| +/*
|
| +** This function is an xTokenize() callback used by the auxiliary snippet()
|
| +** function. Its job is to identify tokens that are the first in a sentence.
|
| +** For each such token, an entry is added to the SFinder.aFirst[] array.
|
| +*/
|
| +static int fts5SentenceFinderCb(
|
| + void *pContext, /* Pointer to HighlightContext object */
|
| + int tflags, /* Mask of FTS5_TOKEN_* flags */
|
| + const char *pToken, /* Buffer containing token */
|
| + int nToken, /* Size of token in bytes */
|
| + int iStartOff, /* Start offset of token */
|
| + int iEndOff /* End offset of token */
|
| +){
|
| + int rc = SQLITE_OK;
|
| +
|
| + UNUSED_PARAM2(pToken, nToken);
|
| + UNUSED_PARAM(iEndOff);
|
| +
|
| + if( (tflags & FTS5_TOKEN_COLOCATED)==0 ){
|
| + Fts5SFinder *p = (Fts5SFinder*)pContext;
|
| + if( p->iPos>0 ){
|
| + int i;
|
| + char c = 0;
|
| + for(i=iStartOff-1; i>=0; i--){
|
| + c = p->zDoc[i];
|
| + if( c!=' ' && c!='\t' && c!='\n' && c!='\r' ) break;
|
| + }
|
| + if( i!=iStartOff-1 && (c=='.' || c==':') ){
|
| + rc = fts5SentenceFinderAdd(p, p->iPos);
|
| + }
|
| + }else{
|
| + rc = fts5SentenceFinderAdd(p, 0);
|
| + }
|
| + p->iPos++;
|
| + }
|
| + return rc;
|
| +}
|
| +
|
| +static int fts5SnippetScore(
|
| + const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
|
| + Fts5Context *pFts, /* First arg to pass to pApi functions */
|
| + int nDocsize, /* Size of column in tokens */
|
| + unsigned char *aSeen, /* Array with one element per query phrase */
|
| + int iCol, /* Column to score */
|
| + int iPos, /* Starting offset to score */
|
| + int nToken, /* Max tokens per snippet */
|
| + int *pnScore, /* OUT: Score */
|
| + int *piPos /* OUT: Adjusted offset */
|
| +){
|
| + int rc;
|
| + int i;
|
| + int ip = 0;
|
| + int ic = 0;
|
| + int iOff = 0;
|
| + int iFirst = -1;
|
| + int nInst;
|
| + int nScore = 0;
|
| + int iLast = 0;
|
| +
|
| + rc = pApi->xInstCount(pFts, &nInst);
|
| + for(i=0; i<nInst && rc==SQLITE_OK; i++){
|
| + rc = pApi->xInst(pFts, i, &ip, &ic, &iOff);
|
| + if( rc==SQLITE_OK && ic==iCol && iOff>=iPos && iOff<(iPos+nToken) ){
|
| + nScore += (aSeen[ip] ? 1 : 1000);
|
| + aSeen[ip] = 1;
|
| + if( iFirst<0 ) iFirst = iOff;
|
| + iLast = iOff + pApi->xPhraseSize(pFts, ip);
|
| + }
|
| + }
|
| +
|
| + *pnScore = nScore;
|
| + if( piPos ){
|
| + int iAdj = iFirst - (nToken - (iLast-iFirst)) / 2;
|
| + if( (iAdj+nToken)>nDocsize ) iAdj = nDocsize - nToken;
|
| + if( iAdj<0 ) iAdj = 0;
|
| + *piPos = iAdj;
|
| + }
|
| +
|
| + return rc;
|
| +}
|
| +
|
| +/*
|
| ** Implementation of snippet() function.
|
| */
|
| static void fts5SnippetFunction(
|
| @@ -265,9 +379,10 @@ static void fts5SnippetFunction(
|
| unsigned char *aSeen; /* Array of "seen instance" flags */
|
| int iBestCol; /* Column containing best snippet */
|
| int iBestStart = 0; /* First token of best snippet */
|
| - int iBestLast; /* Last token of best snippet */
|
| int nBestScore = 0; /* Score of best snippet */
|
| int nColSize = 0; /* Total size of iBestCol in tokens */
|
| + Fts5SFinder sFinder; /* Used to find the beginnings of sentences */
|
| + int nCol;
|
|
|
| if( nVal!=5 ){
|
| const char *zErr = "wrong number of arguments to function snippet()";
|
| @@ -275,13 +390,13 @@ static void fts5SnippetFunction(
|
| return;
|
| }
|
|
|
| + nCol = pApi->xColumnCount(pFts);
|
| memset(&ctx, 0, sizeof(HighlightContext));
|
| iCol = sqlite3_value_int(apVal[0]);
|
| ctx.zOpen = (const char*)sqlite3_value_text(apVal[1]);
|
| ctx.zClose = (const char*)sqlite3_value_text(apVal[2]);
|
| zEllips = (const char*)sqlite3_value_text(apVal[3]);
|
| nToken = sqlite3_value_int(apVal[4]);
|
| - iBestLast = nToken-1;
|
|
|
| iBestCol = (iCol>=0 ? iCol : 0);
|
| nPhrase = pApi->xPhraseCount(pFts);
|
| @@ -289,65 +404,94 @@ static void fts5SnippetFunction(
|
| if( aSeen==0 ){
|
| rc = SQLITE_NOMEM;
|
| }
|
| -
|
| if( rc==SQLITE_OK ){
|
| rc = pApi->xInstCount(pFts, &nInst);
|
| }
|
| - for(i=0; rc==SQLITE_OK && i<nInst; i++){
|
| - int ip, iSnippetCol, iStart;
|
| - memset(aSeen, 0, nPhrase);
|
| - rc = pApi->xInst(pFts, i, &ip, &iSnippetCol, &iStart);
|
| - if( rc==SQLITE_OK && (iCol<0 || iSnippetCol==iCol) ){
|
| - int nScore = 1000;
|
| - int iLast = iStart - 1 + pApi->xPhraseSize(pFts, ip);
|
| - int j;
|
| - aSeen[ip] = 1;
|
|
|
| - for(j=i+1; rc==SQLITE_OK && j<nInst; j++){
|
| - int ic; int io; int iFinal;
|
| - rc = pApi->xInst(pFts, j, &ip, &ic, &io);
|
| - iFinal = io + pApi->xPhraseSize(pFts, ip) - 1;
|
| - if( rc==SQLITE_OK && ic==iSnippetCol && iLast<iStart+nToken ){
|
| - nScore += aSeen[ip] ? 1000 : 1;
|
| - aSeen[ip] = 1;
|
| - if( iFinal>iLast ) iLast = iFinal;
|
| + memset(&sFinder, 0, sizeof(Fts5SFinder));
|
| + for(i=0; i<nCol; i++){
|
| + if( iCol<0 || iCol==i ){
|
| + int nDoc;
|
| + int nDocsize;
|
| + int ii;
|
| + sFinder.iPos = 0;
|
| + sFinder.nFirst = 0;
|
| + rc = pApi->xColumnText(pFts, i, &sFinder.zDoc, &nDoc);
|
| + if( rc!=SQLITE_OK ) break;
|
| + rc = pApi->xTokenize(pFts,
|
| + sFinder.zDoc, nDoc, (void*)&sFinder,fts5SentenceFinderCb
|
| + );
|
| + if( rc!=SQLITE_OK ) break;
|
| + rc = pApi->xColumnSize(pFts, i, &nDocsize);
|
| + if( rc!=SQLITE_OK ) break;
|
| +
|
| + for(ii=0; rc==SQLITE_OK && ii<nInst; ii++){
|
| + int ip, ic, io;
|
| + int iAdj;
|
| + int nScore;
|
| + int jj;
|
| +
|
| + rc = pApi->xInst(pFts, ii, &ip, &ic, &io);
|
| + if( ic!=i || rc!=SQLITE_OK ) continue;
|
| + memset(aSeen, 0, nPhrase);
|
| + rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i,
|
| + io, nToken, &nScore, &iAdj
|
| + );
|
| + if( rc==SQLITE_OK && nScore>nBestScore ){
|
| + nBestScore = nScore;
|
| + iBestCol = i;
|
| + iBestStart = iAdj;
|
| + nColSize = nDocsize;
|
| }
|
| - }
|
|
|
| - if( rc==SQLITE_OK && nScore>nBestScore ){
|
| - iBestCol = iSnippetCol;
|
| - iBestStart = iStart;
|
| - iBestLast = iLast;
|
| - nBestScore = nScore;
|
| + if( rc==SQLITE_OK && sFinder.nFirst && nDocsize>nToken ){
|
| + for(jj=0; jj<(sFinder.nFirst-1); jj++){
|
| + if( sFinder.aFirst[jj+1]>io ) break;
|
| + }
|
| +
|
| + if( sFinder.aFirst[jj]<io ){
|
| + memset(aSeen, 0, nPhrase);
|
| + rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i,
|
| + sFinder.aFirst[jj], nToken, &nScore, 0
|
| + );
|
| +
|
| + nScore += (sFinder.aFirst[jj]==0 ? 120 : 100);
|
| + if( rc==SQLITE_OK && nScore>nBestScore ){
|
| + nBestScore = nScore;
|
| + iBestCol = i;
|
| + iBestStart = sFinder.aFirst[jj];
|
| + nColSize = nDocsize;
|
| + }
|
| + }
|
| + }
|
| }
|
| }
|
| }
|
|
|
| if( rc==SQLITE_OK ){
|
| - rc = pApi->xColumnSize(pFts, iBestCol, &nColSize);
|
| - }
|
| - if( rc==SQLITE_OK ){
|
| rc = pApi->xColumnText(pFts, iBestCol, &ctx.zIn, &ctx.nIn);
|
| }
|
| + if( rc==SQLITE_OK && nColSize==0 ){
|
| + rc = pApi->xColumnSize(pFts, iBestCol, &nColSize);
|
| + }
|
| if( ctx.zIn ){
|
| if( rc==SQLITE_OK ){
|
| rc = fts5CInstIterInit(pApi, pFts, iBestCol, &ctx.iter);
|
| }
|
|
|
| - if( (iBestStart+nToken-1)>iBestLast ){
|
| - iBestStart -= (iBestStart+nToken-1-iBestLast) / 2;
|
| - }
|
| - if( iBestStart+nToken>nColSize ){
|
| - iBestStart = nColSize - nToken;
|
| - }
|
| - if( iBestStart<0 ) iBestStart = 0;
|
| -
|
| ctx.iRangeStart = iBestStart;
|
| ctx.iRangeEnd = iBestStart + nToken - 1;
|
|
|
| if( iBestStart>0 ){
|
| fts5HighlightAppend(&rc, &ctx, zEllips, -1);
|
| }
|
| +
|
| + /* Advance iterator ctx.iter so that it points to the first coalesced
|
| + ** phrase instance at or following position iBestStart. */
|
| + while( ctx.iter.iStart>=0 && ctx.iter.iStart<iBestStart && rc==SQLITE_OK ){
|
| + rc = fts5CInstIterNext(&ctx.iter);
|
| + }
|
| +
|
| if( rc==SQLITE_OK ){
|
| rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx,fts5HighlightCb);
|
| }
|
| @@ -356,15 +500,15 @@ static void fts5SnippetFunction(
|
| }else{
|
| fts5HighlightAppend(&rc, &ctx, zEllips, -1);
|
| }
|
| -
|
| - if( rc==SQLITE_OK ){
|
| - sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT);
|
| - }else{
|
| - sqlite3_result_error_code(pCtx, rc);
|
| - }
|
| - sqlite3_free(ctx.zOut);
|
| }
|
| + if( rc==SQLITE_OK ){
|
| + sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT);
|
| + }else{
|
| + sqlite3_result_error_code(pCtx, rc);
|
| + }
|
| + sqlite3_free(ctx.zOut);
|
| sqlite3_free(aSeen);
|
| + sqlite3_free(sFinder.aFirst);
|
| }
|
|
|
| /************************************************************************/
|
| @@ -391,6 +535,7 @@ static int fts5CountCb(
|
| void *pUserData /* Pointer to sqlite3_int64 variable */
|
| ){
|
| sqlite3_int64 *pn = (sqlite3_int64*)pUserData;
|
| + UNUSED_PARAM2(pApi, pFts);
|
| (*pn)++;
|
| return SQLITE_OK;
|
| }
|
| @@ -544,7 +689,7 @@ int sqlite3Fts5AuxInit(fts5_api *pApi){
|
| int rc = SQLITE_OK; /* Return code */
|
| int i; /* To iterate through builtin functions */
|
|
|
| - for(i=0; rc==SQLITE_OK && i<(int)ArraySize(aBuiltin); i++){
|
| + for(i=0; rc==SQLITE_OK && i<ArraySize(aBuiltin); i++){
|
| rc = pApi->xCreateFunction(pApi,
|
| aBuiltin[i].zFunc,
|
| aBuiltin[i].pUserData,
|
|
|