Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1878)

Unified Diff: third_party/sqlite/src/ext/fts5/fts5_aux.c

Issue 2751253002: [sql] Import SQLite 3.17.0. (Closed)
Patch Set: also clang on Linux i386 Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « third_party/sqlite/src/ext/fts5/fts5Int.h ('k') | third_party/sqlite/src/ext/fts5/fts5_buffer.c » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: third_party/sqlite/src/ext/fts5/fts5_aux.c
diff --git a/third_party/sqlite/src/ext/fts5/fts5_aux.c b/third_party/sqlite/src/ext/fts5/fts5_aux.c
index 011064d4058ee7fca7744424bdb7f06295b4bf45..219ea6fff8f504798f684900967a584f2e1983e7 100644
--- a/third_party/sqlite/src/ext/fts5/fts5_aux.c
+++ b/third_party/sqlite/src/ext/fts5/fts5_aux.c
@@ -158,6 +158,8 @@ static int fts5HighlightCb(
int rc = SQLITE_OK;
int iPos;
+ UNUSED_PARAM2(pToken, nToken);
+
if( tflags & FTS5_TOKEN_COLOCATED ) return SQLITE_OK;
iPos = p->iPos++;
@@ -187,7 +189,7 @@ static int fts5HighlightCb(
if( p->iRangeEnd>0 && iPos==p->iRangeEnd ){
fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff);
p->iOff = iEndOff;
- if( iPos<p->iter.iEnd ){
+ if( iPos>=p->iter.iStart && iPos<p->iter.iEnd ){
fts5HighlightAppend(&rc, p, p->zClose, -1);
}
}
@@ -245,6 +247,118 @@ static void fts5HighlightFunction(
**************************************************************************/
/*
+** Context object passed to the fts5SentenceFinderCb() function.
+*/
+typedef struct Fts5SFinder Fts5SFinder;
+struct Fts5SFinder {
+ int iPos; /* Current token position */
+ int nFirstAlloc; /* Allocated size of aFirst[] */
+ int nFirst; /* Number of entries in aFirst[] */
+ int *aFirst; /* Array of first token in each sentence */
+ const char *zDoc; /* Document being tokenized */
+};
+
+/*
+** Add an entry to the Fts5SFinder.aFirst[] array. Grow the array if
+** necessary. Return SQLITE_OK if successful, or SQLITE_NOMEM if an
+** error occurs.
+*/
+static int fts5SentenceFinderAdd(Fts5SFinder *p, int iAdd){
+ if( p->nFirstAlloc==p->nFirst ){
+ int nNew = p->nFirstAlloc ? p->nFirstAlloc*2 : 64;
+ int *aNew;
+
+ aNew = (int*)sqlite3_realloc(p->aFirst, nNew*sizeof(int));
+ if( aNew==0 ) return SQLITE_NOMEM;
+ p->aFirst = aNew;
+ p->nFirstAlloc = nNew;
+ }
+ p->aFirst[p->nFirst++] = iAdd;
+ return SQLITE_OK;
+}
+
+/*
+** This function is an xTokenize() callback used by the auxiliary snippet()
+** function. Its job is to identify tokens that are the first in a sentence.
+** For each such token, an entry is added to the SFinder.aFirst[] array.
+*/
+static int fts5SentenceFinderCb(
+ void *pContext, /* Pointer to HighlightContext object */
+ int tflags, /* Mask of FTS5_TOKEN_* flags */
+ const char *pToken, /* Buffer containing token */
+ int nToken, /* Size of token in bytes */
+ int iStartOff, /* Start offset of token */
+ int iEndOff /* End offset of token */
+){
+ int rc = SQLITE_OK;
+
+ UNUSED_PARAM2(pToken, nToken);
+ UNUSED_PARAM(iEndOff);
+
+ if( (tflags & FTS5_TOKEN_COLOCATED)==0 ){
+ Fts5SFinder *p = (Fts5SFinder*)pContext;
+ if( p->iPos>0 ){
+ int i;
+ char c = 0;
+ for(i=iStartOff-1; i>=0; i--){
+ c = p->zDoc[i];
+ if( c!=' ' && c!='\t' && c!='\n' && c!='\r' ) break;
+ }
+ if( i!=iStartOff-1 && (c=='.' || c==':') ){
+ rc = fts5SentenceFinderAdd(p, p->iPos);
+ }
+ }else{
+ rc = fts5SentenceFinderAdd(p, 0);
+ }
+ p->iPos++;
+ }
+ return rc;
+}
+
+static int fts5SnippetScore(
+ const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
+ Fts5Context *pFts, /* First arg to pass to pApi functions */
+ int nDocsize, /* Size of column in tokens */
+ unsigned char *aSeen, /* Array with one element per query phrase */
+ int iCol, /* Column to score */
+ int iPos, /* Starting offset to score */
+ int nToken, /* Max tokens per snippet */
+ int *pnScore, /* OUT: Score */
+ int *piPos /* OUT: Adjusted offset */
+){
+ int rc;
+ int i;
+ int ip = 0;
+ int ic = 0;
+ int iOff = 0;
+ int iFirst = -1;
+ int nInst;
+ int nScore = 0;
+ int iLast = 0;
+
+ rc = pApi->xInstCount(pFts, &nInst);
+ for(i=0; i<nInst && rc==SQLITE_OK; i++){
+ rc = pApi->xInst(pFts, i, &ip, &ic, &iOff);
+ if( rc==SQLITE_OK && ic==iCol && iOff>=iPos && iOff<(iPos+nToken) ){
+ nScore += (aSeen[ip] ? 1 : 1000);
+ aSeen[ip] = 1;
+ if( iFirst<0 ) iFirst = iOff;
+ iLast = iOff + pApi->xPhraseSize(pFts, ip);
+ }
+ }
+
+ *pnScore = nScore;
+ if( piPos ){
+ int iAdj = iFirst - (nToken - (iLast-iFirst)) / 2;
+ if( (iAdj+nToken)>nDocsize ) iAdj = nDocsize - nToken;
+ if( iAdj<0 ) iAdj = 0;
+ *piPos = iAdj;
+ }
+
+ return rc;
+}
+
+/*
** Implementation of snippet() function.
*/
static void fts5SnippetFunction(
@@ -265,9 +379,10 @@ static void fts5SnippetFunction(
unsigned char *aSeen; /* Array of "seen instance" flags */
int iBestCol; /* Column containing best snippet */
int iBestStart = 0; /* First token of best snippet */
- int iBestLast; /* Last token of best snippet */
int nBestScore = 0; /* Score of best snippet */
int nColSize = 0; /* Total size of iBestCol in tokens */
+ Fts5SFinder sFinder; /* Used to find the beginnings of sentences */
+ int nCol;
if( nVal!=5 ){
const char *zErr = "wrong number of arguments to function snippet()";
@@ -275,13 +390,13 @@ static void fts5SnippetFunction(
return;
}
+ nCol = pApi->xColumnCount(pFts);
memset(&ctx, 0, sizeof(HighlightContext));
iCol = sqlite3_value_int(apVal[0]);
ctx.zOpen = (const char*)sqlite3_value_text(apVal[1]);
ctx.zClose = (const char*)sqlite3_value_text(apVal[2]);
zEllips = (const char*)sqlite3_value_text(apVal[3]);
nToken = sqlite3_value_int(apVal[4]);
- iBestLast = nToken-1;
iBestCol = (iCol>=0 ? iCol : 0);
nPhrase = pApi->xPhraseCount(pFts);
@@ -289,65 +404,94 @@ static void fts5SnippetFunction(
if( aSeen==0 ){
rc = SQLITE_NOMEM;
}
-
if( rc==SQLITE_OK ){
rc = pApi->xInstCount(pFts, &nInst);
}
- for(i=0; rc==SQLITE_OK && i<nInst; i++){
- int ip, iSnippetCol, iStart;
- memset(aSeen, 0, nPhrase);
- rc = pApi->xInst(pFts, i, &ip, &iSnippetCol, &iStart);
- if( rc==SQLITE_OK && (iCol<0 || iSnippetCol==iCol) ){
- int nScore = 1000;
- int iLast = iStart - 1 + pApi->xPhraseSize(pFts, ip);
- int j;
- aSeen[ip] = 1;
- for(j=i+1; rc==SQLITE_OK && j<nInst; j++){
- int ic; int io; int iFinal;
- rc = pApi->xInst(pFts, j, &ip, &ic, &io);
- iFinal = io + pApi->xPhraseSize(pFts, ip) - 1;
- if( rc==SQLITE_OK && ic==iSnippetCol && iLast<iStart+nToken ){
- nScore += aSeen[ip] ? 1000 : 1;
- aSeen[ip] = 1;
- if( iFinal>iLast ) iLast = iFinal;
+ memset(&sFinder, 0, sizeof(Fts5SFinder));
+ for(i=0; i<nCol; i++){
+ if( iCol<0 || iCol==i ){
+ int nDoc;
+ int nDocsize;
+ int ii;
+ sFinder.iPos = 0;
+ sFinder.nFirst = 0;
+ rc = pApi->xColumnText(pFts, i, &sFinder.zDoc, &nDoc);
+ if( rc!=SQLITE_OK ) break;
+ rc = pApi->xTokenize(pFts,
+ sFinder.zDoc, nDoc, (void*)&sFinder,fts5SentenceFinderCb
+ );
+ if( rc!=SQLITE_OK ) break;
+ rc = pApi->xColumnSize(pFts, i, &nDocsize);
+ if( rc!=SQLITE_OK ) break;
+
+ for(ii=0; rc==SQLITE_OK && ii<nInst; ii++){
+ int ip, ic, io;
+ int iAdj;
+ int nScore;
+ int jj;
+
+ rc = pApi->xInst(pFts, ii, &ip, &ic, &io);
+ if( ic!=i || rc!=SQLITE_OK ) continue;
+ memset(aSeen, 0, nPhrase);
+ rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i,
+ io, nToken, &nScore, &iAdj
+ );
+ if( rc==SQLITE_OK && nScore>nBestScore ){
+ nBestScore = nScore;
+ iBestCol = i;
+ iBestStart = iAdj;
+ nColSize = nDocsize;
}
- }
- if( rc==SQLITE_OK && nScore>nBestScore ){
- iBestCol = iSnippetCol;
- iBestStart = iStart;
- iBestLast = iLast;
- nBestScore = nScore;
+ if( rc==SQLITE_OK && sFinder.nFirst && nDocsize>nToken ){
+ for(jj=0; jj<(sFinder.nFirst-1); jj++){
+ if( sFinder.aFirst[jj+1]>io ) break;
+ }
+
+ if( sFinder.aFirst[jj]<io ){
+ memset(aSeen, 0, nPhrase);
+ rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i,
+ sFinder.aFirst[jj], nToken, &nScore, 0
+ );
+
+ nScore += (sFinder.aFirst[jj]==0 ? 120 : 100);
+ if( rc==SQLITE_OK && nScore>nBestScore ){
+ nBestScore = nScore;
+ iBestCol = i;
+ iBestStart = sFinder.aFirst[jj];
+ nColSize = nDocsize;
+ }
+ }
+ }
}
}
}
if( rc==SQLITE_OK ){
- rc = pApi->xColumnSize(pFts, iBestCol, &nColSize);
- }
- if( rc==SQLITE_OK ){
rc = pApi->xColumnText(pFts, iBestCol, &ctx.zIn, &ctx.nIn);
}
+ if( rc==SQLITE_OK && nColSize==0 ){
+ rc = pApi->xColumnSize(pFts, iBestCol, &nColSize);
+ }
if( ctx.zIn ){
if( rc==SQLITE_OK ){
rc = fts5CInstIterInit(pApi, pFts, iBestCol, &ctx.iter);
}
- if( (iBestStart+nToken-1)>iBestLast ){
- iBestStart -= (iBestStart+nToken-1-iBestLast) / 2;
- }
- if( iBestStart+nToken>nColSize ){
- iBestStart = nColSize - nToken;
- }
- if( iBestStart<0 ) iBestStart = 0;
-
ctx.iRangeStart = iBestStart;
ctx.iRangeEnd = iBestStart + nToken - 1;
if( iBestStart>0 ){
fts5HighlightAppend(&rc, &ctx, zEllips, -1);
}
+
+ /* Advance iterator ctx.iter so that it points to the first coalesced
+ ** phrase instance at or following position iBestStart. */
+ while( ctx.iter.iStart>=0 && ctx.iter.iStart<iBestStart && rc==SQLITE_OK ){
+ rc = fts5CInstIterNext(&ctx.iter);
+ }
+
if( rc==SQLITE_OK ){
rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx,fts5HighlightCb);
}
@@ -356,15 +500,15 @@ static void fts5SnippetFunction(
}else{
fts5HighlightAppend(&rc, &ctx, zEllips, -1);
}
-
- if( rc==SQLITE_OK ){
- sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT);
- }else{
- sqlite3_result_error_code(pCtx, rc);
- }
- sqlite3_free(ctx.zOut);
}
+ if( rc==SQLITE_OK ){
+ sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT);
+ }else{
+ sqlite3_result_error_code(pCtx, rc);
+ }
+ sqlite3_free(ctx.zOut);
sqlite3_free(aSeen);
+ sqlite3_free(sFinder.aFirst);
}
/************************************************************************/
@@ -391,6 +535,7 @@ static int fts5CountCb(
void *pUserData /* Pointer to sqlite3_int64 variable */
){
sqlite3_int64 *pn = (sqlite3_int64*)pUserData;
+ UNUSED_PARAM2(pApi, pFts);
(*pn)++;
return SQLITE_OK;
}
@@ -544,7 +689,7 @@ int sqlite3Fts5AuxInit(fts5_api *pApi){
int rc = SQLITE_OK; /* Return code */
int i; /* To iterate through builtin functions */
- for(i=0; rc==SQLITE_OK && i<(int)ArraySize(aBuiltin); i++){
+ for(i=0; rc==SQLITE_OK && i<ArraySize(aBuiltin); i++){
rc = pApi->xCreateFunction(pApi,
aBuiltin[i].zFunc,
aBuiltin[i].pUserData,
« no previous file with comments | « third_party/sqlite/src/ext/fts5/fts5Int.h ('k') | third_party/sqlite/src/ext/fts5/fts5_buffer.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698