OLD | NEW |
| (Empty) |
1 /* | |
2 ** 2009 Oct 23 | |
3 ** | |
4 ** The author disclaims copyright to this source code. In place of | |
5 ** a legal notice, here is a blessing: | |
6 ** | |
7 ** May you do good and not evil. | |
8 ** May you find forgiveness for yourself and forgive others. | |
9 ** May you share freely, never taking more than you give. | |
10 ** | |
11 ****************************************************************************** | |
12 */ | |
13 | |
14 #include "fts3Int.h" | |
15 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) | |
16 | |
17 #include <string.h> | |
18 #include <assert.h> | |
19 | |
/*
** Characters that may appear in the second argument to matchinfo().
** The trailing comment on each definition gives the number of output
** values that the directive contributes.
**
** NOTE(review): fts3ExprLHits() lays out 'b' data with a stride of
** ((nCol+31)/32) values per phrase, so the "nCol*nPhrase" figure given for
** FTS3_MATCHINFO_LHITS_BM below looks like an upper bound rather than the
** exact count - confirm against the code that sizes the buffer.
*/
#define FTS3_MATCHINFO_NPHRASE   'p'        /* 1 value */
#define FTS3_MATCHINFO_NCOL      'c'        /* 1 value */
#define FTS3_MATCHINFO_NDOC      'n'        /* 1 value */
#define FTS3_MATCHINFO_AVGLENGTH 'a'        /* nCol values */
#define FTS3_MATCHINFO_LENGTH    'l'        /* nCol values */
#define FTS3_MATCHINFO_LCS       's'        /* nCol values */
#define FTS3_MATCHINFO_HITS      'x'        /* 3*nCol*nPhrase values */
#define FTS3_MATCHINFO_LHITS     'y'        /* nCol*nPhrase values */
#define FTS3_MATCHINFO_LHITS_BM  'b'        /* nCol*nPhrase values */

/*
** The default value for the second argument to matchinfo(): the 'p', 'c'
** and 'x' directives, in that order.
*/
#define FTS3_MATCHINFO_DEFAULT   "pcx"
37 | |
38 | |
39 /* | |
40 ** Used as an fts3ExprIterate() context when loading phrase doclists to | |
41 ** Fts3Expr.aDoclist[]/nDoclist. | |
42 */ | |
43 typedef struct LoadDoclistCtx LoadDoclistCtx; | |
44 struct LoadDoclistCtx { | |
45 Fts3Cursor *pCsr; /* FTS3 Cursor */ | |
46 int nPhrase; /* Number of phrases seen so far */ | |
47 int nToken; /* Number of tokens seen so far */ | |
48 }; | |
49 | |
50 /* | |
51 ** The following types are used as part of the implementation of the | |
52 ** fts3BestSnippet() routine. | |
53 */ | |
54 typedef struct SnippetIter SnippetIter; | |
55 typedef struct SnippetPhrase SnippetPhrase; | |
56 typedef struct SnippetFragment SnippetFragment; | |
57 | |
58 struct SnippetIter { | |
59 Fts3Cursor *pCsr; /* Cursor snippet is being generated from */ | |
60 int iCol; /* Extract snippet from this column */ | |
61 int nSnippet; /* Requested snippet length (in tokens) */ | |
62 int nPhrase; /* Number of phrases in query */ | |
63 SnippetPhrase *aPhrase; /* Array of size nPhrase */ | |
64 int iCurrent; /* First token of current snippet */ | |
65 }; | |
66 | |
67 struct SnippetPhrase { | |
68 int nToken; /* Number of tokens in phrase */ | |
69 char *pList; /* Pointer to start of phrase position list */ | |
70 int iHead; /* Next value in position list */ | |
71 char *pHead; /* Position list data following iHead */ | |
72 int iTail; /* Next value in trailing position list */ | |
73 char *pTail; /* Position list data following iTail */ | |
74 }; | |
75 | |
76 struct SnippetFragment { | |
77 int iCol; /* Column snippet is extracted from */ | |
78 int iPos; /* Index of first token in snippet */ | |
79 u64 covered; /* Mask of query phrases covered */ | |
80 u64 hlmask; /* Mask of snippet terms to highlight */ | |
81 }; | |
82 | |
83 /* | |
84 ** This type is used as an fts3ExprIterate() context object while | |
85 ** accumulating the data returned by the matchinfo() function. | |
86 */ | |
87 typedef struct MatchInfo MatchInfo; | |
88 struct MatchInfo { | |
89 Fts3Cursor *pCursor; /* FTS3 Cursor */ | |
90 int nCol; /* Number of columns in table */ | |
91 int nPhrase; /* Number of matchable phrases in query */ | |
92 sqlite3_int64 nDoc; /* Number of docs in database */ | |
93 char flag; | |
94 u32 *aMatchinfo; /* Pre-allocated buffer */ | |
95 }; | |
96 | |
/*
** An instance of this structure is used to manage a pair of buffers, each
** (nElem * sizeof(u32)) bytes in size. See the MatchinfoBuffer code below
** for details.
**
** Layout of aMatchinfo[] as set up by fts3MIBufferNew():
**
**   aMatchinfo[0]                 byte offset from the start of this object
**                                 to slot 1 (read back by fts3MIBufferFree())
**   aMatchinfo[1 .. nElem]        slot 1
**   aMatchinfo[1+nElem]           byte offset from the start of this object
**                                 to slot 2
**   aMatchinfo[2+nElem .. ]       slot 2
**
** The object, both slots and the zMatchinfo string share one allocation.
** The declared array size of 1 is part of the size arithmetic in
** fts3MIBufferNew(), so it must not be changed independently of it.
*/
struct MatchinfoBuffer {
  u8 aRef[3];                     /* [0]: set while the owner holds the
                                  ** object (cleared by
                                  ** sqlite3Fts3MIBufferFree()). [1],[2]: set
                                  ** while slot 1 / slot 2 is handed out */
  int nElem;                      /* Number of u32 elements in each slot */
  int bGlobal;                    /* Set if global data is loaded */
  char *zMatchinfo;               /* Copy of the matchinfo format string */
  u32 aMatchinfo[1];              /* Offsets and slots; see layout above */
};
109 | |
110 | |
/*
** Growable text accumulator. The snippet() and offsets() functions both
** return text values and build them up in one of these while they run.
** fts3StringAppend() is the routine that adds data.
*/
typedef struct StrBuffer {
  char *z;                        /* Buffer holding the string */
  int n;                          /* Bytes used in z, excluding the nul */
  int nAlloc;                     /* Bytes allocated at z */
} StrBuffer;
122 | |
123 | |
124 /************************************************************************* | |
125 ** Start of MatchinfoBuffer code. | |
126 */ | |
127 | |
128 /* | |
129 ** Allocate a two-slot MatchinfoBuffer object. | |
130 */ | |
131 static MatchinfoBuffer *fts3MIBufferNew(int nElem, const char *zMatchinfo){ | |
132 MatchinfoBuffer *pRet; | |
133 int nByte = sizeof(u32) * (2*nElem + 1) + sizeof(MatchinfoBuffer); | |
134 int nStr = (int)strlen(zMatchinfo); | |
135 | |
136 pRet = sqlite3_malloc(nByte + nStr+1); | |
137 if( pRet ){ | |
138 memset(pRet, 0, nByte); | |
139 pRet->aMatchinfo[0] = (u8*)(&pRet->aMatchinfo[1]) - (u8*)pRet; | |
140 pRet->aMatchinfo[1+nElem] = pRet->aMatchinfo[0] + sizeof(u32)*(nElem+1); | |
141 pRet->nElem = nElem; | |
142 pRet->zMatchinfo = ((char*)pRet) + nByte; | |
143 memcpy(pRet->zMatchinfo, zMatchinfo, nStr+1); | |
144 pRet->aRef[0] = 1; | |
145 } | |
146 | |
147 return pRet; | |
148 } | |
149 | |
150 static void fts3MIBufferFree(void *p){ | |
151 MatchinfoBuffer *pBuf = (MatchinfoBuffer*)((u8*)p - ((u32*)p)[-1]); | |
152 | |
153 assert( (u32*)p==&pBuf->aMatchinfo[1] | |
154 || (u32*)p==&pBuf->aMatchinfo[pBuf->nElem+2] | |
155 ); | |
156 if( (u32*)p==&pBuf->aMatchinfo[1] ){ | |
157 pBuf->aRef[1] = 0; | |
158 }else{ | |
159 pBuf->aRef[2] = 0; | |
160 } | |
161 | |
162 if( pBuf->aRef[0]==0 && pBuf->aRef[1]==0 && pBuf->aRef[2]==0 ){ | |
163 sqlite3_free(pBuf); | |
164 } | |
165 } | |
166 | |
167 static void (*fts3MIBufferAlloc(MatchinfoBuffer *p, u32 **paOut))(void*){ | |
168 void (*xRet)(void*) = 0; | |
169 u32 *aOut = 0; | |
170 | |
171 if( p->aRef[1]==0 ){ | |
172 p->aRef[1] = 1; | |
173 aOut = &p->aMatchinfo[1]; | |
174 xRet = fts3MIBufferFree; | |
175 } | |
176 else if( p->aRef[2]==0 ){ | |
177 p->aRef[2] = 1; | |
178 aOut = &p->aMatchinfo[p->nElem+2]; | |
179 xRet = fts3MIBufferFree; | |
180 }else{ | |
181 aOut = (u32*)sqlite3_malloc(p->nElem * sizeof(u32)); | |
182 if( aOut ){ | |
183 xRet = sqlite3_free; | |
184 if( p->bGlobal ) memcpy(aOut, &p->aMatchinfo[1], p->nElem*sizeof(u32)); | |
185 } | |
186 } | |
187 | |
188 *paOut = aOut; | |
189 return xRet; | |
190 } | |
191 | |
192 static void fts3MIBufferSetGlobal(MatchinfoBuffer *p){ | |
193 p->bGlobal = 1; | |
194 memcpy(&p->aMatchinfo[2+p->nElem], &p->aMatchinfo[1], p->nElem*sizeof(u32)); | |
195 } | |
196 | |
197 /* | |
198 ** Free a MatchinfoBuffer object allocated using fts3MIBufferNew() | |
199 */ | |
200 void sqlite3Fts3MIBufferFree(MatchinfoBuffer *p){ | |
201 if( p ){ | |
202 assert( p->aRef[0]==1 ); | |
203 p->aRef[0] = 0; | |
204 if( p->aRef[0]==0 && p->aRef[1]==0 && p->aRef[2]==0 ){ | |
205 sqlite3_free(p); | |
206 } | |
207 } | |
208 } | |
209 | |
210 /* | |
211 ** End of MatchinfoBuffer code. | |
212 *************************************************************************/ | |
213 | |
214 | |
/*
** Step a position-list cursor forward by one element.
**
** A position list is a sorted list of unique integers. Each element is
** stored as an FTS3 varint whose value is the difference between the
** element and its predecessor, plus two. So the position list:
**
**         4 9 113
**
** is stored as the three varints:
**
**         6 7 106
**
** On entry *pp points at the varint for the next element and *piPos holds
** the value of the previous element. On return *piPos holds the value of
** the element just decoded and *pp points at the varint that follows it.
*/
static void fts3GetDeltaPosition(char **pp, int *piPos){
  int iDelta;                     /* Decoded varint (difference plus two) */
  int nRead;                      /* Size of the varint in bytes */

  nRead = fts3GetVarint32(*pp, &iDelta);
  *pp += nRead;
  *piPos += (iDelta-2);
}
240 | |
241 /* | |
242 ** Helper function for fts3ExprIterate() (see below). | |
243 */ | |
244 static int fts3ExprIterate2( | |
245 Fts3Expr *pExpr, /* Expression to iterate phrases of */ | |
246 int *piPhrase, /* Pointer to phrase counter */ | |
247 int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */ | |
248 void *pCtx /* Second argument to pass to callback */ | |
249 ){ | |
250 int rc; /* Return code */ | |
251 int eType = pExpr->eType; /* Type of expression node pExpr */ | |
252 | |
253 if( eType!=FTSQUERY_PHRASE ){ | |
254 assert( pExpr->pLeft && pExpr->pRight ); | |
255 rc = fts3ExprIterate2(pExpr->pLeft, piPhrase, x, pCtx); | |
256 if( rc==SQLITE_OK && eType!=FTSQUERY_NOT ){ | |
257 rc = fts3ExprIterate2(pExpr->pRight, piPhrase, x, pCtx); | |
258 } | |
259 }else{ | |
260 rc = x(pExpr, *piPhrase, pCtx); | |
261 (*piPhrase)++; | |
262 } | |
263 return rc; | |
264 } | |
265 | |
266 /* | |
267 ** Iterate through all phrase nodes in an FTS3 query, except those that | |
268 ** are part of a sub-tree that is the right-hand-side of a NOT operator. | |
269 ** For each phrase node found, the supplied callback function is invoked. | |
270 ** | |
271 ** If the callback function returns anything other than SQLITE_OK, | |
272 ** the iteration is abandoned and the error code returned immediately. | |
273 ** Otherwise, SQLITE_OK is returned after a callback has been made for | |
274 ** all eligible phrase nodes. | |
275 */ | |
276 static int fts3ExprIterate( | |
277 Fts3Expr *pExpr, /* Expression to iterate phrases of */ | |
278 int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */ | |
279 void *pCtx /* Second argument to pass to callback */ | |
280 ){ | |
281 int iPhrase = 0; /* Variable used as the phrase counter */ | |
282 return fts3ExprIterate2(pExpr, &iPhrase, x, pCtx); | |
283 } | |
284 | |
285 | |
286 /* | |
287 ** This is an fts3ExprIterate() callback used while loading the doclists | |
288 ** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also | |
289 ** fts3ExprLoadDoclists(). | |
290 */ | |
291 static int fts3ExprLoadDoclistsCb(Fts3Expr *pExpr, int iPhrase, void *ctx){ | |
292 int rc = SQLITE_OK; | |
293 Fts3Phrase *pPhrase = pExpr->pPhrase; | |
294 LoadDoclistCtx *p = (LoadDoclistCtx *)ctx; | |
295 | |
296 UNUSED_PARAMETER(iPhrase); | |
297 | |
298 p->nPhrase++; | |
299 p->nToken += pPhrase->nToken; | |
300 | |
301 return rc; | |
302 } | |
303 | |
304 /* | |
305 ** Load the doclists for each phrase in the query associated with FTS3 cursor | |
306 ** pCsr. | |
307 ** | |
308 ** If pnPhrase is not NULL, then *pnPhrase is set to the number of matchable | |
309 ** phrases in the expression (all phrases except those directly or | |
310 ** indirectly descended from the right-hand-side of a NOT operator). If | |
311 ** pnToken is not NULL, then it is set to the number of tokens in all | |
312 ** matchable phrases of the expression. | |
313 */ | |
314 static int fts3ExprLoadDoclists( | |
315 Fts3Cursor *pCsr, /* Fts3 cursor for current query */ | |
316 int *pnPhrase, /* OUT: Number of phrases in query */ | |
317 int *pnToken /* OUT: Number of tokens in query */ | |
318 ){ | |
319 int rc; /* Return Code */ | |
320 LoadDoclistCtx sCtx = {0,0,0}; /* Context for fts3ExprIterate() */ | |
321 sCtx.pCsr = pCsr; | |
322 rc = fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb, (void *)&sCtx); | |
323 if( pnPhrase ) *pnPhrase = sCtx.nPhrase; | |
324 if( pnToken ) *pnToken = sCtx.nToken; | |
325 return rc; | |
326 } | |
327 | |
328 static int fts3ExprPhraseCountCb(Fts3Expr *pExpr, int iPhrase, void *ctx){ | |
329 (*(int *)ctx)++; | |
330 pExpr->iPhrase = iPhrase; | |
331 return SQLITE_OK; | |
332 } | |
333 static int fts3ExprPhraseCount(Fts3Expr *pExpr){ | |
334 int nPhrase = 0; | |
335 (void)fts3ExprIterate(pExpr, fts3ExprPhraseCountCb, (void *)&nPhrase); | |
336 return nPhrase; | |
337 } | |
338 | |
/*
** Move the position-list cursor (*ppIter, *piIter) forward until it sits
** on the first position that is greater than or equal to iNext.
**
** If the end of the list is reached first, *ppIter is set to NULL and
** *piIter to -1. A cursor that is already NULL is left untouched.
*/
static void fts3SnippetAdvance(char **ppIter, int *piIter, int iNext){
  char *pList = *ppIter;
  int iPos;

  if( pList==0 ) return;

  iPos = *piIter;
  while( iPos<iNext ){
    if( (*pList & 0xFE)==0 ){
      /* Hit a 0x00 or 0x01 terminator: the list is exhausted. */
      iPos = -1;
      pList = 0;
      break;
    }
    fts3GetDeltaPosition(&pList, &iPos);
  }

  *piIter = iPos;
  *ppIter = pList;
}
362 | |
363 /* | |
364 ** Advance the snippet iterator to the next candidate snippet. | |
365 */ | |
366 static int fts3SnippetNextCandidate(SnippetIter *pIter){ | |
367 int i; /* Loop counter */ | |
368 | |
369 if( pIter->iCurrent<0 ){ | |
370 /* The SnippetIter object has just been initialized. The first snippet | |
371 ** candidate always starts at offset 0 (even if this candidate has a | |
372 ** score of 0.0). | |
373 */ | |
374 pIter->iCurrent = 0; | |
375 | |
376 /* Advance the 'head' iterator of each phrase to the first offset that | |
377 ** is greater than or equal to (iNext+nSnippet). | |
378 */ | |
379 for(i=0; i<pIter->nPhrase; i++){ | |
380 SnippetPhrase *pPhrase = &pIter->aPhrase[i]; | |
381 fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, pIter->nSnippet); | |
382 } | |
383 }else{ | |
384 int iStart; | |
385 int iEnd = 0x7FFFFFFF; | |
386 | |
387 for(i=0; i<pIter->nPhrase; i++){ | |
388 SnippetPhrase *pPhrase = &pIter->aPhrase[i]; | |
389 if( pPhrase->pHead && pPhrase->iHead<iEnd ){ | |
390 iEnd = pPhrase->iHead; | |
391 } | |
392 } | |
393 if( iEnd==0x7FFFFFFF ){ | |
394 return 1; | |
395 } | |
396 | |
397 pIter->iCurrent = iStart = iEnd - pIter->nSnippet + 1; | |
398 for(i=0; i<pIter->nPhrase; i++){ | |
399 SnippetPhrase *pPhrase = &pIter->aPhrase[i]; | |
400 fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, iEnd+1); | |
401 fts3SnippetAdvance(&pPhrase->pTail, &pPhrase->iTail, iStart); | |
402 } | |
403 } | |
404 | |
405 return 0; | |
406 } | |
407 | |
408 /* | |
409 ** Retrieve information about the current candidate snippet of snippet | |
410 ** iterator pIter. | |
411 */ | |
412 static void fts3SnippetDetails( | |
413 SnippetIter *pIter, /* Snippet iterator */ | |
414 u64 mCovered, /* Bitmask of phrases already covered */ | |
415 int *piToken, /* OUT: First token of proposed snippet */ | |
416 int *piScore, /* OUT: "Score" for this snippet */ | |
417 u64 *pmCover, /* OUT: Bitmask of phrases covered */ | |
418 u64 *pmHighlight /* OUT: Bitmask of terms to highlight */ | |
419 ){ | |
420 int iStart = pIter->iCurrent; /* First token of snippet */ | |
421 int iScore = 0; /* Score of this snippet */ | |
422 int i; /* Loop counter */ | |
423 u64 mCover = 0; /* Mask of phrases covered by this snippet */ | |
424 u64 mHighlight = 0; /* Mask of tokens to highlight in snippet */ | |
425 | |
426 for(i=0; i<pIter->nPhrase; i++){ | |
427 SnippetPhrase *pPhrase = &pIter->aPhrase[i]; | |
428 if( pPhrase->pTail ){ | |
429 char *pCsr = pPhrase->pTail; | |
430 int iCsr = pPhrase->iTail; | |
431 | |
432 while( iCsr<(iStart+pIter->nSnippet) ){ | |
433 int j; | |
434 u64 mPhrase = (u64)1 << i; | |
435 u64 mPos = (u64)1 << (iCsr - iStart); | |
436 assert( iCsr>=iStart ); | |
437 if( (mCover|mCovered)&mPhrase ){ | |
438 iScore++; | |
439 }else{ | |
440 iScore += 1000; | |
441 } | |
442 mCover |= mPhrase; | |
443 | |
444 for(j=0; j<pPhrase->nToken; j++){ | |
445 mHighlight |= (mPos>>j); | |
446 } | |
447 | |
448 if( 0==(*pCsr & 0x0FE) ) break; | |
449 fts3GetDeltaPosition(&pCsr, &iCsr); | |
450 } | |
451 } | |
452 } | |
453 | |
454 /* Set the output variables before returning. */ | |
455 *piToken = iStart; | |
456 *piScore = iScore; | |
457 *pmCover = mCover; | |
458 *pmHighlight = mHighlight; | |
459 } | |
460 | |
461 /* | |
462 ** This function is an fts3ExprIterate() callback used by fts3BestSnippet(). | |
463 ** Each invocation populates an element of the SnippetIter.aPhrase[] array. | |
464 */ | |
465 static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){ | |
466 SnippetIter *p = (SnippetIter *)ctx; | |
467 SnippetPhrase *pPhrase = &p->aPhrase[iPhrase]; | |
468 char *pCsr; | |
469 int rc; | |
470 | |
471 pPhrase->nToken = pExpr->pPhrase->nToken; | |
472 rc = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol, &pCsr); | |
473 assert( rc==SQLITE_OK || pCsr==0 ); | |
474 if( pCsr ){ | |
475 int iFirst = 0; | |
476 pPhrase->pList = pCsr; | |
477 fts3GetDeltaPosition(&pCsr, &iFirst); | |
478 assert( iFirst>=0 ); | |
479 pPhrase->pHead = pCsr; | |
480 pPhrase->pTail = pCsr; | |
481 pPhrase->iHead = iFirst; | |
482 pPhrase->iTail = iFirst; | |
483 }else{ | |
484 assert( rc!=SQLITE_OK || ( | |
485 pPhrase->pList==0 && pPhrase->pHead==0 && pPhrase->pTail==0 | |
486 )); | |
487 } | |
488 | |
489 return rc; | |
490 } | |
491 | |
492 /* | |
493 ** Select the fragment of text consisting of nFragment contiguous tokens | |
494 ** from column iCol that represent the "best" snippet. The best snippet | |
495 ** is the snippet with the highest score, where scores are calculated | |
496 ** by adding: | |
497 ** | |
498 ** (a) +1 point for each occurrence of a matchable phrase in the snippet. | |
499 ** | |
500 ** (b) +1000 points for the first occurrence of each matchable phrase in | |
501 ** the snippet for which the corresponding mCovered bit is not set. | |
502 ** | |
503 ** The selected snippet parameters are stored in structure *pFragment before | |
504 ** returning. The score of the selected snippet is stored in *piScore | |
505 ** before returning. | |
506 */ | |
507 static int fts3BestSnippet( | |
508 int nSnippet, /* Desired snippet length */ | |
509 Fts3Cursor *pCsr, /* Cursor to create snippet for */ | |
510 int iCol, /* Index of column to create snippet from */ | |
511 u64 mCovered, /* Mask of phrases already covered */ | |
512 u64 *pmSeen, /* IN/OUT: Mask of phrases seen */ | |
513 SnippetFragment *pFragment, /* OUT: Best snippet found */ | |
514 int *piScore /* OUT: Score of snippet pFragment */ | |
515 ){ | |
516 int rc; /* Return Code */ | |
517 int nList; /* Number of phrases in expression */ | |
518 SnippetIter sIter; /* Iterates through snippet candidates */ | |
519 int nByte; /* Number of bytes of space to allocate */ | |
520 int iBestScore = -1; /* Best snippet score found so far */ | |
521 int i; /* Loop counter */ | |
522 | |
523 memset(&sIter, 0, sizeof(sIter)); | |
524 | |
525 /* Iterate through the phrases in the expression to count them. The same | |
526 ** callback makes sure the doclists are loaded for each phrase. | |
527 */ | |
528 rc = fts3ExprLoadDoclists(pCsr, &nList, 0); | |
529 if( rc!=SQLITE_OK ){ | |
530 return rc; | |
531 } | |
532 | |
533 /* Now that it is known how many phrases there are, allocate and zero | |
534 ** the required space using malloc(). | |
535 */ | |
536 nByte = sizeof(SnippetPhrase) * nList; | |
537 sIter.aPhrase = (SnippetPhrase *)sqlite3_malloc(nByte); | |
538 if( !sIter.aPhrase ){ | |
539 return SQLITE_NOMEM; | |
540 } | |
541 memset(sIter.aPhrase, 0, nByte); | |
542 | |
543 /* Initialize the contents of the SnippetIter object. Then iterate through | |
544 ** the set of phrases in the expression to populate the aPhrase[] array. | |
545 */ | |
546 sIter.pCsr = pCsr; | |
547 sIter.iCol = iCol; | |
548 sIter.nSnippet = nSnippet; | |
549 sIter.nPhrase = nList; | |
550 sIter.iCurrent = -1; | |
551 rc = fts3ExprIterate(pCsr->pExpr, fts3SnippetFindPositions, (void*)&sIter); | |
552 if( rc==SQLITE_OK ){ | |
553 | |
554 /* Set the *pmSeen output variable. */ | |
555 for(i=0; i<nList; i++){ | |
556 if( sIter.aPhrase[i].pHead ){ | |
557 *pmSeen |= (u64)1 << i; | |
558 } | |
559 } | |
560 | |
561 /* Loop through all candidate snippets. Store the best snippet in | |
562 ** *pFragment. Store its associated 'score' in iBestScore. | |
563 */ | |
564 pFragment->iCol = iCol; | |
565 while( !fts3SnippetNextCandidate(&sIter) ){ | |
566 int iPos; | |
567 int iScore; | |
568 u64 mCover; | |
569 u64 mHighlite; | |
570 fts3SnippetDetails(&sIter, mCovered, &iPos, &iScore, &mCover,&mHighlite); | |
571 assert( iScore>=0 ); | |
572 if( iScore>iBestScore ){ | |
573 pFragment->iPos = iPos; | |
574 pFragment->hlmask = mHighlite; | |
575 pFragment->covered = mCover; | |
576 iBestScore = iScore; | |
577 } | |
578 } | |
579 | |
580 *piScore = iBestScore; | |
581 } | |
582 sqlite3_free(sIter.aPhrase); | |
583 return rc; | |
584 } | |
585 | |
586 | |
587 /* | |
588 ** Append a string to the string-buffer passed as the first argument. | |
589 ** | |
590 ** If nAppend is negative, then the length of the string zAppend is | |
591 ** determined using strlen(). | |
592 */ | |
593 static int fts3StringAppend( | |
594 StrBuffer *pStr, /* Buffer to append to */ | |
595 const char *zAppend, /* Pointer to data to append to buffer */ | |
596 int nAppend /* Size of zAppend in bytes (or -1) */ | |
597 ){ | |
598 if( nAppend<0 ){ | |
599 nAppend = (int)strlen(zAppend); | |
600 } | |
601 | |
602 /* If there is insufficient space allocated at StrBuffer.z, use realloc() | |
603 ** to grow the buffer until so that it is big enough to accomadate the | |
604 ** appended data. | |
605 */ | |
606 if( pStr->n+nAppend+1>=pStr->nAlloc ){ | |
607 int nAlloc = pStr->nAlloc+nAppend+100; | |
608 char *zNew = sqlite3_realloc(pStr->z, nAlloc); | |
609 if( !zNew ){ | |
610 return SQLITE_NOMEM; | |
611 } | |
612 pStr->z = zNew; | |
613 pStr->nAlloc = nAlloc; | |
614 } | |
615 assert( pStr->z!=0 && (pStr->nAlloc >= pStr->n+nAppend+1) ); | |
616 | |
617 /* Append the data to the string buffer. */ | |
618 memcpy(&pStr->z[pStr->n], zAppend, nAppend); | |
619 pStr->n += nAppend; | |
620 pStr->z[pStr->n] = '\0'; | |
621 | |
622 return SQLITE_OK; | |
623 } | |
624 | |
625 /* | |
626 ** The fts3BestSnippet() function often selects snippets that end with a | |
627 ** query term. That is, the final term of the snippet is always a term | |
628 ** that requires highlighting. For example, if 'X' is a highlighted term | |
629 ** and '.' is a non-highlighted term, BestSnippet() may select: | |
630 ** | |
631 ** ........X.....X | |
632 ** | |
633 ** This function "shifts" the beginning of the snippet forward in the | |
634 ** document so that there are approximately the same number of | |
635 ** non-highlighted terms to the right of the final highlighted term as there | |
636 ** are to the left of the first highlighted term. For example, to this: | |
637 ** | |
638 ** ....X.....X.... | |
639 ** | |
640 ** This is done as part of extracting the snippet text, not when selecting | |
641 ** the snippet. Snippet selection is done based on doclists only, so there | |
642 ** is no way for fts3BestSnippet() to know whether or not the document | |
643 ** actually contains terms that follow the final highlighted term. | |
644 */ | |
645 static int fts3SnippetShift( | |
646 Fts3Table *pTab, /* FTS3 table snippet comes from */ | |
647 int iLangid, /* Language id to use in tokenizing */ | |
648 int nSnippet, /* Number of tokens desired for snippet */ | |
649 const char *zDoc, /* Document text to extract snippet from */ | |
650 int nDoc, /* Size of buffer zDoc in bytes */ | |
651 int *piPos, /* IN/OUT: First token of snippet */ | |
652 u64 *pHlmask /* IN/OUT: Mask of tokens to highlight */ | |
653 ){ | |
654 u64 hlmask = *pHlmask; /* Local copy of initial highlight-mask */ | |
655 | |
656 if( hlmask ){ | |
657 int nLeft; /* Tokens to the left of first highlight */ | |
658 int nRight; /* Tokens to the right of last highlight */ | |
659 int nDesired; /* Ideal number of tokens to shift forward */ | |
660 | |
661 for(nLeft=0; !(hlmask & ((u64)1 << nLeft)); nLeft++); | |
662 for(nRight=0; !(hlmask & ((u64)1 << (nSnippet-1-nRight))); nRight++); | |
663 nDesired = (nLeft-nRight)/2; | |
664 | |
665 /* Ideally, the start of the snippet should be pushed forward in the | |
666 ** document nDesired tokens. This block checks if there are actually | |
667 ** nDesired tokens to the right of the snippet. If so, *piPos and | |
668 ** *pHlMask are updated to shift the snippet nDesired tokens to the | |
669 ** right. Otherwise, the snippet is shifted by the number of tokens | |
670 ** available. | |
671 */ | |
672 if( nDesired>0 ){ | |
673 int nShift; /* Number of tokens to shift snippet by */ | |
674 int iCurrent = 0; /* Token counter */ | |
675 int rc; /* Return Code */ | |
676 sqlite3_tokenizer_module *pMod; | |
677 sqlite3_tokenizer_cursor *pC; | |
678 pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule; | |
679 | |
680 /* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired) | |
681 ** or more tokens in zDoc/nDoc. | |
682 */ | |
683 rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, iLangid, zDoc, nDoc, &pC); | |
684 if( rc!=SQLITE_OK ){ | |
685 return rc; | |
686 } | |
687 while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){ | |
688 const char *ZDUMMY; int DUMMY1 = 0, DUMMY2 = 0, DUMMY3 = 0; | |
689 rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent); | |
690 } | |
691 pMod->xClose(pC); | |
692 if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ return rc; } | |
693 | |
694 nShift = (rc==SQLITE_DONE)+iCurrent-nSnippet; | |
695 assert( nShift<=nDesired ); | |
696 if( nShift>0 ){ | |
697 *piPos += nShift; | |
698 *pHlmask = hlmask >> nShift; | |
699 } | |
700 } | |
701 } | |
702 return SQLITE_OK; | |
703 } | |
704 | |
705 /* | |
706 ** Extract the snippet text for fragment pFragment from cursor pCsr and | |
707 ** append it to string buffer pOut. | |
708 */ | |
709 static int fts3SnippetText( | |
710 Fts3Cursor *pCsr, /* FTS3 Cursor */ | |
711 SnippetFragment *pFragment, /* Snippet to extract */ | |
712 int iFragment, /* Fragment number */ | |
713 int isLast, /* True for final fragment in snippet */ | |
714 int nSnippet, /* Number of tokens in extracted snippet */ | |
715 const char *zOpen, /* String inserted before highlighted term */ | |
716 const char *zClose, /* String inserted after highlighted term */ | |
717 const char *zEllipsis, /* String inserted between snippets */ | |
718 StrBuffer *pOut /* Write output here */ | |
719 ){ | |
720 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; | |
721 int rc; /* Return code */ | |
722 const char *zDoc; /* Document text to extract snippet from */ | |
723 int nDoc; /* Size of zDoc in bytes */ | |
724 int iCurrent = 0; /* Current token number of document */ | |
725 int iEnd = 0; /* Byte offset of end of current token */ | |
726 int isShiftDone = 0; /* True after snippet is shifted */ | |
727 int iPos = pFragment->iPos; /* First token of snippet */ | |
728 u64 hlmask = pFragment->hlmask; /* Highlight-mask for snippet */ | |
729 int iCol = pFragment->iCol+1; /* Query column to extract text from */ | |
730 sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */ | |
731 sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor open on zDoc/nDoc */ | |
732 | |
733 zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol); | |
734 if( zDoc==0 ){ | |
735 if( sqlite3_column_type(pCsr->pStmt, iCol)!=SQLITE_NULL ){ | |
736 return SQLITE_NOMEM; | |
737 } | |
738 return SQLITE_OK; | |
739 } | |
740 nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol); | |
741 | |
742 /* Open a token cursor on the document. */ | |
743 pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule; | |
744 rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid, zDoc,nDoc,&pC); | |
745 if( rc!=SQLITE_OK ){ | |
746 return rc; | |
747 } | |
748 | |
749 while( rc==SQLITE_OK ){ | |
750 const char *ZDUMMY; /* Dummy argument used with tokenizer */ | |
751 int DUMMY1 = -1; /* Dummy argument used with tokenizer */ | |
752 int iBegin = 0; /* Offset in zDoc of start of token */ | |
753 int iFin = 0; /* Offset in zDoc of end of token */ | |
754 int isHighlight = 0; /* True for highlighted terms */ | |
755 | |
756 /* Variable DUMMY1 is initialized to a negative value above. Elsewhere | |
757 ** in the FTS code the variable that the third argument to xNext points to | |
758 ** is initialized to zero before the first (*but not necessarily | |
759 ** subsequent*) call to xNext(). This is done for a particular application | |
760 ** that needs to know whether or not the tokenizer is being used for | |
761 ** snippet generation or for some other purpose. | |
762 ** | |
763 ** Extreme care is required when writing code to depend on this | |
764 ** initialization. It is not a documented part of the tokenizer interface. | |
765 ** If a tokenizer is used directly by any code outside of FTS, this | |
766 ** convention might not be respected. */ | |
767 rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent); | |
768 if( rc!=SQLITE_OK ){ | |
769 if( rc==SQLITE_DONE ){ | |
770 /* Special case - the last token of the snippet is also the last token | |
771 ** of the column. Append any punctuation that occurred between the end | |
772 ** of the previous token and the end of the document to the output. | |
773 ** Then break out of the loop. */ | |
774 rc = fts3StringAppend(pOut, &zDoc[iEnd], -1); | |
775 } | |
776 break; | |
777 } | |
778 if( iCurrent<iPos ){ continue; } | |
779 | |
780 if( !isShiftDone ){ | |
781 int n = nDoc - iBegin; | |
782 rc = fts3SnippetShift( | |
783 pTab, pCsr->iLangid, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask | |
784 ); | |
785 isShiftDone = 1; | |
786 | |
787 /* Now that the shift has been done, check if the initial "..." are | |
788 ** required. They are required if (a) this is not the first fragment, | |
789 ** or (b) this fragment does not begin at position 0 of its column. | |
790 */ | |
791 if( rc==SQLITE_OK ){ | |
792 if( iPos>0 || iFragment>0 ){ | |
793 rc = fts3StringAppend(pOut, zEllipsis, -1); | |
794 }else if( iBegin ){ | |
795 rc = fts3StringAppend(pOut, zDoc, iBegin); | |
796 } | |
797 } | |
798 if( rc!=SQLITE_OK || iCurrent<iPos ) continue; | |
799 } | |
800 | |
801 if( iCurrent>=(iPos+nSnippet) ){ | |
802 if( isLast ){ | |
803 rc = fts3StringAppend(pOut, zEllipsis, -1); | |
804 } | |
805 break; | |
806 } | |
807 | |
808 /* Set isHighlight to true if this term should be highlighted. */ | |
809 isHighlight = (hlmask & ((u64)1 << (iCurrent-iPos)))!=0; | |
810 | |
811 if( iCurrent>iPos ) rc = fts3StringAppend(pOut, &zDoc[iEnd], iBegin-iEnd); | |
812 if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zOpen, -1); | |
813 if( rc==SQLITE_OK ) rc = fts3StringAppend(pOut, &zDoc[iBegin], iFin-iBegin); | |
814 if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zClose, -1); | |
815 | |
816 iEnd = iFin; | |
817 } | |
818 | |
819 pMod->xClose(pC); | |
820 return rc; | |
821 } | |
822 | |
823 | |
/*
** Count the entries in a column-list, which is the delta-encoded list of
** term offsets for a single column of a single row.
**
** On entry *ppCollist must point to the first byte of the first varint in
** the column-list. On return *ppCollist points to the byte immediately
** after the final varint: either the 0x00 that ends the position-list or
** the 0x01 that introduces the next column number.
**
** The return value is the number of varints in the column-list.
*/
static int fts3ColumnlistCount(char **ppCollist){
  char *pCursor = *ppCollist;
  char cont = 0;                  /* Continuation bit of the previous byte */
  int nEntry = 0;

  /* A 0x00 or 0x01 byte ends the list, but only when it is not itself the
  ** trailing byte of a multi-byte varint. */
  for(;;){
    if( (0xFE & (*pCursor | cont))==0 ) break;
    cont = *pCursor & 0x80;
    pCursor++;
    if( !cont ){
      nEntry++;
    }
  }

  *ppCollist = pCursor;
  return nEntry;
}
851 | |
852 /* | |
853 ** This function gathers 'y' or 'b' data for a single phrase. | |
854 */ | |
855 static void fts3ExprLHits( | |
856 Fts3Expr *pExpr, /* Phrase expression node */ | |
857 MatchInfo *p /* Matchinfo context */ | |
858 ){ | |
859 Fts3Table *pTab = (Fts3Table *)p->pCursor->base.pVtab; | |
860 int iStart; | |
861 Fts3Phrase *pPhrase = pExpr->pPhrase; | |
862 char *pIter = pPhrase->doclist.pList; | |
863 int iCol = 0; | |
864 | |
865 assert( p->flag==FTS3_MATCHINFO_LHITS_BM || p->flag==FTS3_MATCHINFO_LHITS ); | |
866 if( p->flag==FTS3_MATCHINFO_LHITS ){ | |
867 iStart = pExpr->iPhrase * p->nCol; | |
868 }else{ | |
869 iStart = pExpr->iPhrase * ((p->nCol + 31) / 32); | |
870 } | |
871 | |
872 while( 1 ){ | |
873 int nHit = fts3ColumnlistCount(&pIter); | |
874 if( (pPhrase->iColumn>=pTab->nColumn || pPhrase->iColumn==iCol) ){ | |
875 if( p->flag==FTS3_MATCHINFO_LHITS ){ | |
876 p->aMatchinfo[iStart + iCol] = (u32)nHit; | |
877 }else if( nHit ){ | |
878 p->aMatchinfo[iStart + (iCol+1)/32] |= (1 << (iCol&0x1F)); | |
879 } | |
880 } | |
881 assert( *pIter==0x00 || *pIter==0x01 ); | |
882 if( *pIter!=0x01 ) break; | |
883 pIter++; | |
884 pIter += fts3GetVarint32(pIter, &iCol); | |
885 } | |
886 } | |
887 | |
888 /* | |
889 ** Gather the results for matchinfo directives 'y' and 'b'. | |
890 */ | |
891 static void fts3ExprLHitGather( | |
892 Fts3Expr *pExpr, | |
893 MatchInfo *p | |
894 ){ | |
895 assert( (pExpr->pLeft==0)==(pExpr->pRight==0) ); | |
896 if( pExpr->bEof==0 && pExpr->iDocid==p->pCursor->iPrevId ){ | |
897 if( pExpr->pLeft ){ | |
898 fts3ExprLHitGather(pExpr->pLeft, p); | |
899 fts3ExprLHitGather(pExpr->pRight, p); | |
900 }else{ | |
901 fts3ExprLHits(pExpr, p); | |
902 } | |
903 } | |
904 } | |
905 | |
906 /* | |
907 ** fts3ExprIterate() callback used to collect the "global" matchinfo stats | |
908 ** for a single query. | |
909 ** | |
910 ** fts3ExprIterate() callback to load the 'global' elements of a | |
911 ** FTS3_MATCHINFO_HITS matchinfo array. The global stats are those elements | |
912 ** of the matchinfo array that are constant for all rows returned by the | |
913 ** current query. | |
914 ** | |
915 ** Argument pCtx is actually a pointer to a struct of type MatchInfo. This | |
916 ** function populates Matchinfo.aMatchinfo[] as follows: | |
917 ** | |
918 ** for(iCol=0; iCol<nCol; iCol++){ | |
919 ** aMatchinfo[3*iPhrase*nCol + 3*iCol + 1] = X; | |
920 ** aMatchinfo[3*iPhrase*nCol + 3*iCol + 2] = Y; | |
921 ** } | |
922 ** | |
923 ** where X is the number of matches for phrase iPhrase is column iCol of all | |
924 ** rows of the table. Y is the number of rows for which column iCol contains | |
925 ** at least one instance of phrase iPhrase. | |
926 ** | |
927 ** If the phrase pExpr consists entirely of deferred tokens, then all X and | |
928 ** Y values are set to nDoc, where nDoc is the number of documents in the | |
929 ** file system. This is done because the full-text index doclist is required | |
930 ** to calculate these values properly, and the full-text index doclist is | |
931 ** not available for deferred tokens. | |
932 */ | |
933 static int fts3ExprGlobalHitsCb( | |
934 Fts3Expr *pExpr, /* Phrase expression node */ | |
935 int iPhrase, /* Phrase number (numbered from zero) */ | |
936 void *pCtx /* Pointer to MatchInfo structure */ | |
937 ){ | |
938 MatchInfo *p = (MatchInfo *)pCtx; | |
939 return sqlite3Fts3EvalPhraseStats( | |
940 p->pCursor, pExpr, &p->aMatchinfo[3*iPhrase*p->nCol] | |
941 ); | |
942 } | |
943 | |
944 /* | |
945 ** fts3ExprIterate() callback used to collect the "local" part of the | |
946 ** FTS3_MATCHINFO_HITS array. The local stats are those elements of the | |
947 ** array that are different for each row returned by the query. | |
948 */ | |
949 static int fts3ExprLocalHitsCb( | |
950 Fts3Expr *pExpr, /* Phrase expression node */ | |
951 int iPhrase, /* Phrase number */ | |
952 void *pCtx /* Pointer to MatchInfo structure */ | |
953 ){ | |
954 int rc = SQLITE_OK; | |
955 MatchInfo *p = (MatchInfo *)pCtx; | |
956 int iStart = iPhrase * p->nCol * 3; | |
957 int i; | |
958 | |
959 for(i=0; i<p->nCol && rc==SQLITE_OK; i++){ | |
960 char *pCsr; | |
961 rc = sqlite3Fts3EvalPhrasePoslist(p->pCursor, pExpr, i, &pCsr); | |
962 if( pCsr ){ | |
963 p->aMatchinfo[iStart+i*3] = fts3ColumnlistCount(&pCsr); | |
964 }else{ | |
965 p->aMatchinfo[iStart+i*3] = 0; | |
966 } | |
967 } | |
968 | |
969 return rc; | |
970 } | |
971 | |
972 static int fts3MatchinfoCheck( | |
973 Fts3Table *pTab, | |
974 char cArg, | |
975 char **pzErr | |
976 ){ | |
977 if( (cArg==FTS3_MATCHINFO_NPHRASE) | |
978 || (cArg==FTS3_MATCHINFO_NCOL) | |
979 || (cArg==FTS3_MATCHINFO_NDOC && pTab->bFts4) | |
980 || (cArg==FTS3_MATCHINFO_AVGLENGTH && pTab->bFts4) | |
981 || (cArg==FTS3_MATCHINFO_LENGTH && pTab->bHasDocsize) | |
982 || (cArg==FTS3_MATCHINFO_LCS) | |
983 || (cArg==FTS3_MATCHINFO_HITS) | |
984 || (cArg==FTS3_MATCHINFO_LHITS) | |
985 || (cArg==FTS3_MATCHINFO_LHITS_BM) | |
986 ){ | |
987 return SQLITE_OK; | |
988 } | |
989 sqlite3Fts3ErrMsg(pzErr, "unrecognized matchinfo request: %c", cArg); | |
990 return SQLITE_ERROR; | |
991 } | |
992 | |
993 static int fts3MatchinfoSize(MatchInfo *pInfo, char cArg){ | |
994 int nVal; /* Number of integers output by cArg */ | |
995 | |
996 switch( cArg ){ | |
997 case FTS3_MATCHINFO_NDOC: | |
998 case FTS3_MATCHINFO_NPHRASE: | |
999 case FTS3_MATCHINFO_NCOL: | |
1000 nVal = 1; | |
1001 break; | |
1002 | |
1003 case FTS3_MATCHINFO_AVGLENGTH: | |
1004 case FTS3_MATCHINFO_LENGTH: | |
1005 case FTS3_MATCHINFO_LCS: | |
1006 nVal = pInfo->nCol; | |
1007 break; | |
1008 | |
1009 case FTS3_MATCHINFO_LHITS: | |
1010 nVal = pInfo->nCol * pInfo->nPhrase; | |
1011 break; | |
1012 | |
1013 case FTS3_MATCHINFO_LHITS_BM: | |
1014 nVal = pInfo->nPhrase * ((pInfo->nCol + 31) / 32); | |
1015 break; | |
1016 | |
1017 default: | |
1018 assert( cArg==FTS3_MATCHINFO_HITS ); | |
1019 nVal = pInfo->nCol * pInfo->nPhrase * 3; | |
1020 break; | |
1021 } | |
1022 | |
1023 return nVal; | |
1024 } | |
1025 | |
1026 static int fts3MatchinfoSelectDoctotal( | |
1027 Fts3Table *pTab, | |
1028 sqlite3_stmt **ppStmt, | |
1029 sqlite3_int64 *pnDoc, | |
1030 const char **paLen | |
1031 ){ | |
1032 sqlite3_stmt *pStmt; | |
1033 const char *a; | |
1034 sqlite3_int64 nDoc; | |
1035 | |
1036 if( !*ppStmt ){ | |
1037 int rc = sqlite3Fts3SelectDoctotal(pTab, ppStmt); | |
1038 if( rc!=SQLITE_OK ) return rc; | |
1039 } | |
1040 pStmt = *ppStmt; | |
1041 assert( sqlite3_data_count(pStmt)==1 ); | |
1042 | |
1043 a = sqlite3_column_blob(pStmt, 0); | |
1044 a += sqlite3Fts3GetVarint(a, &nDoc); | |
1045 if( nDoc==0 ) return FTS_CORRUPT_VTAB; | |
1046 *pnDoc = (u32)nDoc; | |
1047 | |
1048 if( paLen ) *paLen = a; | |
1049 return SQLITE_OK; | |
1050 } | |
1051 | |
1052 /* | |
1053 ** An instance of the following structure is used to store state while | |
1054 ** iterating through a multi-column position-list corresponding to the | |
1055 ** hits for a single phrase on a single row in order to calculate the | |
1056 ** values for a matchinfo() FTS3_MATCHINFO_LCS request. | |
1057 */ | |
1058 typedef struct LcsIterator LcsIterator; | |
1059 struct LcsIterator { | |
1060 Fts3Expr *pExpr; /* Pointer to phrase expression */ | |
1061 int iPosOffset; /* Tokens count up to end of this phrase */ | |
1062 char *pRead; /* Cursor used to iterate through aDoclist */ | |
1063 int iPos; /* Current position */ | |
1064 }; | |
1065 | |
/*
** Sentinel position value (the largest 32-bit signed integer) meaning that
** an LcsIterator has finished iterating through all offsets for all
** columns.
**
** The definition deliberately has no trailing semicolon so that the macro
** can be used inside expressions and comparisons.
**
** NOTE(review): LcsIterator has no iCol member and nothing in the visible
** code references this macro - confirm whether it is still needed.
*/
#define LCS_ITERATOR_FINISHED 0x7FFFFFFF
1071 | |
1072 static int fts3MatchinfoLcsCb( | |
1073 Fts3Expr *pExpr, /* Phrase expression node */ | |
1074 int iPhrase, /* Phrase number (numbered from zero) */ | |
1075 void *pCtx /* Pointer to MatchInfo structure */ | |
1076 ){ | |
1077 LcsIterator *aIter = (LcsIterator *)pCtx; | |
1078 aIter[iPhrase].pExpr = pExpr; | |
1079 return SQLITE_OK; | |
1080 } | |
1081 | |
1082 /* | |
1083 ** Advance the iterator passed as an argument to the next position. Return | |
1084 ** 1 if the iterator is at EOF or if it now points to the start of the | |
1085 ** position list for the next column. | |
1086 */ | |
1087 static int fts3LcsIteratorAdvance(LcsIterator *pIter){ | |
1088 char *pRead = pIter->pRead; | |
1089 sqlite3_int64 iRead; | |
1090 int rc = 0; | |
1091 | |
1092 pRead += sqlite3Fts3GetVarint(pRead, &iRead); | |
1093 if( iRead==0 || iRead==1 ){ | |
1094 pRead = 0; | |
1095 rc = 1; | |
1096 }else{ | |
1097 pIter->iPos += (int)(iRead-2); | |
1098 } | |
1099 | |
1100 pIter->pRead = pRead; | |
1101 return rc; | |
1102 } | |
1103 | |
1104 /* | |
1105 ** This function implements the FTS3_MATCHINFO_LCS matchinfo() flag. | |
1106 ** | |
1107 ** If the call is successful, the longest-common-substring lengths for each | |
1108 ** column are written into the first nCol elements of the pInfo->aMatchinfo[] | |
1109 ** array before returning. SQLITE_OK is returned in this case. | |
1110 ** | |
1111 ** Otherwise, if an error occurs, an SQLite error code is returned and the | |
1112 ** data written to the first nCol elements of pInfo->aMatchinfo[] is | |
1113 ** undefined. | |
1114 */ | |
1115 static int fts3MatchinfoLcs(Fts3Cursor *pCsr, MatchInfo *pInfo){ | |
1116 LcsIterator *aIter; | |
1117 int i; | |
1118 int iCol; | |
1119 int nToken = 0; | |
1120 | |
1121 /* Allocate and populate the array of LcsIterator objects. The array | |
1122 ** contains one element for each matchable phrase in the query. | |
1123 **/ | |
1124 aIter = sqlite3_malloc(sizeof(LcsIterator) * pCsr->nPhrase); | |
1125 if( !aIter ) return SQLITE_NOMEM; | |
1126 memset(aIter, 0, sizeof(LcsIterator) * pCsr->nPhrase); | |
1127 (void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter); | |
1128 | |
1129 for(i=0; i<pInfo->nPhrase; i++){ | |
1130 LcsIterator *pIter = &aIter[i]; | |
1131 nToken -= pIter->pExpr->pPhrase->nToken; | |
1132 pIter->iPosOffset = nToken; | |
1133 } | |
1134 | |
1135 for(iCol=0; iCol<pInfo->nCol; iCol++){ | |
1136 int nLcs = 0; /* LCS value for this column */ | |
1137 int nLive = 0; /* Number of iterators in aIter not at EOF */ | |
1138 | |
1139 for(i=0; i<pInfo->nPhrase; i++){ | |
1140 int rc; | |
1141 LcsIterator *pIt = &aIter[i]; | |
1142 rc = sqlite3Fts3EvalPhrasePoslist(pCsr, pIt->pExpr, iCol, &pIt->pRead); | |
1143 if( rc!=SQLITE_OK ) return rc; | |
1144 if( pIt->pRead ){ | |
1145 pIt->iPos = pIt->iPosOffset; | |
1146 fts3LcsIteratorAdvance(&aIter[i]); | |
1147 nLive++; | |
1148 } | |
1149 } | |
1150 | |
1151 while( nLive>0 ){ | |
1152 LcsIterator *pAdv = 0; /* The iterator to advance by one position */ | |
1153 int nThisLcs = 0; /* LCS for the current iterator positions */ | |
1154 | |
1155 for(i=0; i<pInfo->nPhrase; i++){ | |
1156 LcsIterator *pIter = &aIter[i]; | |
1157 if( pIter->pRead==0 ){ | |
1158 /* This iterator is already at EOF for this column. */ | |
1159 nThisLcs = 0; | |
1160 }else{ | |
1161 if( pAdv==0 || pIter->iPos<pAdv->iPos ){ | |
1162 pAdv = pIter; | |
1163 } | |
1164 if( nThisLcs==0 || pIter->iPos==pIter[-1].iPos ){ | |
1165 nThisLcs++; | |
1166 }else{ | |
1167 nThisLcs = 1; | |
1168 } | |
1169 if( nThisLcs>nLcs ) nLcs = nThisLcs; | |
1170 } | |
1171 } | |
1172 if( fts3LcsIteratorAdvance(pAdv) ) nLive--; | |
1173 } | |
1174 | |
1175 pInfo->aMatchinfo[iCol] = nLcs; | |
1176 } | |
1177 | |
1178 sqlite3_free(aIter); | |
1179 return SQLITE_OK; | |
1180 } | |
1181 | |
1182 /* | |
1183 ** Populate the buffer pInfo->aMatchinfo[] with an array of integers to | |
1184 ** be returned by the matchinfo() function. Argument zArg contains the | |
1185 ** format string passed as the second argument to matchinfo (or the | |
1186 ** default value "pcx" if no second argument was specified). The format | |
1187 ** string has already been validated and the pInfo->aMatchinfo[] array | |
1188 ** is guaranteed to be large enough for the output. | |
1189 ** | |
1190 ** If bGlobal is true, then populate all fields of the matchinfo() output. | |
1191 ** If it is false, then assume that those fields that do not change between | |
1192 ** rows (i.e. FTS3_MATCHINFO_NPHRASE, NCOL, NDOC, AVGLENGTH and part of HITS) | |
1193 ** have already been populated. | |
1194 ** | |
1195 ** Return SQLITE_OK if successful, or an SQLite error code if an error | |
1196 ** occurs. If a value other than SQLITE_OK is returned, the state the | |
1197 ** pInfo->aMatchinfo[] buffer is left in is undefined. | |
1198 */ | |
1199 static int fts3MatchinfoValues( | |
1200 Fts3Cursor *pCsr, /* FTS3 cursor object */ | |
1201 int bGlobal, /* True to grab the global stats */ | |
1202 MatchInfo *pInfo, /* Matchinfo context object */ | |
1203 const char *zArg /* Matchinfo format string */ | |
1204 ){ | |
1205 int rc = SQLITE_OK; | |
1206 int i; | |
1207 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; | |
1208 sqlite3_stmt *pSelect = 0; | |
1209 | |
1210 for(i=0; rc==SQLITE_OK && zArg[i]; i++){ | |
1211 pInfo->flag = zArg[i]; | |
1212 switch( zArg[i] ){ | |
1213 case FTS3_MATCHINFO_NPHRASE: | |
1214 if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nPhrase; | |
1215 break; | |
1216 | |
1217 case FTS3_MATCHINFO_NCOL: | |
1218 if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nCol; | |
1219 break; | |
1220 | |
1221 case FTS3_MATCHINFO_NDOC: | |
1222 if( bGlobal ){ | |
1223 sqlite3_int64 nDoc = 0; | |
1224 rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, 0); | |
1225 pInfo->aMatchinfo[0] = (u32)nDoc; | |
1226 } | |
1227 break; | |
1228 | |
1229 case FTS3_MATCHINFO_AVGLENGTH: | |
1230 if( bGlobal ){ | |
1231 sqlite3_int64 nDoc; /* Number of rows in table */ | |
1232 const char *a; /* Aggregate column length array */ | |
1233 | |
1234 rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, &a); | |
1235 if( rc==SQLITE_OK ){ | |
1236 int iCol; | |
1237 for(iCol=0; iCol<pInfo->nCol; iCol++){ | |
1238 u32 iVal; | |
1239 sqlite3_int64 nToken; | |
1240 a += sqlite3Fts3GetVarint(a, &nToken); | |
1241 iVal = (u32)(((u32)(nToken&0xffffffff)+nDoc/2)/nDoc); | |
1242 pInfo->aMatchinfo[iCol] = iVal; | |
1243 } | |
1244 } | |
1245 } | |
1246 break; | |
1247 | |
1248 case FTS3_MATCHINFO_LENGTH: { | |
1249 sqlite3_stmt *pSelectDocsize = 0; | |
1250 rc = sqlite3Fts3SelectDocsize(pTab, pCsr->iPrevId, &pSelectDocsize); | |
1251 if( rc==SQLITE_OK ){ | |
1252 int iCol; | |
1253 const char *a = sqlite3_column_blob(pSelectDocsize, 0); | |
1254 for(iCol=0; iCol<pInfo->nCol; iCol++){ | |
1255 sqlite3_int64 nToken; | |
1256 a += sqlite3Fts3GetVarint(a, &nToken); | |
1257 pInfo->aMatchinfo[iCol] = (u32)nToken; | |
1258 } | |
1259 } | |
1260 sqlite3_reset(pSelectDocsize); | |
1261 break; | |
1262 } | |
1263 | |
1264 case FTS3_MATCHINFO_LCS: | |
1265 rc = fts3ExprLoadDoclists(pCsr, 0, 0); | |
1266 if( rc==SQLITE_OK ){ | |
1267 rc = fts3MatchinfoLcs(pCsr, pInfo); | |
1268 } | |
1269 break; | |
1270 | |
1271 case FTS3_MATCHINFO_LHITS_BM: | |
1272 case FTS3_MATCHINFO_LHITS: { | |
1273 int nZero = fts3MatchinfoSize(pInfo, zArg[i]) * sizeof(u32); | |
1274 memset(pInfo->aMatchinfo, 0, nZero); | |
1275 fts3ExprLHitGather(pCsr->pExpr, pInfo); | |
1276 break; | |
1277 } | |
1278 | |
1279 default: { | |
1280 Fts3Expr *pExpr; | |
1281 assert( zArg[i]==FTS3_MATCHINFO_HITS ); | |
1282 pExpr = pCsr->pExpr; | |
1283 rc = fts3ExprLoadDoclists(pCsr, 0, 0); | |
1284 if( rc!=SQLITE_OK ) break; | |
1285 if( bGlobal ){ | |
1286 if( pCsr->pDeferred ){ | |
1287 rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &pInfo->nDoc, 0); | |
1288 if( rc!=SQLITE_OK ) break; | |
1289 } | |
1290 rc = fts3ExprIterate(pExpr, fts3ExprGlobalHitsCb,(void*)pInfo); | |
1291 sqlite3Fts3EvalTestDeferred(pCsr, &rc); | |
1292 if( rc!=SQLITE_OK ) break; | |
1293 } | |
1294 (void)fts3ExprIterate(pExpr, fts3ExprLocalHitsCb,(void*)pInfo); | |
1295 break; | |
1296 } | |
1297 } | |
1298 | |
1299 pInfo->aMatchinfo += fts3MatchinfoSize(pInfo, zArg[i]); | |
1300 } | |
1301 | |
1302 sqlite3_reset(pSelect); | |
1303 return rc; | |
1304 } | |
1305 | |
1306 | |
1307 /* | |
1308 ** Populate pCsr->aMatchinfo[] with data for the current row. The | |
1309 ** 'matchinfo' data is an array of 32-bit unsigned integers (C type u32). | |
1310 */ | |
1311 static void fts3GetMatchinfo( | |
1312 sqlite3_context *pCtx, /* Return results here */ | |
1313 Fts3Cursor *pCsr, /* FTS3 Cursor object */ | |
1314 const char *zArg /* Second argument to matchinfo() function */ | |
1315 ){ | |
1316 MatchInfo sInfo; | |
1317 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; | |
1318 int rc = SQLITE_OK; | |
1319 int bGlobal = 0; /* Collect 'global' stats as well as local */ | |
1320 | |
1321 u32 *aOut = 0; | |
1322 void (*xDestroyOut)(void*) = 0; | |
1323 | |
1324 memset(&sInfo, 0, sizeof(MatchInfo)); | |
1325 sInfo.pCursor = pCsr; | |
1326 sInfo.nCol = pTab->nColumn; | |
1327 | |
1328 /* If there is cached matchinfo() data, but the format string for the | |
1329 ** cache does not match the format string for this request, discard | |
1330 ** the cached data. */ | |
1331 if( pCsr->pMIBuffer && strcmp(pCsr->pMIBuffer->zMatchinfo, zArg) ){ | |
1332 sqlite3Fts3MIBufferFree(pCsr->pMIBuffer); | |
1333 pCsr->pMIBuffer = 0; | |
1334 } | |
1335 | |
1336 /* If Fts3Cursor.pMIBuffer is NULL, then this is the first time the | |
1337 ** matchinfo function has been called for this query. In this case | |
1338 ** allocate the array used to accumulate the matchinfo data and | |
1339 ** initialize those elements that are constant for every row. | |
1340 */ | |
1341 if( pCsr->pMIBuffer==0 ){ | |
1342 int nMatchinfo = 0; /* Number of u32 elements in match-info */ | |
1343 int i; /* Used to iterate through zArg */ | |
1344 | |
1345 /* Determine the number of phrases in the query */ | |
1346 pCsr->nPhrase = fts3ExprPhraseCount(pCsr->pExpr); | |
1347 sInfo.nPhrase = pCsr->nPhrase; | |
1348 | |
1349 /* Determine the number of integers in the buffer returned by this call. */ | |
1350 for(i=0; zArg[i]; i++){ | |
1351 char *zErr = 0; | |
1352 if( fts3MatchinfoCheck(pTab, zArg[i], &zErr) ){ | |
1353 sqlite3_result_error(pCtx, zErr, -1); | |
1354 sqlite3_free(zErr); | |
1355 return; | |
1356 } | |
1357 nMatchinfo += fts3MatchinfoSize(&sInfo, zArg[i]); | |
1358 } | |
1359 | |
1360 /* Allocate space for Fts3Cursor.aMatchinfo[] and Fts3Cursor.zMatchinfo. */ | |
1361 pCsr->pMIBuffer = fts3MIBufferNew(nMatchinfo, zArg); | |
1362 if( !pCsr->pMIBuffer ) rc = SQLITE_NOMEM; | |
1363 | |
1364 pCsr->isMatchinfoNeeded = 1; | |
1365 bGlobal = 1; | |
1366 } | |
1367 | |
1368 if( rc==SQLITE_OK ){ | |
1369 xDestroyOut = fts3MIBufferAlloc(pCsr->pMIBuffer, &aOut); | |
1370 if( xDestroyOut==0 ){ | |
1371 rc = SQLITE_NOMEM; | |
1372 } | |
1373 } | |
1374 | |
1375 if( rc==SQLITE_OK ){ | |
1376 sInfo.aMatchinfo = aOut; | |
1377 sInfo.nPhrase = pCsr->nPhrase; | |
1378 rc = fts3MatchinfoValues(pCsr, bGlobal, &sInfo, zArg); | |
1379 if( bGlobal ){ | |
1380 fts3MIBufferSetGlobal(pCsr->pMIBuffer); | |
1381 } | |
1382 } | |
1383 | |
1384 if( rc!=SQLITE_OK ){ | |
1385 sqlite3_result_error_code(pCtx, rc); | |
1386 if( xDestroyOut ) xDestroyOut(aOut); | |
1387 }else{ | |
1388 int n = pCsr->pMIBuffer->nElem * sizeof(u32); | |
1389 sqlite3_result_blob(pCtx, aOut, n, xDestroyOut); | |
1390 } | |
1391 } | |
1392 | |
1393 /* | |
1394 ** Implementation of snippet() function. | |
1395 */ | |
1396 void sqlite3Fts3Snippet( | |
1397 sqlite3_context *pCtx, /* SQLite function call context */ | |
1398 Fts3Cursor *pCsr, /* Cursor object */ | |
1399 const char *zStart, /* Snippet start text - "<b>" */ | |
1400 const char *zEnd, /* Snippet end text - "</b>" */ | |
1401 const char *zEllipsis, /* Snippet ellipsis text - "<b>...</b>" */ | |
1402 int iCol, /* Extract snippet from this column */ | |
1403 int nToken /* Approximate number of tokens in snippet */ | |
1404 ){ | |
1405 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; | |
1406 int rc = SQLITE_OK; | |
1407 int i; | |
1408 StrBuffer res = {0, 0, 0}; | |
1409 | |
1410 /* The returned text includes up to four fragments of text extracted from | |
1411 ** the data in the current row. The first iteration of the for(...) loop | |
1412 ** below attempts to locate a single fragment of text nToken tokens in | |
1413 ** size that contains at least one instance of all phrases in the query | |
1414 ** expression that appear in the current row. If such a fragment of text | |
1415 ** cannot be found, the second iteration of the loop attempts to locate | |
1416 ** a pair of fragments, and so on. | |
1417 */ | |
1418 int nSnippet = 0; /* Number of fragments in this snippet */ | |
1419 SnippetFragment aSnippet[4]; /* Maximum of 4 fragments per snippet */ | |
1420 int nFToken = -1; /* Number of tokens in each fragment */ | |
1421 | |
1422 if( !pCsr->pExpr ){ | |
1423 sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC); | |
1424 return; | |
1425 } | |
1426 | |
1427 for(nSnippet=1; 1; nSnippet++){ | |
1428 | |
1429 int iSnip; /* Loop counter 0..nSnippet-1 */ | |
1430 u64 mCovered = 0; /* Bitmask of phrases covered by snippet */ | |
1431 u64 mSeen = 0; /* Bitmask of phrases seen by BestSnippet() */ | |
1432 | |
1433 if( nToken>=0 ){ | |
1434 nFToken = (nToken+nSnippet-1) / nSnippet; | |
1435 }else{ | |
1436 nFToken = -1 * nToken; | |
1437 } | |
1438 | |
1439 for(iSnip=0; iSnip<nSnippet; iSnip++){ | |
1440 int iBestScore = -1; /* Best score of columns checked so far */ | |
1441 int iRead; /* Used to iterate through columns */ | |
1442 SnippetFragment *pFragment = &aSnippet[iSnip]; | |
1443 | |
1444 memset(pFragment, 0, sizeof(*pFragment)); | |
1445 | |
1446 /* Loop through all columns of the table being considered for snippets. | |
1447 ** If the iCol argument to this function was negative, this means all | |
1448 ** columns of the FTS3 table. Otherwise, only column iCol is considered. | |
1449 */ | |
1450 for(iRead=0; iRead<pTab->nColumn; iRead++){ | |
1451 SnippetFragment sF = {0, 0, 0, 0}; | |
1452 int iS = 0; | |
1453 if( iCol>=0 && iRead!=iCol ) continue; | |
1454 | |
1455 /* Find the best snippet of nFToken tokens in column iRead. */ | |
1456 rc = fts3BestSnippet(nFToken, pCsr, iRead, mCovered, &mSeen, &sF, &iS); | |
1457 if( rc!=SQLITE_OK ){ | |
1458 goto snippet_out; | |
1459 } | |
1460 if( iS>iBestScore ){ | |
1461 *pFragment = sF; | |
1462 iBestScore = iS; | |
1463 } | |
1464 } | |
1465 | |
1466 mCovered |= pFragment->covered; | |
1467 } | |
1468 | |
1469 /* If all query phrases seen by fts3BestSnippet() are present in at least | |
1470 ** one of the nSnippet snippet fragments, break out of the loop. | |
1471 */ | |
1472 assert( (mCovered&mSeen)==mCovered ); | |
1473 if( mSeen==mCovered || nSnippet==SizeofArray(aSnippet) ) break; | |
1474 } | |
1475 | |
1476 assert( nFToken>0 ); | |
1477 | |
1478 for(i=0; i<nSnippet && rc==SQLITE_OK; i++){ | |
1479 rc = fts3SnippetText(pCsr, &aSnippet[i], | |
1480 i, (i==nSnippet-1), nFToken, zStart, zEnd, zEllipsis, &res | |
1481 ); | |
1482 } | |
1483 | |
1484 snippet_out: | |
1485 sqlite3Fts3SegmentsClose(pTab); | |
1486 if( rc!=SQLITE_OK ){ | |
1487 sqlite3_result_error_code(pCtx, rc); | |
1488 sqlite3_free(res.z); | |
1489 }else{ | |
1490 sqlite3_result_text(pCtx, res.z, -1, sqlite3_free); | |
1491 } | |
1492 } | |
1493 | |
1494 | |
1495 typedef struct TermOffset TermOffset; | |
1496 typedef struct TermOffsetCtx TermOffsetCtx; | |
1497 | |
1498 struct TermOffset { | |
1499 char *pList; /* Position-list */ | |
1500 int iPos; /* Position just read from pList */ | |
1501 int iOff; /* Offset of this term from read positions */ | |
1502 }; | |
1503 | |
1504 struct TermOffsetCtx { | |
1505 Fts3Cursor *pCsr; | |
1506 int iCol; /* Column of table to populate aTerm for */ | |
1507 int iTerm; | |
1508 sqlite3_int64 iDocid; | |
1509 TermOffset *aTerm; | |
1510 }; | |
1511 | |
1512 /* | |
1513 ** This function is an fts3ExprIterate() callback used by sqlite3Fts3Offsets(). | |
1514 */ | |
1515 static int fts3ExprTermOffsetInit(Fts3Expr *pExpr, int iPhrase, void *ctx){ | |
1516 TermOffsetCtx *p = (TermOffsetCtx *)ctx; | |
1517 int nTerm; /* Number of tokens in phrase */ | |
1518 int iTerm; /* For looping through nTerm phrase terms */ | |
1519 char *pList; /* Pointer to position list for phrase */ | |
1520 int iPos = 0; /* First position in position-list */ | |
1521 int rc; | |
1522 | |
1523 UNUSED_PARAMETER(iPhrase); | |
1524 rc = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol, &pList); | |
1525 nTerm = pExpr->pPhrase->nToken; | |
1526 if( pList ){ | |
1527 fts3GetDeltaPosition(&pList, &iPos); | |
1528 assert( iPos>=0 ); | |
1529 } | |
1530 | |
1531 for(iTerm=0; iTerm<nTerm; iTerm++){ | |
1532 TermOffset *pT = &p->aTerm[p->iTerm++]; | |
1533 pT->iOff = nTerm-iTerm-1; | |
1534 pT->pList = pList; | |
1535 pT->iPos = iPos; | |
1536 } | |
1537 | |
1538 return rc; | |
1539 } | |
1540 | |
1541 /* | |
1542 ** Implementation of offsets() function. | |
1543 */ | |
1544 void sqlite3Fts3Offsets( | |
1545 sqlite3_context *pCtx, /* SQLite function call context */ | |
1546 Fts3Cursor *pCsr /* Cursor object */ | |
1547 ){ | |
1548 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; | |
1549 sqlite3_tokenizer_module const *pMod = pTab->pTokenizer->pModule; | |
1550 int rc; /* Return Code */ | |
1551 int nToken; /* Number of tokens in query */ | |
1552 int iCol; /* Column currently being processed */ | |
1553 StrBuffer res = {0, 0, 0}; /* Result string */ | |
1554 TermOffsetCtx sCtx; /* Context for fts3ExprTermOffsetInit() */ | |
1555 | |
1556 if( !pCsr->pExpr ){ | |
1557 sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC); | |
1558 return; | |
1559 } | |
1560 | |
1561 memset(&sCtx, 0, sizeof(sCtx)); | |
1562 assert( pCsr->isRequireSeek==0 ); | |
1563 | |
1564 /* Count the number of terms in the query */ | |
1565 rc = fts3ExprLoadDoclists(pCsr, 0, &nToken); | |
1566 if( rc!=SQLITE_OK ) goto offsets_out; | |
1567 | |
1568 /* Allocate the array of TermOffset iterators. */ | |
1569 sCtx.aTerm = (TermOffset *)sqlite3_malloc(sizeof(TermOffset)*nToken); | |
1570 if( 0==sCtx.aTerm ){ | |
1571 rc = SQLITE_NOMEM; | |
1572 goto offsets_out; | |
1573 } | |
1574 sCtx.iDocid = pCsr->iPrevId; | |
1575 sCtx.pCsr = pCsr; | |
1576 | |
1577 /* Loop through the table columns, appending offset information to | |
1578 ** string-buffer res for each column. | |
1579 */ | |
1580 for(iCol=0; iCol<pTab->nColumn; iCol++){ | |
1581 sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor */ | |
1582 const char *ZDUMMY; /* Dummy argument used with xNext() */ | |
1583 int NDUMMY = 0; /* Dummy argument used with xNext() */ | |
1584 int iStart = 0; | |
1585 int iEnd = 0; | |
1586 int iCurrent = 0; | |
1587 const char *zDoc; | |
1588 int nDoc; | |
1589 | |
1590 /* Initialize the contents of sCtx.aTerm[] for column iCol. There is | |
1591 ** no way that this operation can fail, so the return code from | |
1592 ** fts3ExprIterate() can be discarded. | |
1593 */ | |
1594 sCtx.iCol = iCol; | |
1595 sCtx.iTerm = 0; | |
1596 (void)fts3ExprIterate(pCsr->pExpr, fts3ExprTermOffsetInit, (void*)&sCtx); | |
1597 | |
1598 /* Retreive the text stored in column iCol. If an SQL NULL is stored | |
1599 ** in column iCol, jump immediately to the next iteration of the loop. | |
1600 ** If an OOM occurs while retrieving the data (this can happen if SQLite | |
1601 ** needs to transform the data from utf-16 to utf-8), return SQLITE_NOMEM | |
1602 ** to the caller. | |
1603 */ | |
1604 zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol+1); | |
1605 nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol+1); | |
1606 if( zDoc==0 ){ | |
1607 if( sqlite3_column_type(pCsr->pStmt, iCol+1)==SQLITE_NULL ){ | |
1608 continue; | |
1609 } | |
1610 rc = SQLITE_NOMEM; | |
1611 goto offsets_out; | |
1612 } | |
1613 | |
1614 /* Initialize a tokenizer iterator to iterate through column iCol. */ | |
1615 rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid, | |
1616 zDoc, nDoc, &pC | |
1617 ); | |
1618 if( rc!=SQLITE_OK ) goto offsets_out; | |
1619 | |
1620 rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent); | |
1621 while( rc==SQLITE_OK ){ | |
1622 int i; /* Used to loop through terms */ | |
1623 int iMinPos = 0x7FFFFFFF; /* Position of next token */ | |
1624 TermOffset *pTerm = 0; /* TermOffset associated with next token */ | |
1625 | |
1626 for(i=0; i<nToken; i++){ | |
1627 TermOffset *pT = &sCtx.aTerm[i]; | |
1628 if( pT->pList && (pT->iPos-pT->iOff)<iMinPos ){ | |
1629 iMinPos = pT->iPos-pT->iOff; | |
1630 pTerm = pT; | |
1631 } | |
1632 } | |
1633 | |
1634 if( !pTerm ){ | |
1635 /* All offsets for this column have been gathered. */ | |
1636 rc = SQLITE_DONE; | |
1637 }else{ | |
1638 assert( iCurrent<=iMinPos ); | |
1639 if( 0==(0xFE&*pTerm->pList) ){ | |
1640 pTerm->pList = 0; | |
1641 }else{ | |
1642 fts3GetDeltaPosition(&pTerm->pList, &pTerm->iPos); | |
1643 } | |
1644 while( rc==SQLITE_OK && iCurrent<iMinPos ){ | |
1645 rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent); | |
1646 } | |
1647 if( rc==SQLITE_OK ){ | |
1648 char aBuffer[64]; | |
1649 sqlite3_snprintf(sizeof(aBuffer), aBuffer, | |
1650 "%d %d %d %d ", iCol, pTerm-sCtx.aTerm, iStart, iEnd-iStart | |
1651 ); | |
1652 rc = fts3StringAppend(&res, aBuffer, -1); | |
1653 }else if( rc==SQLITE_DONE && pTab->zContentTbl==0 ){ | |
1654 rc = FTS_CORRUPT_VTAB; | |
1655 } | |
1656 } | |
1657 } | |
1658 if( rc==SQLITE_DONE ){ | |
1659 rc = SQLITE_OK; | |
1660 } | |
1661 | |
1662 pMod->xClose(pC); | |
1663 if( rc!=SQLITE_OK ) goto offsets_out; | |
1664 } | |
1665 | |
1666 offsets_out: | |
1667 sqlite3_free(sCtx.aTerm); | |
1668 assert( rc!=SQLITE_DONE ); | |
1669 sqlite3Fts3SegmentsClose(pTab); | |
1670 if( rc!=SQLITE_OK ){ | |
1671 sqlite3_result_error_code(pCtx, rc); | |
1672 sqlite3_free(res.z); | |
1673 }else{ | |
1674 sqlite3_result_text(pCtx, res.z, res.n-1, sqlite3_free); | |
1675 } | |
1676 return; | |
1677 } | |
1678 | |
1679 /* | |
1680 ** Implementation of matchinfo() function. | |
1681 */ | |
1682 void sqlite3Fts3Matchinfo( | |
1683 sqlite3_context *pContext, /* Function call context */ | |
1684 Fts3Cursor *pCsr, /* FTS3 table cursor */ | |
1685 const char *zArg /* Second arg to matchinfo() function */ | |
1686 ){ | |
1687 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; | |
1688 const char *zFormat; | |
1689 | |
1690 if( zArg ){ | |
1691 zFormat = zArg; | |
1692 }else{ | |
1693 zFormat = FTS3_MATCHINFO_DEFAULT; | |
1694 } | |
1695 | |
1696 if( !pCsr->pExpr ){ | |
1697 sqlite3_result_blob(pContext, "", 0, SQLITE_STATIC); | |
1698 return; | |
1699 }else{ | |
1700 /* Retrieve matchinfo() data. */ | |
1701 fts3GetMatchinfo(pContext, pCsr, zFormat); | |
1702 sqlite3Fts3SegmentsClose(pTab); | |
1703 } | |
1704 } | |
1705 | |
1706 #endif | |
OLD | NEW |