OLD | NEW |
| (Empty) |
1 From 07efd3db8276f343fa9762a11ca3114ba8ffe532 Mon Sep 17 00:00:00 2001 | |
2 From: Matt Perry <mpcomplete@google.com> | |
3 Date: Wed, 17 Dec 2008 00:50:50 +0000 | |
4 Subject: [PATCH 20/23] [fts2] Interpret "foo*" as a prefix search. | |
5 | |
6 By default, "foo*" was interpreted as "foo *" (two tokens). | |
7 | |
8 Original review URL: http://codereview.chromium.org/14176 | |
9 --- | |
10 third_party/sqlite/src/ext/fts2/fts2.c | 15 +++++++++++++++ | |
11 1 file changed, 15 insertions(+) | |
12 | |
13 diff --git a/third_party/sqlite/src/ext/fts2/fts2.c b/third_party/sqlite/src/ext/fts2/fts2.c | |
14 index 4945cd9..7d07137 100644 | |
15 --- a/third_party/sqlite/src/ext/fts2/fts2.c | |
16 +++ b/third_party/sqlite/src/ext/fts2/fts2.c | |
17 @@ -3558,6 +3558,7 @@ static int tokenizeSegment( | |
18 int firstIndex = pQuery->nTerms; | |
19 int iCol; | |
20 int nTerm = 1; | |
21 + int iEndLast = -1; | |
22 | |
23 int rc = pModule->xOpen(pTokenizer, pSegment, nSegment, &pCursor); | |
24 if( rc!=SQLITE_OK ) return rc; | |
25 @@ -3582,6 +3583,20 @@ static int tokenizeSegment( | |
26 pQuery->nextIsOr = 1; | |
27 continue; | |
28 } | |
29 + | |
30 + /* | |
31 + * The ICU tokenizer considers '*' a break character, so the code below | |
32 + * sets isPrefix correctly, but since that code doesn't eat the '*', the | |
33 + * ICU tokenizer returns it as the next token. So eat it here until a | |
34 + * better solution presents itself. | |
35 + */ | |
36 + if( pQuery->nTerms>0 && nToken==1 && pSegment[iBegin]=='*' && | |
37 + iEndLast==iBegin){ | |
38 + pQuery->pTerms[pQuery->nTerms-1].isPrefix = 1; | |
39 + continue; | |
40 + } | |
41 + iEndLast = iEnd; | |
42 + | |
43 queryAdd(pQuery, pToken, nToken); | |
44 if( !inPhrase && iBegin>0 && pSegment[iBegin-1]=='-' ){ | |
45 pQuery->pTerms[pQuery->nTerms-1].isNot = 1; | |
46 -- | |
47 2.2.1 | |
48 | |
OLD | NEW |