Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(114)

Side by Side Diff: third_party/sqlite/sqlite-src-3170000/ext/fts5/test/fts5tokenizer.test

Issue 2747283002: [sql] Import reference version of SQLite 3.17.. (Closed)
Patch Set: Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 # 2014 Dec 20
2 #
3 # The author disclaims copyright to this source code. In place of
4 # a legal notice, here is a blessing:
5 #
6 # May you do good and not evil.
7 # May you find forgiveness for yourself and forgive others.
8 # May you share freely, never taking more than you give.
9 #
10 #***********************************************************************
11 #
12 # Tests focusing on the built-in fts5 tokenizers.
13 #
14
15 source [file join [file dirname [info script]] fts5_common.tcl]
16 set testprefix fts5tokenizer
17
18 # If SQLITE_ENABLE_FTS5 is not defined, omit this file.
19 ifcapable !fts5 {
20 finish_test
21 return
22 }
23
24
25 do_execsql_test 1.0 {
26 CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize=porter);
27 DROP TABLE ft1;
28 }
29 do_execsql_test 1.1 {
30 CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize='porter');
31 DROP TABLE ft1;
32 }
33 do_execsql_test 1.2 {
34 CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize = porter);
35 DROP TABLE ft1;
36 }
37 do_execsql_test 1.3 {
38 CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize = 'porter');
39 DROP TABLE ft1;
40 }
41 do_execsql_test 1.4 {
42 CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize = 'porter ascii');
43 DROP TABLE ft1;
44 }
45
46 do_catchsql_test 1.5 {
47 CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize = 'nosuch');
48 } {1 {no such tokenizer: nosuch}}
49
50 do_catchsql_test 1.6 {
51 CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize = 'porter nosuch');
52 } {1 {error in tokenizer constructor}}
53
54 do_execsql_test 2.0 {
55 CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize=porter);
56 INSERT INTO ft1 VALUES('embedded databases');
57 }
58 do_execsql_test 2.1 { SELECT rowid FROM ft1 WHERE ft1 MATCH 'embedding' } 1
59 do_execsql_test 2.2 { SELECT rowid FROM ft1 WHERE ft1 MATCH 'database' } 1
60 do_execsql_test 2.3 {
61 SELECT rowid FROM ft1 WHERE ft1 MATCH 'database embedding'
62 } 1
63
# Constructor callback for the "tcl" tokenizer registered just below.
# It saves the arguments it was invoked with in the global ::targs so the
# 3.* tests can inspect them, then unconditionally raises an error, which
# those tests expect to surface as "error in tokenizer constructor".
64 proc tcl_create {args} {
65 set ::targs $args
66 error "failed"
67 }
68 sqlite3_fts5_create_tokenizer db tcl tcl_create
69
70 foreach {tn directive expected} {
71 1 {tokenize='tcl a b c'} {a b c}
72 2 {tokenize='tcl ''d'' ''e'' ''f'''} {d e f}
73 3 {tokenize="tcl 'g' 'h' 'i'"} {g h i}
74 4 {tokenize = tcl} {}
75 } {
76 do_catchsql_test 3.$tn.1 "
77 CREATE VIRTUAL TABLE ft2 USING fts5(x, $directive)
78 " {1 {error in tokenizer constructor}}
79 do_test 3.$tn.2 { set ::targs } $expected
80 }
81
82 do_catchsql_test 4.1 {
83 CREATE VIRTUAL TABLE ft2 USING fts5(x, tokenize = tcl abc);
84 } {1 {parse error in "tokenize = tcl abc"}}
85 do_catchsql_test 4.2 {
86 CREATE VIRTUAL TABLE ft2 USING fts5(x y)
87 } {1 {unrecognized column option: y}}
88
89 #-------------------------------------------------------------------------
90 # Test the "separators" and "tokenchars" options a bit.
91 #
92 foreach {tn tokenizer} {1 ascii 2 unicode61} {
93 reset_db
94 set T "$tokenizer tokenchars ',.:' separators 'xyz'"
95 execsql "CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = \"$T\")"
96 do_execsql_test 5.$tn.1 {
97 INSERT INTO t1 VALUES('abcxdefyghizjkl.mno,pqr:stu/vwx+yz');
98 }
99 foreach {tn2 token res} {
100 1 abc 1 2 def 1 3 ghi 1 4 jkl {}
101 5 mno {} 6 pqr {} 7 stu {} 8 jkl.mno,pqr:stu 1
102 9 vw 1
103 } {
104 do_execsql_test 5.$tn.2.$tn2 "
105 SELECT rowid FROM t1 WHERE t1 MATCH '\"$token\"'
106 " $res
107 }
108 }
109
110 #-------------------------------------------------------------------------
111 # Miscellaneous tests for the ascii tokenizer.
112 #
113 # 5.1.*: Test that the ascii tokenizer ignores non-ASCII characters in the
114 # 'separators' option. But unicode61 does not.
115 #
116 # 5.2, 5.3: An option without an argument (5.2), or an unrecognized option (5.3), is an error.
117 #
118
119 do_test 5.1.1 {
120 execsql "
121 CREATE VIRTUAL TABLE a1 USING fts5(x, tokenize=`ascii separators '\u1234'`);
122 INSERT INTO a1 VALUES('abc\u1234def');
123 "
124 execsql { SELECT rowid FROM a1 WHERE a1 MATCH 'def' }
125 } {}
126
127 do_test 5.1.2 {
128 execsql "
129 CREATE VIRTUAL TABLE a2 USING fts5(
130 x, tokenize=`unicode61 separators '\u1234'`);
131 INSERT INTO a2 VALUES('abc\u1234def');
132 "
133 execsql { SELECT rowid FROM a2 WHERE a2 MATCH 'def' }
134 } {1}
135
136 do_catchsql_test 5.2 {
137 CREATE VIRTUAL TABLE a3 USING fts5(x, y, tokenize = 'ascii tokenchars');
138 } {1 {error in tokenizer constructor}}
139 do_catchsql_test 5.3 {
140 CREATE VIRTUAL TABLE a3 USING fts5(x, y, tokenize = 'ascii opt arg');
141 } {1 {error in tokenizer constructor}}
142
143 #-------------------------------------------------------------------------
144 # Test that the ASCII and unicode61 tokenizers both handle SQLITE_DONE
145 # correctly.
146 #
147
# Token callback passed to xTokenize by the tokenize proc.
#
#   varname       - name of a list variable, linked one level up via upvar
#   token         - token text; appended to that list
#   iStart, iEnd  - accepted but not used here
#
# Returns "SQLITE_DONE" when the list holds exactly three tokens after the
# append, and "SQLITE_OK" otherwise. The 6.* tests expect only the first
# three tokens of each row to be collected.
148 proc test_token_cb {varname token iStart iEnd} {
149 upvar $varname var
150 lappend var $token
151 if {[llength $var]==3} { return "SQLITE_DONE" }
152 return "SQLITE_OK"
153 }
154
# Body of the SQL function "tokenize" (registered just below with
# sqlite3_fts5_create_function).
#
#   cmd - command used to reach the xTokenize and xColumnText methods
#         (presumably the fts5 extension API handle -- confirm against
#         the test harness)
#
# Fetches the text of column 0, runs xTokenize over it with test_token_cb
# as the callback, and returns the list of tokens the callback collected
# in the local variable res.
155 proc tokenize {cmd} {
156 set res [list]
157 $cmd xTokenize [$cmd xColumnText 0] [list test_token_cb res]
158 set res
159 }
160 sqlite3_fts5_create_function db tokenize tokenize
161
162 do_execsql_test 6.0 {
163 CREATE VIRTUAL TABLE x1 USING fts5(a, tokenize=ascii);
164 INSERT INTO x1 VALUES('q w e r t y');
165 INSERT INTO x1 VALUES('y t r e w q');
166 SELECT tokenize(x1) FROM x1 WHERE x1 MATCH 'e AND r';
167 } {
168 {q w e} {y t r}
169 }
170
171 do_execsql_test 6.1 {
172 CREATE VIRTUAL TABLE x2 USING fts5(a, tokenize=unicode61);
173 INSERT INTO x2 VALUES('q w e r t y');
174 INSERT INTO x2 VALUES('y t r e w q');
175 SELECT tokenize(x2) FROM x2 WHERE x2 MATCH 'e AND r';
176 } {
177 {q w e} {y t r}
178 }
179
180
181 #-------------------------------------------------------------------------
182 # Miscellaneous tests for the unicode61 tokenizer.
183 #
184 do_catchsql_test 6.1 {
185 CREATE VIRTUAL TABLE a3 USING fts5(x, y, tokenize = 'unicode61 tokenchars');
186 } {1 {error in tokenizer constructor}}
187 do_catchsql_test 6.2 {
188 CREATE VIRTUAL TABLE a3 USING fts5(x, y, tokenize = 'unicode61 a b');
189 } {1 {error in tokenizer constructor}}
190 do_catchsql_test 6.3 {
191 CREATE VIRTUAL TABLE a3 USING fts5(
192 x, y, tokenize = 'unicode61 remove_diacritics 2'
193 );
194 } {1 {error in tokenizer constructor}}
195 do_catchsql_test 6.4 {
196 CREATE VIRTUAL TABLE a3 USING fts5(
197 x, y, tokenize = 'unicode61 remove_diacritics 10'
198 );
199 } {1 {error in tokenizer constructor}}
200
201 #-------------------------------------------------------------------------
202 # Porter tokenizer with very large tokens.
203 #
204 set a [string repeat a 100]
205 set b [string repeat b 500]
206 set c [string repeat c 1000]
207 do_execsql_test 7.0 {
208 CREATE VIRTUAL TABLE e5 USING fts5(x, tokenize=porter);
209 INSERT INTO e5 VALUES($a || ' ' || $b);
210 INSERT INTO e5 VALUES($b || ' ' || $c);
211 INSERT INTO e5 VALUES($c || ' ' || $a);
212 }
213
214 do_execsql_test 7.1 {SELECT rowid FROM e5 WHERE e5 MATCH $a} { 1 3 }
215 do_execsql_test 7.2 {SELECT rowid FROM e5 WHERE e5 MATCH $b} { 1 2 }
216 do_execsql_test 7.3 {SELECT rowid FROM e5 WHERE e5 MATCH $c} { 2 3 }
217
218 #-------------------------------------------------------------------------
219 # Test the 'separators' option with the unicode61 tokenizer.
220 #
221 do_execsql_test 8.1 {
222 BEGIN;
223 CREATE VIRTUAL TABLE e6 USING fts5(x,
224 tokenize="unicode61 separators ABCDEFGHIJKLMNOPQRSTUVWXYZ"
225 );
226 INSERT INTO e6 VALUES('theAquickBbrownCfoxDjumpedWoverXtheYlazyZdog');
227 CREATE VIRTUAL TABLE e7 USING fts5vocab(e6, 'row');
228 SELECT term FROM e7;
229 ROLLBACK;
230 } {
231 brown dog fox jumped lazy over quick the
232 }
233
234 do_execsql_test 8.2 [subst {
235 BEGIN;
236 CREATE VIRTUAL TABLE e6 USING fts5(x,
237 tokenize="unicode61 separators '\u0E01\u0E02\u0E03\u0E04\u0E05\u0E06\u0E07'"
238 );
239 INSERT INTO e6 VALUES('the\u0E01quick\u0E01brown\u0E01fox\u0E01'
240 || 'jumped\u0E01over\u0E01the\u0E01lazy\u0E01dog'
241 );
242 INSERT INTO e6 VALUES('\u0E08\u0E07\u0E09');
243 CREATE VIRTUAL TABLE e7 USING fts5vocab(e6, 'row');
244 SELECT term FROM e7;
245 ROLLBACK;
246 }] [subst {
247 brown dog fox jumped lazy over quick the \u0E08 \u0E09
248 }]
249
250 # Test that the porter tokenizer correctly passes arguments through to
251 # its parent tokenizer.
252 do_execsql_test 8.3 {
253 BEGIN;
254 CREATE VIRTUAL TABLE e6 USING fts5(x,
255 tokenize="porter unicode61 separators ABCDEFGHIJKLMNOPQRSTUVWXYZ"
256 );
257 INSERT INTO e6 VALUES('theAquickBbrownCfoxDjumpedWoverXtheYlazyZdog');
258 CREATE VIRTUAL TABLE e7 USING fts5vocab(e6, 'row');
259 SELECT term FROM e7;
260 ROLLBACK;
261 } {
262 brown dog fox jump lazi over quick the
263 }
264
265 #-------------------------------------------------------------------------
266 # Check that the FTS5_TOKENIZE_PREFIX flag is passed to the tokenizer
267 # implementation.
268 #
269 reset_db
# Redefine the "tcl" tokenizer constructor for the 9.* tests: instead of
# failing, it ignores its arguments and returns the string "tcl_tokenize"
# (presumably the name of the command the harness then calls to tokenize
# text -- the 9.* tests rely on tcl_tokenize populating ::flags).
270 proc tcl_create {args} { return "tcl_tokenize" }
271 sqlite3_fts5_create_tokenizer db tcl tcl_create
272 set ::flags [list]
# Tokenizer callback used by the 9.* tests.
#
#   tflags - flags value handed to the tokenizer; appended to the global
#            ::flags so the tests can check it (the tests expect values
#            such as document, query and prefixquery)
#   text   - text to tokenize
#
# Splits the text with fts5_tokenize_split (defined outside this file
# section) and reports each resulting (token, start, end) triple through
# sqlite3_fts5_token.
273 proc tcl_tokenize {tflags text} {
274 lappend ::flags $tflags
275 foreach {w iStart iEnd} [fts5_tokenize_split $text] {
276 sqlite3_fts5_token $w $iStart $iEnd
277 }
278 }
279
280 do_execsql_test 9.1.1 {
281 CREATE VIRTUAL TABLE t1 USING fts5(a, tokenize=tcl);
282 INSERT INTO t1 VALUES('abc');
283 INSERT INTO t1 VALUES('xyz');
284 } {}
285 do_test 9.1.2 { set ::flags } {document document}
286
287 set ::flags [list]
288 do_execsql_test 9.2.1 { SELECT * FROM t1('abc'); } {abc}
289 do_test 9.2.2 { set ::flags } {query}
290
291 set ::flags [list]
292 do_execsql_test 9.3.1 { SELECT * FROM t1('ab*'); } {abc}
293 do_test 9.3.2 { set ::flags } {prefixquery}
294
295 set ::flags [list]
296 do_execsql_test 9.4.1 { SELECT * FROM t1('"abc xyz" *'); } {}
297 do_test 9.4.2 { set ::flags } {prefixquery}
298
299 set ::flags [list]
300 do_execsql_test 9.5.1 { SELECT * FROM t1('"abc xyz*"'); } {}
301 do_test 9.5.2 { set ::flags } {query}
302
303
304 finish_test
305
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698