Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(65)

Side by Side Diff: third_party/sqlite/sqlite-src-3100200/ext/fts5/test/fts5synonym.test

Issue 1610543003: [sql] Import reference version of SQLite 3.10.2. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 # 2014 Dec 20
2 #
3 # The author disclaims copyright to this source code. In place of
4 # a legal notice, here is a blessing:
5 #
6 # May you do good and not evil.
7 # May you find forgiveness for yourself and forgive others.
8 # May you share freely, never taking more than you give.
9 #
10 #***********************************************************************
11 #
12 # Tests focusing on custom tokenizers that support synonyms.
13 #
14
15 source [file join [file dirname [info script]] fts5_common.tcl]
16 set testprefix fts5synonym
17
18 # If SQLITE_ENABLE_FTS5 is not defined, omit this file.
19 ifcapable !fts5 {
20 finish_test
21 return
22 }
23
# Build the global synonym table ::syn from the groups listed below.
# Each group holds the interchangeable spellings of one digit: the English
# word, the arabic numeral and (for every group except "zero") the roman
# numeral. For each member $s of a group, ::syn($s) is set to the list of the
# other members of the same group, in their listed order - for example
# ::syn(one) is {1 i} and ::syn(0) is {zero}.
24 foreach S {
25 {zero 0}
26 {one 1 i}
27 {two 2 ii}
28 {three 3 iii}
29 {four 4 iv}
30 {five 5 v}
31 {six 6 vi}
32 {seven 7 vii}
33 {eight 8 viii}
34 {nine 9 ix}
35 } {
36 foreach s $S {
37 set o [list]
# Collect every member of the group except $s itself.
38 foreach x $S {if {$x!=$s} {lappend o $x}}
39 set ::syn($s) $o
40 }
41 }
42
# Plain tokenizer callback: splits $text with fts5_tokenize_split and passes
# each group of three values it returns (named w, iStart and iEnd here) to
# sqlite3_fts5_token. No synonyms are produced and the tflags argument is
# not used by this version.
# NOTE(review): fts5_tokenize_split is not defined in this file; presumably
# it is provided by fts5_common.tcl, which is sourced at the top - confirm.
43 proc tcl_tokenize {tflags text} {
44 foreach {w iStart iEnd} [fts5_tokenize_split $text] {
45 sqlite3_fts5_token $w $iStart $iEnd
46 }
47 }
48
# Script passed to sqlite3_fts5_create_tokenizer when the "tcl" tokenizer is
# registered. It ignores its arguments and returns the string
# "tcl_tokenize", the name of the tokenizer callback proc in this file.
# NOTE(review): the file redefines tcl_tokenize several times and expects the
# new definition to take effect, so the returned name is presumably resolved
# each time text is tokenized - confirm against fts5_tcl.c.
49 proc tcl_create {args} {
50 return "tcl_tokenize"
51 }
52
53 sqlite3_fts5_create_tokenizer db tcl tcl_create
54
55 #-------------------------------------------------------------------------
56 # Warm body test for the code in fts5_tcl.c.
57 #
# Create an FTS5 table that uses the "tcl" tokenizer, insert two rows and run
# two single-term queries. The expected list is the concatenation of both
# result sets: "1 {abc def ghi}" from the first SELECT (rowid, x) followed by
# "{jkl mno pqr} 2" from the second SELECT (x, rowid).
58 do_execsql_test 1.0 {
59 CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl);
60 INSERT INTO ft VALUES('abc def ghi');
61 INSERT INTO ft VALUES('jkl mno pqr');
62 SELECT rowid, x FROM ft WHERE ft MATCH 'def';
63 SELECT x, rowid FROM ft WHERE ft MATCH 'pqr';
64 } {1 {abc def ghi} {jkl mno pqr} 2}
65
66 #-------------------------------------------------------------------------
67 # Test a tokenizer that supports synonyms by adding extra entries to the
68 # FTS index.
69 #
70
# Tokenizer callback that adds synonyms to the index. Every token of $text is
# emitted as usual; in addition, when $tflags equals "document" and the token
# has an entry in ::syn, each synonym from that entry is emitted with the
# -colo option (a colocated token) using the same iStart/iEnd values as the
# original token. For any other $tflags value no synonyms are emitted.
# NOTE(review): "document" is presumably the flag value used while the text
# of an inserted row is being indexed - confirm against fts5_tcl.c.
71 proc tcl_tokenize {tflags text} {
72 foreach {w iStart iEnd} [fts5_tokenize_split $text] {
73 sqlite3_fts5_token $w $iStart $iEnd
74 if {$tflags=="document" && [info exists ::syn($w)]} {
75 foreach s $::syn($w) {
76 sqlite3_fts5_token -colo $s $iStart $iEnd
77 }
78 }
79 }
80 }
# Start again with reset_db and register the "tcl" tokenizer once more (this
# file repeats that pair before each group of tests).
81 reset_db
82 sqlite3_fts5_create_tokenizer db tcl tcl_create
83
# The inserted rows contain number words only. "ten" has no entry in the
# synonym table, which stops at "nine".
84 do_execsql_test 2.0 {
85 CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl);
86 INSERT INTO ft VALUES('one two three');
87 INSERT INTO ft VALUES('four five six');
88 INSERT INTO ft VALUES('eight nine ten');
89 } {}
90
# Each case is: test number, MATCH expression, expected rowids. The numerals
# used in the queries never appear in the inserted text, so every match
# relies on a synonym emitted by the tokenizer while indexing. Case 3 expects
# no rows because nothing was indexed as "10".
91 foreach {tn expr res} {
92 1 "3" 1
93 2 "eight OR 8 OR 5" {2 3}
94 3 "10" {}
95 4 "1*" {1}
96 5 "1 + 2" {1}
97 } {
98 do_execsql_test 2.1.$tn {
99 SELECT rowid FROM ft WHERE ft MATCH $expr
100 } $res
101 }
102
103 #-------------------------------------------------------------------------
104 # Test some broken tokenizers:
105 #
106 # 3.1.*: A tokenizer that declares the very first token to be colocated.
107 #
108 # 3.2.*: A tokenizer that reports two identical tokens at the same position.
109 # This is allowed.
110 #
111 reset_db
112 sqlite3_fts5_create_tokenizer db tcl tcl_create
# Deliberately broken tokenizer for tests 3.1.*: the very first token of each
# text is emitted with -colo (colocated), every later token is emitted
# normally. tflags is not consulted.
113 proc tcl_tokenize {tflags text} {
# bColo stays true only until the first token has been emitted.
114 set bColo 1
115 foreach {w iStart iEnd} [fts5_tokenize_split $text] {
# Note: the second braced body below is the else-branch; Tcl's "if" allows
# the "else" keyword to be omitted.
116 if {$bColo} {
117 sqlite3_fts5_token -colo $w $iStart $iEnd
118 set bColo 0
119 } {
120 sqlite3_fts5_token $w $iStart $iEnd
121 }
122 }
123 }
# Index one row with the tokenizer that flags the first token as colocated,
# then read the vocabulary through an fts5vocab "row" table. The expected
# output lists each of the three terms with the values 1 and 1.
# NOTE(review): per the fts5vocab documentation the columns of a "row" table
# are term, number of rows containing the term, and total occurrences -
# confirm.
124 do_execsql_test 3.1.0 {
125 CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl);
126 INSERT INTO ft VALUES('one two three');
127 CREATE VIRTUAL TABLE vv USING fts5vocab(ft, row);
128 SELECT * FROM vv;
129 } {
130 one 1 1 three 1 1 two 1 1
131 }
132
# The 'integrity-check' command is expected to complete without returning
# any rows for the index built above.
133 do_execsql_test 3.1.1 {
134 INSERT INTO ft(ft) VALUES('integrity-check');
135 } {}
136
# Replace the broken tokenizer with the plain one again: every token is
# emitted once, without -colo, and tflags is ignored. This definition is in
# place when the query of test 3.1.2 runs.
137 proc tcl_tokenize {tflags text} {
138 foreach {w iStart iEnd} [fts5_tokenize_split $text] {
139 sqlite3_fts5_token $w $iStart $iEnd
140 }
141 }
142
# The row indexed in test 3.1.0 (with its first token flagged as colocated)
# must still be found by a query naming all three of its terms.
143 do_execsql_test 3.1.2 {
144 SELECT rowid FROM ft WHERE ft MATCH 'one two three'
145 } {1}
146
147 reset_db
148 sqlite3_fts5_create_tokenizer db tcl tcl_create
# Tokenizer for tests 3.2.*: reports every token twice with identical
# iStart/iEnd values - once normally and once with -colo - so that two
# identical tokens occupy the same position. tflags is not consulted.
149 proc tcl_tokenize {tflags text} {
150 foreach {w iStart iEnd} [fts5_tokenize_split $text] {
151 sqlite3_fts5_token $w $iStart $iEnd
152 sqlite3_fts5_token -colo $w $iStart $iEnd
153 }
154 }
# Because the tokenizer reports each token twice, the vocabulary is expected
# to show doubled counts: "one" occurs twice in the text and is listed with
# 4, while "two" and "three" occur once and are listed with 2. Each term is
# still attributed to a single row (the middle value 1).
155 do_execsql_test 3.2.0 {
156 CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl);
157 INSERT INTO ft VALUES('one one two three');
158 CREATE VIRTUAL TABLE vv USING fts5vocab(ft, row);
159 SELECT * FROM vv;
160 } {
161 one 1 4 three 1 2 two 1 2
162 }
# Term queries and the phrase that mirrors the inserted text all find row 1.
163 do_execsql_test 3.2.1 {
164 SELECT rowid FROM ft WHERE ft MATCH 'one';
165 } {1}
166 do_execsql_test 3.2.2 {
167 SELECT rowid FROM ft WHERE ft MATCH 'one two three';
168 } {1}
169 do_execsql_test 3.2.3 {
170 SELECT rowid FROM ft WHERE ft MATCH 'one + one + two + three';
171 } {1}
# Naming "two" twice as separate terms (3.2.4) still matches, but the phrase
# with two consecutive "two"s (3.2.5) is expected to find nothing: the
# inserted text contains "two" only once.
172 do_execsql_test 3.2.4 {
173 SELECT rowid FROM ft WHERE ft MATCH 'one two two three';
174 } {1}
175 do_execsql_test 3.2.5 {
176 SELECT rowid FROM ft WHERE ft MATCH 'one + two + two + three';
177 } {}
178
179 #-------------------------------------------------------------------------
180 # Check that expressions with synonyms can be parsed and executed.
181 #
182 reset_db
183 sqlite3_fts5_create_tokenizer db tcl tcl_create
# Tokenizer callback that expands synonyms at query time instead of index
# time. Every token is emitted as usual; when $tflags equals "query" and the
# token has an entry in ::syn, each synonym is additionally emitted with
# -colo at the same iStart/iEnd. Text tokenized with any other $tflags value
# gets no synonyms.
# NOTE(review): "query" is presumably the flag value used while a MATCH
# expression is being parsed - confirm against fts5_tcl.c.
184 proc tcl_tokenize {tflags text} {
185 foreach {w iStart iEnd} [fts5_tokenize_split $text] {
186 sqlite3_fts5_token $w $iStart $iEnd
187 if {$tflags=="query" && [info exists ::syn($w)]} {
188 foreach s $::syn($w) {
189 sqlite3_fts5_token -colo $s $iStart $iEnd
190 }
191 }
192 }
193 }
194
# Each case is: test number, query text, expected output of fts5_expr() for
# that text with the "tcl" tokenizer. In the expected strings a term that has
# synonyms is shown together with them, separated by "|"; "abc" has none.
# The expected value is wrapped with [list ...] because the SELECT returns a
# single value that itself contains spaces and quotes.
195 foreach {tn expr res} {
196 1 {abc} {"abc"}
197 2 {one} {"one"|"i"|"1"}
198 3 {3} {"3"|"iii"|"three"}
199 4 {3*} {"3"|"iii"|"three" *}
200 } {
201 do_execsql_test 4.1.$tn {SELECT fts5_expr($expr, 'tokenize=tcl')} [list $res]
202 }
203
# The rows below contain number words only, and the current tokenizer adds
# no synonyms unless tflags is "query". The numeral queries in 4.2.2 and
# 4.2.3 are therefore expected to match through synonyms added to the query.
204 do_execsql_test 4.2.1 {
205 CREATE VIRTUAL TABLE xx USING fts5(x, tokenize=tcl);
206 INSERT INTO xx VALUES('one two');
207 INSERT INTO xx VALUES('three four');
208 }
209
210 do_execsql_test 4.2.2 {
211 SELECT rowid FROM xx WHERE xx MATCH '2'
212 } {1}
213
214 do_execsql_test 4.2.3 {
215 SELECT rowid FROM xx WHERE xx MATCH '3'
216 } {2}
217
# Populate a two-column table t1 with nine rows (explicit rowids 1-9) whose
# text mixes the word, arabic and roman spellings of one to five. The
# tokenizer in effect is still the query-time-synonym version defined for
# the 4.* tests. The script ends with the foreach, whose result is the empty
# string, hence the expected value {}.
218 do_test 5.0 {
219 execsql {
220 CREATE VIRTUAL TABLE t1 USING fts5(a, b, tokenize=tcl)
221 }
222 foreach {rowid a b} {
223 1 {four v 4 i three} {1 3 five five 4 one}
224 2 {5 1 3 4 i} {2 2 v two 4}
225 3 {5 i 5 2 four 4 1} {iii ii five two 1}
226 4 {ii four 4 one 5 three five} {one 5 1 iii 4 3}
227 5 {three i v i four 4 1} {ii five five five iii}
228 6 {4 2 ii two 2 iii} {three 1 four 4 iv 1 iv}
229 7 {ii ii two three 2 5} {iii i ii iii iii one one}
230 8 {2 ii i two 3 three 2} {two iv v iii 3 five}
231 9 {i 2 iv 3 five four v} {iii 4 three i three ii 1}
232 } {
233 execsql { INSERT INTO t1(rowid, a, b) VALUES($rowid, $a, $b) }
234 }
235 } {}
236
237
# Each case is: test number, MATCH query, expected result. The expected
# result is a flat list of (rowid, highlighted column a, highlighted column
# b) for every matching row of t1, where highlight() wraps each matching
# token - or each matching phrase as a whole - in "[" and "]". Rows that do
# not match the query are simply absent from the expected list (for example
# rows 6 and 7 in case 2). All matches on spellings other than the one typed
# in the query come from synonyms added at query time.
238 foreach {tn q res} {
239 1 {one} {
240 1 {four v 4 [i] three} {[1] 3 five five 4 [one]}
241 2 {5 [1] 3 4 [i]} {2 2 v two 4}
242 3 {5 [i] 5 2 four 4 [1]} {iii ii five two [1]}
243 4 {ii four 4 [one] 5 three five} {[one] 5 [1] iii 4 3}
244 5 {three [i] v [i] four 4 [1]} {ii five five five iii}
245 6 {4 2 ii two 2 iii} {three [1] four 4 iv [1] iv}
246 7 {ii ii two three 2 5} {iii [i] ii iii iii [one] [one]}
247 8 {2 ii [i] two 3 three 2} {two iv v iii 3 five}
248 9 {[i] 2 iv 3 five four v} {iii 4 three [i] three ii [1]}
249 }
250 2 {five four} {
251 1 {[four] [v] [4] i three} {1 3 [five] [five] [4] one}
252 2 {[5] 1 3 [4] i} {2 2 [v] two [4]}
253 3 {[5] i [5] 2 [four] [4] 1} {iii ii [five] two 1}
254 4 {ii [four] [4] one [5] three [five]} {one [5] 1 iii [4] 3}
255 5 {three i [v] i [four] [4] 1} {ii [five] [five] [five] iii}
256 8 {2 ii i two 3 three 2} {two [iv] [v] iii 3 [five]}
257 9 {i 2 [iv] 3 [five] [four] [v]} {iii [4] three i three ii 1}
258 }
259 3 {one OR two OR iii OR 4 OR v} {
260 1 {[four] [v] [4] [i] [three]} {[1] [3] [five] [five] [4] [one]}
261 2 {[5] [1] [3] [4] [i]} {[2] [2] [v] [two] [4]}
262 3 {[5] [i] [5] [2] [four] [4] [1]} {[iii] [ii] [five] [two] [1]}
263 4 {[ii] [four] [4] [one] [5] [three] [five]} {[one] [5] [1] [iii] [4] [3]}
264 5 {[three] [i] [v] [i] [four] [4] [1]} {[ii] [five] [five] [five] [iii]}
265 6 {[4] [2] [ii] [two] [2] [iii]} {[three] [1] [four] [4] [iv] [1] [iv]}
266 7 {[ii] [ii] [two] [three] [2] [5]} {[iii] [i] [ii] [iii] [iii] [one] [one]}
267 8 {[2] [ii] [i] [two] [3] [three] [2]} {[two] [iv] [v] [iii] [3] [five]}
268 9 {[i] [2] [iv] [3] [five] [four] [v]} {[iii] [4] [three] [i] [three] [ii] [1]}
269 }
270
271 4 {5 + 1} {
272 2 {[5 1] 3 4 i} {2 2 v two 4}
273 3 {[5 i] 5 2 four 4 1} {iii ii five two 1}
274 4 {ii four 4 one 5 three five} {one [5 1] iii 4 3}
275 5 {three i [v i] four 4 1} {ii five five five iii}
276 }
277
278 5 {one + two + three} {
279 7 {ii ii two three 2 5} {iii [i ii iii] iii one one}
280 8 {2 ii [i two 3] three 2} {two iv v iii 3 five}
281 }
282
283 6 {"v v"} {
284 1 {four v 4 i three} {1 3 [five five] 4 one}
285 5 {three i v i four 4 1} {ii [five five five] iii}
286 }
287 } {
288 do_execsql_test 5.1.$tn {
289 SELECT rowid, highlight(t1, 0, '[', ']'), highlight(t1, 1, '[', ']')
290 FROM t1 WHERE t1 MATCH $q
291 } $res
292 }
293
294 # Test that the xQueryPhrase() API works with synonyms.
295 #
# Decode a binary blob into a Tcl list of integers. The blob is scanned as a
# sequence of 32-bit integers using the byte order of the machine running
# the test: format "i*" (little-endian) or "I*" (big-endian) is selected via
# $::tcl_platform(byteOrder). Returns the resulting list.
296 proc mit {blob} {
297 set scan(littleEndian) i*
298 set scan(bigEndian) I*
299 binary scan $blob $scan($::tcl_platform(byteOrder)) r
300 return $r
301 }
# Expose the mit proc to SQL as mit() and make matchinfo() available for
# FTS5 tables on this connection.
302 db func mit mit
303 sqlite3_fts5_register_matchinfo db
304
# Each case is: test number, MATCH query, expected list of (rowid, decoded
# matchinfo 'x' array). Every array holds six integers, three per column of
# t1. For query "one" the first value of each triple equals the number of
# highlighted tokens shown for that row and column in test 5.1.1, and the
# other two are the same for all rows (11 and 7 for column a, 12 and 6 for
# column b) - the total highlighted tokens and the number of rows with at
# least one, as can be counted from the 5.1.1 expectations.
305 foreach {tn q res} {
306 1 {one} {
307 1 {1 11 7 2 12 6} 2 {2 11 7 0 12 6}
308 3 {2 11 7 1 12 6} 4 {1 11 7 2 12 6}
309 5 {3 11 7 0 12 6} 6 {0 11 7 2 12 6}
310 7 {0 11 7 3 12 6} 8 {1 11 7 0 12 6}
311 9 {1 11 7 2 12 6}
312 }
313 } {
314 do_execsql_test 5.2.$tn {
315 SELECT rowid, mit(matchinfo(t1, 'x')) FROM t1 WHERE t1 MATCH $q
316 } $res
317 }
318
319
320 #-------------------------------------------------------------------------
321 # Test terms with more than 4 synonyms.
322 #
323 reset_db
324 sqlite3_fts5_create_tokenizer db tcl tcl_create
# Tokenizer callback producing many synonyms per term. Every token is emitted
# as usual; when $tflags equals "query" and the token is a single character,
# nine extra tokens are emitted with -colo at the same iStart/iEnd: the
# character repeated 2, 3, ... 10 times (e.g. "y" also yields "yy" through
# "yyyyyyyyyy"). Longer tokens and non-query text get no synonyms.
325 proc tcl_tokenize {tflags text} {
326 foreach {w iStart iEnd} [fts5_tokenize_split $text] {
327 sqlite3_fts5_token $w $iStart $iEnd
328 if {$tflags=="query" && [string length $w]==1} {
329 for {set i 2} {$i<=10} {incr i} {
330 sqlite3_fts5_token -colo [string repeat $w $i] $iStart $iEnd
331 }
332 }
333 }
334 }
335
# Two rows of repeated-letter words. Only the first contains a token made of
# the letter q.
336 do_execsql_test 6.0.1 {
337 CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize=tcl);
338 INSERT INTO t1 VALUES('yy xx qq');
339 INSERT INTO t1 VALUES('yy xx xx');
340 }
# The single-letter query terms y and q are expanded by the tokenizer to
# their repeated forms, so NEAR(y q) is expected to find the row containing
# both "yy" and "qq" and not the other one.
341 do_execsql_test 6.0.2 {
342 SELECT * FROM t1 WHERE t1 MATCH 'NEAR(y q)';
343 } {{yy xx qq}}
344
# Populate a two-column table t2 with nine rows (explicit rowids 1-9) of
# repeated-letter words of varying length. The script ends with the foreach,
# whose result is the empty string, hence the expected value {}.
345 do_test 6.0.3 {
346 execsql {
347 CREATE VIRTUAL TABLE t2 USING fts5(a, b, tokenize=tcl)
348 }
349 foreach {rowid a b} {
350 1 {yyyy vvvvv qq oo yyyyyy vvvv eee} {ffff uu r qq aaaa}
351 2 {ww oooooo bbbbb ssssss mm} {ffffff yy iiii rr s ccc qqqqq}
352 3 {zzzz llll gggggg cccc uu} {hhhhhh aaaa ppppp rr ee jjjj}
353 4 {r f i rrrrrr ww hhh} {aa yyy t x aaaaa ii}
354 5 {fffff mm vvvv ooo ffffff kkkk tttt} {cccccc bb e zzz d n}
355 6 {iii dddd hh qqqq ddd ooo} {ttt d c b aaaaaa qqqq}
356 7 {jjjj rrrr v zzzzz u tt t} {ppppp pp dddd mm hhh uuu}
357 8 {gggg rrrrrr kkkk vvvv gggg jjjjjj b} {dddddd jj r w cccc wwwwww ss}
358 9 {kkkkk qqq oooo e tttttt mmm} {e ss qqqqqq hhhh llllll gg}
359 } {
360 execsql { INSERT INTO t2(rowid, a, b) VALUES($rowid, $a, $b) }
361 }
362 } {}
363
# Each case is: test number, MATCH query built from single-letter terms,
# expected result as a flat list of (rowid, highlighted a, highlighted b) in
# ascending rowid order. Every case is run twice: once in the default order
# against $res and once with ORDER BY rowid DESC against the same rows in
# reverse order.
364 foreach {tn q res} {
365 1 {a} {
366 1 {yyyy vvvvv qq oo yyyyyy vvvv eee} {ffff uu r qq [aaaa]}
367 3 {zzzz llll gggggg cccc uu} {hhhhhh [aaaa] ppppp rr ee jjjj}
368 4 {r f i rrrrrr ww hhh} {[aa] yyy t x [aaaaa] ii}
369 6 {iii dddd hh qqqq ddd ooo} {ttt d c b [aaaaaa] qqqq}
370 }
371
372 2 {a AND q} {
373 1 {yyyy vvvvv [qq] oo yyyyyy vvvv eee} {ffff uu r [qq] [aaaa]}
374 6 {iii dddd hh [qqqq] ddd ooo} {ttt d c b [aaaaaa] [qqqq]}
375 }
376
377 3 {o OR (q AND a)} {
378 1 {yyyy vvvvv [qq] [oo] yyyyyy vvvv eee} {ffff uu r [qq] [aaaa]}
379 2 {ww [oooooo] bbbbb ssssss mm} {ffffff yy iiii rr s ccc qqqqq}
380 5 {fffff mm vvvv [ooo] ffffff kkkk tttt} {cccccc bb e zzz d n}
381 6 {iii dddd hh [qqqq] ddd [ooo]} {ttt d c b [aaaaaa] [qqqq]}
382 9 {kkkkk qqq [oooo] e tttttt mmm} {e ss qqqqqq hhhh llllll gg}
383 }
384
385 4 {NEAR(q y, 20)} {
386 1 {[yyyy] vvvvv [qq] oo [yyyyyy] vvvv eee} {ffff uu r qq aaaa}
387 2 {ww oooooo bbbbb ssssss mm} {ffffff [yy] iiii rr s ccc [qqqqq]}
388 }
389 } {
390 do_execsql_test 6.1.$tn.asc {
391 SELECT rowid, highlight(t2, 0, '[', ']'), highlight(t2, 1, '[', ']')
392 FROM t2 WHERE t2 MATCH $q
393 } $res
394
# Build res2 by prepending each (rowid a b) triple of $res, which leaves the
# triples intact but reverses their order.
395 set res2 [list]
396 foreach {rowid a b} $res {
397 set res2 [concat [list $rowid $a $b] $res2]
398 }
399
400 do_execsql_test 6.1.$tn.desc {
401 SELECT rowid, highlight(t2, 0, '[', ']'), highlight(t2, 1, '[', ']')
402 FROM t2 WHERE t2 MATCH $q ORDER BY rowid DESC
403 } $res2
404 }
405
# Add row 13, whose columns hold the letters x and y repeated 1 to 7 times,
# then query 'x OR y'. Column a is highlighted with < > and column b with
# ( ). Every token of row 13 is expected to be highlighted, along with the
# y- and x-tokens already present in rows 1, 2 and 4.
406 do_execsql_test 6.2.1 {
407 INSERT INTO t2(rowid, a, b) VALUES(13,
408 'x xx xxx xxxx xxxxx xxxxxx xxxxxxx', 'y yy yyy yyyy yyyyy yyyyyy yyyyyyy'
409 );
410 SELECT rowid, highlight(t2, 0, '<', '>'), highlight(t2, 1, '(', ')')
411 FROM t2 WHERE t2 MATCH 'x OR y'
412 } {
413 1 {<yyyy> vvvvv qq oo <yyyyyy> vvvv eee} {ffff uu r qq aaaa}
414 2 {ww oooooo bbbbb ssssss mm} {ffffff (yy) iiii rr s ccc qqqqq}
415 4 {r f i rrrrrr ww hhh} {aa (yyy) t (x) aaaaa ii}
416 13 {<x> <xx> <xxx> <xxxx> <xxxxx> <xxxxxx> <xxxxxxx>}
417 {(y) (yy) (yyy) (yyyy) (yyyyy) (yyyyyy) (yyyyyyy)}
418 }
419
420 #-------------------------------------------------------------------------
421 # Test that the xColumnSize() API is not confused by colocated tokens.
422 #
423 reset_db
424 sqlite3_fts5_create_tokenizer db tcl tcl_create
425 fts5_aux_test_functions db
# Tokenizer callback for the 7.* tests. Every token is emitted as usual and,
# for every single-character token, nine extra tokens (the character
# repeated 2 to 10 times) are emitted with -colo at the same iStart/iEnd.
# Unlike the version used for the 6.* tests, tflags is not checked, so the
# extra tokens are produced for all text passed to the tokenizer.
426 proc tcl_tokenize {tflags text} {
427 foreach {w iStart iEnd} [fts5_tokenize_split $text] {
428 sqlite3_fts5_token $w $iStart $iEnd
429 if {[string length $w]==1} {
430 for {set i 2} {$i<=10} {incr i} {
431 sqlite3_fts5_token -colo [string repeat $w $i] $iStart $iEnd
432 }
433 }
434 }
435 }
436
# Table created with columnsize=1. Every inserted token is a single character
# and therefore gets nine colocated synonyms, yet the expected column sizes
# are the plain token counts of the inserted text: {3 4} for the first row
# and {2 10} for the second.
# NOTE(review): fts5_test_columnsize is not defined in this file; presumably
# it is installed by the fts5_aux_test_functions call above - confirm.
437 do_execsql_test 7.0.1 {
438 CREATE VIRTUAL TABLE t1 USING fts5(a, b, columnsize=1, tokenize=tcl);
439 INSERT INTO t1 VALUES('0 2 3', '4 5 6 7');
440 INSERT INTO t1 VALUES('8 9', '0 0 0 0 0 0 0 0 0 0');
441 SELECT fts5_test_columnsize(t1) FROM t1 WHERE t1 MATCH '000 AND 00 AND 0';
442 } {{3 4} {2 10}}
443
# The integrity-check command must complete without error on that table.
444 do_execsql_test 7.0.2 {
445 INSERT INTO t1(t1) VALUES('integrity-check');
446 }
447
# Same data and the same expectations with columnsize=0.
448 do_execsql_test 7.1.1 {
449 CREATE VIRTUAL TABLE t2 USING fts5(a, b, columnsize=0, tokenize=tcl);
450 INSERT INTO t2 VALUES('0 2 3', '4 5 6 7');
451 INSERT INTO t2 VALUES('8 9', '0 0 0 0 0 0 0 0 0 0');
452 SELECT fts5_test_columnsize(t2) FROM t2 WHERE t2 MATCH '000 AND 00 AND 0';
453 } {{3 4} {2 10}}
454
455 do_execsql_test 7.1.2 {
456 INSERT INTO t2(t2) VALUES('integrity-check');
457 }
458
459 finish_test
460
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698