OLD | NEW |
| (Empty) |
1 # 2014 Dec 20 | |
2 # | |
3 # The author disclaims copyright to this source code. In place of | |
4 # a legal notice, here is a blessing: | |
5 # | |
6 # May you do good and not evil. | |
7 # May you find forgiveness for yourself and forgive others. | |
8 # May you share freely, never taking more than you give. | |
9 # | |
10 #*********************************************************************** | |
11 # | |
12 # Tests focusing on custom tokenizers that support synonyms. | |
13 # | |
14 | |
15 source [file join [file dirname [info script]] fts5_common.tcl] | |
16 set testprefix fts5synonym | |
17 | |
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}
23 | |
# Populate the global array ::syn. Each entry of the list below is one group
# of equivalent terms. For every term T of a group, ::syn(T) is set to the
# list of the remaining members of that group, in their original order.
#
foreach group {
  {zero  0}
  {one   1 i}
  {two   2 ii}
  {three 3 iii}
  {four  4 iv}
  {five  5 v}
  {six   6 vi}
  {seven 7 vii}
  {eight 8 viii}
  {nine  9 ix}
} {
  foreach term $group {
    # Everything in the group except the term itself.
    set ::syn($term) [lsearch -all -inline -not -exact $group $term]
  }
}
42 | |
# Plain tokenizer callback: every (term, start, end) triple produced by
# fts5_tokenize_split is passed straight to sqlite3_fts5_token. The tflags
# argument is accepted but not used.
proc tcl_tokenize {tflags text} {
  set lToken [fts5_tokenize_split $text]
  foreach {term iFirst iLast} $lToken {
    sqlite3_fts5_token $term $iFirst $iLast
  }
}
48 | |
# Tokenizer constructor. Whatever arguments it receives, it returns the name
# of the Tcl command that performs the tokenization.
proc tcl_create {args} {
  set handler tcl_tokenize
  return $handler
}
52 | |
# Make tcl_create available as the tokenizer named "tcl" (the name used in
# the tokenize= options below).
sqlite3_fts5_create_tokenizer db tcl tcl_create

#-------------------------------------------------------------------------
# Warm body test for the code in fts5_tcl.c.
#
# Two rows are indexed through the Tcl tokenizer and each is looked up by
# one of its words. The second query selects (x, rowid) rather than
# (rowid, x), which is why the expected output ends with "{jkl mno pqr} 2".
#
do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl);
  INSERT INTO ft VALUES('abc def ghi');
  INSERT INTO ft VALUES('jkl mno pqr');
  SELECT rowid, x FROM ft WHERE ft MATCH 'def';
  SELECT x, rowid FROM ft WHERE ft MATCH 'pqr';
} {1 {abc def ghi} {jkl mno pqr} 2}
65 | |
66 #------------------------------------------------------------------------- | |
67 # Test a tokenizer that supports synonyms by adding extra entries to the | |
68 # FTS index. | |
69 # | |
70 | |
# Tokenizer callback that adds synonyms at indexing time only. Every term is
# reported normally; when tflags is "document" and the term has an entry in
# ::syn, each synonym is additionally reported with -colo at the same
# offsets.
proc tcl_tokenize {tflags text} {
  set bIndexing [expr {$tflags eq "document"}]
  foreach {term iFirst iLast} [fts5_tokenize_split $text] {
    sqlite3_fts5_token $term $iFirst $iLast
    if {!$bIndexing || ![info exists ::syn($term)]} continue
    foreach alt $::syn($term) {
      sqlite3_fts5_token -colo $alt $iFirst $iLast
    }
  }
}
# Start from a fresh database. The tokenizer is registered again after
# every reset_db in this file.
reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create

# "ten" has no entry in ::syn, so it is indexed without any synonyms.
do_execsql_test 2.0 {
  CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl);
  INSERT INTO ft VALUES('one two three');
  INSERT INTO ft VALUES('four five six');
  INSERT INTO ft VALUES('eight nine ten');
} {}

# The synonyms were added to the index when the rows were inserted, so a
# query for a digit finds the row holding the matching word. Each case is
# (test number, MATCH expression, expected rowids).
foreach {tn expr res} {
  1 "3" 1
  2 "eight OR 8 OR 5" {2 3}
  3 "10" {}
  4 "1*" {1}
  5 "1 + 2" {1}
} {
  do_execsql_test 2.1.$tn {
    SELECT rowid FROM ft WHERE ft MATCH $expr
  } $res
}
102 | |
#-------------------------------------------------------------------------
# Test some broken tokenizers:
#
#   3.1.*: A tokenizer that declares the very first token to be colocated.
#
#   3.2.*: A tokenizer that reports two identical tokens at the same
#          position. This is allowed.
#
# Each group starts from an empty database with the "tcl" tokenizer
# registered again.
#
reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create
# Deliberately broken tokenizer callback: the first token of each call is
# reported with -colo, all following tokens are reported normally.
proc tcl_tokenize {tflags text} {
  set nDone 0
  foreach {term iFirst iLast} [fts5_tokenize_split $text] {
    if {$nDone==0} {
      sqlite3_fts5_token -colo $term $iFirst $iLast
    } else {
      sqlite3_fts5_token $term $iFirst $iLast
    }
    incr nDone
  }
}
# Index one row with the tokenizer that flags its first token as colocated.
# The fts5vocab table is expected to list each of the three terms with the
# values "1 1".
do_execsql_test 3.1.0 {
  CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl);
  INSERT INTO ft VALUES('one two three');
  CREATE VIRTUAL TABLE vv USING fts5vocab(ft, row);
  SELECT * FROM vv;
} {
  one 1 1 three 1 1 two 1 1
}

# The integrity-check command is expected to return no rows.
do_execsql_test 3.1.1 {
  INSERT INTO ft(ft) VALUES('integrity-check');
} {}
136 | |
# Back to a well-behaved tokenizer callback: each token from
# fts5_tokenize_split is reported once, without -colo. tflags is ignored.
proc tcl_tokenize {tflags text} {
  set lTriple [fts5_tokenize_split $text]
  foreach {term iFirst iLast} $lTriple {
    sqlite3_fts5_token $term $iFirst $iLast
  }
}
142 | |
# The row indexed by the broken tokenizer is queried using the plain
# tokenizer defined just above.
do_execsql_test 3.1.2 {
  SELECT rowid FROM ft WHERE ft MATCH 'one two three'
} {1}

# Fresh database for the 3.2.* tests.
reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create
# Tokenizer callback that reports every token twice at the same offsets:
# first normally, then again with -colo.
proc tcl_tokenize {tflags text} {
  foreach {term iFirst iLast} [fts5_tokenize_split $text] {
    foreach opt {{} -colo} {
      sqlite3_fts5_token {*}$opt $term $iFirst $iLast
    }
  }
}
# The tokenizer reports each token twice, so the last value of every
# fts5vocab row is twice the number of times the word appears in the text
# ("one" appears twice -> 4, "two" and "three" once each -> 2).
do_execsql_test 3.2.0 {
  CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl);
  INSERT INTO ft VALUES('one one two three');
  CREATE VIRTUAL TABLE vv USING fts5vocab(ft, row);
  SELECT * FROM vv;
} {
  one 1 4 three 1 2 two 1 2
}

# Single terms, space-separated terms and "+" phrases that occur in the row
# all match it.
do_execsql_test 3.2.1 {
  SELECT rowid FROM ft WHERE ft MATCH 'one';
} {1}
do_execsql_test 3.2.2 {
  SELECT rowid FROM ft WHERE ft MATCH 'one two three';
} {1}
do_execsql_test 3.2.3 {
  SELECT rowid FROM ft WHERE ft MATCH 'one + one + two + three';
} {1}

# Repeating "two" is harmless when the terms are merely space-separated,
# but the "+" form requires "two two" to be adjacent, which the row does
# not contain.
do_execsql_test 3.2.4 {
  SELECT rowid FROM ft WHERE ft MATCH 'one two two three';
} {1}
do_execsql_test 3.2.5 {
  SELECT rowid FROM ft WHERE ft MATCH 'one + two + two + three';
} {}
178 | |
#-------------------------------------------------------------------------
# Check that expressions with synonyms can be parsed and executed.
#
# The tests in sections 4 and 5 run against a fresh database, using a
# tokenizer that adds synonyms to queries rather than to documents.
#
reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create
# Tokenizer callback that adds synonyms at query time only. Every term is
# reported normally; when tflags is "query" and the term has an entry in
# ::syn, each synonym is additionally reported with -colo at the same
# offsets.
proc tcl_tokenize {tflags text} {
  set bQuery [expr {$tflags eq "query"}]
  foreach {term iFirst iLast} [fts5_tokenize_split $text] {
    sqlite3_fts5_token $term $iFirst $iLast
    if {!$bQuery || ![info exists ::syn($term)]} continue
    foreach alt $::syn($term) {
      sqlite3_fts5_token -colo $alt $iFirst $iLast
    }
  }
}
194 | |
# fts5_expr() returns the parsed form of an expression. Terms that have
# synonyms are expected to appear as a "|"-separated group. Each case is
# (test number, expression, expected text).
foreach {tn expr res} {
  1 {abc} {"abc"}
  2 {one} {"one"|"i"|"1"}
  3 {3} {"3"|"iii"|"three"}
  4 {3*} {"3"|"iii"|"three" *}
} {
  do_execsql_test 4.1.$tn {SELECT fts5_expr($expr, 'tokenize=tcl')} [list $res]
}

# The rows are indexed without synonyms (the tokenizer only adds them when
# tflags is "query") ...
do_execsql_test 4.2.1 {
  CREATE VIRTUAL TABLE xx USING fts5(x, tokenize=tcl);
  INSERT INTO xx VALUES('one two');
  INSERT INTO xx VALUES('three four');
}

# ... yet querying for a digit still finds the row holding the word.
do_execsql_test 4.2.2 {
  SELECT rowid FROM xx WHERE xx MATCH '2'
} {1}

do_execsql_test 4.2.3 {
  SELECT rowid FROM xx WHERE xx MATCH '3'
} {2}
217 | |
# Create the two-column table t1 and load nine rows. Every word used is a
# member of one of the ::syn groups for one through five.
do_test 5.0 {
  execsql {
    CREATE VIRTUAL TABLE t1 USING fts5(a, b, tokenize=tcl)
  }
  # Each triple is (rowid, value for a, value for b).
  foreach {rowid a b} {
    1 {four v 4 i three} {1 3 five five 4 one}
    2 {5 1 3 4 i} {2 2 v two 4}
    3 {5 i 5 2 four 4 1} {iii ii five two 1}
    4 {ii four 4 one 5 three five} {one 5 1 iii 4 3}
    5 {three i v i four 4 1} {ii five five five iii}
    6 {4 2 ii two 2 iii} {three 1 four 4 iv 1 iv}
    7 {ii ii two three 2 5} {iii i ii iii iii one one}
    8 {2 ii i two 3 three 2} {two iv v iii 3 five}
    9 {i 2 iv 3 five four v} {iii 4 three i three ii 1}
  } {
    execsql { INSERT INTO t1(rowid, a, b) VALUES($rowid, $a, $b) }
  }
} {}
236 | |
237 | |
# Run queries against t1 and check the highlight() output of both columns.
# Each case is (test number, MATCH expression, expected result), where the
# expected result is a flat list of (rowid, highlighted a, highlighted b).
# Because the tokenizer adds synonyms to queries, a query term is expected
# to highlight every member of its synonym group.
#
# Every expected value must sit on a single line: a line break inside a
# braced element becomes part of that element and can never match the
# output of highlight().
#
foreach {tn q res} {
  1 {one} {
    1 {four v 4 [i] three} {[1] 3 five five 4 [one]}
    2 {5 [1] 3 4 [i]} {2 2 v two 4}
    3 {5 [i] 5 2 four 4 [1]} {iii ii five two [1]}
    4 {ii four 4 [one] 5 three five} {[one] 5 [1] iii 4 3}
    5 {three [i] v [i] four 4 [1]} {ii five five five iii}
    6 {4 2 ii two 2 iii} {three [1] four 4 iv [1] iv}
    7 {ii ii two three 2 5} {iii [i] ii iii iii [one] [one]}
    8 {2 ii [i] two 3 three 2} {two iv v iii 3 five}
    9 {[i] 2 iv 3 five four v} {iii 4 three [i] three ii [1]}
  }
  2 {five four} {
    1 {[four] [v] [4] i three} {1 3 [five] [five] [4] one}
    2 {[5] 1 3 [4] i} {2 2 [v] two [4]}
    3 {[5] i [5] 2 [four] [4] 1} {iii ii [five] two 1}
    4 {ii [four] [4] one [5] three [five]} {one [5] 1 iii [4] 3}
    5 {three i [v] i [four] [4] 1} {ii [five] [five] [five] iii}
    8 {2 ii i two 3 three 2} {two [iv] [v] iii 3 [five]}
    9 {i 2 [iv] 3 [five] [four] [v]} {iii [4] three i three ii 1}
  }
  3 {one OR two OR iii OR 4 OR v} {
    1 {[four] [v] [4] [i] [three]} {[1] [3] [five] [five] [4] [one]}
    2 {[5] [1] [3] [4] [i]} {[2] [2] [v] [two] [4]}
    3 {[5] [i] [5] [2] [four] [4] [1]} {[iii] [ii] [five] [two] [1]}
    4 {[ii] [four] [4] [one] [5] [three] [five]} {[one] [5] [1] [iii] [4] [3]}
    5 {[three] [i] [v] [i] [four] [4] [1]} {[ii] [five] [five] [five] [iii]}
    6 {[4] [2] [ii] [two] [2] [iii]} {[three] [1] [four] [4] [iv] [1] [iv]}
    7 {[ii] [ii] [two] [three] [2] [5]} {[iii] [i] [ii] [iii] [iii] [one] [one]}
    8 {[2] [ii] [i] [two] [3] [three] [2]} {[two] [iv] [v] [iii] [3] [five]}
    9 {[i] [2] [iv] [3] [five] [four] [v]} {[iii] [4] [three] [i] [three] [ii] [1]}
  }

  4 {5 + 1} {
    2 {[5 1] 3 4 i} {2 2 v two 4}
    3 {[5 i] 5 2 four 4 1} {iii ii five two 1}
    4 {ii four 4 one 5 three five} {one [5 1] iii 4 3}
    5 {three i [v i] four 4 1} {ii five five five iii}
  }

  5 {one + two + three} {
    7 {ii ii two three 2 5} {iii [i ii iii] iii one one}
    8 {2 ii [i two 3] three 2} {two iv v iii 3 five}
  }

  6 {"v v"} {
    1 {four v 4 i three} {1 3 [five five] 4 one}
    5 {three i v i four 4 1} {ii [five five five] iii}
  }
} {
  do_execsql_test 5.1.$tn {
    SELECT rowid, highlight(t1, 0, '[', ']'), highlight(t1, 1, '[', ']')
    FROM t1 WHERE t1 MATCH $q
  } $res
}
293 | |
294 # Test that the xQueryPhrase() API works with synonyms. | |
295 # | |
# Decode a blob into a list of integers. The blob is scanned as a sequence
# of 32-bit integers in the byte order of the machine running the test.
proc mit {blob} {
  set fmt [dict get {littleEndian i* bigEndian I*} $::tcl_platform(byteOrder)]
  binary scan $blob $fmt r
  return $r
}
# Expose mit to SQL and enable the matchinfo() function on this connection.
db func mit mit
sqlite3_fts5_register_matchinfo db

# For the query "one", matchinfo 'x' yields three integers per column. In
# the expected data the first is the number of matches in the current row
# (counting "one", "1" and "i" alike); the second and third are the same
# for every row: 11 and 7 for column a, 12 and 6 for column b.
foreach {tn q res} {
  1 {one} {
    1 {1 11 7 2 12 6} 2 {2 11 7 0 12 6}
    3 {2 11 7 1 12 6} 4 {1 11 7 2 12 6}
    5 {3 11 7 0 12 6} 6 {0 11 7 2 12 6}
    7 {0 11 7 3 12 6} 8 {1 11 7 0 12 6}
    9 {1 11 7 2 12 6}
  }
} {
  do_execsql_test 5.2.$tn {
    SELECT rowid, mit(matchinfo(t1, 'x')) FROM t1 WHERE t1 MATCH $q
  } $res
}
318 | |
319 | |
#-------------------------------------------------------------------------
# Test terms with more than 4 synonyms.
#
# Fresh database; the tokenizer defined next gives every one-character
# query term nine synonyms.
#
reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create
# Tokenizer callback that gives one-character query terms nine synonyms.
# Every term is reported normally; when tflags is "query" and the term is a
# single character, the term repeated 2, 3, ... 10 times is additionally
# reported with -colo at the same offsets.
proc tcl_tokenize {tflags text} {
  foreach {term iFirst iLast} [fts5_tokenize_split $text] {
    sqlite3_fts5_token $term $iFirst $iLast
    if {$tflags ne "query" || [string length $term]!=1} continue
    set syn $term
    while {[string length $syn]<10} {
      append syn $term
      sqlite3_fts5_token -colo $syn $iFirst $iLast
    }
  }
}
335 | |
do_execsql_test 6.0.1 {
  CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize=tcl);
  INSERT INTO t1 VALUES('yy xx qq');
  INSERT INTO t1 VALUES('yy xx xx');
}

# The query terms "y" and "q" gain the synonyms "yy" and "qq", so only the
# first row (which contains both) is expected.
do_execsql_test 6.0.2 {
  SELECT * FROM t1 WHERE t1 MATCH 'NEAR(y q)';
} {{yy xx qq}}

# Create the two-column table t2 and load nine rows whose words are mostly
# a single letter repeated a number of times.
do_test 6.0.3 {
  execsql {
    CREATE VIRTUAL TABLE t2 USING fts5(a, b, tokenize=tcl)
  }
  # Each triple is (rowid, value for a, value for b).
  foreach {rowid a b} {
    1 {yyyy vvvvv qq oo yyyyyy vvvv eee} {ffff uu r qq aaaa}
    2 {ww oooooo bbbbb ssssss mm} {ffffff yy iiii rr s ccc qqqqq}
    3 {zzzz llll gggggg cccc uu} {hhhhhh aaaa ppppp rr ee jjjj}
    4 {r f i rrrrrr ww hhh} {aa yyy t x aaaaa ii}
    5 {fffff mm vvvv ooo ffffff kkkk tttt} {cccccc bb e zzz d n}
    6 {iii dddd hh qqqq ddd ooo} {ttt d c b aaaaaa qqqq}
    7 {jjjj rrrr v zzzzz u tt t} {ppppp pp dddd mm hhh uuu}
    8 {gggg rrrrrr kkkk vvvv gggg jjjjjj b} {dddddd jj r w cccc wwwwww ss}
    9 {kkkkk qqq oooo e tttttt mmm} {e ss qqqqqq hhhh llllll gg}
  } {
    execsql { INSERT INTO t2(rowid, a, b) VALUES($rowid, $a, $b) }
  }
} {}
363 | |
# Query t2 with one-letter terms and check the highlight() output of both
# columns. Each case is (test number, MATCH expression, expected result),
# the expected result being a flat list of (rowid, highlighted a,
# highlighted b) in ascending rowid order. Every case is run twice: once in
# the default order and once with ORDER BY rowid DESC.
foreach {tn q res} {
  1 {a} {
    1 {yyyy vvvvv qq oo yyyyyy vvvv eee} {ffff uu r qq [aaaa]}
    3 {zzzz llll gggggg cccc uu} {hhhhhh [aaaa] ppppp rr ee jjjj}
    4 {r f i rrrrrr ww hhh} {[aa] yyy t x [aaaaa] ii}
    6 {iii dddd hh qqqq ddd ooo} {ttt d c b [aaaaaa] qqqq}
  }

  2 {a AND q} {
    1 {yyyy vvvvv [qq] oo yyyyyy vvvv eee} {ffff uu r [qq] [aaaa]}
    6 {iii dddd hh [qqqq] ddd ooo} {ttt d c b [aaaaaa] [qqqq]}
  }

  3 {o OR (q AND a)} {
    1 {yyyy vvvvv [qq] [oo] yyyyyy vvvv eee} {ffff uu r [qq] [aaaa]}
    2 {ww [oooooo] bbbbb ssssss mm} {ffffff yy iiii rr s ccc qqqqq}
    5 {fffff mm vvvv [ooo] ffffff kkkk tttt} {cccccc bb e zzz d n}
    6 {iii dddd hh [qqqq] ddd [ooo]} {ttt d c b [aaaaaa] [qqqq]}
    9 {kkkkk qqq [oooo] e tttttt mmm} {e ss qqqqqq hhhh llllll gg}
  }

  4 {NEAR(q y, 20)} {
    1 {[yyyy] vvvvv [qq] oo [yyyyyy] vvvv eee} {ffff uu r qq aaaa}
    2 {ww oooooo bbbbb ssssss mm} {ffffff [yy] iiii rr s ccc [qqqqq]}
  }
} {
  do_execsql_test 6.1.$tn.asc {
    SELECT rowid, highlight(t2, 0, '[', ']'), highlight(t2, 1, '[', ']')
    FROM t2 WHERE t2 MATCH $q
  } $res

  # Build res2: the same triples as res, in reverse order. Each triple is
  # prepended to what has been collected so far.
  set res2 [list]
  foreach {rowid a b} $res {
    set res2 [concat [list $rowid $a $b] $res2]
  }

  do_execsql_test 6.1.$tn.desc {
    SELECT rowid, highlight(t2, 0, '[', ']'), highlight(t2, 1, '[', ']')
    FROM t2 WHERE t2 MATCH $q ORDER BY rowid DESC
  } $res2
}
405 | |
# Add a row holding "x" and "y" repeated one to seven times, then query for
# "x OR y". Column a is highlighted with <...> and column b with (...).
# In row 13 every word is expected to be highlighted.
do_execsql_test 6.2.1 {
  INSERT INTO t2(rowid, a, b) VALUES(13,
      'x xx xxx xxxx xxxxx xxxxxx xxxxxxx', 'y yy yyy yyyy yyyyy yyyyyy yyyyyyy'
  );
  SELECT rowid, highlight(t2, 0, '<', '>'), highlight(t2, 1, '(', ')')
  FROM t2 WHERE t2 MATCH 'x OR y'
} {
  1 {<yyyy> vvvvv qq oo <yyyyyy> vvvv eee} {ffff uu r qq aaaa}
  2 {ww oooooo bbbbb ssssss mm} {ffffff (yy) iiii rr s ccc qqqqq}
  4 {r f i rrrrrr ww hhh} {aa (yyy) t (x) aaaaa ii}
  13 {<x> <xx> <xxx> <xxxx> <xxxxx> <xxxxxx> <xxxxxxx>}
     {(y) (yy) (yyy) (yyyy) (yyyyy) (yyyyyy) (yyyyyyy)}
}
419 | |
#-------------------------------------------------------------------------
# Test that the xColumnSize() API is not confused by colocated tokens.
#
# Fresh database. fts5_aux_test_functions is called before the tests that
# use the SQL function fts5_test_columnsize() (presumably it is what
# registers that function - confirm in the test harness).
#
reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create
fts5_aux_test_functions db
# Tokenizer callback that adds nine colocated tokens to every one-character
# term, whatever the value of tflags. Each term is reported normally; a
# single-character term is then also reported repeated 2, 3, ... 10 times,
# with -colo and the same offsets.
proc tcl_tokenize {tflags text} {
  foreach {term iFirst iLast} [fts5_tokenize_split $text] {
    sqlite3_fts5_token $term $iFirst $iLast
    if {[string length $term]!=1} continue
    foreach n {2 3 4 5 6 7 8 9 10} {
      sqlite3_fts5_token -colo [string repeat $term $n] $iFirst $iLast
    }
  }
}
436 | |
# With columnsize=1. Every word inserted is a single character, so the
# tokenizer adds nine colocated tokens to each of them. The expected sizes
# ({3 4} and {2 10}) equal the number of words in each column, i.e. the
# colocated tokens are not counted.
do_execsql_test 7.0.1 {
  CREATE VIRTUAL TABLE t1 USING fts5(a, b, columnsize=1, tokenize=tcl);
  INSERT INTO t1 VALUES('0 2 3', '4 5 6 7');
  INSERT INTO t1 VALUES('8 9', '0 0 0 0 0 0 0 0 0 0');
  SELECT fts5_test_columnsize(t1) FROM t1 WHERE t1 MATCH '000 AND 00 AND 0';
} {{3 4} {2 10}}

do_execsql_test 7.0.2 {
  INSERT INTO t1(t1) VALUES('integrity-check');
}

# The same data and the same expected sizes with columnsize=0.
do_execsql_test 7.1.1 {
  CREATE VIRTUAL TABLE t2 USING fts5(a, b, columnsize=0, tokenize=tcl);
  INSERT INTO t2 VALUES('0 2 3', '4 5 6 7');
  INSERT INTO t2 VALUES('8 9', '0 0 0 0 0 0 0 0 0 0');
  SELECT fts5_test_columnsize(t2) FROM t2 WHERE t2 MATCH '000 AND 00 AND 0';
} {{3 4} {2 10}}

do_execsql_test 7.1.2 {
  INSERT INTO t2(t2) VALUES('integrity-check');
}

finish_test
460 | |
OLD | NEW |