OLD | NEW |
| (Empty) |
1 # 2012 March 01 | |
2 # | |
3 # The author disclaims copyright to this source code. In place of | |
4 # a legal notice, here is a blessing: | |
5 # | |
6 # May you do good and not evil. | |
7 # May you find forgiveness for yourself and forgive others. | |
8 # May you share freely, never taking more than you give. | |
9 # | |
10 #************************************************************************* | |
11 # This file implements regression tests for SQLite library. The | |
12 # focus of this script is testing the languageid=xxx FTS4 option. | |
13 # | |
14 | |
15 set testdir [file dirname $argv0] | |
16 source $testdir/tester.tcl | |
17 set ::testprefix fts4content | |
18 | |
19 # If SQLITE_ENABLE_FTS3 is defined, omit this file. | |
20 ifcapable !fts3 { | |
21 finish_test | |
22 return | |
23 } | |
24 | |
25 set ::testprefix fts4langid | |
26 | |
27 #--------------------------------------------------------------------------- | |
28 # Test plan: | |
29 # | |
30 # 1.* - Warm-body tests created for specific purposes during development. | |
31 # Passing these doesn't really prove much. | |
32 # | |
33 # 2.1.* - Test that FTS queries only ever return rows associated with | |
34 # the requested language. | |
35 # | |
36 # 2.2.* - Same as 2.1.*, after an 'optimize' command. | |
37 # | |
38 # 2.3.* - Same as 2.1.*, after a 'rebuild' command. | |
39 # | |
40 # 3.* - Tests with content= tables. Both where there is a real | |
41 # underlying content table and where there is not. | |
42 # | |
43 # 4.* - Test that if one is provided, the tokenizer xLanguage method | |
44 # is called to configure the tokenizer before tokenizing query | |
45 # or document text. | |
46 # | |
47 # 5.* - Test the fts4aux table when the associated FTS4 table contains | |
48 # multiple languages. | |
49 # | |
50 | |
51 do_execsql_test 1.1 { | |
52 CREATE VIRTUAL TABLE t1 USING fts4(a, b, languageid=lang_id); | |
53 } | |
54 | |
55 do_execsql_test 1.2 { | |
56 SELECT sql FROM sqlite_master WHERE name = 't1_content'; | |
57 } {{CREATE TABLE 't1_content'(docid INTEGER PRIMARY KEY, 'c0a', 'c1b', langid)}} | |
58 | |
59 do_execsql_test 1.3 {SELECT docid FROM t1} {} | |
60 do_execsql_test 1.4 {SELECT lang_id FROM t1} {} | |
61 | |
62 do_execsql_test 1.5 {INSERT INTO t1(a, b) VALUES('aaa', 'bbb')} | |
63 do_execsql_test 1.6 {SELECT lang_id FROM t1 } {0} | |
64 | |
65 do_execsql_test 1.7 {INSERT INTO t1(a, b, lang_id) VALUES('aaa', 'bbb', 4)} | |
66 do_execsql_test 1.8 {SELECT lang_id FROM t1 } {0 4} | |
67 | |
68 do_execsql_test 1.9 {INSERT INTO t1(a, b, lang_id) VALUES('aaa', 'bbb', 'xyz')} | |
69 do_execsql_test 1.10 {SELECT lang_id FROM t1} {0 4 0} | |
70 | |
71 do_execsql_test 1.11 { | |
72 CREATE VIRTUAL TABLE t2 USING fts4; | |
73 INSERT INTO t2 VALUES('abc'); | |
74 } | |
75 do_execsql_test 1.12 { SELECT rowid FROM t2 WHERE content MATCH 'abc' } 1 | |
76 | |
77 do_execsql_test 1.13 { | |
78 DROP TABLE t1; | |
79 CREATE VIRTUAL TABLE t1 USING fts4(languageid=lang_id); | |
80 INSERT INTO t1(content) VALUES('a b c'); | |
81 INSERT INTO t1(content, lang_id) VALUES('a b c', 1); | |
82 } | |
83 | |
84 do_execsql_test 1.14 { | |
85 SELECT rowid FROM t1 WHERE t1 MATCH 'b'; | |
86 } {1} | |
87 do_execsql_test 1.15 { | |
88 SELECT rowid FROM t1 WHERE t1 MATCH 'b' AND lang_id = 0; | |
89 } {1} | |
90 | |
91 do_execsql_test 1.16 { | |
92 SELECT rowid FROM t1 WHERE t1 MATCH 'b' AND lang_id = 1; | |
93 } {2} | |
94 | |
95 do_catchsql_test 1.17 { | |
96 INSERT INTO t1(content, lang_id) VALUES('123', -1); | |
97 } {1 {constraint failed}} | |
98 | |
99 do_execsql_test 1.18 { | |
100 DROP TABLE t1; | |
101 CREATE VIRTUAL TABLE t1 USING fts4(languageid=lang_id); | |
102 INSERT INTO t1(content, lang_id) VALUES('A', 13); | |
103 INSERT INTO t1(content, lang_id) VALUES('B', 13); | |
104 INSERT INTO t1(content, lang_id) VALUES('C', 13); | |
105 INSERT INTO t1(content, lang_id) VALUES('D', 13); | |
106 INSERT INTO t1(content, lang_id) VALUES('E', 13); | |
107 INSERT INTO t1(content, lang_id) VALUES('F', 13); | |
108 INSERT INTO t1(content, lang_id) VALUES('G', 13); | |
109 INSERT INTO t1(content, lang_id) VALUES('H', 13); | |
110 INSERT INTO t1(content, lang_id) VALUES('I', 13); | |
111 INSERT INTO t1(content, lang_id) VALUES('J', 13); | |
112 INSERT INTO t1(content, lang_id) VALUES('K', 13); | |
113 INSERT INTO t1(content, lang_id) VALUES('L', 13); | |
114 INSERT INTO t1(content, lang_id) VALUES('M', 13); | |
115 INSERT INTO t1(content, lang_id) VALUES('N', 13); | |
116 INSERT INTO t1(content, lang_id) VALUES('O', 13); | |
117 INSERT INTO t1(content, lang_id) VALUES('P', 13); | |
118 INSERT INTO t1(content, lang_id) VALUES('Q', 13); | |
119 INSERT INTO t1(content, lang_id) VALUES('R', 13); | |
120 INSERT INTO t1(content, lang_id) VALUES('S', 13); | |
121 SELECT rowid FROM t1 WHERE t1 MATCH 'A'; | |
122 } {} | |
123 | |
124 | |
125 #------------------------------------------------------------------------- | |
126 # Test cases 2.* | |
127 # | |
128 proc build_multilingual_db_1 {db} { | |
129 $db eval { CREATE VIRTUAL TABLE t2 USING fts4(x, y, languageid=l) } | |
130 | |
131 set xwords [list zero one two three four five six seven eight nine ten] | |
132 set ywords [list alpha beta gamma delta epsilon zeta eta theta iota kappa] | |
133 | |
134 for {set i 0} {$i < 1000} {incr i} { | |
135 set iLangid [expr $i%9] | |
136 set x "" | |
137 set y "" | |
138 | |
139 set x [list] | |
140 lappend x [lindex $xwords [expr ($i / 1000) % 10]] | |
141 lappend x [lindex $xwords [expr ($i / 100) % 10]] | |
142 lappend x [lindex $xwords [expr ($i / 10) % 10]] | |
143 lappend x [lindex $xwords [expr ($i / 1) % 10]] | |
144 | |
145 set y [list] | |
146 lappend y [lindex $ywords [expr ($i / 1000) % 10]] | |
147 lappend y [lindex $ywords [expr ($i / 100) % 10]] | |
148 lappend y [lindex $ywords [expr ($i / 10) % 10]] | |
149 lappend y [lindex $ywords [expr ($i / 1) % 10]] | |
150 | |
151 $db eval { INSERT INTO t2(docid, x, y, l) VALUES($i, $x, $y, $iLangid) } | |
152 } | |
153 | |
154 $db eval { | |
155 CREATE TABLE data(x, y, l); | |
156 INSERT INTO data(rowid, x, y, l) SELECT docid, x, y, l FROM t2; | |
157 } | |
158 } | |
159 | |
160 proc rowid_list_set_langid {langid} { | |
161 set ::rowid_list_langid $langid | |
162 } | |
163 proc rowid_list {pattern} { | |
164 set langid $::rowid_list_langid | |
165 set res [list] | |
166 db eval {SELECT rowid, x, y FROM data WHERE l = $langid ORDER BY rowid ASC} { | |
167 if {[string match "*$pattern*" $x] || [string match "*$pattern*" $y]} { | |
168 lappend res $rowid | |
169 } | |
170 } | |
171 return $res | |
172 } | |
173 | |
174 proc or_merge_list {list1 list2} { | |
175 set res [list] | |
176 | |
177 set i1 0 | |
178 set i2 0 | |
179 | |
180 set n1 [llength $list1] | |
181 set n2 [llength $list2] | |
182 | |
183 while {$i1 < $n1 && $i2 < $n2} { | |
184 set e1 [lindex $list1 $i1] | |
185 set e2 [lindex $list2 $i2] | |
186 | |
187 if {$e1==$e2} { | |
188 lappend res $e1 | |
189 incr i1 | |
190 incr i2 | |
191 } elseif {$e1 < $e2} { | |
192 lappend res $e1 | |
193 incr i1 | |
194 } else { | |
195 lappend res $e2 | |
196 incr i2 | |
197 } | |
198 } | |
199 | |
200 concat $res [lrange $list1 $i1 end] [lrange $list2 $i2 end] | |
201 } | |
202 | |
203 proc or_merge_lists {args} { | |
204 set res [lindex $args 0] | |
205 for {set i 1} {$i < [llength $args]} {incr i} { | |
206 set res [or_merge_list $res [lindex $args $i]] | |
207 } | |
208 set res | |
209 } | |
210 | |
211 proc and_merge_list {list1 list2} { | |
212 foreach i $list2 { set a($i) 1 } | |
213 set res [list] | |
214 foreach i $list1 { | |
215 if {[info exists a($i)]} {lappend res $i} | |
216 } | |
217 set res | |
218 } | |
219 | |
220 | |
221 proc and_merge_lists {args} { | |
222 set res [lindex $args 0] | |
223 for {set i 1} {$i < [llength $args]} {incr i} { | |
224 set res [and_merge_list $res [lindex $args $i]] | |
225 } | |
226 set res | |
227 } | |
228 | |
229 proc filter_list {list langid} { | |
230 set res [list] | |
231 foreach i $list { | |
232 if {($i % 9) == $langid} {lappend res $i} | |
233 } | |
234 set res | |
235 } | |
236 | |
237 do_test 2.0 { | |
238 reset_db | |
239 build_multilingual_db_1 db | |
240 } {} | |
241 | |
242 proc do_test_query1 {tn query res_script} { | |
243 for {set langid 0} {$langid < 10} {incr langid} { | |
244 rowid_list_set_langid $langid | |
245 set res [eval $res_script] | |
246 | |
247 set actual [ | |
248 execsql {SELECT docid FROM t2 WHERE t2 MATCH $query AND l = $langid} | |
249 ] | |
250 do_test $tn.$langid [list set {} $actual] $res | |
251 } | |
252 } | |
253 | |
254 # Run some queries. | |
255 do_test_query1 2.1.1 {delta} { rowid_list delta } | |
256 do_test_query1 2.1.2 {"zero one two"} { rowid_list "zero one two" } | |
257 do_test_query1 2.1.3 {zero one two} { | |
258 and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] | |
259 } | |
260 do_test_query1 2.1.4 {"zero one" OR "one two"} { | |
261 or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] | |
262 } | |
263 | |
264 # Now try the same tests as above, but after running the 'optimize' | |
265 # command on the FTS table. | |
266 # | |
267 do_execsql_test 2.2 { | |
268 INSERT INTO t2(t2) VALUES('optimize'); | |
269 SELECT count(*) FROM t2_segdir; | |
270 } {9} | |
271 do_test_query1 2.2.1 {delta} { rowid_list delta } | |
272 do_test_query1 2.2.2 {"zero one two"} { rowid_list "zero one two" } | |
273 do_test_query1 2.2.3 {zero one two} { | |
274 and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] | |
275 } | |
276 do_test_query1 2.2.4 {"zero one" OR "one two"} { | |
277 or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] | |
278 } | |
279 | |
280 # And rebuild. | |
281 # | |
282 do_test 2.3 { | |
283 reset_db | |
284 build_multilingual_db_1 db | |
285 execsql { INSERT INTO t2(t2) VALUES('rebuild') } | |
286 } {} | |
287 do_test_query1 2.3.1 {delta} { rowid_list delta } | |
288 do_test_query1 2.3.2 {"zero one two"} { rowid_list "zero one two" } | |
289 do_test_query1 2.3.3 {zero one two} { | |
290 and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] | |
291 } | |
292 do_test_query1 2.3.4 {"zero one" OR "one two"} { | |
293 or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] | |
294 } | |
295 | |
296 #------------------------------------------------------------------------- | |
297 # Test cases 3.* | |
298 # | |
299 do_test 3.0 { | |
300 reset_db | |
301 build_multilingual_db_1 db | |
302 execsql { | |
303 CREATE TABLE t3_data(l, x, y); | |
304 INSERT INTO t3_data(rowid, l, x, y) SELECT docid, l, x, y FROM t2; | |
305 DROP TABLE t2; | |
306 } | |
307 } {} | |
308 do_execsql_test 3.1 { | |
309 CREATE VIRTUAL TABLE t2 USING fts4(content=t3_data, languageid=l); | |
310 INSERT INTO t2(t2) VALUES('rebuild'); | |
311 } | |
312 | |
313 do_test_query1 3.1.1 {delta} { rowid_list delta } | |
314 do_test_query1 3.1.2 {"zero one two"} { rowid_list "zero one two" } | |
315 do_test_query1 3.1.3 {zero one two} { | |
316 and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] | |
317 } | |
318 do_test_query1 3.1.4 {"zero one" OR "one two"} { | |
319 or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] | |
320 } | |
321 | |
322 do_execsql_test 3.2.1 { | |
323 DROP TABLE t2; | |
324 CREATE VIRTUAL TABLE t2 USING fts4(x, y, languageid=l, content=nosuchtable); | |
325 } | |
326 | |
327 do_execsql_test 3.2.2 { | |
328 INSERT INTO t2(docid, x, y, l) SELECT rowid, x, y, l FROM t3_data; | |
329 } | |
330 | |
331 do_execsql_test 3.2.3 { | |
332 DROP TABLE t3_data; | |
333 } | |
334 | |
335 do_test_query1 3.3.1 {delta} { rowid_list delta } | |
336 do_test_query1 3.3.2 {"zero one two"} { rowid_list "zero one two" } | |
337 do_test_query1 3.3.3 {zero one two} { | |
338 and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] | |
339 } | |
340 do_test_query1 3.3.4 {"zero one" OR "one two"} { | |
341 or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] | |
342 } | |
343 | |
344 #------------------------------------------------------------------------- | |
345 # Test cases 4.* | |
346 # | |
347 proc build_multilingual_db_2 {db} { | |
348 $db eval { | |
349 CREATE VIRTUAL TABLE t4 USING fts4( | |
350 tokenize=testtokenizer, | |
351 languageid=lid | |
352 ); | |
353 } | |
354 for {set i 0} {$i < 50} {incr i} { | |
355 execsql { | |
356 INSERT INTO t4(docid, content, lid) VALUES($i, 'The Quick Brown Fox', $i) | |
357 } | |
358 } | |
359 } | |
360 | |
361 do_test 4.1.0 { | |
362 reset_db | |
363 set ptr [fts3_test_tokenizer] | |
364 execsql { SELECT fts3_tokenizer('testtokenizer', $ptr) } | |
365 build_multilingual_db_2 db | |
366 } {} | |
367 do_execsql_test 4.1.1 { | |
368 SELECT docid FROM t4 WHERE t4 MATCH 'quick'; | |
369 } {0} | |
370 do_execsql_test 4.1.2 { | |
371 SELECT docid FROM t4 WHERE t4 MATCH 'quick' AND lid=1; | |
372 } {} | |
373 do_execsql_test 4.1.3 { | |
374 SELECT docid FROM t4 WHERE t4 MATCH 'Quick' AND lid=1; | |
375 } {1} | |
376 for {set i 0} {$i < 50} {incr i} { | |
377 do_execsql_test 4.1.4.$i { | |
378 SELECT count(*) FROM t4 WHERE t4 MATCH 'fox' AND lid=$i; | |
379 } [expr 0==($i%2)] | |
380 } | |
381 do_catchsql_test 4.1.5 { | |
382 INSERT INTO t4(content, lid) VALUES('hello world', 101) | |
383 } {1 {SQL logic error or missing database}} | |
384 | |
385 #------------------------------------------------------------------------- | |
386 # Test cases 5.* | |
387 # | |
388 # The following test cases are designed to detect a 32-bit overflow bug | |
389 # that existed at one point. | |
390 # | |
391 proc build_multilingual_db_3 {db} { | |
392 $db eval { | |
393 CREATE VIRTUAL TABLE t5 USING fts4(languageid=lid); | |
394 } | |
395 set languages [list 0 1 2 [expr 1<<30]] | |
396 | |
397 foreach lid $languages { | |
398 execsql { | |
399 INSERT INTO t5(docid, content, lid) VALUES( | |
400 $lid, 'My language is ' || $lid, $lid | |
401 ) | |
402 } | |
403 } | |
404 } | |
405 | |
406 do_test 5.1.0 { | |
407 reset_db | |
408 build_multilingual_db_3 db | |
409 } {} | |
410 | |
411 do_execsql_test 5.1.1 { | |
412 SELECT level FROM t5_segdir; | |
413 } [list 0 1024 2048 [expr 1<<40]] | |
414 | |
415 do_execsql_test 5.1.2 {SELECT docid FROM t5 WHERE t5 MATCH 'language'} 0 | |
416 foreach langid [list 0 1 2 [expr 1<<30]] { | |
417 do_execsql_test 5.2.$langid { | |
418 SELECT docid FROM t5 WHERE t5 MATCH 'language' AND lid = $langid | |
419 } $langid | |
420 } | |
421 | |
422 set lid [expr 1<<30] | |
423 do_execsql_test 5.3.1 { | |
424 CREATE VIRTUAL TABLE t6 USING fts4(languageid=lid); | |
425 INSERT INTO t6 VALUES('I belong to language 0!'); | |
426 } | |
427 do_test 5.3.2 { | |
428 for {set i 0} {$i < 20} {incr i} { | |
429 execsql { | |
430 INSERT INTO t6(content, lid) VALUES( | |
431 'I (row '||$i||') belong to langauge N!', $lid | |
432 ); | |
433 } | |
434 } | |
435 execsql { SELECT docid FROM t6 WHERE t6 MATCH 'belong' } | |
436 } {1} | |
437 | |
438 do_test 5.3.3 { | |
439 execsql { SELECT docid FROM t6 WHERE t6 MATCH 'belong' AND lid=$lid} | |
440 } {2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21} | |
441 | |
442 do_execsql_test 5.3.4 { INSERT INTO t6(t6) VALUES('optimize') } {} | |
443 do_execsql_test 5.3.5 { SELECT docid FROM t6 WHERE t6 MATCH 'belong' } {1} | |
444 do_execsql_test 5.3.6 { | |
445 SELECT docid FROM t6 WHERE t6 MATCH 'belong' AND lid=$lid | |
446 } {2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21} | |
447 | |
448 | |
449 set lid [expr 1<<30] | |
450 foreach lid [list 4 [expr 1<<30]] { | |
451 do_execsql_test 5.4.$lid.1 { | |
452 DELETE FROM t6; | |
453 SELECT count(*) FROM t6_segdir; | |
454 SELECT count(*) FROM t6_segments; | |
455 } {0 0} | |
456 do_execsql_test 5.4.$lid.2 { | |
457 INSERT INTO t6(content, lid) VALUES('zero zero zero', $lid); | |
458 INSERT INTO t6(content, lid) VALUES('zero zero one', $lid); | |
459 INSERT INTO t6(content, lid) VALUES('zero one zero', $lid); | |
460 INSERT INTO t6(content, lid) VALUES('zero one one', $lid); | |
461 INSERT INTO t6(content, lid) VALUES('one zero zero', $lid); | |
462 INSERT INTO t6(content, lid) VALUES('one zero one', $lid); | |
463 INSERT INTO t6(content, lid) VALUES('one one zero', $lid); | |
464 INSERT INTO t6(content, lid) VALUES('one one one', $lid); | |
465 | |
466 SELECT docid FROM t6 WHERE t6 MATCH '"zero zero"' AND lid=$lid; | |
467 } {1 2 5} | |
468 | |
469 do_execsql_test 5.4.$lid.3 { | |
470 SELECT count(*) FROM t6_segdir; | |
471 SELECT count(*) FROM t6_segments; | |
472 } {8 0} | |
473 | |
474 do_execsql_test 5.4.$lid.4 { | |
475 INSERT INTO t6(t6) VALUES('merge=100,3'); | |
476 INSERT INTO t6(t6) VALUES('merge=100,3'); | |
477 SELECT docid FROM t6 WHERE t6 MATCH '"zero zero"' AND lid=$lid; | |
478 } {1 2 5} | |
479 | |
480 do_execsql_test 5.4.$lid.5 { | |
481 SELECT count(*) FROM t6_segdir; | |
482 SELECT count(*) FROM t6_segments; | |
483 } {4 4} | |
484 } | |
485 finish_test | |
OLD | NEW |