Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(346)

Side by Side Diff: third_party/sqlite/sqlite-src-3080704/test/fts4langid.test

Issue 883353008: [sql] Import reference version of SQLite 3.8.7.4. (Closed) Base URL: http://chromium.googlesource.com/chromium/src.git@master
Patch Set: Hold back encoding change which is messing up patch. Created 5 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 # 2012 March 01
2 #
3 # The author disclaims copyright to this source code. In place of
4 # a legal notice, here is a blessing:
5 #
6 # May you do good and not evil.
7 # May you find forgiveness for yourself and forgive others.
8 # May you share freely, never taking more than you give.
9 #
10 #*************************************************************************
11 # This file implements regression tests for SQLite library. The
12 # focus of this script is testing the languageid=xxx FTS4 option.
13 #
14
15 set testdir [file dirname $argv0]
16 source $testdir/tester.tcl
17 set ::testprefix fts4content
18
19 # If SQLITE_ENABLE_FTS3 is defined, omit this file.
20 ifcapable !fts3 {
21 finish_test
22 return
23 }
24
25 set ::testprefix fts4langid
26
27 #---------------------------------------------------------------------------
28 # Test plan:
29 #
30 # 1.* - Warm-body tests created for specific purposes during development.
31 # Passing these doesn't really prove much.
32 #
33 # 2.1.* - Test that FTS queries only ever return rows associated with
34 # the requested language.
35 #
36 # 2.2.* - Same as 2.1.*, after an 'optimize' command.
37 #
38 # 2.3.* - Same as 2.1.*, after a 'rebuild' command.
39 #
40 # 3.* - Tests with content= tables. Both where there is a real
41 # underlying content table and where there is not.
42 #
43 # 4.* - Test that if one is provided, the tokenizer xLanguage method
44 # is called to configure the tokenizer before tokenizing query
45 # or document text.
46 #
47 # 5.* - Test the fts4aux table when the associated FTS4 table contains
48 # multiple languages.
49 #
50
51 do_execsql_test 1.1 {
52 CREATE VIRTUAL TABLE t1 USING fts4(a, b, languageid=lang_id);
53 }
54
55 do_execsql_test 1.2 {
56 SELECT sql FROM sqlite_master WHERE name = 't1_content';
57 } {{CREATE TABLE 't1_content'(docid INTEGER PRIMARY KEY, 'c0a', 'c1b', langid)}}
58
59 do_execsql_test 1.3 {SELECT docid FROM t1} {}
60 do_execsql_test 1.4 {SELECT lang_id FROM t1} {}
61
62 do_execsql_test 1.5 {INSERT INTO t1(a, b) VALUES('aaa', 'bbb')}
63 do_execsql_test 1.6 {SELECT lang_id FROM t1 } {0}
64
65 do_execsql_test 1.7 {INSERT INTO t1(a, b, lang_id) VALUES('aaa', 'bbb', 4)}
66 do_execsql_test 1.8 {SELECT lang_id FROM t1 } {0 4}
67
68 do_execsql_test 1.9 {INSERT INTO t1(a, b, lang_id) VALUES('aaa', 'bbb', 'xyz')}
69 do_execsql_test 1.10 {SELECT lang_id FROM t1} {0 4 0}
70
71 do_execsql_test 1.11 {
72 CREATE VIRTUAL TABLE t2 USING fts4;
73 INSERT INTO t2 VALUES('abc');
74 }
75 do_execsql_test 1.12 { SELECT rowid FROM t2 WHERE content MATCH 'abc' } 1
76
77 do_execsql_test 1.13 {
78 DROP TABLE t1;
79 CREATE VIRTUAL TABLE t1 USING fts4(languageid=lang_id);
80 INSERT INTO t1(content) VALUES('a b c');
81 INSERT INTO t1(content, lang_id) VALUES('a b c', 1);
82 }
83
84 do_execsql_test 1.14 {
85 SELECT rowid FROM t1 WHERE t1 MATCH 'b';
86 } {1}
87 do_execsql_test 1.15 {
88 SELECT rowid FROM t1 WHERE t1 MATCH 'b' AND lang_id = 0;
89 } {1}
90
91 do_execsql_test 1.16 {
92 SELECT rowid FROM t1 WHERE t1 MATCH 'b' AND lang_id = 1;
93 } {2}
94
95 do_catchsql_test 1.17 {
96 INSERT INTO t1(content, lang_id) VALUES('123', -1);
97 } {1 {constraint failed}}
98
99 do_execsql_test 1.18 {
100 DROP TABLE t1;
101 CREATE VIRTUAL TABLE t1 USING fts4(languageid=lang_id);
102 INSERT INTO t1(content, lang_id) VALUES('A', 13);
103 INSERT INTO t1(content, lang_id) VALUES('B', 13);
104 INSERT INTO t1(content, lang_id) VALUES('C', 13);
105 INSERT INTO t1(content, lang_id) VALUES('D', 13);
106 INSERT INTO t1(content, lang_id) VALUES('E', 13);
107 INSERT INTO t1(content, lang_id) VALUES('F', 13);
108 INSERT INTO t1(content, lang_id) VALUES('G', 13);
109 INSERT INTO t1(content, lang_id) VALUES('H', 13);
110 INSERT INTO t1(content, lang_id) VALUES('I', 13);
111 INSERT INTO t1(content, lang_id) VALUES('J', 13);
112 INSERT INTO t1(content, lang_id) VALUES('K', 13);
113 INSERT INTO t1(content, lang_id) VALUES('L', 13);
114 INSERT INTO t1(content, lang_id) VALUES('M', 13);
115 INSERT INTO t1(content, lang_id) VALUES('N', 13);
116 INSERT INTO t1(content, lang_id) VALUES('O', 13);
117 INSERT INTO t1(content, lang_id) VALUES('P', 13);
118 INSERT INTO t1(content, lang_id) VALUES('Q', 13);
119 INSERT INTO t1(content, lang_id) VALUES('R', 13);
120 INSERT INTO t1(content, lang_id) VALUES('S', 13);
121 SELECT rowid FROM t1 WHERE t1 MATCH 'A';
122 } {}
123
124
125 #-------------------------------------------------------------------------
126 # Test cases 2.*
127 #
128 proc build_multilingual_db_1 {db} {
129 $db eval { CREATE VIRTUAL TABLE t2 USING fts4(x, y, languageid=l) }
130
131 set xwords [list zero one two three four five six seven eight nine ten]
132 set ywords [list alpha beta gamma delta epsilon zeta eta theta iota kappa]
133
134 for {set i 0} {$i < 1000} {incr i} {
135 set iLangid [expr $i%9]
136 set x ""
137 set y ""
138
139 set x [list]
140 lappend x [lindex $xwords [expr ($i / 1000) % 10]]
141 lappend x [lindex $xwords [expr ($i / 100) % 10]]
142 lappend x [lindex $xwords [expr ($i / 10) % 10]]
143 lappend x [lindex $xwords [expr ($i / 1) % 10]]
144
145 set y [list]
146 lappend y [lindex $ywords [expr ($i / 1000) % 10]]
147 lappend y [lindex $ywords [expr ($i / 100) % 10]]
148 lappend y [lindex $ywords [expr ($i / 10) % 10]]
149 lappend y [lindex $ywords [expr ($i / 1) % 10]]
150
151 $db eval { INSERT INTO t2(docid, x, y, l) VALUES($i, $x, $y, $iLangid) }
152 }
153
154 $db eval {
155 CREATE TABLE data(x, y, l);
156 INSERT INTO data(rowid, x, y, l) SELECT docid, x, y, l FROM t2;
157 }
158 }
159
160 proc rowid_list_set_langid {langid} {
161 set ::rowid_list_langid $langid
162 }
163 proc rowid_list {pattern} {
164 set langid $::rowid_list_langid
165 set res [list]
166 db eval {SELECT rowid, x, y FROM data WHERE l = $langid ORDER BY rowid ASC} {
167 if {[string match "*$pattern*" $x] || [string match "*$pattern*" $y]} {
168 lappend res $rowid
169 }
170 }
171 return $res
172 }
173
174 proc or_merge_list {list1 list2} {
175 set res [list]
176
177 set i1 0
178 set i2 0
179
180 set n1 [llength $list1]
181 set n2 [llength $list2]
182
183 while {$i1 < $n1 && $i2 < $n2} {
184 set e1 [lindex $list1 $i1]
185 set e2 [lindex $list2 $i2]
186
187 if {$e1==$e2} {
188 lappend res $e1
189 incr i1
190 incr i2
191 } elseif {$e1 < $e2} {
192 lappend res $e1
193 incr i1
194 } else {
195 lappend res $e2
196 incr i2
197 }
198 }
199
200 concat $res [lrange $list1 $i1 end] [lrange $list2 $i2 end]
201 }
202
203 proc or_merge_lists {args} {
204 set res [lindex $args 0]
205 for {set i 1} {$i < [llength $args]} {incr i} {
206 set res [or_merge_list $res [lindex $args $i]]
207 }
208 set res
209 }
210
211 proc and_merge_list {list1 list2} {
212 foreach i $list2 { set a($i) 1 }
213 set res [list]
214 foreach i $list1 {
215 if {[info exists a($i)]} {lappend res $i}
216 }
217 set res
218 }
219
220
221 proc and_merge_lists {args} {
222 set res [lindex $args 0]
223 for {set i 1} {$i < [llength $args]} {incr i} {
224 set res [and_merge_list $res [lindex $args $i]]
225 }
226 set res
227 }
228
229 proc filter_list {list langid} {
230 set res [list]
231 foreach i $list {
232 if {($i % 9) == $langid} {lappend res $i}
233 }
234 set res
235 }
236
237 do_test 2.0 {
238 reset_db
239 build_multilingual_db_1 db
240 } {}
241
242 proc do_test_query1 {tn query res_script} {
243 for {set langid 0} {$langid < 10} {incr langid} {
244 rowid_list_set_langid $langid
245 set res [eval $res_script]
246
247 set actual [
248 execsql {SELECT docid FROM t2 WHERE t2 MATCH $query AND l = $langid}
249 ]
250 do_test $tn.$langid [list set {} $actual] $res
251 }
252 }
253
254 # Run some queries.
255 do_test_query1 2.1.1 {delta} { rowid_list delta }
256 do_test_query1 2.1.2 {"zero one two"} { rowid_list "zero one two" }
257 do_test_query1 2.1.3 {zero one two} {
258 and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two]
259 }
260 do_test_query1 2.1.4 {"zero one" OR "one two"} {
261 or_merge_lists [rowid_list "zero one"] [rowid_list "one two"]
262 }
263
264 # Now try the same tests as above, but after running the 'optimize'
265 # command on the FTS table.
266 #
267 do_execsql_test 2.2 {
268 INSERT INTO t2(t2) VALUES('optimize');
269 SELECT count(*) FROM t2_segdir;
270 } {9}
271 do_test_query1 2.2.1 {delta} { rowid_list delta }
272 do_test_query1 2.2.2 {"zero one two"} { rowid_list "zero one two" }
273 do_test_query1 2.2.3 {zero one two} {
274 and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two]
275 }
276 do_test_query1 2.2.4 {"zero one" OR "one two"} {
277 or_merge_lists [rowid_list "zero one"] [rowid_list "one two"]
278 }
279
280 # And rebuild.
281 #
282 do_test 2.3 {
283 reset_db
284 build_multilingual_db_1 db
285 execsql { INSERT INTO t2(t2) VALUES('rebuild') }
286 } {}
287 do_test_query1 2.3.1 {delta} { rowid_list delta }
288 do_test_query1 2.3.2 {"zero one two"} { rowid_list "zero one two" }
289 do_test_query1 2.3.3 {zero one two} {
290 and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two]
291 }
292 do_test_query1 2.3.4 {"zero one" OR "one two"} {
293 or_merge_lists [rowid_list "zero one"] [rowid_list "one two"]
294 }
295
296 #-------------------------------------------------------------------------
297 # Test cases 3.*
298 #
299 do_test 3.0 {
300 reset_db
301 build_multilingual_db_1 db
302 execsql {
303 CREATE TABLE t3_data(l, x, y);
304 INSERT INTO t3_data(rowid, l, x, y) SELECT docid, l, x, y FROM t2;
305 DROP TABLE t2;
306 }
307 } {}
308 do_execsql_test 3.1 {
309 CREATE VIRTUAL TABLE t2 USING fts4(content=t3_data, languageid=l);
310 INSERT INTO t2(t2) VALUES('rebuild');
311 }
312
313 do_test_query1 3.1.1 {delta} { rowid_list delta }
314 do_test_query1 3.1.2 {"zero one two"} { rowid_list "zero one two" }
315 do_test_query1 3.1.3 {zero one two} {
316 and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two]
317 }
318 do_test_query1 3.1.4 {"zero one" OR "one two"} {
319 or_merge_lists [rowid_list "zero one"] [rowid_list "one two"]
320 }
321
322 do_execsql_test 3.2.1 {
323 DROP TABLE t2;
324 CREATE VIRTUAL TABLE t2 USING fts4(x, y, languageid=l, content=nosuchtable);
325 }
326
327 do_execsql_test 3.2.2 {
328 INSERT INTO t2(docid, x, y, l) SELECT rowid, x, y, l FROM t3_data;
329 }
330
331 do_execsql_test 3.2.3 {
332 DROP TABLE t3_data;
333 }
334
335 do_test_query1 3.3.1 {delta} { rowid_list delta }
336 do_test_query1 3.3.2 {"zero one two"} { rowid_list "zero one two" }
337 do_test_query1 3.3.3 {zero one two} {
338 and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two]
339 }
340 do_test_query1 3.3.4 {"zero one" OR "one two"} {
341 or_merge_lists [rowid_list "zero one"] [rowid_list "one two"]
342 }
343
344 #-------------------------------------------------------------------------
345 # Test cases 4.*
346 #
347 proc build_multilingual_db_2 {db} {
348 $db eval {
349 CREATE VIRTUAL TABLE t4 USING fts4(
350 tokenize=testtokenizer,
351 languageid=lid
352 );
353 }
354 for {set i 0} {$i < 50} {incr i} {
355 execsql {
356 INSERT INTO t4(docid, content, lid) VALUES($i, 'The Quick Brown Fox', $i)
357 }
358 }
359 }
360
361 do_test 4.1.0 {
362 reset_db
363 set ptr [fts3_test_tokenizer]
364 execsql { SELECT fts3_tokenizer('testtokenizer', $ptr) }
365 build_multilingual_db_2 db
366 } {}
367 do_execsql_test 4.1.1 {
368 SELECT docid FROM t4 WHERE t4 MATCH 'quick';
369 } {0}
370 do_execsql_test 4.1.2 {
371 SELECT docid FROM t4 WHERE t4 MATCH 'quick' AND lid=1;
372 } {}
373 do_execsql_test 4.1.3 {
374 SELECT docid FROM t4 WHERE t4 MATCH 'Quick' AND lid=1;
375 } {1}
376 for {set i 0} {$i < 50} {incr i} {
377 do_execsql_test 4.1.4.$i {
378 SELECT count(*) FROM t4 WHERE t4 MATCH 'fox' AND lid=$i;
379 } [expr 0==($i%2)]
380 }
381 do_catchsql_test 4.1.5 {
382 INSERT INTO t4(content, lid) VALUES('hello world', 101)
383 } {1 {SQL logic error or missing database}}
384
385 #-------------------------------------------------------------------------
386 # Test cases 5.*
387 #
388 # The following test cases are designed to detect a 32-bit overflow bug
389 # that existed at one point.
390 #
391 proc build_multilingual_db_3 {db} {
392 $db eval {
393 CREATE VIRTUAL TABLE t5 USING fts4(languageid=lid);
394 }
395 set languages [list 0 1 2 [expr 1<<30]]
396
397 foreach lid $languages {
398 execsql {
399 INSERT INTO t5(docid, content, lid) VALUES(
400 $lid, 'My language is ' || $lid, $lid
401 )
402 }
403 }
404 }
405
406 do_test 5.1.0 {
407 reset_db
408 build_multilingual_db_3 db
409 } {}
410
411 do_execsql_test 5.1.1 {
412 SELECT level FROM t5_segdir;
413 } [list 0 1024 2048 [expr 1<<40]]
414
415 do_execsql_test 5.1.2 {SELECT docid FROM t5 WHERE t5 MATCH 'language'} 0
416 foreach langid [list 0 1 2 [expr 1<<30]] {
417 do_execsql_test 5.2.$langid {
418 SELECT docid FROM t5 WHERE t5 MATCH 'language' AND lid = $langid
419 } $langid
420 }
421
422 set lid [expr 1<<30]
423 do_execsql_test 5.3.1 {
424 CREATE VIRTUAL TABLE t6 USING fts4(languageid=lid);
425 INSERT INTO t6 VALUES('I belong to language 0!');
426 }
427 do_test 5.3.2 {
428 for {set i 0} {$i < 20} {incr i} {
429 execsql {
430 INSERT INTO t6(content, lid) VALUES(
431 'I (row '||$i||') belong to langauge N!', $lid
432 );
433 }
434 }
435 execsql { SELECT docid FROM t6 WHERE t6 MATCH 'belong' }
436 } {1}
437
438 do_test 5.3.3 {
439 execsql { SELECT docid FROM t6 WHERE t6 MATCH 'belong' AND lid=$lid}
440 } {2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21}
441
442 do_execsql_test 5.3.4 { INSERT INTO t6(t6) VALUES('optimize') } {}
443 do_execsql_test 5.3.5 { SELECT docid FROM t6 WHERE t6 MATCH 'belong' } {1}
444 do_execsql_test 5.3.6 {
445 SELECT docid FROM t6 WHERE t6 MATCH 'belong' AND lid=$lid
446 } {2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21}
447
448
449 set lid [expr 1<<30]
450 foreach lid [list 4 [expr 1<<30]] {
451 do_execsql_test 5.4.$lid.1 {
452 DELETE FROM t6;
453 SELECT count(*) FROM t6_segdir;
454 SELECT count(*) FROM t6_segments;
455 } {0 0}
456 do_execsql_test 5.4.$lid.2 {
457 INSERT INTO t6(content, lid) VALUES('zero zero zero', $lid);
458 INSERT INTO t6(content, lid) VALUES('zero zero one', $lid);
459 INSERT INTO t6(content, lid) VALUES('zero one zero', $lid);
460 INSERT INTO t6(content, lid) VALUES('zero one one', $lid);
461 INSERT INTO t6(content, lid) VALUES('one zero zero', $lid);
462 INSERT INTO t6(content, lid) VALUES('one zero one', $lid);
463 INSERT INTO t6(content, lid) VALUES('one one zero', $lid);
464 INSERT INTO t6(content, lid) VALUES('one one one', $lid);
465
466 SELECT docid FROM t6 WHERE t6 MATCH '"zero zero"' AND lid=$lid;
467 } {1 2 5}
468
469 do_execsql_test 5.4.$lid.3 {
470 SELECT count(*) FROM t6_segdir;
471 SELECT count(*) FROM t6_segments;
472 } {8 0}
473
474 do_execsql_test 5.4.$lid.4 {
475 INSERT INTO t6(t6) VALUES('merge=100,3');
476 INSERT INTO t6(t6) VALUES('merge=100,3');
477 SELECT docid FROM t6 WHERE t6 MATCH '"zero zero"' AND lid=$lid;
478 } {1 2 5}
479
480 do_execsql_test 5.4.$lid.5 {
481 SELECT count(*) FROM t6_segdir;
482 SELECT count(*) FROM t6_segments;
483 } {4 4}
484 }
485 finish_test
OLDNEW
« no previous file with comments | « third_party/sqlite/sqlite-src-3080704/test/fts4incr.test ('k') | third_party/sqlite/sqlite-src-3080704/test/fts4merge.test » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698