| OLD | NEW |
| (Empty) |
| 1 # 2012 March 01 | |
| 2 # | |
| 3 # The author disclaims copyright to this source code. In place of | |
| 4 # a legal notice, here is a blessing: | |
| 5 # | |
| 6 # May you do good and not evil. | |
| 7 # May you find forgiveness for yourself and forgive others. | |
| 8 # May you share freely, never taking more than you give. | |
| 9 # | |
| 10 #************************************************************************* | |
| 11 # This file implements regression tests for SQLite library. The | |
| 12 # focus of this script is testing the languageid=xxx FTS4 option. | |
| 13 # | |
| 14 | |
| 15 set testdir [file dirname $argv0] | |
| 16 source $testdir/tester.tcl | |
| 17 set ::testprefix fts4content | |
| 18 | |
| 19 # If SQLITE_ENABLE_FTS3 is defined, omit this file. | |
| 20 ifcapable !fts3 { | |
| 21 finish_test | |
| 22 return | |
| 23 } | |
| 24 | |
| 25 set ::testprefix fts4langid | |
| 26 | |
| 27 #--------------------------------------------------------------------------- | |
| 28 # Test plan: | |
| 29 # | |
| 30 # 1.* - Warm-body tests created for specific purposes during development. | |
| 31 # Passing these doesn't really prove much. | |
| 32 # | |
| 33 # 2.1.* - Test that FTS queries only ever return rows associated with | |
| 34 # the requested language. | |
| 35 # | |
| 36 # 2.2.* - Same as 2.1.*, after an 'optimize' command. | |
| 37 # | |
| 38 # 2.3.* - Same as 2.1.*, after a 'rebuild' command. | |
| 39 # | |
| 40 # 3.* - Tests with content= tables. Both where there is a real | |
| 41 # underlying content table and where there is not. | |
| 42 # | |
| 43 # 4.* - Test that if one is provided, the tokenizer xLanguage method | |
| 44 # is called to configure the tokenizer before tokenizing query | |
| 45 # or document text. | |
| 46 # | |
| 47 # 5.* - Test the fts4aux table when the associated FTS4 table contains | |
| 48 # multiple languages. | |
| 49 # | |
| 50 | |
| 51 do_execsql_test 1.1 { | |
| 52 CREATE VIRTUAL TABLE t1 USING fts4(a, b, languageid=lang_id); | |
| 53 } | |
| 54 | |
| 55 do_execsql_test 1.2 { | |
| 56 SELECT sql FROM sqlite_master WHERE name = 't1_content'; | |
| 57 } {{CREATE TABLE 't1_content'(docid INTEGER PRIMARY KEY, 'c0a', 'c1b', langid)}} | |
| 58 | |
| 59 do_execsql_test 1.3 {SELECT docid FROM t1} {} | |
| 60 do_execsql_test 1.4 {SELECT lang_id FROM t1} {} | |
| 61 | |
| 62 do_execsql_test 1.5 {INSERT INTO t1(a, b) VALUES('aaa', 'bbb')} | |
| 63 do_execsql_test 1.6 {SELECT lang_id FROM t1 } {0} | |
| 64 | |
| 65 do_execsql_test 1.7 {INSERT INTO t1(a, b, lang_id) VALUES('aaa', 'bbb', 4)} | |
| 66 do_execsql_test 1.8 {SELECT lang_id FROM t1 } {0 4} | |
| 67 | |
| 68 do_execsql_test 1.9 {INSERT INTO t1(a, b, lang_id) VALUES('aaa', 'bbb', 'xyz')} | |
| 69 do_execsql_test 1.10 {SELECT lang_id FROM t1} {0 4 0} | |
| 70 | |
| 71 do_execsql_test 1.11 { | |
| 72 CREATE VIRTUAL TABLE t2 USING fts4; | |
| 73 INSERT INTO t2 VALUES('abc'); | |
| 74 } | |
| 75 do_execsql_test 1.12 { SELECT rowid FROM t2 WHERE content MATCH 'abc' } 1 | |
| 76 | |
| 77 do_execsql_test 1.13 { | |
| 78 DROP TABLE t1; | |
| 79 CREATE VIRTUAL TABLE t1 USING fts4(languageid=lang_id); | |
| 80 INSERT INTO t1(content) VALUES('a b c'); | |
| 81 INSERT INTO t1(content, lang_id) VALUES('a b c', 1); | |
| 82 } | |
| 83 | |
| 84 do_execsql_test 1.14 { | |
| 85 SELECT rowid FROM t1 WHERE t1 MATCH 'b'; | |
| 86 } {1} | |
| 87 do_execsql_test 1.15 { | |
| 88 SELECT rowid FROM t1 WHERE t1 MATCH 'b' AND lang_id = 0; | |
| 89 } {1} | |
| 90 | |
| 91 do_execsql_test 1.16 { | |
| 92 SELECT rowid FROM t1 WHERE t1 MATCH 'b' AND lang_id = 1; | |
| 93 } {2} | |
| 94 | |
| 95 do_catchsql_test 1.17 { | |
| 96 INSERT INTO t1(content, lang_id) VALUES('123', -1); | |
| 97 } {1 {constraint failed}} | |
| 98 | |
| 99 do_execsql_test 1.18 { | |
| 100 DROP TABLE t1; | |
| 101 CREATE VIRTUAL TABLE t1 USING fts4(languageid=lang_id); | |
| 102 INSERT INTO t1(content, lang_id) VALUES('A', 13); | |
| 103 INSERT INTO t1(content, lang_id) VALUES('B', 13); | |
| 104 INSERT INTO t1(content, lang_id) VALUES('C', 13); | |
| 105 INSERT INTO t1(content, lang_id) VALUES('D', 13); | |
| 106 INSERT INTO t1(content, lang_id) VALUES('E', 13); | |
| 107 INSERT INTO t1(content, lang_id) VALUES('F', 13); | |
| 108 INSERT INTO t1(content, lang_id) VALUES('G', 13); | |
| 109 INSERT INTO t1(content, lang_id) VALUES('H', 13); | |
| 110 INSERT INTO t1(content, lang_id) VALUES('I', 13); | |
| 111 INSERT INTO t1(content, lang_id) VALUES('J', 13); | |
| 112 INSERT INTO t1(content, lang_id) VALUES('K', 13); | |
| 113 INSERT INTO t1(content, lang_id) VALUES('L', 13); | |
| 114 INSERT INTO t1(content, lang_id) VALUES('M', 13); | |
| 115 INSERT INTO t1(content, lang_id) VALUES('N', 13); | |
| 116 INSERT INTO t1(content, lang_id) VALUES('O', 13); | |
| 117 INSERT INTO t1(content, lang_id) VALUES('P', 13); | |
| 118 INSERT INTO t1(content, lang_id) VALUES('Q', 13); | |
| 119 INSERT INTO t1(content, lang_id) VALUES('R', 13); | |
| 120 INSERT INTO t1(content, lang_id) VALUES('S', 13); | |
| 121 SELECT rowid FROM t1 WHERE t1 MATCH 'A'; | |
| 122 } {} | |
| 123 | |
| 124 | |
| 125 #------------------------------------------------------------------------- | |
| 126 # Test cases 2.* | |
| 127 # | |
| 128 proc build_multilingual_db_1 {db} { | |
| 129 $db eval { CREATE VIRTUAL TABLE t2 USING fts4(x, y, languageid=l) } | |
| 130 | |
| 131 set xwords [list zero one two three four five six seven eight nine ten] | |
| 132 set ywords [list alpha beta gamma delta epsilon zeta eta theta iota kappa] | |
| 133 | |
| 134 for {set i 0} {$i < 1000} {incr i} { | |
| 135 set iLangid [expr $i%9] | |
| 136 set x "" | |
| 137 set y "" | |
| 138 | |
| 139 set x [list] | |
| 140 lappend x [lindex $xwords [expr ($i / 1000) % 10]] | |
| 141 lappend x [lindex $xwords [expr ($i / 100) % 10]] | |
| 142 lappend x [lindex $xwords [expr ($i / 10) % 10]] | |
| 143 lappend x [lindex $xwords [expr ($i / 1) % 10]] | |
| 144 | |
| 145 set y [list] | |
| 146 lappend y [lindex $ywords [expr ($i / 1000) % 10]] | |
| 147 lappend y [lindex $ywords [expr ($i / 100) % 10]] | |
| 148 lappend y [lindex $ywords [expr ($i / 10) % 10]] | |
| 149 lappend y [lindex $ywords [expr ($i / 1) % 10]] | |
| 150 | |
| 151 $db eval { INSERT INTO t2(docid, x, y, l) VALUES($i, $x, $y, $iLangid) } | |
| 152 } | |
| 153 | |
| 154 $db eval { | |
| 155 CREATE TABLE data(x, y, l); | |
| 156 INSERT INTO data(rowid, x, y, l) SELECT docid, x, y, l FROM t2; | |
| 157 } | |
| 158 } | |
| 159 | |
| 160 proc rowid_list_set_langid {langid} { | |
| 161 set ::rowid_list_langid $langid | |
| 162 } | |
| 163 proc rowid_list {pattern} { | |
| 164 set langid $::rowid_list_langid | |
| 165 set res [list] | |
| 166 db eval {SELECT rowid, x, y FROM data WHERE l = $langid ORDER BY rowid ASC} { | |
| 167 if {[string match "*$pattern*" $x] || [string match "*$pattern*" $y]} { | |
| 168 lappend res $rowid | |
| 169 } | |
| 170 } | |
| 171 return $res | |
| 172 } | |
| 173 | |
| 174 proc or_merge_list {list1 list2} { | |
| 175 set res [list] | |
| 176 | |
| 177 set i1 0 | |
| 178 set i2 0 | |
| 179 | |
| 180 set n1 [llength $list1] | |
| 181 set n2 [llength $list2] | |
| 182 | |
| 183 while {$i1 < $n1 && $i2 < $n2} { | |
| 184 set e1 [lindex $list1 $i1] | |
| 185 set e2 [lindex $list2 $i2] | |
| 186 | |
| 187 if {$e1==$e2} { | |
| 188 lappend res $e1 | |
| 189 incr i1 | |
| 190 incr i2 | |
| 191 } elseif {$e1 < $e2} { | |
| 192 lappend res $e1 | |
| 193 incr i1 | |
| 194 } else { | |
| 195 lappend res $e2 | |
| 196 incr i2 | |
| 197 } | |
| 198 } | |
| 199 | |
| 200 concat $res [lrange $list1 $i1 end] [lrange $list2 $i2 end] | |
| 201 } | |
| 202 | |
| 203 proc or_merge_lists {args} { | |
| 204 set res [lindex $args 0] | |
| 205 for {set i 1} {$i < [llength $args]} {incr i} { | |
| 206 set res [or_merge_list $res [lindex $args $i]] | |
| 207 } | |
| 208 set res | |
| 209 } | |
| 210 | |
| 211 proc and_merge_list {list1 list2} { | |
| 212 foreach i $list2 { set a($i) 1 } | |
| 213 set res [list] | |
| 214 foreach i $list1 { | |
| 215 if {[info exists a($i)]} {lappend res $i} | |
| 216 } | |
| 217 set res | |
| 218 } | |
| 219 | |
| 220 | |
| 221 proc and_merge_lists {args} { | |
| 222 set res [lindex $args 0] | |
| 223 for {set i 1} {$i < [llength $args]} {incr i} { | |
| 224 set res [and_merge_list $res [lindex $args $i]] | |
| 225 } | |
| 226 set res | |
| 227 } | |
| 228 | |
| 229 proc filter_list {list langid} { | |
| 230 set res [list] | |
| 231 foreach i $list { | |
| 232 if {($i % 9) == $langid} {lappend res $i} | |
| 233 } | |
| 234 set res | |
| 235 } | |
| 236 | |
| 237 do_test 2.0 { | |
| 238 reset_db | |
| 239 build_multilingual_db_1 db | |
| 240 } {} | |
| 241 | |
| 242 proc do_test_query1 {tn query res_script} { | |
| 243 for {set langid 0} {$langid < 10} {incr langid} { | |
| 244 rowid_list_set_langid $langid | |
| 245 set res [eval $res_script] | |
| 246 | |
| 247 set actual [ | |
| 248 execsql {SELECT docid FROM t2 WHERE t2 MATCH $query AND l = $langid} | |
| 249 ] | |
| 250 do_test $tn.$langid [list set {} $actual] $res | |
| 251 } | |
| 252 } | |
| 253 | |
| 254 # Run some queries. | |
| 255 do_test_query1 2.1.1 {delta} { rowid_list delta } | |
| 256 do_test_query1 2.1.2 {"zero one two"} { rowid_list "zero one two" } | |
| 257 do_test_query1 2.1.3 {zero one two} { | |
| 258 and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] | |
| 259 } | |
| 260 do_test_query1 2.1.4 {"zero one" OR "one two"} { | |
| 261 or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] | |
| 262 } | |
| 263 | |
| 264 # Now try the same tests as above, but after running the 'optimize' | |
| 265 # command on the FTS table. | |
| 266 # | |
| 267 do_execsql_test 2.2 { | |
| 268 INSERT INTO t2(t2) VALUES('optimize'); | |
| 269 SELECT count(*) FROM t2_segdir; | |
| 270 } {9} | |
| 271 do_test_query1 2.2.1 {delta} { rowid_list delta } | |
| 272 do_test_query1 2.2.2 {"zero one two"} { rowid_list "zero one two" } | |
| 273 do_test_query1 2.2.3 {zero one two} { | |
| 274 and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] | |
| 275 } | |
| 276 do_test_query1 2.2.4 {"zero one" OR "one two"} { | |
| 277 or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] | |
| 278 } | |
| 279 | |
| 280 # And rebuild. | |
| 281 # | |
| 282 do_test 2.3 { | |
| 283 reset_db | |
| 284 build_multilingual_db_1 db | |
| 285 execsql { INSERT INTO t2(t2) VALUES('rebuild') } | |
| 286 } {} | |
| 287 do_test_query1 2.3.1 {delta} { rowid_list delta } | |
| 288 do_test_query1 2.3.2 {"zero one two"} { rowid_list "zero one two" } | |
| 289 do_test_query1 2.3.3 {zero one two} { | |
| 290 and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] | |
| 291 } | |
| 292 do_test_query1 2.3.4 {"zero one" OR "one two"} { | |
| 293 or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] | |
| 294 } | |
| 295 | |
| 296 #------------------------------------------------------------------------- | |
| 297 # Test cases 3.* | |
| 298 # | |
| 299 do_test 3.0 { | |
| 300 reset_db | |
| 301 build_multilingual_db_1 db | |
| 302 execsql { | |
| 303 CREATE TABLE t3_data(l, x, y); | |
| 304 INSERT INTO t3_data(rowid, l, x, y) SELECT docid, l, x, y FROM t2; | |
| 305 DROP TABLE t2; | |
| 306 } | |
| 307 } {} | |
| 308 do_execsql_test 3.1 { | |
| 309 CREATE VIRTUAL TABLE t2 USING fts4(content=t3_data, languageid=l); | |
| 310 INSERT INTO t2(t2) VALUES('rebuild'); | |
| 311 } | |
| 312 | |
| 313 do_test_query1 3.1.1 {delta} { rowid_list delta } | |
| 314 do_test_query1 3.1.2 {"zero one two"} { rowid_list "zero one two" } | |
| 315 do_test_query1 3.1.3 {zero one two} { | |
| 316 and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] | |
| 317 } | |
| 318 do_test_query1 3.1.4 {"zero one" OR "one two"} { | |
| 319 or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] | |
| 320 } | |
| 321 | |
| 322 do_execsql_test 3.2.1 { | |
| 323 DROP TABLE t2; | |
| 324 CREATE VIRTUAL TABLE t2 USING fts4(x, y, languageid=l, content=nosuchtable); | |
| 325 } | |
| 326 | |
| 327 do_execsql_test 3.2.2 { | |
| 328 INSERT INTO t2(docid, x, y, l) SELECT rowid, x, y, l FROM t3_data; | |
| 329 } | |
| 330 | |
| 331 do_execsql_test 3.2.3 { | |
| 332 DROP TABLE t3_data; | |
| 333 } | |
| 334 | |
| 335 do_test_query1 3.3.1 {delta} { rowid_list delta } | |
| 336 do_test_query1 3.3.2 {"zero one two"} { rowid_list "zero one two" } | |
| 337 do_test_query1 3.3.3 {zero one two} { | |
| 338 and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] | |
| 339 } | |
| 340 do_test_query1 3.3.4 {"zero one" OR "one two"} { | |
| 341 or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] | |
| 342 } | |
| 343 | |
| 344 #------------------------------------------------------------------------- | |
| 345 # Test cases 4.* | |
| 346 # | |
| 347 proc build_multilingual_db_2 {db} { | |
| 348 $db eval { | |
| 349 CREATE VIRTUAL TABLE t4 USING fts4( | |
| 350 tokenize=testtokenizer, | |
| 351 languageid=lid | |
| 352 ); | |
| 353 } | |
| 354 for {set i 0} {$i < 50} {incr i} { | |
| 355 execsql { | |
| 356 INSERT INTO t4(docid, content, lid) VALUES($i, 'The Quick Brown Fox', $i) | |
| 357 } | |
| 358 } | |
| 359 } | |
| 360 | |
| 361 do_test 4.1.0 { | |
| 362 reset_db | |
| 363 set ptr [fts3_test_tokenizer] | |
| 364 execsql { SELECT fts3_tokenizer('testtokenizer', $ptr) } | |
| 365 build_multilingual_db_2 db | |
| 366 } {} | |
| 367 do_execsql_test 4.1.1 { | |
| 368 SELECT docid FROM t4 WHERE t4 MATCH 'quick'; | |
| 369 } {0} | |
| 370 do_execsql_test 4.1.2 { | |
| 371 SELECT docid FROM t4 WHERE t4 MATCH 'quick' AND lid=1; | |
| 372 } {} | |
| 373 do_execsql_test 4.1.3 { | |
| 374 SELECT docid FROM t4 WHERE t4 MATCH 'Quick' AND lid=1; | |
| 375 } {1} | |
| 376 for {set i 0} {$i < 50} {incr i} { | |
| 377 do_execsql_test 4.1.4.$i { | |
| 378 SELECT count(*) FROM t4 WHERE t4 MATCH 'fox' AND lid=$i; | |
| 379 } [expr 0==($i%2)] | |
| 380 } | |
| 381 do_catchsql_test 4.1.5 { | |
| 382 INSERT INTO t4(content, lid) VALUES('hello world', 101) | |
| 383 } {1 {SQL logic error or missing database}} | |
| 384 | |
| 385 #------------------------------------------------------------------------- | |
| 386 # Test cases 5.* | |
| 387 # | |
| 388 # The following test cases are designed to detect a 32-bit overflow bug | |
| 389 # that existed at one point. | |
| 390 # | |
| 391 proc build_multilingual_db_3 {db} { | |
| 392 $db eval { | |
| 393 CREATE VIRTUAL TABLE t5 USING fts4(languageid=lid); | |
| 394 } | |
| 395 set languages [list 0 1 2 [expr 1<<30]] | |
| 396 | |
| 397 foreach lid $languages { | |
| 398 execsql { | |
| 399 INSERT INTO t5(docid, content, lid) VALUES( | |
| 400 $lid, 'My language is ' || $lid, $lid | |
| 401 ) | |
| 402 } | |
| 403 } | |
| 404 } | |
| 405 | |
| 406 do_test 5.1.0 { | |
| 407 reset_db | |
| 408 build_multilingual_db_3 db | |
| 409 } {} | |
| 410 | |
| 411 do_execsql_test 5.1.1 { | |
| 412 SELECT level FROM t5_segdir; | |
| 413 } [list 0 1024 2048 [expr 1<<40]] | |
| 414 | |
| 415 do_execsql_test 5.1.2 {SELECT docid FROM t5 WHERE t5 MATCH 'language'} 0 | |
| 416 foreach langid [list 0 1 2 [expr 1<<30]] { | |
| 417 do_execsql_test 5.2.$langid { | |
| 418 SELECT docid FROM t5 WHERE t5 MATCH 'language' AND lid = $langid | |
| 419 } $langid | |
| 420 } | |
| 421 | |
| 422 set lid [expr 1<<30] | |
| 423 do_execsql_test 5.3.1 { | |
| 424 CREATE VIRTUAL TABLE t6 USING fts4(languageid=lid); | |
| 425 INSERT INTO t6 VALUES('I belong to language 0!'); | |
| 426 } | |
| 427 do_test 5.3.2 { | |
| 428 for {set i 0} {$i < 20} {incr i} { | |
| 429 execsql { | |
| 430 INSERT INTO t6(content, lid) VALUES( | |
| 431 'I (row '||$i||') belong to langauge N!', $lid | |
| 432 ); | |
| 433 } | |
| 434 } | |
| 435 execsql { SELECT docid FROM t6 WHERE t6 MATCH 'belong' } | |
| 436 } {1} | |
| 437 | |
| 438 do_test 5.3.3 { | |
| 439 execsql { SELECT docid FROM t6 WHERE t6 MATCH 'belong' AND lid=$lid} | |
| 440 } {2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21} | |
| 441 | |
| 442 do_execsql_test 5.3.4 { INSERT INTO t6(t6) VALUES('optimize') } {} | |
| 443 do_execsql_test 5.3.5 { SELECT docid FROM t6 WHERE t6 MATCH 'belong' } {1} | |
| 444 do_execsql_test 5.3.6 { | |
| 445 SELECT docid FROM t6 WHERE t6 MATCH 'belong' AND lid=$lid | |
| 446 } {2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21} | |
| 447 | |
| 448 | |
| 449 set lid [expr 1<<30] | |
| 450 foreach lid [list 4 [expr 1<<30]] { | |
| 451 do_execsql_test 5.4.$lid.1 { | |
| 452 DELETE FROM t6; | |
| 453 SELECT count(*) FROM t6_segdir; | |
| 454 SELECT count(*) FROM t6_segments; | |
| 455 } {0 0} | |
| 456 do_execsql_test 5.4.$lid.2 { | |
| 457 INSERT INTO t6(content, lid) VALUES('zero zero zero', $lid); | |
| 458 INSERT INTO t6(content, lid) VALUES('zero zero one', $lid); | |
| 459 INSERT INTO t6(content, lid) VALUES('zero one zero', $lid); | |
| 460 INSERT INTO t6(content, lid) VALUES('zero one one', $lid); | |
| 461 INSERT INTO t6(content, lid) VALUES('one zero zero', $lid); | |
| 462 INSERT INTO t6(content, lid) VALUES('one zero one', $lid); | |
| 463 INSERT INTO t6(content, lid) VALUES('one one zero', $lid); | |
| 464 INSERT INTO t6(content, lid) VALUES('one one one', $lid); | |
| 465 | |
| 466 SELECT docid FROM t6 WHERE t6 MATCH '"zero zero"' AND lid=$lid; | |
| 467 } {1 2 5} | |
| 468 | |
| 469 do_execsql_test 5.4.$lid.3 { | |
| 470 SELECT count(*) FROM t6_segdir; | |
| 471 SELECT count(*) FROM t6_segments; | |
| 472 } {8 0} | |
| 473 | |
| 474 do_execsql_test 5.4.$lid.4 { | |
| 475 INSERT INTO t6(t6) VALUES('merge=100,3'); | |
| 476 INSERT INTO t6(t6) VALUES('merge=100,3'); | |
| 477 SELECT docid FROM t6 WHERE t6 MATCH '"zero zero"' AND lid=$lid; | |
| 478 } {1 2 5} | |
| 479 | |
| 480 do_execsql_test 5.4.$lid.5 { | |
| 481 SELECT count(*) FROM t6_segdir; | |
| 482 SELECT count(*) FROM t6_segments; | |
| 483 } {4 4} | |
| 484 } | |
| 485 finish_test | |
| OLD | NEW |