OLD | NEW |
(Empty) | |
| 1 # 2012 March 01 |
| 2 # |
| 3 # The author disclaims copyright to this source code. In place of |
| 4 # a legal notice, here is a blessing: |
| 5 # |
| 6 # May you do good and not evil. |
| 7 # May you find forgiveness for yourself and forgive others. |
| 8 # May you share freely, never taking more than you give. |
| 9 # |
| 10 #************************************************************************* |
| 11 # This file implements regression tests for SQLite library. The |
| 12 # focus of this script is testing the languageid=xxx FTS4 option. |
| 13 # |
| 14 |
| 15 set testdir [file dirname $argv0] |
| 16 source $testdir/tester.tcl |
| 17 set ::testprefix fts4content |
| 18 |
| 19 # If SQLITE_ENABLE_FTS3 is defined, omit this file. |
| 20 ifcapable !fts3 { |
| 21 finish_test |
| 22 return |
| 23 } |
| 24 |
| 25 set ::testprefix fts4langid |
| 26 |
| 27 #--------------------------------------------------------------------------- |
| 28 # Test plan: |
| 29 # |
| 30 # 1.* - Warm-body tests created for specific purposes during development. |
| 31 # Passing these doesn't really prove much. |
| 32 # |
| 33 # 2.1.* - Test that FTS queries only ever return rows associated with |
| 34 # the requested language. |
| 35 # |
| 36 # 2.2.* - Same as 2.1.*, after an 'optimize' command. |
| 37 # |
| 38 # 2.3.* - Same as 2.1.*, after a 'rebuild' command. |
| 39 # |
| 40 # 3.* - Tests with content= tables. Both where there is a real |
| 41 # underlying content table and where there is not. |
| 42 # |
| 43 # 4.* - Test that if one is provided, the tokenizer xLanguage method |
| 44 # is called to configure the tokenizer before tokenizing query |
| 45 # or document text. |
| 46 # |
| 47 # 5.* - Test the fts4aux table when the associated FTS4 table contains |
| 48 # multiple languages. |
| 49 # |
| 50 |
| 51 do_execsql_test 1.1 { |
| 52 CREATE VIRTUAL TABLE t1 USING fts4(a, b, languageid=lang_id); |
| 53 } |
| 54 |
| 55 do_execsql_test 1.2 { |
| 56 SELECT sql FROM sqlite_master WHERE name = 't1_content'; |
| 57 } {{CREATE TABLE 't1_content'(docid INTEGER PRIMARY KEY, 'c0a', 'c1b', langid)}} |
| 58 |
| 59 do_execsql_test 1.3 {SELECT docid FROM t1} {} |
| 60 do_execsql_test 1.4 {SELECT lang_id FROM t1} {} |
| 61 |
| 62 do_execsql_test 1.5 {INSERT INTO t1(a, b) VALUES('aaa', 'bbb')} |
| 63 do_execsql_test 1.6 {SELECT lang_id FROM t1 } {0} |
| 64 |
| 65 do_execsql_test 1.7 {INSERT INTO t1(a, b, lang_id) VALUES('aaa', 'bbb', 4)} |
| 66 do_execsql_test 1.8 {SELECT lang_id FROM t1 } {0 4} |
| 67 |
| 68 do_execsql_test 1.9 {INSERT INTO t1(a, b, lang_id) VALUES('aaa', 'bbb', 'xyz')} |
| 69 do_execsql_test 1.10 {SELECT lang_id FROM t1} {0 4 0} |
| 70 |
| 71 do_execsql_test 1.11 { |
| 72 CREATE VIRTUAL TABLE t2 USING fts4; |
| 73 INSERT INTO t2 VALUES('abc'); |
| 74 } |
| 75 do_execsql_test 1.12 { SELECT rowid FROM t2 WHERE content MATCH 'abc' } 1 |
| 76 |
| 77 do_execsql_test 1.13 { |
| 78 DROP TABLE t1; |
| 79 CREATE VIRTUAL TABLE t1 USING fts4(languageid=lang_id); |
| 80 INSERT INTO t1(content) VALUES('a b c'); |
| 81 INSERT INTO t1(content, lang_id) VALUES('a b c', 1); |
| 82 } |
| 83 |
| 84 do_execsql_test 1.14 { |
| 85 SELECT rowid FROM t1 WHERE t1 MATCH 'b'; |
| 86 } {1} |
| 87 do_execsql_test 1.15 { |
| 88 SELECT rowid FROM t1 WHERE t1 MATCH 'b' AND lang_id = 0; |
| 89 } {1} |
| 90 |
| 91 do_execsql_test 1.16 { |
| 92 SELECT rowid FROM t1 WHERE t1 MATCH 'b' AND lang_id = 1; |
| 93 } {2} |
| 94 |
| 95 do_catchsql_test 1.17 { |
| 96 INSERT INTO t1(content, lang_id) VALUES('123', -1); |
| 97 } {1 {constraint failed}} |
| 98 |
| 99 do_execsql_test 1.18 { |
| 100 DROP TABLE t1; |
| 101 CREATE VIRTUAL TABLE t1 USING fts4(languageid=lang_id); |
| 102 INSERT INTO t1(content, lang_id) VALUES('A', 13); |
| 103 INSERT INTO t1(content, lang_id) VALUES('B', 13); |
| 104 INSERT INTO t1(content, lang_id) VALUES('C', 13); |
| 105 INSERT INTO t1(content, lang_id) VALUES('D', 13); |
| 106 INSERT INTO t1(content, lang_id) VALUES('E', 13); |
| 107 INSERT INTO t1(content, lang_id) VALUES('F', 13); |
| 108 INSERT INTO t1(content, lang_id) VALUES('G', 13); |
| 109 INSERT INTO t1(content, lang_id) VALUES('H', 13); |
| 110 INSERT INTO t1(content, lang_id) VALUES('I', 13); |
| 111 INSERT INTO t1(content, lang_id) VALUES('J', 13); |
| 112 INSERT INTO t1(content, lang_id) VALUES('K', 13); |
| 113 INSERT INTO t1(content, lang_id) VALUES('L', 13); |
| 114 INSERT INTO t1(content, lang_id) VALUES('M', 13); |
| 115 INSERT INTO t1(content, lang_id) VALUES('N', 13); |
| 116 INSERT INTO t1(content, lang_id) VALUES('O', 13); |
| 117 INSERT INTO t1(content, lang_id) VALUES('P', 13); |
| 118 INSERT INTO t1(content, lang_id) VALUES('Q', 13); |
| 119 INSERT INTO t1(content, lang_id) VALUES('R', 13); |
| 120 INSERT INTO t1(content, lang_id) VALUES('S', 13); |
| 121 SELECT rowid FROM t1 WHERE t1 MATCH 'A'; |
| 122 } {} |
| 123 |
| 124 |
| 125 #------------------------------------------------------------------------- |
| 126 # Test cases 2.* |
| 127 # |
| 128 proc build_multilingual_db_1 {db} { |
| 129 $db eval { CREATE VIRTUAL TABLE t2 USING fts4(x, y, languageid=l) } |
| 130 |
| 131 set xwords [list zero one two three four five six seven eight nine ten] |
| 132 set ywords [list alpha beta gamma delta epsilon zeta eta theta iota kappa] |
| 133 |
| 134 for {set i 0} {$i < 1000} {incr i} { |
| 135 set iLangid [expr $i%9] |
| 136 set x "" |
| 137 set y "" |
| 138 |
| 139 set x [list] |
| 140 lappend x [lindex $xwords [expr ($i / 1000) % 10]] |
| 141 lappend x [lindex $xwords [expr ($i / 100) % 10]] |
| 142 lappend x [lindex $xwords [expr ($i / 10) % 10]] |
| 143 lappend x [lindex $xwords [expr ($i / 1) % 10]] |
| 144 |
| 145 set y [list] |
| 146 lappend y [lindex $ywords [expr ($i / 1000) % 10]] |
| 147 lappend y [lindex $ywords [expr ($i / 100) % 10]] |
| 148 lappend y [lindex $ywords [expr ($i / 10) % 10]] |
| 149 lappend y [lindex $ywords [expr ($i / 1) % 10]] |
| 150 |
| 151 $db eval { INSERT INTO t2(docid, x, y, l) VALUES($i, $x, $y, $iLangid) } |
| 152 } |
| 153 |
| 154 $db eval { |
| 155 CREATE TABLE data(x, y, l); |
| 156 INSERT INTO data(rowid, x, y, l) SELECT docid, x, y, l FROM t2; |
| 157 } |
| 158 } |
| 159 |
| 160 proc rowid_list_set_langid {langid} { |
| 161 set ::rowid_list_langid $langid |
| 162 } |
| 163 proc rowid_list {pattern} { |
| 164 set langid $::rowid_list_langid |
| 165 set res [list] |
| 166 db eval {SELECT rowid, x, y FROM data WHERE l = $langid ORDER BY rowid ASC} { |
| 167 if {[string match "*$pattern*" $x] || [string match "*$pattern*" $y]} { |
| 168 lappend res $rowid |
| 169 } |
| 170 } |
| 171 return $res |
| 172 } |
| 173 |
| 174 proc or_merge_list {list1 list2} { |
| 175 set res [list] |
| 176 |
| 177 set i1 0 |
| 178 set i2 0 |
| 179 |
| 180 set n1 [llength $list1] |
| 181 set n2 [llength $list2] |
| 182 |
| 183 while {$i1 < $n1 && $i2 < $n2} { |
| 184 set e1 [lindex $list1 $i1] |
| 185 set e2 [lindex $list2 $i2] |
| 186 |
| 187 if {$e1==$e2} { |
| 188 lappend res $e1 |
| 189 incr i1 |
| 190 incr i2 |
| 191 } elseif {$e1 < $e2} { |
| 192 lappend res $e1 |
| 193 incr i1 |
| 194 } else { |
| 195 lappend res $e2 |
| 196 incr i2 |
| 197 } |
| 198 } |
| 199 |
| 200 concat $res [lrange $list1 $i1 end] [lrange $list2 $i2 end] |
| 201 } |
| 202 |
| 203 proc or_merge_lists {args} { |
| 204 set res [lindex $args 0] |
| 205 for {set i 1} {$i < [llength $args]} {incr i} { |
| 206 set res [or_merge_list $res [lindex $args $i]] |
| 207 } |
| 208 set res |
| 209 } |
| 210 |
| 211 proc and_merge_list {list1 list2} { |
| 212 foreach i $list2 { set a($i) 1 } |
| 213 set res [list] |
| 214 foreach i $list1 { |
| 215 if {[info exists a($i)]} {lappend res $i} |
| 216 } |
| 217 set res |
| 218 } |
| 219 |
| 220 |
| 221 proc and_merge_lists {args} { |
| 222 set res [lindex $args 0] |
| 223 for {set i 1} {$i < [llength $args]} {incr i} { |
| 224 set res [and_merge_list $res [lindex $args $i]] |
| 225 } |
| 226 set res |
| 227 } |
| 228 |
| 229 proc filter_list {list langid} { |
| 230 set res [list] |
| 231 foreach i $list { |
| 232 if {($i % 9) == $langid} {lappend res $i} |
| 233 } |
| 234 set res |
| 235 } |
| 236 |
| 237 do_test 2.0 { |
| 238 reset_db |
| 239 build_multilingual_db_1 db |
| 240 } {} |
| 241 |
| 242 proc do_test_query1 {tn query res_script} { |
| 243 for {set langid 0} {$langid < 10} {incr langid} { |
| 244 rowid_list_set_langid $langid |
| 245 set res [eval $res_script] |
| 246 |
| 247 set actual [ |
| 248 execsql {SELECT docid FROM t2 WHERE t2 MATCH $query AND l = $langid} |
| 249 ] |
| 250 do_test $tn.$langid [list set {} $actual] $res |
| 251 } |
| 252 } |
| 253 |
| 254 # Run some queries. |
| 255 do_test_query1 2.1.1 {delta} { rowid_list delta } |
| 256 do_test_query1 2.1.2 {"zero one two"} { rowid_list "zero one two" } |
| 257 do_test_query1 2.1.3 {zero one two} { |
| 258 and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] |
| 259 } |
| 260 do_test_query1 2.1.4 {"zero one" OR "one two"} { |
| 261 or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] |
| 262 } |
| 263 |
| 264 # Now try the same tests as above, but after running the 'optimize' |
| 265 # command on the FTS table. |
| 266 # |
| 267 do_execsql_test 2.2 { |
| 268 INSERT INTO t2(t2) VALUES('optimize'); |
| 269 SELECT count(*) FROM t2_segdir; |
| 270 } {9} |
| 271 do_test_query1 2.2.1 {delta} { rowid_list delta } |
| 272 do_test_query1 2.2.2 {"zero one two"} { rowid_list "zero one two" } |
| 273 do_test_query1 2.2.3 {zero one two} { |
| 274 and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] |
| 275 } |
| 276 do_test_query1 2.2.4 {"zero one" OR "one two"} { |
| 277 or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] |
| 278 } |
| 279 |
| 280 # And rebuild. |
| 281 # |
| 282 do_test 2.3 { |
| 283 reset_db |
| 284 build_multilingual_db_1 db |
| 285 execsql { INSERT INTO t2(t2) VALUES('rebuild') } |
| 286 } {} |
| 287 do_test_query1 2.3.1 {delta} { rowid_list delta } |
| 288 do_test_query1 2.3.2 {"zero one two"} { rowid_list "zero one two" } |
| 289 do_test_query1 2.3.3 {zero one two} { |
| 290 and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] |
| 291 } |
| 292 do_test_query1 2.3.4 {"zero one" OR "one two"} { |
| 293 or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] |
| 294 } |
| 295 |
| 296 #------------------------------------------------------------------------- |
| 297 # Test cases 3.* |
| 298 # |
| 299 do_test 3.0 { |
| 300 reset_db |
| 301 build_multilingual_db_1 db |
| 302 execsql { |
| 303 CREATE TABLE t3_data(l, x, y); |
| 304 INSERT INTO t3_data(rowid, l, x, y) SELECT docid, l, x, y FROM t2; |
| 305 DROP TABLE t2; |
| 306 } |
| 307 } {} |
| 308 do_execsql_test 3.1 { |
| 309 CREATE VIRTUAL TABLE t2 USING fts4(content=t3_data, languageid=l); |
| 310 INSERT INTO t2(t2) VALUES('rebuild'); |
| 311 } |
| 312 |
| 313 do_test_query1 3.1.1 {delta} { rowid_list delta } |
| 314 do_test_query1 3.1.2 {"zero one two"} { rowid_list "zero one two" } |
| 315 do_test_query1 3.1.3 {zero one two} { |
| 316 and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] |
| 317 } |
| 318 do_test_query1 3.1.4 {"zero one" OR "one two"} { |
| 319 or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] |
| 320 } |
| 321 |
| 322 do_execsql_test 3.2.1 { |
| 323 DROP TABLE t2; |
| 324 CREATE VIRTUAL TABLE t2 USING fts4(x, y, languageid=l, content=nosuchtable); |
| 325 } |
| 326 |
| 327 do_execsql_test 3.2.2 { |
| 328 INSERT INTO t2(docid, x, y, l) SELECT rowid, x, y, l FROM t3_data; |
| 329 } |
| 330 |
| 331 do_execsql_test 3.2.3 { |
| 332 DROP TABLE t3_data; |
| 333 } |
| 334 |
| 335 do_test_query1 3.3.1 {delta} { rowid_list delta } |
| 336 do_test_query1 3.3.2 {"zero one two"} { rowid_list "zero one two" } |
| 337 do_test_query1 3.3.3 {zero one two} { |
| 338 and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] |
| 339 } |
| 340 do_test_query1 3.3.4 {"zero one" OR "one two"} { |
| 341 or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] |
| 342 } |
| 343 |
| 344 #------------------------------------------------------------------------- |
| 345 # Test cases 4.* |
| 346 # |
| 347 proc build_multilingual_db_2 {db} { |
| 348 $db eval { |
| 349 CREATE VIRTUAL TABLE t4 USING fts4( |
| 350 tokenize=testtokenizer, |
| 351 languageid=lid |
| 352 ); |
| 353 } |
| 354 for {set i 0} {$i < 50} {incr i} { |
| 355 execsql { |
| 356 INSERT INTO t4(docid, content, lid) VALUES($i, 'The Quick Brown Fox', $i) |
| 357 } |
| 358 } |
| 359 } |
| 360 |
| 361 do_test 4.1.0 { |
| 362 reset_db |
| 363 set ptr [fts3_test_tokenizer] |
| 364 execsql { SELECT fts3_tokenizer('testtokenizer', $ptr) } |
| 365 build_multilingual_db_2 db |
| 366 } {} |
| 367 do_execsql_test 4.1.1 { |
| 368 SELECT docid FROM t4 WHERE t4 MATCH 'quick'; |
| 369 } {0} |
| 370 do_execsql_test 4.1.2 { |
| 371 SELECT docid FROM t4 WHERE t4 MATCH 'quick' AND lid=1; |
| 372 } {} |
| 373 do_execsql_test 4.1.3 { |
| 374 SELECT docid FROM t4 WHERE t4 MATCH 'Quick' AND lid=1; |
| 375 } {1} |
| 376 for {set i 0} {$i < 50} {incr i} { |
| 377 do_execsql_test 4.1.4.$i { |
| 378 SELECT count(*) FROM t4 WHERE t4 MATCH 'fox' AND lid=$i; |
| 379 } [expr 0==($i%2)] |
| 380 } |
| 381 do_catchsql_test 4.1.5 { |
| 382 INSERT INTO t4(content, lid) VALUES('hello world', 101) |
| 383 } {1 {SQL logic error or missing database}} |
| 384 |
| 385 #------------------------------------------------------------------------- |
| 386 # Test cases 5.* |
| 387 # |
| 388 # The following test cases are designed to detect a 32-bit overflow bug |
| 389 # that existed at one point. |
| 390 # |
| 391 proc build_multilingual_db_3 {db} { |
| 392 $db eval { |
| 393 CREATE VIRTUAL TABLE t5 USING fts4(languageid=lid); |
| 394 } |
| 395 set languages [list 0 1 2 [expr 1<<30]] |
| 396 |
| 397 foreach lid $languages { |
| 398 execsql { |
| 399 INSERT INTO t5(docid, content, lid) VALUES( |
| 400 $lid, 'My language is ' || $lid, $lid |
| 401 ) |
| 402 } |
| 403 } |
| 404 } |
| 405 |
| 406 do_test 5.1.0 { |
| 407 reset_db |
| 408 build_multilingual_db_3 db |
| 409 } {} |
| 410 |
| 411 do_execsql_test 5.1.1 { |
| 412 SELECT level FROM t5_segdir; |
| 413 } [list 0 1024 2048 [expr 1<<40]] |
| 414 |
| 415 do_execsql_test 5.1.2 {SELECT docid FROM t5 WHERE t5 MATCH 'language'} 0 |
| 416 foreach langid [list 0 1 2 [expr 1<<30]] { |
| 417 do_execsql_test 5.2.$langid { |
| 418 SELECT docid FROM t5 WHERE t5 MATCH 'language' AND lid = $langid |
| 419 } $langid |
| 420 } |
| 421 |
| 422 set lid [expr 1<<30] |
| 423 do_execsql_test 5.3.1 { |
| 424 CREATE VIRTUAL TABLE t6 USING fts4(languageid=lid); |
| 425 INSERT INTO t6 VALUES('I belong to language 0!'); |
| 426 } |
| 427 do_test 5.3.2 { |
| 428 for {set i 0} {$i < 20} {incr i} { |
| 429 execsql { |
| 430 INSERT INTO t6(content, lid) VALUES( |
| 431 'I (row '||$i||') belong to langauge N!', $lid |
| 432 ); |
| 433 } |
| 434 } |
| 435 execsql { SELECT docid FROM t6 WHERE t6 MATCH 'belong' } |
| 436 } {1} |
| 437 |
| 438 do_test 5.3.3 { |
| 439 execsql { SELECT docid FROM t6 WHERE t6 MATCH 'belong' AND lid=$lid} |
| 440 } {2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21} |
| 441 |
| 442 do_execsql_test 5.3.4 { INSERT INTO t6(t6) VALUES('optimize') } {} |
| 443 do_execsql_test 5.3.5 { SELECT docid FROM t6 WHERE t6 MATCH 'belong' } {1} |
| 444 do_execsql_test 5.3.6 { |
| 445 SELECT docid FROM t6 WHERE t6 MATCH 'belong' AND lid=$lid |
| 446 } {2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21} |
| 447 |
| 448 |
| 449 set lid [expr 1<<30] |
| 450 foreach lid [list 4 [expr 1<<30]] { |
| 451 do_execsql_test 5.4.$lid.1 { |
| 452 DELETE FROM t6; |
| 453 SELECT count(*) FROM t6_segdir; |
| 454 SELECT count(*) FROM t6_segments; |
| 455 } {0 0} |
| 456 do_execsql_test 5.4.$lid.2 { |
| 457 INSERT INTO t6(content, lid) VALUES('zero zero zero', $lid); |
| 458 INSERT INTO t6(content, lid) VALUES('zero zero one', $lid); |
| 459 INSERT INTO t6(content, lid) VALUES('zero one zero', $lid); |
| 460 INSERT INTO t6(content, lid) VALUES('zero one one', $lid); |
| 461 INSERT INTO t6(content, lid) VALUES('one zero zero', $lid); |
| 462 INSERT INTO t6(content, lid) VALUES('one zero one', $lid); |
| 463 INSERT INTO t6(content, lid) VALUES('one one zero', $lid); |
| 464 INSERT INTO t6(content, lid) VALUES('one one one', $lid); |
| 465 |
| 466 SELECT docid FROM t6 WHERE t6 MATCH '"zero zero"' AND lid=$lid; |
| 467 } {1 2 5} |
| 468 |
| 469 do_execsql_test 5.4.$lid.3 { |
| 470 SELECT count(*) FROM t6_segdir; |
| 471 SELECT count(*) FROM t6_segments; |
| 472 } {8 0} |
| 473 |
| 474 do_execsql_test 5.4.$lid.4 { |
| 475 INSERT INTO t6(t6) VALUES('merge=100,3'); |
| 476 INSERT INTO t6(t6) VALUES('merge=100,3'); |
| 477 SELECT docid FROM t6 WHERE t6 MATCH '"zero zero"' AND lid=$lid; |
| 478 } {1 2 5} |
| 479 |
| 480 do_execsql_test 5.4.$lid.5 { |
| 481 SELECT count(*) FROM t6_segdir; |
| 482 SELECT count(*) FROM t6_segments; |
| 483 } {4 4} |
| 484 } |
| 485 finish_test |
OLD | NEW |