OLD | NEW |
(Empty) | |
| 1 # 2002 May 24 |
| 2 # |
| 3 # The author disclaims copyright to this source code. In place of |
| 4 # a legal notice, here is a blessing: |
| 5 # |
| 6 # May you do good and not evil. |
| 7 # May you find forgiveness for yourself and forgive others. |
| 8 # May you share freely, never taking more than you give. |
| 9 # |
| 10 #*********************************************************************** |
| 11 # This file implements regression tests for SQLite library. The focus of |
| 12 # this file is testing the SQLite routines used for converting between the |
| 13 # various supported unicode encodings (UTF-8, UTF-16, UTF-16le and |
| 14 # UTF-16be). |
| 15 # |
| 16 # $Id: enc2.test,v 1.29 2007/10/09 08:29:32 danielk1977 Exp $ |
| 17 |
| 18 set testdir [file dirname $argv0] |
| 19 source $testdir/tester.tcl |
| 20 |
| 21 # If UTF16 support is disabled, ignore the tests in this file |
| 22 # |
| 23 ifcapable {!utf16} { |
| 24 finish_test |
| 25 return |
| 26 } |
| 27 |
| 28 # The rough organisation of tests in this file is: |
| 29 # |
| 30 # enc2.1.*: Simple tests with a UTF-8 db. |
| 31 # enc2.2.*: Simple tests with a UTF-16LE db. |
| 32 # enc2.3.*: Simple tests with a UTF-16BE db. |
| 33 # enc2.4.*: Test that attached databases must have the same text encoding |
| 34 # as the main database. |
| 35 # enc2.5.*: Test the behavior of the library when a collation sequence is |
| 36 # not available for the most desirable text encoding. |
| 37 # enc2.6.*: Similar test for user functions. |
| 38 # enc2.7.*: Test that the VerifyCookie opcode protects against assuming the |
| 39 # wrong text encoding for the database. |
| 40 # enc2.8.*: Test sqlite3_complete16() |
| 41 # |
| 42 |
| 43 db close |
| 44 |
| 45 # Return the UTF-8 representation of the supplied UTF-16 string $str. |
| 46 proc utf8 {str} { |
| 47 # A trailing pair of 0x00 0x00 bytes (a UTF-16 terminator) is |
| 48 # stripped first so that TCL does not decode it as a character. |
| 49 binary scan $str \c* bytes |
| 50 set last [lindex $bytes end] |
| 51 set prev [lindex $bytes end-1] |
| 52 if {$last==0 && $prev==0} { |
| 53 set str [binary format \c* [lrange $bytes 0 end-2]] |
| 54 } |
| 55 return [encoding convertfrom unicode $str] |
| 56 } |
| 57 |
| 58 # |
| 59 # This proc contains all the tests in this file. It is run |
| 60 # three times. Each time the file 'test.db' contains a database |
| 61 # with the following contents: |
| 62 set dbcontents { |
| 63 CREATE TABLE t1(a PRIMARY KEY, b, c); |
| 64 INSERT INTO t1 VALUES('one', 'I', 1); |
| 65 } |
| 66 # This proc tests that we can open and manipulate the test.db |
| 67 # database, and that it is possible to retrieve values in |
| 68 # various text encodings. |
| 69 # |
| 70 proc run_test_script {t enc} { |
| 71 # t: prefix for the test names; enc: expected result of "PRAGMA encoding". |
| 72 # Open the database and read back the one row it starts out with. |
| 73 do_test $t.1 { |
| 74 sqlite3 db test.db; set DB [sqlite3_connection_pointer db] |
| 75 execsql {SELECT * FROM t1} |
| 76 } {one I 1} |
| 77 |
| 78 # Insert a second row and check that both rows come back. |
| 79 do_test $t.2 { |
| 80 execsql {INSERT INTO t1 VALUES('two', 'II', 2);} |
| 81 execsql {SELECT * FROM t1} |
| 82 } {one I 1 two II 2} |
| 83 |
| 84 # Insert three more rows with a single execsql call. |
| 85 do_test $t.3 { |
| 86 execsql { |
| 87 INSERT INTO t1 VALUES('three','III',3); |
| 88 INSERT INTO t1 VALUES('four','IV',4); |
| 89 INSERT INTO t1 VALUES('five','V',5); |
| 90 } |
| 91 execsql {SELECT * FROM t1} |
| 92 } {one I 1 two II 2 three III 3 four IV 4 five V 5} |
| 93 |
| 94 # Use the index: look rows up by column a, the PRIMARY KEY. |
| 95 do_test $t.4 { |
| 96 execsql { |
| 97 SELECT * FROM t1 WHERE a = 'one'; |
| 98 } |
| 99 } {one I 1} |
| 100 do_test $t.5 { |
| 101 execsql { |
| 102 SELECT * FROM t1 WHERE a = 'four'; |
| 103 } |
| 104 } {four IV 4} |
| 105 ifcapable subquery { |
| 106 do_test $t.6 { |
| 107 execsql { |
| 108 SELECT * FROM t1 WHERE a IN ('one', 'two'); |
| 109 } |
| 110 } {one I 1 two II 2} |
| 111 } |
| 112 |
| 113 # Fetch one row via sqlite3_column_text and the next via sqlite3_column_text16. |
| 114 do_test $t.7 { |
| 115 set STMT [sqlite3_prepare $DB "SELECT a FROM t1 WHERE c>3;" -1 TAIL] |
| 116 sqlite3_step $STMT |
| 117 sqlite3_column_text $STMT 0 |
| 118 } {four} |
| 119 |
| 120 do_test $t.8 { |
| 121 sqlite3_step $STMT |
| 122 utf8 [sqlite3_column_text16 $STMT 0] |
| 123 } {five} |
| 124 |
| 125 do_test $t.9 { |
| 126 sqlite3_finalize $STMT |
| 127 } SQLITE_OK |
| 128 # Run VACUUM (when compiled in), then check that the encoding reads as $enc. |
| 129 ifcapable vacuum { |
| 130 execsql VACUUM |
| 131 } |
| 132 |
| 133 do_test $t.10 { |
| 134 db eval {PRAGMA encoding} |
| 135 } $enc |
| 136 |
| 137 } |
| 138 |
| 139 # The three unicode encodings understood by SQLite. |
| 140 set encodings [list UTF-8 UTF-16le UTF-16be] |
| 141 |
| 142 set sqlite_os_trace 0 |
| 143 set i 1 |
| 144 foreach enc $encodings { |
| 145 forcedelete test.db |
| 146 sqlite3 db test.db |
| 147 db eval "PRAGMA encoding = \"$enc\"" |
| 148 execsql $dbcontents |
| 149 do_test enc2-$i.0.1 { |
| 150 db eval {PRAGMA encoding} |
| 151 } $enc |
| 152 do_test enc2-$i.0.2 { |
| 153 db eval {PRAGMA encoding=UTF8} |
| 154 db eval {PRAGMA encoding} |
| 155 } $enc |
| 156 do_test enc2-$i.0.3 { |
| 157 db eval {PRAGMA encoding=UTF16le} |
| 158 db eval {PRAGMA encoding} |
| 159 } $enc |
| 160 do_test enc2-$i.0.4 { |
| 161 db eval {PRAGMA encoding=UTF16be} |
| 162 db eval {PRAGMA encoding} |
| 163 } $enc |
| 164 |
| 165 db close |
| 166 run_test_script enc2-$i $enc |
| 167 db close |
| 168 incr i |
| 169 } |
| 170 |
| 171 # Test that it is an error to try to attach a database with a different |
| 172 # encoding to the main database. |
| 173 ifcapable attach { |
| 174 do_test enc2-4.1 { |
| 175 forcedelete test.db |
| 176 sqlite3 db test.db |
| 177 db eval "PRAGMA encoding = 'UTF-8'" |
| 178 db eval "CREATE TABLE abc(a, b, c);" |
| 179 } {} |
| 180 do_test enc2-4.2 { |
| 181 forcedelete test2.db |
| 182 sqlite3 db2 test2.db |
| 183 db2 eval "PRAGMA encoding = 'UTF-16'" |
| 184 db2 eval "CREATE TABLE abc(a, b, c);" |
| 185 } {} |
| 186 do_test enc2-4.3 { |
| 187 catchsql { |
| 188 ATTACH 'test2.db' as aux; |
| 189 } |
| 190 } {1 {attached databases must use the same text encoding as main database}} |
| 191 db2 close |
| 192 db close |
| 193 } |
| 194 |
| 195 # The following tests - enc2-5.* - test that SQLite selects the correct |
| 196 # collation sequence when more than one is available. |
| 197 # test_collate orders by index in ::values and stores $enc in ::test_collate_enc. |
| 198 set ::values [list one two three four five] |
| 199 set ::test_collate_enc INVALID |
| 200 proc test_collate {enc lhs rhs} { |
| 201 set ::test_collate_enc $enc |
| 202 set l [lsearch -exact $::values $lhs] |
| 203 set r [lsearch -exact $::values $rhs] |
| 204 set res [expr {$l - $r}] |
| 205 # puts "enc=$enc lhs=$lhs/$l rhs=$rhs/$r res=$res" |
| 206 return $res |
| 207 } |
| 208 |
| 209 forcedelete test.db |
| 210 sqlite3 db test.db; set DB [sqlite3_connection_pointer db] |
| 211 do_test enc2-5.0 { |
| 212 execsql { |
| 213 CREATE TABLE t5(a); |
| 214 INSERT INTO t5 VALUES('one'); |
| 215 INSERT INTO t5 VALUES('two'); |
| 216 INSERT INTO t5 VALUES('five'); |
| 217 INSERT INTO t5 VALUES('three'); |
| 218 INSERT INTO t5 VALUES('four'); |
| 219 } |
| 220 } {} |
| 221 do_test enc2-5.1 { |
| 222 add_test_collate $DB 1 1 1 |
| 223 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate;}] |
| 224 lappend res $::test_collate_enc |
| 225 } {one two three four five UTF-8} |
| 226 do_test enc2-5.2 { |
| 227 add_test_collate $DB 0 1 0 |
| 228 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] |
| 229 lappend res $::test_collate_enc |
| 230 } {one two three four five UTF-16LE} |
| 231 do_test enc2-5.3 { |
| 232 add_test_collate $DB 0 0 1 |
| 233 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] |
| 234 lappend res $::test_collate_enc |
| 235 } {one two three four five UTF-16BE} |
| 236 |
| 237 db close |
| 238 forcedelete test.db |
| 239 sqlite3 db test.db; set DB [sqlite3_connection_pointer db] |
| 240 execsql {pragma encoding = 'UTF-16LE'} |
| 241 do_test enc2-5.4 { |
| 242 execsql { |
| 243 CREATE TABLE t5(a); |
| 244 INSERT INTO t5 VALUES('one'); |
| 245 INSERT INTO t5 VALUES('two'); |
| 246 INSERT INTO t5 VALUES('five'); |
| 247 INSERT INTO t5 VALUES('three'); |
| 248 INSERT INTO t5 VALUES('four'); |
| 249 } |
| 250 } {} |
| 251 do_test enc2-5.5 { |
| 252 add_test_collate $DB 1 1 1 |
| 253 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] |
| 254 lappend res $::test_collate_enc |
| 255 } {one two three four five UTF-16LE} |
| 256 do_test enc2-5.6 { |
| 257 add_test_collate $DB 1 0 1 |
| 258 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] |
| 259 lappend res $::test_collate_enc |
| 260 } {one two three four five UTF-16BE} |
| 261 do_test enc2-5.7 { |
| 262 add_test_collate $DB 1 0 0 |
| 263 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] |
| 264 lappend res $::test_collate_enc |
| 265 } {one two three four five UTF-8} |
| 266 |
| 267 db close |
| 268 forcedelete test.db |
| 269 sqlite3 db test.db; set DB [sqlite3_connection_pointer db] |
| 270 execsql {pragma encoding = 'UTF-16BE'} |
| 271 do_test enc2-5.8 { |
| 272 execsql { |
| 273 CREATE TABLE t5(a); |
| 274 INSERT INTO t5 VALUES('one'); |
| 275 INSERT INTO t5 VALUES('two'); |
| 276 INSERT INTO t5 VALUES('five'); |
| 277 INSERT INTO t5 VALUES('three'); |
| 278 INSERT INTO t5 VALUES('four'); |
| 279 } |
| 280 } {} |
| 281 do_test enc2-5.9 { |
| 282 add_test_collate $DB 1 1 1 |
| 283 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] |
| 284 lappend res $::test_collate_enc |
| 285 } {one two three four five UTF-16BE} |
| 286 do_test enc2-5.10 { |
| 287 add_test_collate $DB 1 1 0 |
| 288 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] |
| 289 lappend res $::test_collate_enc |
| 290 } {one two three four five UTF-16LE} |
| 291 do_test enc2-5.11 { |
| 292 add_test_collate $DB 1 0 0 |
| 293 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] |
| 294 lappend res $::test_collate_enc |
| 295 } {one two three four five UTF-8} |
| 296 |
| 297 # Also test that a UTF-16 collation factory works. |
| 298 do_test enc2-5.12 { |
| 299 add_test_collate $DB 0 0 0 |
| 300 catchsql { |
| 301 SELECT * FROM t5 ORDER BY 1 COLLATE test_collate |
| 302 } |
| 303 } {1 {no such collation sequence: test_collate}} |
| 304 do_test enc2-5.13 { |
| 305 add_test_collate_needed $DB |
| 306 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate; }] |
| 307 lappend res $::test_collate_enc |
| 308 } {one two three four five UTF-16BE} |
| 309 do_test enc2-5.14 { |
| 310 set ::sqlite_last_needed_collation |
| 311 } test_collate |
| 312 |
| 313 db close |
| 314 forcedelete test.db |
| 315 |
| 316 do_test enc2-5.15 { |
| 317 sqlite3 db test.db; set ::DB [sqlite3_connection_pointer db] |
| 318 add_test_collate_needed $::DB |
| 319 set ::sqlite_last_needed_collation |
| 320 } {} |
| 321 do_test enc2-5.16 { |
| 322 execsql {CREATE TABLE t1(a varchar collate test_collate);} |
| 323 } {} |
| 324 do_test enc2-5.17 { |
| 325 set ::sqlite_last_needed_collation |
| 326 } {test_collate} |
| 327 |
| 328 # The following tests - enc2-6.* - test that SQLite selects the correct |
| 329 # user function when more than one is available. |
| 330 |
| 331 proc test_function {enc arg} { |
| 332 return [format {%s %s} $enc $arg] |
| 333 } |
| 334 |
| 335 db close |
| 336 forcedelete test.db |
| 337 sqlite3 db test.db; set DB [sqlite3_connection_pointer db] |
| 338 execsql {pragma encoding = 'UTF-8'} |
| 339 do_test enc2-6.0 { |
| 340 execsql { |
| 341 CREATE TABLE t5(a); |
| 342 INSERT INTO t5 VALUES('one'); |
| 343 } |
| 344 } {} |
| 345 do_test enc2-6.1 { |
| 346 add_test_function $DB 1 1 1 |
| 347 execsql { |
| 348 SELECT test_function('sqlite') |
| 349 } |
| 350 } {{UTF-8 sqlite}} |
| 351 db close |
| 352 sqlite3 db test.db; set DB [sqlite3_connection_pointer db] |
| 353 do_test enc2-6.2 { |
| 354 add_test_function $DB 0 1 0 |
| 355 execsql { |
| 356 SELECT test_function('sqlite') |
| 357 } |
| 358 } {{UTF-16LE sqlite}} |
| 359 db close |
| 360 sqlite3 db test.db; set DB [sqlite3_connection_pointer db] |
| 361 do_test enc2-6.3 { |
| 362 add_test_function $DB 0 0 1 |
| 363 execsql { |
| 364 SELECT test_function('sqlite') |
| 365 } |
| 366 } {{UTF-16BE sqlite}} |
| 367 |
| 368 db close |
| 369 forcedelete test.db |
| 370 sqlite3 db test.db; set DB [sqlite3_connection_pointer db] |
| 371 execsql {pragma encoding = 'UTF-16LE'} |
| 372 do_test enc2-6.3.1 { |
| 373 execsql { |
| 374 CREATE TABLE t5(a); |
| 375 INSERT INTO t5 VALUES('sqlite'); |
| 376 } |
| 377 } {} |
| 378 do_test enc2-6.4 { |
| 379 add_test_function $DB 1 1 1 |
| 380 execsql { |
| 381 SELECT test_function('sqlite') |
| 382 } |
| 383 } {{UTF-16LE sqlite}} |
| 384 db close |
| 385 sqlite3 db test.db; set DB [sqlite3_connection_pointer db] |
| 386 do_test enc2-6.5 { |
| 387 add_test_function $DB 0 1 0 |
| 388 execsql { |
| 389 SELECT test_function('sqlite') |
| 390 } |
| 391 } {{UTF-16LE sqlite}} |
| 392 db close |
| 393 sqlite3 db test.db; set DB [sqlite3_connection_pointer db] |
| 394 do_test enc2-6.6 { |
| 395 add_test_function $DB 0 0 1 |
| 396 execsql { |
| 397 SELECT test_function('sqlite') |
| 398 } |
| 399 } {{UTF-16BE sqlite}} |
| 400 |
| 401 db close |
| 402 forcedelete test.db |
| 403 sqlite3 db test.db; set DB [sqlite3_connection_pointer db] |
| 404 execsql {pragma encoding = 'UTF-16BE'} |
| 405 do_test enc2-6.7 { |
| 406 execsql { |
| 407 CREATE TABLE t5(a); |
| 408 INSERT INTO t5 VALUES('sqlite'); |
| 409 } |
| 410 } {} |
| 411 do_test enc2-6.8 { |
| 412 add_test_function $DB 1 1 1 |
| 413 execsql { |
| 414 SELECT test_function('sqlite') |
| 415 } |
| 416 } {{UTF-16BE sqlite}} |
| 417 db close |
| 418 sqlite3 db test.db; set DB [sqlite3_connection_pointer db] |
| 419 do_test enc2-6.9 { |
| 420 add_test_function $DB 0 1 0 |
| 421 execsql { |
| 422 SELECT test_function('sqlite') |
| 423 } |
| 424 } {{UTF-16LE sqlite}} |
| 425 db close |
| 426 sqlite3 db test.db; set DB [sqlite3_connection_pointer db] |
| 427 do_test enc2-6.10 { |
| 428 add_test_function $DB 0 0 1 |
| 429 execsql { |
| 430 SELECT test_function('sqlite') |
| 431 } |
| 432 } {{UTF-16BE sqlite}} |
| 433 |
| 434 |
| 435 db close |
| 436 forcedelete test.db |
| 437 |
| 438 # The following tests - enc2-7.* - function as follows: |
| 439 # |
| 440 # 1: Open an empty database file assuming UTF-16 encoding. |
| 441 # 2: Open the same database with a different handle assuming UTF-8. Create |
| 442 # a table using this handle. |
| 443 # 3: Read the sqlite_master table from the first handle. |
| 444 # 4: Ensure the first handle recognises the database encoding is UTF-8. |
| 445 # |
| 446 do_test enc2-7.1 { |
| 447 sqlite3 db test.db |
| 448 execsql { |
| 449 PRAGMA encoding = 'UTF-16'; |
| 450 SELECT * FROM sqlite_master; |
| 451 } |
| 452 } {} |
| 453 do_test enc2-7.2 { |
| 454 set enc [execsql { |
| 455 PRAGMA encoding; |
| 456 }] |
| 457 string range $enc 0 end-2 ;# Chop off the "le" or "be" |
| 458 } {UTF-16} |
| 459 do_test enc2-7.3 { |
| 460 sqlite3 db2 test.db |
| 461 execsql { |
| 462 PRAGMA encoding = 'UTF-8'; |
| 463 CREATE TABLE abc(a, b, c); |
| 464 } db2 |
| 465 } {} |
| 466 do_test enc2-7.4 { |
| 467 execsql { |
| 468 SELECT * FROM sqlite_master; |
| 469 } |
| 470 } "table abc abc [expr {$AUTOVACUUM ? 3 : 2}] {CREATE TABLE abc(a, b, c)}" |
| 471 do_test enc2-7.5 { |
| 472 execsql { |
| 473 PRAGMA encoding; |
| 474 } |
| 475 } {UTF-8} |
| 476 |
| 477 db close |
| 478 db2 close |
| 479 |
| 480 proc utf16 {utf8} { |
| 481 # Convert with TCL's "unicode" encoding, then add two 0x00 bytes. |
| 482 set encoded [encoding convertto unicode $utf8] |
| 483 return "$encoded\x00\x00" |
| 484 } |
| 485 ifcapable {complete} { |
| 486 do_test enc2-8.1 { |
| 487 sqlite3_complete16 [utf16 "SELECT * FROM t1;"] |
| 488 } {1} |
| 489 do_test enc2-8.2 { |
| 490 sqlite3_complete16 [utf16 "SELECT * FROM"] |
| 491 } {0} |
| 492 } |
| 493 |
| 494 # Test that the encoding of an empty database may still be set after the |
| 495 # (empty) schema has been initialized. |
| 496 forcedelete test.db |
| 497 do_test enc2-9.1 { |
| 498 sqlite3 db test.db |
| 499 execsql { |
| 500 PRAGMA encoding = 'UTF-8'; |
| 501 PRAGMA encoding; |
| 502 } |
| 503 } {UTF-8} |
| 504 do_test enc2-9.2 { |
| 505 sqlite3 db test.db |
| 506 execsql { |
| 507 PRAGMA encoding = 'UTF-16le'; |
| 508 PRAGMA encoding; |
| 509 } |
| 510 } {UTF-16le} |
| 511 do_test enc2-9.3 { |
| 512 sqlite3 db test.db |
| 513 execsql { |
| 514 SELECT * FROM sqlite_master; |
| 515 PRAGMA encoding = 'UTF-8'; |
| 516 PRAGMA encoding; |
| 517 } |
| 518 } {UTF-8} |
| 519 do_test enc2-9.4 { |
| 520 sqlite3 db test.db |
| 521 execsql { |
| 522 PRAGMA encoding = 'UTF-16le'; |
| 523 CREATE TABLE abc(a, b, c); |
| 524 PRAGMA encoding; |
| 525 } |
| 526 } {UTF-16le} |
| 527 do_test enc2-9.5 { |
| 528 sqlite3 db test.db |
| 529 execsql { |
| 530 PRAGMA encoding = 'UTF-8'; |
| 531 PRAGMA encoding; |
| 532 } |
| 533 } {UTF-16le} |
| 534 |
| 535 # Ticket #1987. |
| 536 # Disallow encoding changes once the encoding has been set. |
| 537 # |
| 538 do_test enc2-10.1 { |
| 539 db close |
| 540 forcedelete test.db test.db-journal |
| 541 sqlite3 db test.db |
| 542 db eval { |
| 543 PRAGMA encoding=UTF16; |
| 544 CREATE TABLE t1(a); |
| 545 PRAGMA encoding=UTF8; |
| 546 CREATE TABLE t2(b); |
| 547 } |
| 548 db close |
| 549 sqlite3 db test.db |
| 550 db eval { |
| 551 SELECT name FROM sqlite_master |
| 552 } |
| 553 } {t1 t2} |
| 554 |
| 555 finish_test |
OLD | NEW |