OLD | NEW |
(Empty) | |
| 1 # 2002 May 24 |
| 2 # |
| 3 # The author disclaims copyright to this source code. In place of |
| 4 # a legal notice, here is a blessing: |
| 5 # |
| 6 # May you do good and not evil. |
| 7 # May you find forgiveness for yourself and forgive others. |
| 8 # May you share freely, never taking more than you give. |
| 9 # |
| 10 #*********************************************************************** |
| 11 # This file implements regression tests for SQLite library. The focus of |
| 12 # this file is testing the SQLite routines used for converting between the |
| 13 # various supported unicode encodings (UTF-8, UTF-16, UTF-16le and |
| 14 # UTF-16be). |
| 15 # |
| 16 # $Id: enc.test,v 1.7 2007/05/23 16:23:09 danielk1977 Exp $ |
| 17 |
# Load the shared test harness that lives next to this script.
set testdir [file dirname $argv0]
source $testdir/tester.tcl

# Every test below exercises UTF-16 conversions.  When the build was
# compiled without the utf16 capability there is nothing to run, so
# close out the test run and stop processing this file.
ifcapable !utf16 {
  finish_test
  return
}
| 27 |
# Run one test case that compares two binary strings byte for byte.
#
#   testname - name handed to do_test for this case.
#   got      - binary string that was actually produced.
#   expect   - binary string that should have been produced.
#
# Both strings are unpacked into lists of signed 8-bit integers, so the
# comparison that do_test performs is over lists of numbers rather than
# over raw binary data.
proc do_bincmp_test {testname got expect} {
  binary scan $got    c* actual_bytes
  binary scan $expect c* wanted_bytes

  # The test body simply yields the list of bytes actually produced;
  # do_test compares that against the list of expected bytes.
  do_test $testname [list set dummy $actual_bytes] $wanted_bytes
}
| 33 |
# Reverse the byte order of a UTF-16 encoded string.
#
#   utf16 - binary string made of two-byte UTF-16 code units.
#
# Returns a binary string of the same length in which the two bytes of
# every code unit have been exchanged (UTF-16le <-> UTF-16be).  An empty
# input yields an empty result.  The input is expected to contain an
# even number of bytes; an odd-length string is not valid UTF-16 and is
# not supported.
proc swap_byte_order {utf16} {
  # Unpack the string into a list of signed 8-bit integers.
  binary scan $utf16 c* ints

  # Start from an empty list so that a zero-length input does not leave
  # the accumulator undefined when it is read by [binary format] below.
  set ints2 [list]
  foreach {a b} $ints {
    lappend ints2 $b $a
  }

  return [binary format c* $ints2]
}
| 46 |
| 47 # |
| 48 # Test that the SQLite routines for converting between UTF encodings |
| 49 # produce the same results as their TCL counterparts. |
| 50 # |
| 51 # $testname is the prefix to be used for the test names. |
| 52 # $str is a string to use for testing (encoded in UTF-8, as normal for TCL). |
| 53 # |
| 54 # The test procedure is: |
| 55 # 1. Convert the string from UTF-8 to UTF-16le and check that the TCL and |
| 56 # SQLite routines produce the same results. |
| 57 # |
| 58 # 2. Convert the string from UTF-8 to UTF-16be and check that the TCL and |
| 59 # SQLite routines produce the same results. |
| 60 # |
| 61 # 3. Use the SQLite routines to convert the native machine order UTF-16 |
| 62 # representation back to the original UTF-8. Check that the result |
| 63 # matches the original representation. |
| 64 # |
| 65 # 4. Add a byte-order mark to each of the UTF-16 representations and |
| 66 # check that the SQLite routines can convert them back to UTF-8. For |
| 67 # byte-order mark info, refer to section 3.10 of the unicode standard. |
| 68 # |
| 69 # 5. Take the byte-order marked UTF-16 strings from step 4 and ensure |
| 70 # that SQLite can convert them both to native byte order UTF-16 |
| 71 # strings, sans BOM. |
| 72 # |
| 73 # Coverage: |
| 74 # |
| 75 # sqlite_utf8to16be (step 2) |
| 76 # sqlite_utf8to16le (step 1) |
| 77 # sqlite_utf16to8 (steps 3, 4) |
| 78 # sqlite_utf16to16le (step 5) |
| 79 # sqlite_utf16to16be (step 5) |
| 80 # |
# Check SQLite's encoding conversions for one sample string against the
# results produced by Tcl itself.
#
#   testname - prefix for the names of the individual test cases.
#   str      - the sample text.
#
# Runs the cases $testname.1, .2, .3, .4.le, .4.be, .5.le.le, .5.be.be,
# .5.be.le and .5.le.be, in that order.
proc test_conversion {testname str} {
  set host_is_le [expr {$::tcl_platform(byteOrder) eq "littleEndian"}]

  # Reference data.  Tcl's "unicode" encoding is treated as UTF-16 in
  # the byte order of the host; two zero bytes are appended and the
  # opposite byte order is derived by swapping every pair of bytes.
  set native16 [encoding convertto unicode $str]
  append native16 "\x00\x00"
  set swapped16 [swap_byte_order $native16]
  if {$host_is_le} {
    set utf16le $native16
    set utf16be $swapped16
  } else {
    set utf16le $swapped16
    set utf16be $native16
  }

  # Step 1: UTF-8 to UTF-16le.
  do_bincmp_test $testname.1 [test_translate $str UTF8 UTF16LE] $utf16le

  # Step 2: UTF-8 to UTF-16be.
  do_bincmp_test $testname.2 [test_translate $str UTF8 UTF16BE] $utf16be

  # Step 3: native byte order UTF-16 back to UTF-8.
  do_bincmp_test $testname.3 \
      [test_translate $native16 UTF16 UTF8] [binarize $str]

  # Step 4: UTF-16 carrying a leading byte-order mark back to UTF-8.
  # NOTE(review): only the little-endian case passes the trailing "1"
  # argument; its meaning is defined by test_translate, which is not
  # visible here.
  set utf16le_bom "\xFF\xFE${utf16le}"
  do_bincmp_test $testname.4.le \
      [test_translate $utf16le_bom UTF16 UTF8 1] [binarize $str]

  set utf16be_bom "\xFE\xFF${utf16be}"
  do_bincmp_test $testname.4.be \
      [test_translate $utf16be_bom UTF16 UTF8] [binarize $str]

  # Step 5: byte-order-marked UTF-16 to UTF-16 of a fixed byte order.
  # The expected values carry no byte-order mark.
  # NOTE(review): the le.le case names its source encoding UTF16LE
  # while the other three pass UTF16; kept exactly as found.
  do_bincmp_test $testname.5.le.le \
      [test_translate $utf16le_bom UTF16LE UTF16LE] $utf16le
  do_bincmp_test $testname.5.be.be \
      [test_translate $utf16be_bom UTF16 UTF16BE] $utf16be
  do_bincmp_test $testname.5.be.le \
      [test_translate $utf16be_bom UTF16 UTF16LE] $utf16le
  do_bincmp_test $testname.5.le.be \
      [test_translate $utf16le_bom UTF16 UTF16BE] $utf16be
}
| 138 |
# Run the harness's translate_selftest command before the conversion
# cases (what it checks is defined outside this file).
translate_selftest

# Conversion cases as {test-name sample-text} pairs, run in the order
# listed:
#   enc-1 .. enc-3  plain ASCII, including the empty string.
#   enc-X .. enc-6  single non-ASCII characters, alone and mixed with
#                   ASCII.
#   enc-7           long text mixing ASCII with U+00EF, U+00EE, U+FFFC.
#   enc-8           code points on either side of U+007F/U+0080, where
#                   UTF-8 grows from one byte to two.
#   enc-9           code points on either side of U+07FF/U+0800, where
#                   UTF-8 grows from two bytes to three, plus U+FFF0.
#   enc-10          repeated U+E000.
foreach {enc_case_name enc_case_text} [list \
  enc-1  "hello world" \
  enc-2  "sqlite" \
  enc-3  "" \
  enc-X  "\u0100" \
  enc-4  "\u1234" \
  enc-5  "\u4321abc" \
  enc-6  "\u4321\u1234" \
  enc-7  [string repeat "abcde\u00EF\u00EE\uFFFCabc" 100] \
  enc-8  [string repeat "\u007E\u007F\u0080\u0081" 100] \
  enc-9  [string repeat "\u07FE\u07FF\u0800\u0801\uFFF0" 100] \
  enc-10 [string repeat "\uE000" 100] \
] {
  test_conversion $enc_case_name $enc_case_text
}
# Drop the loop variables so no extra globals are left behind.
unset enc_case_name enc_case_text
| 152 |
# Collation callback used by the enc-11 tests.
#
#   enc    - accepted but not used by this implementation.
#   zLeft  - left-hand string.
#   zRight - right-hand string.
#
# Returns the result of [string compare]: -1, 0 or 1 when zLeft sorts
# before, equal to, or after zRight.
proc test_collate {enc zLeft zRight} {
  string compare $zLeft $zRight
}
# Register the test_collate collation on the test database handle.
# NOTE(review): the three flags presumably select which text encodings
# the collation is registered for -- confirm in add_test_collate.
add_test_collate $::DB 0 0 1

# enc-11.1: build a table whose first column uses test_collate, insert
# two rows, then index the table.  The first row's text is X'C388', the
# UTF-8 encoding of U+00C8.  The second row's text starts with C0, is
# followed by a long run of 80 bytes and ends with 83 88.
# NOTE(review): the second value looks like deliberately malformed
# UTF-8 meant to stress conversion for the collation -- confirm.
do_test enc-11.1 {
  execsql {
    CREATE TABLE ab(a COLLATE test_collate, b);
    INSERT INTO ab VALUES(CAST (X'C388' AS TEXT), X'888800');
    INSERT INTO ab VALUES(CAST (X'C0808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808388' AS TEXT), X'888800');
    CREATE INDEX ab_i ON ab(a, b);
  }
} {}
# enc-11.2: look up U+00C8 (code point 200) in table ab.  The expected
# count of 2 means that both rows inserted by enc-11.1 must compare
# equal to it under the test_collate collation.
do_test enc-11.2 {
  set cp200 "\u00C8"
  execsql {
    SELECT count(*) FROM ab WHERE a = $::cp200;
  }
} {2}

# Close out the test run.
finish_test
OLD | NEW |