| OLD | NEW | 
 | (Empty) | 
|    1 # 2002 May 24 |  | 
|    2 # |  | 
|    3 # The author disclaims copyright to this source code.  In place of |  | 
|    4 # a legal notice, here is a blessing: |  | 
|    5 # |  | 
|    6 #    May you do good and not evil. |  | 
|    7 #    May you find forgiveness for yourself and forgive others. |  | 
|    8 #    May you share freely, never taking more than you give. |  | 
|    9 # |  | 
|   10 #*********************************************************************** |  | 
|   11 # This file implements regression tests for SQLite library.  The focus of |  | 
|   12 # this file is testing the SQLite routines used for converting between the |  | 
|   13 # various supported Unicode encodings (UTF-8, UTF-16, UTF-16le and |  | 
|   14 # UTF-16be). |  | 
|   15 # |  | 
|   16 # $Id: enc.test,v 1.7 2007/05/23 16:23:09 danielk1977 Exp $ |  | 
|   17  |  | 
# Load the shared test harness (tester.tcl) from the directory that holds
# the script named by $argv0.
set testdir [file dirname $argv0]
source $testdir/tester.tcl

# Nothing in this file is meaningful without UTF-16 support, so when the
# build lacks the utf16 capability the test run is closed off immediately.
ifcapable !utf16 {
  finish_test
  return
}
|   27  |  | 
# Compare two binary strings byte by byte through the do_test harness.
#
# $testname is the name passed on to do_test.
# $got      is the binary string that was actually produced.
# $expect   is the binary string that should have been produced.
#
# Both strings are expanded into lists of signed 8-bit integers so that a
# mismatch is reported as readable numbers rather than raw bytes.
proc do_bincmp_test {testname got expect} {
  binary scan $got c* actual_bytes
  binary scan $expect c* wanted_bytes

  # The script handed to do_test simply yields the list of actual bytes.
  set script [list set dummy $actual_bytes]
  do_test $testname $script $wanted_bytes
}
|   33  |  | 
# Reverse the byte order of a UTF-16 encoded string.
#
# $utf16 is handled as raw bytes: every consecutive pair of bytes is swapped,
# which turns UTF-16le into UTF-16be and vice versa.  The result is returned
# as a binary string.  An empty input yields an empty result.  The input is
# expected to contain an even number of bytes.
proc swap_byte_order {utf16} {
  binary scan $utf16 c* ints

  # Start from an empty list so that an empty input still produces a defined
  # (empty) result instead of reading a variable that was never set.
  set swapped {}
  foreach {a b} $ints {
    lappend swapped $b $a
  }

  return [binary format c* $swapped]
}
|   46  |  | 
#
# Check that test_translate converts between UTF encodings the same way
# TCL's own [encoding] command does.
#
# $testname is the prefix used for every test name generated here.
# $str      is the TCL string to convert.
#
# Tests generated:
#
#   $testname.1      UTF-8 -> UTF-16le, compared against TCL's conversion.
#   $testname.2      UTF-8 -> UTF-16be, compared against TCL's conversion.
#   $testname.3      Host-order UTF-16 -> UTF-8, compared against
#                    [binarize $str].
#   $testname.4.le   UTF-16le prefixed with the BOM FF FE -> UTF-8.
#   $testname.4.be   UTF-16be prefixed with the BOM FE FF -> UTF-8.
#   $testname.5.*    The BOM-prefixed strings converted to UTF-16le and
#                    UTF-16be; the expected results carry no BOM.
#
# The expected UTF-16 values are TCL's "unicode" encoding of $str with two
# zero bytes appended, byte-swapped when the host byte order differs from
# the byte order wanted.
#
proc test_conversion {testname str} {
  set on_le_host [expr {$::tcl_platform(byteOrder) eq "littleEndian"}]

  # Host-order UTF-16 rendering of $str, followed by two zero bytes.
  set native16 [encoding convertto unicode $str]
  append native16 "\x00\x00"

  # Step 1: UTF-8 to UTF-16le.
  set from_sqlite [test_translate $str UTF8 UTF16LE]
  if {$on_le_host} {
    set utf16le $native16
  } else {
    set utf16le [swap_byte_order $native16]
  }
  do_bincmp_test $testname.1 $from_sqlite $utf16le

  # Step 2: UTF-8 to UTF-16be.
  set from_sqlite [test_translate $str UTF8 UTF16BE]
  if {$on_le_host} {
    set utf16be [swap_byte_order $native16]
  } else {
    set utf16be $native16
  }
  do_bincmp_test $testname.2 $from_sqlite $utf16be

  # Step 3: host-order UTF-16 back to UTF-8.
  set from_sqlite [test_translate $native16 UTF16 UTF8]
  do_bincmp_test $testname.3 $from_sqlite [binarize $str]

  # Step 4: BOM-prefixed UTF-16 back to UTF-8.  Only the little-endian call
  # passes the extra trailing argument 1; its meaning is defined by
  # test_translate, which lives outside this file.
  set le_with_bom "\xFF\xFE$utf16le"
  set from_sqlite [test_translate $le_with_bom UTF16 UTF8 1]
  do_bincmp_test $testname.4.le $from_sqlite [binarize $str]

  set be_with_bom "\xFE\xFF$utf16be"
  set from_sqlite [test_translate $be_with_bom UTF16 UTF8]
  do_bincmp_test $testname.4.be $from_sqlite [binarize $str]

  # Step 5: BOM-prefixed UTF-16 to UTF-16 of an explicit byte order.  The
  # first case names UTF16LE as the source encoding; the others use UTF16.
  set from_sqlite [test_translate $le_with_bom UTF16LE UTF16LE]
  do_bincmp_test $testname.5.le.le $from_sqlite $utf16le

  set from_sqlite [test_translate $be_with_bom UTF16 UTF16BE]
  do_bincmp_test $testname.5.be.be $from_sqlite $utf16be

  set from_sqlite [test_translate $be_with_bom UTF16 UTF16LE]
  do_bincmp_test $testname.5.be.le $from_sqlite $utf16le

  set from_sqlite [test_translate $le_with_bom UTF16 UTF16BE]
  do_bincmp_test $testname.5.le.be $from_sqlite $utf16be
}
|  138  |  | 
# Run the translate_selftest command (defined outside this file) before any
# of the conversion cases.
translate_selftest

# Conversion cases as (test-name prefix, input string) pairs.  The inputs
# cover the empty string, plain ASCII, and code points on either side of the
# U+007F/U+0080 and U+07FF/U+0800 boundaries.
foreach {enc_case_name enc_case_text} [list \
  enc-1  "hello world" \
  enc-2  "sqlite" \
  enc-3  "" \
  enc-X  "\u0100" \
  enc-4  "\u1234" \
  enc-5  "\u4321abc" \
  enc-6  "\u4321\u1234" \
  enc-7  [string repeat "abcde\u00EF\u00EE\uFFFCabc" 100] \
  enc-8  [string repeat "\u007E\u007F\u0080\u0081" 100] \
  enc-9  [string repeat "\u07FE\u07FF\u0800\u0801\uFFF0" 100] \
  enc-10 [string repeat "\uE000" 100] \
] {
  test_conversion $enc_case_name $enc_case_text
}

# Leave no loop variables behind in the global scope.
unset enc_case_name enc_case_text
|  152  |  | 
# Collation callback: order two strings with TCL's [string compare].
#
# $enc    is accepted but not used.
# $zLeft  is the left-hand string.
# $zRight is the right-hand string.
#
# Returns -1, 0 or 1 as $zLeft sorts before, equal to, or after $zRight.
proc test_collate {enc zLeft zRight} {
  string compare $zLeft $zRight
}
# Register the test_collate collation on the test database handle.
# NOTE(review): the three numeric flags presumably select which text
# encodings the collation is registered for -- confirm against the harness.
add_test_collate $::DB 0 0 1

# enc-11.1: build a table whose first column uses test_collate and insert two
# rows.  The first value is X'C388', the UTF-8 encoding of U+00C8.  The second
# is a long run of 0x80 bytes that starts with 0xC0 and ends with 0x83 0x88.
# NOTE(review): each blob literal must sit on a single line; the long one was
# rejoined from a hard-wrapped copy, so verify its length against upstream.
do_test enc-11.1 {
  execsql {
    CREATE TABLE ab(a COLLATE test_collate, b);
    INSERT INTO ab VALUES(CAST (X'C388' AS TEXT), X'888800');
    INSERT INTO ab VALUES(CAST (X'C08080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808388' AS TEXT), X'888800');
    CREATE INDEX ab_i ON ab(a, b);
  }
} {}

# enc-11.2: both rows are expected to compare equal to U+00C8.
do_test enc-11.2 {
  set cp200 "\u00C8"
  execsql {
    SELECT count(*) FROM ab WHERE a = $::cp200;
  }
} {2}

finish_test
| OLD | NEW |