| Index: third_party/sqlite/src/src/utf.c
|
| diff --git a/third_party/sqlite/src/src/utf.c b/third_party/sqlite/src/src/utf.c
|
| index c3d07be8287daf780c17ed45e1e65b56a75f0136..95182694d39d46a37f57e5ba2e9730240194d49e 100644
|
| --- a/third_party/sqlite/src/src/utf.c
|
| +++ b/third_party/sqlite/src/src/utf.c
|
| @@ -12,8 +12,6 @@
|
| ** This file contains routines used to translate between UTF-8,
|
| ** UTF-16, UTF-16BE, and UTF-16LE.
|
| **
|
| -** $Id: utf.c,v 1.73 2009/04/01 18:40:32 drh Exp $
|
| -**
|
| ** Notes on UTF-8:
|
| **
|
| ** Byte-0 Byte-1 Byte-2 Byte-3 Value
|
| @@ -107,20 +105,20 @@ static const unsigned char sqlite3Utf8Trans1[] = {
|
| } \
|
| }
|
|
|
| -#define READ_UTF16LE(zIn, c){ \
|
| +#define READ_UTF16LE(zIn, TERM, c){ \
|
| c = (*zIn++); \
|
| c += ((*zIn++)<<8); \
|
| - if( c>=0xD800 && c<0xE000 ){ \
|
| + if( c>=0xD800 && c<0xE000 && TERM ){ \
|
| int c2 = (*zIn++); \
|
| c2 += ((*zIn++)<<8); \
|
| c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10); \
|
| } \
|
| }
|
|
|
| -#define READ_UTF16BE(zIn, c){ \
|
| +#define READ_UTF16BE(zIn, TERM, c){ \
|
| c = ((*zIn++)<<8); \
|
| c += (*zIn++); \
|
| - if( c>=0xD800 && c<0xE000 ){ \
|
| + if( c>=0xD800 && c<0xE000 && TERM ){ \
|
| int c2 = ((*zIn++)<<8); \
|
| c2 += (*zIn++); \
|
| c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10); \
|
| @@ -169,7 +167,7 @@ int sqlite3Utf8Read(
|
| const unsigned char *zIn, /* First byte of UTF-8 character */
|
| const unsigned char **pzNext /* Write first byte past UTF-8 char here */
|
| ){
|
| - int c;
|
| + unsigned int c;
|
|
|
| /* Same as READ_UTF8() above but without the zTerm parameter.
|
| ** For this routine, we assume the UTF8 string is always zero-terminated.
|
| @@ -305,13 +303,13 @@ int sqlite3VdbeMemTranslate(Mem *pMem, u8 desiredEnc){
|
| if( pMem->enc==SQLITE_UTF16LE ){
|
| /* UTF-16 Little-endian -> UTF-8 */
|
| while( zIn<zTerm ){
|
| - READ_UTF16LE(zIn, c);
|
| + READ_UTF16LE(zIn, zIn<zTerm, c);
|
| WRITE_UTF8(z, c);
|
| }
|
| }else{
|
| /* UTF-16 Big-endian -> UTF-8 */
|
| while( zIn<zTerm ){
|
| - READ_UTF16BE(zIn, c);
|
| + READ_UTF16BE(zIn, zIn<zTerm, c);
|
| WRITE_UTF8(z, c);
|
| }
|
| }
|
| @@ -412,15 +410,15 @@ int sqlite3Utf8CharLen(const char *zIn, int nByte){
|
| ** This has the effect of making sure that the string is well-formed
|
| ** UTF-8. Miscoded characters are removed.
|
| **
|
| -** The translation is done in-place (since it is impossible for the
|
| -** correct UTF-8 encoding to be longer than a malformed encoding).
|
| +** The translation is done in-place and aborted if the output
|
| +** overruns the input.
|
| */
|
| int sqlite3Utf8To8(unsigned char *zIn){
|
| unsigned char *zOut = zIn;
|
| unsigned char *zStart = zIn;
|
| u32 c;
|
|
|
| - while( zIn[0] ){
|
| + while( zIn[0] && zOut<=zIn ){
|
| c = sqlite3Utf8Read(zIn, (const u8**)&zIn);
|
| if( c!=0xfffd ){
|
| WRITE_UTF8(zOut, c);
|
| @@ -439,11 +437,11 @@ int sqlite3Utf8To8(unsigned char *zIn){
|
| **
|
| ** NULL is returned if there is an allocation error.
|
| */
|
| -char *sqlite3Utf16to8(sqlite3 *db, const void *z, int nByte){
|
| +char *sqlite3Utf16to8(sqlite3 *db, const void *z, int nByte, u8 enc){
|
| Mem m;
|
| memset(&m, 0, sizeof(m));
|
| m.db = db;
|
| - sqlite3VdbeMemSetStr(&m, z, nByte, SQLITE_UTF16NATIVE, SQLITE_STATIC);
|
| + sqlite3VdbeMemSetStr(&m, z, nByte, enc, SQLITE_STATIC);
|
| sqlite3VdbeChangeEncoding(&m, SQLITE_UTF8);
|
| if( db->mallocFailed ){
|
| sqlite3VdbeMemRelease(&m);
|
| @@ -451,7 +449,9 @@ char *sqlite3Utf16to8(sqlite3 *db, const void *z, int nByte){
|
| }
|
| assert( (m.flags & MEM_Term)!=0 || db->mallocFailed );
|
| assert( (m.flags & MEM_Str)!=0 || db->mallocFailed );
|
| - return (m.flags & MEM_Dyn)!=0 ? m.z : sqlite3DbStrDup(db, m.z);
|
| + assert( (m.flags & MEM_Dyn)!=0 || db->mallocFailed );
|
| + assert( m.z || db->mallocFailed );
|
| + return m.z;
|
| }
|
|
|
| /*
|
| @@ -481,7 +481,7 @@ char *sqlite3Utf8to16(sqlite3 *db, u8 enc, char *z, int n, int *pnOut){
|
| #endif
|
|
|
| /*
|
| -** pZ is a UTF-16 encoded unicode string at least nChar characters long.
|
| +** zIn is a UTF-16 encoded unicode string at least nChar characters long.
|
| ** Return the number of bytes in the first nChar unicode characters
|
| -** in pZ. nChar must be non-negative.
|
| +** in zIn. nChar must be non-negative.
|
| */
|
| @@ -489,23 +489,15 @@ int sqlite3Utf16ByteLen(const void *zIn, int nChar){
|
| int c;
|
| unsigned char const *z = zIn;
|
| int n = 0;
|
| +
|
| if( SQLITE_UTF16NATIVE==SQLITE_UTF16BE ){
|
| - /* Using an "if (SQLITE_UTF16NATIVE==SQLITE_UTF16BE)" construct here
|
| - ** and in other parts of this file means that at one branch will
|
| - ** not be covered by coverage testing on any single host. But coverage
|
| - ** will be complete if the tests are run on both a little-endian and
|
| - ** big-endian host. Because both the UTF16NATIVE and SQLITE_UTF16BE
|
| - ** macros are constant at compile time the compiler can determine
|
| - ** which branch will be followed. It is therefore assumed that no runtime
|
| - ** penalty is paid for this "if" statement.
|
| - */
|
| while( n<nChar ){
|
| - READ_UTF16BE(z, c);
|
| + READ_UTF16BE(z, 1, c);
|
| n++;
|
| }
|
| }else{
|
| while( n<nChar ){
|
| - READ_UTF16LE(z, c);
|
| + READ_UTF16LE(z, 1, c);
|
| n++;
|
| }
|
| }
|
| @@ -547,7 +539,7 @@ void sqlite3UtfSelfTest(void){
|
| assert( n>0 && n<=4 );
|
| z[0] = 0;
|
| z = zBuf;
|
| - READ_UTF16LE(z, c);
|
| + READ_UTF16LE(z, 1, c);
|
| assert( c==i );
|
| assert( (z-zBuf)==n );
|
| }
|
| @@ -559,7 +551,7 @@ void sqlite3UtfSelfTest(void){
|
| assert( n>0 && n<=4 );
|
| z[0] = 0;
|
| z = zBuf;
|
| - READ_UTF16BE(z, c);
|
| + READ_UTF16BE(z, 1, c);
|
| assert( c==i );
|
| assert( (z-zBuf)==n );
|
| }
|
|
|