Index: third_party/sqlite/src/src/utf.c |
diff --git a/third_party/sqlite/src/src/utf.c b/third_party/sqlite/src/src/utf.c |
index c3d07be8287daf780c17ed45e1e65b56a75f0136..95182694d39d46a37f57e5ba2e9730240194d49e 100644 |
--- a/third_party/sqlite/src/src/utf.c |
+++ b/third_party/sqlite/src/src/utf.c |
@@ -12,8 +12,6 @@ |
** This file contains routines used to translate between UTF-8, |
** UTF-16, UTF-16BE, and UTF-16LE. |
** |
-** $Id: utf.c,v 1.73 2009/04/01 18:40:32 drh Exp $ |
-** |
** Notes on UTF-8: |
** |
** Byte-0 Byte-1 Byte-2 Byte-3 Value |
@@ -107,20 +105,20 @@ static const unsigned char sqlite3Utf8Trans1[] = { |
} \ |
} |
-#define READ_UTF16LE(zIn, c){ \ |
+#define READ_UTF16LE(zIn, TERM, c){ \ |
c = (*zIn++); \ |
c += ((*zIn++)<<8); \ |
- if( c>=0xD800 && c<0xE000 ){ \ |
+ if( c>=0xD800 && c<0xE000 && TERM ){ \ |
int c2 = (*zIn++); \ |
c2 += ((*zIn++)<<8); \ |
c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10); \ |
} \ |
} |
-#define READ_UTF16BE(zIn, c){ \ |
+#define READ_UTF16BE(zIn, TERM, c){ \ |
c = ((*zIn++)<<8); \ |
c += (*zIn++); \ |
- if( c>=0xD800 && c<0xE000 ){ \ |
+ if( c>=0xD800 && c<0xE000 && TERM ){ \ |
int c2 = ((*zIn++)<<8); \ |
c2 += (*zIn++); \ |
c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10); \ |
@@ -169,7 +167,7 @@ int sqlite3Utf8Read( |
const unsigned char *zIn, /* First byte of UTF-8 character */ |
const unsigned char **pzNext /* Write first byte past UTF-8 char here */ |
){ |
- int c; |
+ unsigned int c; |
/* Same as READ_UTF8() above but without the zTerm parameter. |
** For this routine, we assume the UTF8 string is always zero-terminated. |
@@ -305,13 +303,13 @@ int sqlite3VdbeMemTranslate(Mem *pMem, u8 desiredEnc){ |
if( pMem->enc==SQLITE_UTF16LE ){ |
/* UTF-16 Little-endian -> UTF-8 */ |
while( zIn<zTerm ){ |
- READ_UTF16LE(zIn, c); |
+ READ_UTF16LE(zIn, zIn<zTerm, c); |
WRITE_UTF8(z, c); |
} |
}else{ |
/* UTF-16 Big-endian -> UTF-8 */ |
while( zIn<zTerm ){ |
- READ_UTF16BE(zIn, c); |
+ READ_UTF16BE(zIn, zIn<zTerm, c); |
WRITE_UTF8(z, c); |
} |
} |
@@ -412,15 +410,15 @@ int sqlite3Utf8CharLen(const char *zIn, int nByte){ |
** This has the effect of making sure that the string is well-formed |
** UTF-8. Miscoded characters are removed. |
** |
-** The translation is done in-place (since it is impossible for the |
-** correct UTF-8 encoding to be longer than a malformed encoding). |
+** The translation is done in-place. It is aborted as soon as the |
+** write pointer (zOut) has moved past the read pointer (zIn). |
*/ |
int sqlite3Utf8To8(unsigned char *zIn){ |
unsigned char *zOut = zIn; |
unsigned char *zStart = zIn; |
u32 c; |
- while( zIn[0] ){ |
+ while( zIn[0] && zOut<=zIn ){ |
c = sqlite3Utf8Read(zIn, (const u8**)&zIn); |
if( c!=0xfffd ){ |
WRITE_UTF8(zOut, c); |
@@ -439,11 +437,11 @@ int sqlite3Utf8To8(unsigned char *zIn){ |
** |
** NULL is returned if there is an allocation error. |
*/ |
-char *sqlite3Utf16to8(sqlite3 *db, const void *z, int nByte){ |
+char *sqlite3Utf16to8(sqlite3 *db, const void *z, int nByte, u8 enc){ |
Mem m; |
memset(&m, 0, sizeof(m)); |
m.db = db; |
- sqlite3VdbeMemSetStr(&m, z, nByte, SQLITE_UTF16NATIVE, SQLITE_STATIC); |
+ sqlite3VdbeMemSetStr(&m, z, nByte, enc, SQLITE_STATIC); |
sqlite3VdbeChangeEncoding(&m, SQLITE_UTF8); |
if( db->mallocFailed ){ |
sqlite3VdbeMemRelease(&m); |
@@ -451,7 +449,9 @@ char *sqlite3Utf16to8(sqlite3 *db, const void *z, int nByte){ |
} |
assert( (m.flags & MEM_Term)!=0 || db->mallocFailed ); |
assert( (m.flags & MEM_Str)!=0 || db->mallocFailed ); |
- return (m.flags & MEM_Dyn)!=0 ? m.z : sqlite3DbStrDup(db, m.z); |
+ assert( (m.flags & MEM_Dyn)!=0 || db->mallocFailed ); |
+ assert( m.z || db->mallocFailed ); |
+ return m.z; |
} |
/* |
@@ -481,7 +481,7 @@ char *sqlite3Utf8to16(sqlite3 *db, u8 enc, char *z, int n, int *pnOut){ |
#endif |
/* |
-** pZ is a UTF-16 encoded unicode string at least nChar characters long. |
+** zIn is a UTF-16 encoded unicode string at least nChar characters long. |
** Return the number of bytes in the first nChar unicode characters |
-** in pZ. nChar must be non-negative. |
+** in zIn. nChar must be non-negative. |
*/ |
@@ -489,23 +489,15 @@ int sqlite3Utf16ByteLen(const void *zIn, int nChar){ |
int c; |
unsigned char const *z = zIn; |
int n = 0; |
+ |
if( SQLITE_UTF16NATIVE==SQLITE_UTF16BE ){ |
- /* Using an "if (SQLITE_UTF16NATIVE==SQLITE_UTF16BE)" construct here |
- ** and in other parts of this file means that at one branch will |
- ** not be covered by coverage testing on any single host. But coverage |
- ** will be complete if the tests are run on both a little-endian and |
- ** big-endian host. Because both the UTF16NATIVE and SQLITE_UTF16BE |
- ** macros are constant at compile time the compiler can determine |
- ** which branch will be followed. It is therefore assumed that no runtime |
- ** penalty is paid for this "if" statement. |
- */ |
while( n<nChar ){ |
- READ_UTF16BE(z, c); |
+ READ_UTF16BE(z, 1, c); |
n++; |
} |
}else{ |
while( n<nChar ){ |
- READ_UTF16LE(z, c); |
+ READ_UTF16LE(z, 1, c); |
n++; |
} |
} |
@@ -547,7 +539,7 @@ void sqlite3UtfSelfTest(void){ |
assert( n>0 && n<=4 ); |
z[0] = 0; |
z = zBuf; |
- READ_UTF16LE(z, c); |
+ READ_UTF16LE(z, 1, c); |
assert( c==i ); |
assert( (z-zBuf)==n ); |
} |
@@ -559,7 +551,7 @@ void sqlite3UtfSelfTest(void){ |
assert( n>0 && n<=4 ); |
z[0] = 0; |
z = zBuf; |
- READ_UTF16BE(z, c); |
+ READ_UTF16BE(z, 1, c); |
assert( c==i ); |
assert( (z-zBuf)==n ); |
} |