third_party/sqlite/sqlite-src-3080704/src/utf.c - Issue 883353008: [sql] Import reference version of SQLite 3.8.7.4.

Side by Side Diff: third_party/sqlite/sqlite-src-3080704/src/utf.c

Issue 883353008: [sql] Import reference version of SQLite 3.8.7.4. (Closed) Base URL: http://chromium.googlesource.com/chromium/src.git@master

Patch Set: Hold back encoding change which is messing up patch. Created 5 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 ** 2004 April 13	2 ** 2004 April 13

3 **	3 **

4 ** The author disclaims copyright to this source code. In place of	4 ** The author disclaims copyright to this source code. In place of

5 ** a legal notice, here is a blessing:	5 ** a legal notice, here is a blessing:

6 **	6 **

7 ** May you do good and not evil.	7 ** May you do good and not evil.

8 ** May you find forgiveness for yourself and forgive others.	8 ** May you find forgiveness for yourself and forgive others.

9 ** May you share freely, never taking more than you give.	9 ** May you share freely, never taking more than you give.

10 **	10 **

(...skipping 130 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
141 **	141 **

142 ** * This routine never allows a UTF16 surrogate value to be encoded.	142 ** * This routine never allows a UTF16 surrogate value to be encoded.

143 ** If a multi-byte character attempts to encode a value between	143 ** If a multi-byte character attempts to encode a value between

144 ** 0xd800 and 0xe000 then it is rendered as 0xfffd.	144 ** 0xd800 and 0xe000 then it is rendered as 0xfffd.

145 **	145 **

146 ** * Bytes in the range of 0x80 through 0xbf which occur as the first	146 ** * Bytes in the range of 0x80 through 0xbf which occur as the first

147 ** byte of a character are interpreted as single-byte characters	147 ** byte of a character are interpreted as single-byte characters

148 ** and rendered as themselves even though they are technically	148 ** and rendered as themselves even though they are technically

149 ** invalid characters.	149 ** invalid characters.

150 **	150 **

151 ** * This routine accepts an infinite number of different UTF8 encodings	151 ** * This routine accepts over-length UTF8 encodings

152 ** for unicode values 0x80 and greater. It do not change over-length	152 ** for unicode values 0x80 and greater. It does not change over-length

153 ** encodings to 0xfffd as some systems recommend.	153 ** encodings to 0xfffd as some systems recommend.

154 */	154 */

155 #define READ_UTF8(zIn, zTerm, c) \	155 #define READ_UTF8(zIn, zTerm, c) \

156 c = *(zIn++); \	156 c = *(zIn++); \

157 if( c>=0xc0 ){ \	157 if( c>=0xc0 ){ \

158 c = sqlite3Utf8Trans1[c-0xc0]; \	158 c = sqlite3Utf8Trans1[c-0xc0]; \

159 while( zIn!=zTerm && (*zIn & 0xc0)==0x80 ){ \	159 while( zIn!=zTerm && (*zIn & 0xc0)==0x80 ){ \

160 c = (c<<6) + (0x3f & *(zIn++)); \	160 c = (c<<6) + (0x3f & *(zIn++)); \

161 } \	161 } \

162 if( c<0x80 \	162 if( c<0x80 \

163 || (c&0xFFFFF800)==0xD800 \	163 || (c&0xFFFFF800)==0xD800 \

164 || (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; } \	164 || (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; } \

165 }	165 }

166 int sqlite3Utf8Read(	166 u32 sqlite3Utf8Read(

167 const unsigned char *zIn, /* First byte of UTF-8 character */	167 const unsigned char **pz /* Pointer to string from which to read char */

168 const unsigned char **pzNext /* Write first byte past UTF-8 char here */

169 ){	168 ){

170 unsigned int c;	169 unsigned int c;

171	170

172 /* Same as READ_UTF8() above but without the zTerm parameter.	171 /* Same as READ_UTF8() above but without the zTerm parameter.

173 ** For this routine, we assume the UTF8 string is always zero-terminated.	172 ** For this routine, we assume the UTF8 string is always zero-terminated.

174 */	173 */

175 c = *(zIn++);	174 c = *((*pz)++);

176 if( c>=0xc0 ){	175 if( c>=0xc0 ){

177 c = sqlite3Utf8Trans1[c-0xc0];	176 c = sqlite3Utf8Trans1[c-0xc0];

178 while( (*zIn & 0xc0)==0x80 ){	177 while( (*(*pz) & 0xc0)==0x80 ){

179 c = (c<<6) + (0x3f & *(zIn++));	178 c = (c<<6) + (0x3f & *((*pz)++));

180 }	179 }

181 if( c<0x80	180 if( c<0x80

182 || (c&0xFFFFF800)==0xD800	181 || (c&0xFFFFF800)==0xD800

183 || (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; }	182 || (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; }

184 }	183 }

185 *pzNext = zIn;

186 return c;	184 return c;

187 }	185 }

188	186

189	187

190	188

191	189

192 /*	190 /*

193 ** If the TRANSLATE_TRACE macro is defined, the value of each Mem is	191 ** If the TRANSLATE_TRACE macro is defined, the value of each Mem is

194 ** printed on stderr on the way into and out of sqlite3VdbeMemTranslate().	192 ** printed on stderr on the way into and out of sqlite3VdbeMemTranslate().

195 */	193 */

196 /* #define TRANSLATE_TRACE 1 */	194 /* #define TRANSLATE_TRACE 1 */

197	195

198 #ifndef SQLITE_OMIT_UTF16	196 #ifndef SQLITE_OMIT_UTF16

199 /*	197 /*

200 ** This routine transforms the internal text encoding used by pMem to	198 ** This routine transforms the internal text encoding used by pMem to

201 ** desiredEnc. It is an error if the string is already of the desired	199 ** desiredEnc. It is an error if the string is already of the desired

202 ** encoding, or if *pMem does not contain a string value.	200 ** encoding, or if *pMem does not contain a string value.

203 */	201 */

204 int sqlite3VdbeMemTranslate(Mem *pMem, u8 desiredEnc){	202 SQLITE_NOINLINE int sqlite3VdbeMemTranslate(Mem *pMem, u8 desiredEnc){

205 int len; /* Maximum length of output string in bytes */	203 int len; /* Maximum length of output string in bytes */

206 unsigned char *zOut; /* Output buffer */	204 unsigned char *zOut; /* Output buffer */

207 unsigned char *zIn; /* Input iterator */	205 unsigned char *zIn; /* Input iterator */

208 unsigned char *zTerm; /* End of input */	206 unsigned char *zTerm; /* End of input */

209 unsigned char *z; /* Output iterator */	207 unsigned char *z; /* Output iterator */

210 unsigned int c;	208 unsigned int c;

211	209

212 assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) );	210 assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) );

213 assert( pMem->flags&MEM_Str );	211 assert( pMem->flags&MEM_Str );

214 assert( pMem->enc!=desiredEnc );	212 assert( pMem->enc!=desiredEnc );

(...skipping 61 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
276 zOut = sqlite3DbMallocRaw(pMem->db, len);	274 zOut = sqlite3DbMallocRaw(pMem->db, len);

277 if( !zOut ){	275 if( !zOut ){

278 return SQLITE_NOMEM;	276 return SQLITE_NOMEM;

279 }	277 }

280 z = zOut;	278 z = zOut;

281	279

282 if( pMem->enc==SQLITE_UTF8 ){	280 if( pMem->enc==SQLITE_UTF8 ){

283 if( desiredEnc==SQLITE_UTF16LE ){	281 if( desiredEnc==SQLITE_UTF16LE ){

284 /* UTF-8 -> UTF-16 Little-endian */	282 /* UTF-8 -> UTF-16 Little-endian */

285 while( zIn<zTerm ){	283 while( zIn<zTerm ){

286 /* c = sqlite3Utf8Read(zIn, zTerm, (const u8**)&zIn); */

287 READ_UTF8(zIn, zTerm, c);	284 READ_UTF8(zIn, zTerm, c);

288 WRITE_UTF16LE(z, c);	285 WRITE_UTF16LE(z, c);

289 }	286 }

290 }else{	287 }else{

291 assert( desiredEnc==SQLITE_UTF16BE );	288 assert( desiredEnc==SQLITE_UTF16BE );

292 /* UTF-8 -> UTF-16 Big-endian */	289 /* UTF-8 -> UTF-16 Big-endian */

293 while( zIn<zTerm ){	290 while( zIn<zTerm ){

294 /* c = sqlite3Utf8Read(zIn, zTerm, (const u8**)&zIn); */

295 READ_UTF8(zIn, zTerm, c);	291 READ_UTF8(zIn, zTerm, c);

296 WRITE_UTF16BE(z, c);	292 WRITE_UTF16BE(z, c);

297 }	293 }

298 }	294 }

299 pMem->n = (int)(z - zOut);	295 pMem->n = (int)(z - zOut);

300 *z++ = 0;	296 *z++ = 0;

301 }else{	297 }else{

302 assert( desiredEnc==SQLITE_UTF8 );	298 assert( desiredEnc==SQLITE_UTF8 );

303 if( pMem->enc==SQLITE_UTF16LE ){	299 if( pMem->enc==SQLITE_UTF16LE ){

304 /* UTF-16 Little-endian -> UTF-8 */	300 /* UTF-16 Little-endian -> UTF-8 */

305 while( zIn<zTerm ){	301 while( zIn<zTerm ){

306 READ_UTF16LE(zIn, zIn<zTerm, c);	302 READ_UTF16LE(zIn, zIn<zTerm, c);

307 WRITE_UTF8(z, c);	303 WRITE_UTF8(z, c);

308 }	304 }

309 }else{	305 }else{

310 /* UTF-16 Big-endian -> UTF-8 */	306 /* UTF-16 Big-endian -> UTF-8 */

311 while( zIn<zTerm ){	307 while( zIn<zTerm ){

312 READ_UTF16BE(zIn, zIn<zTerm, c);	308 READ_UTF16BE(zIn, zIn<zTerm, c);

313 WRITE_UTF8(z, c);	309 WRITE_UTF8(z, c);

314 }	310 }

315 }	311 }

316 pMem->n = (int)(z - zOut);	312 pMem->n = (int)(z - zOut);

317 }	313 }

318 *z = 0;	314 *z = 0;

319 assert( (pMem->n+(desiredEnc==SQLITE_UTF8?1:2))<=len );	315 assert( (pMem->n+(desiredEnc==SQLITE_UTF8?1:2))<=len );

320	316

	317 c = pMem->flags;

321 sqlite3VdbeMemRelease(pMem);	318 sqlite3VdbeMemRelease(pMem);

322 pMem->flags &= ~(MEM_Static|MEM_Dyn|MEM_Ephem);	319 pMem->flags = MEM_Str|MEM_Term|(c&MEM_AffMask);

323 pMem->enc = desiredEnc;	320 pMem->enc = desiredEnc;

324 pMem->flags |= (MEM_Term|MEM_Dyn);

325 pMem->z = (char*)zOut;	321 pMem->z = (char*)zOut;

326 pMem->zMalloc = pMem->z;	322 pMem->zMalloc = pMem->z;

	323 pMem->szMalloc = sqlite3DbMallocSize(pMem->db, pMem->z);

327	324

328 translate_out:	325 translate_out:

329 #if defined(TRANSLATE_TRACE) && defined(SQLITE_DEBUG)	326 #if defined(TRANSLATE_TRACE) && defined(SQLITE_DEBUG)

330 {	327 {

331 char zBuf[100];	328 char zBuf[100];

332 sqlite3VdbeMemPrettyPrint(pMem, zBuf);	329 sqlite3VdbeMemPrettyPrint(pMem, zBuf);

333 fprintf(stderr, "OUTPUT: %s\n", zBuf);	330 fprintf(stderr, "OUTPUT: %s\n", zBuf);

334 }	331 }

335 #endif	332 #endif

336 return SQLITE_OK;	333 return SQLITE_OK;

(...skipping 75 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
412 **	409 **

413 ** The translation is done in-place and aborted if the output	410 ** The translation is done in-place and aborted if the output

414 ** overruns the input.	411 ** overruns the input.

415 */	412 */

416 int sqlite3Utf8To8(unsigned char *zIn){	413 int sqlite3Utf8To8(unsigned char *zIn){

417 unsigned char *zOut = zIn;	414 unsigned char *zOut = zIn;

418 unsigned char *zStart = zIn;	415 unsigned char *zStart = zIn;

419 u32 c;	416 u32 c;

420	417

421 while( zIn[0] && zOut<=zIn ){	418 while( zIn[0] && zOut<=zIn ){

422 c = sqlite3Utf8Read(zIn, (const u8**)&zIn);	419 c = sqlite3Utf8Read((const u8**)&zIn);

423 if( c!=0xfffd ){	420 if( c!=0xfffd ){

424 WRITE_UTF8(zOut, c);	421 WRITE_UTF8(zOut, c);

425 }	422 }

426 }	423 }

427 *zOut = 0;	424 *zOut = 0;

428 return (int)(zOut - zStart);	425 return (int)(zOut - zStart);

429 }	426 }

430 #endif	427 #endif

431	428

432 #ifndef SQLITE_OMIT_UTF16	429 #ifndef SQLITE_OMIT_UTF16

433 /*	430 /*

434 ** Convert a UTF-16 string in the native encoding into a UTF-8 string.	431 ** Convert a UTF-16 string in the native encoding into a UTF-8 string.

435 ** Memory to hold the UTF-8 string is obtained from sqlite3_malloc and must	432 ** Memory to hold the UTF-8 string is obtained from sqlite3_malloc and must

436 ** be freed by the calling function.	433 ** be freed by the calling function.

437 **	434 **

438 ** NULL is returned if there is an allocation error.	435 ** NULL is returned if there is an allocation error.

439 */	436 */

440 char *sqlite3Utf16to8(sqlite3 *db, const void *z, int nByte, u8 enc){	437 char *sqlite3Utf16to8(sqlite3 *db, const void *z, int nByte, u8 enc){

441 Mem m;	438 Mem m;

442 memset(&m, 0, sizeof(m));	439 memset(&m, 0, sizeof(m));

443 m.db = db;	440 m.db = db;

444 sqlite3VdbeMemSetStr(&m, z, nByte, enc, SQLITE_STATIC);	441 sqlite3VdbeMemSetStr(&m, z, nByte, enc, SQLITE_STATIC);

445 sqlite3VdbeChangeEncoding(&m, SQLITE_UTF8);	442 sqlite3VdbeChangeEncoding(&m, SQLITE_UTF8);

446 if( db->mallocFailed ){	443 if( db->mallocFailed ){

447 sqlite3VdbeMemRelease(&m);	444 sqlite3VdbeMemRelease(&m);

448 m.z = 0;	445 m.z = 0;

449 }	446 }

450 assert( (m.flags & MEM_Term)!=0 || db->mallocFailed );	447 assert( (m.flags & MEM_Term)!=0 || db->mallocFailed );

451 assert( (m.flags & MEM_Str)!=0 || db->mallocFailed );	448 assert( (m.flags & MEM_Str)!=0 || db->mallocFailed );

452 assert( (m.flags & MEM_Dyn)!=0 || db->mallocFailed );

453 assert( m.z || db->mallocFailed );	449 assert( m.z || db->mallocFailed );

454 return m.z;	450 return m.z;

455 }	451 }

456	452

457 /*	453 /*

458 ** Convert a UTF-8 string to the UTF-16 encoding specified by parameter

459 ** enc. A pointer to the new string is returned, and the value of *pnOut

460 ** is set to the length of the returned string in bytes. The call should

461 ** arrange to call sqlite3DbFree() on the returned pointer when it is

462 ** no longer required.

463 **

464 ** If a malloc failure occurs, NULL is returned and the db.mallocFailed

465 ** flag set.

466 */

467 #ifdef SQLITE_ENABLE_STAT2

468 char *sqlite3Utf8to16(sqlite3 *db, u8 enc, char *z, int n, int *pnOut){

469 Mem m;

470 memset(&m, 0, sizeof(m));

471 m.db = db;

472 sqlite3VdbeMemSetStr(&m, z, n, SQLITE_UTF8, SQLITE_STATIC);

473 if( sqlite3VdbeMemTranslate(&m, enc) ){

474 assert( db->mallocFailed );

475 return 0;

476 }

477 assert( m.z==m.zMalloc );

478 *pnOut = m.n;

479 return m.z;

480 }

481 #endif

482

483 /*

484 ** zIn is a UTF-16 encoded unicode string at least nChar characters long.	454 ** zIn is a UTF-16 encoded unicode string at least nChar characters long.

485 ** Return the number of bytes in the first nChar unicode characters	455 ** Return the number of bytes in the first nChar unicode characters

486 ** in pZ. nChar must be non-negative.	456 ** in pZ. nChar must be non-negative.

487 */	457 */

488 int sqlite3Utf16ByteLen(const void *zIn, int nChar){	458 int sqlite3Utf16ByteLen(const void *zIn, int nChar){

489 int c;	459 int c;

490 unsigned char const *z = zIn;	460 unsigned char const *z = zIn;

491 int n = 0;	461 int n = 0;

492	462

493 if( SQLITE_UTF16NATIVE==SQLITE_UTF16BE ){	463 if( SQLITE_UTF16NATIVE==SQLITE_UTF16BE ){

(...skipping 23 matching lines...) Expand all Loading...
517 int n;	487 int n;

518 unsigned int c;	488 unsigned int c;

519	489

520 for(i=0; i<0x00110000; i++){	490 for(i=0; i<0x00110000; i++){

521 z = zBuf;	491 z = zBuf;

522 WRITE_UTF8(z, i);	492 WRITE_UTF8(z, i);

523 n = (int)(z-zBuf);	493 n = (int)(z-zBuf);

524 assert( n>0 && n<=4 );	494 assert( n>0 && n<=4 );

525 z[0] = 0;	495 z[0] = 0;

526 z = zBuf;	496 z = zBuf;

527 c = sqlite3Utf8Read(z, (const u8**)&z);	497 c = sqlite3Utf8Read((const u8**)&z);

528 t = i;	498 t = i;

529 if( i>=0xD800 && i<=0xDFFF ) t = 0xFFFD;	499 if( i>=0xD800 && i<=0xDFFF ) t = 0xFFFD;

530 if( (i&0xFFFFFFFE)==0xFFFE ) t = 0xFFFD;	500 if( (i&0xFFFFFFFE)==0xFFFE ) t = 0xFFFD;

531 assert( c==t );	501 assert( c==t );

532 assert( (z-zBuf)==n );	502 assert( (z-zBuf)==n );

533 }	503 }

534 for(i=0; i<0x00110000; i++){	504 for(i=0; i<0x00110000; i++){

535 if( i>=0xD800 && i<0xE000 ) continue;	505 if( i>=0xD800 && i<0xE000 ) continue;

536 z = zBuf;	506 z = zBuf;

537 WRITE_UTF16LE(z, i);	507 WRITE_UTF16LE(z, i);

(...skipping 13 matching lines...) Expand all Loading...
551 assert( n>0 && n<=4 );	521 assert( n>0 && n<=4 );

552 z[0] = 0;	522 z[0] = 0;

553 z = zBuf;	523 z = zBuf;

554 READ_UTF16BE(z, 1, c);	524 READ_UTF16BE(z, 1, c);

555 assert( c==i );	525 assert( c==i );

556 assert( (z-zBuf)==n );	526 assert( (z-zBuf)==n );

557 }	527 }

558 }	528 }

559 #endif /* SQLITE_TEST */	529 #endif /* SQLITE_TEST */

560 #endif /* SQLITE_OMIT_UTF16 */	530 #endif /* SQLITE_OMIT_UTF16 */

OLD	NEW

« no previous file with comments | « third_party/sqlite/sqlite-src-3080704/src/update.c ('k') | third_party/sqlite/sqlite-src-3080704/src/util.c » ('j') | no next file with comments »