third_party/sqlite/src/src/utf.c - Issue 6990047: Import SQLite 3.7.6.3.

Side by Side Diff: third_party/sqlite/src/src/utf.c

Issue 6990047: Import SQLite 3.7.6.3. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Created 9 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 /*	1 /*

2 ** 2004 April 13	2 ** 2004 April 13

3 **	3 **

4 ** The author disclaims copyright to this source code. In place of	4 ** The author disclaims copyright to this source code. In place of

5 ** a legal notice, here is a blessing:	5 ** a legal notice, here is a blessing:

6 **	6 **

7 ** May you do good and not evil.	7 ** May you do good and not evil.

8 ** May you find forgiveness for yourself and forgive others.	8 ** May you find forgiveness for yourself and forgive others.

9 ** May you share freely, never taking more than you give.	9 ** May you share freely, never taking more than you give.

10 **	10 **

11 *************************************************************************	11 *************************************************************************

12 ** This file contains routines used to translate between UTF-8,	12 ** This file contains routines used to translate between UTF-8,

13 ** UTF-16, UTF-16BE, and UTF-16LE.	13 ** UTF-16, UTF-16BE, and UTF-16LE.

14 **	14 **

15 ** $Id: utf.c,v 1.73 2009/04/01 18:40:32 drh Exp $

16 **

17 ** Notes on UTF-8:	15 ** Notes on UTF-8:

18 **	16 **

19 ** Byte-0 Byte-1 Byte-2 Byte-3 Value	17 ** Byte-0 Byte-1 Byte-2 Byte-3 Value

20 ** 0xxxxxxx 00000000 00000000 0xxxxxxx	18 ** 0xxxxxxx 00000000 00000000 0xxxxxxx

21 ** 110yyyyy 10xxxxxx 00000000 00000yyy yyxxxxxx	19 ** 110yyyyy 10xxxxxx 00000000 00000yyy yyxxxxxx

22 ** 1110zzzz 10yyyyyy 10xxxxxx 00000000 zzzzyyyy yyxxxxxx	20 ** 1110zzzz 10yyyyyy 10xxxxxx 00000000 zzzzyyyy yyxxxxxx

23 ** 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx 000uuuuu zzzzyyyy yyxxxxxx	21 ** 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx 000uuuuu zzzzyyyy yyxxxxxx

24 **	22 **

25 **	23 **

26 ** Notes on UTF-16: (with wwww+1==uuuuu)	24 ** Notes on UTF-16: (with wwww+1==uuuuu)

(...skipping 73 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
100 *zOut++ = (u8)((c>>8)&0x00FF); \	98 *zOut++ = (u8)((c>>8)&0x00FF); \

101 *zOut++ = (u8)(c&0x00FF); \	99 *zOut++ = (u8)(c&0x00FF); \

102 }else{ \	100 }else{ \

103 *zOut++ = (u8)(0x00D8 + (((c-0x10000)>>18)&0x03)); \	101 *zOut++ = (u8)(0x00D8 + (((c-0x10000)>>18)&0x03)); \

104 *zOut++ = (u8)(((c>>10)&0x003F) + (((c-0x10000)>>10)&0x00C0)); \	102 *zOut++ = (u8)(((c>>10)&0x003F) + (((c-0x10000)>>10)&0x00C0)); \

105 *zOut++ = (u8)(0x00DC + ((c>>8)&0x03)); \	103 *zOut++ = (u8)(0x00DC + ((c>>8)&0x03)); \

106 *zOut++ = (u8)(c&0x00FF); \	104 *zOut++ = (u8)(c&0x00FF); \

107 } \	105 } \

108 }	106 }

109	107

110 #define READ_UTF16LE(zIn, c){ \	108 #define READ_UTF16LE(zIn, TERM, c){ \

111 c = (*zIn++); \	109 c = (*zIn++); \

112 c += ((*zIn++)<<8); \	110 c += ((*zIn++)<<8); \

113 if( c>=0xD800 && c<0xE000 ){ \	111 if( c>=0xD800 && c<0xE000 && TERM ){ \

114 int c2 = (*zIn++); \	112 int c2 = (*zIn++); \

115 c2 += ((*zIn++)<<8); \	113 c2 += ((*zIn++)<<8); \

116 c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10); \	114 c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10); \

117 } \	115 } \

118 }	116 }

119	117

120 #define READ_UTF16BE(zIn, c){ \	118 #define READ_UTF16BE(zIn, TERM, c){ \

121 c = ((*zIn++)<<8); \	119 c = ((*zIn++)<<8); \

122 c += (*zIn++); \	120 c += (*zIn++); \

123 if( c>=0xD800 && c<0xE000 ){ \	121 if( c>=0xD800 && c<0xE000 && TERM ){ \

124 int c2 = ((*zIn++)<<8); \	122 int c2 = ((*zIn++)<<8); \

125 c2 += (*zIn++); \	123 c2 += (*zIn++); \

126 c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10); \	124 c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10); \

127 } \	125 } \

128 }	126 }

129	127

130 /*	128 /*

131 ** Translate a single UTF-8 character. Return the unicode value.	129 ** Translate a single UTF-8 character. Return the unicode value.

132 **	130 **

133 ** During translation, assume that the byte that zTerm points	131 ** During translation, assume that the byte that zTerm points

(...skipping 28 matching lines...) Expand all Loading...
162 c = (c<<6) + (0x3f & *(zIn++)); \	160 c = (c<<6) + (0x3f & *(zIn++)); \

163 } \	161 } \

164 if( c<0x80 \	162 if( c<0x80 \

165 || (c&0xFFFFF800)==0xD800 \	163 || (c&0xFFFFF800)==0xD800 \

166 || (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; } \	164 || (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; } \

167 }	165 }

168 int sqlite3Utf8Read(	166 int sqlite3Utf8Read(

169 const unsigned char *zIn, /* First byte of UTF-8 character */	167 const unsigned char *zIn, /* First byte of UTF-8 character */

170 const unsigned char **pzNext /* Write first byte past UTF-8 char here */	168 const unsigned char **pzNext /* Write first byte past UTF-8 char here */

171 ){	169 ){

172 int c;	170 unsigned int c;

173	171

174 /* Same as READ_UTF8() above but without the zTerm parameter.	172 /* Same as READ_UTF8() above but without the zTerm parameter.

175 ** For this routine, we assume the UTF8 string is always zero-terminated.	173 ** For this routine, we assume the UTF8 string is always zero-terminated.

176 */	174 */

177 c = *(zIn++);	175 c = *(zIn++);

178 if( c>=0xc0 ){	176 if( c>=0xc0 ){

179 c = sqlite3Utf8Trans1[c-0xc0];	177 c = sqlite3Utf8Trans1[c-0xc0];

180 while( (*zIn & 0xc0)==0x80 ){	178 while( (*zIn & 0xc0)==0x80 ){

181 c = (c<<6) + (0x3f & *(zIn++));	179 c = (c<<6) + (0x3f & *(zIn++));

182 }	180 }

(...skipping 115 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
298 WRITE_UTF16BE(z, c);	296 WRITE_UTF16BE(z, c);

299 }	297 }

300 }	298 }

301 pMem->n = (int)(z - zOut);	299 pMem->n = (int)(z - zOut);

302 *z++ = 0;	300 *z++ = 0;

303 }else{	301 }else{

304 assert( desiredEnc==SQLITE_UTF8 );	302 assert( desiredEnc==SQLITE_UTF8 );

305 if( pMem->enc==SQLITE_UTF16LE ){	303 if( pMem->enc==SQLITE_UTF16LE ){

306 /* UTF-16 Little-endian -> UTF-8 */	304 /* UTF-16 Little-endian -> UTF-8 */

307 while( zIn<zTerm ){	305 while( zIn<zTerm ){

308 READ_UTF16LE(zIn, c);	306 READ_UTF16LE(zIn, zIn<zTerm, c);

309 WRITE_UTF8(z, c);	307 WRITE_UTF8(z, c);

310 }	308 }

311 }else{	309 }else{

312 /* UTF-16 Big-endian -> UTF-8 */	310 /* UTF-16 Big-endian -> UTF-8 */

313 while( zIn<zTerm ){	311 while( zIn<zTerm ){

314 READ_UTF16BE(zIn, c);	312 READ_UTF16BE(zIn, zIn<zTerm, c);

315 WRITE_UTF8(z, c);	313 WRITE_UTF8(z, c);

316 }	314 }

317 }	315 }

318 pMem->n = (int)(z - zOut);	316 pMem->n = (int)(z - zOut);

319 }	317 }

320 *z = 0;	318 *z = 0;

321 assert( (pMem->n+(desiredEnc==SQLITE_UTF8?1:2))<=len );	319 assert( (pMem->n+(desiredEnc==SQLITE_UTF8?1:2))<=len );

322	320

323 sqlite3VdbeMemRelease(pMem);	321 sqlite3VdbeMemRelease(pMem);

324 pMem->flags &= ~(MEM_Static|MEM_Dyn|MEM_Ephem);	322 pMem->flags &= ~(MEM_Static|MEM_Dyn|MEM_Ephem);

(...skipping 80 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
405 /* This test function is not currently used by the automated test-suite.	403 /* This test function is not currently used by the automated test-suite.

406 ** Hence it is only available in debug builds.	404 ** Hence it is only available in debug builds.

407 */	405 */

408 #if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)	406 #if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)

409 /*	407 /*

410 ** Translate UTF-8 to UTF-8.	408 ** Translate UTF-8 to UTF-8.

411 **	409 **

412 ** This has the effect of making sure that the string is well-formed	410 ** This has the effect of making sure that the string is well-formed

413 ** UTF-8. Miscoded characters are removed.	411 ** UTF-8. Miscoded characters are removed.

414 **	412 **

415 ** The translation is done in-place (since it is impossible for the	413 ** The translation is done in-place and aborted if the output

416 ** correct UTF-8 encoding to be longer than a malformed encoding).	414 ** overruns the input.

417 */	415 */

418 int sqlite3Utf8To8(unsigned char *zIn){	416 int sqlite3Utf8To8(unsigned char *zIn){

419 unsigned char *zOut = zIn;	417 unsigned char *zOut = zIn;

420 unsigned char *zStart = zIn;	418 unsigned char *zStart = zIn;

421 u32 c;	419 u32 c;

422	420

423 while( zIn[0] ){	421 while( zIn[0] && zOut<=zIn ){

424 c = sqlite3Utf8Read(zIn, (const u8**)&zIn);	422 c = sqlite3Utf8Read(zIn, (const u8**)&zIn);

425 if( c!=0xfffd ){	423 if( c!=0xfffd ){

426 WRITE_UTF8(zOut, c);	424 WRITE_UTF8(zOut, c);

427 }	425 }

428 }	426 }

429 *zOut = 0;	427 *zOut = 0;

430 return (int)(zOut - zStart);	428 return (int)(zOut - zStart);

431 }	429 }

432 #endif	430 #endif

433	431

434 #ifndef SQLITE_OMIT_UTF16	432 #ifndef SQLITE_OMIT_UTF16

435 /*	433 /*

436 ** Convert a UTF-16 string in the native encoding into a UTF-8 string.	434 ** Convert a UTF-16 string in the native encoding into a UTF-8 string.

437 ** Memory to hold the UTF-8 string is obtained from sqlite3_malloc and must	435 ** Memory to hold the UTF-8 string is obtained from sqlite3_malloc and must

438 ** be freed by the calling function.	436 ** be freed by the calling function.

439 **	437 **

440 ** NULL is returned if there is an allocation error.	438 ** NULL is returned if there is an allocation error.

441 */	439 */

442 char *sqlite3Utf16to8(sqlite3 *db, const void *z, int nByte){	440 char *sqlite3Utf16to8(sqlite3 *db, const void *z, int nByte, u8 enc){

443 Mem m;	441 Mem m;

444 memset(&m, 0, sizeof(m));	442 memset(&m, 0, sizeof(m));

445 m.db = db;	443 m.db = db;

446 sqlite3VdbeMemSetStr(&m, z, nByte, SQLITE_UTF16NATIVE, SQLITE_STATIC);	444 sqlite3VdbeMemSetStr(&m, z, nByte, enc, SQLITE_STATIC);

447 sqlite3VdbeChangeEncoding(&m, SQLITE_UTF8);	445 sqlite3VdbeChangeEncoding(&m, SQLITE_UTF8);

448 if( db->mallocFailed ){	446 if( db->mallocFailed ){

449 sqlite3VdbeMemRelease(&m);	447 sqlite3VdbeMemRelease(&m);

450 m.z = 0;	448 m.z = 0;

451 }	449 }

452 assert( (m.flags & MEM_Term)!=0 || db->mallocFailed );	450 assert( (m.flags & MEM_Term)!=0 || db->mallocFailed );

453 assert( (m.flags & MEM_Str)!=0 || db->mallocFailed );	451 assert( (m.flags & MEM_Str)!=0 || db->mallocFailed );

454 return (m.flags & MEM_Dyn)!=0 ? m.z : sqlite3DbStrDup(db, m.z);	452 assert( (m.flags & MEM_Dyn)!=0 || db->mallocFailed );

	453 assert( m.z || db->mallocFailed );

	454 return m.z;

455 }	455 }

456	456

457 /*	457 /*

458 ** Convert a UTF-8 string to the UTF-16 encoding specified by parameter	458 ** Convert a UTF-8 string to the UTF-16 encoding specified by parameter

459 ** enc. A pointer to the new string is returned, and the value of *pnOut	459 ** enc. A pointer to the new string is returned, and the value of *pnOut

460 ** is set to the length of the returned string in bytes. The call should	460 ** is set to the length of the returned string in bytes. The call should

461 ** arrange to call sqlite3DbFree() on the returned pointer when it is	461 ** arrange to call sqlite3DbFree() on the returned pointer when it is

462 ** no longer required.	462 ** no longer required.

463 **	463 **

464 ** If a malloc failure occurs, NULL is returned and the db.mallocFailed	464 ** If a malloc failure occurs, NULL is returned and the db.mallocFailed

465 ** flag set.	465 ** flag set.

466 */	466 */

467 #ifdef SQLITE_ENABLE_STAT2	467 #ifdef SQLITE_ENABLE_STAT2

468 char *sqlite3Utf8to16(sqlite3 *db, u8 enc, char *z, int n, int *pnOut){	468 char *sqlite3Utf8to16(sqlite3 *db, u8 enc, char *z, int n, int *pnOut){

469 Mem m;	469 Mem m;

470 memset(&m, 0, sizeof(m));	470 memset(&m, 0, sizeof(m));

471 m.db = db;	471 m.db = db;

472 sqlite3VdbeMemSetStr(&m, z, n, SQLITE_UTF8, SQLITE_STATIC);	472 sqlite3VdbeMemSetStr(&m, z, n, SQLITE_UTF8, SQLITE_STATIC);

473 if( sqlite3VdbeMemTranslate(&m, enc) ){	473 if( sqlite3VdbeMemTranslate(&m, enc) ){

474 assert( db->mallocFailed );	474 assert( db->mallocFailed );

475 return 0;	475 return 0;

476 }	476 }

477 assert( m.z==m.zMalloc );	477 assert( m.z==m.zMalloc );

478 *pnOut = m.n;	478 *pnOut = m.n;

479 return m.z;	479 return m.z;

480 }	480 }

481 #endif	481 #endif

482	482

483 /*	483 /*

484 ** pZ is a UTF-16 encoded unicode string at least nChar characters long.	484 ** zIn is a UTF-16 encoded unicode string at least nChar characters long.

485 ** Return the number of bytes in the first nChar unicode characters	485 ** Return the number of bytes in the first nChar unicode characters

486 ** in pZ. nChar must be non-negative.	486 ** in pZ. nChar must be non-negative.

487 */	487 */

488 int sqlite3Utf16ByteLen(const void *zIn, int nChar){	488 int sqlite3Utf16ByteLen(const void *zIn, int nChar){

489 int c;	489 int c;

490 unsigned char const *z = zIn;	490 unsigned char const *z = zIn;

491 int n = 0;	491 int n = 0;

	492

492 if( SQLITE_UTF16NATIVE==SQLITE_UTF16BE ){	493 if( SQLITE_UTF16NATIVE==SQLITE_UTF16BE ){

493 /* Using an "if (SQLITE_UTF16NATIVE==SQLITE_UTF16BE)" construct here

494 ** and in other parts of this file means that at one branch will

495 ** not be covered by coverage testing on any single host. But coverage

496 ** will be complete if the tests are run on both a little-endian and

497 ** big-endian host. Because both the UTF16NATIVE and SQLITE_UTF16BE

498 ** macros are constant at compile time the compiler can determine

499 ** which branch will be followed. It is therefore assumed that no runtime

500 ** penalty is paid for this "if" statement.

501 */

502 while( n<nChar ){	494 while( n<nChar ){

503 READ_UTF16BE(z, c);	495 READ_UTF16BE(z, 1, c);

504 n++;	496 n++;

505 }	497 }

506 }else{	498 }else{

507 while( n<nChar ){	499 while( n<nChar ){

508 READ_UTF16LE(z, c);	500 READ_UTF16LE(z, 1, c);

509 n++;	501 n++;

510 }	502 }

511 }	503 }

512 return (int)(z-(unsigned char const *)zIn);	504 return (int)(z-(unsigned char const *)zIn);

513 }	505 }

514	506

515 #if defined(SQLITE_TEST)	507 #if defined(SQLITE_TEST)

516 /*	508 /*

517 ** This routine is called from the TCL test function "translate_selftest".	509 ** This routine is called from the TCL test function "translate_selftest".

518 ** It checks that the primitives for serializing and deserializing	510 ** It checks that the primitives for serializing and deserializing

(...skipping 21 matching lines...) Expand all Loading...
540 assert( (z-zBuf)==n );	532 assert( (z-zBuf)==n );

541 }	533 }

542 for(i=0; i<0x00110000; i++){	534 for(i=0; i<0x00110000; i++){

543 if( i>=0xD800 && i<0xE000 ) continue;	535 if( i>=0xD800 && i<0xE000 ) continue;

544 z = zBuf;	536 z = zBuf;

545 WRITE_UTF16LE(z, i);	537 WRITE_UTF16LE(z, i);

546 n = (int)(z-zBuf);	538 n = (int)(z-zBuf);

547 assert( n>0 && n<=4 );	539 assert( n>0 && n<=4 );

548 z[0] = 0;	540 z[0] = 0;

549 z = zBuf;	541 z = zBuf;

550 READ_UTF16LE(z, c);	542 READ_UTF16LE(z, 1, c);

551 assert( c==i );	543 assert( c==i );

552 assert( (z-zBuf)==n );	544 assert( (z-zBuf)==n );

553 }	545 }

554 for(i=0; i<0x00110000; i++){	546 for(i=0; i<0x00110000; i++){

555 if( i>=0xD800 && i<0xE000 ) continue;	547 if( i>=0xD800 && i<0xE000 ) continue;

556 z = zBuf;	548 z = zBuf;

557 WRITE_UTF16BE(z, i);	549 WRITE_UTF16BE(z, i);

558 n = (int)(z-zBuf);	550 n = (int)(z-zBuf);

559 assert( n>0 && n<=4 );	551 assert( n>0 && n<=4 );

560 z[0] = 0;	552 z[0] = 0;

561 z = zBuf;	553 z = zBuf;

562 READ_UTF16BE(z, c);	554 READ_UTF16BE(z, 1, c);

563 assert( c==i );	555 assert( c==i );

564 assert( (z-zBuf)==n );	556 assert( (z-zBuf)==n );

565 }	557 }

566 }	558 }

567 #endif /* SQLITE_TEST */	559 #endif /* SQLITE_TEST */

568 #endif /* SQLITE_OMIT_UTF16 */	560 #endif /* SQLITE_OMIT_UTF16 */

OLD	NEW

« no previous file with comments | « third_party/sqlite/src/src/update.c ('k') | third_party/sqlite/src/src/util.c » ('j') | no next file with comments »