Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Side by Side Diff: third_party/sqlite/sqlite-src-3080704/src/utf.c

Issue 883353008: [sql] Import reference version of SQLite 3.8.7.4. (Closed) Base URL: http://chromium.googlesource.com/chromium/src.git@master
Patch Set: Hold back encoding change which is messing up patch. Created 5 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 ** 2004 April 13 2 ** 2004 April 13
3 ** 3 **
4 ** The author disclaims copyright to this source code. In place of 4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing: 5 ** a legal notice, here is a blessing:
6 ** 6 **
7 ** May you do good and not evil. 7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others. 8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give. 9 ** May you share freely, never taking more than you give.
10 ** 10 **
(...skipping 130 matching lines...) Expand 10 before | Expand all | Expand 10 after
141 ** 141 **
142 ** * This routine never allows a UTF16 surrogate value to be encoded. 142 ** * This routine never allows a UTF16 surrogate value to be encoded.
143 ** If a multi-byte character attempts to encode a value between 143 ** If a multi-byte character attempts to encode a value between
144 ** 0xd800 and 0xe000 then it is rendered as 0xfffd. 144 ** 0xd800 and 0xe000 then it is rendered as 0xfffd.
145 ** 145 **
146 ** * Bytes in the range of 0x80 through 0xbf which occur as the first 146 ** * Bytes in the range of 0x80 through 0xbf which occur as the first
147 ** byte of a character are interpreted as single-byte characters 147 ** byte of a character are interpreted as single-byte characters
148 ** and rendered as themselves even though they are technically 148 ** and rendered as themselves even though they are technically
149 ** invalid characters. 149 ** invalid characters.
150 ** 150 **
151 ** * This routine accepts an infinite number of different UTF8 encodings 151 ** * This routine accepts over-length UTF8 encodings
152 ** for unicode values 0x80 and greater. It do not change over-length 152 ** for unicode values 0x80 and greater. It does not change over-length
153 ** encodings to 0xfffd as some systems recommend. 153 ** encodings to 0xfffd as some systems recommend.
154 */ 154 */
155 #define READ_UTF8(zIn, zTerm, c) \ 155 #define READ_UTF8(zIn, zTerm, c) \
156 c = *(zIn++); \ 156 c = *(zIn++); \
157 if( c>=0xc0 ){ \ 157 if( c>=0xc0 ){ \
158 c = sqlite3Utf8Trans1[c-0xc0]; \ 158 c = sqlite3Utf8Trans1[c-0xc0]; \
159 while( zIn!=zTerm && (*zIn & 0xc0)==0x80 ){ \ 159 while( zIn!=zTerm && (*zIn & 0xc0)==0x80 ){ \
160 c = (c<<6) + (0x3f & *(zIn++)); \ 160 c = (c<<6) + (0x3f & *(zIn++)); \
161 } \ 161 } \
162 if( c<0x80 \ 162 if( c<0x80 \
163 || (c&0xFFFFF800)==0xD800 \ 163 || (c&0xFFFFF800)==0xD800 \
164 || (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; } \ 164 || (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; } \
165 } 165 }
166 int sqlite3Utf8Read( 166 u32 sqlite3Utf8Read(
167 const unsigned char *zIn, /* First byte of UTF-8 character */ 167 const unsigned char **pz /* Pointer to string from which to read char */
168 const unsigned char **pzNext /* Write first byte past UTF-8 char here */
169 ){ 168 ){
170 unsigned int c; 169 unsigned int c;
171 170
172 /* Same as READ_UTF8() above but without the zTerm parameter. 171 /* Same as READ_UTF8() above but without the zTerm parameter.
173 ** For this routine, we assume the UTF8 string is always zero-terminated. 172 ** For this routine, we assume the UTF8 string is always zero-terminated.
174 */ 173 */
175 c = *(zIn++); 174 c = *((*pz)++);
176 if( c>=0xc0 ){ 175 if( c>=0xc0 ){
177 c = sqlite3Utf8Trans1[c-0xc0]; 176 c = sqlite3Utf8Trans1[c-0xc0];
178 while( (*zIn & 0xc0)==0x80 ){ 177 while( (*(*pz) & 0xc0)==0x80 ){
179 c = (c<<6) + (0x3f & *(zIn++)); 178 c = (c<<6) + (0x3f & *((*pz)++));
180 } 179 }
181 if( c<0x80 180 if( c<0x80
182 || (c&0xFFFFF800)==0xD800 181 || (c&0xFFFFF800)==0xD800
183 || (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; } 182 || (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; }
184 } 183 }
185 *pzNext = zIn;
186 return c; 184 return c;
187 } 185 }
188 186
189 187
190 188
191 189
192 /* 190 /*
193 ** If the TRANSLATE_TRACE macro is defined, the value of each Mem is 191 ** If the TRANSLATE_TRACE macro is defined, the value of each Mem is
194 ** printed on stderr on the way into and out of sqlite3VdbeMemTranslate(). 192 ** printed on stderr on the way into and out of sqlite3VdbeMemTranslate().
195 */ 193 */
196 /* #define TRANSLATE_TRACE 1 */ 194 /* #define TRANSLATE_TRACE 1 */
197 195
198 #ifndef SQLITE_OMIT_UTF16 196 #ifndef SQLITE_OMIT_UTF16
199 /* 197 /*
200 ** This routine transforms the internal text encoding used by pMem to 198 ** This routine transforms the internal text encoding used by pMem to
201 ** desiredEnc. It is an error if the string is already of the desired 199 ** desiredEnc. It is an error if the string is already of the desired
202 ** encoding, or if *pMem does not contain a string value. 200 ** encoding, or if *pMem does not contain a string value.
203 */ 201 */
204 int sqlite3VdbeMemTranslate(Mem *pMem, u8 desiredEnc){ 202 SQLITE_NOINLINE int sqlite3VdbeMemTranslate(Mem *pMem, u8 desiredEnc){
205 int len; /* Maximum length of output string in bytes */ 203 int len; /* Maximum length of output string in bytes */
206 unsigned char *zOut; /* Output buffer */ 204 unsigned char *zOut; /* Output buffer */
207 unsigned char *zIn; /* Input iterator */ 205 unsigned char *zIn; /* Input iterator */
208 unsigned char *zTerm; /* End of input */ 206 unsigned char *zTerm; /* End of input */
209 unsigned char *z; /* Output iterator */ 207 unsigned char *z; /* Output iterator */
210 unsigned int c; 208 unsigned int c;
211 209
212 assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) ); 210 assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) );
213 assert( pMem->flags&MEM_Str ); 211 assert( pMem->flags&MEM_Str );
214 assert( pMem->enc!=desiredEnc ); 212 assert( pMem->enc!=desiredEnc );
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after
276 zOut = sqlite3DbMallocRaw(pMem->db, len); 274 zOut = sqlite3DbMallocRaw(pMem->db, len);
277 if( !zOut ){ 275 if( !zOut ){
278 return SQLITE_NOMEM; 276 return SQLITE_NOMEM;
279 } 277 }
280 z = zOut; 278 z = zOut;
281 279
282 if( pMem->enc==SQLITE_UTF8 ){ 280 if( pMem->enc==SQLITE_UTF8 ){
283 if( desiredEnc==SQLITE_UTF16LE ){ 281 if( desiredEnc==SQLITE_UTF16LE ){
284 /* UTF-8 -> UTF-16 Little-endian */ 282 /* UTF-8 -> UTF-16 Little-endian */
285 while( zIn<zTerm ){ 283 while( zIn<zTerm ){
286 /* c = sqlite3Utf8Read(zIn, zTerm, (const u8**)&zIn); */
287 READ_UTF8(zIn, zTerm, c); 284 READ_UTF8(zIn, zTerm, c);
288 WRITE_UTF16LE(z, c); 285 WRITE_UTF16LE(z, c);
289 } 286 }
290 }else{ 287 }else{
291 assert( desiredEnc==SQLITE_UTF16BE ); 288 assert( desiredEnc==SQLITE_UTF16BE );
292 /* UTF-8 -> UTF-16 Big-endian */ 289 /* UTF-8 -> UTF-16 Big-endian */
293 while( zIn<zTerm ){ 290 while( zIn<zTerm ){
294 /* c = sqlite3Utf8Read(zIn, zTerm, (const u8**)&zIn); */
295 READ_UTF8(zIn, zTerm, c); 291 READ_UTF8(zIn, zTerm, c);
296 WRITE_UTF16BE(z, c); 292 WRITE_UTF16BE(z, c);
297 } 293 }
298 } 294 }
299 pMem->n = (int)(z - zOut); 295 pMem->n = (int)(z - zOut);
300 *z++ = 0; 296 *z++ = 0;
301 }else{ 297 }else{
302 assert( desiredEnc==SQLITE_UTF8 ); 298 assert( desiredEnc==SQLITE_UTF8 );
303 if( pMem->enc==SQLITE_UTF16LE ){ 299 if( pMem->enc==SQLITE_UTF16LE ){
304 /* UTF-16 Little-endian -> UTF-8 */ 300 /* UTF-16 Little-endian -> UTF-8 */
305 while( zIn<zTerm ){ 301 while( zIn<zTerm ){
306 READ_UTF16LE(zIn, zIn<zTerm, c); 302 READ_UTF16LE(zIn, zIn<zTerm, c);
307 WRITE_UTF8(z, c); 303 WRITE_UTF8(z, c);
308 } 304 }
309 }else{ 305 }else{
310 /* UTF-16 Big-endian -> UTF-8 */ 306 /* UTF-16 Big-endian -> UTF-8 */
311 while( zIn<zTerm ){ 307 while( zIn<zTerm ){
312 READ_UTF16BE(zIn, zIn<zTerm, c); 308 READ_UTF16BE(zIn, zIn<zTerm, c);
313 WRITE_UTF8(z, c); 309 WRITE_UTF8(z, c);
314 } 310 }
315 } 311 }
316 pMem->n = (int)(z - zOut); 312 pMem->n = (int)(z - zOut);
317 } 313 }
318 *z = 0; 314 *z = 0;
319 assert( (pMem->n+(desiredEnc==SQLITE_UTF8?1:2))<=len ); 315 assert( (pMem->n+(desiredEnc==SQLITE_UTF8?1:2))<=len );
320 316
317 c = pMem->flags;
321 sqlite3VdbeMemRelease(pMem); 318 sqlite3VdbeMemRelease(pMem);
322 pMem->flags &= ~(MEM_Static|MEM_Dyn|MEM_Ephem); 319 pMem->flags = MEM_Str|MEM_Term|(c&MEM_AffMask);
323 pMem->enc = desiredEnc; 320 pMem->enc = desiredEnc;
324 pMem->flags |= (MEM_Term|MEM_Dyn);
325 pMem->z = (char*)zOut; 321 pMem->z = (char*)zOut;
326 pMem->zMalloc = pMem->z; 322 pMem->zMalloc = pMem->z;
323 pMem->szMalloc = sqlite3DbMallocSize(pMem->db, pMem->z);
327 324
328 translate_out: 325 translate_out:
329 #if defined(TRANSLATE_TRACE) && defined(SQLITE_DEBUG) 326 #if defined(TRANSLATE_TRACE) && defined(SQLITE_DEBUG)
330 { 327 {
331 char zBuf[100]; 328 char zBuf[100];
332 sqlite3VdbeMemPrettyPrint(pMem, zBuf); 329 sqlite3VdbeMemPrettyPrint(pMem, zBuf);
333 fprintf(stderr, "OUTPUT: %s\n", zBuf); 330 fprintf(stderr, "OUTPUT: %s\n", zBuf);
334 } 331 }
335 #endif 332 #endif
336 return SQLITE_OK; 333 return SQLITE_OK;
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after
412 ** 409 **
413 ** The translation is done in-place and aborted if the output 410 ** The translation is done in-place and aborted if the output
414 ** overruns the input. 411 ** overruns the input.
415 */ 412 */
416 int sqlite3Utf8To8(unsigned char *zIn){ 413 int sqlite3Utf8To8(unsigned char *zIn){
417 unsigned char *zOut = zIn; 414 unsigned char *zOut = zIn;
418 unsigned char *zStart = zIn; 415 unsigned char *zStart = zIn;
419 u32 c; 416 u32 c;
420 417
421 while( zIn[0] && zOut<=zIn ){ 418 while( zIn[0] && zOut<=zIn ){
422 c = sqlite3Utf8Read(zIn, (const u8**)&zIn); 419 c = sqlite3Utf8Read((const u8**)&zIn);
423 if( c!=0xfffd ){ 420 if( c!=0xfffd ){
424 WRITE_UTF8(zOut, c); 421 WRITE_UTF8(zOut, c);
425 } 422 }
426 } 423 }
427 *zOut = 0; 424 *zOut = 0;
428 return (int)(zOut - zStart); 425 return (int)(zOut - zStart);
429 } 426 }
430 #endif 427 #endif
431 428
432 #ifndef SQLITE_OMIT_UTF16 429 #ifndef SQLITE_OMIT_UTF16
433 /* 430 /*
434 ** Convert a UTF-16 string in the native encoding into a UTF-8 string. 431 ** Convert a UTF-16 string in the native encoding into a UTF-8 string.
435 ** Memory to hold the UTF-8 string is obtained from sqlite3_malloc and must 432 ** Memory to hold the UTF-8 string is obtained from sqlite3_malloc and must
436 ** be freed by the calling function. 433 ** be freed by the calling function.
437 ** 434 **
438 ** NULL is returned if there is an allocation error. 435 ** NULL is returned if there is an allocation error.
439 */ 436 */
440 char *sqlite3Utf16to8(sqlite3 *db, const void *z, int nByte, u8 enc){ 437 char *sqlite3Utf16to8(sqlite3 *db, const void *z, int nByte, u8 enc){
441 Mem m; 438 Mem m;
442 memset(&m, 0, sizeof(m)); 439 memset(&m, 0, sizeof(m));
443 m.db = db; 440 m.db = db;
444 sqlite3VdbeMemSetStr(&m, z, nByte, enc, SQLITE_STATIC); 441 sqlite3VdbeMemSetStr(&m, z, nByte, enc, SQLITE_STATIC);
445 sqlite3VdbeChangeEncoding(&m, SQLITE_UTF8); 442 sqlite3VdbeChangeEncoding(&m, SQLITE_UTF8);
446 if( db->mallocFailed ){ 443 if( db->mallocFailed ){
447 sqlite3VdbeMemRelease(&m); 444 sqlite3VdbeMemRelease(&m);
448 m.z = 0; 445 m.z = 0;
449 } 446 }
450 assert( (m.flags & MEM_Term)!=0 || db->mallocFailed ); 447 assert( (m.flags & MEM_Term)!=0 || db->mallocFailed );
451 assert( (m.flags & MEM_Str)!=0 || db->mallocFailed ); 448 assert( (m.flags & MEM_Str)!=0 || db->mallocFailed );
452 assert( (m.flags & MEM_Dyn)!=0 || db->mallocFailed );
453 assert( m.z || db->mallocFailed ); 449 assert( m.z || db->mallocFailed );
454 return m.z; 450 return m.z;
455 } 451 }
456 452
457 /* 453 /*
458 ** Convert a UTF-8 string to the UTF-16 encoding specified by parameter
459 ** enc. A pointer to the new string is returned, and the value of *pnOut
460 ** is set to the length of the returned string in bytes. The call should
461 ** arrange to call sqlite3DbFree() on the returned pointer when it is
462 ** no longer required.
463 **
464 ** If a malloc failure occurs, NULL is returned and the db.mallocFailed
465 ** flag set.
466 */
467 #ifdef SQLITE_ENABLE_STAT2
468 char *sqlite3Utf8to16(sqlite3 *db, u8 enc, char *z, int n, int *pnOut){
469 Mem m;
470 memset(&m, 0, sizeof(m));
471 m.db = db;
472 sqlite3VdbeMemSetStr(&m, z, n, SQLITE_UTF8, SQLITE_STATIC);
473 if( sqlite3VdbeMemTranslate(&m, enc) ){
474 assert( db->mallocFailed );
475 return 0;
476 }
477 assert( m.z==m.zMalloc );
478 *pnOut = m.n;
479 return m.z;
480 }
481 #endif
482
483 /*
484 ** zIn is a UTF-16 encoded unicode string at least nChar characters long. 454 ** zIn is a UTF-16 encoded unicode string at least nChar characters long.
485 ** Return the number of bytes in the first nChar unicode characters 455 ** Return the number of bytes in the first nChar unicode characters
486 ** in zIn. nChar must be non-negative. 456 ** in zIn. nChar must be non-negative.
487 */ 457 */
488 int sqlite3Utf16ByteLen(const void *zIn, int nChar){ 458 int sqlite3Utf16ByteLen(const void *zIn, int nChar){
489 int c; 459 int c;
490 unsigned char const *z = zIn; 460 unsigned char const *z = zIn;
491 int n = 0; 461 int n = 0;
492 462
493 if( SQLITE_UTF16NATIVE==SQLITE_UTF16BE ){ 463 if( SQLITE_UTF16NATIVE==SQLITE_UTF16BE ){
(...skipping 23 matching lines...) Expand all
517 int n; 487 int n;
518 unsigned int c; 488 unsigned int c;
519 489
520 for(i=0; i<0x00110000; i++){ 490 for(i=0; i<0x00110000; i++){
521 z = zBuf; 491 z = zBuf;
522 WRITE_UTF8(z, i); 492 WRITE_UTF8(z, i);
523 n = (int)(z-zBuf); 493 n = (int)(z-zBuf);
524 assert( n>0 && n<=4 ); 494 assert( n>0 && n<=4 );
525 z[0] = 0; 495 z[0] = 0;
526 z = zBuf; 496 z = zBuf;
527 c = sqlite3Utf8Read(z, (const u8**)&z); 497 c = sqlite3Utf8Read((const u8**)&z);
528 t = i; 498 t = i;
529 if( i>=0xD800 && i<=0xDFFF ) t = 0xFFFD; 499 if( i>=0xD800 && i<=0xDFFF ) t = 0xFFFD;
530 if( (i&0xFFFFFFFE)==0xFFFE ) t = 0xFFFD; 500 if( (i&0xFFFFFFFE)==0xFFFE ) t = 0xFFFD;
531 assert( c==t ); 501 assert( c==t );
532 assert( (z-zBuf)==n ); 502 assert( (z-zBuf)==n );
533 } 503 }
534 for(i=0; i<0x00110000; i++){ 504 for(i=0; i<0x00110000; i++){
535 if( i>=0xD800 && i<0xE000 ) continue; 505 if( i>=0xD800 && i<0xE000 ) continue;
536 z = zBuf; 506 z = zBuf;
537 WRITE_UTF16LE(z, i); 507 WRITE_UTF16LE(z, i);
(...skipping 13 matching lines...) Expand all
551 assert( n>0 && n<=4 ); 521 assert( n>0 && n<=4 );
552 z[0] = 0; 522 z[0] = 0;
553 z = zBuf; 523 z = zBuf;
554 READ_UTF16BE(z, 1, c); 524 READ_UTF16BE(z, 1, c);
555 assert( c==i ); 525 assert( c==i );
556 assert( (z-zBuf)==n ); 526 assert( (z-zBuf)==n );
557 } 527 }
558 } 528 }
559 #endif /* SQLITE_TEST */ 529 #endif /* SQLITE_TEST */
560 #endif /* SQLITE_OMIT_UTF16 */ 530 #endif /* SQLITE_OMIT_UTF16 */
OLDNEW
« no previous file with comments | « third_party/sqlite/sqlite-src-3080704/src/update.c ('k') | third_party/sqlite/sqlite-src-3080704/src/util.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698