Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(133)

Side by Side Diff: cgpt/cgpt_common.c

Issue 5025003: The right implementation of CGPT label conversion between UTF8 and UTF16. (Closed) Base URL: ssh://git@gitrw.chromium.org:9222/vboot_reference.git
Patch Set: Created 10 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « cgpt/cgpt.h ('k') | cgpt/cmd_add.c » ('j') | cgpt/cmd_add.c » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* Copyright (c) 2010 The Chromium OS Authors. All rights reserved. 1 /* Copyright (c) 2010 The Chromium OS Authors. All rights reserved.
2 * Use of this source code is governed by a BSD-style license that can be 2 * Use of this source code is governed by a BSD-style license that can be
3 * found in the LICENSE file. 3 * found in the LICENSE file.
4 * 4 *
5 * Utility for ChromeOS-specific GPT partitions, Please see corresponding .c 5 * Utility for ChromeOS-specific GPT partitions, Please see corresponding .c
6 * files for more details. 6 * files for more details.
7 */ 7 */
8 8
9 #include "cgpt.h" 9 #include "cgpt.h"
10 10
(...skipping 332 matching lines...) Expand 10 before | Expand all | Expand 10 after
343 le16toh(guid->u.Uuid.time_high_and_version), 343 le16toh(guid->u.Uuid.time_high_and_version),
344 guid->u.Uuid.clock_seq_high_and_reserved, 344 guid->u.Uuid.clock_seq_high_and_reserved,
345 guid->u.Uuid.clock_seq_low, 345 guid->u.Uuid.clock_seq_low,
346 guid->u.Uuid.node[0], guid->u.Uuid.node[1], 346 guid->u.Uuid.node[0], guid->u.Uuid.node[1],
347 guid->u.Uuid.node[2], guid->u.Uuid.node[3], 347 guid->u.Uuid.node[2], guid->u.Uuid.node[3],
348 guid->u.Uuid.node[4], guid->u.Uuid.node[5]) == GUID_STRLEN-1); 348 guid->u.Uuid.node[4], guid->u.Uuid.node[5]) == GUID_STRLEN-1);
349 } 349 }
350 350
351 /* Convert possibly unterminated UTF16 string to UTF8. 351 /* Convert possibly unterminated UTF16 string to UTF8.
352 * Caller must prepare enough space for UTF8, which could be up to 352 * Caller must prepare enough space for UTF8, which could be up to
353 * twice the number of UTF16 chars plus the terminating '\0'. 353 * twice the number of UTF16 chars plus the terminating '\0'.
Bill Richardson 2010/11/17 17:26:39 I think this size bound is wrong. It should be "th
Louis 2010/11/18 05:35:21 Hm... my initial idea should be "UTF16 bytes", in
354 * FIXME(wfrichar): The original implementation had security issues. As a 354 *
355 * temporary fix, I'm making this ONLY support ASCII codepoints. Bug 7542 355 * This function uses a simple state machine to convert UTF-16 char(s) to
356 * (http://code.google.com/p/chromium-os/issues/detail?id=7542) is filed to fix 356 * a code point. Once a code point is parsed out, the state machine throws
357 * this. 357 * out sequential UTF-8 chars at one time.
358 *
359 * Return: CGPT_OK --- all characters are converted successfully.
360 * CGPT_FAILED --- convert error, i.e. output buffer is too short.
358 */ 361 */
359 void UTF16ToUTF8(const uint16_t *utf16, unsigned int maxinput, 362 int UTF16ToUTF8(const uint16_t *utf16, unsigned int maxinput,
360 uint8_t *utf8, unsigned int maxoutput) 363 uint8_t *utf8, unsigned int maxoutput)
361 { 364 {
362 size_t s16idx, s8idx; 365 size_t s16idx, s8idx;
363 uint32_t utfchar; 366 uint32_t code_point;
367 int code_point_ready = 1; // code point is ready to output.
368 int retval = CGPT_OK;
364 369
365 if (!utf16 || !maxinput || !utf8 || !maxoutput) 370 if (!utf16 || !maxinput || !utf8 || !maxoutput)
366 return; 371 return CGPT_FAILED;
367 372
368 maxoutput--; /* plan for termination now */ 373 maxoutput--; /* plan for termination now */
369 374
370 for (s16idx = s8idx = 0; 375 for (s16idx = s8idx = 0;
371 s16idx < maxinput && utf16[s16idx] && maxoutput; 376 s16idx < maxinput && utf16[s16idx] && maxoutput;
372 s16idx++, maxoutput--) { 377 s16idx++) {
373 utfchar = le16toh(utf16[s16idx]); 378 unsigned short codeunit = le16toh(utf16[s16idx]);
Bill Richardson 2010/11/17 17:26:39 Shouldn't codeunit be uint16_t instead of unsigned
Louis 2010/11/18 05:35:21 Done. Good catch! On 2010/11/17 17:26:39, Bill Ric
374 utf8[s8idx++] = utfchar & 0x7F; 379
380 if (code_point_ready) {
381 if (codeunit >= 0xD800 && codeunit <= 0xDBFF) {
382 /* high surrogate, need the low surrogate. */
383 code_point_ready = 0;
384 code_point = (codeunit & 0x03FF) + 0x0040;
385 } else {
386 /* BMP char, output it. */
387 code_point = codeunit;
388 }
389 } else {
390 /* expect the low surrogate */
391 if (codeunit >= 0xDC00 && codeunit <= 0xDFFF) {
392 code_point = (code_point << 10) | (codeunit & 0x03FF);
393 code_point_ready = 1;
394 } else {
395 /* the second code unit is NOT the low surrogate. Unexpected. */
396 retval = CGPT_FAILED;
397 break;
398 }
399 }
400
401 /* If UTF code point is ready, output it. */
402 if (code_point_ready) {
403 require(code_point <= 0x10FFFF);
404 if (code_point <= 0x7F && maxoutput >= 1) {
Bill Richardson 2010/11/17 17:26:39 All these "maxoutput >=" tests should be "maxoutpu
Louis 2010/11/18 05:35:21 They don't because in line 373, the space was rese
405 maxoutput -= 1;
406 utf8[s8idx++] = code_point & 0x7F;
407 } else if (code_point <= 0x7FF && maxoutput >= 2) {
408 maxoutput -= 2;
409 utf8[s8idx++] = 0xC0 | (code_point >> 6);
410 utf8[s8idx++] = 0x80 | (code_point & 0x3F);
411 } else if (code_point <= 0xFFFF && maxoutput >= 3) {
412 maxoutput -= 3;
413 utf8[s8idx++] = 0xE0 | (code_point >> 12);
414 utf8[s8idx++] = 0x80 | ((code_point >> 6) & 0x3F);
415 utf8[s8idx++] = 0x80 | (code_point & 0x3F);
416 } else if (code_point <= 0x10FFFF && maxoutput >= 4) {
417 maxoutput -= 4;
418 utf8[s8idx++] = 0xF0 | (code_point >> 18);
419 utf8[s8idx++] = 0x80 | ((code_point >> 12) & 0x3F);
420 utf8[s8idx++] = 0x80 | ((code_point >> 6) & 0x3F);
421 utf8[s8idx++] = 0x80 | (code_point & 0x3F);
422 } else {
423 /* buffer underrun */
424 retval = CGPT_FAILED;
425 break;
426 }
427 }
375 } 428 }
376 utf8[s8idx++] = 0; 429 utf8[s8idx++] = 0;
430 return retval;
377 } 431 }
378 432
379 /* Convert UTF8 string to UTF16. The UTF8 string must be null-terminated. 433 /* Convert UTF8 string to UTF16. The UTF8 string must be null-terminated.
380 * Caller must prepare enough space for UTF16, including a terminating 0x0000. 434 * Caller must prepare enough space for UTF16, including a terminating 0x0000.
381 * FIXME(wfrichar): The original implementation had security issues. As a 435 *
382 * temporary fix, I'm making this ONLY support ASCII codepoints. Bug 7542 436 * This function converts UTF8 chars to a code point first. Then, converts it
383 * (http://code.google.com/p/chromium-os/issues/detail?id=7542) is filed to fix 437 * to UTF16 code unit(s).
384 * this. 438 *
439 * Return: CGPT_OK --- all characters are converted successfully.
440 * CGPT_FAILED --- convert error, i.e. output buffer is too short.
385 */ 441 */
386 void UTF8ToUTF16(const uint8_t *utf8, uint16_t *utf16, unsigned int maxoutput) 442 int UTF8ToUTF16(const uint8_t *utf8, uint16_t *utf16, unsigned int maxoutput)
387 { 443 {
388 size_t s16idx, s8idx; 444 size_t s16idx, s8idx;
389 uint32_t utfchar; 445 uint32_t code_point = 0;
446 unsigned int need_more_code_unit = 0;
447 int retval = CGPT_OK;
390 448
391 if (!utf8 || !utf16 || !maxoutput) 449 if (!utf8 || !utf16 || !maxoutput)
392 return; 450 return CGPT_FAILED;
393 451
394 maxoutput--; /* plan for termination */ 452 maxoutput--; /* plan for termination */
395 453
396 for (s8idx = s16idx = 0; 454 for (s8idx = s16idx = 0;
397 utf8[s8idx] && maxoutput; 455 utf8[s8idx] && maxoutput;
398 s8idx++, maxoutput--) { 456 s8idx++) {
399 utfchar = utf8[s8idx]; 457 unsigned char code_unit;
Bill Richardson 2010/11/17 17:26:39 uint8_t instead of unsigned char ?
Louis 2010/11/18 05:35:21 Done. Thanks again. My stupidness. On 2010/11/17
400 utf16[s16idx++] = utfchar & 0x7F; 458 code_unit = utf8[s8idx];
459
460 if (need_more_code_unit) {
461 /* Trailing bytes of multi-byte character */
462 if ((code_unit & 0xC0) == 0x80) {
463 code_point = (code_point << 6) | (code_unit & 0x3F);
464 need_more_code_unit--;
465 } else {
466 /* Unexpected code unit. */
467 retval = CGPT_FAILED;
468 break;
469 }
470 } else {
471 /* parsing a new code point. */
472 if (code_unit <= 0x7F) {
473 code_point = code_unit;
474 } else if (code_unit <= 0xBF) {
475 /* 0x80-0xBF must NOT be the heading byte unit of a new code point. */
476 retval = CGPT_FAILED;
477 break;
Bill Richardson 2010/11/17 17:26:39 I don't think this handles all the valid input. Fo
Louis 2010/11/18 05:35:21 Done. You are right. I changed the need_more_code_
478 } else if (code_unit >= 0xC2 && code_unit <= 0xDF) {
479 code_point = code_unit & 0x1F;
480 need_more_code_unit = 1;
481 } else if (code_unit >= 0xE0 && code_unit <= 0xEF) {
482 code_point = code_unit & 0x0F;
483 need_more_code_unit = 2;
484 } else if (code_unit >= 0xF0 && code_unit <= 0xF4) {
485 code_point = code_unit & 0x07;
486 need_more_code_unit = 3;
487 } else {
488 /* illegal code unit: 0xC0-0xC1, 0xF5-0xFF */
489 retval = CGPT_FAILED;
490 break;
491 }
492 }
493
494 /* If no more unit is needed, output the UTF16 unit(s). */
495 if (!need_more_code_unit) {
496 require(code_point <= 0x10FFFF);
497 if (code_point <= 0xFFFF) {
498 utf16[s16idx++] = code_point;
499 maxoutput -= 1;
500 } else if (code_point <= 0x10FFFF && maxoutput >= 2) {
Bill Richardson 2010/11/17 17:26:39 maxoutput > 2, to account for the trailing \0000.
Louis 2010/11/18 05:35:21 In line 452, the space has been reserved. On 2010
501 utf16[s16idx++] = 0xD800 | ((code_point >> 10) - 0x0040);
502 utf16[s16idx++] = 0xDC00 | (code_point & 0x03FF);
503 maxoutput -= 2;
504 } else {
505 /* buffer underrun */
506 retval = CGPT_FAILED;
507 break;
508 }
509 }
401 } 510 }
402 utf16[s16idx++] = 0; 511 utf16[s16idx++] = 0;
512 return retval;
403 } 513 }
404 514
405 struct { 515 struct {
406 Guid type; 516 Guid type;
407 char *name; 517 char *name;
408 char *description; 518 char *description;
409 } supported_types[] = { 519 } supported_types[] = {
410 {GPT_ENT_TYPE_CHROMEOS_KERNEL, "kernel", "ChromeOS kernel"}, 520 {GPT_ENT_TYPE_CHROMEOS_KERNEL, "kernel", "ChromeOS kernel"},
411 {GPT_ENT_TYPE_CHROMEOS_ROOTFS, "rootfs", "ChromeOS rootfs"}, 521 {GPT_ENT_TYPE_CHROMEOS_ROOTFS, "rootfs", "ChromeOS rootfs"},
412 {GPT_ENT_TYPE_LINUX_DATA, "data", "Linux data"}, 522 {GPT_ENT_TYPE_LINUX_DATA, "data", "Linux data"},
(...skipping 273 matching lines...) Expand 10 before | Expand all | Expand 10 after
686 796
687 void PMBRToStr(struct pmbr *pmbr, char *str, unsigned int buflen) { 797 void PMBRToStr(struct pmbr *pmbr, char *str, unsigned int buflen) {
688 char buf[GUID_STRLEN]; 798 char buf[GUID_STRLEN];
689 if (IsZero(&pmbr->boot_guid)) { 799 if (IsZero(&pmbr->boot_guid)) {
690 require(snprintf(str, buflen, "PMBR") < buflen); 800 require(snprintf(str, buflen, "PMBR") < buflen);
691 } else { 801 } else {
692 GuidToStr(&pmbr->boot_guid, buf, sizeof(buf)); 802 GuidToStr(&pmbr->boot_guid, buf, sizeof(buf));
693 require(snprintf(str, buflen, "PMBR (Boot GUID: %s)", buf) < buflen); 803 require(snprintf(str, buflen, "PMBR (Boot GUID: %s)", buf) < buflen);
694 } 804 }
695 } 805 }
696
OLDNEW
« no previous file with comments | « cgpt/cgpt.h ('k') | cgpt/cmd_add.c » ('j') | cgpt/cmd_add.c » ('J')

Powered by Google App Engine
This is Rietveld 408576698