Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 /* Copyright (c) 2010 The Chromium OS Authors. All rights reserved. | 1 /* Copyright (c) 2010 The Chromium OS Authors. All rights reserved. |
| 2 * Use of this source code is governed by a BSD-style license that can be | 2 * Use of this source code is governed by a BSD-style license that can be |
| 3 * found in the LICENSE file. | 3 * found in the LICENSE file. |
| 4 * | 4 * |
| 5 * Utility for ChromeOS-specific GPT partitions. Please see corresponding .c | 5 * Utility for ChromeOS-specific GPT partitions. Please see corresponding .c |
| 6 * files for more details. | 6 * files for more details. |
| 7 */ | 7 */ |
| 8 | 8 |
| 9 #include "cgpt.h" | 9 #include "cgpt.h" |
| 10 | 10 |
| (...skipping 332 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 343 le16toh(guid->u.Uuid.time_high_and_version), | 343 le16toh(guid->u.Uuid.time_high_and_version), |
| 344 guid->u.Uuid.clock_seq_high_and_reserved, | 344 guid->u.Uuid.clock_seq_high_and_reserved, |
| 345 guid->u.Uuid.clock_seq_low, | 345 guid->u.Uuid.clock_seq_low, |
| 346 guid->u.Uuid.node[0], guid->u.Uuid.node[1], | 346 guid->u.Uuid.node[0], guid->u.Uuid.node[1], |
| 347 guid->u.Uuid.node[2], guid->u.Uuid.node[3], | 347 guid->u.Uuid.node[2], guid->u.Uuid.node[3], |
| 348 guid->u.Uuid.node[4], guid->u.Uuid.node[5]) == GUID_STRLEN-1); | 348 guid->u.Uuid.node[4], guid->u.Uuid.node[5]) == GUID_STRLEN-1); |
| 349 } | 349 } |
| 350 | 350 |
| 351 /* Convert possibly unterminated UTF16 string to UTF8. | 351 /* Convert possibly unterminated UTF16 string to UTF8. |
| 352 * Caller must prepare enough space for UTF8, which could be up to | 352 * Caller must prepare enough space for UTF8, which could be up to |
| 353 * twice the number of UTF16 chars plus the terminating '\0'. | 353 * twice the number of UTF16 chars plus the terminating '\0'. |
|
Bill Richardson
2010/11/17 17:26:39
I think this size bound is wrong. It should be "th
Louis
2010/11/18 05:35:21
Hm... my initial idea should be "UTF16 bytes", in
| |
| 354 * FIXME(wfrichar): The original implementation had security issues. As a | 354 * |
| 355 * temporary fix, I'm making this ONLY support ASCII codepoints. Bug 7542 | 355 * This function uses a simple state machine to convert UTF-16 char(s) to |
| 356 * (http://code.google.com/p/chromium-os/issues/detail?id=7542) is filed to fix | 356 * a code point. Once a code point is parsed out, the state machine throws |
| 357 * this. | 357 * out sequential UTF-8 chars in one time. |
| 358 * | |
| 359 * Return: CGPT_OK --- all characters are converted successfully. | |
| 360 * CGPT_FAILED --- convert error, i.e. output buffer is too short. | |
| 358 */ | 361 */ |
| 359 void UTF16ToUTF8(const uint16_t *utf16, unsigned int maxinput, | 362 int UTF16ToUTF8(const uint16_t *utf16, unsigned int maxinput, |
| 360 uint8_t *utf8, unsigned int maxoutput) | 363 uint8_t *utf8, unsigned int maxoutput) |
| 361 { | 364 { |
| 362 size_t s16idx, s8idx; | 365 size_t s16idx, s8idx; |
| 363 uint32_t utfchar; | 366 uint32_t code_point; |
| 367 int code_point_ready = 1; // code point is ready to output. | |
| 368 int retval = CGPT_OK; | |
| 364 | 369 |
| 365 if (!utf16 || !maxinput || !utf8 || !maxoutput) | 370 if (!utf16 || !maxinput || !utf8 || !maxoutput) |
| 366 return; | 371 return CGPT_FAILED; |
| 367 | 372 |
| 368 maxoutput--; /* plan for termination now */ | 373 maxoutput--; /* plan for termination now */ |
| 369 | 374 |
| 370 for (s16idx = s8idx = 0; | 375 for (s16idx = s8idx = 0; |
| 371 s16idx < maxinput && utf16[s16idx] && maxoutput; | 376 s16idx < maxinput && utf16[s16idx] && maxoutput; |
| 372 s16idx++, maxoutput--) { | 377 s16idx++) { |
| 373 utfchar = le16toh(utf16[s16idx]); | 378 unsigned short codeunit = le16toh(utf16[s16idx]); |
|
Bill Richardson
2010/11/17 17:26:39
Shouldn't codeunit be uint16_t instead of unsigned
Louis
2010/11/18 05:35:21
Done. Good catch!
On 2010/11/17 17:26:39, Bill Ric
| |
| 374 utf8[s8idx++] = utfchar & 0x7F; | 379 |
| 380 if (code_point_ready) { | |
| 381 if (codeunit >= 0xD800 && codeunit <= 0xDBFF) { | |
| 382 /* high surrogate, need the low surrogate. */ | |
| 383 code_point_ready = 0; | |
| 384 code_point = (codeunit & 0x03FF) + 0x0040; | |
| 385 } else { | |
| 386 /* BMP char, output it. */ | |
| 387 code_point = codeunit; | |
| 388 } | |
| 389 } else { | |
| 390 /* expect the low surrogate */ | |
| 391 if (codeunit >= 0xDC00 && codeunit <= 0xDFFF) { | |
| 392 code_point = (code_point << 10) | (codeunit & 0x03FF); | |
| 393 code_point_ready = 1; | |
| 394 } else { | |
| 395 /* the second code unit is NOT the low surrogate. Unexpected. */ | |
| 396 retval = CGPT_FAILED; | |
| 397 break; | |
| 398 } | |
| 399 } | |
| 400 | |
| 401 /* If UTF code point is ready, output it. */ | |
| 402 if (code_point_ready) { | |
| 403 require(code_point <= 0x10FFFF); | |
| 404 if (code_point <= 0x7F && maxoutput >= 1) { | |
|
Bill Richardson
2010/11/17 17:26:39
All these "maxoutput >=" tests should be "maxoutpu
Louis
2010/11/18 05:35:21
They don't because in line 373, the space was rese
| |
| 405 maxoutput -= 1; | |
| 406 utf8[s8idx++] = code_point & 0x7F; | |
| 407 } else if (code_point <= 0x7FF && maxoutput >= 2) { | |
| 408 maxoutput -= 2; | |
| 409 utf8[s8idx++] = 0xC0 | (code_point >> 6); | |
| 410 utf8[s8idx++] = 0x80 | (code_point & 0x3F); | |
| 411 } else if (code_point <= 0xFFFF && maxoutput >= 3) { | |
| 412 maxoutput -= 3; | |
| 413 utf8[s8idx++] = 0xE0 | (code_point >> 12); | |
| 414 utf8[s8idx++] = 0x80 | ((code_point >> 6) & 0x3F); | |
| 415 utf8[s8idx++] = 0x80 | (code_point & 0x3F); | |
| 416 } else if (code_point <= 0x10FFFF && maxoutput >= 4) { | |
| 417 maxoutput -= 4; | |
| 418 utf8[s8idx++] = 0xF0 | (code_point >> 18); | |
| 419 utf8[s8idx++] = 0x80 | ((code_point >> 12) & 0x3F); | |
| 420 utf8[s8idx++] = 0x80 | ((code_point >> 6) & 0x3F); | |
| 421 utf8[s8idx++] = 0x80 | (code_point & 0x3F); | |
| 422 } else { | |
| 423 /* output buffer too small */ | |
| 424 retval = CGPT_FAILED; | |
| 425 break; | |
| 426 } | |
| 427 } | |
| 375 } | 428 } |
| 376 utf8[s8idx++] = 0; | 429 utf8[s8idx++] = 0; |
| 430 return retval; | |
| 377 } | 431 } |
| 378 | 432 |
| 379 /* Convert UTF8 string to UTF16. The UTF8 string must be null-terminated. | 433 /* Convert UTF8 string to UTF16. The UTF8 string must be null-terminated. |
| 380 * Caller must prepare enough space for UTF16, including a terminating 0x0000. | 434 * Caller must prepare enough space for UTF16, including a terminating 0x0000. |
| 381 * FIXME(wfrichar): The original implementation had security issues. As a | 435 * |
| 382 * temporary fix, I'm making this ONLY support ASCII codepoints. Bug 7542 | 436 * This function converts UTF8 chars to a code point first. Then, converts it |
| 383 * (http://code.google.com/p/chromium-os/issues/detail?id=7542) is filed to fix | 437 * to UTF16 code unit(s). |
| 384 * this. | 438 * |
| 439 * Return: CGPT_OK --- all characters are converted successfully. | |
| 440 * CGPT_FAILED --- convert error, i.e. output buffer is too short. | |
| 385 */ | 441 */ |
| 386 void UTF8ToUTF16(const uint8_t *utf8, uint16_t *utf16, unsigned int maxoutput) | 442 int UTF8ToUTF16(const uint8_t *utf8, uint16_t *utf16, unsigned int maxoutput) |
| 387 { | 443 { |
| 388 size_t s16idx, s8idx; | 444 size_t s16idx, s8idx; |
| 389 uint32_t utfchar; | 445 uint32_t code_point = 0; |
| 446 unsigned int need_more_code_unit = 0; | |
| 447 int retval = CGPT_OK; | |
| 390 | 448 |
| 391 if (!utf8 || !utf16 || !maxoutput) | 449 if (!utf8 || !utf16 || !maxoutput) |
| 392 return; | 450 return CGPT_FAILED; |
| 393 | 451 |
| 394 maxoutput--; /* plan for termination */ | 452 maxoutput--; /* plan for termination */ |
| 395 | 453 |
| 396 for (s8idx = s16idx = 0; | 454 for (s8idx = s16idx = 0; |
| 397 utf8[s8idx] && maxoutput; | 455 utf8[s8idx] && maxoutput; |
| 398 s8idx++, maxoutput--) { | 456 s8idx++) { |
| 399 utfchar = utf8[s8idx]; | 457 unsigned char code_unit; |
|
Bill Richardson
2010/11/17 17:26:39
uint8_t instead of unsigned char ?
Louis
2010/11/18 05:35:21
Done. Thanks again. My stupidness.
On 2010/11/17
| |
| 400 utf16[s16idx++] = utfchar & 0x7F; | 458 code_unit = utf8[s8idx]; |
| 459 | |
| 460 if (need_more_code_unit) { | |
| 461 /* Trailing bytes of multi-byte character */ | |
| 462 if ((code_unit & 0xC0) == 0x80) { | |
| 463 code_point = (code_point << 6) | (code_unit & 0x3F); | |
| 464 need_more_code_unit--; | |
| 465 } else { | |
| 466 /* Unexpected code unit. */ | |
| 467 retval = CGPT_FAILED; | |
| 468 break; | |
| 469 } | |
| 470 } else { | |
| 471 /* parsing a new code point. */ | |
| 472 if (code_unit <= 0x7F) { | |
| 473 code_point = code_unit; | |
| 474 } else if (code_unit <= 0xBF) { | |
| 475 /* 0x80-0xBF must NOT be the heading byte unit of a new code point. */ | |
| 476 retval = CGPT_FAILED; | |
| 477 break; | |
|
Bill Richardson
2010/11/17 17:26:39
I don't think this handles all the valid input. Fo
Louis
2010/11/18 05:35:21
Done. You are right. I changed the need_more_code_
| |
| 478 } else if (code_unit >= 0xC2 && code_unit <= 0xDF) { | |
| 479 code_point = code_unit & 0x1F; | |
| 480 need_more_code_unit = 1; | |
| 481 } else if (code_unit >= 0xE0 && code_unit <= 0xEF) { | |
| 482 code_point = code_unit & 0x0F; | |
| 483 need_more_code_unit = 2; | |
| 484 } else if (code_unit >= 0xF0 && code_unit <= 0xF4) { | |
| 485 code_point = code_unit & 0x07; | |
| 486 need_more_code_unit = 3; | |
| 487 } else { | |
| 488 /* illegal code unit: 0xC0-0xC1, 0xF5-0xFF */ | |
| 489 retval = CGPT_FAILED; | |
| 490 break; | |
| 491 } | |
| 492 } | |
| 493 | |
| 494 /* If no more unit is needed, output the UTF16 unit(s). */ | |
| 495 if (!need_more_code_unit) { | |
| 496 require(code_point <= 0x10FFFF); | |
| 497 if (code_point <= 0xFFFF) { | |
| 498 utf16[s16idx++] = code_point; | |
| 499 maxoutput -= 1; | |
| 500 } else if (code_point <= 0x10FFFF && maxoutput >= 2) { | |
|
Bill Richardson
2010/11/17 17:26:39
maxoutput > 2, to account for the trailing \0000.
Louis
2010/11/18 05:35:21
In line 452, the space has been reserved.
On 2010
| |
| 501 utf16[s16idx++] = 0xD800 | ((code_point >> 10) - 0x0040); | |
| 502 utf16[s16idx++] = 0xDC00 | (code_point & 0x03FF); | |
| 503 maxoutput -= 2; | |
| 504 } else { | |
| 505 /* output buffer too small */ | |
| 506 retval = CGPT_FAILED; | |
| 507 break; | |
| 508 } | |
| 509 } | |
| 401 } | 510 } |
| 402 utf16[s16idx++] = 0; | 511 utf16[s16idx++] = 0; |
| 512 return retval; | |
| 403 } | 513 } |
| 404 | 514 |
| 405 struct { | 515 struct { |
| 406 Guid type; | 516 Guid type; |
| 407 char *name; | 517 char *name; |
| 408 char *description; | 518 char *description; |
| 409 } supported_types[] = { | 519 } supported_types[] = { |
| 410 {GPT_ENT_TYPE_CHROMEOS_KERNEL, "kernel", "ChromeOS kernel"}, | 520 {GPT_ENT_TYPE_CHROMEOS_KERNEL, "kernel", "ChromeOS kernel"}, |
| 411 {GPT_ENT_TYPE_CHROMEOS_ROOTFS, "rootfs", "ChromeOS rootfs"}, | 521 {GPT_ENT_TYPE_CHROMEOS_ROOTFS, "rootfs", "ChromeOS rootfs"}, |
| 412 {GPT_ENT_TYPE_LINUX_DATA, "data", "Linux data"}, | 522 {GPT_ENT_TYPE_LINUX_DATA, "data", "Linux data"}, |
| (...skipping 273 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 686 | 796 |
| 687 void PMBRToStr(struct pmbr *pmbr, char *str, unsigned int buflen) { | 797 void PMBRToStr(struct pmbr *pmbr, char *str, unsigned int buflen) { |
| 688 char buf[GUID_STRLEN]; | 798 char buf[GUID_STRLEN]; |
| 689 if (IsZero(&pmbr->boot_guid)) { | 799 if (IsZero(&pmbr->boot_guid)) { |
| 690 require(snprintf(str, buflen, "PMBR") < buflen); | 800 require(snprintf(str, buflen, "PMBR") < buflen); |
| 691 } else { | 801 } else { |
| 692 GuidToStr(&pmbr->boot_guid, buf, sizeof(buf)); | 802 GuidToStr(&pmbr->boot_guid, buf, sizeof(buf)); |
| 693 require(snprintf(str, buflen, "PMBR (Boot GUID: %s)", buf) < buflen); | 803 require(snprintf(str, buflen, "PMBR (Boot GUID: %s)", buf) < buflen); |
| 694 } | 804 } |
| 695 } | 805 } |
| 696 | |
| OLD | NEW |