OLD | NEW |
1 /* Copyright (c) 2010 The Chromium OS Authors. All rights reserved. | 1 /* Copyright (c) 2010 The Chromium OS Authors. All rights reserved. |
2 * Use of this source code is governed by a BSD-style license that can be | 2 * Use of this source code is governed by a BSD-style license that can be |
3 * found in the LICENSE file. | 3 * found in the LICENSE file. |
4 * | 4 * |
5 * Utility for ChromeOS-specific GPT partitions, Please see corresponding .c | 5 * Utility for ChromeOS-specific GPT partitions, Please see corresponding .c |
6 * files for more details. | 6 * files for more details. |
7 */ | 7 */ |
8 | 8 |
9 #include "cgpt.h" | 9 #include "cgpt.h" |
10 | 10 |
(...skipping 332 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
343 le16toh(guid->u.Uuid.time_high_and_version), | 343 le16toh(guid->u.Uuid.time_high_and_version), |
344 guid->u.Uuid.clock_seq_high_and_reserved, | 344 guid->u.Uuid.clock_seq_high_and_reserved, |
345 guid->u.Uuid.clock_seq_low, | 345 guid->u.Uuid.clock_seq_low, |
346 guid->u.Uuid.node[0], guid->u.Uuid.node[1], | 346 guid->u.Uuid.node[0], guid->u.Uuid.node[1], |
347 guid->u.Uuid.node[2], guid->u.Uuid.node[3], | 347 guid->u.Uuid.node[2], guid->u.Uuid.node[3], |
348 guid->u.Uuid.node[4], guid->u.Uuid.node[5]) == GUID_STRLEN-1); | 348 guid->u.Uuid.node[4], guid->u.Uuid.node[5]) == GUID_STRLEN-1); |
349 } | 349 } |
350 | 350 |
351 /* Convert possibly unterminated UTF16 string to UTF8. | 351 /* Convert possibly unterminated UTF16 string to UTF8. |
352 * Caller must prepare enough space for UTF8, which could be up to | 352 * Caller must prepare enough space for UTF8, which could be up to |
353 * twice the number of UTF16 chars plus the terminating '\0'. | 353 * twice the byte length of UTF16 string plus the terminating '\0'. |
354 * FIXME(wfrichar): The original implementation had security issues. As a | 354 * See the following table for encoding lengths. |
355 * temporary fix, I'm making this ONLY support ASCII codepoints. Bug 7542 | 355 * |
356 * (http://code.google.com/p/chromium-os/issues/detail?id=7542) is filed to fix | 356 * Code point UTF16 UTF8 |
357 * this. | 357 * 0x0000-0x007F 2 bytes 1 byte |
| 358 * 0x0080-0x07FF 2 bytes 2 bytes |
| 359 * 0x0800-0xFFFF 2 bytes 3 bytes |
| 360 * 0x10000-0x10FFFF 4 bytes 4 bytes |
| 361 * |
| 362 * This function uses a simple state machine to convert UTF-16 code unit(s) to |
| 363 * a code point. Once a code point is parsed out, the state machine emits |
| 364 * the corresponding UTF-8 bytes all at once. |
| 365 * |
| 366 * Return: CGPT_OK --- all characters were converted successfully. |
| 367 * CGPT_FAILED --- conversion error, e.g. output buffer is too short. |
358 */ | 368 */ |
359 void UTF16ToUTF8(const uint16_t *utf16, unsigned int maxinput, | 369 int UTF16ToUTF8(const uint16_t *utf16, unsigned int maxinput, |
360 uint8_t *utf8, unsigned int maxoutput) | 370 uint8_t *utf8, unsigned int maxoutput) |
361 { | 371 { |
362 size_t s16idx, s8idx; | 372 size_t s16idx, s8idx; |
363 uint32_t utfchar; | 373 uint32_t code_point; |
| 374 int code_point_ready = 1; // code point is ready to output. |
| 375 int retval = CGPT_OK; |
364 | 376 |
365 if (!utf16 || !maxinput || !utf8 || !maxoutput) | 377 if (!utf16 || !maxinput || !utf8 || !maxoutput) |
366 return; | 378 return CGPT_FAILED; |
367 | 379 |
368 maxoutput--; /* plan for termination now */ | 380 maxoutput--; /* plan for termination now */ |
369 | 381 |
370 for (s16idx = s8idx = 0; | 382 for (s16idx = s8idx = 0; |
371 s16idx < maxinput && utf16[s16idx] && maxoutput; | 383 s16idx < maxinput && utf16[s16idx] && maxoutput; |
372 s16idx++, maxoutput--) { | 384 s16idx++) { |
373 utfchar = le16toh(utf16[s16idx]); | 385 uint16_t codeunit = le16toh(utf16[s16idx]); |
374 utf8[s8idx++] = utfchar & 0x7F; | 386 |
| 387 if (code_point_ready) { |
| 388 if (codeunit >= 0xD800 && codeunit <= 0xDBFF) { |
| 389 /* high surrogate, need the low surrogate. */ |
| 390 code_point_ready = 0; |
| 391 code_point = (codeunit & 0x03FF) + 0x0040; |
| 392 } else { |
| 393 /* BMP char, output it. */ |
| 394 code_point = codeunit; |
| 395 } |
| 396 } else { |
| 397 /* expect the low surrogate */ |
| 398 if (codeunit >= 0xDC00 && codeunit <= 0xDFFF) { |
| 399 code_point = (code_point << 10) | (codeunit & 0x03FF); |
| 400 code_point_ready = 1; |
| 401 } else { |
| 402 /* the second code unit is NOT the low surrogate. Unexpected. */ |
| 403 retval = CGPT_FAILED; |
| 404 break; |
| 405 } |
| 406 } |
| 407 |
| 408 /* If UTF code point is ready, output it. */ |
| 409 if (code_point_ready) { |
| 410 require(code_point <= 0x10FFFF); |
| 411 if (code_point <= 0x7F && maxoutput >= 1) { |
| 412 maxoutput -= 1; |
| 413 utf8[s8idx++] = code_point & 0x7F; |
| 414 } else if (code_point <= 0x7FF && maxoutput >= 2) { |
| 415 maxoutput -= 2; |
| 416 utf8[s8idx++] = 0xC0 | (code_point >> 6); |
| 417 utf8[s8idx++] = 0x80 | (code_point & 0x3F); |
| 418 } else if (code_point <= 0xFFFF && maxoutput >= 3) { |
| 419 maxoutput -= 3; |
| 420 utf8[s8idx++] = 0xE0 | (code_point >> 12); |
| 421 utf8[s8idx++] = 0x80 | ((code_point >> 6) & 0x3F); |
| 422 utf8[s8idx++] = 0x80 | (code_point & 0x3F); |
| 423 } else if (code_point <= 0x10FFFF && maxoutput >= 4) { |
| 424 maxoutput -= 4; |
| 425 utf8[s8idx++] = 0xF0 | (code_point >> 18); |
| 426 utf8[s8idx++] = 0x80 | ((code_point >> 12) & 0x3F); |
| 427 utf8[s8idx++] = 0x80 | ((code_point >> 6) & 0x3F); |
| 428 utf8[s8idx++] = 0x80 | (code_point & 0x3F); |
| 429 } else { |
| 430 /* not enough room left in the output buffer */ |
| 431 retval = CGPT_FAILED; |
| 432 break; |
| 433 } |
| 434 } |
375 } | 435 } |
376 utf8[s8idx++] = 0; | 436 utf8[s8idx++] = 0; |
| 437 return retval; |
377 } | 438 } |
378 | 439 |
379 /* Convert UTF8 string to UTF16. The UTF8 string must be null-terminated. | 440 /* Convert UTF8 string to UTF16. The UTF8 string must be null-terminated. |
380 * Caller must prepare enough space for UTF16, including a terminating 0x0000. | 441 * Caller must prepare enough space for UTF16, including a terminating 0x0000. |
381 * FIXME(wfrichar): The original implementation had security issues. As a | 442 * See the following table for encoding lengths. In any case, the caller |
382 * temporary fix, I'm making this ONLY support ASCII codepoints. Bug 7542 | 443 * just needs room for one UTF16 code unit per UTF8 byte, plus the terminating 0x0000. |
383 * (http://code.google.com/p/chromium-os/issues/detail?id=7542) is filed to fix | 444 * |
384 * this. | 445 * Code point UTF16 UTF8 |
| 446 * 0x0000-0x007F 2 bytes 1 byte |
| 447 * 0x0080-0x07FF 2 bytes 2 bytes |
| 448 * 0x0800-0xFFFF 2 bytes 3 bytes |
| 449 * 0x10000-0x10FFFF 4 bytes 4 bytes |
| 450 * |
| 451 * This function first converts UTF8 bytes to a code point, then converts it |
| 452 * to UTF16 code unit(s). |
| 453 * |
| 454 * Return: CGPT_OK --- all characters were converted successfully. |
| 455 * CGPT_FAILED --- conversion error, e.g. output buffer is too short. |
385 */ | 456 */ |
386 void UTF8ToUTF16(const uint8_t *utf8, uint16_t *utf16, unsigned int maxoutput) | 457 int UTF8ToUTF16(const uint8_t *utf8, uint16_t *utf16, unsigned int maxoutput) |
387 { | 458 { |
388 size_t s16idx, s8idx; | 459 size_t s16idx, s8idx; |
389 uint32_t utfchar; | 460 uint32_t code_point = 0; |
| 461 unsigned int expected_units = 1; |
| 462 unsigned int decoded_units = 1; |
| 463 int retval = CGPT_OK; |
390 | 464 |
391 if (!utf8 || !utf16 || !maxoutput) | 465 if (!utf8 || !utf16 || !maxoutput) |
392 return; | 466 return CGPT_FAILED; |
393 | 467 |
394 maxoutput--; /* plan for termination */ | 468 maxoutput--; /* plan for termination */ |
395 | 469 |
396 for (s8idx = s16idx = 0; | 470 for (s8idx = s16idx = 0; |
397 utf8[s8idx] && maxoutput; | 471 utf8[s8idx] && maxoutput; |
398 s8idx++, maxoutput--) { | 472 s8idx++) { |
399 utfchar = utf8[s8idx]; | 473 uint8_t code_unit; |
400 utf16[s16idx++] = utfchar & 0x7F; | 474 code_unit = utf8[s8idx]; |
| 475 |
| 476 if (expected_units != decoded_units) { |
| 477 /* Trailing bytes of multi-byte character */ |
| 478 if ((code_unit & 0xC0) == 0x80) { |
| 479 code_point = (code_point << 6) | (code_unit & 0x3F); |
| 480 ++decoded_units; |
| 481 } else { |
| 482 /* Unexpected code unit. */ |
| 483 retval = CGPT_FAILED; |
| 484 break; |
| 485 } |
| 486 } else { |
| 487 /* parsing a new code point. */ |
| 488 decoded_units = 1; |
| 489 if (code_unit <= 0x7F) { |
| 490 code_point = code_unit; |
| 491 expected_units = 1; |
| 492 } else if (code_unit <= 0xBF) { |
| 493 /* 0x80-0xBF must NOT be the leading byte of a new code point. */ |
| 494 retval = CGPT_FAILED; |
| 495 break; |
| 496 } else if (code_unit >= 0xC2 && code_unit <= 0xDF) { |
| 497 code_point = code_unit & 0x1F; |
| 498 expected_units = 2; |
| 499 } else if (code_unit >= 0xE0 && code_unit <= 0xEF) { |
| 500 code_point = code_unit & 0x0F; |
| 501 expected_units = 3; |
| 502 } else if (code_unit >= 0xF0 && code_unit <= 0xF4) { |
| 503 code_point = code_unit & 0x07; |
| 504 expected_units = 4; |
| 505 } else { |
| 506 /* illegal code unit: 0xC0-0xC1, 0xF5-0xFF */ |
| 507 retval = CGPT_FAILED; |
| 508 break; |
| 509 } |
| 510 } |
| 511 |
| 512 /* If no more unit is needed, output the UTF16 unit(s). */ |
| 513 if (expected_units == decoded_units) { |
| 514 /* Check if the encoding is the shortest possible UTF-8 sequence. */ |
| 515 switch (expected_units) { |
| 516 case 2: |
| 517 if (code_point <= 0x7F) retval = CGPT_FAILED; |
| 518 break; |
| 519 case 3: |
| 520 if (code_point <= 0x7FF) retval = CGPT_FAILED; |
| 521 break; |
| 522 case 4: |
| 523 if (code_point <= 0xFFFF) retval = CGPT_FAILED; |
| 524 break; |
| 525 } |
| 526 if (retval == CGPT_FAILED) break; /* leave immediately */ |
| 527 |
| 528 if ((code_point <= 0xD7FF) || |
| 529 (code_point >= 0xE000 && code_point <= 0xFFFF)) { |
| 530 utf16[s16idx++] = code_point; |
| 531 maxoutput -= 1; |
| 532 } else if (code_point >= 0x10000 && code_point <= 0x10FFFF && |
| 533 maxoutput >= 2) { |
| 534 utf16[s16idx++] = 0xD800 | ((code_point >> 10) - 0x0040); |
| 535 utf16[s16idx++] = 0xDC00 | (code_point & 0x03FF); |
| 536 maxoutput -= 2; |
| 537 } else { |
| 538 /* Three possibilities fall into here. All are failure cases. |
| 539 * a. code point in the surrogate range (0xD800~0xDFFF) |
| 540 * b. invalid code point > 0x10FFFF |
| 541 * c. output buffer too short for a surrogate pair |
| 542 */ |
| 543 retval = CGPT_FAILED; |
| 544 break; |
| 545 } |
| 546 } |
401 } | 547 } |
| 548 |
| 549 /* A null-terminator shows up before the UTF8 sequence ends. */ |
| 550 if (expected_units != decoded_units) { |
| 551 retval = CGPT_FAILED; |
| 552 } |
| 553 |
402 utf16[s16idx++] = 0; | 554 utf16[s16idx++] = 0; |
| 555 return retval; |
403 } | 556 } |
404 | 557 |
405 struct { | 558 struct { |
406 Guid type; | 559 Guid type; |
407 char *name; | 560 char *name; |
408 char *description; | 561 char *description; |
409 } supported_types[] = { | 562 } supported_types[] = { |
410 {GPT_ENT_TYPE_CHROMEOS_KERNEL, "kernel", "ChromeOS kernel"}, | 563 {GPT_ENT_TYPE_CHROMEOS_KERNEL, "kernel", "ChromeOS kernel"}, |
411 {GPT_ENT_TYPE_CHROMEOS_ROOTFS, "rootfs", "ChromeOS rootfs"}, | 564 {GPT_ENT_TYPE_CHROMEOS_ROOTFS, "rootfs", "ChromeOS rootfs"}, |
412 {GPT_ENT_TYPE_LINUX_DATA, "data", "Linux data"}, | 565 {GPT_ENT_TYPE_LINUX_DATA, "data", "Linux data"}, |
(...skipping 281 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
694 | 847 |
695 void PMBRToStr(struct pmbr *pmbr, char *str, unsigned int buflen) { | 848 void PMBRToStr(struct pmbr *pmbr, char *str, unsigned int buflen) { |
696 char buf[GUID_STRLEN]; | 849 char buf[GUID_STRLEN]; |
697 if (IsZero(&pmbr->boot_guid)) { | 850 if (IsZero(&pmbr->boot_guid)) { |
698 require(snprintf(str, buflen, "PMBR") < buflen); | 851 require(snprintf(str, buflen, "PMBR") < buflen); |
699 } else { | 852 } else { |
700 GuidToStr(&pmbr->boot_guid, buf, sizeof(buf)); | 853 GuidToStr(&pmbr->boot_guid, buf, sizeof(buf)); |
701 require(snprintf(str, buflen, "PMBR (Boot GUID: %s)", buf) < buflen); | 854 require(snprintf(str, buflen, "PMBR (Boot GUID: %s)", buf) < buflen); |
702 } | 855 } |
703 } | 856 } |
OLD | NEW |