Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(107)

Side by Side Diff: cgpt/cgpt_common.c

Issue 5104009: Fix to initialize the code point. (Closed) Base URL: ssh://git@gitrw.chromium.org:9222/vboot_reference.git
Patch Set: Fix according to code review. Created 10 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « cgpt/cgpt.h ('k') | cgpt/cmd_add.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* Copyright (c) 2010 The Chromium OS Authors. All rights reserved. 1 /* Copyright (c) 2010 The Chromium OS Authors. All rights reserved.
2 * Use of this source code is governed by a BSD-style license that can be 2 * Use of this source code is governed by a BSD-style license that can be
3 * found in the LICENSE file. 3 * found in the LICENSE file.
4 * 4 *
5 * Utility for ChromeOS-specific GPT partitions, Please see corresponding .c 5 * Utility for ChromeOS-specific GPT partitions, Please see corresponding .c
6 * files for more details. 6 * files for more details.
7 */ 7 */
8 8
9 #include "cgpt.h" 9 #include "cgpt.h"
10 10
(...skipping 332 matching lines...) Expand 10 before | Expand all | Expand 10 after
343 le16toh(guid->u.Uuid.time_high_and_version), 343 le16toh(guid->u.Uuid.time_high_and_version),
344 guid->u.Uuid.clock_seq_high_and_reserved, 344 guid->u.Uuid.clock_seq_high_and_reserved,
345 guid->u.Uuid.clock_seq_low, 345 guid->u.Uuid.clock_seq_low,
346 guid->u.Uuid.node[0], guid->u.Uuid.node[1], 346 guid->u.Uuid.node[0], guid->u.Uuid.node[1],
347 guid->u.Uuid.node[2], guid->u.Uuid.node[3], 347 guid->u.Uuid.node[2], guid->u.Uuid.node[3],
348 guid->u.Uuid.node[4], guid->u.Uuid.node[5]) == GUID_STRLEN-1); 348 guid->u.Uuid.node[4], guid->u.Uuid.node[5]) == GUID_STRLEN-1);
349 } 349 }
350 350
351 /* Convert possibly unterminated UTF16 string to UTF8. 351 /* Convert possibly unterminated UTF16 string to UTF8.
352 * Caller must prepare enough space for UTF8, which could be up to 352 * Caller must prepare enough space for UTF8, which could be up to
353 * twice the number of UTF16 chars plus the terminating '\0'. 353 * twice the byte length of UTF16 string plus the terminating '\0'.
354 * FIXME(wfrichar): The original implementation had security issues. As a 354 * See the following table for encoding lengths.
355 * temporary fix, I'm making this ONLY support ASCII codepoints. Bug 7542 355 *
356 * (http://code.google.com/p/chromium-os/issues/detail?id=7542) is filed to fix 356 * Code point UTF16 UTF8
357 * this. 357 * 0x0000-0x007F 2 bytes 1 byte
358 * 0x0080-0x07FF 2 bytes 2 bytes
359 * 0x0800-0xFFFF 2 bytes 3 bytes
360 * 0x10000-0x10FFFF 4 bytes 4 bytes
361 *
362 * This function uses a simple state machine to convert UTF-16 code unit(s)
363 * to a code point. Once a code point is parsed out, the state machine emits
364 * the corresponding sequence of UTF-8 bytes all at once.
365 *
366 * Return: CGPT_OK --- all characters were converted successfully.
367 * CGPT_FAILED --- conversion error, e.g. output buffer is too short.
358 */ 368 */
359 void UTF16ToUTF8(const uint16_t *utf16, unsigned int maxinput, 369 int UTF16ToUTF8(const uint16_t *utf16, unsigned int maxinput,
360 uint8_t *utf8, unsigned int maxoutput) 370 uint8_t *utf8, unsigned int maxoutput)
361 { 371 {
362 size_t s16idx, s8idx; 372 size_t s16idx, s8idx;
363 uint32_t utfchar; 373 uint32_t code_point = 0;
374 int code_point_ready = 1; // code point is ready to output.
375 int retval = CGPT_OK;
364 376
365 if (!utf16 || !maxinput || !utf8 || !maxoutput) 377 if (!utf16 || !maxinput || !utf8 || !maxoutput)
366 return; 378 return CGPT_FAILED;
367 379
368 maxoutput--; /* plan for termination now */ 380 maxoutput--; /* plan for termination now */
369 381
370 for (s16idx = s8idx = 0; 382 for (s16idx = s8idx = 0;
371 s16idx < maxinput && utf16[s16idx] && maxoutput; 383 s16idx < maxinput && utf16[s16idx] && maxoutput;
372 s16idx++, maxoutput--) { 384 s16idx++) {
373 utfchar = le16toh(utf16[s16idx]); 385 uint16_t codeunit = le16toh(utf16[s16idx]);
374 utf8[s8idx++] = utfchar & 0x7F; 386
387 if (code_point_ready) {
388 if (codeunit >= 0xD800 && codeunit <= 0xDBFF) {
389 /* high surrogate, need the low surrogate. */
390 code_point_ready = 0;
391 code_point = (codeunit & 0x03FF) + 0x0040;
392 } else {
393 /* BMP char, output it. */
394 code_point = codeunit;
395 }
396 } else {
397 /* expect the low surrogate */
398 if (codeunit >= 0xDC00 && codeunit <= 0xDFFF) {
399 code_point = (code_point << 10) | (codeunit & 0x03FF);
400 code_point_ready = 1;
401 } else {
402 /* the second code unit is NOT the low surrogate. Unexpected. */
403 code_point_ready = 0;
404 retval = CGPT_FAILED;
405 break;
406 }
407 }
408
409 /* If UTF code point is ready, output it. */
410 if (code_point_ready) {
411 require(code_point <= 0x10FFFF);
412 if (code_point <= 0x7F && maxoutput >= 1) {
413 maxoutput -= 1;
414 utf8[s8idx++] = code_point & 0x7F;
415 } else if (code_point <= 0x7FF && maxoutput >= 2) {
416 maxoutput -= 2;
417 utf8[s8idx++] = 0xC0 | (code_point >> 6);
418 utf8[s8idx++] = 0x80 | (code_point & 0x3F);
419 } else if (code_point <= 0xFFFF && maxoutput >= 3) {
420 maxoutput -= 3;
421 utf8[s8idx++] = 0xE0 | (code_point >> 12);
422 utf8[s8idx++] = 0x80 | ((code_point >> 6) & 0x3F);
423 utf8[s8idx++] = 0x80 | (code_point & 0x3F);
424 } else if (code_point <= 0x10FFFF && maxoutput >= 4) {
425 maxoutput -= 4;
426 utf8[s8idx++] = 0xF0 | (code_point >> 18);
427 utf8[s8idx++] = 0x80 | ((code_point >> 12) & 0x3F);
428 utf8[s8idx++] = 0x80 | ((code_point >> 6) & 0x3F);
429 utf8[s8idx++] = 0x80 | (code_point & 0x3F);
430 } else {
431 /* buffer underrun */
432 retval = CGPT_FAILED;
433 break;
434 }
435 }
375 } 436 }
376 utf8[s8idx++] = 0; 437 utf8[s8idx++] = 0;
438 return retval;
377 } 439 }
378 440
379 /* Convert UTF8 string to UTF16. The UTF8 string must be null-terminated. 441 /* Convert UTF8 string to UTF16. The UTF8 string must be null-terminated.
380 * Caller must prepare enough space for UTF16, including a terminating 0x0000. 442 * Caller must prepare enough space for UTF16, including a terminating 0x0000.
381 * FIXME(wfrichar): The original implementation had security issues. As a 443 * See the following table for encoding lengths. In any case, the caller
382 * temporary fix, I'm making this ONLY support ASCII codepoints. Bug 7542 444 * just needs to prepare the byte length of UTF8 plus the terminating 0x0000.
383 * (http://code.google.com/p/chromium-os/issues/detail?id=7542) is filed to fix 445 *
384 * this. 446 * Code point UTF16 UTF8
447 * 0x0000-0x007F 2 bytes 1 byte
448 * 0x0080-0x07FF 2 bytes 2 bytes
449 * 0x0800-0xFFFF 2 bytes 3 bytes
450 * 0x10000-0x10FFFF 4 bytes 4 bytes
451 *
452 * This function converts UTF8 chars to a code point first. Then, converts it
453 * to UTF16 code unit(s).
454 *
455 * Return: CGPT_OK --- all characters were converted successfully.
456 * CGPT_FAILED --- conversion error, e.g. output buffer is too short.
385 */ 457 */
386 void UTF8ToUTF16(const uint8_t *utf8, uint16_t *utf16, unsigned int maxoutput) 458 int UTF8ToUTF16(const uint8_t *utf8, uint16_t *utf16, unsigned int maxoutput)
387 { 459 {
388 size_t s16idx, s8idx; 460 size_t s16idx, s8idx;
389 uint32_t utfchar; 461 uint32_t code_point = 0;
462 unsigned int expected_units = 1;
463 unsigned int decoded_units = 1;
464 int retval = CGPT_OK;
390 465
391 if (!utf8 || !utf16 || !maxoutput) 466 if (!utf8 || !utf16 || !maxoutput)
392 return; 467 return CGPT_FAILED;
393 468
394 maxoutput--; /* plan for termination */ 469 maxoutput--; /* plan for termination */
395 470
396 for (s8idx = s16idx = 0; 471 for (s8idx = s16idx = 0;
397 utf8[s8idx] && maxoutput; 472 utf8[s8idx] && maxoutput;
398 s8idx++, maxoutput--) { 473 s8idx++) {
399 utfchar = utf8[s8idx]; 474 uint8_t code_unit;
400 utf16[s16idx++] = utfchar & 0x7F; 475 code_unit = utf8[s8idx];
476
477 if (expected_units != decoded_units) {
478 /* Trailing bytes of multi-byte character */
479 if ((code_unit & 0xC0) == 0x80) {
480 code_point = (code_point << 6) | (code_unit & 0x3F);
481 ++decoded_units;
482 } else {
483 /* Unexpected code unit. */
484 retval = CGPT_FAILED;
485 break;
486 }
487 } else {
488 /* parsing a new code point. */
489 decoded_units = 1;
490 if (code_unit <= 0x7F) {
491 code_point = code_unit;
492 expected_units = 1;
493 } else if (code_unit <= 0xBF) {
494 /* 0x80-0xBF must NOT be the heading byte unit of a new code point. */
495 retval = CGPT_FAILED;
496 break;
497 } else if (code_unit >= 0xC2 && code_unit <= 0xDF) {
498 code_point = code_unit & 0x1F;
499 expected_units = 2;
500 } else if (code_unit >= 0xE0 && code_unit <= 0xEF) {
501 code_point = code_unit & 0x0F;
502 expected_units = 3;
503 } else if (code_unit >= 0xF0 && code_unit <= 0xF4) {
504 code_point = code_unit & 0x07;
505 expected_units = 4;
506 } else {
507 /* illegal code unit: 0xC0-0xC1, 0xF5-0xFF */
508 retval = CGPT_FAILED;
509 break;
510 }
511 }
512
513 /* If no more unit is needed, output the UTF16 unit(s). */
514 if ((retval == CGPT_OK) &&
515 (expected_units == decoded_units)) {
516 /* Check if the encoding is the shortest possible UTF-8 sequence. */
517 switch (expected_units) {
518 case 2:
519 if (code_point <= 0x7F) retval = CGPT_FAILED;
520 break;
521 case 3:
522 if (code_point <= 0x7FF) retval = CGPT_FAILED;
523 break;
524 case 4:
525 if (code_point <= 0xFFFF) retval = CGPT_FAILED;
526 break;
527 }
528 if (retval == CGPT_FAILED) break; /* leave immediately */
529
530 if ((code_point <= 0xD7FF) ||
531 (code_point >= 0xE000 && code_point <= 0xFFFF)) {
532 utf16[s16idx++] = code_point;
533 maxoutput -= 1;
534 } else if (code_point >= 0x10000 && code_point <= 0x10FFFF &&
535 maxoutput >= 2) {
536 utf16[s16idx++] = 0xD800 | ((code_point >> 10) - 0x0040);
537 utf16[s16idx++] = 0xDC00 | (code_point & 0x03FF);
538 maxoutput -= 2;
539 } else {
540 /* Three possibilities fall into here. All are failure cases.
541 * a. code point in the surrogate range (0xD800~0xDFFF), not a valid character
542 * b. invalid code point > 0x10FFFF
543 * c. buffer underrun
544 */
545 retval = CGPT_FAILED;
546 break;
547 }
548 }
401 } 549 }
550
551 /* A null-terminator shows up before the UTF8 sequence ends. */
552 if (expected_units != decoded_units) {
553 retval = CGPT_FAILED;
554 }
555
402 utf16[s16idx++] = 0; 556 utf16[s16idx++] = 0;
557 return retval;
403 } 558 }
404 559
405 struct { 560 struct {
406 Guid type; 561 Guid type;
407 char *name; 562 char *name;
408 char *description; 563 char *description;
409 } supported_types[] = { 564 } supported_types[] = {
410 {GPT_ENT_TYPE_CHROMEOS_KERNEL, "kernel", "ChromeOS kernel"}, 565 {GPT_ENT_TYPE_CHROMEOS_KERNEL, "kernel", "ChromeOS kernel"},
411 {GPT_ENT_TYPE_CHROMEOS_ROOTFS, "rootfs", "ChromeOS rootfs"}, 566 {GPT_ENT_TYPE_CHROMEOS_ROOTFS, "rootfs", "ChromeOS rootfs"},
412 {GPT_ENT_TYPE_LINUX_DATA, "data", "Linux data"}, 567 {GPT_ENT_TYPE_LINUX_DATA, "data", "Linux data"},
(...skipping 281 matching lines...) Expand 10 before | Expand all | Expand 10 after
694 849
695 void PMBRToStr(struct pmbr *pmbr, char *str, unsigned int buflen) { 850 void PMBRToStr(struct pmbr *pmbr, char *str, unsigned int buflen) {
696 char buf[GUID_STRLEN]; 851 char buf[GUID_STRLEN];
697 if (IsZero(&pmbr->boot_guid)) { 852 if (IsZero(&pmbr->boot_guid)) {
698 require(snprintf(str, buflen, "PMBR") < buflen); 853 require(snprintf(str, buflen, "PMBR") < buflen);
699 } else { 854 } else {
700 GuidToStr(&pmbr->boot_guid, buf, sizeof(buf)); 855 GuidToStr(&pmbr->boot_guid, buf, sizeof(buf));
701 require(snprintf(str, buflen, "PMBR (Boot GUID: %s)", buf) < buflen); 856 require(snprintf(str, buflen, "PMBR (Boot GUID: %s)", buf) < buflen);
702 } 857 }
703 } 858 }
OLDNEW
« no previous file with comments | « cgpt/cgpt.h ('k') | cgpt/cmd_add.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698