cgpt/cgpt_common.c - Issue 5104009: Fix to initialize the code point.

Side by Side Diff: cgpt/cgpt_common.c

Issue 5104009: Fix to initialize the code point. (Closed) Base URL: ssh://git@gitrw.chromium.org:9222/vboot_reference.git

Patch Set: Fix according to code review. Created 10 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 /* Copyright (c) 2010 The Chromium OS Authors. All rights reserved.	1 /* Copyright (c) 2010 The Chromium OS Authors. All rights reserved.

2 * Use of this source code is governed by a BSD-style license that can be	2 * Use of this source code is governed by a BSD-style license that can be

3 * found in the LICENSE file.	3 * found in the LICENSE file.

4 *	4 *

5 * Utility for ChromeOS-specific GPT partitions, Please see corresponding .c	5 * Utility for ChromeOS-specific GPT partitions, Please see corresponding .c

6 * files for more details.	6 * files for more details.

7 */	7 */

8	8

9 #include "cgpt.h"	9 #include "cgpt.h"

10	10

(...skipping 332 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
343 le16toh(guid->u.Uuid.time_high_and_version),	343 le16toh(guid->u.Uuid.time_high_and_version),

344 guid->u.Uuid.clock_seq_high_and_reserved,	344 guid->u.Uuid.clock_seq_high_and_reserved,

345 guid->u.Uuid.clock_seq_low,	345 guid->u.Uuid.clock_seq_low,

346 guid->u.Uuid.node[0], guid->u.Uuid.node[1],	346 guid->u.Uuid.node[0], guid->u.Uuid.node[1],

347 guid->u.Uuid.node[2], guid->u.Uuid.node[3],	347 guid->u.Uuid.node[2], guid->u.Uuid.node[3],

348 guid->u.Uuid.node[4], guid->u.Uuid.node[5]) == GUID_STRLEN-1);	348 guid->u.Uuid.node[4], guid->u.Uuid.node[5]) == GUID_STRLEN-1);

349 }	349 }

350	350

351 /* Convert possibly unterminated UTF16 string to UTF8.	351 /* Convert possibly unterminated UTF16 string to UTF8.

352 * Caller must prepare enough space for UTF8, which could be up to	352 * Caller must prepare enough space for UTF8, which could be up to

353 * twice the number of UTF16 chars plus the terminating '\0'.	353 * twice the byte length of UTF16 string plus the terminating '\0'.

354 * FIXME(wfrichar): The original implementation had security issues. As a	354 * See the following table for encoding lengths.

355 * temporary fix, I'm making this ONLY support ASCII codepoints. Bug 7542	355 *

356 * (http://code.google.com/p/chromium-os/issues/detail?id=7542) is filed to fix	356 * Code point UTF16 UTF8

357 * this.	357 * 0x0000-0x007F 2 bytes 1 byte

	358 * 0x0080-0x07FF 2 bytes 2 bytes

	359 * 0x0800-0xFFFF 2 bytes 3 bytes

	360 * 0x10000-0x10FFFF 4 bytes 4 bytes

	361 *

	362 * This function uses a simple state machine to convert UTF-16 char(s) to

	363 * a code point. Once a code point is parsed out, the state machine throws

	364 * out the sequential UTF-8 chars all at once.

	365 *

	366 * Return: CGPT_OK --- all characters are converted successfully.

	367 * CGPT_FAILED --- conversion error, e.g. the output buffer is too short.

358 */	368 */

359 void UTF16ToUTF8(const uint16_t *utf16, unsigned int maxinput,	369 int UTF16ToUTF8(const uint16_t *utf16, unsigned int maxinput,

360 uint8_t *utf8, unsigned int maxoutput)	370 uint8_t *utf8, unsigned int maxoutput)

361 {	371 {

362 size_t s16idx, s8idx;	372 size_t s16idx, s8idx;

363 uint32_t utfchar;	373 uint32_t code_point = 0;

	374 int code_point_ready = 1; // code point is ready to output.

	375 int retval = CGPT_OK;

364	376

365 if (!utf16 \|\| !maxinput \|\| !utf8 \|\| !maxoutput)	377 if (!utf16 \|\| !maxinput \|\| !utf8 \|\| !maxoutput)

366 return;	378 return CGPT_FAILED;

367	379

368 maxoutput--; /* plan for termination now */	380 maxoutput--; /* plan for termination now */

369	381

370 for (s16idx = s8idx = 0;	382 for (s16idx = s8idx = 0;

371 s16idx < maxinput && utf16[s16idx] && maxoutput;	383 s16idx < maxinput && utf16[s16idx] && maxoutput;

372 s16idx++, maxoutput--) {	384 s16idx++) {

373 utfchar = le16toh(utf16[s16idx]);	385 uint16_t codeunit = le16toh(utf16[s16idx]);

374 utf8[s8idx++] = utfchar & 0x7F;	386

	387 if (code_point_ready) {

	388 if (codeunit >= 0xD800 && codeunit <= 0xDBFF) {

	389 /* high surrogate, need the low surrogate. */

	390 code_point_ready = 0;

	391 code_point = (codeunit & 0x03FF) + 0x0040;

	392 } else {

	393 /* BMP char, output it. */

	394 code_point = codeunit;

	395 }

	396 } else {

	397 /* expect the low surrogate */

	398 if (codeunit >= 0xDC00 && codeunit <= 0xDFFF) {

	399 code_point = (code_point << 10) \| (codeunit & 0x03FF);

	400 code_point_ready = 1;

	401 } else {

	402 /* the second code unit is NOT the low surrogate. Unexpected. */

	403 code_point_ready = 0;

	404 retval = CGPT_FAILED;

	405 break;

	406 }

	407 }

	408

	409 /* If UTF code point is ready, output it. */

	410 if (code_point_ready) {

	411 require(code_point <= 0x10FFFF);

	412 if (code_point <= 0x7F && maxoutput >= 1) {

	413 maxoutput -= 1;

	414 utf8[s8idx++] = code_point & 0x7F;

	415 } else if (code_point <= 0x7FF && maxoutput >= 2) {

	416 maxoutput -= 2;

	417 utf8[s8idx++] = 0xC0 \| (code_point >> 6);

	418 utf8[s8idx++] = 0x80 \| (code_point & 0x3F);

	419 } else if (code_point <= 0xFFFF && maxoutput >= 3) {

	420 maxoutput -= 3;

	421 utf8[s8idx++] = 0xE0 \| (code_point >> 12);

	422 utf8[s8idx++] = 0x80 \| ((code_point >> 6) & 0x3F);

	423 utf8[s8idx++] = 0x80 \| (code_point & 0x3F);

	424 } else if (code_point <= 0x10FFFF && maxoutput >= 4) {

	425 maxoutput -= 4;

	426 utf8[s8idx++] = 0xF0 \| (code_point >> 18);

	427 utf8[s8idx++] = 0x80 \| ((code_point >> 12) & 0x3F);

	428 utf8[s8idx++] = 0x80 \| ((code_point >> 6) & 0x3F);

	429 utf8[s8idx++] = 0x80 \| (code_point & 0x3F);

	430 } else {

	431 /* output buffer too small */

	432 retval = CGPT_FAILED;

	433 break;

	434 }

	435 }

375 }	436 }

376 utf8[s8idx++] = 0;	437 utf8[s8idx++] = 0;

	438 return retval;

377 }	439 }

378	440

379 /* Convert UTF8 string to UTF16. The UTF8 string must be null-terminated.	441 /* Convert UTF8 string to UTF16. The UTF8 string must be null-terminated.

380 * Caller must prepare enough space for UTF16, including a terminating 0x0000.	442 * Caller must prepare enough space for UTF16, including a terminating 0x0000.

381 * FIXME(wfrichar): The original implementation had security issues. As a	443 * See the following table for encoding lengths. In any case, the caller

382 * temporary fix, I'm making this ONLY support ASCII codepoints. Bug 7542	444 * just needs to prepare the byte length of UTF8 plus the terminating 0x0000.

383 * (http://code.google.com/p/chromium-os/issues/detail?id=7542) is filed to fix	445 *

384 * this.	446 * Code point UTF16 UTF8

	447 * 0x0000-0x007F 2 bytes 1 byte

	448 * 0x0080-0x07FF 2 bytes 2 bytes

	449 * 0x0800-0xFFFF 2 bytes 3 bytes

	450 * 0x10000-0x10FFFF 4 bytes 4 bytes

	451 *

	452 * This function converts UTF8 chars to a code point first. Then, converts it

	453 * to UTF16 code unit(s).

	454 *

	455 * Return: CGPT_OK --- all characters are converted successfully.

	456 * CGPT_FAILED --- conversion error, e.g. the output buffer is too short.

385 */	457 */

386 void UTF8ToUTF16(const uint8_t *utf8, uint16_t *utf16, unsigned int maxoutput)	458 int UTF8ToUTF16(const uint8_t *utf8, uint16_t *utf16, unsigned int maxoutput)

387 {	459 {

388 size_t s16idx, s8idx;	460 size_t s16idx, s8idx;

389 uint32_t utfchar;	461 uint32_t code_point = 0;

	462 unsigned int expected_units = 1;

	463 unsigned int decoded_units = 1;

	464 int retval = CGPT_OK;

390	465

391 if (!utf8 \|\| !utf16 \|\| !maxoutput)	466 if (!utf8 \|\| !utf16 \|\| !maxoutput)

392 return;	467 return CGPT_FAILED;

393	468

394 maxoutput--; /* plan for termination */	469 maxoutput--; /* plan for termination */

395	470

396 for (s8idx = s16idx = 0;	471 for (s8idx = s16idx = 0;

397 utf8[s8idx] && maxoutput;	472 utf8[s8idx] && maxoutput;

398 s8idx++, maxoutput--) {	473 s8idx++) {

399 utfchar = utf8[s8idx];	474 uint8_t code_unit;

400 utf16[s16idx++] = utfchar & 0x7F;	475 code_unit = utf8[s8idx];

	476

	477 if (expected_units != decoded_units) {

	478 /* Trailing bytes of multi-byte character */

	479 if ((code_unit & 0xC0) == 0x80) {

	480 code_point = (code_point << 6) \| (code_unit & 0x3F);

	481 ++decoded_units;

	482 } else {

	483 /* Unexpected code unit. */

	484 retval = CGPT_FAILED;

	485 break;

	486 }

	487 } else {

	488 /* parsing a new code point. */

	489 decoded_units = 1;

	490 if (code_unit <= 0x7F) {

	491 code_point = code_unit;

	492 expected_units = 1;

	493 } else if (code_unit <= 0xBF) {

	494 /* 0x80-0xBF must NOT be the heading byte unit of a new code point. */

	495 retval = CGPT_FAILED;

	496 break;

	497 } else if (code_unit >= 0xC2 && code_unit <= 0xDF) {

	498 code_point = code_unit & 0x1F;

	499 expected_units = 2;

	500 } else if (code_unit >= 0xE0 && code_unit <= 0xEF) {

	501 code_point = code_unit & 0x0F;

	502 expected_units = 3;

	503 } else if (code_unit >= 0xF0 && code_unit <= 0xF4) {

	504 code_point = code_unit & 0x07;

	505 expected_units = 4;

	506 } else {

	507 /* illegal code unit: 0xC0-0xC1, 0xF5-0xFF */

	508 retval = CGPT_FAILED;

	509 break;

	510 }

	511 }

	512

	513 /* If no more unit is needed, output the UTF16 unit(s). */

	514 if ((retval == CGPT_OK) &&

	515 (expected_units == decoded_units)) {

	516 /* Check if the encoding is the shortest possible UTF-8 sequence. */

	517 switch (expected_units) {

	518 case 2:

	519 if (code_point <= 0x7F) retval = CGPT_FAILED;

	520 break;

	521 case 3:

	522 if (code_point <= 0x7FF) retval = CGPT_FAILED;

	523 break;

	524 case 4:

	525 if (code_point <= 0xFFFF) retval = CGPT_FAILED;

	526 break;

	527 }

	528 if (retval == CGPT_FAILED) break; /* leave immediately */

	529

	530 if ((code_point <= 0xD7FF) \|\|

	531 (code_point >= 0xE000 && code_point <= 0xFFFF)) {

	532 utf16[s16idx++] = code_point;

	533 maxoutput -= 1;

	534 } else if (code_point >= 0x10000 && code_point <= 0x10FFFF &&

	535 maxoutput >= 2) {

	536 utf16[s16idx++] = 0xD800 \| ((code_point >> 10) - 0x0040);

	537 utf16[s16idx++] = 0xDC00 \| (code_point & 0x03FF);

	538 maxoutput -= 2;

	539 } else {

	540 /* Three possibilities fall in here. All are failure cases.

	541 * a. surrogate code point (0xD800~0xDFFF), which is not a valid character

	542 * b. invalid code point > 0x10FFFF

	543 * c. output buffer too small

	544 */

	545 retval = CGPT_FAILED;

	546 break;

	547 }

	548 }

401 }	549 }

	550

	551 /* A null-terminator shows up before the UTF8 sequence ends. */

	552 if (expected_units != decoded_units) {

	553 retval = CGPT_FAILED;

	554 }

	555

402 utf16[s16idx++] = 0;	556 utf16[s16idx++] = 0;

	557 return retval;

403 }	558 }

404	559

405 struct {	560 struct {

406 Guid type;	561 Guid type;

407 char *name;	562 char *name;

408 char *description;	563 char *description;

409 } supported_types[] = {	564 } supported_types[] = {

410 {GPT_ENT_TYPE_CHROMEOS_KERNEL, "kernel", "ChromeOS kernel"},	565 {GPT_ENT_TYPE_CHROMEOS_KERNEL, "kernel", "ChromeOS kernel"},

411 {GPT_ENT_TYPE_CHROMEOS_ROOTFS, "rootfs", "ChromeOS rootfs"},	566 {GPT_ENT_TYPE_CHROMEOS_ROOTFS, "rootfs", "ChromeOS rootfs"},

412 {GPT_ENT_TYPE_LINUX_DATA, "data", "Linux data"},	567 {GPT_ENT_TYPE_LINUX_DATA, "data", "Linux data"},

(...skipping 281 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
694	849

695 void PMBRToStr(struct pmbr *pmbr, char *str, unsigned int buflen) {	850 void PMBRToStr(struct pmbr *pmbr, char *str, unsigned int buflen) {

696 char buf[GUID_STRLEN];	851 char buf[GUID_STRLEN];

697 if (IsZero(&pmbr->boot_guid)) {	852 if (IsZero(&pmbr->boot_guid)) {

698 require(snprintf(str, buflen, "PMBR") < buflen);	853 require(snprintf(str, buflen, "PMBR") < buflen);

699 } else {	854 } else {

700 GuidToStr(&pmbr->boot_guid, buf, sizeof(buf));	855 GuidToStr(&pmbr->boot_guid, buf, sizeof(buf));

701 require(snprintf(str, buflen, "PMBR (Boot GUID: %s)", buf) < buflen);	856 require(snprintf(str, buflen, "PMBR (Boot GUID: %s)", buf) < buflen);

702 }	857 }

703 }	858 }

OLD	NEW

« no previous file with comments | « cgpt/cgpt.h ('k') | cgpt/cmd_add.c » ('j') | no next file with comments »