| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 * Copyright (c) 2011 The Native Client Authors. All rights reserved. | |
| 3 * Use of this source code is governed by a BSD-style license that can be | |
| 4 * found in the LICENSE file. | |
| 5 */ | |
| 6 | |
| 7 #include "native_client/src/include/nacl_platform.h" | |
| 8 #include "native_client/src/trusted/plugin/string_encoding.h" | |
| 9 | |
| 10 | |
| 11 namespace plugin { | |
| 12 | |
| 13 // PPAPI requires us to encode byte strings as UTF-8. Unfortunately | |
| 14 // this is rather inefficient, in terms of both space and time. | |
| 15 | |
// Encodes an arbitrary byte string as UTF-8 by treating every input byte as
// a code point in the range U+0000..U+00FF.
//
// input:             the bytes to encode (may contain embedded zero bytes).
// input_byte_count:  the number of bytes to read from |input|.
// result:            on success, receives a malloc()-allocated buffer holding
//                    the encoded bytes followed by a null terminator.  The
//                    caller takes ownership and must release it with free().
// result_byte_count: on success, receives the number of encoded bytes, not
//                    counting the null terminator.
//
// Returns true on success.  Returns false, leaving |*result| and
// |*result_byte_count| untouched, if the output size cannot be represented
// in a size_t or if the allocation fails.
bool ByteStringAsUTF8(const char* input, size_t input_byte_count,
                      char** result, size_t* result_byte_count) {
  // UTF-8 encoding may result in a 2x size increase at the most, and we
  // append a null terminator for convenience, so the buffer needs
  // 2 * input_byte_count + 1 bytes.  Reject inputs for which that
  // computation would wrap around: a wrapped size would yield a buffer
  // that is too small for the loop below.
  // TODO(mseaborn): We could do a pre-scan to get the real size.
  // If we wanted to be faster, we could do a word-by-word pre-scan
  // to check for top-bit-set characters.
  const size_t kSizeMax = static_cast<size_t>(-1);
  if (input_byte_count > (kSizeMax - 1) / 2) {
    return false;
  }
  const size_t max_output_size = input_byte_count * 2;
  char* output = reinterpret_cast<char*>(malloc(max_output_size + 1));
  if (output == NULL) {
    return false;
  }
  char* dest_ptr = output;
  for (size_t i = 0; i < input_byte_count; i++) {
    // Go through unsigned char so that bytes >= 0x80 are not sign-extended.
    const unsigned char ch = input[i];
    if (ch < 128) {
      // Code results in a one byte encoding.
      *dest_ptr++ = static_cast<char>(ch);
    } else {
      // Code results in a two byte encoding: 110000xx 10xxxxxx.
      *dest_ptr++ = static_cast<char>(0xc0 | (ch >> 6));    /* Top 2 bits */
      *dest_ptr++ = static_cast<char>(0x80 | (ch & 0x3f));  /* Bottom 6 bits */
    }
  }
  *dest_ptr = 0;
  *result = output;
  *result_byte_count = dest_ptr - output;
  return true;
}
| 45 | |
// Decodes UTF-8 text back into a byte string.  This is the inverse of
// ByteStringAsUTF8(): only code points in the range U+0000..U+00FF are
// accepted, i.e. one byte sequences and those two byte sequences that
// decode to a value in 0x80..0xFF.
//
// input:             the UTF-8 bytes to decode.
// input_byte_count:  the number of bytes to read from |input|.
// result:            on success, receives a malloc()-allocated buffer holding
//                    the decoded bytes followed by a null terminator.  The
//                    caller takes ownership and must release it with free().
// result_byte_count: on success, receives the number of decoded bytes, not
//                    counting the null terminator.
//
// Returns true on success.  Returns false, leaving |*result| and
// |*result_byte_count| untouched, if the allocation fails or if the input
// contains a truncated sequence, a lead byte that is not of the form
// 110xxxxx, a continuation byte that is not of the form 10xxxxxx, an overly
// long encoding, or a code point that does not fit into a byte.
bool ByteStringFromUTF8(const char* input, size_t input_byte_count,
                        char** result, size_t* result_byte_count) {
  // The output cannot be larger than the input, plus one byte for the null
  // terminator.  Make sure that the "+ 1" cannot wrap around to zero.
  if (input_byte_count == static_cast<size_t>(-1)) {
    return false;
  }
  char* output = reinterpret_cast<char*>(malloc(input_byte_count + 1));
  if (output == NULL) {
    return false;
  }
  char* dest_ptr = output;
  size_t i;
  for (i = 0; i < input_byte_count; ) {
    // Go through unsigned char so that the bit tests below are not
    // affected by sign extension.
    const unsigned char ch = input[i];
    if ((ch & 0x80) == 0) {
      // One byte encoding.
      *dest_ptr++ = static_cast<char>(ch);
      i++;
    } else {
      if (i == input_byte_count - 1) {
        // Invalid UTF-8: incomplete sequence.
        goto fail;
      }
      // Check that this is a two byte encoding.
      // The first character must contain 110xxxxxb and the
      // second must contain 10xxxxxxb.
      const unsigned char ch2 = input[i + 1];
      if ((ch & 0xe0) != 0xc0) {
        // Either a lead byte of a longer (>2 byte) encoding or a stray
        // continuation byte.
        goto fail;
      }
      if ((ch2 & 0xc0) != 0x80) {
        // Invalid UTF-8.
        goto fail;
      }
      const uint32_t value = ((static_cast<uint32_t>(ch) & 0x1f) << 6) |
                             (static_cast<uint32_t>(ch2) & 0x3f);
      if (value < 128) {
        // Invalid UTF-8: overly long encoding.
        goto fail;
      }
      if (value >= 0x100) {
        // Overly large character.  Will not fit into a byte.
        goto fail;
      }
      *dest_ptr++ = static_cast<char>(value);
      i += 2;
    }
  }
  *dest_ptr = 0;
  *result = output;
  *result_byte_count = dest_ptr - output;
  return true;
 fail:
  free(output);
  return false;
}
| 99 | |
| 100 } // namespace plugin | |
| OLD | NEW |