OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. |
| 4 |
| 5 library utf.util; |
| 6 |
| 7 import 'constants.dart'; |
| 8 import 'list_range.dart'; |
| 9 import 'utf_16_code_unit_decoder.dart'; |
| 10 |
/**
 * Converts a range of UTF-16 code units into a list of code points.
 *
 * The range of [utf16CodeUnits] to read is described by [offset] and
 * [length], which are handed unchanged to [ListRange]. Decoding itself is
 * delegated to [Utf16CodeUnitDecoder], which also receives
 * [replacementCodepoint] (presumably substituted for input it cannot
 * decode; see the decoder for the exact contract).
 *
 * The returned list is fixed-length and holds exactly the values produced by
 * the decoder, in order.
 */
List<int> utf16CodeUnitsToCodepoints(
    List<int> utf16CodeUnits, [int offset = 0, int length,
    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
  ListRange range = new ListRange(utf16CodeUnits, offset, length);
  ListRangeIterator units = range.iterator;
  Utf16CodeUnitDecoder decoder =
      new Utf16CodeUnitDecoder.fromListRangeIterator(
          units, replacementCodepoint);

  // Size the scratch buffer from the number of code units still pending on
  // the iterator; it is trimmed below if the decoder yields fewer values.
  List<int> decoded = new List<int>(units.remaining);
  int count = 0;
  for (; decoder.moveNext(); count++) {
    decoded[count] = decoder.current;
  }

  if (count != decoded.length) {
    // Fewer code points than code units were produced: copy the filled
    // prefix into an exactly sized list so no unset slots are returned.
    List<int> trimmed = new List<int>(count);
    trimmed.setRange(0, count, decoded);
    return trimmed;
  }
  return decoded;
}
| 34 |
/**
 * Encodes a range of code points as a list of UTF-16 code units.
 *
 * The range of [codepoints] to read is described by [offset] and [length],
 * which are handed unchanged to [ListRange]. Each value is handled as
 * follows:
 *
 * * Values in `0 .. UNICODE_UTF16_RESERVED_LO - 1` or in
 *   `UNICODE_UTF16_RESERVED_HI + 1 .. UNICODE_PLANE_ONE_MAX` are emitted
 *   unchanged as one code unit.
 * * Values in `UNICODE_PLANE_ONE_MAX + 1 .. UNICODE_VALID_RANGE_MAX` are
 *   emitted as two code units (a surrogate pair).
 * * Any other value is emitted as [replacementCodepoint]. If
 *   [replacementCodepoint] is `null`, an [ArgumentError] is thrown instead.
 *
 * The returned list is fixed-length.
 */
List<int> codepointsToUtf16CodeUnits(
    List<int> codepoints,
    [int offset = 0,
    int length,
    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
  ListRange range = new ListRange(codepoints, offset, length);

  // True when [codepoint] is written as a single code unit as-is.
  bool fitsInOneUnit(int codepoint) =>
      (codepoint >= 0 && codepoint < UNICODE_UTF16_RESERVED_LO) ||
      (codepoint > UNICODE_UTF16_RESERVED_HI &&
          codepoint <= UNICODE_PLANE_ONE_MAX);

  // True when [codepoint] lies in the range that is written as two units.
  bool needsSurrogatePair(int codepoint) =>
      codepoint > UNICODE_PLANE_ONE_MAX &&
      codepoint <= UNICODE_VALID_RANGE_MAX;

  // Pass 1: work out the exact output length so a fixed-length list can be
  // allocated. Values matching neither predicate count as one unit because
  // pass 2 writes the replacement as a single element.
  int unitCount = 0;
  for (int codepoint in range) {
    bool pair = !fitsInOneUnit(codepoint) && needsSurrogatePair(codepoint);
    unitCount += pair ? 2 : 1;
  }

  // Pass 2: fill the output.
  List<int> units = new List<int>(unitCount);
  int next = 0;
  for (int codepoint in range) {
    if (fitsInOneUnit(codepoint)) {
      units[next++] = codepoint;
      continue;
    }
    if (needsSurrogatePair(codepoint)) {
      // Rebase, then split: the high-masked bits shifted down by 10 go into
      // the first unit, the low-masked bits into the second.
      int rebased = codepoint - UNICODE_UTF16_OFFSET;
      int first = UNICODE_UTF16_SURROGATE_UNIT_0_BASE +
          ((rebased & UNICODE_UTF16_HI_MASK) >> 10);
      int second = UNICODE_UTF16_SURROGATE_UNIT_1_BASE +
          (rebased & UNICODE_UTF16_LO_MASK);
      units[next++] = first;
      units[next++] = second;
      continue;
    }
    if (replacementCodepoint == null) {
      throw new ArgumentError("Invalid encoding");
    }
    // NOTE(review): the replacement is stored verbatim as one element; a
    // replacement above UNICODE_PLANE_ONE_MAX would not be split into a
    // surrogate pair. Confirm callers only pass single-unit replacements.
    units[next++] = replacementCodepoint;
  }
  return units;
}
OLD | NEW |