| OLD | NEW |
| 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 library utf.util; | 5 library utf.util; |
| 6 | 6 |
| 7 import 'constants.dart'; | 7 import 'constants.dart'; |
| 8 import 'list_range.dart'; | 8 import 'list_range.dart'; |
| 9 import 'utf_16_code_unit_decoder.dart'; | 9 import 'utf_16_code_unit_decoder.dart'; |
| 10 | 10 |
/**
 * Converts a range of UTF-16 code units into a list of code points.
 *
 * The slice of [utf16CodeUnits] described by [offset] and [length] is wrapped
 * in a [ListRange] and its iterator is handed to a [Utf16CodeUnitDecoder]
 * together with [replacementCodepoint] (presumably the value the decoder
 * substitutes for malformed input; see [Utf16CodeUnitDecoder] to confirm).
 *
 * Returns a fixed-length list holding exactly the code points the decoder
 * produced.
 */
List<int> utf16CodeUnitsToCodepoints(List<int> utf16CodeUnits,
    [int offset = 0,
    int length,
    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
  ListRange range = new ListRange(utf16CodeUnits, offset, length);
  ListRangeIterator units = range.iterator;
  Utf16CodeUnitDecoder decoder =
      new Utf16CodeUnitDecoder.fromListRangeIterator(
          units, replacementCodepoint);

  // Reserve one slot per code unit still pending in the iterator; the decoder
  // may yield fewer code points than that, which is handled by the trim below.
  List<int> decoded = new List<int>(units.remaining);
  int count = 0;
  while (decoder.moveNext()) {
    decoded[count] = decoder.current;
    count++;
  }

  if (count != decoded.length) {
    // Fewer code points than reserved slots: copy into a right-sized list.
    List<int> trimmed = new List<int>(count);
    trimmed.setRange(0, count, decoded);
    return trimmed;
  }
  return decoded;
}
| 34 | 35 |
/**
 * Encodes a range of code points as UTF-16 code units.
 *
 * The slice of [codepoints] described by [offset] and [length] is walked
 * twice: once to count the code units needed, and once to fill a fixed-length
 * result list of exactly that size.
 *
 * - A non-negative value below [UNICODE_UTF16_RESERVED_LO], or one above
 *   [UNICODE_UTF16_RESERVED_HI] and at most [UNICODE_PLANE_ONE_MAX], is
 *   emitted unchanged as one code unit.
 * - A value above [UNICODE_PLANE_ONE_MAX] and at most
 *   [UNICODE_VALID_RANGE_MAX] is emitted as a surrogate pair.
 * - Any other value is replaced by [replacementCodepoint], written as a
 *   single code unit without further checks. If [replacementCodepoint] is
 *   null an [ArgumentError] is thrown instead.
 */
List<int> codepointsToUtf16CodeUnits(List<int> codepoints,
    [int offset = 0,
    int length,
    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
  ListRange range = new ListRange(codepoints, offset, length);

  // Pass 1: work out how many code units the output needs. Only values in the
  // surrogate-pair range take two units; everything else (including values
  // that will later be replaced) takes one.
  int unitCount = 0;
  for (int cp in range) {
    bool fitsOneUnit = (cp >= 0 && cp < UNICODE_UTF16_RESERVED_LO) ||
        (cp > UNICODE_UTF16_RESERVED_HI && cp <= UNICODE_PLANE_ONE_MAX);
    bool needsPair =
        cp > UNICODE_PLANE_ONE_MAX && cp <= UNICODE_VALID_RANGE_MAX;
    unitCount += (!fitsOneUnit && needsPair) ? 2 : 1;
  }

  // Pass 2: emit the code units.
  List<int> units = new List<int>(unitCount);
  int pos = 0;
  for (int cp in range) {
    bool fitsOneUnit = (cp >= 0 && cp < UNICODE_UTF16_RESERVED_LO) ||
        (cp > UNICODE_UTF16_RESERVED_HI && cp <= UNICODE_PLANE_ONE_MAX);
    if (fitsOneUnit) {
      units[pos++] = cp;
      continue;
    }

    bool needsPair =
        cp > UNICODE_PLANE_ONE_MAX && cp <= UNICODE_VALID_RANGE_MAX;
    if (needsPair) {
      // Split the offset-adjusted value into its high and low parts.
      int shifted = cp - UNICODE_UTF16_OFFSET;
      int highPart = (shifted & UNICODE_UTF16_HI_MASK) >> 10;
      int lowPart = shifted & UNICODE_UTF16_LO_MASK;
      units[pos++] = UNICODE_UTF16_SURROGATE_UNIT_0_BASE + highPart;
      units[pos++] = UNICODE_UTF16_SURROGATE_UNIT_1_BASE + lowPart;
      continue;
    }

    // Value is neither a single-unit nor a surrogate-pair code point.
    if (replacementCodepoint == null) {
      throw new ArgumentError("Invalid encoding");
    }
    units[pos++] = replacementCodepoint;
  }
  return units;
}
| OLD | NEW |