OLD | NEW |
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 library utf.util; | 5 library utf.util; |
6 | 6 |
7 import 'constants.dart'; | 7 import 'constants.dart'; |
8 import 'list_range.dart'; | 8 import 'list_range.dart'; |
9 import 'utf_16_code_unit_decoder.dart'; | 9 import 'utf_16_code_unit_decoder.dart'; |
10 | 10 |
/**
 * Decodes a range of UTF-16 code units into a list of code points.
 *
 * The range is described by wrapping [utf16CodeUnits], [offset] and [length]
 * in a [ListRange]; [offset] defaults to 0. How an omitted [length] is
 * interpreted is decided by [ListRange] (presumably "to the end of the list"
 * -- confirm against list_range.dart).
 *
 * [replacementCodepoint] is forwarded unchanged to the
 * [Utf16CodeUnitDecoder]; presumably it is what the decoder substitutes for
 * malformed input -- confirm against utf_16_code_unit_decoder.dart.
 *
 * Returns a fixed-length list holding exactly the code points the decoder
 * produced.
 */
List<int> utf16CodeUnitsToCodepoints(List<int> utf16CodeUnits,
    [int offset = 0,
    int length,
    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
  final ListRangeIterator units =
      new ListRange(utf16CodeUnits, offset, length).iterator;
  final Utf16CodeUnitDecoder decoder =
      new Utf16CodeUnitDecoder.fromListRangeIterator(
          units, replacementCodepoint);

  // Size the buffer from the number of code units the iterator reports as
  // remaining before decoding starts; the decoder may yield fewer code points
  // than that, which is handled by the trimming step below.
  final List<int> decoded = new List<int>(units.remaining);
  int count = 0;
  for (; decoder.moveNext(); count++) {
    decoded[count] = decoder.current;
  }

  // The buffer was filled completely: hand it back without copying.
  if (count == decoded.length) {
    return decoded;
  }

  // Otherwise copy the filled prefix into an exactly sized list.
  final List<int> trimmed = new List<int>(count);
  trimmed.setRange(0, count, decoded);
  return trimmed;
}
34 | 35 |
/**
 * Encodes a range of code points as UTF-16 code units.
 *
 * The range is described by wrapping [codepoints], [offset] and [length] in
 * a [ListRange]; [offset] defaults to 0.
 *
 * Each code point is handled as follows:
 *
 * * values in `[0, UNICODE_UTF16_RESERVED_LO)` or in
 *   `(UNICODE_UTF16_RESERVED_HI, UNICODE_PLANE_ONE_MAX]` are emitted
 *   unchanged as a single code unit;
 * * values in `(UNICODE_PLANE_ONE_MAX, UNICODE_VALID_RANGE_MAX]` are emitted
 *   as two code units (a surrogate pair, assuming the constants hold the
 *   standard UTF-16 values);
 * * any other value is replaced by [replacementCodepoint], written as a
 *   single code unit exactly as given.
 *
 * Throws an [ArgumentError] if a value of the last kind is encountered while
 * [replacementCodepoint] is `null`.
 *
 * Returns a fixed-length list of the resulting code units.
 */
List<int> codepointsToUtf16CodeUnits(List<int> codepoints,
    [int offset = 0,
    int length,
    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
  // True when the code point is stored as one code unit without conversion.
  bool fitsInOneUnit(int codepoint) =>
      (codepoint >= 0 && codepoint < UNICODE_UTF16_RESERVED_LO) ||
      (codepoint > UNICODE_UTF16_RESERVED_HI &&
          codepoint <= UNICODE_PLANE_ONE_MAX);

  // True when the code point lies above the single-unit range but is still
  // within the valid range, so it is written as two code units.
  bool needsTwoUnits(int codepoint) =>
      codepoint > UNICODE_PLANE_ONE_MAX &&
      codepoint <= UNICODE_VALID_RANGE_MAX;

  final ListRange range = new ListRange(codepoints, offset, length);

  // First pass: count the code units so the output can be a fixed-length
  // list. Invalid values count as one unit (the slot for the replacement).
  int unitCount = 0;
  for (int codepoint in range) {
    unitCount +=
        (!fitsInOneUnit(codepoint) && needsTwoUnits(codepoint)) ? 2 : 1;
  }

  // Second pass: write the code units.
  final List<int> units = new List<int>(unitCount);
  int next = 0;
  for (int codepoint in range) {
    if (fitsInOneUnit(codepoint)) {
      units[next++] = codepoint;
      continue;
    }

    if (needsTwoUnits(codepoint)) {
      // Split the offset-adjusted value into its high and low parts and add
      // each to the corresponding surrogate base.
      final int base = codepoint - UNICODE_UTF16_OFFSET;
      final int first = UNICODE_UTF16_SURROGATE_UNIT_0_BASE +
          ((base & UNICODE_UTF16_HI_MASK) >> 10);
      final int second =
          UNICODE_UTF16_SURROGATE_UNIT_1_BASE + (base & UNICODE_UTF16_LO_MASK);
      units[next++] = first;
      units[next++] = second;
      continue;
    }

    // Neither encodable form applies: substitute, or fail when the caller
    // disabled substitution by passing null.
    if (replacementCodepoint == null) {
      throw new ArgumentError("Invalid encoding");
    }
    units[next++] = replacementCodepoint;
  }
  return units;
}
OLD | NEW |