Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(435)

Side by Side Diff: pkg/third_party/html5lib/lib/src/char_encodings.dart

Issue 178843003: [html5lib] triple slash comment style (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: remove extra check Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 /** Decodes bytes using the correct name. See [decodeBytes]. */ 1 /// Decodes bytes using the correct name. See [decodeBytes].
2 library char_encodings; 2 library char_encodings;
3 3
4 import 'dart:collection'; 4 import 'dart:collection';
5 import 'package:utf/utf.dart'; 5 import 'package:utf/utf.dart';
6 6
7 // TODO(jmesserly): this function is conspicuously absent from dart:utf. 7 // TODO(jmesserly): this function is conspicuously absent from dart:utf.
8 /** 8 /// Returns true if the [bytes] starts with a UTF-8 byte order mark.
9 * Returns true if the [bytes] starts with a UTF-8 byte order mark. 9 /// Since UTF-8 doesn't have byte order, it's somewhat of a misnomer, but it is
10 * Since UTF-8 doesn't have byte order, it's somewhat of a misnomer, but it is 10 /// used in HTML to detect the UTF-8 encoding.
11 * used in HTML to detect the UTF-8 encoding.
12 */
13 bool hasUtf8Bom(List<int> bytes, [int offset = 0, int length]) { 11 bool hasUtf8Bom(List<int> bytes, [int offset = 0, int length]) {
14 int end = length != null ? offset + length : bytes.length; 12 int end = length != null ? offset + length : bytes.length;
15 return (offset + 3) <= end && 13 return (offset + 3) <= end &&
16 bytes[offset] == 0xEF && 14 bytes[offset] == 0xEF &&
17 bytes[offset + 1] == 0xBB && 15 bytes[offset + 1] == 0xBB &&
18 bytes[offset + 2] == 0xBF; 16 bytes[offset + 2] == 0xBF;
19 } 17 }
20 18
21 // TODO(jmesserly): it's unfortunate that this has to be one-shot on the entire 19 // TODO(jmesserly): it's unfortunate that this has to be one-shot on the entire
22 // file, but dart:utf does not expose stream-based decoders yet. 20 // file, but dart:utf does not expose stream-based decoders yet.
23 /** 21 /// Decodes the [bytes] with the provided [encoding] and returns an iterable for
24 * Decodes the [bytes] with the provided [encoding] and returns an iterable for 22 /// the codepoints. Supports the major unicode encodings as well as ascii and
25 * the codepoints. Supports the major unicode encodings as well as ascii and 23 /// windows-1252 encodings.
26 * windows-1252 encodings.
27 */
28 Iterable<int> decodeBytes(String encoding, List<int> bytes, 24 Iterable<int> decodeBytes(String encoding, List<int> bytes,
29 [int offset = 0, int length, 25 [int offset = 0, int length,
30 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { 26 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
31 if (length == null) length = bytes.length; 27 if (length == null) length = bytes.length;
32 final replace = replacementCodepoint; 28 final replace = replacementCodepoint;
33 switch (encoding) { 29 switch (encoding) {
34 case 'ascii': 30 case 'ascii':
35 bytes = bytes.sublist(offset, offset + length); 31 bytes = bytes.sublist(offset, offset + length);
36 // TODO(jmesserly): this was taken from runtime/bin/string_stream.dart 32 // TODO(jmesserly): this was taken from runtime/bin/string_stream.dart
37 for (int byte in bytes) { 33 for (int byte in bytes) {
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
71 case 'utf-32-le': 67 case 'utf-32-le':
72 return decodeUtf32leAsIterable(bytes, offset, length, true, replace); 68 return decodeUtf32leAsIterable(bytes, offset, length, true, replace);
73 69
74 default: 70 default:
75 throw new ArgumentError('Encoding $encoding not supported'); 71 throw new ArgumentError('Encoding $encoding not supported');
76 } 72 }
77 } 73 }
78 74
79 75
80 // TODO(jmesserly): use dart:utf once http://dartbug.com/6476 is fixed. 76 // TODO(jmesserly): use dart:utf once http://dartbug.com/6476 is fixed.
81 /** 77 /// Returns the code points for the [input]. This works like [String.charCodes]
82 * Returns the code points for the [input]. This works like [String.charCodes] 78 /// but it decodes UTF-16 surrogate pairs.
83 * but it decodes UTF-16 surrogate pairs.
84 */
85 List<int> toCodepoints(String input) { 79 List<int> toCodepoints(String input) {
86 var newCodes = <int>[]; 80 var newCodes = <int>[];
87 for (int i = 0; i < input.length; i++) { 81 for (int i = 0; i < input.length; i++) {
88 var c = input.codeUnitAt(i); 82 var c = input.codeUnitAt(i);
89 if (0xD800 <= c && c <= 0xDBFF) { 83 if (0xD800 <= c && c <= 0xDBFF) {
90 int next = i + 1; 84 int next = i + 1;
91 if (next < input.length) { 85 if (next < input.length) {
92 var d = input.codeUnitAt(next); 86 var d = input.codeUnitAt(next);
93 if (0xDC00 <= d && d <= 0xDFFF) { 87 if (0xDC00 <= d && d <= 0xDFFF) {
94 c = 0x10000 + ((c - 0xD800) << 10) + (d - 0xDC00); 88 c = 0x10000 + ((c - 0xD800) << 10) + (d - 0xDC00);
95 i = next; 89 i = next;
96 } 90 }
97 } 91 }
98 } 92 }
99 newCodes.add(c); 93 newCodes.add(c);
100 } 94 }
101 return newCodes; 95 return newCodes;
102 } 96 }
103 97
104 98
105 /** 99 /// Decodes [windows-1252](http://en.wikipedia.org/wiki/Windows-1252) bytes as
106 * Decodes [windows-1252](http://en.wikipedia.org/wiki/Windows-1252) bytes as an 100 /// an iterable. Thus, the consumer can only convert as much of the input as
107 * iterable. Thus, the consumer can only convert as much of the input as needed. 101 /// needed. Set the [replacementCodepoint] to null to throw an [ArgumentError]
108 * Set the [replacementCodepoint] to null to throw an [ArgumentError] 102 /// rather than replace the bad value.
109 * rather than replace the bad value.
110 */
111 IterableWindows1252Decoder decodeWindows1252AsIterable(List<int> bytes, 103 IterableWindows1252Decoder decodeWindows1252AsIterable(List<int> bytes,
112 [int offset = 0, int length, 104 [int offset = 0, int length,
113 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { 105 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
114 return new IterableWindows1252Decoder(bytes, offset, length, 106 return new IterableWindows1252Decoder(bytes, offset, length,
115 replacementCodepoint); 107 replacementCodepoint);
116 } 108 }
117 109
118 110
119 /** 111 /// Return type of [decodeWindows1252AsIterable] and variants. The Iterable type
120 * Return type of [decodeWindows1252AsIterable] and variants. The Iterable type 112 /// provides an iterator on demand and the iterator will only translate bytes
121 * provides an iterator on demand and the iterator will only translate bytes 113 /// as requested by the user of the iterator. (Note: results are not cached.)
122 * as requested by the user of the iterator. (Note: results are not cached.)
123 */
124 class IterableWindows1252Decoder extends IterableBase<int> { 114 class IterableWindows1252Decoder extends IterableBase<int> {
125 final List<int> bytes; 115 final List<int> bytes;
126 final int offset; 116 final int offset;
127 final int length; 117 final int length;
128 final int replacementCodepoint; 118 final int replacementCodepoint;
129 119
130 IterableWindows1252Decoder(List<int> this.bytes, [int this.offset = 0, 120 IterableWindows1252Decoder(List<int> this.bytes, [int this.offset = 0,
131 int this.length = null, 121 int this.length = null,
132 int this.replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]); 122 int this.replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]);
133 123
134 Windows1252Decoder get iterator => 124 Windows1252Decoder get iterator =>
135 new Windows1252Decoder(bytes, offset, length, replacementCodepoint); 125 new Windows1252Decoder(bytes, offset, length, replacementCodepoint);
136 } 126 }
137 127
138 128
139 /** 129 /// Provides an iterator of Unicode codepoints from windows-1252 encoded bytes.
140 * Provides an iterator of Unicode codepoints from windows-1252 encoded bytes. 130 /// The parameters can set an offset into a list of bytes (as int), limit the
141 * The parameters can set an offset into a list of bytes (as int), limit the 131 /// length of the values to be decoded, and override the default Unicode
142 * length of the values to be decoded, and override the default Unicode 132 /// replacement character. Set the [replacementCodepoint] to null to throw an
143 * replacement character. Set the [replacementCodepoint] to null to throw an 133 /// ArgumentError rather than replace the bad value. The return value
144 * ArgumentError rather than replace the bad value. The return value 134 /// from this method can be used as an Iterable (e.g. in a for-loop).
145 * from this method can be used as an Iterable (e.g. in a for-loop).
146 */
147 class Windows1252Decoder implements Iterator<int> { 135 class Windows1252Decoder implements Iterator<int> {
148 final int replacementCodepoint; 136 final int replacementCodepoint;
149 final List<int> _bytes; 137 final List<int> _bytes;
150 int _offset; 138 int _offset;
151 final int _length; 139 final int _length;
152 140
153 Windows1252Decoder(List<int> bytes, [int offset = 0, int length, 141 Windows1252Decoder(List<int> bytes, [int offset = 0, int length,
154 this.replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) 142 this.replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT])
155 : _bytes = bytes, 143 : _bytes = bytes,
156 _offset = offset - 1, 144 _offset = offset - 1,
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after
203 case 0x9D: 191 case 0x9D:
204 if (replacementCodepoint == null) { 192 if (replacementCodepoint == null) {
205 throw new ArgumentError( 193 throw new ArgumentError(
206 "Invalid windows-1252 code point $char at $_offset"); 194 "Invalid windows-1252 code point $char at $_offset");
207 } 195 }
208 return replacementCodepoint; 196 return replacementCodepoint;
209 } 197 }
210 return char; 198 return char;
211 } 199 }
212 } 200 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698