pkg/third_party/html5lib/lib/src/char_encodings.dart - Issue 178843003: [html5lib] triple slash comment style

Side by Side Diff: pkg/third_party/html5lib/lib/src/char_encodings.dart

Issue 178843003: [html5lib] triple slash comment style (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: remove extra check Created 6 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 /** Decodes bytes using the correct name. See [decodeBytes]. */	1 /// Decodes bytes using the correct name. See [decodeBytes].

2 library char_encodings;	2 library char_encodings;

3	3

4 import 'dart:collection';	4 import 'dart:collection';

5 import 'package:utf/utf.dart';	5 import 'package:utf/utf.dart';

6	6

7 // TODO(jmesserly): this function is conspicuously absent from dart:utf.	7 // TODO(jmesserly): this function is conspicuously absent from dart:utf.

8 /**	8 /// Returns true if the [bytes] starts with a UTF-8 byte order mark.

9 * Returns true if the [bytes] starts with a UTF-8 byte order mark.	9 /// Since UTF-8 doesn't have byte order, it's somewhat of a misnomer, but it is

10 * Since UTF-8 doesn't have byte order, it's somewhat of a misnomer, but it is	10 /// used in HTML to detect the UTF-8 encoding.

11 * used in HTML to detect the UTF-8 encoding.

12 */

13 bool hasUtf8Bom(List<int> bytes, [int offset = 0, int length]) {	11 bool hasUtf8Bom(List<int> bytes, [int offset = 0, int length]) {

14 int end = length != null ? offset + length : bytes.length;	12 int end = length != null ? offset + length : bytes.length;

15 return (offset + 3) <= end &&	13 return (offset + 3) <= end &&

16 bytes[offset] == 0xEF &&	14 bytes[offset] == 0xEF &&

17 bytes[offset + 1] == 0xBB &&	15 bytes[offset + 1] == 0xBB &&

18 bytes[offset + 2] == 0xBF;	16 bytes[offset + 2] == 0xBF;

19 }	17 }

20	18

21 // TODO(jmesserly): it's unfortunate that this has to be one-shot on the entire	19 // TODO(jmesserly): it's unfortunate that this has to be one-shot on the entire

22 // file, but dart:utf does not expose stream-based decoders yet.	20 // file, but dart:utf does not expose stream-based decoders yet.

23 /**	21 /// Decodes the [bytes] with the provided [encoding] and returns an iterable for

24 * Decodes the [bytes] with the provided [encoding] and returns an iterable for	22 /// the codepoints. Supports the major unicode encodings as well as ascii and

25 * the codepoints. Supports the major unicode encodings as well as ascii and	23 /// windows-1252 encodings.

26 * windows-1252 encodings.

27 */

28 Iterable<int> decodeBytes(String encoding, List<int> bytes,	24 Iterable<int> decodeBytes(String encoding, List<int> bytes,

29 [int offset = 0, int length,	25 [int offset = 0, int length,

30 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {	26 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {

31 if (length == null) length = bytes.length;	27 if (length == null) length = bytes.length;

32 final replace = replacementCodepoint;	28 final replace = replacementCodepoint;

33 switch (encoding) {	29 switch (encoding) {

34 case 'ascii':	30 case 'ascii':

35 bytes = bytes.sublist(offset, offset + length);	31 bytes = bytes.sublist(offset, offset + length);

36 // TODO(jmesserly): this was taken from runtime/bin/string_stream.dart	32 // TODO(jmesserly): this was taken from runtime/bin/string_stream.dart

37 for (int byte in bytes) {	33 for (int byte in bytes) {

(...skipping 33 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
71 case 'utf-32-le':	67 case 'utf-32-le':

72 return decodeUtf32leAsIterable(bytes, offset, length, true, replace);	68 return decodeUtf32leAsIterable(bytes, offset, length, true, replace);

73	69

74 default:	70 default:

75 throw new ArgumentError('Encoding $encoding not supported');	71 throw new ArgumentError('Encoding $encoding not supported');

76 }	72 }

77 }	73 }

78	74

79	75

80 // TODO(jmesserly): use dart:utf once http://dartbug.com/6476 is fixed.	76 // TODO(jmesserly): use dart:utf once http://dartbug.com/6476 is fixed.

81 /**	77 /// Returns the code points for the [input]. This works like [String.charCodes]

82 * Returns the code points for the [input]. This works like [String.charCodes]	78 /// but it decodes UTF-16 surrogate pairs.

83 * but it decodes UTF-16 surrogate pairs.

84 */

85 List<int> toCodepoints(String input) {	79 List<int> toCodepoints(String input) {

86 var newCodes = <int>[];	80 var newCodes = <int>[];

87 for (int i = 0; i < input.length; i++) {	81 for (int i = 0; i < input.length; i++) {

88 var c = input.codeUnitAt(i);	82 var c = input.codeUnitAt(i);

89 if (0xD800 <= c && c <= 0xDBFF) {	83 if (0xD800 <= c && c <= 0xDBFF) {

90 int next = i + 1;	84 int next = i + 1;

91 if (next < input.length) {	85 if (next < input.length) {

92 var d = input.codeUnitAt(next);	86 var d = input.codeUnitAt(next);

93 if (0xDC00 <= d && d <= 0xDFFF) {	87 if (0xDC00 <= d && d <= 0xDFFF) {

94 c = 0x10000 + ((c - 0xD800) << 10) + (d - 0xDC00);	88 c = 0x10000 + ((c - 0xD800) << 10) + (d - 0xDC00);

95 i = next;	89 i = next;

96 }	90 }

97 }	91 }

98 }	92 }

99 newCodes.add(c);	93 newCodes.add(c);

100 }	94 }

101 return newCodes;	95 return newCodes;

102 }	96 }

103	97

104	98

105 /**	99 /// Decodes [windows-1252](http://en.wikipedia.org/wiki/Windows-1252) bytes as

106 * Decodes [windows-1252](http://en.wikipedia.org/wiki/Windows-1252) bytes as an	100 /// an iterable. Thus, the consumer can only convert as much of the input as

107 * iterable. Thus, the consumer can only convert as much of the input as needed.	101 /// needed. Set the [replacementCodepoint] to null to throw an [ArgumentError]

108 * Set the [replacementCodepoint] to null to throw an [ArgumentError]	102 /// rather than replace the bad value.

109 * rather than replace the bad value.

110 */

111 IterableWindows1252Decoder decodeWindows1252AsIterable(List<int> bytes,	103 IterableWindows1252Decoder decodeWindows1252AsIterable(List<int> bytes,

112 [int offset = 0, int length,	104 [int offset = 0, int length,

113 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {	105 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {

114 return new IterableWindows1252Decoder(bytes, offset, length,	106 return new IterableWindows1252Decoder(bytes, offset, length,

115 replacementCodepoint);	107 replacementCodepoint);

116 }	108 }

117	109

118	110

119 /**	111 /// Return type of [decodeWindows1252AsIterable] and variants. The Iterable type

120 * Return type of [decodeWindows1252AsIterable] and variants. The Iterable type	112 /// provides an iterator on demand and the iterator will only translate bytes

121 * provides an iterator on demand and the iterator will only translate bytes	113 /// as requested by the user of the iterator. (Note: results are not cached.)

122 * as requested by the user of the iterator. (Note: results are not cached.)

123 */

124 class IterableWindows1252Decoder extends IterableBase<int> {	114 class IterableWindows1252Decoder extends IterableBase<int> {

125 final List<int> bytes;	115 final List<int> bytes;

126 final int offset;	116 final int offset;

127 final int length;	117 final int length;

128 final int replacementCodepoint;	118 final int replacementCodepoint;

129	119

130 IterableWindows1252Decoder(List<int> this.bytes, [int this.offset = 0,	120 IterableWindows1252Decoder(List<int> this.bytes, [int this.offset = 0,

131 int this.length = null,	121 int this.length = null,

132 int this.replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]);	122 int this.replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]);

133	123

134 Windows1252Decoder get iterator =>	124 Windows1252Decoder get iterator =>

135 new Windows1252Decoder(bytes, offset, length, replacementCodepoint);	125 new Windows1252Decoder(bytes, offset, length, replacementCodepoint);

136 }	126 }

137	127

138	128

139 /**	129 /// Provides an iterator of Unicode codepoints from windows-1252 encoded bytes.

140 * Provides an iterator of Unicode codepoints from windows-1252 encoded bytes.	130 /// The parameters can set an offset into a list of bytes (as int), limit the

141 * The parameters can set an offset into a list of bytes (as int), limit the	131 /// length of the values to be decoded, and override the default Unicode

142 * length of the values to be decoded, and override the default Unicode	132 /// replacement character. Set the [replacementCodepoint] to null to throw an

143 * replacement character. Set the [replacementCodepoint] to null to throw an	133 /// ArgumentError rather than replace the bad value. The return value

144 * ArgumentError rather than replace the bad value. The return value	134 /// from this method can be used as an Iterable (e.g. in a for-loop).

145 * from this method can be used as an Iterable (e.g. in a for-loop).

146 */

147 class Windows1252Decoder implements Iterator<int> {	135 class Windows1252Decoder implements Iterator<int> {

148 final int replacementCodepoint;	136 final int replacementCodepoint;

149 final List<int> _bytes;	137 final List<int> _bytes;

150 int _offset;	138 int _offset;

151 final int _length;	139 final int _length;

152	140

153 Windows1252Decoder(List<int> bytes, [int offset = 0, int length,	141 Windows1252Decoder(List<int> bytes, [int offset = 0, int length,

154 this.replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT])	142 this.replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT])

155 : _bytes = bytes,	143 : _bytes = bytes,

156 _offset = offset - 1,	144 _offset = offset - 1,

(...skipping 46 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
203 case 0x9D:	191 case 0x9D:

204 if (replacementCodepoint == null) {	192 if (replacementCodepoint == null) {

205 throw new ArgumentError(	193 throw new ArgumentError(

206 "Invalid windows-1252 code point $char at $_offset");	194 "Invalid windows-1252 code point $char at $_offset");

207 }	195 }

208 return replacementCodepoint;	196 return replacementCodepoint;

209 }	197 }

210 return char;	198 return char;

211 }	199 }

212 }	200 }

OLD	NEW

« pkg/third_party/html5lib/lib/dom.dart ('K') | « pkg/third_party/html5lib/lib/parser_console.dart ('k') | pkg/third_party/html5lib/lib/src/constants.dart » ('j') | no next file with comments »