pkg/third_party/html5lib/lib/src/inputstream.dart - Issue 178843003: [html5lib] triple slash comment style

Side by Side Diff: pkg/third_party/html5lib/lib/src/inputstream.dart

Issue 178843003: [html5lib] triple slash comment style (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: remove extra check Created 6 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 library inputstream;	1 library inputstream;

2	2

3 import 'dart:collection';	3 import 'dart:collection';

4 import 'package:utf/utf.dart';	4 import 'package:utf/utf.dart';

5 import 'package:source_maps/span.dart' show SourceFile;	5 import 'package:source_maps/span.dart' show SourceFile;

6 import 'char_encodings.dart';	6 import 'char_encodings.dart';

7 import 'constants.dart';	7 import 'constants.dart';

8 import 'utils.dart';	8 import 'utils.dart';

9 import 'encoding_parser.dart';	9 import 'encoding_parser.dart';

10	10

11 /** Hooks to call into dart:io without directly referencing it. */	11 /// Hooks to call into dart:io without directly referencing it.

12 class ConsoleSupport {	12 class ConsoleSupport {

13 List<int> bytesFromFile(source) => null;	13 List<int> bytesFromFile(source) => null;

14 }	14 }

15	15

16 // TODO(jmesserly): use lazy init here when supported.	16 // TODO(jmesserly): use lazy init here when supported.

17 ConsoleSupport consoleSupport = new ConsoleSupport();	17 ConsoleSupport consoleSupport = new ConsoleSupport();

18	18

19 /**	19 /// Provides a unicode stream of characters to the HtmlTokenizer.

20 * Provides a unicode stream of characters to the HtmlTokenizer.	20 ///

21 *	21 /// This class takes care of character encoding and removing or replacing

22 * This class takes care of character encoding and removing or replacing	22 /// incorrect byte-sequences and also provides column and line tracking.

23 * incorrect byte-sequences and also provides column and line tracking.

24 */

25 class HtmlInputStream {	23 class HtmlInputStream {

26 /**	24 /// Number of bytes to use when looking for a meta element with

27 * Number of bytes to use when looking for a meta element with	25 /// encoding information.

28 * encoding information.

29 */

30 static const int numBytesMeta = 512;	26 static const int numBytesMeta = 512;

31	27

32 /** Encoding to use if no other information can be found. */	28 /// Encoding to use if no other information can be found.

33 static const String defaultEncoding = 'windows-1252';	29 static const String defaultEncoding = 'windows-1252';

34	30

35 /** The name of the character encoding. */	31 /// The name of the character encoding.

36 String charEncodingName;	32 String charEncodingName;

37	33

38 /** True if we are certain about [charEncodingName], false for tentative. */	34 /// True if we are certain about [charEncodingName], false for tentative.

39 bool charEncodingCertain = true;	35 bool charEncodingCertain = true;

40	36

41 final bool generateSpans;	37 final bool generateSpans;

42	38

43 /** Location where the contents of the stream were found. */	39 /// Location where the contents of the stream were found.

44 final String sourceUrl;	40 final String sourceUrl;

45	41

46 List<int> _rawBytes;	42 List<int> _rawBytes;

47	43

48 /** Raw UTF-16 codes, used if a Dart String is passed in. */	44 /// Raw UTF-16 codes, used if a Dart String is passed in.

49 Iterable<int> _rawChars;	45 Iterable<int> _rawChars;

50	46

51 Queue<String> errors;	47 Queue<String> errors;

52	48

53 SourceFile fileInfo;	49 SourceFile fileInfo;

54	50

55 List<int> _lineStarts;	51 List<int> _lineStarts;

56	52

57 List<int> _chars;	53 List<int> _chars;

58	54

59 int _offset;	55 int _offset;

60	56

61 /**	57 /// Initialises the HtmlInputStream.

62 * Initialises the HtmlInputStream.	58 ///

63 *	59 /// HtmlInputStream(source, [encoding]) -> Normalized stream from source

64 * HtmlInputStream(source, [encoding]) -> Normalized stream from source	60 /// for use by html5lib.

65 * for use by html5lib.	61 ///

66 *	62 /// [source] can be either a [String] or a [List<int>] containing the raw

67 * [source] can be either a [String] or a [List<int>] containing the raw	63 /// bytes, or a file if [consoleSupport] is initialized.

68 * bytes, or a file if [consoleSupport] is initialized.	64 ///

69 *	65 /// The optional encoding parameter must be a string that indicates

70 * The optional encoding parameter must be a string that indicates	66 /// the encoding. If specified, that encoding will be used,

71 * the encoding. If specified, that encoding will be used,	67 /// regardless of any BOM or later declaration (such as in a meta

72 * regardless of any BOM or later declaration (such as in a meta	68 /// element)

73 * element)	69 ///

74 *	70 /// [parseMeta] - Look for a <meta> element containing encoding information

75 * [parseMeta] - Look for a <meta> element containing encoding information

76 */

77 HtmlInputStream(source, [String encoding, bool parseMeta = true,	71 HtmlInputStream(source, [String encoding, bool parseMeta = true,

78 this.generateSpans = false, this.sourceUrl])	72 this.generateSpans = false, this.sourceUrl])

79 : charEncodingName = codecName(encoding) {	73 : charEncodingName = codecName(encoding) {

80	74

81 if (source is String) {	75 if (source is String) {

82 _rawChars = toCodepoints(source);	76 _rawChars = toCodepoints(source);

83 charEncodingName = 'utf-8';	77 charEncodingName = 'utf-8';

84 charEncodingCertain = true;	78 charEncodingCertain = true;

85 } else if (source is List<int>) {	79 } else if (source is List<int>) {

86 _rawBytes = source;	80 _rawBytes = source;

(...skipping 101 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
188 } else {	182 } else {

189 charEncodingName = newEncoding;	183 charEncodingName = newEncoding;

190 charEncodingCertain = true;	184 charEncodingCertain = true;

191 _rawChars = null;	185 _rawChars = null;

192 reset();	186 reset();

193 throw new ReparseException(	187 throw new ReparseException(

194 'Encoding changed from $charEncodingName to $newEncoding');	188 'Encoding changed from $charEncodingName to $newEncoding');

195 }	189 }

196 }	190 }

197	191

198 /**	192 /// Attempts to detect a BOM at the start of the stream. If

199 * Attempts to detect a BOM at the start of the stream. If	193 /// an encoding can be determined from the BOM return the name of the

200 * an encoding can be determined from the BOM return the name of the	194 /// encoding otherwise return null.

201 * encoding otherwise return null.

202 */

203 String detectBOM() {	195 String detectBOM() {

204 // Try detecting the BOM using bytes from the string	196 // Try detecting the BOM using bytes from the string

205 if (hasUtf8Bom(_rawBytes)) {	197 if (hasUtf8Bom(_rawBytes)) {

206 return 'utf-8';	198 return 'utf-8';

207 }	199 }

208 // Note: we don't need to remember whether it was big or little endian	200 // Note: we don't need to remember whether it was big or little endian

209 // because the decoder will do that later. It will also eat the BOM for us.	201 // because the decoder will do that later. It will also eat the BOM for us.

210 if (hasUtf16Bom(_rawBytes)) {	202 if (hasUtf16Bom(_rawBytes)) {

211 return 'utf-16';	203 return 'utf-16';

212 }	204 }

213 if (hasUtf32Bom(_rawBytes)) {	205 if (hasUtf32Bom(_rawBytes)) {

214 return 'utf-32';	206 return 'utf-32';

215 }	207 }

216 return null;	208 return null;

217 }	209 }

218	210

219 /** Report the encoding declared by the meta element. */	211 /// Report the encoding declared by the meta element.

220 String detectEncodingMeta() {	212 String detectEncodingMeta() {

221 var parser = new EncodingParser(slice(_rawBytes, 0, numBytesMeta));	213 var parser = new EncodingParser(slice(_rawBytes, 0, numBytesMeta));

222 var encoding = parser.getEncoding();	214 var encoding = parser.getEncoding();

223	215

224 if (const ['utf-16', 'utf-16-be', 'utf-16-le'].contains(encoding)) {	216 if (const ['utf-16', 'utf-16-be', 'utf-16-le'].contains(encoding)) {

225 encoding = 'utf-8';	217 encoding = 'utf-8';

226 }	218 }

227	219

228 return encoding;	220 return encoding;

229 }	221 }

230	222

231 /**	223 /// Returns the current offset in the stream, i.e. the number of codepoints

232 * Returns the current offset in the stream, i.e. the number of codepoints	224 /// since the start of the file.

233 * since the start of the file.

234 */

235 int get position => _offset;	225 int get position => _offset;

236	226

237 /**	227 /// Read one character from the stream or queue if available. Return

238 * Read one character from the stream or queue if available. Return	228 /// EOF when EOF is reached.

239 * EOF when EOF is reached.

240 */

241 String char() {	229 String char() {

242 if (_offset >= _chars.length) return EOF;	230 if (_offset >= _chars.length) return EOF;

243 return new String.fromCharCodes([_chars[_offset++]]);	231 return new String.fromCharCodes([_chars[_offset++]]);

244 }	232 }

245	233

246 String peekChar() {	234 String peekChar() {

247 if (_offset >= _chars.length) return EOF;	235 if (_offset >= _chars.length) return EOF;

248 return new String.fromCharCodes([_chars[_offset]]);	236 return new String.fromCharCodes([_chars[_offset]]);

249 }	237 }

250	238

251 /**	239 /// Returns a string of characters from the stream up to but not

252 * Returns a string of characters from the stream up to but not	240 /// including any character in 'characters' or EOF.

253 * including any character in 'characters' or EOF.

254 */

255 String charsUntil(String characters, [bool opposite = false]) {	241 String charsUntil(String characters, [bool opposite = false]) {

256 int start = _offset;	242 int start = _offset;

257 String c;	243 String c;

258 while ((c = peekChar()) != null && characters.contains(c) == opposite) {	244 while ((c = peekChar()) != null && characters.contains(c) == opposite) {

259 _offset++;	245 _offset++;

260 }	246 }

261	247

262 return new String.fromCharCodes(_chars.sublist(start, _offset));	248 return new String.fromCharCodes(_chars.sublist(start, _offset));

263 }	249 }

264	250

(...skipping 24 matching lines...) Expand all Loading...
289 case 0x08FFFE: case 0x08FFFF: case 0x09FFFE: case 0x09FFFF:	275 case 0x08FFFE: case 0x08FFFF: case 0x09FFFE: case 0x09FFFF:

290 case 0x0AFFFE: case 0x0AFFFF: case 0x0BFFFE: case 0x0BFFFF:	276 case 0x0AFFFE: case 0x0AFFFF: case 0x0BFFFE: case 0x0BFFFF:

291 case 0x0CFFFE: case 0x0CFFFF: case 0x0DFFFE: case 0x0DFFFF:	277 case 0x0CFFFE: case 0x0CFFFF: case 0x0DFFFE: case 0x0DFFFF:

292 case 0x0EFFFE: case 0x0EFFFF: case 0x0FFFFE: case 0x0FFFFF:	278 case 0x0EFFFE: case 0x0EFFFF: case 0x0FFFFE: case 0x0FFFFF:

293 case 0x10FFFE: case 0x10FFFF:	279 case 0x10FFFE: case 0x10FFFF:

294 return true;	280 return true;

295 }	281 }

296 return false;	282 return false;

297 }	283 }

298	284

299 /**	285 /// Return the Python codec name corresponding to an encoding or null if the

300 * Return the Python codec name corresponding to an encoding or null if the	286 /// string doesn't correspond to a valid encoding.

301 * string doesn't correspond to a valid encoding.

302 */

303 String codecName(String encoding) {	287 String codecName(String encoding) {

304 final asciiPunctuation = new RegExp(	288 final asciiPunctuation = new RegExp(

305 "[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005B-\u0060\u007B-\u007E]");	289 "[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005B-\u0060\u007B-\u007E]");

306	290

307 if (encoding == null) return null;	291 if (encoding == null) return null;

308 var canonicalName = encoding.replaceAll(asciiPunctuation, '').toLowerCase();	292 var canonicalName = encoding.replaceAll(asciiPunctuation, '').toLowerCase();

309 return encodings[canonicalName];	293 return encodings[canonicalName];

310 }	294 }

OLD	NEW