pkg/third_party/html5lib/lib/src/tokenizer.dart - Issue 178843003: [html5lib] triple slash comment style

Side by Side Diff: pkg/third_party/html5lib/lib/src/tokenizer.dart

Issue 178843003: [html5lib] triple slash comment style (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: remove extra check Created 6 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 library tokenizer;	1 library tokenizer;

2	2

3 import 'dart:collection';	3 import 'dart:collection';

4 import 'package:html5lib/parser.dart' show HtmlParser;	4 import 'package:html5lib/parser.dart' show HtmlParser;

5 import 'package:source_maps/span.dart' show Span, FileSpan;	5 import 'package:source_maps/span.dart' show Span, FileSpan;

6 import 'constants.dart';	6 import 'constants.dart';

7 import 'inputstream.dart';	7 import 'inputstream.dart';

8 import 'token.dart';	8 import 'token.dart';

9 import 'utils.dart';	9 import 'utils.dart';

10	10

11 // Group entities by their first character, for faster lookups	11 // Group entities by their first character, for faster lookups

12	12

13 // TODO(jmesserly): we could use a better data structure here like a trie, if	13 // TODO(jmesserly): we could use a better data structure here like a trie, if

14 // we had it implemented in Dart.	14 // we had it implemented in Dart.

15 Map<String, List<String>> entitiesByFirstChar = (() {	15 Map<String, List<String>> entitiesByFirstChar = (() {

16 var result = {};	16 var result = {};

17 for (var k in entities.keys) {	17 for (var k in entities.keys) {

18 result.putIfAbsent(k[0], () => []).add(k);	18 result.putIfAbsent(k[0], () => []).add(k);

19 }	19 }

20 return result;	20 return result;

21 })();	21 })();

22	22

23 // TODO(jmesserly): lots of ways to make this faster:	23 // TODO(jmesserly): lots of ways to make this faster:

24 // - use char codes everywhere instead of 1-char strings	24 // - use char codes everywhere instead of 1-char strings

25 // - use switch instead of contains, indexOf	25 // - use switch instead of contains, indexOf

26 // - use switch instead of the sequential if tests	26 // - use switch instead of the sequential if tests

27 // - avoid string concat	27 // - avoid string concat

28	28

29 /**	29 /// This class takes care of tokenizing HTML.

30 * This class takes care of tokenizing HTML.

31 */

32 class HtmlTokenizer implements Iterator<Token> {	30 class HtmlTokenizer implements Iterator<Token> {

33 // TODO(jmesserly): a lot of these could be made private	31 // TODO(jmesserly): a lot of these could be made private

34	32

35 final HtmlInputStream stream;	33 final HtmlInputStream stream;

36	34

37 final bool lowercaseElementName;	35 final bool lowercaseElementName;

38	36

39 final bool lowercaseAttrName;	37 final bool lowercaseAttrName;

40	38

41 /** True to generate spans for [Token.span]. */	39 /// True to generate spans for [Token.span].

42 final bool generateSpans;	40 final bool generateSpans;

43	41

44 /** True to generate spans for attributes. */	42 /// True to generate spans for attributes.

45 final bool attributeSpans;	43 final bool attributeSpans;

46	44

47 /**	45 /// This reference to the parser is used for correct CDATA handling.

48 * This reference to the parser is used for correct CDATA handling.	46 /// The [HtmlParser] will set this at construction time.

49 * The [HtmlParser] will set this at construction time.

50 */

51 HtmlParser parser;	47 HtmlParser parser;

52	48

53 final Queue<Token> tokenQueue;	49 final Queue<Token> tokenQueue;

54	50

55 /** Holds the token that is currently being processed. */	51 /// Holds the token that is currently being processed.

56 Token currentToken;	52 Token currentToken;

57	53

58 /**	54 /// Holds a reference to the method to be invoked for the next parser state.

59 * Holds a reference to the method to be invoked for the next parser state.

60 */

61 // TODO(jmesserly): the type should be "Predicate" but a dart2js checked mode	55 // TODO(jmesserly): the type should be "Predicate" but a dart2js checked mode

62 // bug prevents us from doing that. See http://dartbug.com/12465	56 // bug prevents us from doing that. See http://dartbug.com/12465

63 Function state;	57 Function state;

64	58

65 String temporaryBuffer;	59 String temporaryBuffer;

66	60

67 int _lastOffset;	61 int _lastOffset;

68	62

69 // TODO(jmesserly): ideally this would be a LinkedHashMap and we wouldn't add	63 // TODO(jmesserly): ideally this would be a LinkedHashMap and we wouldn't add

70 // an item until it's ready. But the code doesn't have a clear notion of when	64 // an item until it's ready. But the code doesn't have a clear notion of when

(...skipping 46 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
117 // Note: we could track the name span here, if we need it.	111 // Note: we could track the name span here, if we need it.

118 void _markAttributeNameEnd(int offset) => _markAttributeEnd(offset);	112 void _markAttributeNameEnd(int offset) => _markAttributeEnd(offset);

119	113

120 void _addAttribute(String name) {	114 void _addAttribute(String name) {

121 if (_attributes == null) _attributes = [];	115 if (_attributes == null) _attributes = [];

122 var attr = new TagAttribute(name);	116 var attr = new TagAttribute(name);

123 _attributes.add(attr);	117 _attributes.add(attr);

124 if (attributeSpans) attr.start = stream.position - name.length;	118 if (attributeSpans) attr.start = stream.position - name.length;

125 }	119 }

126	120

127 /**	121 /// This is where the magic happens.

128 * This is where the magic happens.	122 ///

129 *	123 /// We do our usual processing through the states and when we have a token

130 * We do our usual processing through the states and when we have a token	124 /// to return we yield the token which pauses processing until the next token

131 * to return we yield the token which pauses processing until the next token	125 /// is requested.

132 * is requested.

133 */

134 bool moveNext() {	126 bool moveNext() {

135 // Start processing. When EOF is reached state will return false	127 // Start processing. When EOF is reached state will return false

136 // instead of true and the loop will terminate.	128 // instead of true and the loop will terminate.

137 while (stream.errors.length == 0 && tokenQueue.length == 0) {	129 while (stream.errors.length == 0 && tokenQueue.length == 0) {

138 if (!state()) {	130 if (!state()) {

139 _current = null;	131 _current = null;

140 return false;	132 return false;

141 }	133 }

142 }	134 }

143 if (stream.errors.length > 0) {	135 if (stream.errors.length > 0) {

144 _current = new ParseErrorToken(stream.errors.removeFirst());	136 _current = new ParseErrorToken(stream.errors.removeFirst());

145 } else {	137 } else {

146 assert (tokenQueue.length > 0);	138 assert (tokenQueue.length > 0);

147 _current = tokenQueue.removeFirst();	139 _current = tokenQueue.removeFirst();

148 }	140 }

149 return true;	141 return true;

150 }	142 }

151	143

152 /**	144 /// Resets the tokenizer state. Calling this does not reset the [stream] or

153 * Resets the tokenizer state. Calling this does not reset the [stream] or	145 /// the [parser].

154 * the [parser].

155 */

156 void reset() {	146 void reset() {

157 _lastOffset = 0;	147 _lastOffset = 0;

158 tokenQueue.clear();	148 tokenQueue.clear();

159 currentToken = null;	149 currentToken = null;

160 temporaryBuffer = null;	150 temporaryBuffer = null;

161 _attributes = null;	151 _attributes = null;

162 _attributeNames = null;	152 _attributeNames = null;

163 state = dataState;	153 state = dataState;

164 }	154 }

165	155

166 /** Adds a token to the queue. Sets the span if needed. */	156 /// Adds a token to the queue. Sets the span if needed.

167 void _addToken(Token token) {	157 void _addToken(Token token) {

168 if (generateSpans && token.span == null) {	158 if (generateSpans && token.span == null) {

169 int offset = stream.position;	159 int offset = stream.position;

170 token.span = new FileSpan(stream.fileInfo, _lastOffset, offset);	160 token.span = new FileSpan(stream.fileInfo, _lastOffset, offset);

171 if (token is! ParseErrorToken) {	161 if (token is! ParseErrorToken) {

172 _lastOffset = offset;	162 _lastOffset = offset;

173 }	163 }

174 }	164 }

175 tokenQueue.add(token);	165 tokenQueue.add(token);

176 }	166 }

177	167

178 /**	168 /// This function returns either U+FFFD or the character based on the

179 * This function returns either U+FFFD or the character based on the	169 /// decimal or hexadecimal representation. It also discards ";" if present.

180 * decimal or hexadecimal representation. It also discards ";" if present.	170 /// If not present it will add a [ParseErrorToken].

181 * If not present it will add a [ParseErrorToken].

182 */

183 String consumeNumberEntity(bool isHex) {	171 String consumeNumberEntity(bool isHex) {

184 var allowed = isDigit;	172 var allowed = isDigit;

185 var radix = 10;	173 var radix = 10;

186 if (isHex) {	174 if (isHex) {

187 allowed = isHexDigit;	175 allowed = isHexDigit;

188 radix = 16;	176 radix = 16;

189 }	177 }

190	178

191 var charStack = [];	179 var charStack = [];

192	180

(...skipping 145 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
338 var token;	326 var token;

339 if (isWhitespace(output)) {	327 if (isWhitespace(output)) {

340 token = new SpaceCharactersToken(output);	328 token = new SpaceCharactersToken(output);

341 } else {	329 } else {

342 token = new CharactersToken(output);	330 token = new CharactersToken(output);

343 }	331 }

344 _addToken(token);	332 _addToken(token);

345 }	333 }

346 }	334 }

347	335

348 /** This method replaces the need for "entityInAttributeValueState". */	336 /// This method replaces the need for "entityInAttributeValueState".

349 void processEntityInAttribute(String allowedChar) {	337 void processEntityInAttribute(String allowedChar) {

350 consumeEntity(allowedChar: allowedChar, fromAttribute: true);	338 consumeEntity(allowedChar: allowedChar, fromAttribute: true);

351 }	339 }

352	340

353 /**	341 /// This method is a generic handler for emitting the tags. It also sets

354 * This method is a generic handler for emitting the tags. It also sets	342 /// the state to "data" because that's what's needed after a token has been

355 * the state to "data" because that's what's needed after a token has been	343 /// emitted.

356 * emitted.

357 */

358 void emitCurrentToken() {	344 void emitCurrentToken() {

359 var token = currentToken;	345 var token = currentToken;

360 // Add token to the queue to be yielded	346 // Add token to the queue to be yielded

361 if (token is TagToken) {	347 if (token is TagToken) {

362 if (lowercaseElementName) {	348 if (lowercaseElementName) {

363 token.name = asciiUpper2Lower(token.name);	349 token.name = asciiUpper2Lower(token.name);

364 }	350 }

365 if (token is EndTagToken) {	351 if (token is EndTagToken) {

366 if (_attributes != null) {	352 if (_attributes != null) {

367 _addToken(new ParseErrorToken("attributes-in-end-tag"));	353 _addToken(new ParseErrorToken("attributes-in-end-tag"));

(...skipping 1524 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1892 }	1878 }

1893	1879

1894 if (data.length > 0) {	1880 if (data.length > 0) {

1895 _addToken(new CharactersToken(data.join()));	1881 _addToken(new CharactersToken(data.join()));

1896 }	1882 }

1897 state = dataState;	1883 state = dataState;

1898 return true;	1884 return true;

1899 }	1885 }

1900 }	1886 }

1901	1887

OLD	NEW

« pkg/third_party/html5lib/lib/dom.dart ('K') | « pkg/third_party/html5lib/lib/src/token.dart ('k') | pkg/third_party/html5lib/lib/src/treebuilder.dart » ('j') | no next file with comments »