pkg/third_party/html5lib/lib/src/tokenizer.dart - Issue 157983005: pkg/third_party/html5lib: lots of cleanup

Unified Diff: pkg/third_party/html5lib/lib/src/tokenizer.dart

Issue 157983005: pkg/third_party/html5lib: lots of cleanup (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: bump version Created 6 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« pkg/third_party/html5lib/lib/src/constants.dart ('K') | « pkg/third_party/html5lib/lib/src/inputstream.dart ('k') | pkg/third_party/html5lib/lib/src/treebuilder.dart » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: pkg/third_party/html5lib/lib/src/tokenizer.dart

diff --git a/pkg/third_party/html5lib/lib/src/tokenizer.dart b/pkg/third_party/html5lib/lib/src/tokenizer.dart

index 2c2ada79d5ab5fbabdc3bc0aa56f6a91717e51c0..057021b5ff1dcbedb776c4322c8780c345df979e 100644

--- a/pkg/third_party/html5lib/lib/src/tokenizer.dart

+++ b/pkg/third_party/html5lib/lib/src/tokenizer.dart

@@ -1,7 +1,6 @@

library tokenizer;

import 'dart:collection';

-import 'dart:math';

import 'package:html5lib/parser.dart' show HtmlParser;

import 'package:source_maps/span.dart' show Span, FileSpan;

import 'constants.dart';

@@ -15,7 +14,7 @@ import 'utils.dart';

// we had it implemented in Dart.

Map<String, List<String>> entitiesByFirstChar = (() {

var result = {};

- for (var k in entities.keys) {

+ for (var k in ENTITIES.keys) {

result.putIfAbsent(k[0], () => []).add(k);

}

return result;

@@ -203,7 +202,7 @@ class HtmlTokenizer implements Iterator<Token> {

var charAsInt = parseIntRadix(charStack.join(), radix);

// Certain characters get replaced with others

- var char = replacementCharacters[charAsInt];

+ var char = REPLACEMENT_CHARACTERS[charAsInt];

if (char != null) {

_addToken(new ParseErrorToken(

"illegal-codepoint-for-numeric-entity",

@@ -305,7 +304,7 @@ class HtmlTokenizer implements Iterator<Token> {

int entityLen;

for (entityLen = charStack.length - 1; entityLen > 1; entityLen--) {

var possibleEntityName = charStack.sublist(0, entityLen).join();

- if (entities.containsKey(possibleEntityName)) {

+ if (ENTITIES.containsKey(possibleEntityName)) {

entityName = possibleEntityName;

break;

}

@@ -323,7 +322,7 @@ class HtmlTokenizer implements Iterator<Token> {

stream.unget(charStack.removeLast());

output = "&${charStack.join()}";

} else {

- output = entities[entityName];

+ output = ENTITIES[entityName];

stream.unget(charStack.removeLast());

output = '${output}${slice(charStack, entityLen).join()}';

}

@@ -407,7 +406,7 @@ class HtmlTokenizer implements Iterator<Token> {

// state". At that point spaceCharacters are important so they are

// emitted separately.

_addToken(new SpaceCharactersToken(

- '${data}${stream.charsUntil(spaceCharacters, true)}'));

+ '${data}${stream.charsUntil(SPACE_CHARACTERS, true)}'));

// No need to update lastFourChars here, since the first space will

// have already been appended to lastFourChars and will have broken

// any <!-- sequences

@@ -441,7 +440,7 @@ class HtmlTokenizer implements Iterator<Token> {

// state". At that point spaceCharacters are important so they are

// emitted separately.

_addToken(new SpaceCharactersToken(

- '${data}${stream.charsUntil(spaceCharacters, true)}'));

+ '${data}${stream.charsUntil(SPACE_CHARACTERS, true)}'));

} else {

var chars = stream.charsUntil("&<");

_addToken(new CharactersToken('${data}${chars}'));

@@ -998,7 +997,7 @@ class HtmlTokenizer implements Iterator<Token> {

bool beforeAttributeNameState() {

var data = stream.char();

if (isWhitespace(data)) {

- stream.charsUntil(spaceCharacters, true);

+ stream.charsUntil(SPACE_CHARACTERS, true);

} else if (isLetter(data)) {

_addAttribute(data);

state = attributeNameState;

@@ -1032,7 +1031,7 @@ class HtmlTokenizer implements Iterator<Token> {

state = beforeAttributeValueState;

} else if (isLetter(data)) {

_attributeName = '$_attributeName$data'

- '${stream.charsUntil(asciiLetters, true)}';

+ '${stream.charsUntil(ASCII_LETTERS, true)}';

leavingThisState = false;

} else if (data == ">") {

// XXX If we emit here the attributes are converted to a dict

@@ -1085,7 +1084,7 @@ class HtmlTokenizer implements Iterator<Token> {

bool afterAttributeNameState() {

var data = stream.char();

if (isWhitespace(data)) {

- stream.charsUntil(spaceCharacters, true);

+ stream.charsUntil(SPACE_CHARACTERS, true);

} else if (data == "=") {

state = beforeAttributeValueState;

} else if (data == ">") {

@@ -1116,7 +1115,7 @@ class HtmlTokenizer implements Iterator<Token> {

bool beforeAttributeValueState() {

var data = stream.char();

if (isWhitespace(data)) {

- stream.charsUntil(spaceCharacters, true);

+ stream.charsUntil(SPACE_CHARACTERS, true);

} else if (data == "\"") {

_markAttributeValueStart(0);

state = attributeValueDoubleQuotedState;

@@ -1217,7 +1216,7 @@ class HtmlTokenizer implements Iterator<Token> {

_attributeValue = '${_attributeValue}\uFFFD';

} else {

_attributeValue = '$_attributeValue$data'

- '${stream.charsUntil("&>\"\'=<`$spaceCharacters")}';

+ '${stream.charsUntil("&>\"\'=<`$SPACE_CHARACTERS")}';

}

return true;

}