sky/engine/core/html/parser/HTMLTokenizer.h - Issue 678263002: Update tokenizer to match spec

Side by Side Diff: sky/engine/core/html/parser/HTMLTokenizer.h

Issue 678263002: Update tokenizer to match spec (Closed) Base URL: git@github.com:domokit/mojo.git@master

Patch Set: Created 6 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 /*	1 /*

2 * Copyright (C) 2008 Apple Inc. All Rights Reserved.	2 * Copyright (C) 2008 Apple Inc. All Rights Reserved.

3 * Copyright (C) 2010 Google, Inc. All Rights Reserved.	3 * Copyright (C) 2010 Google, Inc. All Rights Reserved.

4 *	4 *

5 * Redistribution and use in source and binary forms, with or without	5 * Redistribution and use in source and binary forms, with or without

6 * modification, are permitted provided that the following conditions	6 * modification, are permitted provided that the following conditions

7 * are met:	7 * are met:

8 * 1. Redistributions of source code must retain the above copyright	8 * 1. Redistributions of source code must retain the above copyright

9 * notice, this list of conditions and the following disclaimer.	9 * notice, this list of conditions and the following disclaimer.

10 * 2. Redistributions in binary form must reproduce the above copyright	10 * 2. Redistributions in binary form must reproduce the above copyright

(...skipping 29 matching lines...) Expand all Loading...
40 public:	40 public:

41 static PassOwnPtr<HTMLTokenizer> create() { return adoptPtr(new HTMLTokenizer()); }	41 static PassOwnPtr<HTMLTokenizer> create() { return adoptPtr(new HTMLTokenizer()); }

42 ~HTMLTokenizer();	42 ~HTMLTokenizer();

43	43

44 void reset();	44 void reset();

45	45

46 enum State {	46 enum State {

47 DataState,	47 DataState,

48 CharacterReferenceInDataState,	48 CharacterReferenceInDataState,

49 CharacterReferenceInAttributeValueState,	49 CharacterReferenceInAttributeValueState,

50 RAWTEXTState,	50 RawDataState,

	51 RawDataLessThanSignState,

	52 RawDataEndTagOpenState,

	53 RawDataEndTagNameState,

51 TagOpenState,	54 TagOpenState,

52 CloseTagState,	55 CloseTagState,

53 TagNameState,	56 TagNameState,

54 RAWTEXTLessThanSignState,

55 RAWTEXTEndTagOpenState,

56 RAWTEXTEndTagNameState,

57 BeforeAttributeNameState,	57 BeforeAttributeNameState,

58 AttributeNameState,	58 AttributeNameState,

59 AfterAttributeNameState,	59 AfterAttributeNameState,

60 BeforeAttributeValueState,	60 BeforeAttributeValueState,

61 AttributeValueDoubleQuotedState,	61 AttributeValueDoubleQuotedState,

62 AttributeValueSingleQuotedState,	62 AttributeValueSingleQuotedState,

63 AttributeValueUnquotedState,	63 AttributeValueUnquotedState,

64 AfterAttributeValueQuotedState,	64 VoidTagState,

65 SelfClosingStartTagState,

66 CommentStart1State,	65 CommentStart1State,

67 CommentStart2State,	66 CommentStart2State,

68 CommentState,	67 CommentState,

69 CommentEnd1State,	68 CommentEnd1State,

70 CommentEnd2State,	69 CommentEnd2State,

71 };	70 };

72	71

73 // This function returns true if it emits a token. Otherwise, callers	72 // This function returns true if it emits a token. Otherwise, callers

74 // must provide the same (in progress) token on the next call (unless	73 // must provide the same (in progress) token on the next call (unless

75 // they call reset() first).	74 // they call reset() first).

76 bool nextToken(SegmentedString&, HTMLToken&);	75 bool nextToken(SegmentedString&, HTMLToken&);

77	76

78 State state() const { return m_state; }	77 State state() const { return m_state; }

	78

79 void setState(State state) { m_state = state; }	79 void setState(State state) { m_state = state; }

80	80

81 private:	81 private:

82 HTMLTokenizer();	82 HTMLTokenizer();

83	83

84 inline void parseError();	84 inline void parseError();

85	85

86 inline void bufferCharacter(UChar character)	86 inline void bufferCharacter(UChar character)

87 {	87 {

88 ASSERT(character != kEndOfFileMarker);	88 ASSERT(character != kEndOfFileMarker);

(...skipping 25 matching lines...) Expand all Loading...
114 m_token->clear();	114 m_token->clear();

115 m_token->makeEndOfFile();	115 m_token->makeEndOfFile();

116 return true;	116 return true;

117 }	117 }

118	118

119 inline bool flushEmitAndResumeIn(SegmentedString&, State);	119 inline bool flushEmitAndResumeIn(SegmentedString&, State);

120	120

121 // Return whether we need to emit a character token before dealing with	121 // Return whether we need to emit a character token before dealing with

122 // the buffered end tag.	122 // the buffered end tag.

123 inline bool flushBufferedEndTag(SegmentedString&);	123 inline bool flushBufferedEndTag(SegmentedString&);

124 inline bool temporaryBufferIs(const String&);

125

126 // Sometimes we speculatively consume input characters and we don't

127 // know whether they represent end tags or RCDATA, etc. These

128 // functions help manage this state.

129 inline void addToPossibleEndTag(LChar cc);

130	124

131 inline void saveEndTagNameIfNeeded()	125 inline void saveEndTagNameIfNeeded()

132 {	126 {

133 ASSERT(m_token->type() != HTMLToken::Uninitialized);	127 ASSERT(m_token->type() != HTMLToken::Uninitialized);

134 if (m_token->type() == HTMLToken::StartTag)	128 if (m_token->type() == HTMLToken::StartTag)

135 m_appropriateEndTagName = m_token->name();	129 m_appropriateEndTagName = m_token->name();

136 }	130 }

137 inline bool isAppropriateEndTag();	131 inline bool isAppropriateEndTag();

138	132

139

140 inline bool haveBufferedCharacterToken()	133 inline bool haveBufferedCharacterToken()

141 {	134 {

142 return m_token->type() == HTMLToken::Character;	135 return m_token->type() == HTMLToken::Character;

143 }	136 }

144	137

145 State m_state;	138 State m_state;

146	139

147 // m_token is owned by the caller. If nextToken is not on the stack,	140 // m_token is owned by the caller. If nextToken is not on the stack,

148 // this member might be pointing to unallocated memory.	141 // this member might be pointing to unallocated memory.

149 HTMLToken* m_token;	142 HTMLToken* m_token;

150	143

151 State m_returnState;	144 State m_returnState;

152	145

153 // http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-input-stream	146 // http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-input-stream

154 InputStreamPreprocessor<HTMLTokenizer> m_inputStreamPreprocessor;	147 InputStreamPreprocessor<HTMLTokenizer> m_inputStreamPreprocessor;

155 HTMLEntityParser m_entityParser;	148 HTMLEntityParser m_entityParser;

156	149

157 Vector<UChar, 32> m_appropriateEndTagName;	150 Vector<UChar, 32> m_appropriateEndTagName;

158	151

159 // http://www.whatwg.org/specs/web-apps/current-work/#temporary-buffer	152 // http://www.whatwg.org/specs/web-apps/current-work/#temporary-buffer

160 Vector<LChar, 32> m_temporaryBuffer;	153 Vector<LChar, 32> m_temporaryBuffer;

161

162 // We occasionally want to emit both a character token and an end tag

163 // token (e.g., when lexing script). We buffer the name of the end tag

164 // token here so we remember it next time we re-enter the tokenizer.

165 Vector<LChar, 32> m_bufferedEndTagName;

166 };	154 };

167	155

168 }	156 }

169	157

170 #endif	158 #endif

OLD	NEW

« no previous file with comments | « sky/engine/core/html/parser/HTMLToken.h ('k') | sky/engine/core/html/parser/HTMLTokenizer.cpp » ('j') | no next file with comments »