Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(106)

Side by Side Diff: sky/engine/core/html/parser/HTMLTokenizer.h

Issue 678263002: Update tokenizer to match spec (Closed) Base URL: git@github.com:domokit/mojo.git@master
Patch Set: Created 6 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « sky/engine/core/html/parser/HTMLToken.h ('k') | sky/engine/core/html/parser/HTMLTokenizer.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright (C) 2008 Apple Inc. All Rights Reserved. 2 * Copyright (C) 2008 Apple Inc. All Rights Reserved.
3 * Copyright (C) 2010 Google, Inc. All Rights Reserved. 3 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
4 * 4 *
5 * Redistribution and use in source and binary forms, with or without 5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions 6 * modification, are permitted provided that the following conditions
7 * are met: 7 * are met:
8 * 1. Redistributions of source code must retain the above copyright 8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer. 9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright 10 * 2. Redistributions in binary form must reproduce the above copyright
(...skipping 29 matching lines...) Expand all
40 public: 40 public:
41 static PassOwnPtr<HTMLTokenizer> create() { return adoptPtr(new HTMLTokenizer()); } 41 static PassOwnPtr<HTMLTokenizer> create() { return adoptPtr(new HTMLTokenizer()); }
42 ~HTMLTokenizer(); 42 ~HTMLTokenizer();
43 43
44 void reset(); 44 void reset();
45 45
46 enum State { 46 enum State {
47 DataState, 47 DataState,
48 CharacterReferenceInDataState, 48 CharacterReferenceInDataState,
49 CharacterReferenceInAttributeValueState, 49 CharacterReferenceInAttributeValueState,
50 RAWTEXTState, 50 RawDataState,
51 RawDataLessThanSignState,
52 RawDataEndTagOpenState,
53 RawDataEndTagNameState,
51 TagOpenState, 54 TagOpenState,
52 CloseTagState, 55 CloseTagState,
53 TagNameState, 56 TagNameState,
54 RAWTEXTLessThanSignState,
55 RAWTEXTEndTagOpenState,
56 RAWTEXTEndTagNameState,
57 BeforeAttributeNameState, 57 BeforeAttributeNameState,
58 AttributeNameState, 58 AttributeNameState,
59 AfterAttributeNameState, 59 AfterAttributeNameState,
60 BeforeAttributeValueState, 60 BeforeAttributeValueState,
61 AttributeValueDoubleQuotedState, 61 AttributeValueDoubleQuotedState,
62 AttributeValueSingleQuotedState, 62 AttributeValueSingleQuotedState,
63 AttributeValueUnquotedState, 63 AttributeValueUnquotedState,
64 AfterAttributeValueQuotedState, 64 VoidTagState,
65 SelfClosingStartTagState,
66 CommentStart1State, 65 CommentStart1State,
67 CommentStart2State, 66 CommentStart2State,
68 CommentState, 67 CommentState,
69 CommentEnd1State, 68 CommentEnd1State,
70 CommentEnd2State, 69 CommentEnd2State,
71 }; 70 };
72 71
73 // This function returns true if it emits a token. Otherwise, callers 72 // This function returns true if it emits a token. Otherwise, callers
74 // must provide the same (in progress) token on the next call (unless 73 // must provide the same (in progress) token on the next call (unless
75 // they call reset() first). 74 // they call reset() first).
76 bool nextToken(SegmentedString&, HTMLToken&); 75 bool nextToken(SegmentedString&, HTMLToken&);
77 76
78 State state() const { return m_state; } 77 State state() const { return m_state; }
78
79 void setState(State state) { m_state = state; } 79 void setState(State state) { m_state = state; }
80 80
81 private: 81 private:
82 HTMLTokenizer(); 82 HTMLTokenizer();
83 83
84 inline void parseError(); 84 inline void parseError();
85 85
86 inline void bufferCharacter(UChar character) 86 inline void bufferCharacter(UChar character)
87 { 87 {
88 ASSERT(character != kEndOfFileMarker); 88 ASSERT(character != kEndOfFileMarker);
(...skipping 25 matching lines...) Expand all
114 m_token->clear(); 114 m_token->clear();
115 m_token->makeEndOfFile(); 115 m_token->makeEndOfFile();
116 return true; 116 return true;
117 } 117 }
118 118
119 inline bool flushEmitAndResumeIn(SegmentedString&, State); 119 inline bool flushEmitAndResumeIn(SegmentedString&, State);
120 120
121 // Return whether we need to emit a character token before dealing with 121 // Return whether we need to emit a character token before dealing with
122 // the buffered end tag. 122 // the buffered end tag.
123 inline bool flushBufferedEndTag(SegmentedString&); 123 inline bool flushBufferedEndTag(SegmentedString&);
124 inline bool temporaryBufferIs(const String&);
125
126 // Sometimes we speculatively consume input characters and we don't
127 // know whether they represent end tags or RCDATA, etc. These
128 // functions help manage this state.
129 inline void addToPossibleEndTag(LChar cc);
130 124
131 inline void saveEndTagNameIfNeeded() 125 inline void saveEndTagNameIfNeeded()
132 { 126 {
133 ASSERT(m_token->type() != HTMLToken::Uninitialized); 127 ASSERT(m_token->type() != HTMLToken::Uninitialized);
134 if (m_token->type() == HTMLToken::StartTag) 128 if (m_token->type() == HTMLToken::StartTag)
135 m_appropriateEndTagName = m_token->name(); 129 m_appropriateEndTagName = m_token->name();
136 } 130 }
137 inline bool isAppropriateEndTag(); 131 inline bool isAppropriateEndTag();
138 132
139
140 inline bool haveBufferedCharacterToken() 133 inline bool haveBufferedCharacterToken()
141 { 134 {
142 return m_token->type() == HTMLToken::Character; 135 return m_token->type() == HTMLToken::Character;
143 } 136 }
144 137
145 State m_state; 138 State m_state;
146 139
147 // m_token is owned by the caller. If nextToken is not on the stack, 140 // m_token is owned by the caller. If nextToken is not on the stack,
148 // this member might be pointing to unallocated memory. 141 // this member might be pointing to unallocated memory.
149 HTMLToken* m_token; 142 HTMLToken* m_token;
150 143
151 State m_returnState; 144 State m_returnState;
152 145
153 // http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-input-stream 146 // http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-input-stream
154 InputStreamPreprocessor<HTMLTokenizer> m_inputStreamPreprocessor; 147 InputStreamPreprocessor<HTMLTokenizer> m_inputStreamPreprocessor;
155 HTMLEntityParser m_entityParser; 148 HTMLEntityParser m_entityParser;
156 149
157 Vector<UChar, 32> m_appropriateEndTagName; 150 Vector<UChar, 32> m_appropriateEndTagName;
158 151
159 // http://www.whatwg.org/specs/web-apps/current-work/#temporary-buffer 152 // http://www.whatwg.org/specs/web-apps/current-work/#temporary-buffer
160 Vector<LChar, 32> m_temporaryBuffer; 153 Vector<LChar, 32> m_temporaryBuffer;
161
162 // We occasionally want to emit both a character token and an end tag
163 // token (e.g., when lexing script). We buffer the name of the end tag
164 // token here so we remember it next time we re-enter the tokenizer.
165 Vector<LChar, 32> m_bufferedEndTagName;
166 }; 154 };
167 155
168 } 156 }
169 157
170 #endif 158 #endif
OLDNEW
« no previous file with comments | « sky/engine/core/html/parser/HTMLToken.h ('k') | sky/engine/core/html/parser/HTMLTokenizer.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698