Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(443)

Side by Side Diff: sky/engine/core/html/parser/HTMLTokenizer.h

Issue 678073002: Parse Sky entities according to the spec (Closed) Base URL: git@github.com:domokit/mojo.git@master
Patch Set: Created 6 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (C) 2008 Apple Inc. All Rights Reserved. 2 * Copyright (C) 2008 Apple Inc. All Rights Reserved.
3 * Copyright (C) 2010 Google, Inc. All Rights Reserved. 3 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
4 * 4 *
5 * Redistribution and use in source and binary forms, with or without 5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions 6 * modification, are permitted provided that the following conditions
7 * are met: 7 * are met:
8 * 1. Redistributions of source code must retain the above copyright 8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer. 9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright 10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the 11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution. 12 * documentation and/or other materials provided with the distribution.
13 * 13 *
14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY 14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */ 25 */
26 26
27 #ifndef HTMLTokenizer_h 27 #ifndef HTMLTokenizer_h
28 #define HTMLTokenizer_h 28 #define HTMLTokenizer_h
29 29
30 #include "core/html/parser/HTMLEntityParser.h"
30 #include "core/html/parser/HTMLToken.h" 31 #include "core/html/parser/HTMLToken.h"
31 #include "core/html/parser/InputStreamPreprocessor.h" 32 #include "core/html/parser/InputStreamPreprocessor.h"
32 #include "platform/text/SegmentedString.h" 33 #include "platform/text/SegmentedString.h"
33 34
34 namespace blink { 35 namespace blink {
35 36
36 class HTMLTokenizer { 37 class HTMLTokenizer {
37 WTF_MAKE_NONCOPYABLE(HTMLTokenizer); 38 WTF_MAKE_NONCOPYABLE(HTMLTokenizer);
38 WTF_MAKE_FAST_ALLOCATED; 39 WTF_MAKE_FAST_ALLOCATED;
39 public: 40 public:
40 static PassOwnPtr<HTMLTokenizer> create() { return adoptPtr(new HTMLTokenizer()); } 41 static PassOwnPtr<HTMLTokenizer> create() { return adoptPtr(new HTMLTokenizer()); }
41 ~HTMLTokenizer(); 42 ~HTMLTokenizer();
42 43
43 void reset(); 44 void reset();
44 45
45 enum State { 46 enum State {
46 DataState, 47 DataState,
47 CharacterReferenceInDataState, 48 CharacterReferenceInDataState,
49 CharacterReferenceInAttributeValueState,
48 RAWTEXTState, 50 RAWTEXTState,
49 TagOpenState, 51 TagOpenState,
50 EndTagOpenState, 52 EndTagOpenState,
51 TagNameState, 53 TagNameState,
52 RAWTEXTLessThanSignState, 54 RAWTEXTLessThanSignState,
53 RAWTEXTEndTagOpenState, 55 RAWTEXTEndTagOpenState,
54 RAWTEXTEndTagNameState, 56 RAWTEXTEndTagNameState,
55 BeforeAttributeNameState, 57 BeforeAttributeNameState,
56 AttributeNameState, 58 AttributeNameState,
57 AfterAttributeNameState, 59 AfterAttributeNameState,
58 BeforeAttributeValueState, 60 BeforeAttributeValueState,
59 AttributeValueDoubleQuotedState, 61 AttributeValueDoubleQuotedState,
60 AttributeValueSingleQuotedState, 62 AttributeValueSingleQuotedState,
61 AttributeValueUnquotedState, 63 AttributeValueUnquotedState,
62 CharacterReferenceInAttributeValueState,
63 AfterAttributeValueQuotedState, 64 AfterAttributeValueQuotedState,
64 SelfClosingStartTagState, 65 SelfClosingStartTagState,
65 BogusCommentState, 66 BogusCommentState,
66 // The ContinueBogusCommentState is not in the HTML5 spec, but we use 67 // The ContinueBogusCommentState is not in the HTML5 spec, but we use
67 // it internally to keep track of whether we've started the bogus 68 // it internally to keep track of whether we've started the bogus
68 // comment token yet. 69 // comment token yet.
69 ContinueBogusCommentState, 70 ContinueBogusCommentState,
70 MarkupDeclarationOpenState, 71 MarkupDeclarationOpenState,
71 CommentStartState, 72 CommentStartState,
72 CommentStartDashState, 73 CommentStartDashState,
73 CommentState, 74 CommentState,
74 CommentEndDashState, 75 CommentEndDashState,
75 CommentEndState, 76 CommentEndState,
76 CommentEndBangState, 77 CommentEndBangState,
77 }; 78 };
78 79
79 // This function returns true if it emits a token. Otherwise, callers 80 // This function returns true if it emits a token. Otherwise, callers
80 // must provide the same (in progress) token on the next call (unless 81 // must provide the same (in progress) token on the next call (unless
81 // they call reset() first). 82 // they call reset() first).
82 bool nextToken(SegmentedString&, HTMLToken&); 83 bool nextToken(SegmentedString&, HTMLToken&);
83 84
84 State state() const { return m_state; } 85 State state() const { return m_state; }
85 void setState(State state) { m_state = state; } 86 void setState(State state) { m_state = state; }
86 87
87 private: 88 private:
88 HTMLTokenizer(); 89 HTMLTokenizer();
89 90
90 inline bool processEntity(SegmentedString&);
91
92 inline void parseError(); 91 inline void parseError();
93 92
94 inline void bufferCharacter(UChar character) 93 inline void bufferCharacter(UChar character)
95 { 94 {
96 ASSERT(character != kEndOfFileMarker); 95 ASSERT(character != kEndOfFileMarker);
97 m_token->ensureIsCharacterToken(); 96 m_token->ensureIsCharacterToken();
98 m_token->appendToCharacter(character); 97 m_token->appendToCharacter(character);
99 } 98 }
100 99
101 inline bool emitAndResumeIn(SegmentedString& source, State state) 100 inline bool emitAndResumeIn(SegmentedString& source, State state)
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after
149 { 148 {
150 return m_token->type() == HTMLToken::Character; 149 return m_token->type() == HTMLToken::Character;
151 } 150 }
152 151
153 State m_state; 152 State m_state;
154 153
155 // m_token is owned by the caller. If nextToken is not on the stack, 154 // m_token is owned by the caller. If nextToken is not on the stack,
156 // this member might be pointing to unallocated memory. 155 // this member might be pointing to unallocated memory.
157 HTMLToken* m_token; 156 HTMLToken* m_token;
158 157
159 // http://www.whatwg.org/specs/web-apps/current-work/#additional-allowed-character 158 State m_returnState;
160 UChar m_additionalAllowedCharacter;
161 159
162 // http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-input-stream 160 // http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-input-stream
163 InputStreamPreprocessor<HTMLTokenizer> m_inputStreamPreprocessor; 161 InputStreamPreprocessor<HTMLTokenizer> m_inputStreamPreprocessor;
162 HTMLEntityParser m_entityParser;
164 163
165 Vector<UChar, 32> m_appropriateEndTagName; 164 Vector<UChar, 32> m_appropriateEndTagName;
166 165
167 // http://www.whatwg.org/specs/web-apps/current-work/#temporary-buffer 166 // http://www.whatwg.org/specs/web-apps/current-work/#temporary-buffer
168 Vector<LChar, 32> m_temporaryBuffer; 167 Vector<LChar, 32> m_temporaryBuffer;
169 168
170 // We occasionally want to emit both a character token and an end tag 169 // We occasionally want to emit both a character token and an end tag
171 // token (e.g., when lexing script). We buffer the name of the end tag 170 // token (e.g., when lexing script). We buffer the name of the end tag
172 // token here so we remember it next time we re-enter the tokenizer. 171 // token here so we remember it next time we re-enter the tokenizer.
173 Vector<LChar, 32> m_bufferedEndTagName; 172 Vector<LChar, 32> m_bufferedEndTagName;
174 }; 173 };
175 174
176 } 175 }
177 176
178 #endif 177 #endif
OLDNEW
« no previous file with comments | « sky/engine/core/html/parser/HTMLEntityParser.cpp ('k') | sky/engine/core/html/parser/HTMLTokenizer.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698