| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (C) 2008 Apple Inc. All Rights Reserved. | 2 * Copyright (C) 2008 Apple Inc. All Rights Reserved. |
| 3 * Copyright (C) 2010 Google, Inc. All Rights Reserved. | 3 * Copyright (C) 2010 Google, Inc. All Rights Reserved. |
| 4 * | 4 * |
| 5 * Redistribution and use in source and binary forms, with or without | 5 * Redistribution and use in source and binary forms, with or without |
| 6 * modification, are permitted provided that the following conditions | 6 * modification, are permitted provided that the following conditions |
| 7 * are met: | 7 * are met: |
| 8 * 1. Redistributions of source code must retain the above copyright | 8 * 1. Redistributions of source code must retain the above copyright |
| 9 * notice, this list of conditions and the following disclaimer. | 9 * notice, this list of conditions and the following disclaimer. |
| 10 * 2. Redistributions in binary form must reproduce the above copyright | 10 * 2. Redistributions in binary form must reproduce the above copyright |
| (...skipping 29 matching lines...) |
| 40 public: | 40 public: |
| 41 static PassOwnPtr<HTMLTokenizer> create() { return adoptPtr(new HTMLTokenizer()); } | 41 static PassOwnPtr<HTMLTokenizer> create() { return adoptPtr(new HTMLTokenizer()); } |
| 42 ~HTMLTokenizer(); | 42 ~HTMLTokenizer(); |
| 43 | 43 |
| 44 void reset(); | 44 void reset(); |
| 45 | 45 |
| 46 enum State { | 46 enum State { |
| 47 DataState, | 47 DataState, |
| 48 CharacterReferenceInDataState, | 48 CharacterReferenceInDataState, |
| 49 CharacterReferenceInAttributeValueState, | 49 CharacterReferenceInAttributeValueState, |
| 50 RAWTEXTState, | 50 RawDataState, |
| 51 RawDataLessThanSignState, |
| 52 RawDataEndTagOpenState, |
| 53 RawDataEndTagNameState, |
| 51 TagOpenState, | 54 TagOpenState, |
| 52 CloseTagState, | 55 CloseTagState, |
| 53 TagNameState, | 56 TagNameState, |
| 54 RAWTEXTLessThanSignState, | |
| 55 RAWTEXTEndTagOpenState, | |
| 56 RAWTEXTEndTagNameState, | |
| 57 BeforeAttributeNameState, | 57 BeforeAttributeNameState, |
| 58 AttributeNameState, | 58 AttributeNameState, |
| 59 AfterAttributeNameState, | 59 AfterAttributeNameState, |
| 60 BeforeAttributeValueState, | 60 BeforeAttributeValueState, |
| 61 AttributeValueDoubleQuotedState, | 61 AttributeValueDoubleQuotedState, |
| 62 AttributeValueSingleQuotedState, | 62 AttributeValueSingleQuotedState, |
| 63 AttributeValueUnquotedState, | 63 AttributeValueUnquotedState, |
| 64 AfterAttributeValueQuotedState, | 64 VoidTagState, |
| 65 SelfClosingStartTagState, | |
| 66 CommentStart1State, | 65 CommentStart1State, |
| 67 CommentStart2State, | 66 CommentStart2State, |
| 68 CommentState, | 67 CommentState, |
| 69 CommentEnd1State, | 68 CommentEnd1State, |
| 70 CommentEnd2State, | 69 CommentEnd2State, |
| 71 }; | 70 }; |
| 72 | 71 |
| 73 // This function returns true if it emits a token. Otherwise, callers | 72 // This function returns true if it emits a token. Otherwise, callers |
| 74 // must provide the same (in progress) token on the next call (unless | 73 // must provide the same (in progress) token on the next call (unless |
| 75 // they call reset() first). | 74 // they call reset() first). |
| 76 bool nextToken(SegmentedString&, HTMLToken&); | 75 bool nextToken(SegmentedString&, HTMLToken&); |
| 77 | 76 |
| 78 State state() const { return m_state; } | 77 State state() const { return m_state; } |
| 78 |
| 79 void setState(State state) { m_state = state; } | 79 void setState(State state) { m_state = state; } |
| 80 | 80 |
| 81 private: | 81 private: |
| 82 HTMLTokenizer(); | 82 HTMLTokenizer(); |
| 83 | 83 |
| 84 inline void parseError(); | 84 inline void parseError(); |
| 85 | 85 |
| 86 inline void bufferCharacter(UChar character) | 86 inline void bufferCharacter(UChar character) |
| 87 { | 87 { |
| 88 ASSERT(character != kEndOfFileMarker); | 88 ASSERT(character != kEndOfFileMarker); |
| (...skipping 25 matching lines...) |
| 114 m_token->clear(); | 114 m_token->clear(); |
| 115 m_token->makeEndOfFile(); | 115 m_token->makeEndOfFile(); |
| 116 return true; | 116 return true; |
| 117 } | 117 } |
| 118 | 118 |
| 119 inline bool flushEmitAndResumeIn(SegmentedString&, State); | 119 inline bool flushEmitAndResumeIn(SegmentedString&, State); |
| 120 | 120 |
| 121 // Return whether we need to emit a character token before dealing with | 121 // Return whether we need to emit a character token before dealing with |
| 122 // the buffered end tag. | 122 // the buffered end tag. |
| 123 inline bool flushBufferedEndTag(SegmentedString&); | 123 inline bool flushBufferedEndTag(SegmentedString&); |
| 124 inline bool temporaryBufferIs(const String&); | |
| 125 | |
| 126 // Sometimes we speculatively consume input characters and we don't | |
| 127 // know whether they represent end tags or RCDATA, etc. These | |
| 128 // functions help manage these state. | |
| 129 inline void addToPossibleEndTag(LChar cc); | |
| 130 | 124 |
| 131 inline void saveEndTagNameIfNeeded() | 125 inline void saveEndTagNameIfNeeded() |
| 132 { | 126 { |
| 133 ASSERT(m_token->type() != HTMLToken::Uninitialized); | 127 ASSERT(m_token->type() != HTMLToken::Uninitialized); |
| 134 if (m_token->type() == HTMLToken::StartTag) | 128 if (m_token->type() == HTMLToken::StartTag) |
| 135 m_appropriateEndTagName = m_token->name(); | 129 m_appropriateEndTagName = m_token->name(); |
| 136 } | 130 } |
| 137 inline bool isAppropriateEndTag(); | 131 inline bool isAppropriateEndTag(); |
| 138 | 132 |
| 139 | |
| 140 inline bool haveBufferedCharacterToken() | 133 inline bool haveBufferedCharacterToken() |
| 141 { | 134 { |
| 142 return m_token->type() == HTMLToken::Character; | 135 return m_token->type() == HTMLToken::Character; |
| 143 } | 136 } |
| 144 | 137 |
| 145 State m_state; | 138 State m_state; |
| 146 | 139 |
| 147 // m_token is owned by the caller. If nextToken is not on the stack, | 140 // m_token is owned by the caller. If nextToken is not on the stack, |
| 148 // this member might be pointing to unallocated memory. | 141 // this member might be pointing to unallocated memory. |
| 149 HTMLToken* m_token; | 142 HTMLToken* m_token; |
| 150 | 143 |
| 151 State m_returnState; | 144 State m_returnState; |
| 152 | 145 |
| 153 // http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-input-stream | 146 // http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-input-stream |
| 154 InputStreamPreprocessor<HTMLTokenizer> m_inputStreamPreprocessor; | 147 InputStreamPreprocessor<HTMLTokenizer> m_inputStreamPreprocessor; |
| 155 HTMLEntityParser m_entityParser; | 148 HTMLEntityParser m_entityParser; |
| 156 | 149 |
| 157 Vector<UChar, 32> m_appropriateEndTagName; | 150 Vector<UChar, 32> m_appropriateEndTagName; |
| 158 | 151 |
| 159 // http://www.whatwg.org/specs/web-apps/current-work/#temporary-buffer | 152 // http://www.whatwg.org/specs/web-apps/current-work/#temporary-buffer |
| 160 Vector<LChar, 32> m_temporaryBuffer; | 153 Vector<LChar, 32> m_temporaryBuffer; |
| 161 | |
| 162 // We occationally want to emit both a character token and an end tag | |
| 163 // token (e.g., when lexing script). We buffer the name of the end tag | |
| 164 // token here so we remember it next time we re-enter the tokenizer. | |
| 165 Vector<LChar, 32> m_bufferedEndTagName; | |
| 166 }; | 154 }; |
| 167 | 155 |
| 168 } | 156 } |
| 169 | 157 |
| 170 #endif | 158 #endif |
| OLD | NEW |