| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (C) 2008 Apple Inc. All Rights Reserved. | 2 * Copyright (C) 2008 Apple Inc. All Rights Reserved. |
| 3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/ | 3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/ |
| 4 * Copyright (C) 2010 Google, Inc. All Rights Reserved. | 4 * Copyright (C) 2010 Google, Inc. All Rights Reserved. |
| 5 * | 5 * |
| 6 * Redistribution and use in source and binary forms, with or without | 6 * Redistribution and use in source and binary forms, with or without |
| 7 * modification, are permitted provided that the following conditions | 7 * modification, are permitted provided that the following conditions |
| 8 * are met: | 8 * are met: |
| 9 * 1. Redistributions of source code must retain the above copyright | 9 * 1. Redistributions of source code must retain the above copyright |
| 10 * notice, this list of conditions and the following disclaimer. | 10 * notice, this list of conditions and the following disclaimer. |
| (...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 55 bool AtomicHTMLToken::usesName() const | 55 bool AtomicHTMLToken::usesName() const |
| 56 { | 56 { |
| 57 return m_type == HTMLToken::StartTag || m_type == HTMLToken::EndTag; | 57 return m_type == HTMLToken::StartTag || m_type == HTMLToken::EndTag; |
| 58 } | 58 } |
| 59 | 59 |
| 60 bool AtomicHTMLToken::usesAttributes() const | 60 bool AtomicHTMLToken::usesAttributes() const |
| 61 { | 61 { |
| 62 return m_type == HTMLToken::StartTag || m_type == HTMLToken::EndTag; | 62 return m_type == HTMLToken::StartTag || m_type == HTMLToken::EndTag; |
| 63 } | 63 } |
| 64 | 64 |
| 65 static inline UChar toLowerCase(UChar cc) | |
| 66 { | |
| 67 ASSERT(isASCIIUpper(cc)); | |
| 68 const int lowerCaseOffset = 0x20; | |
| 69 return cc + lowerCaseOffset; | |
| 70 } | |
| 71 | |
| 72 static inline bool vectorEqualsString(const Vector<LChar, 32>& vector, const String& string) | |
| 73 { | |
| 74 if (vector.size() != string.length()) | |
| 75 return false; | |
| 76 | |
| 77 if (!string.length()) | |
| 78 return true; | |
| 79 | |
| 80 return equal(string.impl(), vector.data(), vector.size()); | |
| 81 } | |
| 82 | |
| 83 static inline bool isEndTagBufferingState(HTMLTokenizer::State state) | 65 static inline bool isEndTagBufferingState(HTMLTokenizer::State state) |
| 84 { | 66 { |
| 85 switch (state) { | 67 return state == HTMLTokenizer::RawDataEndTagOpenState || state == HTMLTokenizer::RawDataEndTagNameState; |
| 86 case HTMLTokenizer::RAWTEXTEndTagOpenState: | |
| 87 case HTMLTokenizer::RAWTEXTEndTagNameState: | |
| 88 return true; | |
| 89 default: | |
| 90 return false; | |
| 91 } | |
| 92 } | 68 } |
| 93 | 69 |
| 94 #define HTML_BEGIN_STATE(stateName) BEGIN_STATE(HTMLTokenizer, stateName) | 70 #define HTML_BEGIN_STATE(stateName) BEGIN_STATE(HTMLTokenizer, stateName) |
| 95 #define HTML_RECONSUME_IN(stateName) RECONSUME_IN(HTMLTokenizer, stateName) | 71 #define HTML_RECONSUME_IN(stateName) RECONSUME_IN(HTMLTokenizer, stateName) |
| 96 #define HTML_ADVANCE_TO(stateName) ADVANCE_TO(HTMLTokenizer, stateName) | 72 #define HTML_ADVANCE_TO(stateName) ADVANCE_TO(HTMLTokenizer, stateName) |
| 97 #define HTML_SWITCH_TO(stateName) SWITCH_TO(HTMLTokenizer, stateName) | 73 #define HTML_SWITCH_TO(stateName) SWITCH_TO(HTMLTokenizer, stateName) |
| 98 | 74 |
| 99 HTMLTokenizer::HTMLTokenizer() | 75 HTMLTokenizer::HTMLTokenizer() |
| 100 : m_inputStreamPreprocessor(this) | 76 : m_inputStreamPreprocessor(this) |
| 101 { | 77 { |
| 102 reset(); | 78 reset(); |
| 103 } | 79 } |
| 104 | 80 |
| 105 HTMLTokenizer::~HTMLTokenizer() | 81 HTMLTokenizer::~HTMLTokenizer() |
| 106 { | 82 { |
| 107 } | 83 } |
| 108 | 84 |
| 109 void HTMLTokenizer::reset() | 85 void HTMLTokenizer::reset() |
| 110 { | 86 { |
| 111 m_state = HTMLTokenizer::DataState; | 87 m_state = HTMLTokenizer::DataState; |
| 112 m_token = 0; | 88 m_token = 0; |
| 113 } | 89 } |
| 114 | 90 |
| 115 bool HTMLTokenizer::flushBufferedEndTag(SegmentedString& source) | 91 bool HTMLTokenizer::flushBufferedEndTag(SegmentedString& source) |
| 116 { | 92 { |
| 117 ASSERT(m_token->type() == HTMLToken::Character || m_token->type() == HTMLToken::Uninitialized); | 93 ASSERT(m_token->type() == HTMLToken::Character || m_token->type() == HTMLToken::Uninitialized); |
| 118 source.advanceAndUpdateLineNumber(); | 94 source.advanceAndUpdateLineNumber(); |
| 119 if (m_token->type() == HTMLToken::Character) | 95 if (m_token->type() == HTMLToken::Character) |
| 120 return true; | 96 return true; |
| 121 m_token->beginEndTag(m_bufferedEndTagName); | 97 m_token->beginEndTag(m_temporaryBuffer); |
| 122 m_bufferedEndTagName.clear(); | |
| 123 m_appropriateEndTagName.clear(); | 98 m_appropriateEndTagName.clear(); |
| 124 m_temporaryBuffer.clear(); | 99 m_temporaryBuffer.clear(); |
| 125 return false; | 100 return false; |
| 126 } | 101 } |
| 127 | 102 |
| 128 #define FLUSH_AND_ADVANCE_TO(stateName) \ | 103 #define FLUSH_AND_ADVANCE_TO(stateName) \ |
| 129 do { \ | 104 do { \ |
| 130 m_state = HTMLTokenizer::stateName; \ | 105 m_state = HTMLTokenizer::stateName; \ |
| 131 if (flushBufferedEndTag(source)) \ | 106 if (flushBufferedEndTag(source)) \ |
| 132 return true; \ | 107 return true; \ |
| (...skipping 11 matching lines...) Expand all Loading... |
| 144 return true; | 119 return true; |
| 145 } | 120 } |
| 146 | 121 |
| 147 bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) | 122 bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) |
| 148 { | 123 { |
| 149 // If we have a token in progress, then we're supposed to be called back | 124 // If we have a token in progress, then we're supposed to be called back |
| 150 // with the same token so we can finish it. | 125 // with the same token so we can finish it. |
| 151 ASSERT(!m_token || m_token == &token || token.type() == HTMLToken::Uninitialized); | 126 ASSERT(!m_token || m_token == &token || token.type() == HTMLToken::Uninitialized); |
| 152 m_token = &token; | 127 m_token = &token; |
| 153 | 128 |
| 154 if (!m_bufferedEndTagName.isEmpty() && !isEndTagBufferingState(m_state)) { | 129 if (!m_temporaryBuffer.isEmpty() && !isEndTagBufferingState(m_state)) { |
| 155 // FIXME: This should call flushBufferedEndTag(). | 130 // FIXME: This should call flushBufferedEndTag(). |
| 156 // We started an end tag during our last iteration. | 131 // We started an end tag during our last iteration. |
| 157 m_token->beginEndTag(m_bufferedEndTagName); | 132 m_token->beginEndTag(m_temporaryBuffer); |
| 158 m_bufferedEndTagName.clear(); | |
| 159 m_appropriateEndTagName.clear(); | 133 m_appropriateEndTagName.clear(); |
| 160 m_temporaryBuffer.clear(); | 134 m_temporaryBuffer.clear(); |
| 161 if (m_state == HTMLTokenizer::DataState) { | 135 if (m_state == HTMLTokenizer::DataState) { |
| 162 // We're back in the data state, so we must be done with the tag. | 136 // We're back in the data state, so we must be done with the tag. |
| 163 return true; | 137 return true; |
| 164 } | 138 } |
| 165 } | 139 } |
| 166 | 140 |
| 167 if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source)) | 141 if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source)) |
| 168 return haveBufferedCharacterToken(); | 142 return haveBufferedCharacterToken(); |
| 169 UChar cc = m_inputStreamPreprocessor.nextInputCharacter(); | 143 UChar cc = m_inputStreamPreprocessor.nextInputCharacter(); |
| 170 | 144 |
| 171 // Source: http://www.whatwg.org/specs/web-apps/current-work/#tokenisation0 | 145 // Source: http://www.whatwg.org/specs/web-apps/current-work/#tokenisation0 |
| 172 switch (m_state) { | 146 switch (m_state) { |
| 173 HTML_BEGIN_STATE(DataState) { | 147 HTML_BEGIN_STATE(DataState) { |
| 174 if (cc == '&') { | 148 if (cc == '&') { |
| 175 m_returnState = DataState; | 149 m_returnState = DataState; |
| 176 m_entityParser.reset(); | 150 m_entityParser.reset(); |
| 177 HTML_ADVANCE_TO(CharacterReferenceInDataState); | 151 HTML_ADVANCE_TO(CharacterReferenceInDataState); |
| 178 } else if (cc == '<') { | 152 } else if (cc == '<') { |
| 179 if (m_token->type() == HTMLToken::Character) { | 153 if (m_token->type() == HTMLToken::Character) { |
| 180 // We have a bunch of character tokens queued up that we | 154 // We have a bunch of character tokens queued up that we |
| 181 // are emitting lazily here. | 155 // are emitting lazily here. |
| 182 return true; | 156 return true; |
| 183 } | 157 } |
| 184 HTML_ADVANCE_TO(TagOpenState); | 158 HTML_ADVANCE_TO(TagOpenState); |
| 185 } else if (cc == kEndOfFileMarker) | 159 } else if (cc == kEndOfFileMarker) { |
| 186 return emitEndOfFile(source); | 160 return emitEndOfFile(source); |
| 187 else { | 161 } else { |
| 188 bufferCharacter(cc); | 162 bufferCharacter(cc); |
| 189 HTML_ADVANCE_TO(DataState); | 163 HTML_ADVANCE_TO(DataState); |
| 190 } | 164 } |
| 191 } | 165 } |
| 192 END_STATE() | 166 END_STATE() |
| 193 | 167 |
| 194 HTML_BEGIN_STATE(CharacterReferenceInDataState) { | 168 HTML_BEGIN_STATE(CharacterReferenceInDataState) { |
| 195 if (!m_entityParser.parse(source)) | 169 if (!m_entityParser.parse(source)) |
| 196 return haveBufferedCharacterToken(); | 170 return haveBufferedCharacterToken(); |
| 197 for (const UChar& entityCharacter : m_entityParser.result()) | 171 for (const UChar& entityCharacter : m_entityParser.result()) |
| (...skipping 15 matching lines...) Expand all Loading... |
| 213 HTML_SWITCH_TO(AttributeValueDoubleQuotedState); | 187 HTML_SWITCH_TO(AttributeValueDoubleQuotedState); |
| 214 else if (m_returnState == AttributeValueSingleQuotedState) | 188 else if (m_returnState == AttributeValueSingleQuotedState) |
| 215 HTML_SWITCH_TO(AttributeValueSingleQuotedState); | 189 HTML_SWITCH_TO(AttributeValueSingleQuotedState); |
| 216 else if (m_returnState == AttributeValueUnquotedState) | 190 else if (m_returnState == AttributeValueUnquotedState) |
| 217 HTML_SWITCH_TO(AttributeValueUnquotedState); | 191 HTML_SWITCH_TO(AttributeValueUnquotedState); |
| 218 else | 192 else |
| 219 ASSERT_NOT_REACHED(); | 193 ASSERT_NOT_REACHED(); |
| 220 } | 194 } |
| 221 END_STATE() | 195 END_STATE() |
| 222 | 196 |
| 223 HTML_BEGIN_STATE(RAWTEXTState) { | 197 HTML_BEGIN_STATE(RawDataState) { |
| 224 if (cc == '<') | 198 if (cc == '<') { |
| 225 HTML_ADVANCE_TO(RAWTEXTLessThanSignState); | 199 HTML_ADVANCE_TO(RawDataLessThanSignState); |
| 226 else if (cc == kEndOfFileMarker) | 200 } else { |
| 227 return emitEndOfFile(source); | |
| 228 else { | |
| 229 bufferCharacter(cc); | 201 bufferCharacter(cc); |
| 230 HTML_ADVANCE_TO(RAWTEXTState); | 202 HTML_ADVANCE_TO(RawDataState); |
| 203 } |
| 204 } |
| 205 END_STATE() |
| 206 |
| 207 HTML_BEGIN_STATE(RawDataLessThanSignState) { |
| 208 if (cc == '/') { |
| 209 m_temporaryBuffer.clear(); |
| 210 HTML_ADVANCE_TO(RawDataEndTagOpenState); |
| 211 } else { |
| 212 bufferCharacter('<'); |
| 213 HTML_RECONSUME_IN(RawDataState); |
| 214 } |
| 215 } |
| 216 END_STATE() |
| 217 |
| 218 HTML_BEGIN_STATE(RawDataEndTagOpenState) { |
| 219 if (isASCIILower(cc)) { |
| 220 m_temporaryBuffer.append(static_cast<LChar>(cc)); |
| 221 HTML_ADVANCE_TO(RawDataEndTagNameState); |
| 222 } else { |
| 223 bufferCharacter('<'); |
| 224 bufferCharacter('/'); |
| 225 HTML_RECONSUME_IN(RawDataState); |
| 226 } |
| 227 } |
| 228 END_STATE() |
| 229 |
| 230 HTML_BEGIN_STATE(RawDataEndTagNameState) { |
| 231 if (isASCIILower(cc)) { |
| 232 m_temporaryBuffer.append(static_cast<LChar>(cc)); |
| 233 HTML_ADVANCE_TO(RawDataEndTagNameState); |
| 234 } else { |
| 235 if (isTokenizerWhitespace(cc)) { |
| 236 if (isAppropriateEndTag()) |
| 237 FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState); |
| 238 } else if (cc == '/') { |
| 239 if (isAppropriateEndTag()) |
| 240 FLUSH_AND_ADVANCE_TO(VoidTagState); |
| 241 } else if (cc == '>') { |
| 242 if (isAppropriateEndTag()) |
| 243 return flushEmitAndResumeIn(source, HTMLTokenizer::DataState); |
| 244 } |
| 245 bufferCharacter('<'); |
| 246 bufferCharacter('/'); |
| 247 m_token->appendToCharacter(m_temporaryBuffer); |
| 248 m_temporaryBuffer.clear(); |
| 249 HTML_RECONSUME_IN(RawDataState); |
| 231 } | 250 } |
| 232 } | 251 } |
| 233 END_STATE() | 252 END_STATE() |
| 234 | 253 |
| 235 HTML_BEGIN_STATE(TagOpenState) { | 254 HTML_BEGIN_STATE(TagOpenState) { |
| 236 if (cc == '!') | 255 if (cc == '!') { |
| 237 HTML_ADVANCE_TO(CommentStart1State); | 256 HTML_ADVANCE_TO(CommentStart1State); |
| 238 else if (cc == '/') | 257 } else if (cc == '/') { |
| 239 HTML_ADVANCE_TO(CloseTagState); | 258 HTML_ADVANCE_TO(CloseTagState); |
| 240 else if (isASCIIUpper(cc)) { | 259 } else if (isTokenizerTagName(cc)) { |
| 241 m_token->beginStartTag(toLowerCase(cc)); | 260 m_token->beginStartTag(static_cast<LChar>(cc)); |
| 242 HTML_ADVANCE_TO(TagNameState); | |
| 243 } else if (isASCIILower(cc)) { | |
| 244 m_token->beginStartTag(cc); | |
| 245 HTML_ADVANCE_TO(TagNameState); | 261 HTML_ADVANCE_TO(TagNameState); |
| 246 } else { | 262 } else { |
| 247 parseError(); | |
| 248 bufferCharacter('<'); | 263 bufferCharacter('<'); |
| 249 HTML_RECONSUME_IN(DataState); | 264 HTML_RECONSUME_IN(DataState); |
| 250 } | 265 } |
| 251 } | 266 } |
| 252 END_STATE() | 267 END_STATE() |
| 253 | 268 |
| 254 HTML_BEGIN_STATE(CloseTagState) { | 269 HTML_BEGIN_STATE(CloseTagState) { |
| 255 if (isASCIIUpper(cc)) { | 270 if (isTokenizerTagName(cc)) { |
| 256 m_token->beginEndTag(static_cast<LChar>(toLowerCase(cc))); | |
| 257 m_appropriateEndTagName.clear(); | |
| 258 HTML_ADVANCE_TO(TagNameState); | |
| 259 } else if (isASCIILower(cc)) { | |
| 260 m_token->beginEndTag(static_cast<LChar>(cc)); | 271 m_token->beginEndTag(static_cast<LChar>(cc)); |
| 261 m_appropriateEndTagName.clear(); | |
| 262 HTML_ADVANCE_TO(TagNameState); | 272 HTML_ADVANCE_TO(TagNameState); |
| 263 } else if (cc == '>') { | 273 } else if (cc == '>') { |
| 264 bufferCharacter('<'); | 274 bufferCharacter('<'); |
| 265 bufferCharacter('/'); | 275 bufferCharacter('/'); |
| 266 bufferCharacter('>'); | 276 bufferCharacter('>'); |
| 267 HTML_ADVANCE_TO(DataState); | 277 HTML_ADVANCE_TO(DataState); |
| 268 } else { | 278 } else { |
| 269 bufferCharacter('<'); | 279 bufferCharacter('<'); |
| 270 bufferCharacter('/'); | 280 bufferCharacter('/'); |
| 271 HTML_RECONSUME_IN(DataState); | 281 HTML_RECONSUME_IN(DataState); |
| 272 } | 282 } |
| 273 } | 283 } |
| 274 END_STATE() | 284 END_STATE() |
| 275 | 285 |
| 276 HTML_BEGIN_STATE(TagNameState) { | 286 HTML_BEGIN_STATE(TagNameState) { |
| 277 if (isTokenizerWhitespace(cc)) | 287 if (isTokenizerWhitespace(cc)) { |
| 278 HTML_ADVANCE_TO(BeforeAttributeNameState); | 288 HTML_ADVANCE_TO(BeforeAttributeNameState); |
| 279 else if (cc == '/') | 289 } else if (cc == '/') { |
| 280 HTML_ADVANCE_TO(SelfClosingStartTagState); | 290 HTML_ADVANCE_TO(VoidTagState); |
| 281 else if (cc == '>') | 291 } else if (cc == '>') { |
| 282 return emitAndResumeIn(source, HTMLTokenizer::DataState); | 292 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
| 283 else if (isASCIIUpper(cc)) { | |
| 284 m_token->appendToName(toLowerCase(cc)); | |
| 285 HTML_ADVANCE_TO(TagNameState); | |
| 286 } else if (cc == kEndOfFileMarker) { | |
| 287 parseError(); | |
| 288 HTML_RECONSUME_IN(DataState); | |
| 289 } else { | 293 } else { |
| 290 m_token->appendToName(cc); | 294 m_token->appendToName(cc); |
| 291 HTML_ADVANCE_TO(TagNameState); | 295 HTML_ADVANCE_TO(TagNameState); |
| 292 } | 296 } |
| 293 } | 297 } |
| 294 END_STATE() | 298 END_STATE() |
| 295 | 299 |
| 296 HTML_BEGIN_STATE(RAWTEXTLessThanSignState) { | |
| 297 if (cc == '/') { | |
| 298 m_temporaryBuffer.clear(); | |
| 299 ASSERT(m_bufferedEndTagName.isEmpty()); | |
| 300 HTML_ADVANCE_TO(RAWTEXTEndTagOpenState); | |
| 301 } else { | |
| 302 bufferCharacter('<'); | |
| 303 HTML_RECONSUME_IN(RAWTEXTState); | |
| 304 } | |
| 305 } | |
| 306 END_STATE() | |
| 307 | |
| 308 HTML_BEGIN_STATE(RAWTEXTEndTagOpenState) { | |
| 309 if (isASCIIUpper(cc)) { | |
| 310 m_temporaryBuffer.append(static_cast<LChar>(cc)); | |
| 311 addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc))); | |
| 312 HTML_ADVANCE_TO(RAWTEXTEndTagNameState); | |
| 313 } else if (isASCIILower(cc)) { | |
| 314 m_temporaryBuffer.append(static_cast<LChar>(cc)); | |
| 315 addToPossibleEndTag(static_cast<LChar>(cc)); | |
| 316 HTML_ADVANCE_TO(RAWTEXTEndTagNameState); | |
| 317 } else { | |
| 318 bufferCharacter('<'); | |
| 319 bufferCharacter('/'); | |
| 320 HTML_RECONSUME_IN(RAWTEXTState); | |
| 321 } | |
| 322 } | |
| 323 END_STATE() | |
| 324 | |
| 325 HTML_BEGIN_STATE(RAWTEXTEndTagNameState) { | |
| 326 if (isASCIIUpper(cc)) { | |
| 327 m_temporaryBuffer.append(static_cast<LChar>(cc)); | |
| 328 addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc))); | |
| 329 HTML_ADVANCE_TO(RAWTEXTEndTagNameState); | |
| 330 } else if (isASCIILower(cc)) { | |
| 331 m_temporaryBuffer.append(static_cast<LChar>(cc)); | |
| 332 addToPossibleEndTag(static_cast<LChar>(cc)); | |
| 333 HTML_ADVANCE_TO(RAWTEXTEndTagNameState); | |
| 334 } else { | |
| 335 if (isTokenizerWhitespace(cc)) { | |
| 336 if (isAppropriateEndTag()) { | |
| 337 m_temporaryBuffer.append(static_cast<LChar>(cc)); | |
| 338 FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState); | |
| 339 } | |
| 340 } else if (cc == '/') { | |
| 341 if (isAppropriateEndTag()) { | |
| 342 m_temporaryBuffer.append(static_cast<LChar>(cc)); | |
| 343 FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState); | |
| 344 } | |
| 345 } else if (cc == '>') { | |
| 346 if (isAppropriateEndTag()) { | |
| 347 m_temporaryBuffer.append(static_cast<LChar>(cc)); | |
| 348 return flushEmitAndResumeIn(source, HTMLTokenizer::DataState); | |
| 349 } | |
| 350 } | |
| 351 bufferCharacter('<'); | |
| 352 bufferCharacter('/'); | |
| 353 m_token->appendToCharacter(m_temporaryBuffer); | |
| 354 m_bufferedEndTagName.clear(); | |
| 355 m_temporaryBuffer.clear(); | |
| 356 HTML_RECONSUME_IN(RAWTEXTState); | |
| 357 } | |
| 358 } | |
| 359 END_STATE() | |
| 360 | |
| 361 HTML_BEGIN_STATE(BeforeAttributeNameState) { | 300 HTML_BEGIN_STATE(BeforeAttributeNameState) { |
| 362 if (isTokenizerWhitespace(cc)) | 301 if (isTokenizerWhitespace(cc)) { |
| 363 HTML_ADVANCE_TO(BeforeAttributeNameState); | 302 HTML_ADVANCE_TO(BeforeAttributeNameState); |
| 364 else if (cc == '/') | 303 } else if (cc == '/') { |
| 365 HTML_ADVANCE_TO(SelfClosingStartTagState); | 304 HTML_ADVANCE_TO(VoidTagState); |
| 366 else if (cc == '>') | 305 } else if (cc == '>') { |
| 367 return emitAndResumeIn(source, HTMLTokenizer::DataState); | 306 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
| 368 else if (isASCIIUpper(cc)) { | |
| 369 m_token->addNewAttribute(); | |
| 370 m_token->beginAttributeName(source.numberOfCharactersConsumed()); | |
| 371 m_token->appendToAttributeName(toLowerCase(cc)); | |
| 372 HTML_ADVANCE_TO(AttributeNameState); | |
| 373 } else if (cc == kEndOfFileMarker) { | |
| 374 parseError(); | |
| 375 HTML_RECONSUME_IN(DataState); | |
| 376 } else { | 307 } else { |
| 377 if (cc == '"' || cc == '\'' || cc == '<' || cc == '=') | |
| 378 parseError(); | |
| 379 m_token->addNewAttribute(); | 308 m_token->addNewAttribute(); |
| 380 m_token->beginAttributeName(source.numberOfCharactersConsumed()); | 309 m_token->beginAttributeName(source.numberOfCharactersConsumed()); |
| 381 m_token->appendToAttributeName(cc); | 310 m_token->appendToAttributeName(cc); |
| 382 HTML_ADVANCE_TO(AttributeNameState); | 311 HTML_ADVANCE_TO(AttributeNameState); |
| 383 } | 312 } |
| 384 } | 313 } |
| 385 END_STATE() | 314 END_STATE() |
| 386 | 315 |
| 387 HTML_BEGIN_STATE(AttributeNameState) { | 316 HTML_BEGIN_STATE(AttributeNameState) { |
| 388 if (isTokenizerWhitespace(cc)) { | 317 if (isTokenizerWhitespace(cc)) { |
| 389 m_token->endAttributeName(source.numberOfCharactersConsumed()); | 318 m_token->endAttributeName(source.numberOfCharactersConsumed()); |
| 390 HTML_ADVANCE_TO(AfterAttributeNameState); | 319 HTML_ADVANCE_TO(AfterAttributeNameState); |
| 391 } else if (cc == '/') { | 320 } else if (cc == '/') { |
| 392 m_token->endAttributeName(source.numberOfCharactersConsumed()); | 321 m_token->endAttributeName(source.numberOfCharactersConsumed()); |
| 393 HTML_ADVANCE_TO(SelfClosingStartTagState); | 322 HTML_ADVANCE_TO(VoidTagState); |
| 394 } else if (cc == '=') { | 323 } else if (cc == '=') { |
| 395 m_token->endAttributeName(source.numberOfCharactersConsumed()); | 324 m_token->endAttributeName(source.numberOfCharactersConsumed()); |
| 396 HTML_ADVANCE_TO(BeforeAttributeValueState); | 325 HTML_ADVANCE_TO(BeforeAttributeValueState); |
| 397 } else if (cc == '>') { | 326 } else if (cc == '>') { |
| 398 m_token->endAttributeName(source.numberOfCharactersConsumed()); | 327 m_token->endAttributeName(source.numberOfCharactersConsumed()); |
| 399 return emitAndResumeIn(source, HTMLTokenizer::DataState); | 328 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
| 400 } else if (isASCIIUpper(cc)) { | |
| 401 m_token->appendToAttributeName(toLowerCase(cc)); | |
| 402 HTML_ADVANCE_TO(AttributeNameState); | |
| 403 } else if (cc == kEndOfFileMarker) { | |
| 404 parseError(); | |
| 405 m_token->endAttributeName(source.numberOfCharactersConsumed()); | |
| 406 HTML_RECONSUME_IN(DataState); | |
| 407 } else { | 329 } else { |
| 408 if (cc == '"' || cc == '\'' || cc == '<' || cc == '=') | |
| 409 parseError(); | |
| 410 m_token->appendToAttributeName(cc); | 330 m_token->appendToAttributeName(cc); |
| 411 HTML_ADVANCE_TO(AttributeNameState); | 331 HTML_ADVANCE_TO(AttributeNameState); |
| 412 } | 332 } |
| 413 } | 333 } |
| 414 END_STATE() | 334 END_STATE() |
| 415 | 335 |
| 416 HTML_BEGIN_STATE(AfterAttributeNameState) { | 336 HTML_BEGIN_STATE(AfterAttributeNameState) { |
| 417 if (isTokenizerWhitespace(cc)) | 337 if (isTokenizerWhitespace(cc)) { |
| 418 HTML_ADVANCE_TO(AfterAttributeNameState); | 338 HTML_ADVANCE_TO(AfterAttributeNameState); |
| 419 else if (cc == '/') | 339 } else if (cc == '/') { |
| 420 HTML_ADVANCE_TO(SelfClosingStartTagState); | 340 HTML_ADVANCE_TO(VoidTagState); |
| 421 else if (cc == '=') | 341 } else if (cc == '=') { |
| 422 HTML_ADVANCE_TO(BeforeAttributeValueState); | 342 HTML_ADVANCE_TO(BeforeAttributeValueState); |
| 423 else if (cc == '>') | 343 } else if (cc == '>') { |
| 424 return emitAndResumeIn(source, HTMLTokenizer::DataState); | 344 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
| 425 else if (isASCIIUpper(cc)) { | |
| 426 m_token->addNewAttribute(); | |
| 427 m_token->beginAttributeName(source.numberOfCharactersConsumed()); | |
| 428 m_token->appendToAttributeName(toLowerCase(cc)); | |
| 429 HTML_ADVANCE_TO(AttributeNameState); | |
| 430 } else if (cc == kEndOfFileMarker) { | |
| 431 parseError(); | |
| 432 HTML_RECONSUME_IN(DataState); | |
| 433 } else { | 345 } else { |
| 434 if (cc == '"' || cc == '\'' || cc == '<') | |
| 435 parseError(); | |
| 436 m_token->addNewAttribute(); | 346 m_token->addNewAttribute(); |
| 437 m_token->beginAttributeName(source.numberOfCharactersConsumed()); | 347 m_token->beginAttributeName(source.numberOfCharactersConsumed()); |
| 438 m_token->appendToAttributeName(cc); | 348 m_token->appendToAttributeName(cc); |
| 439 HTML_ADVANCE_TO(AttributeNameState); | 349 HTML_ADVANCE_TO(AttributeNameState); |
| 440 } | 350 } |
| 441 } | 351 } |
| 442 END_STATE() | 352 END_STATE() |
| 443 | 353 |
| 444 HTML_BEGIN_STATE(BeforeAttributeValueState) { | 354 HTML_BEGIN_STATE(BeforeAttributeValueState) { |
| 445 if (isTokenizerWhitespace(cc)) | 355 if (isTokenizerWhitespace(cc)) |
| 446 HTML_ADVANCE_TO(BeforeAttributeValueState); | 356 HTML_ADVANCE_TO(BeforeAttributeValueState); |
| 447 else if (cc == '"') { | 357 else if (cc == '"') { |
| 448 m_token->beginAttributeValue(source.numberOfCharactersConsumed() + 1); | 358 m_token->beginAttributeValue(source.numberOfCharactersConsumed() + 1); |
| 449 HTML_ADVANCE_TO(AttributeValueDoubleQuotedState); | 359 HTML_ADVANCE_TO(AttributeValueDoubleQuotedState); |
| 450 } else if (cc == '&') { | 360 } else if (cc == '&') { |
| 451 m_token->beginAttributeValue(source.numberOfCharactersConsumed()); | 361 m_token->beginAttributeValue(source.numberOfCharactersConsumed()); |
| 452 HTML_RECONSUME_IN(AttributeValueUnquotedState); | 362 HTML_RECONSUME_IN(AttributeValueUnquotedState); |
| 453 } else if (cc == '\'') { | 363 } else if (cc == '\'') { |
| 454 m_token->beginAttributeValue(source.numberOfCharactersConsumed() + 1); | 364 m_token->beginAttributeValue(source.numberOfCharactersConsumed() + 1); |
| 455 HTML_ADVANCE_TO(AttributeValueSingleQuotedState); | 365 HTML_ADVANCE_TO(AttributeValueSingleQuotedState); |
| 456 } else if (cc == '>') { | 366 } else if (cc == '>') { |
| 457 parseError(); | |
| 458 return emitAndResumeIn(source, HTMLTokenizer::DataState); | 367 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
| 459 } else if (cc == kEndOfFileMarker) { | |
| 460 parseError(); | |
| 461 HTML_RECONSUME_IN(DataState); | |
| 462 } else { | 368 } else { |
| 463 if (cc == '<' || cc == '=' || cc == '`') | |
| 464 parseError(); | |
| 465 m_token->beginAttributeValue(source.numberOfCharactersConsumed()); | 369 m_token->beginAttributeValue(source.numberOfCharactersConsumed()); |
| 466 m_token->appendToAttributeValue(cc); | 370 m_token->appendToAttributeValue(cc); |
| 467 HTML_ADVANCE_TO(AttributeValueUnquotedState); | 371 HTML_ADVANCE_TO(AttributeValueUnquotedState); |
| 468 } | 372 } |
| 469 } | 373 } |
| 470 END_STATE() | 374 END_STATE() |
| 471 | 375 |
| 472 HTML_BEGIN_STATE(AttributeValueDoubleQuotedState) { | 376 HTML_BEGIN_STATE(AttributeValueDoubleQuotedState) { |
| 473 if (cc == '"') { | 377 if (cc == '"') { |
| 474 m_token->endAttributeValue(source.numberOfCharactersConsumed()); | 378 m_token->endAttributeValue(source.numberOfCharactersConsumed()); |
| 475 HTML_ADVANCE_TO(AfterAttributeValueQuotedState); | 379 HTML_ADVANCE_TO(BeforeAttributeNameState); |
| 476 } else if (cc == '&') { | 380 } else if (cc == '&') { |
| 477 m_returnState = AttributeValueDoubleQuotedState; | 381 m_returnState = AttributeValueDoubleQuotedState; |
| 478 m_entityParser.reset(); | 382 m_entityParser.reset(); |
| 479 HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState); | 383 HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState); |
| 480 } else if (cc == kEndOfFileMarker) { | |
| 481 parseError(); | |
| 482 m_token->endAttributeValue(source.numberOfCharactersConsumed()); | |
| 483 HTML_RECONSUME_IN(DataState); | |
| 484 } else { | 384 } else { |
| 485 m_token->appendToAttributeValue(cc); | 385 m_token->appendToAttributeValue(cc); |
| 486 HTML_ADVANCE_TO(AttributeValueDoubleQuotedState); | 386 HTML_ADVANCE_TO(AttributeValueDoubleQuotedState); |
| 487 } | 387 } |
| 488 } | 388 } |
| 489 END_STATE() | 389 END_STATE() |
| 490 | 390 |
| 491 HTML_BEGIN_STATE(AttributeValueSingleQuotedState) { | 391 HTML_BEGIN_STATE(AttributeValueSingleQuotedState) { |
| 492 if (cc == '\'') { | 392 if (cc == '\'') { |
| 493 m_token->endAttributeValue(source.numberOfCharactersConsumed()); | 393 m_token->endAttributeValue(source.numberOfCharactersConsumed()); |
| 494 HTML_ADVANCE_TO(AfterAttributeValueQuotedState); | 394 HTML_ADVANCE_TO(BeforeAttributeNameState); |
| 495 } else if (cc == '&') { | 395 } else if (cc == '&') { |
| 496 m_returnState = AttributeValueSingleQuotedState; | 396 m_returnState = AttributeValueSingleQuotedState; |
| 497 m_entityParser.reset(); | 397 m_entityParser.reset(); |
| 498 HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState); | 398 HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState); |
| 499 } else if (cc == kEndOfFileMarker) { | |
| 500 parseError(); | |
| 501 m_token->endAttributeValue(source.numberOfCharactersConsumed()); | |
| 502 HTML_RECONSUME_IN(DataState); | |
| 503 } else { | 399 } else { |
| 504 m_token->appendToAttributeValue(cc); | 400 m_token->appendToAttributeValue(cc); |
| 505 HTML_ADVANCE_TO(AttributeValueSingleQuotedState); | 401 HTML_ADVANCE_TO(AttributeValueSingleQuotedState); |
| 506 } | 402 } |
| 507 } | 403 } |
| 508 END_STATE() | 404 END_STATE() |
| 509 | 405 |
| 510 HTML_BEGIN_STATE(AttributeValueUnquotedState) { | 406 HTML_BEGIN_STATE(AttributeValueUnquotedState) { |
| 511 if (isTokenizerWhitespace(cc)) { | 407 if (isTokenizerWhitespace(cc)) { |
| 512 m_token->endAttributeValue(source.numberOfCharactersConsumed()); | 408 m_token->endAttributeValue(source.numberOfCharactersConsumed()); |
| 513 HTML_ADVANCE_TO(BeforeAttributeNameState); | 409 HTML_ADVANCE_TO(BeforeAttributeNameState); |
| 514 } else if (cc == '&') { | 410 } else if (cc == '&') { |
| 515 m_returnState = AttributeValueUnquotedState; | 411 m_returnState = AttributeValueUnquotedState; |
| 516 m_entityParser.reset(); | 412 m_entityParser.reset(); |
| 517 HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState); | 413 HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState); |
| 518 } else if (cc == '>') { | 414 } else if (cc == '>') { |
| 519 m_token->endAttributeValue(source.numberOfCharactersConsumed()); | 415 m_token->endAttributeValue(source.numberOfCharactersConsumed()); |
| 520 return emitAndResumeIn(source, HTMLTokenizer::DataState); | 416 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
| 521 } else if (cc == kEndOfFileMarker) { | |
| 522 parseError(); | |
| 523 m_token->endAttributeValue(source.numberOfCharactersConsumed()); | |
| 524 HTML_RECONSUME_IN(DataState); | |
| 525 } else { | 417 } else { |
| 526 if (cc == '"' || cc == '\'' || cc == '<' || cc == '=' || cc == '`') | |
| 527 parseError(); | |
| 528 m_token->appendToAttributeValue(cc); | 418 m_token->appendToAttributeValue(cc); |
| 529 HTML_ADVANCE_TO(AttributeValueUnquotedState); | 419 HTML_ADVANCE_TO(AttributeValueUnquotedState); |
| 530 } | 420 } |
| 531 } | 421 } |
| 532 END_STATE() | 422 END_STATE() |
| 533 | 423 |
| 534 HTML_BEGIN_STATE(AfterAttributeValueQuotedState) { | 424 HTML_BEGIN_STATE(VoidTagState) { |
| 535 if (isTokenizerWhitespace(cc)) | 425 if (cc == '>') { |
| 536 HTML_ADVANCE_TO(BeforeAttributeNameState); | 426 m_token->setSelfClosing(); |
| 537 else if (cc == '/') | |
| 538 HTML_ADVANCE_TO(SelfClosingStartTagState); | |
| 539 else if (cc == '>') | |
| 540 return emitAndResumeIn(source, HTMLTokenizer::DataState); | 427 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
| 541 else if (cc == kEndOfFileMarker) { | |
| 542 parseError(); | |
| 543 HTML_RECONSUME_IN(DataState); | |
| 544 } else { | 428 } else { |
| 545 parseError(); | |
| 546 HTML_RECONSUME_IN(BeforeAttributeNameState); | 429 HTML_RECONSUME_IN(BeforeAttributeNameState); |
| 547 } | 430 } |
| 548 } | 431 } |
| 549 END_STATE() | |
| 550 | |
| 551 HTML_BEGIN_STATE(SelfClosingStartTagState) { | |
| 552 if (cc == '>') { | |
| 553 m_token->setSelfClosing(); | |
| 554 return emitAndResumeIn(source, HTMLTokenizer::DataState); | |
| 555 } else if (cc == kEndOfFileMarker) { | |
| 556 parseError(); | |
| 557 HTML_RECONSUME_IN(DataState); | |
| 558 } else { | |
| 559 parseError(); | |
| 560 HTML_RECONSUME_IN(BeforeAttributeNameState); | |
| 561 } | |
| 562 } | |
| 563 END_STATE() | 432 END_STATE() |
| 564 | 433 |
| 565 HTML_BEGIN_STATE(CommentStart1State) { | 434 HTML_BEGIN_STATE(CommentStart1State) { |
| 566 if (cc == '-') { | 435 if (cc == '-') { |
| 567 HTML_ADVANCE_TO(CommentStart2State); | 436 HTML_ADVANCE_TO(CommentStart2State); |
| 568 } else { | 437 } else { |
| 569 bufferCharacter('<'); | 438 bufferCharacter('<'); |
| 570 bufferCharacter('!'); | 439 bufferCharacter('!'); |
| 571 HTML_RECONSUME_IN(DataState); | 440 HTML_RECONSUME_IN(DataState); |
| 572 } | 441 } |
| (...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 609 else | 478 else |
| 610 HTML_ADVANCE_TO(CommentState); | 479 HTML_ADVANCE_TO(CommentState); |
| 611 } | 480 } |
| 612 END_STATE() | 481 END_STATE() |
| 613 } | 482 } |
| 614 | 483 |
| 615 ASSERT_NOT_REACHED(); | 484 ASSERT_NOT_REACHED(); |
| 616 return false; | 485 return false; |
| 617 } | 486 } |
| 618 | 487 |
| 619 inline bool HTMLTokenizer::temporaryBufferIs(const String& expectedString) | |
| 620 { | |
| 621 return vectorEqualsString(m_temporaryBuffer, expectedString); | |
| 622 } | |
| 623 | |
| 624 inline void HTMLTokenizer::addToPossibleEndTag(LChar cc) | |
| 625 { | |
| 626 ASSERT(isEndTagBufferingState(m_state)); | |
| 627 m_bufferedEndTagName.append(cc); | |
| 628 } | |
| 629 | |
| 630 inline bool HTMLTokenizer::isAppropriateEndTag() | 488 inline bool HTMLTokenizer::isAppropriateEndTag() |
| 631 { | 489 { |
| 632 if (m_bufferedEndTagName.size() != m_appropriateEndTagName.size()) | 490 if (m_temporaryBuffer.size() != m_appropriateEndTagName.size()) |
| 633 return false; | 491 return false; |
| 634 | 492 |
| 635 size_t numCharacters = m_bufferedEndTagName.size(); | 493 size_t numCharacters = m_temporaryBuffer.size(); |
| 636 | 494 |
| 637 for (size_t i = 0; i < numCharacters; i++) { | 495 for (size_t i = 0; i < numCharacters; i++) { |
| 638 if (m_bufferedEndTagName[i] != m_appropriateEndTagName[i]) | 496 if (m_temporaryBuffer[i] != m_appropriateEndTagName[i]) |
| 639 return false; | 497 return false; |
| 640 } | 498 } |
| 641 | 499 |
| 642 return true; | 500 return true; |
| 643 } | 501 } |
| 644 | 502 |
| 645 inline void HTMLTokenizer::parseError() | 503 inline void HTMLTokenizer::parseError() |
| 646 { | 504 { |
| 647 notImplemented(); | 505 notImplemented(); |
| 648 } | 506 } |
| 649 | 507 |
| 650 } | 508 } |
| OLD | NEW |