Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(203)

Unified Diff: third_party/WebKit/WebCore/html/HTMLTokenizer.cpp

Issue 21165: Revert the merge. Mac build is mysteriously broken. (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: Created 11 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « third_party/WebKit/WebCore/html/HTMLParser.cpp ('k') | third_party/WebKit/WebCore/loader/EmptyClients.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: third_party/WebKit/WebCore/html/HTMLTokenizer.cpp
===================================================================
--- third_party/WebKit/WebCore/html/HTMLTokenizer.cpp (revision 9383)
+++ third_party/WebKit/WebCore/html/HTMLTokenizer.cpp (working copy)
@@ -1,2045 +1,2045 @@
-/*
- Copyright (C) 1997 Martin Jones (mjones@kde.org)
- (C) 1997 Torben Weis (weis@kde.org)
- (C) 1998 Waldo Bastian (bastian@kde.org)
- (C) 1999 Lars Knoll (knoll@kde.org)
- (C) 1999 Antti Koivisto (koivisto@kde.org)
- (C) 2001 Dirk Mueller (mueller@kde.org)
- Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
- Copyright (C) 2005, 2006 Alexey Proskuryakov (ap@nypop.com)
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public
- License as published by the Free Software Foundation; either
- version 2 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
-
- You should have received a copy of the GNU Library General Public License
- along with this library; see the file COPYING.LIB. If not, write to
- the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- Boston, MA 02110-1301, USA.
-*/
-
-#include "config.h"
-#include "HTMLTokenizer.h"
-
-#include "CSSHelper.h"
-#include "Cache.h"
-#include "CachedScript.h"
-#include "DocLoader.h"
-#include "DocumentFragment.h"
-#include "EventNames.h"
-#include "Frame.h"
-#include "FrameLoader.h"
-#include "FrameView.h"
-#include "HTMLElement.h"
-#include "HTMLNames.h"
-#include "HTMLParser.h"
-#include "HTMLScriptElement.h"
-#include "HTMLViewSourceDocument.h"
-#include "Page.h"
-#include "PreloadScanner.h"
-#include "ScriptController.h"
-#include "ScriptSourceCode.h"
-#include "ScriptValue.h"
-#include <wtf/ASCIICType.h>
-#include <wtf/CurrentTime.h>
-
-#include "HTMLEntityNames.c"
-
-#define PRELOAD_SCANNER_ENABLED 1
-// #define INSTRUMENT_LAYOUT_SCHEDULING 1
-
-using namespace WTF;
-using namespace std;
-
-namespace WebCore {
-
-using namespace HTMLNames;
-
-#if MOBILE
-// The mobile device needs to be responsive, so the tokenizer chunk size is reduced.
-// This value defines how many characters the tokenizer will process before
-// yielding control.
-static const int defaultTokenizerChunkSize = 256;
-#else
-static const int defaultTokenizerChunkSize = 4096;
-#endif
-
-#if MOBILE
-// As the chunks are smaller (above), the tokenizer should not yield for as long a period, otherwise
-// it will take way too long to load a page.
-static const double defaultTokenizerTimeDelay = 0.300;
-#else
-// FIXME: We would like this constant to be 200ms.
-// Yielding more aggressively results in increased responsiveness and better incremental rendering.
-// It slows down overall page-load on slower machines, though, so for now we set a value of 500ms
-// (the constant is expressed in seconds).
-static const double defaultTokenizerTimeDelay = 0.500;
-#endif
-
-// Lower-case ASCII marker strings used while scanning markup.
-// publicStart and systemStart are matched character by character against the
-// lower-cased input in parseDoctype().
-// NOTE(review): the "</...>" strings presumably serve as m_searchStopper values for
-// parseSpecial(); the assignment is not visible in this part of the file - confirm.
-static const char commentStart [] = "<!--";
-static const char doctypeStart [] = "<!doctype";
-static const char publicStart [] = "public";
-static const char systemStart [] = "system";
-static const char scriptEnd [] = "</script";
-static const char xmpEnd [] = "</xmp";
-static const char styleEnd [] = "</style";
-static const char textareaEnd [] = "</textarea";
-static const char titleEnd [] = "</title";
-static const char iframeEnd [] = "</iframe";
-
-// Full support for MS Windows extensions to Latin-1.
-// Technically these extensions should only be activated for pages
-// marked "windows-1252" or "cp1252", but
-// in the standard Microsoft way, these extensions infect hundreds of thousands
-// of web pages. Note that people with non-Latin-1 Microsoft extensions
-// are out of luck.
-//
-// See: http://www.microsoft.com/globaldev/reference/WinCP.asp
-// http://www.bbsinc.com/iso8859.html
-// http://www.obviously.com/
-//
-// There may be better equivalents.
-
-// We only need this for entities. For non-entity text, we handle this in the text encoding.
-
-// Indexed by (code unit - 0x80): entry i is the replacement for code unit 0x80 + i.
-// fixUpChar() is the lookup helper for this table.
-static const UChar windowsLatin1ExtensionArray[32] = {
- 0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, // 80-87
- 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F, // 88-8F
- 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, // 90-97
- 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178 // 98-9F
-};
-
-// Maps a code unit in the range 0x80-0x9F to the character that
-// windowsLatin1ExtensionArray lists for it; any other value is returned unchanged.
-static inline UChar fixUpChar(UChar c)
-{
-    const bool inExtensionRange = (c & ~0x1F) == 0x0080;
-    if (inExtensionRange)
-        return windowsLatin1ExtensionArray[c - 0x80];
-    return c;
-}
-
-// Compares the first |length| characters of the 8-bit string |s1| with the buffer |s2|.
-// A position matches when |s2| holds either the |s1| character itself or its
-// toASCIIUpper() form. Returns true only if every position matches.
-static inline bool tagMatch(const char* s1, const UChar* s2, unsigned length)
-{
-    for (unsigned pos = 0; pos < length; ++pos) {
-        const unsigned char expected = s1[pos];
-        const unsigned char expectedUpper = toASCIIUpper(static_cast<char>(expected));
-        const UChar actual = s2[pos];
-        const bool matches = expected == actual || expectedUpper == actual;
-        if (!matches)
-            return false;
-    }
-    return true;
-}
-
-// Appends the attribute (attrName, attributeValue) to this token, creating the
-// attribute map on first use. An empty name adds nothing. In every case
-// |attrName| is reset to emptyAtom before returning.
-inline void Token::addAttribute(AtomicString& attrName, const AtomicString& attributeValue, bool viewSourceMode)
-{
-    const bool hasName = !attrName.isEmpty();
-    if (hasName) {
-        ASSERT(!attrName.contains('/'));
-        RefPtr<MappedAttribute> attribute = MappedAttribute::create(attrName, attributeValue);
-        const bool needsMap = !attrs;
-        if (needsMap) {
-            attrs = NamedMappedAttrMap::create();
-            attrs->reserveInitialCapacity(10);
-        }
-        attrs->insertAttribute(attribute.release(), viewSourceMode);
-    }
-
-    // The caller's name is consumed whether or not an attribute was added.
-    attrName = emptyAtom;
-}
-
-// ----------------------------------------------------------------------------
-
-// Creates a tokenizer for an HTML document. A new HTMLParser is created from
-// |doc| and |reportErrors|, fragment mode is off, and begin() puts the
-// tokenizer into its initial state.
-HTMLTokenizer::HTMLTokenizer(HTMLDocument* doc, bool reportErrors)
- : Tokenizer()
- , m_buffer(0)
- , m_scriptCode(0)
- , m_scriptCodeSize(0)
- , m_scriptCodeCapacity(0)
- , m_scriptCodeResync(0)
- , m_executingScript(0)
- , m_requestingScript(false)
- , m_hasScriptsWaitingForStylesheets(false)
- , m_timer(this, &HTMLTokenizer::timerFired)
- , m_doc(doc)
- , m_parser(new HTMLParser(doc, reportErrors))
- , m_inWrite(false)
- , m_fragment(false)
-{
- begin();
-}
-
-// Creates a tokenizer for a view-source document. No HTMLParser is created
-// (m_parser is 0), fragment mode is off, and begin() puts the tokenizer into
-// its initial state.
-// NOTE(review): the `true` passed to Tokenizer presumably enables view-source
-// mode - confirm against the Tokenizer base class.
-HTMLTokenizer::HTMLTokenizer(HTMLViewSourceDocument* doc)
- : Tokenizer(true)
- , m_buffer(0)
- , m_scriptCode(0)
- , m_scriptCodeSize(0)
- , m_scriptCodeCapacity(0)
- , m_scriptCodeResync(0)
- , m_executingScript(0)
- , m_requestingScript(false)
- , m_hasScriptsWaitingForStylesheets(false)
- , m_timer(this, &HTMLTokenizer::timerFired)
- , m_doc(doc)
- , m_parser(0)
- , m_inWrite(false)
- , m_fragment(false)
-{
- begin();
-}
-
-// Creates a tokenizer that parses into a DocumentFragment. m_doc is the
-// fragment's document, the HTMLParser is created from the fragment, and
-// m_fragment is set (scriptHandler() and scriptExecution() check it before
-// running scripts). begin() puts the tokenizer into its initial state.
-HTMLTokenizer::HTMLTokenizer(DocumentFragment* frag)
- : m_buffer(0)
- , m_scriptCode(0)
- , m_scriptCodeSize(0)
- , m_scriptCodeCapacity(0)
- , m_scriptCodeResync(0)
- , m_executingScript(0)
- , m_requestingScript(false)
- , m_hasScriptsWaitingForStylesheets(false)
- , m_timer(this, &HTMLTokenizer::timerFired)
- , m_doc(frag->document())
- , m_parser(new HTMLParser(frag))
- , m_inWrite(false)
- , m_fragment(true)
-{
- begin();
-}
-
-// Drops all in-flight tokenizer state: unregisters from every pending script,
-// frees the text and script buffers, stops the timer and clears the current
-// and doctype tokens. Must not be called while a script is executing.
-void HTMLTokenizer::reset()
-{
-    ASSERT(m_executingScript == 0);
-
-    // Stop listening to each script that was still being waited for.
-    while (!m_pendingScripts.isEmpty()) {
-        CachedScript* pendingScript = m_pendingScripts.first().get();
-        m_pendingScripts.removeFirst();
-        ASSERT(cache()->disabled() || pendingScript->accessCount() > 0);
-        pendingScript->removeClient(this);
-    }
-
-    // Release the output text buffer.
-    fastFree(m_buffer);
-    m_dest = 0;
-    m_buffer = 0;
-    m_bufferSize = 0;
-
-    // Release the script/raw-text buffer.
-    fastFree(m_scriptCode);
-    m_scriptCode = 0;
-    m_scriptCodeResync = 0;
-    m_scriptCodeCapacity = 0;
-    m_scriptCodeSize = 0;
-
-    m_timer.stop();
-    m_state.setAllowYield(false);
-    m_state.setForceSynchronous(false);
-
-    m_currentToken.reset();
-    m_doctypeToken.reset();
-    m_doctypeSearchCount = 0;
-    m_doctypeSecondarySearchCount = 0;
-    m_hasScriptsWaitingForStylesheets = false;
-}
-
-// Puts the tokenizer into its initial state: clears script bookkeeping, calls
-// reset(), allocates a fresh 254-character text buffer and picks the time-delay
-// and chunk-size settings (page overrides win over the file-level defaults).
-void HTMLTokenizer::begin()
-{
-    static const int initialBufferLength = 254;
-
-    // reset() asserts that no script is executing, so clear the counter first.
-    m_executingScript = 0;
-    m_requestingScript = false;
-    m_hasScriptsWaitingForStylesheets = false;
-    m_state.setLoadingExtScript(false);
-    reset();
-
-    m_bufferSize = initialBufferLength;
-    m_buffer = static_cast<UChar*>(fastMalloc(sizeof(UChar) * initialBufferLength));
-    m_dest = m_buffer;
-
-    tquote = NoQuote;
-    searchCount = 0;
-    m_state.setEntityState(NoEntity);
-    m_scriptTagSrcAttrValue = String();
-    m_pendingSrc.clear();
-    m_currentPrependingSrc = 0;
-    m_noMoreData = false;
-    m_brokenComments = false;
-    m_brokenServer = false;
-    m_lineNumber = 0;
-    m_currentScriptTagStartLineNumber = 0;
-    m_currentTagStartLineNumber = 0;
-    m_state.setForceSynchronous(false);
-
-    Page* owningPage = m_doc->page();
-
-    const bool useCustomDelay = owningPage && owningPage->hasCustomHTMLTokenizerTimeDelay();
-    if (!useCustomDelay)
-        m_tokenizerTimeDelay = defaultTokenizerTimeDelay;
-    else
-        m_tokenizerTimeDelay = owningPage->customHTMLTokenizerTimeDelay();
-
-    const bool useCustomChunkSize = owningPage && owningPage->hasCustomHTMLTokenizerChunkSize();
-    if (!useCustomChunkSize)
-        m_tokenizerChunkSize = defaultTokenizerChunkSize;
-    else
-        m_tokenizerChunkSize = owningPage->customHTMLTokenizerChunkSize();
-}
-
-// Forwards |force| to the force-synchronous flag held in m_state.
-void HTMLTokenizer::setForceSynchronous(bool force)
-{
- m_state.setForceSynchronous(force);
-}
-
-// Copies the characters of |list| into the output text buffer (m_dest) as
-// preformatted text. Line endings are normalized: '\r' and '\n' are each
-// written as '\n', the '\n' that follows a '\r' is skipped, and one leading
-// line feed is dropped when the discard-LF flag is set. Returns the updated state.
-HTMLTokenizer::State HTMLTokenizer::processListing(SegmentedString list, State state)
-{
-    while (!list.isEmpty()) {
-        // Second half of a CRLF pair: swallow the '\n'.
-        if (state.skipLF()) {
-            state.setSkipLF(false);
-            if (*list == '\n') {
-                list.advance();
-                continue;
-            }
-        }
-
-        checkBuffer();
-
-        const bool isCarriageReturn = *list == '\r';
-        const bool isLineBreak = isCarriageReturn || *list == '\n';
-        if (!isLineBreak) {
-            state.setDiscardLF(false);
-            *m_dest++ = *list;
-            list.advance();
-            continue;
-        }
-
-        if (!state.discardLF())
-            *m_dest++ = '\n';
-        else
-            state.setDiscardLF(false); // This line break is the one being discarded.
-
-        // An MS-DOS CRLF sequence must produce only one line break.
-        if (isCarriageReturn)
-            state.setSkipLF(true);
-
-        list.advance();
-    }
-
-    return state;
-}
-
-// Consumes the raw content of the special element the tokenizer is currently
-// inside (exactly one of xmp, textarea, title, style, script or iframe, per the
-// ASSERT below). Characters are accumulated in m_scriptCode until the end tag
-// named by m_searchStopper has been seen and its closing '>' is reached; the
-// content is then handed to scriptHandler() (for script) or emitted through
-// processListing()/processToken() followed by the matching end-tag token.
-// Returns the updated state; if |src| runs out first, the partial content stays
-// in m_scriptCode.
-HTMLTokenizer::State HTMLTokenizer::parseSpecial(SegmentedString& src, State state)
-{
- ASSERT(state.inTextArea() || state.inTitle() || state.inIFrame() || !state.hasEntityState());
- ASSERT(!state.hasTagState());
- ASSERT(state.inXmp() + state.inTextArea() + state.inTitle() + state.inStyle() + state.inScript() + state.inIFrame() == 1 );
- // Record the line the script starts on, once per script element.
- if (state.inScript() && !m_currentScriptTagStartLineNumber)
- m_currentScriptTagStartLineNumber = m_lineNumber;
-
- if (state.inComment())
- state = parseComment(src, state);
-
- // Position in m_scriptCode of the most recent entity that was left undecoded;
- // used so that entity text is not mistaken for "<!--" or for the end tag.
- int lastDecodedEntityPosition = -1;
- while (!src.isEmpty()) {
- checkScriptBuffer();
- UChar ch = *src;
-
- // "<!-" is already buffered and the next character is '-': a comment starts here.
- if (!m_scriptCodeResync && !m_brokenComments &&
- !state.inXmp() && ch == '-' && m_scriptCodeSize >= 3 && !src.escaped() &&
- m_scriptCode[m_scriptCodeSize - 3] == '<' && m_scriptCode[m_scriptCodeSize - 2] == '!' && m_scriptCode[m_scriptCodeSize - 1] == '-' &&
- (lastDecodedEntityPosition < m_scriptCodeSize - 3)) {
- state.setInComment(true);
- state = parseComment(src, state);
- continue;
- }
- // The end tag was already matched (m_scriptCodeResync is set) and this is its unquoted '>'.
- if (m_scriptCodeResync && !tquote && ch == '>') {
- src.advancePastNonNewline();
- // Cut the buffered end tag off the content.
- m_scriptCodeSize = m_scriptCodeResync - 1;
- m_scriptCodeResync = 0;
- m_scriptCode[m_scriptCodeSize] = m_scriptCode[m_scriptCodeSize + 1] = 0;
- if (state.inScript())
- state = scriptHandler(state);
- else {
- state = processListing(SegmentedString(m_scriptCode, m_scriptCodeSize), state);
- processToken();
- if (state.inStyle()) {
- m_currentToken.tagName = styleTag.localName();
- m_currentToken.beginTag = false;
- } else if (state.inTextArea()) {
- m_currentToken.tagName = textareaTag.localName();
- m_currentToken.beginTag = false;
- } else if (state.inTitle()) {
- m_currentToken.tagName = titleTag.localName();
- m_currentToken.beginTag = false;
- } else if (state.inXmp()) {
- m_currentToken.tagName = xmpTag.localName();
- m_currentToken.beginTag = false;
- } else if (state.inIFrame()) {
- m_currentToken.tagName = iframeTag.localName();
- m_currentToken.beginTag = false;
- }
- processToken();
- state.setInStyle(false);
- state.setInScript(false);
- state.setInTextArea(false);
- state.setInTitle(false);
- state.setInXmp(false);
- state.setInIFrame(false);
- tquote = NoQuote;
- m_scriptCodeSize = m_scriptCodeResync = 0;
- }
- return state;
- }
- // possible end of tagname, lets check.
- if (!m_scriptCodeResync && !state.escaped() && !src.escaped() && (ch == '>' || ch == '/' || isASCIISpace(ch)) &&
- m_scriptCodeSize >= m_searchStopperLength &&
- tagMatch(m_searchStopper, m_scriptCode + m_scriptCodeSize - m_searchStopperLength, m_searchStopperLength) &&
- (lastDecodedEntityPosition < m_scriptCodeSize - m_searchStopperLength)) {
- // Remember where the end tag begins (stored one-based so that 0 means "not found").
- m_scriptCodeResync = m_scriptCodeSize-m_searchStopperLength+1;
- tquote = NoQuote;
- continue;
- }
- // Inside the end tag: track quotes so that a quoted '>' does not close it.
- if (m_scriptCodeResync && !state.escaped()) {
- if (ch == '\"')
- tquote = (tquote == NoQuote) ? DoubleQuote : ((tquote == SingleQuote) ? SingleQuote : NoQuote);
- else if (ch == '\'')
- tquote = (tquote == NoQuote) ? SingleQuote : (tquote == DoubleQuote) ? DoubleQuote : NoQuote;
- else if (tquote != NoQuote && (ch == '\r' || ch == '\n'))
- tquote = NoQuote;
- }
- state.setEscaped(!state.escaped() && ch == '\\');
- // Entities are only decoded inside textarea, title and iframe content.
- if (!m_scriptCodeResync && (state.inTextArea() || state.inTitle() || state.inIFrame()) && !src.escaped() && ch == '&') {
- UChar* scriptCodeDest = m_scriptCode + m_scriptCodeSize;
- src.advancePastNonNewline();
- state = parseEntity(src, scriptCodeDest, state, m_cBufferPos, true, false);
- // parseEntity() wrote nothing through scriptCodeDest: remember this position.
- if (scriptCodeDest == m_scriptCode + m_scriptCodeSize)
- lastDecodedEntityPosition = m_scriptCodeSize;
- else
- m_scriptCodeSize = scriptCodeDest - m_scriptCode;
- } else {
- m_scriptCode[m_scriptCodeSize++] = ch;
- src.advance(m_lineNumber);
- }
- }
-
- return state;
-}
-
-// Called by parseSpecial() once the end of a <script> element has been reached.
-// Outside view-source mode it either requests the external script named by the
-// src attribute or decides whether the inline text should be executed. It then
-// emits the script text and the </script> end-tag token, and finally either
-// registers as a client of the external script or runs the inline script,
-// parking the not-yet-parsed input in m_pendingSrc (or the current prepending
-// source) meanwhile. Returns the updated state.
-HTMLTokenizer::State HTMLTokenizer::scriptHandler(State state)
-{
- // We are inside a <script>
- bool doScriptExec = false;
- int startLine = m_currentScriptTagStartLineNumber + 1; // Script line numbers are 1 based, HTMLTokenizer line numbers are 0 based
-
- // Reset m_currentScriptTagStartLineNumber to indicate that we've finished parsing the current script element
- m_currentScriptTagStartLineNumber = 0;
-
- // (Bugzilla 3837) Scripts following a frameset element should not execute or,
- // in the case of extern scripts, even load.
- bool followingFrameset = (m_doc->body() && m_doc->body()->hasTagName(framesetTag));
-
- CachedScript* cs = 0;
- // don't load external scripts for standalone documents (for now)
- if (!inViewSourceMode()) {
- if (!m_scriptTagSrcAttrValue.isEmpty() && m_doc->frame()) {
- // forget what we just got; load from src url instead
- if (!m_parser->skipMode() && !followingFrameset) {
-#ifdef INSTRUMENT_LAYOUT_SCHEDULING
- if (!m_doc->ownerElement())
- printf("Requesting script at time %d\n", m_doc->elapsedTime());
-#endif
- // The parser might have been stopped by for example a window.close call in an earlier script.
- // If so, we don't want to load scripts.
- if (!m_parserStopped && (cs = m_doc->docLoader()->requestScript(m_scriptTagSrcAttrValue, m_scriptTagCharsetAttrValue)))
- m_pendingScripts.append(cs);
- else
- m_scriptNode = 0;
- } else
- m_scriptNode = 0;
- m_scriptTagSrcAttrValue = String();
- } else {
- // Parse m_scriptCode containing <script> info
-#if USE(LOW_BANDWIDTH_DISPLAY)
- if (m_doc->inLowBandwidthDisplay()) {
- // ideal solution is only skipping internal JavaScript if there is external JavaScript.
- // but internal JavaScript can use document.write() to create an external JavaScript,
- // so we have to skip internal JavaScript all the time.
- m_doc->frame()->loader()->needToSwitchOutLowBandwidthDisplay();
- doScriptExec = false;
- } else
-#endif
- doScriptExec = m_scriptNode->shouldExecuteAsJavaScript();
- m_scriptNode = 0;
- }
- }
-
- // Emit the script text, then the </script> end tag.
- state = processListing(SegmentedString(m_scriptCode, m_scriptCodeSize), state);
- RefPtr<Node> node = processToken();
- String scriptString = node ? node->textContent() : "";
- m_currentToken.tagName = scriptTag.localName();
- m_currentToken.beginTag = false;
- processToken();
-
- state.setInScript(false);
- m_scriptCodeSize = m_scriptCodeResync = 0;
-
- // FIXME: The script should be syntax highlighted.
- if (inViewSourceMode())
- return state;
-
- // While this script is handled, anything written re-entrantly is collected in a
- // local prepending source; the previous one is restored before returning.
- SegmentedString* savedPrependingSrc = m_currentPrependingSrc;
- SegmentedString prependingSrc;
- m_currentPrependingSrc = &prependingSrc;
-
- if (!m_parser->skipMode() && !followingFrameset) {
- if (cs) {
- // External script: park the remaining input until the script is available.
- if (savedPrependingSrc)
- savedPrependingSrc->append(m_src);
- else
- m_pendingSrc.prepend(m_src);
- setSrc(SegmentedString());
-
- // the addClient() call below may call notifyFinished if the script is already in cache,
- // and that mucks with the state directly, so we must write it back to the object.
- // NOTE(review): the original comments here said ref(); the call actually made is addClient().
- m_state = state;
- bool savedRequestingScript = m_requestingScript;
- m_requestingScript = true;
- cs->addClient(this);
- m_requestingScript = savedRequestingScript;
- state = m_state;
- // m_pendingScripts will be empty if the script was already loaded and addClient() executed it
- if (!m_pendingScripts.isEmpty())
- state.setLoadingExtScript(true);
- } else if (!m_fragment && doScriptExec) {
- // Inline script: park the remaining input, then execute the script text.
- if (!m_executingScript)
- m_pendingSrc.prepend(m_src);
- else
- prependingSrc = m_src;
- setSrc(SegmentedString());
- state = scriptExecution(ScriptSourceCode(scriptString, m_doc->frame() ? m_doc->frame()->document()->url() : KURL(), startLine), state);
- }
- }
-
- if (!m_executingScript && !state.loadingExtScript()) {
- // Nothing is blocking any more: resume with the parked input.
- m_src.append(m_pendingSrc);
- m_pendingSrc.clear();
- } else if (!prependingSrc.isEmpty()) {
- // restore first so that the write appends in the right place
- // (does not hurt to do it again below)
- m_currentPrependingSrc = savedPrependingSrc;
-
- // we need to do this slightly modified bit of one of the write() cases
- // because we want to prepend to m_pendingSrc rather than appending
- // if there's no previous prependingSrc
- if (!m_pendingScripts.isEmpty()) {
- if (m_currentPrependingSrc)
- m_currentPrependingSrc->append(prependingSrc);
- else
- m_pendingSrc.prepend(prependingSrc);
- } else {
- m_state = state;
- write(prependingSrc, false);
- state = m_state;
- }
- }
-
-#if PRELOAD_SCANNER_ENABLED
- // Still waiting for a script: let the preload scanner look at the parked input.
- if (!m_pendingScripts.isEmpty() && !m_executingScript) {
- if (!m_preloadScanner)
- m_preloadScanner.set(new PreloadScanner(m_doc));
- if (!m_preloadScanner->inProgress()) {
- m_preloadScanner->begin();
- m_preloadScanner->write(m_pendingSrc);
- }
- }
-#endif
- m_currentPrependingSrc = savedPrependingSrc;
-
- return state;
-}
-
-// Executes |sourceCode| through the frame's loader and returns the updated state.
-// Does nothing for fragment tokenizers or when the document has no frame.
-// m_executingScript is incremented for the duration of the call. Input collected
-// in the local prepending source while the script ran is afterwards merged back
-// into m_pendingSrc / m_src, or written immediately when no script is pending.
-HTMLTokenizer::State HTMLTokenizer::scriptExecution(const ScriptSourceCode& sourceCode, State state)
-{
- if (m_fragment || !m_doc->frame())
- return state;
- m_executingScript++;
-
- SegmentedString* savedPrependingSrc = m_currentPrependingSrc;
- SegmentedString prependingSrc;
- m_currentPrependingSrc = &prependingSrc;
-
-#ifdef INSTRUMENT_LAYOUT_SCHEDULING
- if (!m_doc->ownerElement())
- printf("beginning script execution at %d\n", m_doc->elapsedTime());
-#endif
-
- // Publish the state in m_state around the call and read it back afterwards.
- m_state = state;
- m_doc->frame()->loader()->executeScript(sourceCode);
- state = m_state;
-
- state.setAllowYield(true);
-
-#ifdef INSTRUMENT_LAYOUT_SCHEDULING
- if (!m_doc->ownerElement())
- printf("ending script execution at %d\n", m_doc->elapsedTime());
-#endif
-
- m_executingScript--;
-
- if (!m_executingScript && !state.loadingExtScript()) {
- // Outermost script finished and nothing is loading: resume with the collected input first.
- m_pendingSrc.prepend(prependingSrc);
- m_src.append(m_pendingSrc);
- m_pendingSrc.clear();
- } else if (!prependingSrc.isEmpty()) {
- // restore first so that the write appends in the right place
- // (does not hurt to do it again below)
- m_currentPrependingSrc = savedPrependingSrc;
-
- // we need to do this slightly modified bit of one of the write() cases
- // because we want to prepend to m_pendingSrc rather than appending
- // if there's no previous prependingSrc
- if (!m_pendingScripts.isEmpty()) {
- if (m_currentPrependingSrc)
- m_currentPrependingSrc->append(prependingSrc);
- else
- m_pendingSrc.prepend(prependingSrc);
-
-#if PRELOAD_SCANNER_ENABLED
- // We are stuck waiting for another script. Let's check the source that
- // was just document.write()n for anything to load.
- PreloadScanner documentWritePreloadScanner(m_doc);
- documentWritePreloadScanner.begin();
- documentWritePreloadScanner.write(prependingSrc);
- documentWritePreloadScanner.end();
-#endif
- } else {
- m_state = state;
- write(prependingSrc, false);
- state = m_state;
- }
- }
-
- m_currentPrependingSrc = savedPrependingSrc;
-
- return state;
-}
-
-// Accumulates comment text from |src| into m_scriptCode until the comment ends.
-// A '>' ends the comment when it is preceded by "--" or "--!", or - if
-// m_brokenComments is set and we are not inside script or style - unconditionally.
-// Outside the special raw-text elements the comment is emitted as a commentAtom
-// begin token, its text (via processListing) and a commentAtom end token.
-// Returns the updated state; the in-comment flag stays set if |src| runs out first.
-HTMLTokenizer::State HTMLTokenizer::parseComment(SegmentedString& src, State state)
-{
- // FIXME: Why does this code even run for comments inside <script> and <style>? This seems bogus.
- checkScriptBuffer(src.length());
- while (!src.isEmpty()) {
- UChar ch = *src;
- m_scriptCode[m_scriptCodeSize++] = ch;
- if (ch == '>') {
- bool handleBrokenComments = m_brokenComments && !(state.inScript() || state.inStyle());
- int endCharsCount = 1; // start off with one for the '>' character
- if (m_scriptCodeSize > 2 && m_scriptCode[m_scriptCodeSize-3] == '-' && m_scriptCode[m_scriptCodeSize-2] == '-') {
- endCharsCount = 3;
- } else if (m_scriptCodeSize > 3 && m_scriptCode[m_scriptCodeSize-4] == '-' && m_scriptCode[m_scriptCodeSize-3] == '-' &&
- m_scriptCode[m_scriptCodeSize-2] == '!') {
- // Other browsers will accept --!> as a close comment, even though it's
- // not technically valid.
- endCharsCount = 4;
- }
- if (handleBrokenComments || endCharsCount > 1) {
- src.advancePastNonNewline();
- if (!(state.inTitle() || state.inScript() || state.inXmp() || state.inTextArea() || state.inStyle() || state.inIFrame())) {
- checkScriptBuffer();
- m_scriptCode[m_scriptCodeSize] = 0;
- m_scriptCode[m_scriptCodeSize + 1] = 0;
- m_currentToken.tagName = commentAtom;
- m_currentToken.beginTag = true;
- // The terminator characters are not part of the comment text.
- state = processListing(SegmentedString(m_scriptCode, m_scriptCodeSize - endCharsCount), state);
- processToken();
- m_currentToken.tagName = commentAtom;
- m_currentToken.beginTag = false;
- processToken();
- m_scriptCodeSize = 0;
- }
- state.setInComment(false);
- return state; // Finished parsing comment
- }
- }
- src.advance(m_lineNumber);
- }
-
- return state;
-}
-
-// Skips a server-side include block: input is copied into m_scriptCode until a
-// '>' directly preceded by '%' is seen, at which point the buffered text is
-// discarded and the in-server flag is cleared. Returns the updated state.
-HTMLTokenizer::State HTMLTokenizer::parseServer(SegmentedString& src, State state)
-{
-    checkScriptBuffer(src.length());
-    for (; !src.isEmpty(); src.advance(m_lineNumber)) {
-        const UChar current = *src;
-        m_scriptCode[m_scriptCodeSize++] = current;
-
-        const bool closesServerBlock = current == '>'
-            && m_scriptCodeSize > 1
-            && m_scriptCode[m_scriptCodeSize - 2] == '%';
-        if (!closesServerBlock)
-            continue;
-
-        // Found "%>": the server include is finished.
-        src.advancePastNonNewline();
-        state.setInServer(false);
-        m_scriptCodeSize = 0;
-        return state;
-    }
-    return state;
-}
-
-// Skips over a processing instruction. Quote characters toggle tquote; the
-// instruction ends at a '>' that is either unquoted or directly preceded by '?'.
-// On completion the in-processing-instruction flag is cleared and the
-// discard-LF flag is set. Returns the updated state.
-HTMLTokenizer::State HTMLTokenizer::parseProcessingInstruction(SegmentedString& src, State state)
-{
-    UChar previous = 0;
-    for (; !src.isEmpty(); src.advance(m_lineNumber)) {
-        const UChar current = *src;
-        switch (current) {
-        case '\'':
-            tquote = (tquote == SingleQuote) ? NoQuote : SingleQuote;
-            break;
-        case '\"':
-            tquote = (tquote == DoubleQuote) ? NoQuote : DoubleQuote;
-            break;
-        case '>':
-            // The proper terminator is "?>", but some sites omit the '?', so an
-            // unquoted '>' is accepted as well (IE compatible).
-            if (!tquote || previous == '?') {
-                state.setInProcessingInstruction(false);
-                src.advancePastNonNewline();
-                state.setDiscardLF(true);
-                return state; // Finished parsing the processing instruction.
-            }
-            break;
-        default:
-            break;
-        }
-        previous = current;
-    }
-
-    return state;
-}
-
-// Copies all of |src| into the output text buffer (m_dest), writing each '\r'
-// as '\n' and skipping the '\n' of a CRLF pair. Returns the updated state.
-HTMLTokenizer::State HTMLTokenizer::parseText(SegmentedString& src, State state)
-{
-    while (!src.isEmpty()) {
-        const UChar current = *src;
-
-        // A '\n' right after a '\r' was already accounted for.
-        if (state.skipLF()) {
-            state.setSkipLF(false);
-            if (current == '\n') {
-                src.advancePastNewline(m_lineNumber);
-                continue;
-            }
-        }
-
-        // Grow the output buffer if necessary.
-        checkBuffer();
-
-        if (current != '\r')
-            *m_dest++ = current;
-        else {
-            state.setSkipLF(true);
-            *m_dest++ = '\n';
-        }
-        src.advance(m_lineNumber);
-    }
-
-    return state;
-}
-
-
-// Parses one character reference; the caller has already consumed the '&'.
-//   src        - input; consumed characters are also saved in m_cBuffer.
-//   dest       - output cursor, advanced past anything written literally.
-//   state      - tokenizer state; state.entityState() selects the parse phase.
-//   cBufferPos - number of characters currently saved in m_cBuffer.
-//   start      - true to begin a new reference (resets cBufferPos, the entity
-//                state and EntityUnicodeValue); false to continue from the
-//                phase recorded in |state|.
-//   parsingTag - enables the IE-compatibility rule in the EntityName phase.
-// A successfully decoded value is pushed back onto |src| (outside view-source
-// mode); otherwise '&' and the saved characters are written through |dest| as
-// plain text. Returns the updated state; the entity state is NoEntity once the
-// reference has been resolved.
-HTMLTokenizer::State HTMLTokenizer::parseEntity(SegmentedString& src, UChar*& dest, State state, unsigned& cBufferPos, bool start, bool parsingTag)
-{
- if (start) {
- cBufferPos = 0;
- state.setEntityState(SearchEntity);
- EntityUnicodeValue = 0;
- }
-
- while(!src.isEmpty()) {
- UChar cc = *src;
- switch(state.entityState()) {
- case NoEntity:
- ASSERT(state.entityState() != NoEntity);
- return state;
-
- case SearchEntity:
- // '#' introduces a numeric reference; anything else is treated as a name.
- if (cc == '#') {
- m_cBuffer[cBufferPos++] = cc;
- src.advancePastNonNewline();
- state.setEntityState(NumericSearch);
- } else
- state.setEntityState(EntityName);
- break;
-
- case NumericSearch:
- if (cc == 'x' || cc == 'X') {
- m_cBuffer[cBufferPos++] = cc;
- src.advancePastNonNewline();
- state.setEntityState(Hexadecimal);
- } else if (cc >= '0' && cc <= '9')
- state.setEntityState(Decimal);
- else
- state.setEntityState(SearchSemicolon);
- break;
-
- case Hexadecimal: {
- // Read hex digits until m_cBuffer holds 10 characters in total.
- int ll = min(src.length(), 10 - cBufferPos);
- while (ll--) {
- cc = *src;
- if (!((cc >= '0' && cc <= '9') || (cc >= 'a' && cc <= 'f') || (cc >= 'A' && cc <= 'F'))) {
- state.setEntityState(SearchSemicolon);
- break;
- }
- int digit;
- if (cc < 'A')
- digit = cc - '0';
- else
- digit = (cc - 'A' + 10) & 0xF; // handle both upper and lower case without a branch
- EntityUnicodeValue = EntityUnicodeValue * 16 + digit;
- m_cBuffer[cBufferPos++] = cc;
- src.advancePastNonNewline();
- }
- if (cBufferPos == 10)
- state.setEntityState(SearchSemicolon);
- break;
- }
- case Decimal:
- {
- // Read decimal digits until m_cBuffer holds 9 characters in total.
- int ll = min(src.length(), 9-cBufferPos);
- while(ll--) {
- cc = *src;
-
- if (!(cc >= '0' && cc <= '9')) {
- state.setEntityState(SearchSemicolon);
- break;
- }
-
- EntityUnicodeValue = EntityUnicodeValue * 10 + (cc - '0');
- m_cBuffer[cBufferPos++] = cc;
- src.advancePastNonNewline();
- }
- if (cBufferPos == 9)
- state.setEntityState(SearchSemicolon);
- break;
- }
- case EntityName:
- {
- // Read up to 9 ASCII letters/digits of the entity name.
- int ll = min(src.length(), 9-cBufferPos);
- while(ll--) {
- cc = *src;
-
- if (!((cc >= 'a' && cc <= 'z') || (cc >= '0' && cc <= '9') || (cc >= 'A' && cc <= 'Z'))) {
- state.setEntityState(SearchSemicolon);
- break;
- }
-
- m_cBuffer[cBufferPos++] = cc;
- src.advancePastNonNewline();
- }
- if (cBufferPos == 9)
- state.setEntityState(SearchSemicolon);
- if (state.entityState() == SearchSemicolon) {
- if(cBufferPos > 1) {
- // Since the maximum length of entity name is 9,
- // so a single char array which is allocated on
- // the stack, its length is 10, should be OK.
- // Also if we have an illegal character, we treat it
- // as illegal entity name.
- unsigned testedEntityNameLen = 0;
- char tmpEntityNameBuffer[10];
-
- ASSERT(cBufferPos < 10);
- for (; testedEntityNameLen < cBufferPos; ++testedEntityNameLen) {
- if (m_cBuffer[testedEntityNameLen] > 0x7e)
- break;
- tmpEntityNameBuffer[testedEntityNameLen] = m_cBuffer[testedEntityNameLen];
- }
-
- const Entity *e;
-
- if (testedEntityNameLen == cBufferPos)
- e = findEntity(tmpEntityNameBuffer, cBufferPos);
- else
- e = 0;
-
- if(e)
- EntityUnicodeValue = e->code;
-
- // be IE compatible
- if(parsingTag && EntityUnicodeValue > 255 && *src != ';')
- EntityUnicodeValue = 0;
- }
- }
- else
- break;
- // The name is complete: deliberately fall through to SearchSemicolon.
- }
- case SearchSemicolon:
- // Don't allow values that are more than 21 bits.
- if (EntityUnicodeValue > 0 && EntityUnicodeValue <= 0x10FFFF) {
- if (!inViewSourceMode()) {
- if (*src == ';')
- src.advancePastNonNewline();
- // The decoded character is pushed back onto the input rather than written to |dest|.
- if (EntityUnicodeValue <= 0xFFFF) {
- checkBuffer();
- src.push(fixUpChar(EntityUnicodeValue));
- } else {
- // Convert to UTF-16, using surrogate code points.
- checkBuffer(2);
- src.push(U16_LEAD(EntityUnicodeValue));
- src.push(U16_TRAIL(EntityUnicodeValue));
- }
- } else {
- // FIXME: We should eventually colorize entities by sending them as a special token.
- checkBuffer(11);
- *dest++ = '&';
- for (unsigned i = 0; i < cBufferPos; i++)
- dest[i] = m_cBuffer[i];
- dest += cBufferPos;
- if (*src == ';') {
- *dest++ = ';';
- src.advancePastNonNewline();
- }
- }
- } else {
- checkBuffer(10);
- // ignore the sequence, add it to the buffer as plaintext
- *dest++ = '&';
- for (unsigned i = 0; i < cBufferPos; i++)
- dest[i] = m_cBuffer[i];
- dest += cBufferPos;
- }
-
- state.setEntityState(NoEntity);
- return state;
- }
- }
-
- return state;
-}
-
-HTMLTokenizer::State HTMLTokenizer::parseDoctype(SegmentedString& src, State state)
-{
- ASSERT(state.inDoctype());
- while (!src.isEmpty() && state.inDoctype()) {
- UChar c = *src;
- bool isWhitespace = c == '\r' || c == '\n' || c == '\t' || c == ' ';
- switch (m_doctypeToken.state()) {
- case DoctypeBegin: {
- m_doctypeToken.setState(DoctypeBeforeName);
- if (isWhitespace) {
- src.advance(m_lineNumber);
- if (inViewSourceMode())
- m_doctypeToken.m_source.append(c);
- }
- break;
- }
- case DoctypeBeforeName: {
- if (c == '>') {
- // Malformed. Just exit.
- src.advancePastNonNewline();
- state.setInDoctype(false);
- if (inViewSourceMode())
- processDoctypeToken();
- } else if (isWhitespace) {
- src.advance(m_lineNumber);
- if (inViewSourceMode())
- m_doctypeToken.m_source.append(c);
- } else
- m_doctypeToken.setState(DoctypeName);
- break;
- }
- case DoctypeName: {
- if (c == '>') {
- // Valid doctype. Emit it.
- src.advancePastNonNewline();
- state.setInDoctype(false);
- processDoctypeToken();
- } else if (isWhitespace) {
- m_doctypeSearchCount = 0; // Used now to scan for PUBLIC
- m_doctypeSecondarySearchCount = 0; // Used now to scan for SYSTEM
- m_doctypeToken.setState(DoctypeAfterName);
- src.advance(m_lineNumber);
- if (inViewSourceMode())
- m_doctypeToken.m_source.append(c);
- } else {
- src.advancePastNonNewline();
- m_doctypeToken.m_name.append(c);
- if (inViewSourceMode())
- m_doctypeToken.m_source.append(c);
- }
- break;
- }
- case DoctypeAfterName: {
- if (c == '>') {
- // Valid doctype. Emit it.
- src.advancePastNonNewline();
- state.setInDoctype(false);
- processDoctypeToken();
- } else if (!isWhitespace) {
- src.advancePastNonNewline();
- if (toASCIILower(c) == publicStart[m_doctypeSearchCount]) {
- m_doctypeSearchCount++;
- if (m_doctypeSearchCount == 6)
- // Found 'PUBLIC' sequence
- m_doctypeToken.setState(DoctypeBeforePublicID);
- } else if (m_doctypeSearchCount > 0) {
- m_doctypeSearchCount = 0;
- m_doctypeToken.setState(DoctypeBogus);
- } else if (toASCIILower(c) == systemStart[m_doctypeSecondarySearchCount]) {
- m_doctypeSecondarySearchCount++;
- if (m_doctypeSecondarySearchCount == 6)
- // Found 'SYSTEM' sequence
- m_doctypeToken.setState(DoctypeBeforeSystemID);
- } else {
- m_doctypeSecondarySearchCount = 0;
- m_doctypeToken.setState(DoctypeBogus);
- }
- if (inViewSourceMode())
- m_doctypeToken.m_source.append(c);
- } else {
- src.advance(m_lineNumber); // Whitespace keeps us in the after name state.
- if (inViewSourceMode())
- m_doctypeToken.m_source.append(c);
- }
- break;
- }
- case DoctypeBeforePublicID: {
- if (c == '\"' || c == '\'') {
- tquote = c == '\"' ? DoubleQuote : SingleQuote;
- m_doctypeToken.setState(DoctypePublicID);
- src.advancePastNonNewline();
- if (inViewSourceMode())
- m_doctypeToken.m_source.append(c);
- } else if (c == '>') {
- // Considered bogus. Don't process the doctype.
- src.advancePastNonNewline();
- state.setInDoctype(false);
- if (inViewSourceMode())
- processDoctypeToken();
- } else if (isWhitespace) {
- src.advance(m_lineNumber);
- if (inViewSourceMode())
- m_doctypeToken.m_source.append(c);
- } else
- m_doctypeToken.setState(DoctypeBogus);
- break;
- }
- case DoctypePublicID: {
- if ((c == '\"' && tquote == DoubleQuote) || (c == '\'' && tquote == SingleQuote)) {
- src.advancePastNonNewline();
- m_doctypeToken.setState(DoctypeAfterPublicID);
- if (inViewSourceMode())
- m_doctypeToken.m_source.append(c);
- } else if (c == '>') {
- // Considered bogus. Don't process the doctype.
- src.advancePastNonNewline();
- state.setInDoctype(false);
- if (inViewSourceMode())
- processDoctypeToken();
- } else {
- m_doctypeToken.m_publicID.append(c);
- src.advance(m_lineNumber);
- if (inViewSourceMode())
- m_doctypeToken.m_source.append(c);
- }
- break;
- }
- case DoctypeAfterPublicID:
- if (c == '\"' || c == '\'') {
- tquote = c == '\"' ? DoubleQuote : SingleQuote;
- m_doctypeToken.setState(DoctypeSystemID);
- src.advancePastNonNewline();
- if (inViewSourceMode())
- m_doctypeToken.m_source.append(c);
- } else if (c == '>') {
- // Valid doctype. Emit it now.
- src.advancePastNonNewline();
- state.setInDoctype(false);
- processDoctypeToken();
- } else if (isWhitespace) {
- src.advance(m_lineNumber);
- if (inViewSourceMode())
- m_doctypeToken.m_source.append(c);
- } else
- m_doctypeToken.setState(DoctypeBogus);
- break;
- case DoctypeBeforeSystemID:
- if (c == '\"' || c == '\'') {
- tquote = c == '\"' ? DoubleQuote : SingleQuote;
- m_doctypeToken.setState(DoctypeSystemID);
- src.advancePastNonNewline();
- if (inViewSourceMode())
- m_doctypeToken.m_source.append(c);
- } else if (c == '>') {
- // Considered bogus. Don't process the doctype.
- src.advancePastNonNewline();
- state.setInDoctype(false);
- } else if (isWhitespace) {
- src.advance(m_lineNumber);
- if (inViewSourceMode())
- m_doctypeToken.m_source.append(c);
- } else
- m_doctypeToken.setState(DoctypeBogus);
- break;
- case DoctypeSystemID:
- if ((c == '\"' && tquote == DoubleQuote) || (c == '\'' && tquote == SingleQuote)) {
- src.advancePastNonNewline();
- m_doctypeToken.setState(DoctypeAfterSystemID);
- if (inViewSourceMode())
- m_doctypeToken.m_source.append(c);
- } else if (c == '>') {
- // Considered bogus. Don't process the doctype.
- src.advancePastNonNewline();
- state.setInDoctype(false);
- if (inViewSourceMode())
- processDoctypeToken();
- } else {
- m_doctypeToken.m_systemID.append(c);
- src.advance(m_lineNumber);
- if (inViewSourceMode())
- m_doctypeToken.m_source.append(c);
- }
- break;
- case DoctypeAfterSystemID:
- if (c == '>') {
- // Valid doctype. Emit it now.
- src.advancePastNonNewline();
- state.setInDoctype(false);
- processDoctypeToken();
- } else if (isWhitespace) {
- src.advance(m_lineNumber);
- if (inViewSourceMode())
- m_doctypeToken.m_source.append(c);
- } else
- m_doctypeToken.setState(DoctypeBogus);
- break;
- case DoctypeBogus:
- if (c == '>') {
- // Done with the bogus doctype.
- src.advancePastNonNewline();
- state.setInDoctype(false);
- if (inViewSourceMode())
- processDoctypeToken();
- } else {
- src.advance(m_lineNumber); // Just keep scanning for '>'
- if (inViewSourceMode())
- m_doctypeToken.m_source.append(c);
- }
- break;
- default:
- break;
- }
- }
- return state;
-}
-
-HTMLTokenizer::State HTMLTokenizer::parseTag(SegmentedString& src, State state)
-{
- ASSERT(!state.hasEntityState());
-
- unsigned cBufferPos = m_cBufferPos;
-
- bool lastIsSlash = false;
-
- while (!src.isEmpty()) {
- checkBuffer();
- switch(state.tagState()) {
- case NoTag:
- {
- m_cBufferPos = cBufferPos;
- return state;
- }
- case TagName:
- {
- if (searchCount > 0) {
- if (*src == commentStart[searchCount]) {
- searchCount++;
- if (searchCount == 2)
- m_doctypeSearchCount++; // A '!' is also part of a doctype, so we are moving through that still as well.
- else
- m_doctypeSearchCount = 0;
- if (searchCount == 4) {
- // Found '<!--' sequence
- src.advancePastNonNewline();
- m_dest = m_buffer; // ignore the previous part of this tag
- state.setInComment(true);
- state.setTagState(NoTag);
-
- // Fix bug 34302 at kde.bugs.org. Go ahead and treat
- // <!--> as a valid comment, since both mozilla and IE on windows
- // can handle this case. Only do this in quirks mode. -dwh
- if (!src.isEmpty() && *src == '>' && m_doc->inCompatMode()) {
- state.setInComment(false);
- src.advancePastNonNewline();
- if (!src.isEmpty())
- m_cBuffer[cBufferPos++] = *src;
- } else
- state = parseComment(src, state);
-
- m_cBufferPos = cBufferPos;
- return state; // Finished parsing tag!
- }
- m_cBuffer[cBufferPos++] = *src;
- src.advancePastNonNewline();
- break;
- } else
- searchCount = 0; // Stop looking for '<!--' sequence
- }
-
- if (m_doctypeSearchCount > 0) {
- if (toASCIILower(*src) == doctypeStart[m_doctypeSearchCount]) {
- m_doctypeSearchCount++;
- m_cBuffer[cBufferPos++] = *src;
- src.advancePastNonNewline();
- if (m_doctypeSearchCount == 9) {
- // Found '<!DOCTYPE' sequence
- state.setInDoctype(true);
- state.setTagState(NoTag);
- m_doctypeToken.reset();
- if (inViewSourceMode())
- m_doctypeToken.m_source.append(m_cBuffer, cBufferPos);
- state = parseDoctype(src, state);
- m_cBufferPos = cBufferPos;
- return state;
- }
- break;
- } else
- m_doctypeSearchCount = 0; // Stop looking for '<!DOCTYPE' sequence
- }
-
- bool finish = false;
- unsigned int ll = min(src.length(), CBUFLEN - cBufferPos);
- while (ll--) {
- UChar curchar = *src;
- if (isASCIISpace(curchar) || curchar == '>' || curchar == '<') {
- finish = true;
- break;
- }
-
- // tolower() shows up on profiles. This is faster!
- if (curchar >= 'A' && curchar <= 'Z' && !inViewSourceMode())
- m_cBuffer[cBufferPos++] = curchar + ('a' - 'A');
- else
- m_cBuffer[cBufferPos++] = curchar;
- src.advancePastNonNewline();
- }
-
- // Disadvantage: we add the possible rest of the tag
- // as attribute names. ### judge if this causes problems
- if (finish || CBUFLEN == cBufferPos) {
- bool beginTag;
- UChar* ptr = m_cBuffer;
- unsigned int len = cBufferPos;
- m_cBuffer[cBufferPos] = '\0';
- if ((cBufferPos > 0) && (*ptr == '/')) {
- // End Tag
- beginTag = false;
- ptr++;
- len--;
- }
- else
- // Start Tag
- beginTag = true;
-
- // Ignore the / in fake xml tags like <br/>. We trim off the "/" so that we'll get "br" as the tag name and not "br/".
- if (len > 1 && ptr[len-1] == '/' && !inViewSourceMode())
- ptr[--len] = '\0';
-
- // Now that we've shaved off any invalid / that might have followed the name), make the tag.
- // FIXME: FireFox and WinIE turn !foo nodes into comments, we ignore comments. (fast/parser/tag-with-exclamation-point.html)
- if (ptr[0] != '!' || inViewSourceMode()) {
- m_currentToken.tagName = AtomicString(ptr);
- m_currentToken.beginTag = beginTag;
- }
- m_dest = m_buffer;
- state.setTagState(SearchAttribute);
- cBufferPos = 0;
- }
- break;
- }
- case SearchAttribute:
- while(!src.isEmpty()) {
- UChar curchar = *src;
- // In this mode just ignore any quotes we encounter and treat them like spaces.
- if (!isASCIISpace(curchar) && curchar != '\'' && curchar != '"') {
- if (curchar == '<' || curchar == '>')
- state.setTagState(SearchEnd);
- else
- state.setTagState(AttributeName);
-
- cBufferPos = 0;
- break;
- }
- if (inViewSourceMode())
- m_currentToken.addViewSourceChar(curchar);
- src.advance(m_lineNumber);
- }
- break;
- case AttributeName:
- {
- int ll = min(src.length(), CBUFLEN - cBufferPos);
- while (ll--) {
- UChar curchar = *src;
- // If we encounter a "/" when scanning an attribute name, treat it as a delimiter. This allows the
- // cases like <input type=checkbox checked/> to work (and accommodates XML-style syntax as per HTML5).
- if (curchar <= '>' && (curchar >= '<' || isASCIISpace(curchar) || curchar == '/')) {
- m_cBuffer[cBufferPos] = '\0';
- m_attrName = AtomicString(m_cBuffer);
- m_dest = m_buffer;
- *m_dest++ = 0;
- state.setTagState(SearchEqual);
- if (inViewSourceMode())
- m_currentToken.addViewSourceChar('a');
- break;
- }
-
- // tolower() shows up on profiles. This is faster!
- if (curchar >= 'A' && curchar <= 'Z' && !inViewSourceMode())
- m_cBuffer[cBufferPos++] = curchar + ('a' - 'A');
- else
- m_cBuffer[cBufferPos++] = curchar;
-
- src.advance(m_lineNumber);
- }
- if (cBufferPos == CBUFLEN) {
- m_cBuffer[cBufferPos] = '\0';
- m_attrName = AtomicString(m_cBuffer);
- m_dest = m_buffer;
- *m_dest++ = 0;
- state.setTagState(SearchEqual);
- if (inViewSourceMode())
- m_currentToken.addViewSourceChar('a');
- }
- break;
- }
- case SearchEqual:
- while (!src.isEmpty()) {
- UChar curchar = *src;
-
- if (lastIsSlash && curchar == '>') {
- // This is a quirk (with a long sad history). We have to do this
- // since widgets do <script src="foo.js"/> and expect the tag to close.
- if (m_currentToken.tagName == scriptTag)
- m_currentToken.selfClosingTag = true;
- m_currentToken.brokenXMLStyle = true;
- }
-
- // In this mode just ignore any quotes or slashes we encounter and treat them like spaces.
- if (!isASCIISpace(curchar) && curchar != '\'' && curchar != '"' && curchar != '/') {
- if (curchar == '=') {
- state.setTagState(SearchValue);
- if (inViewSourceMode())
- m_currentToken.addViewSourceChar(curchar);
- src.advancePastNonNewline();
- } else {
- m_currentToken.addAttribute(m_attrName, emptyAtom, inViewSourceMode());
- m_dest = m_buffer;
- state.setTagState(SearchAttribute);
- lastIsSlash = false;
- }
- break;
- }
- if (inViewSourceMode())
- m_currentToken.addViewSourceChar(curchar);
-
- lastIsSlash = curchar == '/';
-
- src.advance(m_lineNumber);
- }
- break;
- case SearchValue:
- while (!src.isEmpty()) {
- UChar curchar = *src;
- if (!isASCIISpace(curchar)) {
- if (curchar == '\'' || curchar == '\"') {
- tquote = curchar == '\"' ? DoubleQuote : SingleQuote;
- state.setTagState(QuotedValue);
- if (inViewSourceMode())
- m_currentToken.addViewSourceChar(curchar);
- src.advancePastNonNewline();
- } else
- state.setTagState(Value);
-
- break;
- }
- if (inViewSourceMode())
- m_currentToken.addViewSourceChar(curchar);
- src.advance(m_lineNumber);
- }
- break;
- case QuotedValue:
- while (!src.isEmpty()) {
- checkBuffer();
-
- UChar curchar = *src;
- if (curchar <= '>' && !src.escaped()) {
- if (curchar == '>' && m_attrName.isEmpty()) {
- // Handle a case like <img '>. Just go ahead and be willing
- // to close the whole tag. Don't consume the character and
- // just go back into SearchEnd while ignoring the whole
- // value.
- // FIXME: Note that this is actually not a very good solution.
- // It doesn't handle the general case of
- // unmatched quotes among attributes that have names. -dwh
- while (m_dest > m_buffer + 1 && (m_dest[-1] == '\n' || m_dest[-1] == '\r'))
- m_dest--; // remove trailing newlines
- AtomicString attributeValue(m_buffer + 1, m_dest - m_buffer - 1);
- if (!attributeValue.contains('/'))
- m_attrName = attributeValue; // Just make the name/value match. (FIXME: Is this some WinIE quirk?)
- m_currentToken.addAttribute(m_attrName, attributeValue, inViewSourceMode());
- if (inViewSourceMode())
- m_currentToken.addViewSourceChar('x');
- state.setTagState(SearchAttribute);
- m_dest = m_buffer;
- tquote = NoQuote;
- break;
- }
-
- if (curchar == '&') {
- src.advancePastNonNewline();
- state = parseEntity(src, m_dest, state, cBufferPos, true, true);
- break;
- }
-
- if ((tquote == SingleQuote && curchar == '\'') || (tquote == DoubleQuote && curchar == '\"')) {
- // some <input type=hidden> rely on trailing spaces. argh
- while (m_dest > m_buffer + 1 && (m_dest[-1] == '\n' || m_dest[-1] == '\r'))
- m_dest--; // remove trailing newlines
- AtomicString attributeValue(m_buffer + 1, m_dest - m_buffer - 1);
- if (m_attrName.isEmpty() && !attributeValue.contains('/')) {
- m_attrName = attributeValue; // Make the name match the value. (FIXME: Is this a WinIE quirk?)
- if (inViewSourceMode())
- m_currentToken.addViewSourceChar('x');
- } else if (inViewSourceMode())
- m_currentToken.addViewSourceChar('v');
- m_currentToken.addAttribute(m_attrName, attributeValue, inViewSourceMode());
- m_dest = m_buffer;
- state.setTagState(SearchAttribute);
- tquote = NoQuote;
- if (inViewSourceMode())
- m_currentToken.addViewSourceChar(curchar);
- src.advancePastNonNewline();
- break;
- }
- }
-
- *m_dest++ = curchar;
- src.advance(m_lineNumber);
- }
- break;
- case Value:
- while(!src.isEmpty()) {
- checkBuffer();
- UChar curchar = *src;
- if (curchar <= '>' && !src.escaped()) {
- // parse Entities
- if (curchar == '&') {
- src.advancePastNonNewline();
- state = parseEntity(src, m_dest, state, cBufferPos, true, true);
- break;
- }
- // no quotes. Every space means end of value
- // '/' does not delimit in IE!
- if (isASCIISpace(curchar) || curchar == '>') {
- AtomicString attributeValue(m_buffer + 1, m_dest - m_buffer - 1);
- m_currentToken.addAttribute(m_attrName, attributeValue, inViewSourceMode());
- if (inViewSourceMode())
- m_currentToken.addViewSourceChar('v');
- m_dest = m_buffer;
- state.setTagState(SearchAttribute);
- break;
- }
- }
-
- *m_dest++ = curchar;
- src.advance(m_lineNumber);
- }
- break;
- case SearchEnd:
- {
- while (!src.isEmpty()) {
- UChar ch = *src;
- if (ch == '>' || ch == '<')
- break;
- if (ch == '/')
- m_currentToken.selfClosingTag = true;
- if (inViewSourceMode())
- m_currentToken.addViewSourceChar(ch);
- src.advance(m_lineNumber);
- }
- if (src.isEmpty())
- break;
-
- searchCount = 0; // Stop looking for '<!--' sequence
- state.setTagState(NoTag);
- tquote = NoQuote;
-
- if (*src != '<')
- src.advance(m_lineNumber);
-
- if (m_currentToken.tagName == nullAtom) { //stop if tag is unknown
- m_cBufferPos = cBufferPos;
- return state;
- }
-
- AtomicString tagName = m_currentToken.tagName;
-
- // Handle <script src="foo"/> like Mozilla/Opera. We have to do this now for Dashboard
- // compatibility.
- bool isSelfClosingScript = m_currentToken.selfClosingTag && m_currentToken.beginTag && m_currentToken.tagName == scriptTag;
- bool beginTag = !m_currentToken.selfClosingTag && m_currentToken.beginTag;
- if (m_currentToken.beginTag && m_currentToken.tagName == scriptTag && !inViewSourceMode() && !m_parser->skipMode()) {
- Attribute* a = 0;
- m_scriptTagSrcAttrValue = String();
- m_scriptTagCharsetAttrValue = String();
- if (m_currentToken.attrs && !m_fragment) {
- if (m_doc->frame() && m_doc->frame()->script()->isEnabled()) {
- if ((a = m_currentToken.attrs->getAttributeItem(srcAttr)))
- m_scriptTagSrcAttrValue = m_doc->completeURL(parseURL(a->value())).string();
- }
- }
- }
-
- RefPtr<Node> n = processToken();
- m_cBufferPos = cBufferPos;
- if (n || inViewSourceMode()) {
- if ((tagName == preTag || tagName == listingTag) && !inViewSourceMode()) {
- if (beginTag)
- state.setDiscardLF(true); // Discard the first LF after we open a pre.
- } else if (tagName == scriptTag) {
- ASSERT(!m_scriptNode);
- m_scriptNode = static_pointer_cast<HTMLScriptElement>(n);
- if (m_scriptNode)
- m_scriptTagCharsetAttrValue = m_scriptNode->scriptCharset();
- if (beginTag) {
- m_searchStopper = scriptEnd;
- m_searchStopperLength = 8;
- state.setInScript(true);
- state = parseSpecial(src, state);
- } else if (isSelfClosingScript) { // Handle <script src="foo"/>
- state.setInScript(true);
- state = scriptHandler(state);
- }
- } else if (tagName == styleTag) {
- if (beginTag) {
- m_searchStopper = styleEnd;
- m_searchStopperLength = 7;
- state.setInStyle(true);
- state = parseSpecial(src, state);
- }
- } else if (tagName == textareaTag) {
- if (beginTag) {
- m_searchStopper = textareaEnd;
- m_searchStopperLength = 10;
- state.setInTextArea(true);
- state = parseSpecial(src, state);
- }
- } else if (tagName == titleTag) {
- if (beginTag) {
- m_searchStopper = titleEnd;
- m_searchStopperLength = 7;
- State savedState = state;
- SegmentedString savedSrc = src;
- long savedLineno = m_lineNumber;
- state.setInTitle(true);
- state = parseSpecial(src, state);
- if (state.inTitle() && src.isEmpty()) {
- // We just ate the rest of the document as the title #text node!
- // Reset the state then retokenize without special title handling.
- // Let the parser clean up the missing </title> tag.
- // FIXME: This is incorrect, because src.isEmpty() doesn't mean we're
- // at the end of the document unless m_noMoreData is also true. We need
- // to detect this case elsewhere, and save the state somewhere other
- // than a local variable.
- state = savedState;
- src = savedSrc;
- m_lineNumber = savedLineno;
- m_scriptCodeSize = 0;
- }
- }
- } else if (tagName == xmpTag) {
- if (beginTag) {
- m_searchStopper = xmpEnd;
- m_searchStopperLength = 5;
- state.setInXmp(true);
- state = parseSpecial(src, state);
- }
- } else if (tagName == iframeTag) {
- if (beginTag) {
- m_searchStopper = iframeEnd;
- m_searchStopperLength = 8;
- state.setInIFrame(true);
- state = parseSpecial(src, state);
- }
- }
- }
- if (tagName == plaintextTag)
- state.setInPlainText(beginTag);
- return state; // Finished parsing tag!
- }
- } // end switch
- }
- m_cBufferPos = cBufferPos;
- return state;
-}
-
-inline bool HTMLTokenizer::continueProcessing(int& processedCount, double startTime, State &state)
-{
- // We don't want to be checking elapsed time with every character, so we only check after we've
- // processed a certain number of characters.
- bool allowedYield = state.allowYield();
- state.setAllowYield(false);
- if (!state.loadingExtScript() && !state.forceSynchronous() && !m_executingScript && (processedCount > m_tokenizerChunkSize || allowedYield)) {
- processedCount = 0;
- if (currentTime() - startTime > m_tokenizerTimeDelay) {
- /* FIXME: We'd like to yield aggressively to give stylesheets the opportunity to
- load, but this hurts overall performance on slower machines. For now turn this
- off.
- || (!m_doc->haveStylesheetsLoaded() &&
- (m_doc->documentElement()->id() != ID_HTML || m_doc->body()))) {*/
- // Schedule the timer to keep processing as soon as possible.
- m_timer.startOneShot(0);
-#ifdef INSTRUMENT_LAYOUT_SCHEDULING
- if (currentTime() - startTime > m_tokenizerTimeDelay)
- printf("Deferring processing of data because 500ms elapsed away from event loop.\n");
-#endif
- return false;
- }
- }
-
- processedCount++;
- return true;
-}
-
-bool HTMLTokenizer::write(const SegmentedString& str, bool appendData)
-{
- if (!m_buffer)
- return false;
-
- if (m_parserStopped)
- return false;
-
- SegmentedString source(str);
- if (m_executingScript)
- source.setExcludeLineNumbers();
-
- if ((m_executingScript && appendData) || !m_pendingScripts.isEmpty()) {
- // don't parse; we will do this later
- if (m_currentPrependingSrc)
- m_currentPrependingSrc->append(source);
- else {
- m_pendingSrc.append(source);
-#if PRELOAD_SCANNER_ENABLED
- if (m_preloadScanner && m_preloadScanner->inProgress() && appendData)
- m_preloadScanner->write(source);
-#endif
- }
- return false;
- }
-
-#if PRELOAD_SCANNER_ENABLED
- if (m_preloadScanner && m_preloadScanner->inProgress() && appendData)
- m_preloadScanner->end();
-#endif
-
- if (!m_src.isEmpty())
- m_src.append(source);
- else
- setSrc(source);
-
- // Once a timer is set, it has control of when the tokenizer continues.
- if (m_timer.isActive())
- return false;
-
- bool wasInWrite = m_inWrite;
- m_inWrite = true;
-
-#ifdef INSTRUMENT_LAYOUT_SCHEDULING
- if (!m_doc->ownerElement())
- printf("Beginning write at time %d\n", m_doc->elapsedTime());
-#endif
-
- int processedCount = 0;
- double startTime = currentTime();
-
- Frame* frame = m_doc->frame();
-
- State state = m_state;
-
- while (!m_src.isEmpty() && (!frame || !frame->loader()->isScheduledLocationChangePending())) {
- if (!continueProcessing(processedCount, startTime, state))
- break;
-
- // do we need to enlarge the buffer?
- checkBuffer();
-
- UChar cc = *m_src;
-
- bool wasSkipLF = state.skipLF();
- if (wasSkipLF)
- state.setSkipLF(false);
-
- if (wasSkipLF && (cc == '\n'))
- m_src.advance();
- else if (state.needsSpecialWriteHandling()) {
- // it's important to keep needsSpecialWriteHandling with the flags this block tests
- if (state.hasEntityState())
- state = parseEntity(m_src, m_dest, state, m_cBufferPos, false, state.hasTagState());
- else if (state.inPlainText())
- state = parseText(m_src, state);
- else if (state.inAnySpecial())
- state = parseSpecial(m_src, state);
- else if (state.inComment())
- state = parseComment(m_src, state);
- else if (state.inDoctype())
- state = parseDoctype(m_src, state);
- else if (state.inServer())
- state = parseServer(m_src, state);
- else if (state.inProcessingInstruction())
- state = parseProcessingInstruction(m_src, state);
- else if (state.hasTagState())
- state = parseTag(m_src, state);
- else if (state.startTag()) {
- state.setStartTag(false);
-
- switch(cc) {
- case '/':
- break;
- case '!': {
- // <!-- comment --> or <!DOCTYPE ...>
- searchCount = 1; // Look for '<!--' sequence to start comment or '<!DOCTYPE' sequence to start doctype
- m_doctypeSearchCount = 1;
- break;
- }
- case '?': {
- // xml processing instruction
- state.setInProcessingInstruction(true);
- tquote = NoQuote;
- state = parseProcessingInstruction(m_src, state);
- continue;
-
- break;
- }
- case '%':
- if (!m_brokenServer) {
- // <% server stuff, handle as comment %>
- state.setInServer(true);
- tquote = NoQuote;
- state = parseServer(m_src, state);
- continue;
- }
- // else fall through
- default: {
- if( ((cc >= 'a') && (cc <= 'z')) || ((cc >= 'A') && (cc <= 'Z'))) {
- // Start of a Start-Tag
- } else {
- // Invalid tag
- // Add as is
- *m_dest = '<';
- m_dest++;
- continue;
- }
- }
- }; // end case
-
- processToken();
-
- m_cBufferPos = 0;
- state.setTagState(TagName);
- state = parseTag(m_src, state);
- }
- } else if (cc == '&' && !m_src.escaped()) {
- m_src.advancePastNonNewline();
- state = parseEntity(m_src, m_dest, state, m_cBufferPos, true, state.hasTagState());
- } else if (cc == '<' && !m_src.escaped()) {
- m_currentTagStartLineNumber = m_lineNumber;
- m_src.advancePastNonNewline();
- state.setStartTag(true);
- state.setDiscardLF(false);
- } else if (cc == '\n' || cc == '\r') {
- if (state.discardLF())
- // Ignore this LF
- state.setDiscardLF(false); // We have discarded 1 LF
- else {
- // Process this LF
- *m_dest++ = '\n';
- if (cc == '\r' && !m_src.excludeLineNumbers())
- m_lineNumber++;
- }
-
- /* Check for MS-DOS CRLF sequence */
- if (cc == '\r')
- state.setSkipLF(true);
- m_src.advance(m_lineNumber);
- } else {
- state.setDiscardLF(false);
- *m_dest++ = cc;
- m_src.advancePastNonNewline();
- }
- }
-
-#ifdef INSTRUMENT_LAYOUT_SCHEDULING
- if (!m_doc->ownerElement())
- printf("Ending write at time %d\n", m_doc->elapsedTime());
-#endif
-
- m_inWrite = wasInWrite;
-
- m_state = state;
-
- if (m_noMoreData && !m_inWrite && !state.loadingExtScript() && !m_executingScript && !m_timer.isActive()) {
- end(); // this actually causes us to be deleted
- return true;
- }
- return false;
-}
-
-void HTMLTokenizer::stopParsing()
-{
- Tokenizer::stopParsing();
- m_timer.stop();
-
- // The part needs to know that the tokenizer has finished with its data,
- // regardless of whether it happened naturally or due to manual intervention.
- if (!m_fragment && m_doc->frame())
- m_doc->frame()->loader()->tokenizerProcessedData();
-}
-
-bool HTMLTokenizer::processingData() const
-{
- return m_timer.isActive() || m_inWrite;
-}
-
-void HTMLTokenizer::timerFired(Timer<HTMLTokenizer>*)
-{
-#ifdef INSTRUMENT_LAYOUT_SCHEDULING
- if (!m_doc->ownerElement())
- printf("Beginning timer write at time %d\n", m_doc->elapsedTime());
-#endif
-
- if (m_doc->view() && m_doc->view()->layoutPending() && !m_doc->minimumLayoutDelay()) {
- // Restart the timer and let layout win. This is basically a way of ensuring that the layout
- // timer has higher priority than our timer.
- m_timer.startOneShot(0);
- return;
- }
-
- // Invoke write() as though more data came in. This might cause us to get deleted.
- write(SegmentedString(), true);
-}
-
-void HTMLTokenizer::end()
-{
- ASSERT(!m_timer.isActive());
- m_timer.stop(); // Only helps if assertion above fires, but do it anyway.
-
- if (m_buffer) {
- // parseTag is using the buffer for different matters
- if (!m_state.hasTagState())
- processToken();
-
- fastFree(m_scriptCode);
- m_scriptCode = 0;
- m_scriptCodeSize = m_scriptCodeCapacity = m_scriptCodeResync = 0;
-
- fastFree(m_buffer);
- m_buffer = 0;
- }
-
- if (!inViewSourceMode())
- m_parser->finished();
- else
- m_doc->finishedParsing();
-}
-
-void HTMLTokenizer::finish()
-{
- // do this as long as we don't find matching comment ends
- while ((m_state.inComment() || m_state.inServer()) && m_scriptCode && m_scriptCodeSize) {
- // we've found an unmatched comment start
- if (m_state.inComment())
- m_brokenComments = true;
- else
- m_brokenServer = true;
- checkScriptBuffer();
- m_scriptCode[m_scriptCodeSize] = 0;
- m_scriptCode[m_scriptCodeSize + 1] = 0;
- int pos;
- String food;
- if (m_state.inScript() || m_state.inStyle() || m_state.inTextArea())
- food = String(m_scriptCode, m_scriptCodeSize);
- else if (m_state.inServer()) {
- food = "<";
- food.append(m_scriptCode, m_scriptCodeSize);
- } else {
- pos = find(m_scriptCode, m_scriptCodeSize, '>');
- food = String(m_scriptCode + pos + 1, m_scriptCodeSize - pos - 1);
- }
- fastFree(m_scriptCode);
- m_scriptCode = 0;
- m_scriptCodeSize = m_scriptCodeCapacity = m_scriptCodeResync = 0;
- m_state.setInComment(false);
- m_state.setInServer(false);
- if (!food.isEmpty())
- write(food, true);
- }
- // this indicates we will not receive any more data... but if we are waiting on
- // an external script to load, we can't finish parsing until that is done
- m_noMoreData = true;
- if (!m_inWrite && !m_state.loadingExtScript() && !m_executingScript && !m_timer.isActive())
- end(); // this actually causes us to be deleted
-}
-
-PassRefPtr<Node> HTMLTokenizer::processToken()
-{
- ScriptController* scriptController = (!m_fragment && m_doc->frame()) ? m_doc->frame()->script() : 0;
- if (scriptController && scriptController->isEnabled())
- // FIXME: Why isn't this m_currentScriptTagStartLineNumber? I suspect this is wrong.
- scriptController->setEventHandlerLineno(m_currentTagStartLineNumber + 1); // Script line numbers are 1 based.
- if (m_dest > m_buffer) {
- m_currentToken.text = StringImpl::createStrippingNullCharacters(m_buffer, m_dest - m_buffer);
- if (m_currentToken.tagName != commentAtom)
- m_currentToken.tagName = textAtom;
- } else if (m_currentToken.tagName == nullAtom) {
- m_currentToken.reset();
- if (scriptController)
- scriptController->setEventHandlerLineno(m_lineNumber + 1); // Script line numbers are 1 based.
- return 0;
- }
-
- m_dest = m_buffer;
-
- RefPtr<Node> n;
-
- if (!m_parserStopped) {
- if (NamedMappedAttrMap* map = m_currentToken.attrs.get())
- map->shrinkToLength();
- if (inViewSourceMode())
- static_cast<HTMLViewSourceDocument*>(m_doc)->addViewSourceToken(&m_currentToken);
- else
- // pass the token over to the parser, the parser DOES NOT delete the token
- n = m_parser->parseToken(&m_currentToken);
- }
- m_currentToken.reset();
- if (scriptController)
- scriptController->setEventHandlerLineno(0);
-
- return n.release();
-}
-
-void HTMLTokenizer::processDoctypeToken()
-{
- if (inViewSourceMode())
- static_cast<HTMLViewSourceDocument*>(m_doc)->addViewSourceDoctypeToken(&m_doctypeToken);
- else
- m_parser->parseDoctypeToken(&m_doctypeToken);
-}
-
-HTMLTokenizer::~HTMLTokenizer()
-{
- ASSERT(!m_inWrite);
- reset();
-}
-
-
-void HTMLTokenizer::enlargeBuffer(int len)
-{
- int newSize = max(m_bufferSize * 2, m_bufferSize + len);
- int oldOffset = m_dest - m_buffer;
- m_buffer = static_cast<UChar*>(fastRealloc(m_buffer, newSize * sizeof(UChar)));
- m_dest = m_buffer + oldOffset;
- m_bufferSize = newSize;
-}
-
-void HTMLTokenizer::enlargeScriptBuffer(int len)
-{
- int newSize = max(m_scriptCodeCapacity * 2, m_scriptCodeCapacity + len);
- m_scriptCode = static_cast<UChar*>(fastRealloc(m_scriptCode, newSize * sizeof(UChar)));
- m_scriptCodeCapacity = newSize;
-}
-
-void HTMLTokenizer::executeScriptsWaitingForStylesheets()
-{
- ASSERT(m_doc->haveStylesheetsLoaded());
-
- if (m_hasScriptsWaitingForStylesheets)
- notifyFinished(0);
-}
-
-void HTMLTokenizer::notifyFinished(CachedResource*)
-{
-#ifdef INSTRUMENT_LAYOUT_SCHEDULING
- if (!m_doc->ownerElement())
- printf("script loaded at %d\n", m_doc->elapsedTime());
-#endif
-
- ASSERT(!m_pendingScripts.isEmpty());
-
- // Make external scripts wait for external stylesheets.
- // FIXME: This needs to be done for inline scripts too.
- m_hasScriptsWaitingForStylesheets = !m_doc->haveStylesheetsLoaded();
- if (m_hasScriptsWaitingForStylesheets)
- return;
-
- bool finished = false;
- while (!finished && m_pendingScripts.first()->isLoaded()) {
- CachedScript* cs = m_pendingScripts.first().get();
- m_pendingScripts.removeFirst();
- ASSERT(cache()->disabled() || cs->accessCount() > 0);
-
- setSrc(SegmentedString());
-
- // make sure we forget about the script before we execute the new one
- // infinite recursion might happen otherwise
- ScriptSourceCode sourceCode(cs);
- bool errorOccurred = cs->errorOccurred();
- cs->removeClient(this);
-
- RefPtr<Node> n = m_scriptNode.release();
-
-#ifdef INSTRUMENT_LAYOUT_SCHEDULING
- if (!m_doc->ownerElement())
- printf("external script beginning execution at %d\n", m_doc->elapsedTime());
-#endif
-
- if (errorOccurred)
- n->dispatchEventForType(eventNames().errorEvent, true, false);
- else {
- if (static_cast<HTMLScriptElement*>(n.get())->shouldExecuteAsJavaScript())
- m_state = scriptExecution(sourceCode, m_state);
- n->dispatchEventForType(eventNames().loadEvent, false, false);
- }
-
- // The state of m_pendingScripts.isEmpty() can change inside the scriptExecution()
- // call above, so test afterwards.
- finished = m_pendingScripts.isEmpty();
- if (finished) {
- ASSERT(!m_hasScriptsWaitingForStylesheets);
- m_state.setLoadingExtScript(false);
-#ifdef INSTRUMENT_LAYOUT_SCHEDULING
- if (!m_doc->ownerElement())
- printf("external script finished execution at %d\n", m_doc->elapsedTime());
-#endif
- } else if (m_hasScriptsWaitingForStylesheets) {
- // m_hasScriptsWaitingForStylesheets flag might have changed during the script execution.
- // If it did we are now blocked waiting for stylesheets and should not execute more scripts until they arrive.
- finished = true;
- }
-
- // 'm_requestingScript' is true when we are called synchronously from
- // scriptHandler(). In that case scriptHandler() will take care
- // of m_pendingSrc.
- if (!m_requestingScript) {
- SegmentedString rest = m_pendingSrc;
- m_pendingSrc.clear();
- write(rest, false);
- // we might be deleted at this point, do not access any members.
- }
- }
-}
-
-bool HTMLTokenizer::isWaitingForScripts() const
-{
- return m_state.loadingExtScript();
-}
-
-void HTMLTokenizer::setSrc(const SegmentedString& source)
-{
- m_src = source;
-}
-
-void parseHTMLDocumentFragment(const String& source, DocumentFragment* fragment)
-{
- HTMLTokenizer tok(fragment);
- tok.setForceSynchronous(true);
- tok.write(source, true);
- tok.finish();
- ASSERT(!tok.processingData()); // make sure we're done (see 3963151)
-}
-
-UChar decodeNamedEntity(const char* name)
-{
- const Entity* e = findEntity(name, strlen(name));
- return e ? e->code : 0;
-}
-
-}
-
-
+/*
+ Copyright (C) 1997 Martin Jones (mjones@kde.org)
+ (C) 1997 Torben Weis (weis@kde.org)
+ (C) 1998 Waldo Bastian (bastian@kde.org)
+ (C) 1999 Lars Knoll (knoll@kde.org)
+ (C) 1999 Antti Koivisto (koivisto@kde.org)
+ (C) 2001 Dirk Mueller (mueller@kde.org)
+ Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
+ Copyright (C) 2005, 2006 Alexey Proskuryakov (ap@nypop.com)
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public License
+ along with this library; see the file COPYING.LIB. If not, write to
+ the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ Boston, MA 02110-1301, USA.
+*/
+
+#include "config.h"
+#include "HTMLTokenizer.h"
+
+#include "CSSHelper.h"
+#include "Cache.h"
+#include "CachedScript.h"
+#include "DocLoader.h"
+#include "DocumentFragment.h"
+#include "EventNames.h"
+#include "Frame.h"
+#include "FrameLoader.h"
+#include "FrameView.h"
+#include "HTMLElement.h"
+#include "HTMLNames.h"
+#include "HTMLParser.h"
+#include "HTMLScriptElement.h"
+#include "HTMLViewSourceDocument.h"
+#include "Page.h"
+#include "PreloadScanner.h"
+#include "ScriptController.h"
+#include "ScriptSourceCode.h"
+#include "ScriptValue.h"
+#include <wtf/ASCIICType.h>
+#include <wtf/CurrentTime.h>
+
+#include "HTMLEntityNames.c"
+
+#define PRELOAD_SCANNER_ENABLED 1
+// #define INSTRUMENT_LAYOUT_SCHEDULING 1
+
+using namespace WTF;
+using namespace std;
+
+namespace WebCore {
+
+using namespace HTMLNames;
+
+#if MOBILE
+// The mobile device needs to be responsive, as such the tokenizer chunk size is reduced.
+// This value is used to define how many characters the tokenizer will process before
+// yielding control.
+static const int defaultTokenizerChunkSize = 256;
+#else
+static const int defaultTokenizerChunkSize = 4096;
+#endif
+
+#if MOBILE
+// As the chunks are smaller (above), the tokenizer should not yield for as long a period, otherwise
+// it will take way too long to load a page.
+static const double defaultTokenizerTimeDelay = 0.300;
+#else
+// FIXME: We would like this constant to be 200ms.
+// Yielding more aggressively results in increased responsiveness and better incremental rendering.
+// It slows down overall page-load on slower machines, though, so for now we set a value of 500ms.
+static const double defaultTokenizerTimeDelay = 0.500;
+#endif
+
+// Lower-case ASCII literals that the tokenizer compares input against.
+// parseTag() steps through commentStart and doctypeStart one character at a time, and
+// parseDoctype() does the same with publicStart and systemStart.
+// NOTE(review): the "</..." end-tag prefixes are presumably installed as m_searchStopper for
+// parseSpecial() by code outside this chunk -- confirm.
+static const char commentStart [] = "<!--";
+static const char doctypeStart [] = "<!doctype";
+static const char publicStart [] = "public";
+static const char systemStart [] = "system";
+static const char scriptEnd [] = "</script";
+static const char xmpEnd [] = "</xmp";
+static const char styleEnd [] = "</style";
+static const char textareaEnd [] = "</textarea";
+static const char titleEnd [] = "</title";
+static const char iframeEnd [] = "</iframe";
+
+// Full support for MS Windows extensions to Latin-1.
+// Technically these extensions should only be activated for pages
+// marked "windows-1252" or "cp1252", but
+// in the standard Microsoft way, these extensions infect hundreds of thousands
+// of web pages. Note that people with non-latin-1 Microsoft extensions
+// are SOL.
+//
+// See: http://www.microsoft.com/globaldev/reference/WinCP.asp
+// http://www.bbsinc.com/iso8859.html
+// http://www.obviously.com/
+//
+// There may be better equivalents
+
+// We only need this for entities. For non-entity text, we handle this in the text encoding.
+
+// Replacement code units for 0x80-0x9F, indexed by (code unit - 0x80); consumed by fixUpChar().
+// Slots whose value equals their own index + 0x80 (e.g. 0x0081) leave the character unchanged.
+static const UChar windowsLatin1ExtensionArray[32] = {
+ 0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, // 80-87
+ 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F, // 88-8F
+ 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, // 90-97
+ 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178 // 98-9F
+};
+
+// Remaps a code unit in the 0x80-0x9F range through windowsLatin1ExtensionArray;
+// every other value is returned unchanged.
+static inline UChar fixUpChar(UChar c)
+{
+    // Clearing the low five bits leaves exactly 0x80 for the 32 values 0x80..0x9F.
+    const bool needsRemap = (c & ~0x1F) == 0x0080;
+    if (needsRemap)
+        return windowsLatin1ExtensionArray[c - 0x80];
+    return c;
+}
+
+// Returns true when the first `length` code units of s2 equal s1, where each character of s1
+// may match either as written or as its ASCII upper-case form.
+static inline bool tagMatch(const char* s1, const UChar* s2, unsigned length)
+{
+    for (unsigned pos = 0; pos < length; ++pos) {
+        const unsigned char expected = s1[pos];
+        const UChar actual = s2[pos];
+        if (expected == actual)
+            continue;
+
+        // Not an exact match; accept the upper-case spelling as well.
+        const unsigned char expectedUpper = toASCIIUpper(static_cast<char>(expected));
+        if (expectedUpper != actual)
+            return false;
+    }
+    return true;
+}
+
+// Appends (attrName, attributeValue) to this token's attribute map, creating the map on first
+// use. A nameless attribute is ignored. attrName is always reset to emptyAtom on return.
+inline void Token::addAttribute(AtomicString& attrName, const AtomicString& attributeValue, bool viewSourceMode)
+{
+    const bool hasName = !attrName.isEmpty();
+    if (hasName) {
+        ASSERT(!attrName.contains('/'));
+        RefPtr<MappedAttribute> attribute = MappedAttribute::create(attrName, attributeValue);
+
+        // The map is allocated lazily, sized for ten attributes up front.
+        if (!attrs) {
+            attrs = NamedMappedAttrMap::create();
+            attrs->reserveInitialCapacity(10);
+        }
+        attrs->insertAttribute(attribute.release(), viewSourceMode);
+    }
+
+    // Clear the caller's name regardless of whether anything was added.
+    attrName = emptyAtom;
+}
+
+// ----------------------------------------------------------------------------
+
+// Builds a tokenizer for a regular HTML document. A new HTMLParser is created for `doc`
+// (forwarding `reportErrors`), all buffers start out unallocated, and begin() then sets up
+// the initial parsing state.
+HTMLTokenizer::HTMLTokenizer(HTMLDocument* doc, bool reportErrors)
+ : Tokenizer()
+ , m_buffer(0)
+ , m_scriptCode(0)
+ , m_scriptCodeSize(0)
+ , m_scriptCodeCapacity(0)
+ , m_scriptCodeResync(0)
+ , m_executingScript(0)
+ , m_requestingScript(false)
+ , m_hasScriptsWaitingForStylesheets(false)
+ , m_timer(this, &HTMLTokenizer::timerFired)
+ , m_doc(doc)
+ , m_parser(new HTMLParser(doc, reportErrors))
+ , m_inWrite(false)
+ , m_fragment(false)
+{
+ begin();
+}
+
+// Builds a tokenizer for a view-source document. No HTMLParser is created (m_parser is 0).
+// NOTE(review): the `true` passed to Tokenizer presumably switches on view-source mode --
+// confirm against the Tokenizer constructor.
+HTMLTokenizer::HTMLTokenizer(HTMLViewSourceDocument* doc)
+ : Tokenizer(true)
+ , m_buffer(0)
+ , m_scriptCode(0)
+ , m_scriptCodeSize(0)
+ , m_scriptCodeCapacity(0)
+ , m_scriptCodeResync(0)
+ , m_executingScript(0)
+ , m_requestingScript(false)
+ , m_hasScriptsWaitingForStylesheets(false)
+ , m_timer(this, &HTMLTokenizer::timerFired)
+ , m_doc(doc)
+ , m_parser(0)
+ , m_inWrite(false)
+ , m_fragment(false)
+{
+ begin();
+}
+
+// Builds a tokenizer that parses into a DocumentFragment. The owning document is taken from
+// the fragment, a fragment-mode HTMLParser is created, and m_fragment is set so that
+// script execution is skipped (see scriptHandler() / scriptExecution()).
+// Unlike the other constructors, the Tokenizer base is default-initialized implicitly.
+HTMLTokenizer::HTMLTokenizer(DocumentFragment* frag)
+ : m_buffer(0)
+ , m_scriptCode(0)
+ , m_scriptCodeSize(0)
+ , m_scriptCodeCapacity(0)
+ , m_scriptCodeResync(0)
+ , m_executingScript(0)
+ , m_requestingScript(false)
+ , m_hasScriptsWaitingForStylesheets(false)
+ , m_timer(this, &HTMLTokenizer::timerFired)
+ , m_doc(frag->document())
+ , m_parser(new HTMLParser(frag))
+ , m_inWrite(false)
+ , m_fragment(true)
+{
+ begin();
+}
+
+// Returns the tokenizer to an idle state: detaches from every pending script, frees the
+// output and script buffers, stops the timer, and clears token and doctype scanning state.
+// Must not be called while a script is executing.
+void HTMLTokenizer::reset()
+{
+    ASSERT(m_executingScript == 0);
+
+    // Unregister from each pending script, oldest first.
+    while (!m_pendingScripts.isEmpty()) {
+        CachedScript* pendingScript = m_pendingScripts.first().get();
+        m_pendingScripts.removeFirst();
+        ASSERT(cache()->disabled() || pendingScript->accessCount() > 0);
+        pendingScript->removeClient(this);
+    }
+
+    // Output buffer.
+    fastFree(m_buffer);
+    m_dest = 0;
+    m_buffer = 0;
+    m_bufferSize = 0;
+
+    // Script/special-element buffer.
+    fastFree(m_scriptCode);
+    m_scriptCode = 0;
+    m_scriptCodeResync = 0;
+    m_scriptCodeCapacity = 0;
+    m_scriptCodeSize = 0;
+
+    m_timer.stop();
+    m_state.setAllowYield(false);
+    m_state.setForceSynchronous(false);
+
+    m_currentToken.reset();
+    m_doctypeToken.reset();
+    m_doctypeSearchCount = 0;
+    m_doctypeSecondarySearchCount = 0;
+    m_hasScriptsWaitingForStylesheets = false;
+}
+
+// Prepares the tokenizer for a fresh run: clears script bookkeeping, calls reset(), allocates
+// the initial output buffer, and picks the yield delay and chunk size (page overrides win
+// over the compiled-in defaults).
+void HTMLTokenizer::begin()
+{
+    // These must be cleared before reset(), which asserts that no script is executing.
+    m_executingScript = 0;
+    m_requestingScript = false;
+    m_hasScriptsWaitingForStylesheets = false;
+    m_state.setLoadingExtScript(false);
+    reset();
+
+    // Initial output buffer, measured in UChars.
+    const int initialBufferLength = 254;
+    m_bufferSize = initialBufferLength;
+    m_buffer = static_cast<UChar*>(fastMalloc(sizeof(UChar) * initialBufferLength));
+    m_dest = m_buffer;
+
+    tquote = NoQuote;
+    searchCount = 0;
+    m_state.setEntityState(NoEntity);
+    m_scriptTagSrcAttrValue = String();
+    m_pendingSrc.clear();
+    m_currentPrependingSrc = 0;
+    m_noMoreData = false;
+    m_brokenComments = false;
+    m_brokenServer = false;
+    m_lineNumber = 0;
+    m_currentScriptTagStartLineNumber = 0;
+    m_currentTagStartLineNumber = 0;
+    m_state.setForceSynchronous(false);
+
+    Page* page = m_doc->page();
+
+    const bool useCustomTimeDelay = page && page->hasCustomHTMLTokenizerTimeDelay();
+    if (!useCustomTimeDelay)
+        m_tokenizerTimeDelay = defaultTokenizerTimeDelay;
+    else
+        m_tokenizerTimeDelay = page->customHTMLTokenizerTimeDelay();
+
+    const bool useCustomChunkSize = page && page->hasCustomHTMLTokenizerChunkSize();
+    if (!useCustomChunkSize)
+        m_tokenizerChunkSize = defaultTokenizerChunkSize;
+    else
+        m_tokenizerChunkSize = page->customHTMLTokenizerChunkSize();
+}
+
+// Forwards the force-synchronous flag to the tokenizer state. Note that reset() and begin()
+// both clear this flag again.
+void HTMLTokenizer::setForceSynchronous(bool force)
+{
+ m_state.setForceSynchronous(force);
+}
+
+// Copies `list` into the output buffer as preformatted text, normalizing line endings:
+// CR, LF and CRLF each become a single '\n', and one leading line break is dropped when
+// state.discardLF() is set. Returns the updated state.
+HTMLTokenizer::State HTMLTokenizer::processListing(SegmentedString list, State state)
+{
+    while (!list.isEmpty()) {
+        const UChar current = *list;
+
+        // A '\n' directly after a '\r' belongs to the same CRLF pair; swallow it.
+        if (state.skipLF()) {
+            state.setSkipLF(false);
+            if (current == '\n') {
+                list.advance();
+                continue;
+            }
+        }
+
+        checkBuffer();
+
+        const bool isLineBreak = current == '\n' || current == '\r';
+        if (!isLineBreak) {
+            state.setDiscardLF(false);
+            *m_dest++ = current;
+            list.advance();
+            continue;
+        }
+
+        if (state.discardLF()) {
+            // Ignore this line break; only one is discarded.
+            state.setDiscardLF(false);
+        } else
+            *m_dest++ = '\n';
+
+        // Remember a CR so that the LF of an MS-DOS CRLF pair is skipped next time round.
+        if (current == '\r')
+            state.setSkipLF(true);
+
+        list.advance();
+    }
+
+    return state;
+}
+
+// Accumulates the raw contents of a "special" element into m_scriptCode. Exactly one of the
+// xmp / textarea / title / style / script / iframe state flags must be set on entry.
+// Scanning continues until the end-tag prefix in m_searchStopper has been matched and its
+// closing '>' seen; at that point the collected text and the end tag are emitted as tokens
+// (or, for scripts, handed to scriptHandler()) and the function returns with the rest of
+// the input still in src. If src runs dry first, the partial state is returned so that the
+// caller can resume later.
+HTMLTokenizer::State HTMLTokenizer::parseSpecial(SegmentedString& src, State state)
+{
+ ASSERT(state.inTextArea() || state.inTitle() || state.inIFrame() || !state.hasEntityState());
+ ASSERT(!state.hasTagState());
+ ASSERT(state.inXmp() + state.inTextArea() + state.inTitle() + state.inStyle() + state.inScript() + state.inIFrame() == 1 );
+ // Record the line of the opening <script> once; scriptHandler() resets it to 0.
+ if (state.inScript() && !m_currentScriptTagStartLineNumber)
+ m_currentScriptTagStartLineNumber = m_lineNumber;
+
+ // Resume a comment that was left unfinished by an earlier call.
+ if (state.inComment())
+ state = parseComment(src, state);
+
+ // Position in m_scriptCode of the most recent entity that produced no output there;
+ // used below so that matches overlapping that position are not accepted.
+ int lastDecodedEntityPosition = -1;
+ while (!src.isEmpty()) {
+ checkScriptBuffer();
+ UChar ch = *src;
+
+ // "<!-" already buffered and the next character is '-': a comment starts here
+ // (not inside <xmp>, and not while closing an end tag or in broken-comment mode).
+ if (!m_scriptCodeResync && !m_brokenComments &&
+ !state.inXmp() && ch == '-' && m_scriptCodeSize >= 3 && !src.escaped() &&
+ m_scriptCode[m_scriptCodeSize - 3] == '<' && m_scriptCode[m_scriptCodeSize - 2] == '!' && m_scriptCode[m_scriptCodeSize - 1] == '-' &&
+ (lastDecodedEntityPosition < m_scriptCodeSize - 3)) {
+ state.setInComment(true);
+ state = parseComment(src, state);
+ continue;
+ }
+ // The end tag was matched earlier (m_scriptCodeResync is set) and this unquoted '>'
+ // closes it: truncate the buffer back to just before the end tag and emit.
+ if (m_scriptCodeResync && !tquote && ch == '>') {
+ src.advancePastNonNewline();
+ m_scriptCodeSize = m_scriptCodeResync - 1;
+ m_scriptCodeResync = 0;
+ m_scriptCode[m_scriptCodeSize] = m_scriptCode[m_scriptCodeSize + 1] = 0;
+ if (state.inScript())
+ state = scriptHandler(state);
+ else {
+ // Emit the element's text, then synthesize the matching end-tag token.
+ state = processListing(SegmentedString(m_scriptCode, m_scriptCodeSize), state);
+ processToken();
+ if (state.inStyle()) {
+ m_currentToken.tagName = styleTag.localName();
+ m_currentToken.beginTag = false;
+ } else if (state.inTextArea()) {
+ m_currentToken.tagName = textareaTag.localName();
+ m_currentToken.beginTag = false;
+ } else if (state.inTitle()) {
+ m_currentToken.tagName = titleTag.localName();
+ m_currentToken.beginTag = false;
+ } else if (state.inXmp()) {
+ m_currentToken.tagName = xmpTag.localName();
+ m_currentToken.beginTag = false;
+ } else if (state.inIFrame()) {
+ m_currentToken.tagName = iframeTag.localName();
+ m_currentToken.beginTag = false;
+ }
+ processToken();
+ state.setInStyle(false);
+ state.setInScript(false);
+ state.setInTextArea(false);
+ state.setInTitle(false);
+ state.setInXmp(false);
+ state.setInIFrame(false);
+ tquote = NoQuote;
+ m_scriptCodeSize = m_scriptCodeResync = 0;
+ }
+ return state;
+ }
+ // possible end of tagname, lets check.
+ // The buffered tail must equal m_searchStopper and be followed by '>', '/' or whitespace.
+ if (!m_scriptCodeResync && !state.escaped() && !src.escaped() && (ch == '>' || ch == '/' || isASCIISpace(ch)) &&
+ m_scriptCodeSize >= m_searchStopperLength &&
+ tagMatch(m_searchStopper, m_scriptCode + m_scriptCodeSize - m_searchStopperLength, m_searchStopperLength) &&
+ (lastDecodedEntityPosition < m_scriptCodeSize - m_searchStopperLength)) {
+ // Remember (1-based) where the end tag begins; ch is re-examined on the next pass.
+ m_scriptCodeResync = m_scriptCodeSize-m_searchStopperLength+1;
+ tquote = NoQuote;
+ continue;
+ }
+ // While inside the end tag, track quoting so that a quoted '>' does not close it.
+ // A line break cancels any open quote.
+ if (m_scriptCodeResync && !state.escaped()) {
+ if (ch == '\"')
+ tquote = (tquote == NoQuote) ? DoubleQuote : ((tquote == SingleQuote) ? SingleQuote : NoQuote);
+ else if (ch == '\'')
+ tquote = (tquote == NoQuote) ? SingleQuote : (tquote == DoubleQuote) ? DoubleQuote : NoQuote;
+ else if (tquote != NoQuote && (ch == '\r' || ch == '\n'))
+ tquote = NoQuote;
+ }
+ // A backslash escapes the next character unless it is itself escaped.
+ state.setEscaped(!state.escaped() && ch == '\\');
+ // Entities are decoded only inside textarea, title and iframe content.
+ if (!m_scriptCodeResync && (state.inTextArea() || state.inTitle() || state.inIFrame()) && !src.escaped() && ch == '&') {
+ UChar* scriptCodeDest = m_scriptCode + m_scriptCodeSize;
+ src.advancePastNonNewline();
+ state = parseEntity(src, scriptCodeDest, state, m_cBufferPos, true, false);
+ // parseEntity either wrote raw text through scriptCodeDest or wrote nothing here.
+ if (scriptCodeDest == m_scriptCode + m_scriptCodeSize)
+ lastDecodedEntityPosition = m_scriptCodeSize;
+ else
+ m_scriptCodeSize = scriptCodeDest - m_scriptCode;
+ } else {
+ m_scriptCode[m_scriptCodeSize++] = ch;
+ src.advance(m_lineNumber);
+ }
+ }
+
+ return state;
+}
+
+// Finishes a <script> element whose end tag has just been seen by parseSpecial().
+// It decides whether an external script should be requested (a src attribute was recorded)
+// or the inline text executed, emits the script text and the closing-tag token, and then
+// parks the not-yet-tokenized input (m_src) so that anything the script writes is processed
+// first. Returns the updated state; in view-source mode it returns right after emitting
+// the tokens.
+HTMLTokenizer::State HTMLTokenizer::scriptHandler(State state)
+{
+ // We are inside a <script>
+ bool doScriptExec = false;
+ int startLine = m_currentScriptTagStartLineNumber + 1; // Script line numbers are 1 based, HTMLTokenizer line numbers are 0 based
+
+ // Reset m_currentScriptTagStartLineNumber to indicate that we've finished parsing the current script element
+ m_currentScriptTagStartLineNumber = 0;
+
+ // (Bugzilla 3837) Scripts following a frameset element should not execute or,
+ // in the case of extern scripts, even load.
+ bool followingFrameset = (m_doc->body() && m_doc->body()->hasTagName(framesetTag));
+
+ // Non-null only when an external script was successfully requested below.
+ CachedScript* cs = 0;
+ // don't load external scripts for standalone documents (for now)
+ if (!inViewSourceMode()) {
+ if (!m_scriptTagSrcAttrValue.isEmpty() && m_doc->frame()) {
+ // forget what we just got; load from src url instead
+ if (!m_parser->skipMode() && !followingFrameset) {
+#ifdef INSTRUMENT_LAYOUT_SCHEDULING
+ if (!m_doc->ownerElement())
+ printf("Requesting script at time %d\n", m_doc->elapsedTime());
+#endif
+ // The parser might have been stopped by for example a window.close call in an earlier script.
+ // If so, we don't want to load scripts.
+ if (!m_parserStopped && (cs = m_doc->docLoader()->requestScript(m_scriptTagSrcAttrValue, m_scriptTagCharsetAttrValue)))
+ m_pendingScripts.append(cs);
+ else
+ m_scriptNode = 0;
+ } else
+ m_scriptNode = 0;
+ m_scriptTagSrcAttrValue = String();
+ } else {
+ // Parse m_scriptCode containing <script> info
+#if USE(LOW_BANDWIDTH_DISPLAY)
+ if (m_doc->inLowBandwidthDisplay()) {
+ // ideal solution is only skipping internal JavaScript if there is external JavaScript.
+ // but internal JavaScript can use document.write() to create an external JavaScript,
+ // so we have to skip internal JavaScript all the time.
+ m_doc->frame()->loader()->needToSwitchOutLowBandwidthDisplay();
+ doScriptExec = false;
+ } else
+#endif
+ doScriptExec = m_scriptNode->shouldExecuteAsJavaScript();
+ m_scriptNode = 0;
+ }
+ }
+
+ // Emit the script text as a token (its text content is kept for inline execution),
+ // followed by the synthesized </script> end-tag token.
+ state = processListing(SegmentedString(m_scriptCode, m_scriptCodeSize), state);
+ RefPtr<Node> node = processToken();
+ String scriptString = node ? node->textContent() : "";
+ m_currentToken.tagName = scriptTag.localName();
+ m_currentToken.beginTag = false;
+ processToken();
+
+ state.setInScript(false);
+ m_scriptCodeSize = m_scriptCodeResync = 0;
+
+ // FIXME: The script should be syntax highlighted.
+ if (inViewSourceMode())
+ return state;
+
+ // Install a local prepending source for the duration of this call; the previous one
+ // is restored before returning.
+ SegmentedString* savedPrependingSrc = m_currentPrependingSrc;
+ SegmentedString prependingSrc;
+ m_currentPrependingSrc = &prependingSrc;
+
+ if (!m_parser->skipMode() && !followingFrameset) {
+ if (cs) {
+ // External script: set the remaining input aside until the script has loaded.
+ if (savedPrependingSrc)
+ savedPrependingSrc->append(m_src);
+ else
+ m_pendingSrc.prepend(m_src);
+ setSrc(SegmentedString());
+
+ // the ref() call below may call notifyFinished if the script is already in cache,
+ // and that mucks with the state directly, so we must write it back to the object.
+ m_state = state;
+ bool savedRequestingScript = m_requestingScript;
+ m_requestingScript = true;
+ cs->addClient(this);
+ m_requestingScript = savedRequestingScript;
+ state = m_state;
+ // will be 0 if script was already loaded and ref() executed it
+ if (!m_pendingScripts.isEmpty())
+ state.setLoadingExtScript(true);
+ } else if (!m_fragment && doScriptExec) {
+ // Inline script: set the remaining input aside, then run the script text.
+ if (!m_executingScript)
+ m_pendingSrc.prepend(m_src);
+ else
+ prependingSrc = m_src;
+ setSrc(SegmentedString());
+ state = scriptExecution(ScriptSourceCode(scriptString, m_doc->frame() ? m_doc->frame()->document()->url() : KURL(), startLine), state);
+ }
+ }
+
+ if (!m_executingScript && !state.loadingExtScript()) {
+ // Nothing is blocking any more: put the parked input back behind m_src.
+ m_src.append(m_pendingSrc);
+ m_pendingSrc.clear();
+ } else if (!prependingSrc.isEmpty()) {
+ // restore first so that the write appends in the right place
+ // (does not hurt to do it again below)
+ m_currentPrependingSrc = savedPrependingSrc;
+
+ // we need to do this slightly modified bit of one of the write() cases
+ // because we want to prepend to m_pendingSrc rather than appending
+ // if there's no previous prependingSrc
+ if (!m_pendingScripts.isEmpty()) {
+ if (m_currentPrependingSrc)
+ m_currentPrependingSrc->append(prependingSrc);
+ else
+ m_pendingSrc.prepend(prependingSrc);
+ } else {
+ m_state = state;
+ write(prependingSrc, false);
+ state = m_state;
+ }
+ }
+
+#if PRELOAD_SCANNER_ENABLED
+ // While blocked on an external script, let the preload scanner look through the
+ // parked input.
+ if (!m_pendingScripts.isEmpty() && !m_executingScript) {
+ if (!m_preloadScanner)
+ m_preloadScanner.set(new PreloadScanner(m_doc));
+ if (!m_preloadScanner->inProgress()) {
+ m_preloadScanner->begin();
+ m_preloadScanner->write(m_pendingSrc);
+ }
+ }
+#endif
+ m_currentPrependingSrc = savedPrependingSrc;
+
+ return state;
+}
+
+// Runs `sourceCode` through the frame's loader and then re-queues whatever was collected in
+// the local prepending source while the script ran. Does nothing (returns `state`
+// unchanged) for fragment tokenizers or when the document has no frame. m_executingScript
+// is incremented for the duration of the call, so nested executions are counted.
+HTMLTokenizer::State HTMLTokenizer::scriptExecution(const ScriptSourceCode& sourceCode, State state)
+{
+ if (m_fragment || !m_doc->frame())
+ return state;
+ m_executingScript++;
+
+ // Install a local prepending source for the duration of the script; the previous one
+ // is restored before returning.
+ SegmentedString* savedPrependingSrc = m_currentPrependingSrc;
+ SegmentedString prependingSrc;
+ m_currentPrependingSrc = &prependingSrc;
+
+#ifdef INSTRUMENT_LAYOUT_SCHEDULING
+ if (!m_doc->ownerElement())
+ printf("beginning script execution at %d\n", m_doc->elapsedTime());
+#endif
+
+ // Publish the local state to the member before running the script and read it back
+ // afterwards, so changes made to m_state during execution are not lost.
+ m_state = state;
+ m_doc->frame()->loader()->executeScript(sourceCode);
+ state = m_state;
+
+ state.setAllowYield(true);
+
+#ifdef INSTRUMENT_LAYOUT_SCHEDULING
+ if (!m_doc->ownerElement())
+ printf("ending script execution at %d\n", m_doc->elapsedTime());
+#endif
+
+ m_executingScript--;
+
+ if (!m_executingScript && !state.loadingExtScript()) {
+ // Outermost script finished and nothing is loading: the script's collected output
+ // goes ahead of the parked input, and everything is appended to m_src.
+ m_pendingSrc.prepend(prependingSrc);
+ m_src.append(m_pendingSrc);
+ m_pendingSrc.clear();
+ } else if (!prependingSrc.isEmpty()) {
+ // restore first so that the write appends in the right place
+ // (does not hurt to do it again below)
+ m_currentPrependingSrc = savedPrependingSrc;
+
+ // we need to do this slightly modified bit of one of the write() cases
+ // because we want to prepend to m_pendingSrc rather than appending
+ // if there's no previous prependingSrc
+ if (!m_pendingScripts.isEmpty()) {
+ if (m_currentPrependingSrc)
+ m_currentPrependingSrc->append(prependingSrc);
+ else
+ m_pendingSrc.prepend(prependingSrc);
+
+#if PRELOAD_SCANNER_ENABLED
+ // We are stuck waiting for another script. Lets check the source that
+ // was just document.write()n for anything to load.
+ PreloadScanner documentWritePreloadScanner(m_doc);
+ documentWritePreloadScanner.begin();
+ documentWritePreloadScanner.write(prependingSrc);
+ documentWritePreloadScanner.end();
+#endif
+ } else {
+ m_state = state;
+ write(prependingSrc, false);
+ state = m_state;
+ }
+ }
+
+ m_currentPrependingSrc = savedPrependingSrc;
+
+ return state;
+}
+
+// Copies comment text from src into m_scriptCode until a comment terminator is found.
+// "-->" and "--!>" always terminate; a bare '>' terminates only when m_brokenComments is set
+// and we are not inside script or style. Outside the special elements the comment is
+// emitted as a begin/end pair of comment tokens; inside them the text simply stays in
+// m_scriptCode. Returns with inComment still set if src ran out first.
+HTMLTokenizer::State HTMLTokenizer::parseComment(SegmentedString& src, State state)
+{
+ // FIXME: Why does this code even run for comments inside <script> and <style>? This seems bogus.
+ checkScriptBuffer(src.length());
+ while (!src.isEmpty()) {
+ UChar ch = *src;
+ m_scriptCode[m_scriptCodeSize++] = ch;
+ if (ch == '>') {
+ bool handleBrokenComments = m_brokenComments && !(state.inScript() || state.inStyle());
+ // Number of trailing buffered characters that belong to the terminator itself.
+ int endCharsCount = 1; // start off with one for the '>' character
+ if (m_scriptCodeSize > 2 && m_scriptCode[m_scriptCodeSize-3] == '-' && m_scriptCode[m_scriptCodeSize-2] == '-') {
+ endCharsCount = 3;
+ } else if (m_scriptCodeSize > 3 && m_scriptCode[m_scriptCodeSize-4] == '-' && m_scriptCode[m_scriptCodeSize-3] == '-' &&
+ m_scriptCode[m_scriptCodeSize-2] == '!') {
+ // Other browsers will accept --!> as a close comment, even though it's
+ // not technically valid.
+ endCharsCount = 4;
+ }
+ if (handleBrokenComments || endCharsCount > 1) {
+ src.advancePastNonNewline();
+ if (!(state.inTitle() || state.inScript() || state.inXmp() || state.inTextArea() || state.inStyle() || state.inIFrame())) {
+ checkScriptBuffer();
+ m_scriptCode[m_scriptCodeSize] = 0;
+ m_scriptCode[m_scriptCodeSize + 1] = 0;
+ m_currentToken.tagName = commentAtom;
+ m_currentToken.beginTag = true;
+ // The terminator characters are excluded from the emitted comment text.
+ state = processListing(SegmentedString(m_scriptCode, m_scriptCodeSize - endCharsCount), state);
+ processToken();
+ m_currentToken.tagName = commentAtom;
+ m_currentToken.beginTag = false;
+ processToken();
+ m_scriptCodeSize = 0;
+ }
+ state.setInComment(false);
+ return state; // Finished parsing comment
+ }
+ }
+ src.advance(m_lineNumber);
+ }
+
+ return state;
+}
+
+// Skips over a server-side include block: input is copied into m_scriptCode until the
+// two-character sequence "%>" has been seen, at which point the collected text is discarded
+// and the inServer flag cleared. Returns with inServer still set if src ran out first.
+HTMLTokenizer::State HTMLTokenizer::parseServer(SegmentedString& src, State state)
+{
+    // Make sure the script buffer can hold everything currently available.
+    checkScriptBuffer(src.length());
+
+    for (; !src.isEmpty(); src.advance(m_lineNumber)) {
+        const UChar current = *src;
+        m_scriptCode[m_scriptCodeSize++] = current;
+
+        const bool closesServerBlock = current == '>' && m_scriptCodeSize > 1 && m_scriptCode[m_scriptCodeSize - 2] == '%';
+        if (!closesServerBlock)
+            continue;
+
+        // Finished parsing server include
+        src.advancePastNonNewline();
+        state.setInServer(false);
+        m_scriptCodeSize = 0;
+        return state;
+    }
+
+    return state;
+}
+
+// Discards input until the end of a processing instruction. The instruction ends at "?>",
+// or at any '>' that is not inside a quoted string. Quote tracking uses the shared tquote
+// member. On completion the inProcessingInstruction flag is cleared and discardLF is set.
+// Returns with the flag still set if src ran out first.
+HTMLTokenizer::State HTMLTokenizer::parseProcessingInstruction(SegmentedString& src, State state)
+{
+ // Previous character seen during this call; used to recognize "?>".
+ UChar oldchar = 0;
+ while (!src.isEmpty()) {
+ UChar chbegin = *src;
+ if (chbegin == '\'')
+ tquote = tquote == SingleQuote ? NoQuote : SingleQuote;
+ else if (chbegin == '\"')
+ tquote = tquote == DoubleQuote ? NoQuote : DoubleQuote;
+ // Look for '?>'
+ // Some crappy sites omit the "?" before it, so
+ // we look for an unquoted '>' instead. (IE compatible)
+ else if (chbegin == '>' && (!tquote || oldchar == '?')) {
+ // We got a '?>' sequence, or an unquoted '>'
+ state.setInProcessingInstruction(false);
+ src.advancePastNonNewline();
+ state.setDiscardLF(true);
+ return state; // Finished parsing the processing instruction
+ }
+ src.advance(m_lineNumber);
+ oldchar = chbegin;
+ }
+
+ return state;
+}
+
+// Copies all of src into the output buffer as plain text. A '\r' is written as '\n', and a
+// '\n' that directly follows a '\r' is dropped, so CR and CRLF both collapse to a single '\n'.
+// Returns the updated state (skipLF may remain set across calls).
+HTMLTokenizer::State HTMLTokenizer::parseText(SegmentedString& src, State state)
+{
+    while (!src.isEmpty()) {
+        const UChar current = *src;
+
+        // Second half of a CRLF pair: consume it without emitting anything.
+        if (state.skipLF()) {
+            state.setSkipLF(false);
+            if (current == '\n') {
+                src.advancePastNewline(m_lineNumber);
+                continue;
+            }
+        }
+
+        // Grow the output buffer if necessary before writing.
+        checkBuffer();
+
+        const bool isCarriageReturn = current == '\r';
+        if (isCarriageReturn)
+            state.setSkipLF(true);
+        *m_dest++ = isCarriageReturn ? static_cast<UChar>('\n') : current;
+
+        src.advance(m_lineNumber);
+    }
+
+    return state;
+}
+
+
+// Incrementally decodes a character reference; the caller has already consumed the '&'.
+//
+// src         input; consumed as the reference is scanned.
+// dest        output cursor; advanced only when raw text is written (see below).
+// state       carries the entity sub-state so that the scan can resume on a later call.
+// cBufferPos  number of characters of the reference saved in m_cBuffer so far.
+// start       true on the first call for a reference; resets cBufferPos, the sub-state and
+//             EntityUnicodeValue.
+// parsingTag  true when called for an attribute value; enables the IE-compatibility rule below.
+//
+// Outcome once the reference has been fully scanned:
+//  - valid value, not view-source: the decoded character (or surrogate pair) is pushed back
+//    onto src rather than written to dest;
+//  - valid value, view-source: the raw "&...;" text is written to dest;
+//  - invalid or unknown: '&' plus the saved characters are written to dest as plain text.
+// If src runs out first, the function returns with the entity sub-state still set.
+HTMLTokenizer::State HTMLTokenizer::parseEntity(SegmentedString& src, UChar*& dest, State state, unsigned& cBufferPos, bool start, bool parsingTag)
+{
+ if (start) {
+ cBufferPos = 0;
+ state.setEntityState(SearchEntity);
+ EntityUnicodeValue = 0;
+ }
+
+ while(!src.isEmpty()) {
+ UChar cc = *src;
+ switch(state.entityState()) {
+ case NoEntity:
+ ASSERT(state.entityState() != NoEntity);
+ return state;
+
+ case SearchEntity:
+ // '#' introduces a numeric reference; anything else is treated as a name.
+ if (cc == '#') {
+ m_cBuffer[cBufferPos++] = cc;
+ src.advancePastNonNewline();
+ state.setEntityState(NumericSearch);
+ } else
+ state.setEntityState(EntityName);
+ break;
+
+ case NumericSearch:
+ if (cc == 'x' || cc == 'X') {
+ m_cBuffer[cBufferPos++] = cc;
+ src.advancePastNonNewline();
+ state.setEntityState(Hexadecimal);
+ } else if (cc >= '0' && cc <= '9')
+ state.setEntityState(Decimal);
+ else
+ state.setEntityState(SearchSemicolon);
+ break;
+
+ case Hexadecimal: {
+ // Never store more than 10 characters of the reference in m_cBuffer.
+ int ll = min(src.length(), 10 - cBufferPos);
+ while (ll--) {
+ cc = *src;
+ if (!((cc >= '0' && cc <= '9') || (cc >= 'a' && cc <= 'f') || (cc >= 'A' && cc <= 'F'))) {
+ state.setEntityState(SearchSemicolon);
+ break;
+ }
+ int digit;
+ if (cc < 'A')
+ digit = cc - '0';
+ else
+ digit = (cc - 'A' + 10) & 0xF; // handle both upper and lower case without a branch
+ EntityUnicodeValue = EntityUnicodeValue * 16 + digit;
+ m_cBuffer[cBufferPos++] = cc;
+ src.advancePastNonNewline();
+ }
+ if (cBufferPos == 10)
+ state.setEntityState(SearchSemicolon);
+ break;
+ }
+ case Decimal:
+ {
+ // Never store more than 9 characters of the reference in m_cBuffer.
+ int ll = min(src.length(), 9-cBufferPos);
+ while(ll--) {
+ cc = *src;
+
+ if (!(cc >= '0' && cc <= '9')) {
+ state.setEntityState(SearchSemicolon);
+ break;
+ }
+
+ EntityUnicodeValue = EntityUnicodeValue * 10 + (cc - '0');
+ m_cBuffer[cBufferPos++] = cc;
+ src.advancePastNonNewline();
+ }
+ if (cBufferPos == 9)
+ state.setEntityState(SearchSemicolon);
+ break;
+ }
+ case EntityName:
+ {
+ // Collect up to 9 ASCII alphanumerics as the candidate entity name.
+ int ll = min(src.length(), 9-cBufferPos);
+ while(ll--) {
+ cc = *src;
+
+ if (!((cc >= 'a' && cc <= 'z') || (cc >= '0' && cc <= '9') || (cc >= 'A' && cc <= 'Z'))) {
+ state.setEntityState(SearchSemicolon);
+ break;
+ }
+
+ m_cBuffer[cBufferPos++] = cc;
+ src.advancePastNonNewline();
+ }
+ if (cBufferPos == 9)
+ state.setEntityState(SearchSemicolon);
+ if (state.entityState() == SearchSemicolon) {
+ if(cBufferPos > 1) {
+ // Since the maximum length of entity name is 9,
+ // so a single char array which is allocated on
+ // the stack, its length is 10, should be OK.
+ // Also if we have an illegal character, we treat it
+ // as illegal entity name.
+ unsigned testedEntityNameLen = 0;
+ char tmpEntityNameBuffer[10];
+
+ ASSERT(cBufferPos < 10);
+ for (; testedEntityNameLen < cBufferPos; ++testedEntityNameLen) {
+ if (m_cBuffer[testedEntityNameLen] > 0x7e)
+ break;
+ tmpEntityNameBuffer[testedEntityNameLen] = m_cBuffer[testedEntityNameLen];
+ }
+
+ const Entity *e;
+
+ if (testedEntityNameLen == cBufferPos)
+ e = findEntity(tmpEntityNameBuffer, cBufferPos);
+ else
+ e = 0;
+
+ if(e)
+ EntityUnicodeValue = e->code;
+
+ // be IE compatible
+ // Inside a tag, a named entity above 255 is honoured only when followed by ';'.
+ if(parsingTag && EntityUnicodeValue > 255 && *src != ';')
+ EntityUnicodeValue = 0;
+ }
+ }
+ else
+ break;
+ // Deliberate fall through into SearchSemicolon once the name has been scanned.
+ }
+ case SearchSemicolon:
+ // Don't allow values that are more than 21 bits.
+ if (EntityUnicodeValue > 0 && EntityUnicodeValue <= 0x10FFFF) {
+ if (!inViewSourceMode()) {
+ // The trailing ';' is optional; consume it when present.
+ if (*src == ';')
+ src.advancePastNonNewline();
+ // The decoded character is pushed back onto src, not written to dest.
+ if (EntityUnicodeValue <= 0xFFFF) {
+ checkBuffer();
+ src.push(fixUpChar(EntityUnicodeValue));
+ } else {
+ // Convert to UTF-16, using surrogate code points.
+ checkBuffer(2);
+ src.push(U16_LEAD(EntityUnicodeValue));
+ src.push(U16_TRAIL(EntityUnicodeValue));
+ }
+ } else {
+ // FIXME: We should eventually colorize entities by sending them as a special token.
+ // View-source keeps the reference exactly as written.
+ checkBuffer(11);
+ *dest++ = '&';
+ for (unsigned i = 0; i < cBufferPos; i++)
+ dest[i] = m_cBuffer[i];
+ dest += cBufferPos;
+ if (*src == ';') {
+ *dest++ = ';';
+ src.advancePastNonNewline();
+ }
+ }
+ } else {
+ checkBuffer(10);
+ // ignore the sequence, add it to the buffer as plaintext
+ *dest++ = '&';
+ for (unsigned i = 0; i < cBufferPos; i++)
+ dest[i] = m_cBuffer[i];
+ dest += cBufferPos;
+ }
+
+ state.setEntityState(NoEntity);
+ return state;
+ }
+ }
+
+ return state;
+}
+
+// Advances the DOCTYPE state machine kept in m_doctypeToken over the characters in src.
+// The caller has already consumed "<!DOCTYPE" and set state.inDoctype().
+//
+// The name, public ID and system ID are accumulated into m_doctypeToken. In view-source mode
+// every consumed character except the terminating '>' is also appended to
+// m_doctypeToken.m_source.
+//
+// When '>' is reached, inDoctype is cleared. A well-formed doctype is always emitted through
+// processDoctypeToken(); a malformed (bogus) one is emitted only in view-source mode, so that
+// the source view still shows it.
+//
+// Returns the updated state. If src runs out before '>' is seen, inDoctype stays set and
+// scanning resumes on the next call.
+HTMLTokenizer::State HTMLTokenizer::parseDoctype(SegmentedString& src, State state)
+{
+    ASSERT(state.inDoctype());
+    while (!src.isEmpty() && state.inDoctype()) {
+        UChar c = *src;
+        bool isWhitespace = c == '\r' || c == '\n' || c == '\t' || c == ' ';
+        switch (m_doctypeToken.state()) {
+        case DoctypeBegin: {
+            // Consume at most one whitespace character, then start looking for the name.
+            m_doctypeToken.setState(DoctypeBeforeName);
+            if (isWhitespace) {
+                src.advance(m_lineNumber);
+                if (inViewSourceMode())
+                    m_doctypeToken.m_source.append(c);
+            }
+            break;
+        }
+        case DoctypeBeforeName: {
+            if (c == '>') {
+                // Malformed. Just exit.
+                src.advancePastNonNewline();
+                state.setInDoctype(false);
+                if (inViewSourceMode())
+                    processDoctypeToken();
+            } else if (isWhitespace) {
+                src.advance(m_lineNumber);
+                if (inViewSourceMode())
+                    m_doctypeToken.m_source.append(c);
+            } else
+                m_doctypeToken.setState(DoctypeName); // Re-examine c as the first name character.
+            break;
+        }
+        case DoctypeName: {
+            if (c == '>') {
+                // Valid doctype. Emit it.
+                src.advancePastNonNewline();
+                state.setInDoctype(false);
+                processDoctypeToken();
+            } else if (isWhitespace) {
+                m_doctypeSearchCount = 0; // Used now to scan for PUBLIC
+                m_doctypeSecondarySearchCount = 0; // Used now to scan for SYSTEM
+                m_doctypeToken.setState(DoctypeAfterName);
+                src.advance(m_lineNumber);
+                if (inViewSourceMode())
+                    m_doctypeToken.m_source.append(c);
+            } else {
+                src.advancePastNonNewline();
+                m_doctypeToken.m_name.append(c);
+                if (inViewSourceMode())
+                    m_doctypeToken.m_source.append(c);
+            }
+            break;
+        }
+        case DoctypeAfterName: {
+            if (c == '>') {
+                // Valid doctype. Emit it.
+                src.advancePastNonNewline();
+                state.setInDoctype(false);
+                processDoctypeToken();
+            } else if (!isWhitespace) {
+                // Match the keyword case-insensitively, one character per iteration.
+                src.advancePastNonNewline();
+                if (toASCIILower(c) == publicStart[m_doctypeSearchCount]) {
+                    m_doctypeSearchCount++;
+                    if (m_doctypeSearchCount == 6)
+                        // Found 'PUBLIC' sequence
+                        m_doctypeToken.setState(DoctypeBeforePublicID);
+                } else if (m_doctypeSearchCount > 0) {
+                    // Started to spell PUBLIC but diverged.
+                    m_doctypeSearchCount = 0;
+                    m_doctypeToken.setState(DoctypeBogus);
+                } else if (toASCIILower(c) == systemStart[m_doctypeSecondarySearchCount]) {
+                    m_doctypeSecondarySearchCount++;
+                    if (m_doctypeSecondarySearchCount == 6)
+                        // Found 'SYSTEM' sequence
+                        m_doctypeToken.setState(DoctypeBeforeSystemID);
+                } else {
+                    m_doctypeSecondarySearchCount = 0;
+                    m_doctypeToken.setState(DoctypeBogus);
+                }
+                if (inViewSourceMode())
+                    m_doctypeToken.m_source.append(c);
+            } else {
+                src.advance(m_lineNumber); // Whitespace keeps us in the after name state.
+                if (inViewSourceMode())
+                    m_doctypeToken.m_source.append(c);
+            }
+            break;
+        }
+        case DoctypeBeforePublicID: {
+            if (c == '\"' || c == '\'') {
+                // Remember which quote opened the ID so that only the same one closes it.
+                tquote = c == '\"' ? DoubleQuote : SingleQuote;
+                m_doctypeToken.setState(DoctypePublicID);
+                src.advancePastNonNewline();
+                if (inViewSourceMode())
+                    m_doctypeToken.m_source.append(c);
+            } else if (c == '>') {
+                // Considered bogus. Don't process the doctype.
+                src.advancePastNonNewline();
+                state.setInDoctype(false);
+                if (inViewSourceMode())
+                    processDoctypeToken();
+            } else if (isWhitespace) {
+                src.advance(m_lineNumber);
+                if (inViewSourceMode())
+                    m_doctypeToken.m_source.append(c);
+            } else
+                m_doctypeToken.setState(DoctypeBogus);
+            break;
+        }
+        case DoctypePublicID: {
+            if ((c == '\"' && tquote == DoubleQuote) || (c == '\'' && tquote == SingleQuote)) {
+                src.advancePastNonNewline();
+                m_doctypeToken.setState(DoctypeAfterPublicID);
+                if (inViewSourceMode())
+                    m_doctypeToken.m_source.append(c);
+            } else if (c == '>') {
+                // Considered bogus. Don't process the doctype.
+                src.advancePastNonNewline();
+                state.setInDoctype(false);
+                if (inViewSourceMode())
+                    processDoctypeToken();
+            } else {
+                m_doctypeToken.m_publicID.append(c);
+                src.advance(m_lineNumber);
+                if (inViewSourceMode())
+                    m_doctypeToken.m_source.append(c);
+            }
+            break;
+        }
+        case DoctypeAfterPublicID:
+            if (c == '\"' || c == '\'') {
+                // A quoted string after the public ID is the system ID.
+                tquote = c == '\"' ? DoubleQuote : SingleQuote;
+                m_doctypeToken.setState(DoctypeSystemID);
+                src.advancePastNonNewline();
+                if (inViewSourceMode())
+                    m_doctypeToken.m_source.append(c);
+            } else if (c == '>') {
+                // Valid doctype. Emit it now.
+                src.advancePastNonNewline();
+                state.setInDoctype(false);
+                processDoctypeToken();
+            } else if (isWhitespace) {
+                src.advance(m_lineNumber);
+                if (inViewSourceMode())
+                    m_doctypeToken.m_source.append(c);
+            } else
+                m_doctypeToken.setState(DoctypeBogus);
+            break;
+        case DoctypeBeforeSystemID:
+            if (c == '\"' || c == '\'') {
+                tquote = c == '\"' ? DoubleQuote : SingleQuote;
+                m_doctypeToken.setState(DoctypeSystemID);
+                src.advancePastNonNewline();
+                if (inViewSourceMode())
+                    m_doctypeToken.m_source.append(c);
+            } else if (c == '>') {
+                // Considered bogus. Don't process the doctype.
+                src.advancePastNonNewline();
+                state.setInDoctype(false);
+                // As in every other bogus exit, still hand the token over in view-source
+                // mode; otherwise "<!DOCTYPE x SYSTEM>" vanishes from the source view.
+                if (inViewSourceMode())
+                    processDoctypeToken();
+            } else if (isWhitespace) {
+                src.advance(m_lineNumber);
+                if (inViewSourceMode())
+                    m_doctypeToken.m_source.append(c);
+            } else
+                m_doctypeToken.setState(DoctypeBogus);
+            break;
+        case DoctypeSystemID:
+            if ((c == '\"' && tquote == DoubleQuote) || (c == '\'' && tquote == SingleQuote)) {
+                src.advancePastNonNewline();
+                m_doctypeToken.setState(DoctypeAfterSystemID);
+                if (inViewSourceMode())
+                    m_doctypeToken.m_source.append(c);
+            } else if (c == '>') {
+                // Considered bogus. Don't process the doctype.
+                src.advancePastNonNewline();
+                state.setInDoctype(false);
+                if (inViewSourceMode())
+                    processDoctypeToken();
+            } else {
+                m_doctypeToken.m_systemID.append(c);
+                src.advance(m_lineNumber);
+                if (inViewSourceMode())
+                    m_doctypeToken.m_source.append(c);
+            }
+            break;
+        case DoctypeAfterSystemID:
+            if (c == '>') {
+                // Valid doctype. Emit it now.
+                src.advancePastNonNewline();
+                state.setInDoctype(false);
+                processDoctypeToken();
+            } else if (isWhitespace) {
+                src.advance(m_lineNumber);
+                if (inViewSourceMode())
+                    m_doctypeToken.m_source.append(c);
+            } else
+                m_doctypeToken.setState(DoctypeBogus);
+            break;
+        case DoctypeBogus:
+            if (c == '>') {
+                // Done with the bogus doctype.
+                src.advancePastNonNewline();
+                state.setInDoctype(false);
+                if (inViewSourceMode())
+                    processDoctypeToken();
+            } else {
+                src.advance(m_lineNumber); // Just keep scanning for '>'
+                if (inViewSourceMode())
+                    m_doctypeToken.m_source.append(c);
+            }
+            break;
+        default:
+            break;
+        }
+    }
+    return state;
+}
+
+HTMLTokenizer::State HTMLTokenizer::parseTag(SegmentedString& src, State state) /*
+ * Tag-parsing state machine.
+ *
+ * Consumes characters from src for as long as src is non-empty, dispatching on
+ * state.tagState(): NoTag, TagName, SearchAttribute, AttributeName, SearchEqual,
+ * SearchValue, QuotedValue, Value and SearchEnd.
+ *
+ * src   - input being tokenized; advanced in place.
+ * state - tokenizer state on entry (must not carry an entity state).
+ *
+ * Returns the updated state. The local cBufferPos is stored back into
+ * m_cBufferPos before every return.
+ */
+{
+ ASSERT(!state.hasEntityState());
+
+ unsigned cBufferPos = m_cBufferPos;
+
+ bool lastIsSlash = false;
+
+ while (!src.isEmpty()) {
+ checkBuffer();
+ switch(state.tagState()) {
+ case NoTag:
+ {
+ m_cBufferPos = cBufferPos;
+ return state;
+ }
+ case TagName:
+ {
+ if (searchCount > 0) { // still matching the '<!--' prefix against commentStart
+ if (*src == commentStart[searchCount]) {
+ searchCount++;
+ if (searchCount == 2)
+ m_doctypeSearchCount++; // A '!' is also part of a doctype, so we are moving through that still as well.
+ else
+ m_doctypeSearchCount = 0;
+ if (searchCount == 4) {
+ // Found '<!--' sequence
+ src.advancePastNonNewline();
+ m_dest = m_buffer; // ignore the previous part of this tag
+ state.setInComment(true);
+ state.setTagState(NoTag);
+
+ // Fix bug 34302 at kde.bugs.org. Go ahead and treat
+ // <!--> as a valid comment, since both mozilla and IE on windows
+ // can handle this case. Only do this in quirks mode. -dwh
+ if (!src.isEmpty() && *src == '>' && m_doc->inCompatMode()) {
+ state.setInComment(false);
+ src.advancePastNonNewline();
+ if (!src.isEmpty())
+ m_cBuffer[cBufferPos++] = *src;
+ } else
+ state = parseComment(src, state);
+
+ m_cBufferPos = cBufferPos;
+ return state; // Finished parsing tag!
+ }
+ m_cBuffer[cBufferPos++] = *src;
+ src.advancePastNonNewline();
+ break;
+ } else
+ searchCount = 0; // Stop looking for '<!--' sequence
+ }
+
+ if (m_doctypeSearchCount > 0) { // still matching the '<!DOCTYPE' prefix (case-insensitively) against doctypeStart
+ if (toASCIILower(*src) == doctypeStart[m_doctypeSearchCount]) {
+ m_doctypeSearchCount++;
+ m_cBuffer[cBufferPos++] = *src;
+ src.advancePastNonNewline();
+ if (m_doctypeSearchCount == 9) {
+ // Found '<!DOCTYPE' sequence
+ state.setInDoctype(true);
+ state.setTagState(NoTag);
+ m_doctypeToken.reset();
+ if (inViewSourceMode())
+ m_doctypeToken.m_source.append(m_cBuffer, cBufferPos);
+ state = parseDoctype(src, state);
+ m_cBufferPos = cBufferPos;
+ return state;
+ }
+ break;
+ } else
+ m_doctypeSearchCount = 0; // Stop looking for '<!DOCTYPE' sequence
+ }
+
+ bool finish = false;
+ unsigned int ll = min(src.length(), CBUFLEN - cBufferPos); // bounded by the input available and by CBUFLEN - cBufferPos
+ while (ll--) {
+ UChar curchar = *src;
+ if (isASCIISpace(curchar) || curchar == '>' || curchar == '<') {
+ finish = true;
+ break;
+ }
+
+ // tolower() shows up on profiles. This is faster!
+ if (curchar >= 'A' && curchar <= 'Z' && !inViewSourceMode())
+ m_cBuffer[cBufferPos++] = curchar + ('a' - 'A');
+ else
+ m_cBuffer[cBufferPos++] = curchar;
+ src.advancePastNonNewline();
+ }
+
+ // Disadvantage: we add the possible rest of the tag
+ // as attribute names. ### judge if this causes problems
+ if (finish || CBUFLEN == cBufferPos) { // a delimiter ended the name, or cBufferPos reached CBUFLEN
+ bool beginTag;
+ UChar* ptr = m_cBuffer;
+ unsigned int len = cBufferPos;
+ m_cBuffer[cBufferPos] = '\0';
+ if ((cBufferPos > 0) && (*ptr == '/')) {
+ // End Tag
+ beginTag = false;
+ ptr++;
+ len--;
+ }
+ else
+ // Start Tag
+ beginTag = true;
+
+ // Ignore the / in fake xml tags like <br/>. We trim off the "/" so that we'll get "br" as the tag name and not "br/".
+ if (len > 1 && ptr[len-1] == '/' && !inViewSourceMode())
+ ptr[--len] = '\0';
+
+ // Now that we've shaved off any invalid / that might have followed the name), make the tag.
+ // FIXME: FireFox and WinIE turn !foo nodes into comments, we ignore comments. (fast/parser/tag-with-exclamation-point.html)
+ if (ptr[0] != '!' || inViewSourceMode()) {
+ m_currentToken.tagName = AtomicString(ptr);
+ m_currentToken.beginTag = beginTag;
+ }
+ m_dest = m_buffer;
+ state.setTagState(SearchAttribute);
+ cBufferPos = 0;
+ }
+ break;
+ }
+ case SearchAttribute:
+ while(!src.isEmpty()) {
+ UChar curchar = *src;
+ // In this mode just ignore any quotes we encounter and treat them like spaces.
+ if (!isASCIISpace(curchar) && curchar != '\'' && curchar != '"') {
+ if (curchar == '<' || curchar == '>')
+ state.setTagState(SearchEnd);
+ else
+ state.setTagState(AttributeName);
+
+ cBufferPos = 0;
+ break;
+ }
+ if (inViewSourceMode())
+ m_currentToken.addViewSourceChar(curchar);
+ src.advance(m_lineNumber);
+ }
+ break;
+ case AttributeName:
+ {
+ int ll = min(src.length(), CBUFLEN - cBufferPos);
+ while (ll--) {
+ UChar curchar = *src;
+ // If we encounter a "/" when scanning an attribute name, treat it as a delimiter. This allows the
+ // cases like <input type=checkbox checked/> to work (and accommodates XML-style syntax as per HTML5).
+ if (curchar <= '>' && (curchar >= '<' || isASCIISpace(curchar) || curchar == '/')) {
+ m_cBuffer[cBufferPos] = '\0';
+ m_attrName = AtomicString(m_cBuffer);
+ m_dest = m_buffer;
+ *m_dest++ = 0; // placeholder slot: attribute values are later read starting at m_buffer + 1
+ state.setTagState(SearchEqual);
+ if (inViewSourceMode())
+ m_currentToken.addViewSourceChar('a');
+ break;
+ }
+
+ // tolower() shows up on profiles. This is faster!
+ if (curchar >= 'A' && curchar <= 'Z' && !inViewSourceMode())
+ m_cBuffer[cBufferPos++] = curchar + ('a' - 'A');
+ else
+ m_cBuffer[cBufferPos++] = curchar;
+
+ src.advance(m_lineNumber);
+ }
+ if (cBufferPos == CBUFLEN) { // cBufferPos reached CBUFLEN: finish the attribute name with what has been collected
+ m_cBuffer[cBufferPos] = '\0';
+ m_attrName = AtomicString(m_cBuffer);
+ m_dest = m_buffer;
+ *m_dest++ = 0;
+ state.setTagState(SearchEqual);
+ if (inViewSourceMode())
+ m_currentToken.addViewSourceChar('a');
+ }
+ break;
+ }
+ case SearchEqual:
+ while (!src.isEmpty()) {
+ UChar curchar = *src;
+
+ if (lastIsSlash && curchar == '>') {
+ // This is a quirk (with a long sad history). We have to do this
+ // since widgets do <script src="foo.js"/> and expect the tag to close.
+ if (m_currentToken.tagName == scriptTag)
+ m_currentToken.selfClosingTag = true;
+ m_currentToken.brokenXMLStyle = true;
+ }
+
+ // In this mode just ignore any quotes or slashes we encounter and treat them like spaces.
+ if (!isASCIISpace(curchar) && curchar != '\'' && curchar != '"' && curchar != '/') {
+ if (curchar == '=') {
+ state.setTagState(SearchValue);
+ if (inViewSourceMode())
+ m_currentToken.addViewSourceChar(curchar);
+ src.advancePastNonNewline();
+ } else {
+ m_currentToken.addAttribute(m_attrName, emptyAtom, inViewSourceMode()); // no '=' followed the name: add the attribute with an empty value
+ m_dest = m_buffer;
+ state.setTagState(SearchAttribute);
+ lastIsSlash = false;
+ }
+ break;
+ }
+ if (inViewSourceMode())
+ m_currentToken.addViewSourceChar(curchar);
+
+ lastIsSlash = curchar == '/';
+
+ src.advance(m_lineNumber);
+ }
+ break;
+ case SearchValue:
+ while (!src.isEmpty()) {
+ UChar curchar = *src;
+ if (!isASCIISpace(curchar)) {
+ if (curchar == '\'' || curchar == '\"') {
+ tquote = curchar == '\"' ? DoubleQuote : SingleQuote;
+ state.setTagState(QuotedValue);
+ if (inViewSourceMode())
+ m_currentToken.addViewSourceChar(curchar);
+ src.advancePastNonNewline();
+ } else
+ state.setTagState(Value);
+
+ break;
+ }
+ if (inViewSourceMode())
+ m_currentToken.addViewSourceChar(curchar);
+ src.advance(m_lineNumber);
+ }
+ break;
+ case QuotedValue:
+ while (!src.isEmpty()) {
+ checkBuffer();
+
+ UChar curchar = *src;
+ if (curchar <= '>' && !src.escaped()) {
+ if (curchar == '>' && m_attrName.isEmpty()) {
+ // Handle a case like <img '>. Just go ahead and be willing
+ // to close the whole tag. Don't consume the character and
+ // just go back into SearchEnd while ignoring the whole
+ // value.
+ // FIXME: Note that this is actually not a very good solution.
+ // It doesn't handle the general case of
+ // unmatched quotes among attributes that have names. -dwh
+ while (m_dest > m_buffer + 1 && (m_dest[-1] == '\n' || m_dest[-1] == '\r'))
+ m_dest--; // remove trailing newlines
+ AtomicString attributeValue(m_buffer + 1, m_dest - m_buffer - 1);
+ if (!attributeValue.contains('/'))
+ m_attrName = attributeValue; // Just make the name/value match. (FIXME: Is this some WinIE quirk?)
+ m_currentToken.addAttribute(m_attrName, attributeValue, inViewSourceMode());
+ if (inViewSourceMode())
+ m_currentToken.addViewSourceChar('x');
+ state.setTagState(SearchAttribute);
+ m_dest = m_buffer;
+ tquote = NoQuote;
+ break;
+ }
+
+ if (curchar == '&') {
+ src.advancePastNonNewline();
+ state = parseEntity(src, m_dest, state, cBufferPos, true, true);
+ break;
+ }
+
+ if ((tquote == SingleQuote && curchar == '\'') || (tquote == DoubleQuote && curchar == '\"')) {
+ // some <input type=hidden> rely on trailing spaces. argh
+ while (m_dest > m_buffer + 1 && (m_dest[-1] == '\n' || m_dest[-1] == '\r'))
+ m_dest--; // remove trailing newlines
+ AtomicString attributeValue(m_buffer + 1, m_dest - m_buffer - 1);
+ if (m_attrName.isEmpty() && !attributeValue.contains('/')) {
+ m_attrName = attributeValue; // Make the name match the value. (FIXME: Is this a WinIE quirk?)
+ if (inViewSourceMode())
+ m_currentToken.addViewSourceChar('x');
+ } else if (inViewSourceMode())
+ m_currentToken.addViewSourceChar('v');
+ m_currentToken.addAttribute(m_attrName, attributeValue, inViewSourceMode());
+ m_dest = m_buffer;
+ state.setTagState(SearchAttribute);
+ tquote = NoQuote;
+ if (inViewSourceMode())
+ m_currentToken.addViewSourceChar(curchar);
+ src.advancePastNonNewline();
+ break;
+ }
+ }
+
+ *m_dest++ = curchar;
+ src.advance(m_lineNumber);
+ }
+ break;
+ case Value:
+ while(!src.isEmpty()) {
+ checkBuffer();
+ UChar curchar = *src;
+ if (curchar <= '>' && !src.escaped()) {
+ // parse Entities
+ if (curchar == '&') {
+ src.advancePastNonNewline();
+ state = parseEntity(src, m_dest, state, cBufferPos, true, true);
+ break;
+ }
+ // no quotes. Every space means end of value
+ // '/' does not delimit in IE!
+ if (isASCIISpace(curchar) || curchar == '>') {
+ AtomicString attributeValue(m_buffer + 1, m_dest - m_buffer - 1);
+ m_currentToken.addAttribute(m_attrName, attributeValue, inViewSourceMode());
+ if (inViewSourceMode())
+ m_currentToken.addViewSourceChar('v');
+ m_dest = m_buffer;
+ state.setTagState(SearchAttribute);
+ break;
+ }
+ }
+
+ *m_dest++ = curchar;
+ src.advance(m_lineNumber);
+ }
+ break;
+ case SearchEnd:
+ {
+ while (!src.isEmpty()) {
+ UChar ch = *src;
+ if (ch == '>' || ch == '<')
+ break;
+ if (ch == '/')
+ m_currentToken.selfClosingTag = true;
+ if (inViewSourceMode())
+ m_currentToken.addViewSourceChar(ch);
+ src.advance(m_lineNumber);
+ }
+ if (src.isEmpty()) // input ran out before '>' or '<' was seen; the tag state stays SearchEnd
+ break;
+
+ searchCount = 0; // Stop looking for '<!--' sequence
+ state.setTagState(NoTag);
+ tquote = NoQuote;
+
+ if (*src != '<') // consume the closing '>', but leave a '<' in src
+ src.advance(m_lineNumber);
+
+ if (m_currentToken.tagName == nullAtom) { //stop if tag is unknown
+ m_cBufferPos = cBufferPos;
+ return state;
+ }
+
+ AtomicString tagName = m_currentToken.tagName; // copied now because processToken() below resets m_currentToken
+
+ // Handle <script src="foo"/> like Mozilla/Opera. We have to do this now for Dashboard
+ // compatibility.
+ bool isSelfClosingScript = m_currentToken.selfClosingTag && m_currentToken.beginTag && m_currentToken.tagName == scriptTag;
+ bool beginTag = !m_currentToken.selfClosingTag && m_currentToken.beginTag;
+ if (m_currentToken.beginTag && m_currentToken.tagName == scriptTag && !inViewSourceMode() && !m_parser->skipMode()) {
+ Attribute* a = 0;
+ m_scriptTagSrcAttrValue = String();
+ m_scriptTagCharsetAttrValue = String();
+ if (m_currentToken.attrs && !m_fragment) {
+ if (m_doc->frame() && m_doc->frame()->script()->isEnabled()) {
+ if ((a = m_currentToken.attrs->getAttributeItem(srcAttr)))
+ m_scriptTagSrcAttrValue = m_doc->completeURL(parseURL(a->value())).string();
+ }
+ }
+ }
+
+ RefPtr<Node> n = processToken();
+ m_cBufferPos = cBufferPos;
+ if (n || inViewSourceMode()) {
+ if ((tagName == preTag || tagName == listingTag) && !inViewSourceMode()) {
+ if (beginTag)
+ state.setDiscardLF(true); // Discard the first LF after we open a pre.
+ } else if (tagName == scriptTag) {
+ ASSERT(!m_scriptNode);
+ m_scriptNode = static_pointer_cast<HTMLScriptElement>(n);
+ if (m_scriptNode)
+ m_scriptTagCharsetAttrValue = m_scriptNode->scriptCharset();
+ if (beginTag) {
+ m_searchStopper = scriptEnd;
+ m_searchStopperLength = 8;
+ state.setInScript(true);
+ state = parseSpecial(src, state);
+ } else if (isSelfClosingScript) { // Handle <script src="foo"/>
+ state.setInScript(true);
+ state = scriptHandler(state);
+ }
+ } else if (tagName == styleTag) {
+ if (beginTag) {
+ m_searchStopper = styleEnd;
+ m_searchStopperLength = 7;
+ state.setInStyle(true);
+ state = parseSpecial(src, state);
+ }
+ } else if (tagName == textareaTag) {
+ if (beginTag) {
+ m_searchStopper = textareaEnd;
+ m_searchStopperLength = 10;
+ state.setInTextArea(true);
+ state = parseSpecial(src, state);
+ }
+ } else if (tagName == titleTag) {
+ if (beginTag) {
+ m_searchStopper = titleEnd;
+ m_searchStopperLength = 7;
+ State savedState = state;
+ SegmentedString savedSrc = src;
+ long savedLineno = m_lineNumber;
+ state.setInTitle(true);
+ state = parseSpecial(src, state);
+ if (state.inTitle() && src.isEmpty()) {
+ // We just ate the rest of the document as the title #text node!
+ // Reset the state then retokenize without special title handling.
+ // Let the parser clean up the missing </title> tag.
+ // FIXME: This is incorrect, because src.isEmpty() doesn't mean we're
+ // at the end of the document unless m_noMoreData is also true. We need
+ // to detect this case elsewhere, and save the state somewhere other
+ // than a local variable.
+ state = savedState;
+ src = savedSrc;
+ m_lineNumber = savedLineno;
+ m_scriptCodeSize = 0;
+ }
+ }
+ } else if (tagName == xmpTag) {
+ if (beginTag) {
+ m_searchStopper = xmpEnd;
+ m_searchStopperLength = 5;
+ state.setInXmp(true);
+ state = parseSpecial(src, state);
+ }
+ } else if (tagName == iframeTag) {
+ if (beginTag) {
+ m_searchStopper = iframeEnd;
+ m_searchStopperLength = 8;
+ state.setInIFrame(true);
+ state = parseSpecial(src, state);
+ }
+ }
+ }
+ if (tagName == plaintextTag)
+ state.setInPlainText(beginTag);
+ return state; // Finished parsing tag!
+ }
+ } // end switch
+ }
+ m_cBufferPos = cBufferPos;
+ return state;
+}
+
+// Decides whether the tokenizer may keep consuming input or has to give control back.
+//
+// processedCount - running count of calls since the last time check; reset to 0 whenever
+//                  the time check is performed, incremented when processing continues.
+// startTime      - value of currentTime() taken when the current write() began.
+// state          - tokenizer state; its allow-yield flag is read and then cleared.
+//
+// Returns false after arming m_timer (processing resumes from the timer), true otherwise.
+inline bool HTMLTokenizer::continueProcessing(int& processedCount, double startTime, State &state)
+{
+    // The yield permission is one-shot: remember it, then clear it.
+    const bool yieldRequested = state.allowYield();
+    state.setAllowYield(false);
+
+    // Elapsed time is deliberately not checked for every character; it is only looked at
+    // once enough characters have been processed or a yield was explicitly allowed.
+    const bool deferralPermitted = !state.loadingExtScript() && !state.forceSynchronous() && !m_executingScript;
+    if (deferralPermitted && (processedCount > m_tokenizerChunkSize || yieldRequested)) {
+        processedCount = 0;
+        if (currentTime() - startTime > m_tokenizerTimeDelay) {
+            // FIXME: Yielding aggressively while stylesheets are still loading, i.e. also when
+            //     !m_doc->haveStylesheetsLoaded() &&
+            //     (m_doc->documentElement()->id() != ID_HTML || m_doc->body())
+            // would give them the opportunity to load, but it hurts overall performance on
+            // slower machines, so that extra condition is turned off for now.
+
+            // Arm the timer so that processing continues as soon as possible.
+            m_timer.startOneShot(0);
+#ifdef INSTRUMENT_LAYOUT_SCHEDULING
+            if (currentTime() - startTime > m_tokenizerTimeDelay)
+                printf("Deferring processing of data because 500ms elapsed away from event loop.\n");
+#endif
+            return false;
+        }
+    }
+
+    processedCount++;
+    return true;
+}
+
+bool HTMLTokenizer::write(const SegmentedString& str, bool appendData) /*
+ * Feeds str into the tokenizer and tokenizes as much of the accumulated input as allowed.
+ *
+ * - Returns false immediately when there is no buffer or the parser has been stopped.
+ * - While a script is executing (with appendData set) or scripts are pending, the input is
+ *   only queued in m_currentPrependingSrc / m_pendingSrc and false is returned.
+ * - Otherwise the input is appended to m_src (or becomes m_src) and, unless m_timer is
+ *   active, the main loop runs until m_src is empty, a scheduled location change is
+ *   pending, or continueProcessing() asks to stop.
+ *
+ * Returns true only when this call went on to invoke end(); false in every other case.
+ */
+{
+ if (!m_buffer)
+ return false;
+
+ if (m_parserStopped)
+ return false;
+
+ SegmentedString source(str);
+ if (m_executingScript)
+ source.setExcludeLineNumbers();
+
+ if ((m_executingScript && appendData) || !m_pendingScripts.isEmpty()) {
+ // don't parse; we will do this later
+ if (m_currentPrependingSrc)
+ m_currentPrependingSrc->append(source);
+ else {
+ m_pendingSrc.append(source);
+#if PRELOAD_SCANNER_ENABLED
+ if (m_preloadScanner && m_preloadScanner->inProgress() && appendData)
+ m_preloadScanner->write(source);
+#endif
+ }
+ return false;
+ }
+
+#if PRELOAD_SCANNER_ENABLED
+ if (m_preloadScanner && m_preloadScanner->inProgress() && appendData)
+ m_preloadScanner->end();
+#endif
+
+ if (!m_src.isEmpty())
+ m_src.append(source);
+ else
+ setSrc(source);
+
+ // Once a timer is set, it has control of when the tokenizer continues.
+ if (m_timer.isActive())
+ return false;
+
+ bool wasInWrite = m_inWrite; // saved so that a nested write() restores the outer value on exit
+ m_inWrite = true;
+
+#ifdef INSTRUMENT_LAYOUT_SCHEDULING
+ if (!m_doc->ownerElement())
+ printf("Beginning write at time %d\n", m_doc->elapsedTime());
+#endif
+
+ int processedCount = 0;
+ double startTime = currentTime();
+
+ Frame* frame = m_doc->frame();
+
+ State state = m_state; // work on a local copy; stored back into m_state after the loop
+
+ while (!m_src.isEmpty() && (!frame || !frame->loader()->isScheduledLocationChangePending())) {
+ if (!continueProcessing(processedCount, startTime, state))
+ break;
+
+ // do we need to enlarge the buffer?
+ checkBuffer();
+
+ UChar cc = *m_src;
+
+ bool wasSkipLF = state.skipLF();
+ if (wasSkipLF)
+ state.setSkipLF(false);
+
+ if (wasSkipLF && (cc == '\n')) // the LF of a CR LF pair: the CR was already handled, so drop it
+ m_src.advance();
+ else if (state.needsSpecialWriteHandling()) {
+ // it's important to keep needsSpecialWriteHandling with the flags this block tests
+ if (state.hasEntityState())
+ state = parseEntity(m_src, m_dest, state, m_cBufferPos, false, state.hasTagState());
+ else if (state.inPlainText())
+ state = parseText(m_src, state);
+ else if (state.inAnySpecial())
+ state = parseSpecial(m_src, state);
+ else if (state.inComment())
+ state = parseComment(m_src, state);
+ else if (state.inDoctype())
+ state = parseDoctype(m_src, state);
+ else if (state.inServer())
+ state = parseServer(m_src, state);
+ else if (state.inProcessingInstruction())
+ state = parseProcessingInstruction(m_src, state);
+ else if (state.hasTagState())
+ state = parseTag(m_src, state);
+ else if (state.startTag()) {
+ state.setStartTag(false);
+
+ switch(cc) {
+ case '/':
+ break;
+ case '!': {
+ // <!-- comment --> or <!DOCTYPE ...>
+ searchCount = 1; // Look for '<!--' sequence to start comment or '<!DOCTYPE' sequence to start doctype
+ m_doctypeSearchCount = 1;
+ break;
+ }
+ case '?': {
+ // xml processing instruction
+ state.setInProcessingInstruction(true);
+ tquote = NoQuote;
+ state = parseProcessingInstruction(m_src, state);
+ continue;
+
+ break; // unreachable: directly follows the continue above
+ }
+ case '%':
+ if (!m_brokenServer) {
+ // <% server stuff, handle as comment %>
+ state.setInServer(true);
+ tquote = NoQuote;
+ state = parseServer(m_src, state);
+ continue;
+ }
+ // else fall through
+ default: {
+ if( ((cc >= 'a') && (cc <= 'z')) || ((cc >= 'A') && (cc <= 'Z'))) {
+ // Start of a Start-Tag
+ } else {
+ // Invalid tag
+ // Add as is
+ *m_dest = '<';
+ m_dest++;
+ continue;
+ }
+ }
+ }; // end case
+
+ processToken(); // emit whatever token is currently buffered before the tag itself is parsed
+
+ m_cBufferPos = 0;
+ state.setTagState(TagName);
+ state = parseTag(m_src, state);
+ }
+ } else if (cc == '&' && !m_src.escaped()) {
+ m_src.advancePastNonNewline();
+ state = parseEntity(m_src, m_dest, state, m_cBufferPos, true, state.hasTagState());
+ } else if (cc == '<' && !m_src.escaped()) {
+ m_currentTagStartLineNumber = m_lineNumber;
+ m_src.advancePastNonNewline();
+ state.setStartTag(true);
+ state.setDiscardLF(false);
+ } else if (cc == '\n' || cc == '\r') {
+ if (state.discardLF())
+ // Ignore this LF
+ state.setDiscardLF(false); // We have discarded 1 LF
+ else {
+ // Process this LF
+ *m_dest++ = '\n';
+ if (cc == '\r' && !m_src.excludeLineNumbers())
+ m_lineNumber++;
+ }
+
+ /* Check for MS-DOS CRLF sequence */
+ if (cc == '\r')
+ state.setSkipLF(true);
+ m_src.advance(m_lineNumber);
+ } else {
+ state.setDiscardLF(false);
+ *m_dest++ = cc;
+ m_src.advancePastNonNewline();
+ }
+ }
+
+#ifdef INSTRUMENT_LAYOUT_SCHEDULING
+ if (!m_doc->ownerElement())
+ printf("Ending write at time %d\n", m_doc->elapsedTime());
+#endif
+
+ m_inWrite = wasInWrite;
+
+ m_state = state;
+
+ if (m_noMoreData && !m_inWrite && !state.loadingExtScript() && !m_executingScript && !m_timer.isActive()) {
+ end(); // this actually causes us to be deleted
+ return true;
+ }
+ return false;
+}
+
+// Stops tokenizing: forwards to Tokenizer::stopParsing(), cancels the continuation timer and,
+// for a non-fragment tokenizer whose document has a frame, reports to that frame's loader.
+void HTMLTokenizer::stopParsing()
+{
+    Tokenizer::stopParsing();
+    m_timer.stop();
+
+    // Fragment tokenizers and documents without a frame have no loader to report to.
+    if (m_fragment || !m_doc->frame())
+        return;
+
+    // Whether the data ran out naturally or parsing was interrupted by hand, the part
+    // has to learn that the tokenizer is done with its data.
+    m_doc->frame()->loader()->tokenizerProcessedData();
+}
+
+// Reports whether tokenizing is still under way: true while the continuation timer is
+// armed or while a write() call is in progress.
+bool HTMLTokenizer::processingData() const
+{
+    if (m_timer.isActive())
+        return true;
+    return m_inWrite;
+}
+
+// Timer callback that resumes tokenizing after a deferral. The timer argument is unused.
+void HTMLTokenizer::timerFired(Timer<HTMLTokenizer>*)
+{
+#ifdef INSTRUMENT_LAYOUT_SCHEDULING
+    if (!m_doc->ownerElement())
+        printf("Beginning timer write at time %d\n", m_doc->elapsedTime());
+#endif
+
+    // A pending layout with no minimum delay gets to run first: re-arm our own timer and
+    // step aside, which effectively ranks the layout timer above this one.
+    const bool layoutGoesFirst = m_doc->view() && m_doc->view()->layoutPending() && !m_doc->minimumLayoutDelay();
+    if (layoutGoesFirst) {
+        m_timer.startOneShot(0);
+        return;
+    }
+
+    // Pretend more (empty) data arrived so that write() picks up where it stopped.
+    // Note that this call may end up deleting this tokenizer.
+    write(SegmentedString(), true);
+}
+
+// Finishes tokenizing: emits the last pending token if appropriate, releases the script
+// and token buffers, and then notifies either the view-source document or the parser.
+void HTMLTokenizer::end()
+{
+    ASSERT(!m_timer.isActive());
+    m_timer.stop(); // Redundant unless the assertion above would have fired; kept as a safety net.
+
+    if (m_buffer) {
+        // While a tag is being parsed the buffer is used for other purposes, so it only
+        // holds a token worth emitting when there is no tag state.
+        if (!m_state.hasTagState())
+            processToken();
+
+        fastFree(m_scriptCode);
+        m_scriptCode = 0;
+        m_scriptCodeResync = 0;
+        m_scriptCodeCapacity = 0;
+        m_scriptCodeSize = 0;
+
+        fastFree(m_buffer);
+        m_buffer = 0;
+    }
+
+    if (inViewSourceMode())
+        m_doc->finishedParsing();
+    else
+        m_parser->finished();
+}
+
+void HTMLTokenizer::finish() /*
+ * Signals that no further data will arrive.
+ *
+ * While the state is still inside a comment or a server section and m_scriptCode holds
+ * buffered characters, the construct is recorded as broken (m_brokenComments or
+ * m_brokenServer), the buffered characters are copied into a string, the script buffer
+ * is freed, the comment/server flags are cleared and the string is fed back through
+ * write().
+ *
+ * Afterwards m_noMoreData is set and end() is called, unless a write is in progress,
+ * an external script is loading, a script is executing, or the timer is active.
+ */
+{
+ // do this as long as we don't find matching comment ends
+ while ((m_state.inComment() || m_state.inServer()) && m_scriptCode && m_scriptCodeSize) {
+ // we've found an unmatched comment start
+ if (m_state.inComment())
+ m_brokenComments = true;
+ else
+ m_brokenServer = true;
+ checkScriptBuffer();
+ m_scriptCode[m_scriptCodeSize] = 0; // two zero terminators are written past the buffered characters
+ m_scriptCode[m_scriptCodeSize + 1] = 0;
+ int pos;
+ String food;
+ if (m_state.inScript() || m_state.inStyle() || m_state.inTextArea())
+ food = String(m_scriptCode, m_scriptCodeSize);
+ else if (m_state.inServer()) {
+ food = "<";
+ food.append(m_scriptCode, m_scriptCodeSize);
+ } else {
+ pos = find(m_scriptCode, m_scriptCodeSize, '>');
+ food = String(m_scriptCode + pos + 1, m_scriptCodeSize - pos - 1); // keep only what comes after the '>' located by find()
+ }
+ fastFree(m_scriptCode);
+ m_scriptCode = 0;
+ m_scriptCodeSize = m_scriptCodeCapacity = m_scriptCodeResync = 0;
+ m_state.setInComment(false);
+ m_state.setInServer(false);
+ if (!food.isEmpty())
+ write(food, true); // tokenize the salvaged characters again, now outside comment/server state
+ }
+ // this indicates we will not receive any more data... but if we are waiting on
+ // an external script to load, we can't finish parsing until that is done
+ m_noMoreData = true;
+ if (!m_inWrite && !m_state.loadingExtScript() && !m_executingScript && !m_timer.isActive())
+ end(); // this actually causes us to be deleted
+}
+
+PassRefPtr<Node> HTMLTokenizer::processToken() /*
+ * Emits m_currentToken and resets it.
+ *
+ * If characters are buffered (m_dest > m_buffer) they become the token's text, and the
+ * token is marked as a text token unless it is a comment. The token is then passed to
+ * the view-source document (view-source mode) or to m_parser->parseToken().
+ *
+ * Returns the node returned by parseToken(). Returns null when there was neither
+ * buffered text nor a tag name, when the parser is stopped, or in view-source mode.
+ */
+{
+ ScriptController* scriptController = (!m_fragment && m_doc->frame()) ? m_doc->frame()->script() : 0;
+ if (scriptController && scriptController->isEnabled())
+ // FIXME: Why isn't this m_currentScriptTagStartLineNumber? I suspect this is wrong.
+ scriptController->setEventHandlerLineno(m_currentTagStartLineNumber + 1); // Script line numbers are 1 based.
+ if (m_dest > m_buffer) {
+ m_currentToken.text = StringImpl::createStrippingNullCharacters(m_buffer, m_dest - m_buffer);
+ if (m_currentToken.tagName != commentAtom)
+ m_currentToken.tagName = textAtom;
+ } else if (m_currentToken.tagName == nullAtom) { // no buffered text and no tag name: nothing to emit
+ m_currentToken.reset();
+ if (scriptController)
+ scriptController->setEventHandlerLineno(m_lineNumber + 1); // Script line numbers are 1 based.
+ return 0;
+ }
+
+ m_dest = m_buffer; // the buffered characters now belong to the token; start writing from the beginning again
+
+ RefPtr<Node> n;
+
+ if (!m_parserStopped) {
+ if (NamedMappedAttrMap* map = m_currentToken.attrs.get())
+ map->shrinkToLength();
+ if (inViewSourceMode())
+ static_cast<HTMLViewSourceDocument*>(m_doc)->addViewSourceToken(&m_currentToken);
+ else
+ // pass the token over to the parser, the parser DOES NOT delete the token
+ n = m_parser->parseToken(&m_currentToken);
+ }
+ m_currentToken.reset();
+ if (scriptController)
+ scriptController->setEventHandlerLineno(0);
+
+ return n.release();
+}
+
+// Delivers m_doctypeToken to its consumer: the parser in normal operation, or the
+// view-source document when in view-source mode.
+void HTMLTokenizer::processDoctypeToken()
+{
+    if (!inViewSourceMode()) {
+        m_parser->parseDoctypeToken(&m_doctypeToken);
+        return;
+    }
+
+    static_cast<HTMLViewSourceDocument*>(m_doc)->addViewSourceDoctypeToken(&m_doctypeToken);
+}
+
+HTMLTokenizer::~HTMLTokenizer() // Destructor: asserts no write() is in progress, then calls reset().
+{
+ ASSERT(!m_inWrite); // m_inWrite is set for the duration of write(); destruction during a write is a bug
+ reset();
+}
+
+
+// Grows the token buffer so that at least len more UChars fit. The new size is the
+// larger of twice the current size and the current size plus len. m_dest keeps pointing
+// at the same logical position inside the (possibly relocated) buffer.
+void HTMLTokenizer::enlargeBuffer(int len)
+{
+    // fastRealloc may move the storage, so remember the write position as an offset.
+    const int writeOffset = m_dest - m_buffer;
+    const int grownSize = max(m_bufferSize * 2, m_bufferSize + len);
+
+    m_buffer = static_cast<UChar*>(fastRealloc(m_buffer, grownSize * sizeof(UChar)));
+    m_bufferSize = grownSize;
+    m_dest = m_buffer + writeOffset;
+}
+
+// Grows the script-code buffer so that at least len more UChars fit. The new capacity is
+// the larger of twice the current capacity and the current capacity plus len.
+void HTMLTokenizer::enlargeScriptBuffer(int len)
+{
+    const int grownCapacity = max(m_scriptCodeCapacity * 2, m_scriptCodeCapacity + len);
+    m_scriptCode = static_cast<UChar*>(fastRealloc(m_scriptCode, grownCapacity * sizeof(UChar)));
+    m_scriptCodeCapacity = grownCapacity;
+}
+
+// Resumes script execution that notifyFinished() postponed because stylesheets were still
+// loading. Expects the document's stylesheets to be loaded by the time it is called.
+void HTMLTokenizer::executeScriptsWaitingForStylesheets()
+{
+    ASSERT(m_doc->haveStylesheetsLoaded());
+
+    // Nothing was postponed, so there is nothing to resume.
+    if (!m_hasScriptsWaitingForStylesheets)
+        return;
+
+    notifyFinished(0);
+}
+
+void HTMLTokenizer::notifyFinished(CachedResource*) /*
+ * Runs pending external scripts that have finished loading. The argument is unused;
+ * executeScriptsWaitingForStylesheets() calls this function with a null pointer.
+ *
+ * Does nothing while the document's stylesheets are not yet loaded. Otherwise it takes
+ * loaded scripts from the front of m_pendingScripts one at a time, executes each (or
+ * dispatches an error event for it), dispatches the load event on success, and, unless
+ * m_requestingScript is set, feeds m_pendingSrc back through write().
+ */
+{
+#ifdef INSTRUMENT_LAYOUT_SCHEDULING
+ if (!m_doc->ownerElement())
+ printf("script loaded at %d\n", m_doc->elapsedTime());
+#endif
+
+ ASSERT(!m_pendingScripts.isEmpty());
+
+ // Make external scripts wait for external stylesheets.
+ // FIXME: This needs to be done for inline scripts too.
+ m_hasScriptsWaitingForStylesheets = !m_doc->haveStylesheetsLoaded();
+ if (m_hasScriptsWaitingForStylesheets)
+ return; // executeScriptsWaitingForStylesheets() calls back in while this flag is set
+
+ bool finished = false;
+ while (!finished && m_pendingScripts.first()->isLoaded()) { // NOTE(review): evaluated again after write() below, which may have deleted this object - confirm this is safe
+ CachedScript* cs = m_pendingScripts.first().get();
+ m_pendingScripts.removeFirst();
+ ASSERT(cache()->disabled() || cs->accessCount() > 0);
+
+ setSrc(SegmentedString());
+
+ // make sure we forget about the script before we execute the new one
+ // infinite recursion might happen otherwise
+ ScriptSourceCode sourceCode(cs);
+ bool errorOccurred = cs->errorOccurred();
+ cs->removeClient(this);
+
+ RefPtr<Node> n = m_scriptNode.release();
+
+#ifdef INSTRUMENT_LAYOUT_SCHEDULING
+ if (!m_doc->ownerElement())
+ printf("external script beginning execution at %d\n", m_doc->elapsedTime());
+#endif
+
+ if (errorOccurred)
+ EventTargetNodeCast(n.get())->dispatchEventForType(eventNames().errorEvent, true, false);
+ else {
+ if (static_cast<HTMLScriptElement*>(n.get())->shouldExecuteAsJavaScript())
+ m_state = scriptExecution(sourceCode, m_state);
+ EventTargetNodeCast(n.get())->dispatchEventForType(eventNames().loadEvent, false, false);
+ }
+
+ // The state of m_pendingScripts.isEmpty() can change inside the scriptExecution()
+ // call above, so test afterwards.
+ finished = m_pendingScripts.isEmpty();
+ if (finished) {
+ ASSERT(!m_hasScriptsWaitingForStylesheets);
+ m_state.setLoadingExtScript(false);
+#ifdef INSTRUMENT_LAYOUT_SCHEDULING
+ if (!m_doc->ownerElement())
+ printf("external script finished execution at %d\n", m_doc->elapsedTime());
+#endif
+ } else if (m_hasScriptsWaitingForStylesheets) {
+ // m_hasScriptsWaitingForStylesheets flag might have changed during the script execution.
+ // If it did we are now blocked waiting for stylesheets and should not execute more scripts until they arrive.
+ finished = true;
+ }
+
+ // 'm_requestingScript' is true when we are called synchronously from
+ // scriptHandler(). In that case scriptHandler() will take care
+ // of m_pendingSrc.
+ if (!m_requestingScript) {
+ SegmentedString rest = m_pendingSrc; // copy first, because m_pendingSrc is cleared before the copy is written
+ m_pendingSrc.clear();
+ write(rest, false);
+ // we might be deleted at this point, do not access any members.
+ }
+ }
+}
+
+bool HTMLTokenizer::isWaitingForScripts() const // Reports whether the tokenizer state is flagged as loading an external script.
+{
+ return m_state.loadingExtScript(); // the flag is cleared in notifyFinished() once no pending scripts remain
+}
+
+void HTMLTokenizer::setSrc(const SegmentedString& source) // Makes source the tokenizer's current input.
+{
+ m_src = source; // plain assignment: whatever m_src held before is replaced, not appended to
+}
+
+// Parses the markup in source into fragment using a temporary tokenizer that is forced
+// to run synchronously.
+void parseHTMLDocumentFragment(const String& source, DocumentFragment* fragment)
+{
+    HTMLTokenizer fragmentTokenizer(fragment);
+    fragmentTokenizer.setForceSynchronous(true);
+
+    fragmentTokenizer.write(source, true);
+    fragmentTokenizer.finish();
+
+    // Nothing may be left in flight once finish() returns (see 3963151).
+    ASSERT(!fragmentTokenizer.processingData());
+}
+
+// Looks up the entity called name (a NUL-terminated string) with findEntity().
+// Returns the entity's code, or 0 when no entity with that name is found.
+UChar decodeNamedEntity(const char* name)
+{
+    const Entity* entity = findEntity(name, strlen(name));
+    if (!entity)
+        return 0;
+    return entity->code;
+}
+
+}
+
+
« no previous file with comments | « third_party/WebKit/WebCore/html/HTMLParser.cpp ('k') | third_party/WebKit/WebCore/loader/EmptyClients.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698