| Index: third_party/WebKit/WebCore/html/HTMLTokenizer.cpp
|
| ===================================================================
|
| --- third_party/WebKit/WebCore/html/HTMLTokenizer.cpp (revision 9310)
|
| +++ third_party/WebKit/WebCore/html/HTMLTokenizer.cpp (working copy)
|
| @@ -1,2045 +1,2045 @@
|
| -/*
|
| - Copyright (C) 1997 Martin Jones (mjones@kde.org)
|
| - (C) 1997 Torben Weis (weis@kde.org)
|
| - (C) 1998 Waldo Bastian (bastian@kde.org)
|
| - (C) 1999 Lars Knoll (knoll@kde.org)
|
| - (C) 1999 Antti Koivisto (koivisto@kde.org)
|
| - (C) 2001 Dirk Mueller (mueller@kde.org)
|
| - Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
|
| - Copyright (C) 2005, 2006 Alexey Proskuryakov (ap@nypop.com)
|
| -
|
| - This library is free software; you can redistribute it and/or
|
| - modify it under the terms of the GNU Library General Public
|
| - License as published by the Free Software Foundation; either
|
| - version 2 of the License, or (at your option) any later version.
|
| -
|
| - This library is distributed in the hope that it will be useful,
|
| - but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| - Library General Public License for more details.
|
| -
|
| - You should have received a copy of the GNU Library General Public License
|
| - along with this library; see the file COPYING.LIB. If not, write to
|
| - the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
| - Boston, MA 02110-1301, USA.
|
| -*/
|
| -
|
| -#include "config.h"
|
| -#include "HTMLTokenizer.h"
|
| -
|
| -#include "CSSHelper.h"
|
| -#include "Cache.h"
|
| -#include "CachedScript.h"
|
| -#include "DocLoader.h"
|
| -#include "DocumentFragment.h"
|
| -#include "EventNames.h"
|
| -#include "Frame.h"
|
| -#include "FrameLoader.h"
|
| -#include "FrameView.h"
|
| -#include "HTMLElement.h"
|
| -#include "HTMLNames.h"
|
| -#include "HTMLParser.h"
|
| -#include "HTMLScriptElement.h"
|
| -#include "HTMLViewSourceDocument.h"
|
| -#include "Page.h"
|
| -#include "PreloadScanner.h"
|
| -#include "ScriptController.h"
|
| -#include "ScriptSourceCode.h"
|
| -#include "ScriptValue.h"
|
| -#include <wtf/ASCIICType.h>
|
| -#include <wtf/CurrentTime.h>
|
| -
|
| -#include "HTMLEntityNames.c"
|
| -
|
| -#define PRELOAD_SCANNER_ENABLED 1
|
| -// #define INSTRUMENT_LAYOUT_SCHEDULING 1
|
| -
|
| -using namespace WTF;
|
| -using namespace std;
|
| -
|
| -namespace WebCore {
|
| -
|
| -using namespace HTMLNames;
|
| -
|
| -#if MOBILE
|
| -// The mobile device needs to be responsive, as such the tokenizer chunk size is reduced.
|
| -// This value is used to define how many characters the tokenizer will process before
|
| -// yeilding control.
|
| -static const int defaultTokenizerChunkSize = 256;
|
| -#else
|
| -static const int defaultTokenizerChunkSize = 4096;
|
| -#endif
|
| -
|
| -#if MOBILE
|
| -// As the chunks are smaller (above), the tokenizer should not yield for as long a period, otherwise
|
| -// it will take way to long to load a page.
|
| -static const double defaultTokenizerTimeDelay = 0.300;
|
| -#else
|
| -// FIXME: We would like this constant to be 200ms.
|
| -// Yielding more aggressively results in increased responsiveness and better incremental rendering.
|
| -// It slows down overall page-load on slower machines, though, so for now we set a value of 500.
|
| -static const double defaultTokenizerTimeDelay = 0.500;
|
| -#endif
|
| -
|
| -static const char commentStart [] = "<!--";
|
| -static const char doctypeStart [] = "<!doctype";
|
| -static const char publicStart [] = "public";
|
| -static const char systemStart [] = "system";
|
| -static const char scriptEnd [] = "</script";
|
| -static const char xmpEnd [] = "</xmp";
|
| -static const char styleEnd [] = "</style";
|
| -static const char textareaEnd [] = "</textarea";
|
| -static const char titleEnd [] = "</title";
|
| -static const char iframeEnd [] = "</iframe";
|
| -
|
| -// Full support for MS Windows extensions to Latin-1.
|
| -// Technically these extensions should only be activated for pages
|
| -// marked "windows-1252" or "cp1252", but
|
| -// in the standard Microsoft way, these extensions infect hundreds of thousands
|
| -// of web pages. Note that people with non-latin-1 Microsoft extensions
|
| -// are SOL.
|
| -//
|
| -// See: http://www.microsoft.com/globaldev/reference/WinCP.asp
|
| -// http://www.bbsinc.com/iso8859.html
|
| -// http://www.obviously.com/
|
| -//
|
| -// There may be better equivalents
|
| -
|
| -// We only need this for entities. For non-entity text, we handle this in the text encoding.
|
| -
|
| -static const UChar windowsLatin1ExtensionArray[32] = {
|
| - 0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, // 80-87
|
| - 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F, // 88-8F
|
| - 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, // 90-97
|
| - 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178 // 98-9F
|
| -};
|
| -
|
| -static inline UChar fixUpChar(UChar c)
|
| -{
|
| - if ((c & ~0x1F) != 0x0080)
|
| - return c;
|
| - return windowsLatin1ExtensionArray[c - 0x80];
|
| -}
|
| -
|
| -static inline bool tagMatch(const char* s1, const UChar* s2, unsigned length)
|
| -{
|
| - for (unsigned i = 0; i != length; ++i) {
|
| - unsigned char c1 = s1[i];
|
| - unsigned char uc1 = toASCIIUpper(static_cast<char>(c1));
|
| - UChar c2 = s2[i];
|
| - if (c1 != c2 && uc1 != c2)
|
| - return false;
|
| - }
|
| - return true;
|
| -}
|
| -
|
| -inline void Token::addAttribute(AtomicString& attrName, const AtomicString& attributeValue, bool viewSourceMode)
|
| -{
|
| - if (!attrName.isEmpty()) {
|
| - ASSERT(!attrName.contains('/'));
|
| - RefPtr<MappedAttribute> a = MappedAttribute::create(attrName, attributeValue);
|
| - if (!attrs) {
|
| - attrs = NamedMappedAttrMap::create();
|
| - attrs->reserveInitialCapacity(10);
|
| - }
|
| - attrs->insertAttribute(a.release(), viewSourceMode);
|
| - }
|
| -
|
| - attrName = emptyAtom;
|
| -}
|
| -
|
| -// ----------------------------------------------------------------------------
|
| -
|
| -HTMLTokenizer::HTMLTokenizer(HTMLDocument* doc, bool reportErrors)
|
| - : Tokenizer()
|
| - , m_buffer(0)
|
| - , m_scriptCode(0)
|
| - , m_scriptCodeSize(0)
|
| - , m_scriptCodeCapacity(0)
|
| - , m_scriptCodeResync(0)
|
| - , m_executingScript(0)
|
| - , m_requestingScript(false)
|
| - , m_hasScriptsWaitingForStylesheets(false)
|
| - , m_timer(this, &HTMLTokenizer::timerFired)
|
| - , m_doc(doc)
|
| - , m_parser(new HTMLParser(doc, reportErrors))
|
| - , m_inWrite(false)
|
| - , m_fragment(false)
|
| -{
|
| - begin();
|
| -}
|
| -
|
| -HTMLTokenizer::HTMLTokenizer(HTMLViewSourceDocument* doc)
|
| - : Tokenizer(true)
|
| - , m_buffer(0)
|
| - , m_scriptCode(0)
|
| - , m_scriptCodeSize(0)
|
| - , m_scriptCodeCapacity(0)
|
| - , m_scriptCodeResync(0)
|
| - , m_executingScript(0)
|
| - , m_requestingScript(false)
|
| - , m_hasScriptsWaitingForStylesheets(false)
|
| - , m_timer(this, &HTMLTokenizer::timerFired)
|
| - , m_doc(doc)
|
| - , m_parser(0)
|
| - , m_inWrite(false)
|
| - , m_fragment(false)
|
| -{
|
| - begin();
|
| -}
|
| -
|
| -HTMLTokenizer::HTMLTokenizer(DocumentFragment* frag)
|
| - : m_buffer(0)
|
| - , m_scriptCode(0)
|
| - , m_scriptCodeSize(0)
|
| - , m_scriptCodeCapacity(0)
|
| - , m_scriptCodeResync(0)
|
| - , m_executingScript(0)
|
| - , m_requestingScript(false)
|
| - , m_hasScriptsWaitingForStylesheets(false)
|
| - , m_timer(this, &HTMLTokenizer::timerFired)
|
| - , m_doc(frag->document())
|
| - , m_parser(new HTMLParser(frag))
|
| - , m_inWrite(false)
|
| - , m_fragment(true)
|
| -{
|
| - begin();
|
| -}
|
| -
|
| -void HTMLTokenizer::reset()
|
| -{
|
| - ASSERT(m_executingScript == 0);
|
| -
|
| - while (!m_pendingScripts.isEmpty()) {
|
| - CachedScript* cs = m_pendingScripts.first().get();
|
| - m_pendingScripts.removeFirst();
|
| - ASSERT(cache()->disabled() || cs->accessCount() > 0);
|
| - cs->removeClient(this);
|
| - }
|
| -
|
| - fastFree(m_buffer);
|
| - m_buffer = m_dest = 0;
|
| - m_bufferSize = 0;
|
| -
|
| - fastFree(m_scriptCode);
|
| - m_scriptCode = 0;
|
| - m_scriptCodeSize = m_scriptCodeCapacity = m_scriptCodeResync = 0;
|
| -
|
| - m_timer.stop();
|
| - m_state.setAllowYield(false);
|
| - m_state.setForceSynchronous(false);
|
| -
|
| - m_currentToken.reset();
|
| - m_doctypeToken.reset();
|
| - m_doctypeSearchCount = 0;
|
| - m_doctypeSecondarySearchCount = 0;
|
| - m_hasScriptsWaitingForStylesheets = false;
|
| -}
|
| -
|
| -void HTMLTokenizer::begin()
|
| -{
|
| - m_executingScript = 0;
|
| - m_requestingScript = false;
|
| - m_hasScriptsWaitingForStylesheets = false;
|
| - m_state.setLoadingExtScript(false);
|
| - reset();
|
| - m_bufferSize = 254;
|
| - m_buffer = static_cast<UChar*>(fastMalloc(sizeof(UChar) * 254));
|
| - m_dest = m_buffer;
|
| - tquote = NoQuote;
|
| - searchCount = 0;
|
| - m_state.setEntityState(NoEntity);
|
| - m_scriptTagSrcAttrValue = String();
|
| - m_pendingSrc.clear();
|
| - m_currentPrependingSrc = 0;
|
| - m_noMoreData = false;
|
| - m_brokenComments = false;
|
| - m_brokenServer = false;
|
| - m_lineNumber = 0;
|
| - m_currentScriptTagStartLineNumber = 0;
|
| - m_currentTagStartLineNumber = 0;
|
| - m_state.setForceSynchronous(false);
|
| -
|
| - Page* page = m_doc->page();
|
| - if (page && page->hasCustomHTMLTokenizerTimeDelay())
|
| - m_tokenizerTimeDelay = page->customHTMLTokenizerTimeDelay();
|
| - else
|
| - m_tokenizerTimeDelay = defaultTokenizerTimeDelay;
|
| -
|
| - if (page && page->hasCustomHTMLTokenizerChunkSize())
|
| - m_tokenizerChunkSize = page->customHTMLTokenizerChunkSize();
|
| - else
|
| - m_tokenizerChunkSize = defaultTokenizerChunkSize;
|
| -}
|
| -
|
| -void HTMLTokenizer::setForceSynchronous(bool force)
|
| -{
|
| - m_state.setForceSynchronous(force);
|
| -}
|
| -
|
| -HTMLTokenizer::State HTMLTokenizer::processListing(SegmentedString list, State state)
|
| -{
|
| - // This function adds the listing 'list' as
|
| - // preformatted text-tokens to the token-collection
|
| - while (!list.isEmpty()) {
|
| - if (state.skipLF()) {
|
| - state.setSkipLF(false);
|
| - if (*list == '\n') {
|
| - list.advance();
|
| - continue;
|
| - }
|
| - }
|
| -
|
| - checkBuffer();
|
| -
|
| - if (*list == '\n' || *list == '\r') {
|
| - if (state.discardLF())
|
| - // Ignore this LF
|
| - state.setDiscardLF(false); // We have discarded 1 LF
|
| - else
|
| - *m_dest++ = '\n';
|
| -
|
| - /* Check for MS-DOS CRLF sequence */
|
| - if (*list == '\r')
|
| - state.setSkipLF(true);
|
| -
|
| - list.advance();
|
| - } else {
|
| - state.setDiscardLF(false);
|
| - *m_dest++ = *list;
|
| - list.advance();
|
| - }
|
| - }
|
| -
|
| - return state;
|
| -}
|
| -
|
| -HTMLTokenizer::State HTMLTokenizer::parseSpecial(SegmentedString& src, State state)
|
| -{
|
| - ASSERT(state.inTextArea() || state.inTitle() || state.inIFrame() || !state.hasEntityState());
|
| - ASSERT(!state.hasTagState());
|
| - ASSERT(state.inXmp() + state.inTextArea() + state.inTitle() + state.inStyle() + state.inScript() + state.inIFrame() == 1 );
|
| - if (state.inScript() && !m_currentScriptTagStartLineNumber)
|
| - m_currentScriptTagStartLineNumber = m_lineNumber;
|
| -
|
| - if (state.inComment())
|
| - state = parseComment(src, state);
|
| -
|
| - int lastDecodedEntityPosition = -1;
|
| - while (!src.isEmpty()) {
|
| - checkScriptBuffer();
|
| - UChar ch = *src;
|
| -
|
| - if (!m_scriptCodeResync && !m_brokenComments &&
|
| - !state.inXmp() && ch == '-' && m_scriptCodeSize >= 3 && !src.escaped() &&
|
| - m_scriptCode[m_scriptCodeSize - 3] == '<' && m_scriptCode[m_scriptCodeSize - 2] == '!' && m_scriptCode[m_scriptCodeSize - 1] == '-' &&
|
| - (lastDecodedEntityPosition < m_scriptCodeSize - 3)) {
|
| - state.setInComment(true);
|
| - state = parseComment(src, state);
|
| - continue;
|
| - }
|
| - if (m_scriptCodeResync && !tquote && ch == '>') {
|
| - src.advancePastNonNewline();
|
| - m_scriptCodeSize = m_scriptCodeResync - 1;
|
| - m_scriptCodeResync = 0;
|
| - m_scriptCode[m_scriptCodeSize] = m_scriptCode[m_scriptCodeSize + 1] = 0;
|
| - if (state.inScript())
|
| - state = scriptHandler(state);
|
| - else {
|
| - state = processListing(SegmentedString(m_scriptCode, m_scriptCodeSize), state);
|
| - processToken();
|
| - if (state.inStyle()) {
|
| - m_currentToken.tagName = styleTag.localName();
|
| - m_currentToken.beginTag = false;
|
| - } else if (state.inTextArea()) {
|
| - m_currentToken.tagName = textareaTag.localName();
|
| - m_currentToken.beginTag = false;
|
| - } else if (state.inTitle()) {
|
| - m_currentToken.tagName = titleTag.localName();
|
| - m_currentToken.beginTag = false;
|
| - } else if (state.inXmp()) {
|
| - m_currentToken.tagName = xmpTag.localName();
|
| - m_currentToken.beginTag = false;
|
| - } else if (state.inIFrame()) {
|
| - m_currentToken.tagName = iframeTag.localName();
|
| - m_currentToken.beginTag = false;
|
| - }
|
| - processToken();
|
| - state.setInStyle(false);
|
| - state.setInScript(false);
|
| - state.setInTextArea(false);
|
| - state.setInTitle(false);
|
| - state.setInXmp(false);
|
| - state.setInIFrame(false);
|
| - tquote = NoQuote;
|
| - m_scriptCodeSize = m_scriptCodeResync = 0;
|
| - }
|
| - return state;
|
| - }
|
| - // possible end of tagname, lets check.
|
| - if (!m_scriptCodeResync && !state.escaped() && !src.escaped() && (ch == '>' || ch == '/' || isASCIISpace(ch)) &&
|
| - m_scriptCodeSize >= m_searchStopperLength &&
|
| - tagMatch(m_searchStopper, m_scriptCode + m_scriptCodeSize - m_searchStopperLength, m_searchStopperLength) &&
|
| - (lastDecodedEntityPosition < m_scriptCodeSize - m_searchStopperLength)) {
|
| - m_scriptCodeResync = m_scriptCodeSize-m_searchStopperLength+1;
|
| - tquote = NoQuote;
|
| - continue;
|
| - }
|
| - if (m_scriptCodeResync && !state.escaped()) {
|
| - if (ch == '\"')
|
| - tquote = (tquote == NoQuote) ? DoubleQuote : ((tquote == SingleQuote) ? SingleQuote : NoQuote);
|
| - else if (ch == '\'')
|
| - tquote = (tquote == NoQuote) ? SingleQuote : (tquote == DoubleQuote) ? DoubleQuote : NoQuote;
|
| - else if (tquote != NoQuote && (ch == '\r' || ch == '\n'))
|
| - tquote = NoQuote;
|
| - }
|
| - state.setEscaped(!state.escaped() && ch == '\\');
|
| - if (!m_scriptCodeResync && (state.inTextArea() || state.inTitle() || state.inIFrame()) && !src.escaped() && ch == '&') {
|
| - UChar* scriptCodeDest = m_scriptCode + m_scriptCodeSize;
|
| - src.advancePastNonNewline();
|
| - state = parseEntity(src, scriptCodeDest, state, m_cBufferPos, true, false);
|
| - if (scriptCodeDest == m_scriptCode + m_scriptCodeSize)
|
| - lastDecodedEntityPosition = m_scriptCodeSize;
|
| - else
|
| - m_scriptCodeSize = scriptCodeDest - m_scriptCode;
|
| - } else {
|
| - m_scriptCode[m_scriptCodeSize++] = ch;
|
| - src.advance(m_lineNumber);
|
| - }
|
| - }
|
| -
|
| - return state;
|
| -}
|
| -
|
| -HTMLTokenizer::State HTMLTokenizer::scriptHandler(State state)
|
| -{
|
| - // We are inside a <script>
|
| - bool doScriptExec = false;
|
| - int startLine = m_currentScriptTagStartLineNumber + 1; // Script line numbers are 1 based, HTMLTokenzier line numbers are 0 based
|
| -
|
| - // Reset m_currentScriptTagStartLineNumber to indicate that we've finished parsing the current script element
|
| - m_currentScriptTagStartLineNumber = 0;
|
| -
|
| - // (Bugzilla 3837) Scripts following a frameset element should not execute or,
|
| - // in the case of extern scripts, even load.
|
| - bool followingFrameset = (m_doc->body() && m_doc->body()->hasTagName(framesetTag));
|
| -
|
| - CachedScript* cs = 0;
|
| - // don't load external scripts for standalone documents (for now)
|
| - if (!inViewSourceMode()) {
|
| - if (!m_scriptTagSrcAttrValue.isEmpty() && m_doc->frame()) {
|
| - // forget what we just got; load from src url instead
|
| - if (!m_parser->skipMode() && !followingFrameset) {
|
| -#ifdef INSTRUMENT_LAYOUT_SCHEDULING
|
| - if (!m_doc->ownerElement())
|
| - printf("Requesting script at time %d\n", m_doc->elapsedTime());
|
| -#endif
|
| - // The parser might have been stopped by for example a window.close call in an earlier script.
|
| - // If so, we don't want to load scripts.
|
| - if (!m_parserStopped && (cs = m_doc->docLoader()->requestScript(m_scriptTagSrcAttrValue, m_scriptTagCharsetAttrValue)))
|
| - m_pendingScripts.append(cs);
|
| - else
|
| - m_scriptNode = 0;
|
| - } else
|
| - m_scriptNode = 0;
|
| - m_scriptTagSrcAttrValue = String();
|
| - } else {
|
| - // Parse m_scriptCode containing <script> info
|
| -#if USE(LOW_BANDWIDTH_DISPLAY)
|
| - if (m_doc->inLowBandwidthDisplay()) {
|
| - // ideal solution is only skipping internal JavaScript if there is external JavaScript.
|
| - // but internal JavaScript can use document.write() to create an external JavaScript,
|
| - // so we have to skip internal JavaScript all the time.
|
| - m_doc->frame()->loader()->needToSwitchOutLowBandwidthDisplay();
|
| - doScriptExec = false;
|
| - } else
|
| -#endif
|
| - doScriptExec = m_scriptNode->shouldExecuteAsJavaScript();
|
| - m_scriptNode = 0;
|
| - }
|
| - }
|
| -
|
| - state = processListing(SegmentedString(m_scriptCode, m_scriptCodeSize), state);
|
| - RefPtr<Node> node = processToken();
|
| - String scriptString = node ? node->textContent() : "";
|
| - m_currentToken.tagName = scriptTag.localName();
|
| - m_currentToken.beginTag = false;
|
| - processToken();
|
| -
|
| - state.setInScript(false);
|
| - m_scriptCodeSize = m_scriptCodeResync = 0;
|
| -
|
| - // FIXME: The script should be syntax highlighted.
|
| - if (inViewSourceMode())
|
| - return state;
|
| -
|
| - SegmentedString* savedPrependingSrc = m_currentPrependingSrc;
|
| - SegmentedString prependingSrc;
|
| - m_currentPrependingSrc = &prependingSrc;
|
| -
|
| - if (!m_parser->skipMode() && !followingFrameset) {
|
| - if (cs) {
|
| - if (savedPrependingSrc)
|
| - savedPrependingSrc->append(m_src);
|
| - else
|
| - m_pendingSrc.prepend(m_src);
|
| - setSrc(SegmentedString());
|
| -
|
| - // the ref() call below may call notifyFinished if the script is already in cache,
|
| - // and that mucks with the state directly, so we must write it back to the object.
|
| - m_state = state;
|
| - bool savedRequestingScript = m_requestingScript;
|
| - m_requestingScript = true;
|
| - cs->addClient(this);
|
| - m_requestingScript = savedRequestingScript;
|
| - state = m_state;
|
| - // will be 0 if script was already loaded and ref() executed it
|
| - if (!m_pendingScripts.isEmpty())
|
| - state.setLoadingExtScript(true);
|
| - } else if (!m_fragment && doScriptExec) {
|
| - if (!m_executingScript)
|
| - m_pendingSrc.prepend(m_src);
|
| - else
|
| - prependingSrc = m_src;
|
| - setSrc(SegmentedString());
|
| - state = scriptExecution(ScriptSourceCode(scriptString, m_doc->frame() ? m_doc->frame()->document()->url() : KURL(), startLine), state);
|
| - }
|
| - }
|
| -
|
| - if (!m_executingScript && !state.loadingExtScript()) {
|
| - m_src.append(m_pendingSrc);
|
| - m_pendingSrc.clear();
|
| - } else if (!prependingSrc.isEmpty()) {
|
| - // restore first so that the write appends in the right place
|
| - // (does not hurt to do it again below)
|
| - m_currentPrependingSrc = savedPrependingSrc;
|
| -
|
| - // we need to do this slightly modified bit of one of the write() cases
|
| - // because we want to prepend to m_pendingSrc rather than appending
|
| - // if there's no previous prependingSrc
|
| - if (!m_pendingScripts.isEmpty()) {
|
| - if (m_currentPrependingSrc)
|
| - m_currentPrependingSrc->append(prependingSrc);
|
| - else
|
| - m_pendingSrc.prepend(prependingSrc);
|
| - } else {
|
| - m_state = state;
|
| - write(prependingSrc, false);
|
| - state = m_state;
|
| - }
|
| - }
|
| -
|
| -#if PRELOAD_SCANNER_ENABLED
|
| - if (!m_pendingScripts.isEmpty() && !m_executingScript) {
|
| - if (!m_preloadScanner)
|
| - m_preloadScanner.set(new PreloadScanner(m_doc));
|
| - if (!m_preloadScanner->inProgress()) {
|
| - m_preloadScanner->begin();
|
| - m_preloadScanner->write(m_pendingSrc);
|
| - }
|
| - }
|
| -#endif
|
| - m_currentPrependingSrc = savedPrependingSrc;
|
| -
|
| - return state;
|
| -}
|
| -
|
| -HTMLTokenizer::State HTMLTokenizer::scriptExecution(const ScriptSourceCode& sourceCode, State state)
|
| -{
|
| - if (m_fragment || !m_doc->frame())
|
| - return state;
|
| - m_executingScript++;
|
| -
|
| - SegmentedString* savedPrependingSrc = m_currentPrependingSrc;
|
| - SegmentedString prependingSrc;
|
| - m_currentPrependingSrc = &prependingSrc;
|
| -
|
| -#ifdef INSTRUMENT_LAYOUT_SCHEDULING
|
| - if (!m_doc->ownerElement())
|
| - printf("beginning script execution at %d\n", m_doc->elapsedTime());
|
| -#endif
|
| -
|
| - m_state = state;
|
| - m_doc->frame()->loader()->executeScript(sourceCode);
|
| - state = m_state;
|
| -
|
| - state.setAllowYield(true);
|
| -
|
| -#ifdef INSTRUMENT_LAYOUT_SCHEDULING
|
| - if (!m_doc->ownerElement())
|
| - printf("ending script execution at %d\n", m_doc->elapsedTime());
|
| -#endif
|
| -
|
| - m_executingScript--;
|
| -
|
| - if (!m_executingScript && !state.loadingExtScript()) {
|
| - m_pendingSrc.prepend(prependingSrc);
|
| - m_src.append(m_pendingSrc);
|
| - m_pendingSrc.clear();
|
| - } else if (!prependingSrc.isEmpty()) {
|
| - // restore first so that the write appends in the right place
|
| - // (does not hurt to do it again below)
|
| - m_currentPrependingSrc = savedPrependingSrc;
|
| -
|
| - // we need to do this slightly modified bit of one of the write() cases
|
| - // because we want to prepend to m_pendingSrc rather than appending
|
| - // if there's no previous prependingSrc
|
| - if (!m_pendingScripts.isEmpty()) {
|
| - if (m_currentPrependingSrc)
|
| - m_currentPrependingSrc->append(prependingSrc);
|
| - else
|
| - m_pendingSrc.prepend(prependingSrc);
|
| -
|
| -#if PRELOAD_SCANNER_ENABLED
|
| - // We are stuck waiting for another script. Lets check the source that
|
| - // was just document.write()n for anything to load.
|
| - PreloadScanner documentWritePreloadScanner(m_doc);
|
| - documentWritePreloadScanner.begin();
|
| - documentWritePreloadScanner.write(prependingSrc);
|
| - documentWritePreloadScanner.end();
|
| -#endif
|
| - } else {
|
| - m_state = state;
|
| - write(prependingSrc, false);
|
| - state = m_state;
|
| - }
|
| - }
|
| -
|
| - m_currentPrependingSrc = savedPrependingSrc;
|
| -
|
| - return state;
|
| -}
|
| -
|
| -HTMLTokenizer::State HTMLTokenizer::parseComment(SegmentedString& src, State state)
|
| -{
|
| - // FIXME: Why does this code even run for comments inside <script> and <style>? This seems bogus.
|
| - checkScriptBuffer(src.length());
|
| - while (!src.isEmpty()) {
|
| - UChar ch = *src;
|
| - m_scriptCode[m_scriptCodeSize++] = ch;
|
| - if (ch == '>') {
|
| - bool handleBrokenComments = m_brokenComments && !(state.inScript() || state.inStyle());
|
| - int endCharsCount = 1; // start off with one for the '>' character
|
| - if (m_scriptCodeSize > 2 && m_scriptCode[m_scriptCodeSize-3] == '-' && m_scriptCode[m_scriptCodeSize-2] == '-') {
|
| - endCharsCount = 3;
|
| - } else if (m_scriptCodeSize > 3 && m_scriptCode[m_scriptCodeSize-4] == '-' && m_scriptCode[m_scriptCodeSize-3] == '-' &&
|
| - m_scriptCode[m_scriptCodeSize-2] == '!') {
|
| - // Other browsers will accept --!> as a close comment, even though it's
|
| - // not technically valid.
|
| - endCharsCount = 4;
|
| - }
|
| - if (handleBrokenComments || endCharsCount > 1) {
|
| - src.advancePastNonNewline();
|
| - if (!(state.inTitle() || state.inScript() || state.inXmp() || state.inTextArea() || state.inStyle() || state.inIFrame())) {
|
| - checkScriptBuffer();
|
| - m_scriptCode[m_scriptCodeSize] = 0;
|
| - m_scriptCode[m_scriptCodeSize + 1] = 0;
|
| - m_currentToken.tagName = commentAtom;
|
| - m_currentToken.beginTag = true;
|
| - state = processListing(SegmentedString(m_scriptCode, m_scriptCodeSize - endCharsCount), state);
|
| - processToken();
|
| - m_currentToken.tagName = commentAtom;
|
| - m_currentToken.beginTag = false;
|
| - processToken();
|
| - m_scriptCodeSize = 0;
|
| - }
|
| - state.setInComment(false);
|
| - return state; // Finished parsing comment
|
| - }
|
| - }
|
| - src.advance(m_lineNumber);
|
| - }
|
| -
|
| - return state;
|
| -}
|
| -
|
| -HTMLTokenizer::State HTMLTokenizer::parseServer(SegmentedString& src, State state)
|
| -{
|
| - checkScriptBuffer(src.length());
|
| - while (!src.isEmpty()) {
|
| - UChar ch = *src;
|
| - m_scriptCode[m_scriptCodeSize++] = ch;
|
| - if (ch == '>' && m_scriptCodeSize > 1 && m_scriptCode[m_scriptCodeSize - 2] == '%') {
|
| - src.advancePastNonNewline();
|
| - state.setInServer(false);
|
| - m_scriptCodeSize = 0;
|
| - return state; // Finished parsing server include
|
| - }
|
| - src.advance(m_lineNumber);
|
| - }
|
| - return state;
|
| -}
|
| -
|
| -HTMLTokenizer::State HTMLTokenizer::parseProcessingInstruction(SegmentedString& src, State state)
|
| -{
|
| - UChar oldchar = 0;
|
| - while (!src.isEmpty()) {
|
| - UChar chbegin = *src;
|
| - if (chbegin == '\'')
|
| - tquote = tquote == SingleQuote ? NoQuote : SingleQuote;
|
| - else if (chbegin == '\"')
|
| - tquote = tquote == DoubleQuote ? NoQuote : DoubleQuote;
|
| - // Look for '?>'
|
| - // Some crappy sites omit the "?" before it, so
|
| - // we look for an unquoted '>' instead. (IE compatible)
|
| - else if (chbegin == '>' && (!tquote || oldchar == '?')) {
|
| - // We got a '?>' sequence
|
| - state.setInProcessingInstruction(false);
|
| - src.advancePastNonNewline();
|
| - state.setDiscardLF(true);
|
| - return state; // Finished parsing comment!
|
| - }
|
| - src.advance(m_lineNumber);
|
| - oldchar = chbegin;
|
| - }
|
| -
|
| - return state;
|
| -}
|
| -
|
| -HTMLTokenizer::State HTMLTokenizer::parseText(SegmentedString& src, State state)
|
| -{
|
| - while (!src.isEmpty()) {
|
| - UChar cc = *src;
|
| -
|
| - if (state.skipLF()) {
|
| - state.setSkipLF(false);
|
| - if (cc == '\n') {
|
| - src.advancePastNewline(m_lineNumber);
|
| - continue;
|
| - }
|
| - }
|
| -
|
| - // do we need to enlarge the buffer?
|
| - checkBuffer();
|
| -
|
| - if (cc == '\r') {
|
| - state.setSkipLF(true);
|
| - *m_dest++ = '\n';
|
| - } else
|
| - *m_dest++ = cc;
|
| - src.advance(m_lineNumber);
|
| - }
|
| -
|
| - return state;
|
| -}
|
| -
|
| -
|
| -HTMLTokenizer::State HTMLTokenizer::parseEntity(SegmentedString& src, UChar*& dest, State state, unsigned& cBufferPos, bool start, bool parsingTag)
|
| -{
|
| - if (start) {
|
| - cBufferPos = 0;
|
| - state.setEntityState(SearchEntity);
|
| - EntityUnicodeValue = 0;
|
| - }
|
| -
|
| - while(!src.isEmpty()) {
|
| - UChar cc = *src;
|
| - switch(state.entityState()) {
|
| - case NoEntity:
|
| - ASSERT(state.entityState() != NoEntity);
|
| - return state;
|
| -
|
| - case SearchEntity:
|
| - if (cc == '#') {
|
| - m_cBuffer[cBufferPos++] = cc;
|
| - src.advancePastNonNewline();
|
| - state.setEntityState(NumericSearch);
|
| - } else
|
| - state.setEntityState(EntityName);
|
| - break;
|
| -
|
| - case NumericSearch:
|
| - if (cc == 'x' || cc == 'X') {
|
| - m_cBuffer[cBufferPos++] = cc;
|
| - src.advancePastNonNewline();
|
| - state.setEntityState(Hexadecimal);
|
| - } else if (cc >= '0' && cc <= '9')
|
| - state.setEntityState(Decimal);
|
| - else
|
| - state.setEntityState(SearchSemicolon);
|
| - break;
|
| -
|
| - case Hexadecimal: {
|
| - int ll = min(src.length(), 10 - cBufferPos);
|
| - while (ll--) {
|
| - cc = *src;
|
| - if (!((cc >= '0' && cc <= '9') || (cc >= 'a' && cc <= 'f') || (cc >= 'A' && cc <= 'F'))) {
|
| - state.setEntityState(SearchSemicolon);
|
| - break;
|
| - }
|
| - int digit;
|
| - if (cc < 'A')
|
| - digit = cc - '0';
|
| - else
|
| - digit = (cc - 'A' + 10) & 0xF; // handle both upper and lower case without a branch
|
| - EntityUnicodeValue = EntityUnicodeValue * 16 + digit;
|
| - m_cBuffer[cBufferPos++] = cc;
|
| - src.advancePastNonNewline();
|
| - }
|
| - if (cBufferPos == 10)
|
| - state.setEntityState(SearchSemicolon);
|
| - break;
|
| - }
|
| - case Decimal:
|
| - {
|
| - int ll = min(src.length(), 9-cBufferPos);
|
| - while(ll--) {
|
| - cc = *src;
|
| -
|
| - if (!(cc >= '0' && cc <= '9')) {
|
| - state.setEntityState(SearchSemicolon);
|
| - break;
|
| - }
|
| -
|
| - EntityUnicodeValue = EntityUnicodeValue * 10 + (cc - '0');
|
| - m_cBuffer[cBufferPos++] = cc;
|
| - src.advancePastNonNewline();
|
| - }
|
| - if (cBufferPos == 9)
|
| - state.setEntityState(SearchSemicolon);
|
| - break;
|
| - }
|
| - case EntityName:
|
| - {
|
| - int ll = min(src.length(), 9-cBufferPos);
|
| - while(ll--) {
|
| - cc = *src;
|
| -
|
| - if (!((cc >= 'a' && cc <= 'z') || (cc >= '0' && cc <= '9') || (cc >= 'A' && cc <= 'Z'))) {
|
| - state.setEntityState(SearchSemicolon);
|
| - break;
|
| - }
|
| -
|
| - m_cBuffer[cBufferPos++] = cc;
|
| - src.advancePastNonNewline();
|
| - }
|
| - if (cBufferPos == 9)
|
| - state.setEntityState(SearchSemicolon);
|
| - if (state.entityState() == SearchSemicolon) {
|
| - if(cBufferPos > 1) {
|
| - // Since the maximum length of entity name is 9,
|
| - // so a single char array which is allocated on
|
| - // the stack, its length is 10, should be OK.
|
| - // Also if we have an illegal character, we treat it
|
| - // as illegal entity name.
|
| - unsigned testedEntityNameLen = 0;
|
| - char tmpEntityNameBuffer[10];
|
| -
|
| - ASSERT(cBufferPos < 10);
|
| - for (; testedEntityNameLen < cBufferPos; ++testedEntityNameLen) {
|
| - if (m_cBuffer[testedEntityNameLen] > 0x7e)
|
| - break;
|
| - tmpEntityNameBuffer[testedEntityNameLen] = m_cBuffer[testedEntityNameLen];
|
| - }
|
| -
|
| - const Entity *e;
|
| -
|
| - if (testedEntityNameLen == cBufferPos)
|
| - e = findEntity(tmpEntityNameBuffer, cBufferPos);
|
| - else
|
| - e = 0;
|
| -
|
| - if(e)
|
| - EntityUnicodeValue = e->code;
|
| -
|
| - // be IE compatible
|
| - if(parsingTag && EntityUnicodeValue > 255 && *src != ';')
|
| - EntityUnicodeValue = 0;
|
| - }
|
| - }
|
| - else
|
| - break;
|
| - }
|
| - case SearchSemicolon:
|
| - // Don't allow values that are more than 21 bits.
|
| - if (EntityUnicodeValue > 0 && EntityUnicodeValue <= 0x10FFFF) {
|
| - if (!inViewSourceMode()) {
|
| - if (*src == ';')
|
| - src.advancePastNonNewline();
|
| - if (EntityUnicodeValue <= 0xFFFF) {
|
| - checkBuffer();
|
| - src.push(fixUpChar(EntityUnicodeValue));
|
| - } else {
|
| - // Convert to UTF-16, using surrogate code points.
|
| - checkBuffer(2);
|
| - src.push(U16_LEAD(EntityUnicodeValue));
|
| - src.push(U16_TRAIL(EntityUnicodeValue));
|
| - }
|
| - } else {
|
| - // FIXME: We should eventually colorize entities by sending them as a special token.
|
| - checkBuffer(11);
|
| - *dest++ = '&';
|
| - for (unsigned i = 0; i < cBufferPos; i++)
|
| - dest[i] = m_cBuffer[i];
|
| - dest += cBufferPos;
|
| - if (*src == ';') {
|
| - *dest++ = ';';
|
| - src.advancePastNonNewline();
|
| - }
|
| - }
|
| - } else {
|
| - checkBuffer(10);
|
| - // ignore the sequence, add it to the buffer as plaintext
|
| - *dest++ = '&';
|
| - for (unsigned i = 0; i < cBufferPos; i++)
|
| - dest[i] = m_cBuffer[i];
|
| - dest += cBufferPos;
|
| - }
|
| -
|
| - state.setEntityState(NoEntity);
|
| - return state;
|
| - }
|
| - }
|
| -
|
| - return state;
|
| -}
|
| -
|
| -HTMLTokenizer::State HTMLTokenizer::parseDoctype(SegmentedString& src, State state)
|
| -{
|
| - ASSERT(state.inDoctype());
|
| - while (!src.isEmpty() && state.inDoctype()) {
|
| - UChar c = *src;
|
| - bool isWhitespace = c == '\r' || c == '\n' || c == '\t' || c == ' ';
|
| - switch (m_doctypeToken.state()) {
|
| - case DoctypeBegin: {
|
| - m_doctypeToken.setState(DoctypeBeforeName);
|
| - if (isWhitespace) {
|
| - src.advance(m_lineNumber);
|
| - if (inViewSourceMode())
|
| - m_doctypeToken.m_source.append(c);
|
| - }
|
| - break;
|
| - }
|
| - case DoctypeBeforeName: {
|
| - if (c == '>') {
|
| - // Malformed. Just exit.
|
| - src.advancePastNonNewline();
|
| - state.setInDoctype(false);
|
| - if (inViewSourceMode())
|
| - processDoctypeToken();
|
| - } else if (isWhitespace) {
|
| - src.advance(m_lineNumber);
|
| - if (inViewSourceMode())
|
| - m_doctypeToken.m_source.append(c);
|
| - } else
|
| - m_doctypeToken.setState(DoctypeName);
|
| - break;
|
| - }
|
| - case DoctypeName: {
|
| - if (c == '>') {
|
| - // Valid doctype. Emit it.
|
| - src.advancePastNonNewline();
|
| - state.setInDoctype(false);
|
| - processDoctypeToken();
|
| - } else if (isWhitespace) {
|
| - m_doctypeSearchCount = 0; // Used now to scan for PUBLIC
|
| - m_doctypeSecondarySearchCount = 0; // Used now to scan for SYSTEM
|
| - m_doctypeToken.setState(DoctypeAfterName);
|
| - src.advance(m_lineNumber);
|
| - if (inViewSourceMode())
|
| - m_doctypeToken.m_source.append(c);
|
| - } else {
|
| - src.advancePastNonNewline();
|
| - m_doctypeToken.m_name.append(c);
|
| - if (inViewSourceMode())
|
| - m_doctypeToken.m_source.append(c);
|
| - }
|
| - break;
|
| - }
|
| - case DoctypeAfterName: {
|
| - if (c == '>') {
|
| - // Valid doctype. Emit it.
|
| - src.advancePastNonNewline();
|
| - state.setInDoctype(false);
|
| - processDoctypeToken();
|
| - } else if (!isWhitespace) {
|
| - src.advancePastNonNewline();
|
| - if (toASCIILower(c) == publicStart[m_doctypeSearchCount]) {
|
| - m_doctypeSearchCount++;
|
| - if (m_doctypeSearchCount == 6)
|
| - // Found 'PUBLIC' sequence
|
| - m_doctypeToken.setState(DoctypeBeforePublicID);
|
| - } else if (m_doctypeSearchCount > 0) {
|
| - m_doctypeSearchCount = 0;
|
| - m_doctypeToken.setState(DoctypeBogus);
|
| - } else if (toASCIILower(c) == systemStart[m_doctypeSecondarySearchCount]) {
|
| - m_doctypeSecondarySearchCount++;
|
| - if (m_doctypeSecondarySearchCount == 6)
|
| - // Found 'SYSTEM' sequence
|
| - m_doctypeToken.setState(DoctypeBeforeSystemID);
|
| - } else {
|
| - m_doctypeSecondarySearchCount = 0;
|
| - m_doctypeToken.setState(DoctypeBogus);
|
| - }
|
| - if (inViewSourceMode())
|
| - m_doctypeToken.m_source.append(c);
|
| - } else {
|
| - src.advance(m_lineNumber); // Whitespace keeps us in the after name state.
|
| - if (inViewSourceMode())
|
| - m_doctypeToken.m_source.append(c);
|
| - }
|
| - break;
|
| - }
|
| - case DoctypeBeforePublicID: {
|
| - if (c == '\"' || c == '\'') {
|
| - tquote = c == '\"' ? DoubleQuote : SingleQuote;
|
| - m_doctypeToken.setState(DoctypePublicID);
|
| - src.advancePastNonNewline();
|
| - if (inViewSourceMode())
|
| - m_doctypeToken.m_source.append(c);
|
| - } else if (c == '>') {
|
| - // Considered bogus. Don't process the doctype.
|
| - src.advancePastNonNewline();
|
| - state.setInDoctype(false);
|
| - if (inViewSourceMode())
|
| - processDoctypeToken();
|
| - } else if (isWhitespace) {
|
| - src.advance(m_lineNumber);
|
| - if (inViewSourceMode())
|
| - m_doctypeToken.m_source.append(c);
|
| - } else
|
| - m_doctypeToken.setState(DoctypeBogus);
|
| - break;
|
| - }
|
| - case DoctypePublicID: {
|
| - if ((c == '\"' && tquote == DoubleQuote) || (c == '\'' && tquote == SingleQuote)) {
|
| - src.advancePastNonNewline();
|
| - m_doctypeToken.setState(DoctypeAfterPublicID);
|
| - if (inViewSourceMode())
|
| - m_doctypeToken.m_source.append(c);
|
| - } else if (c == '>') {
|
| - // Considered bogus. Don't process the doctype.
|
| - src.advancePastNonNewline();
|
| - state.setInDoctype(false);
|
| - if (inViewSourceMode())
|
| - processDoctypeToken();
|
| - } else {
|
| - m_doctypeToken.m_publicID.append(c);
|
| - src.advance(m_lineNumber);
|
| - if (inViewSourceMode())
|
| - m_doctypeToken.m_source.append(c);
|
| - }
|
| - break;
|
| - }
|
| - case DoctypeAfterPublicID:
|
| - if (c == '\"' || c == '\'') {
|
| - tquote = c == '\"' ? DoubleQuote : SingleQuote;
|
| - m_doctypeToken.setState(DoctypeSystemID);
|
| - src.advancePastNonNewline();
|
| - if (inViewSourceMode())
|
| - m_doctypeToken.m_source.append(c);
|
| - } else if (c == '>') {
|
| - // Valid doctype. Emit it now.
|
| - src.advancePastNonNewline();
|
| - state.setInDoctype(false);
|
| - processDoctypeToken();
|
| - } else if (isWhitespace) {
|
| - src.advance(m_lineNumber);
|
| - if (inViewSourceMode())
|
| - m_doctypeToken.m_source.append(c);
|
| - } else
|
| - m_doctypeToken.setState(DoctypeBogus);
|
| - break;
|
| - case DoctypeBeforeSystemID:
|
| - if (c == '\"' || c == '\'') {
|
| - tquote = c == '\"' ? DoubleQuote : SingleQuote;
|
| - m_doctypeToken.setState(DoctypeSystemID);
|
| - src.advancePastNonNewline();
|
| - if (inViewSourceMode())
|
| - m_doctypeToken.m_source.append(c);
|
| - } else if (c == '>') {
|
| - // Considered bogus. Don't process the doctype.
|
| - src.advancePastNonNewline();
|
| - state.setInDoctype(false);
|
| - } else if (isWhitespace) {
|
| - src.advance(m_lineNumber);
|
| - if (inViewSourceMode())
|
| - m_doctypeToken.m_source.append(c);
|
| - } else
|
| - m_doctypeToken.setState(DoctypeBogus);
|
| - break;
|
| - case DoctypeSystemID:
|
| - if ((c == '\"' && tquote == DoubleQuote) || (c == '\'' && tquote == SingleQuote)) {
|
| - src.advancePastNonNewline();
|
| - m_doctypeToken.setState(DoctypeAfterSystemID);
|
| - if (inViewSourceMode())
|
| - m_doctypeToken.m_source.append(c);
|
| - } else if (c == '>') {
|
| - // Considered bogus. Don't process the doctype.
|
| - src.advancePastNonNewline();
|
| - state.setInDoctype(false);
|
| - if (inViewSourceMode())
|
| - processDoctypeToken();
|
| - } else {
|
| - m_doctypeToken.m_systemID.append(c);
|
| - src.advance(m_lineNumber);
|
| - if (inViewSourceMode())
|
| - m_doctypeToken.m_source.append(c);
|
| - }
|
| - break;
|
| - case DoctypeAfterSystemID:
|
| - if (c == '>') {
|
| - // Valid doctype. Emit it now.
|
| - src.advancePastNonNewline();
|
| - state.setInDoctype(false);
|
| - processDoctypeToken();
|
| - } else if (isWhitespace) {
|
| - src.advance(m_lineNumber);
|
| - if (inViewSourceMode())
|
| - m_doctypeToken.m_source.append(c);
|
| - } else
|
| - m_doctypeToken.setState(DoctypeBogus);
|
| - break;
|
| - case DoctypeBogus:
|
| - if (c == '>') {
|
| - // Done with the bogus doctype.
|
| - src.advancePastNonNewline();
|
| - state.setInDoctype(false);
|
| - if (inViewSourceMode())
|
| - processDoctypeToken();
|
| - } else {
|
| - src.advance(m_lineNumber); // Just keep scanning for '>'
|
| - if (inViewSourceMode())
|
| - m_doctypeToken.m_source.append(c);
|
| - }
|
| - break;
|
| - default:
|
| - break;
|
| - }
|
| - }
|
| - return state;
|
| -}
|
| -
|
| -HTMLTokenizer::State HTMLTokenizer::parseTag(SegmentedString& src, State state)
|
| -{
|
| - ASSERT(!state.hasEntityState());
|
| -
|
| - unsigned cBufferPos = m_cBufferPos;
|
| -
|
| - bool lastIsSlash = false;
|
| -
|
| - while (!src.isEmpty()) {
|
| - checkBuffer();
|
| - switch(state.tagState()) {
|
| - case NoTag:
|
| - {
|
| - m_cBufferPos = cBufferPos;
|
| - return state;
|
| - }
|
| - case TagName:
|
| - {
|
| - if (searchCount > 0) {
|
| - if (*src == commentStart[searchCount]) {
|
| - searchCount++;
|
| - if (searchCount == 2)
|
| - m_doctypeSearchCount++; // A '!' is also part of a doctype, so we are moving through that still as well.
|
| - else
|
| - m_doctypeSearchCount = 0;
|
| - if (searchCount == 4) {
|
| - // Found '<!--' sequence
|
| - src.advancePastNonNewline();
|
| - m_dest = m_buffer; // ignore the previous part of this tag
|
| - state.setInComment(true);
|
| - state.setTagState(NoTag);
|
| -
|
| - // Fix bug 34302 at kde.bugs.org. Go ahead and treat
|
| - // <!--> as a valid comment, since both mozilla and IE on windows
|
| - // can handle this case. Only do this in quirks mode. -dwh
|
| - if (!src.isEmpty() && *src == '>' && m_doc->inCompatMode()) {
|
| - state.setInComment(false);
|
| - src.advancePastNonNewline();
|
| - if (!src.isEmpty())
|
| - m_cBuffer[cBufferPos++] = *src;
|
| - } else
|
| - state = parseComment(src, state);
|
| -
|
| - m_cBufferPos = cBufferPos;
|
| - return state; // Finished parsing tag!
|
| - }
|
| - m_cBuffer[cBufferPos++] = *src;
|
| - src.advancePastNonNewline();
|
| - break;
|
| - } else
|
| - searchCount = 0; // Stop looking for '<!--' sequence
|
| - }
|
| -
|
| - if (m_doctypeSearchCount > 0) {
|
| - if (toASCIILower(*src) == doctypeStart[m_doctypeSearchCount]) {
|
| - m_doctypeSearchCount++;
|
| - m_cBuffer[cBufferPos++] = *src;
|
| - src.advancePastNonNewline();
|
| - if (m_doctypeSearchCount == 9) {
|
| - // Found '<!DOCTYPE' sequence
|
| - state.setInDoctype(true);
|
| - state.setTagState(NoTag);
|
| - m_doctypeToken.reset();
|
| - if (inViewSourceMode())
|
| - m_doctypeToken.m_source.append(m_cBuffer, cBufferPos);
|
| - state = parseDoctype(src, state);
|
| - m_cBufferPos = cBufferPos;
|
| - return state;
|
| - }
|
| - break;
|
| - } else
|
| - m_doctypeSearchCount = 0; // Stop looking for '<!DOCTYPE' sequence
|
| - }
|
| -
|
| - bool finish = false;
|
| - unsigned int ll = min(src.length(), CBUFLEN - cBufferPos);
|
| - while (ll--) {
|
| - UChar curchar = *src;
|
| - if (isASCIISpace(curchar) || curchar == '>' || curchar == '<') {
|
| - finish = true;
|
| - break;
|
| - }
|
| -
|
| - // tolower() shows up on profiles. This is faster!
|
| - if (curchar >= 'A' && curchar <= 'Z' && !inViewSourceMode())
|
| - m_cBuffer[cBufferPos++] = curchar + ('a' - 'A');
|
| - else
|
| - m_cBuffer[cBufferPos++] = curchar;
|
| - src.advancePastNonNewline();
|
| - }
|
| -
|
| - // Disadvantage: we add the possible rest of the tag
|
| - // as attribute names. ### judge if this causes problems
|
| - if (finish || CBUFLEN == cBufferPos) {
|
| - bool beginTag;
|
| - UChar* ptr = m_cBuffer;
|
| - unsigned int len = cBufferPos;
|
| - m_cBuffer[cBufferPos] = '\0';
|
| - if ((cBufferPos > 0) && (*ptr == '/')) {
|
| - // End Tag
|
| - beginTag = false;
|
| - ptr++;
|
| - len--;
|
| - }
|
| - else
|
| - // Start Tag
|
| - beginTag = true;
|
| -
|
| - // Ignore the / in fake xml tags like <br/>. We trim off the "/" so that we'll get "br" as the tag name and not "br/".
|
| - if (len > 1 && ptr[len-1] == '/' && !inViewSourceMode())
|
| - ptr[--len] = '\0';
|
| -
|
| - // Now that we've shaved off any invalid / that might have followed the name), make the tag.
|
| - // FIXME: FireFox and WinIE turn !foo nodes into comments, we ignore comments. (fast/parser/tag-with-exclamation-point.html)
|
| - if (ptr[0] != '!' || inViewSourceMode()) {
|
| - m_currentToken.tagName = AtomicString(ptr);
|
| - m_currentToken.beginTag = beginTag;
|
| - }
|
| - m_dest = m_buffer;
|
| - state.setTagState(SearchAttribute);
|
| - cBufferPos = 0;
|
| - }
|
| - break;
|
| - }
|
| - case SearchAttribute:
|
| - while(!src.isEmpty()) {
|
| - UChar curchar = *src;
|
| - // In this mode just ignore any quotes we encounter and treat them like spaces.
|
| - if (!isASCIISpace(curchar) && curchar != '\'' && curchar != '"') {
|
| - if (curchar == '<' || curchar == '>')
|
| - state.setTagState(SearchEnd);
|
| - else
|
| - state.setTagState(AttributeName);
|
| -
|
| - cBufferPos = 0;
|
| - break;
|
| - }
|
| - if (inViewSourceMode())
|
| - m_currentToken.addViewSourceChar(curchar);
|
| - src.advance(m_lineNumber);
|
| - }
|
| - break;
|
| - case AttributeName:
|
| - {
|
| - int ll = min(src.length(), CBUFLEN - cBufferPos);
|
| - while (ll--) {
|
| - UChar curchar = *src;
|
| - // If we encounter a "/" when scanning an attribute name, treat it as a delimiter. This allows the
|
| - // cases like <input type=checkbox checked/> to work (and accommodates XML-style syntax as per HTML5).
|
| - if (curchar <= '>' && (curchar >= '<' || isASCIISpace(curchar) || curchar == '/')) {
|
| - m_cBuffer[cBufferPos] = '\0';
|
| - m_attrName = AtomicString(m_cBuffer);
|
| - m_dest = m_buffer;
|
| - *m_dest++ = 0;
|
| - state.setTagState(SearchEqual);
|
| - if (inViewSourceMode())
|
| - m_currentToken.addViewSourceChar('a');
|
| - break;
|
| - }
|
| -
|
| - // tolower() shows up on profiles. This is faster!
|
| - if (curchar >= 'A' && curchar <= 'Z' && !inViewSourceMode())
|
| - m_cBuffer[cBufferPos++] = curchar + ('a' - 'A');
|
| - else
|
| - m_cBuffer[cBufferPos++] = curchar;
|
| -
|
| - src.advance(m_lineNumber);
|
| - }
|
| - if (cBufferPos == CBUFLEN) {
|
| - m_cBuffer[cBufferPos] = '\0';
|
| - m_attrName = AtomicString(m_cBuffer);
|
| - m_dest = m_buffer;
|
| - *m_dest++ = 0;
|
| - state.setTagState(SearchEqual);
|
| - if (inViewSourceMode())
|
| - m_currentToken.addViewSourceChar('a');
|
| - }
|
| - break;
|
| - }
|
| - case SearchEqual:
|
| - while (!src.isEmpty()) {
|
| - UChar curchar = *src;
|
| -
|
| - if (lastIsSlash && curchar == '>') {
|
| - // This is a quirk (with a long sad history). We have to do this
|
| - // since widgets do <script src="foo.js"/> and expect the tag to close.
|
| - if (m_currentToken.tagName == scriptTag)
|
| - m_currentToken.selfClosingTag = true;
|
| - m_currentToken.brokenXMLStyle = true;
|
| - }
|
| -
|
| - // In this mode just ignore any quotes or slashes we encounter and treat them like spaces.
|
| - if (!isASCIISpace(curchar) && curchar != '\'' && curchar != '"' && curchar != '/') {
|
| - if (curchar == '=') {
|
| - state.setTagState(SearchValue);
|
| - if (inViewSourceMode())
|
| - m_currentToken.addViewSourceChar(curchar);
|
| - src.advancePastNonNewline();
|
| - } else {
|
| - m_currentToken.addAttribute(m_attrName, emptyAtom, inViewSourceMode());
|
| - m_dest = m_buffer;
|
| - state.setTagState(SearchAttribute);
|
| - lastIsSlash = false;
|
| - }
|
| - break;
|
| - }
|
| - if (inViewSourceMode())
|
| - m_currentToken.addViewSourceChar(curchar);
|
| -
|
| - lastIsSlash = curchar == '/';
|
| -
|
| - src.advance(m_lineNumber);
|
| - }
|
| - break;
|
| - case SearchValue:
|
| - while (!src.isEmpty()) {
|
| - UChar curchar = *src;
|
| - if (!isASCIISpace(curchar)) {
|
| - if (curchar == '\'' || curchar == '\"') {
|
| - tquote = curchar == '\"' ? DoubleQuote : SingleQuote;
|
| - state.setTagState(QuotedValue);
|
| - if (inViewSourceMode())
|
| - m_currentToken.addViewSourceChar(curchar);
|
| - src.advancePastNonNewline();
|
| - } else
|
| - state.setTagState(Value);
|
| -
|
| - break;
|
| - }
|
| - if (inViewSourceMode())
|
| - m_currentToken.addViewSourceChar(curchar);
|
| - src.advance(m_lineNumber);
|
| - }
|
| - break;
|
| - case QuotedValue:
|
| - while (!src.isEmpty()) {
|
| - checkBuffer();
|
| -
|
| - UChar curchar = *src;
|
| - if (curchar <= '>' && !src.escaped()) {
|
| - if (curchar == '>' && m_attrName.isEmpty()) {
|
| - // Handle a case like <img '>. Just go ahead and be willing
|
| - // to close the whole tag. Don't consume the character and
|
| - // just go back into SearchEnd while ignoring the whole
|
| - // value.
|
| - // FIXME: Note that this is actually not a very good solution.
|
| - // It doesn't handle the general case of
|
| - // unmatched quotes among attributes that have names. -dwh
|
| - while (m_dest > m_buffer + 1 && (m_dest[-1] == '\n' || m_dest[-1] == '\r'))
|
| - m_dest--; // remove trailing newlines
|
| - AtomicString attributeValue(m_buffer + 1, m_dest - m_buffer - 1);
|
| - if (!attributeValue.contains('/'))
|
| - m_attrName = attributeValue; // Just make the name/value match. (FIXME: Is this some WinIE quirk?)
|
| - m_currentToken.addAttribute(m_attrName, attributeValue, inViewSourceMode());
|
| - if (inViewSourceMode())
|
| - m_currentToken.addViewSourceChar('x');
|
| - state.setTagState(SearchAttribute);
|
| - m_dest = m_buffer;
|
| - tquote = NoQuote;
|
| - break;
|
| - }
|
| -
|
| - if (curchar == '&') {
|
| - src.advancePastNonNewline();
|
| - state = parseEntity(src, m_dest, state, cBufferPos, true, true);
|
| - break;
|
| - }
|
| -
|
| - if ((tquote == SingleQuote && curchar == '\'') || (tquote == DoubleQuote && curchar == '\"')) {
|
| - // some <input type=hidden> rely on trailing spaces. argh
|
| - while (m_dest > m_buffer + 1 && (m_dest[-1] == '\n' || m_dest[-1] == '\r'))
|
| - m_dest--; // remove trailing newlines
|
| - AtomicString attributeValue(m_buffer + 1, m_dest - m_buffer - 1);
|
| - if (m_attrName.isEmpty() && !attributeValue.contains('/')) {
|
| - m_attrName = attributeValue; // Make the name match the value. (FIXME: Is this a WinIE quirk?)
|
| - if (inViewSourceMode())
|
| - m_currentToken.addViewSourceChar('x');
|
| - } else if (inViewSourceMode())
|
| - m_currentToken.addViewSourceChar('v');
|
| - m_currentToken.addAttribute(m_attrName, attributeValue, inViewSourceMode());
|
| - m_dest = m_buffer;
|
| - state.setTagState(SearchAttribute);
|
| - tquote = NoQuote;
|
| - if (inViewSourceMode())
|
| - m_currentToken.addViewSourceChar(curchar);
|
| - src.advancePastNonNewline();
|
| - break;
|
| - }
|
| - }
|
| -
|
| - *m_dest++ = curchar;
|
| - src.advance(m_lineNumber);
|
| - }
|
| - break;
|
| - case Value:
|
| - while(!src.isEmpty()) {
|
| - checkBuffer();
|
| - UChar curchar = *src;
|
| - if (curchar <= '>' && !src.escaped()) {
|
| - // parse Entities
|
| - if (curchar == '&') {
|
| - src.advancePastNonNewline();
|
| - state = parseEntity(src, m_dest, state, cBufferPos, true, true);
|
| - break;
|
| - }
|
| - // no quotes. Every space means end of value
|
| - // '/' does not delimit in IE!
|
| - if (isASCIISpace(curchar) || curchar == '>') {
|
| - AtomicString attributeValue(m_buffer + 1, m_dest - m_buffer - 1);
|
| - m_currentToken.addAttribute(m_attrName, attributeValue, inViewSourceMode());
|
| - if (inViewSourceMode())
|
| - m_currentToken.addViewSourceChar('v');
|
| - m_dest = m_buffer;
|
| - state.setTagState(SearchAttribute);
|
| - break;
|
| - }
|
| - }
|
| -
|
| - *m_dest++ = curchar;
|
| - src.advance(m_lineNumber);
|
| - }
|
| - break;
|
| - case SearchEnd:
|
| - {
|
| - while (!src.isEmpty()) {
|
| - UChar ch = *src;
|
| - if (ch == '>' || ch == '<')
|
| - break;
|
| - if (ch == '/')
|
| - m_currentToken.selfClosingTag = true;
|
| - if (inViewSourceMode())
|
| - m_currentToken.addViewSourceChar(ch);
|
| - src.advance(m_lineNumber);
|
| - }
|
| - if (src.isEmpty())
|
| - break;
|
| -
|
| - searchCount = 0; // Stop looking for '<!--' sequence
|
| - state.setTagState(NoTag);
|
| - tquote = NoQuote;
|
| -
|
| - if (*src != '<')
|
| - src.advance(m_lineNumber);
|
| -
|
| - if (m_currentToken.tagName == nullAtom) { //stop if tag is unknown
|
| - m_cBufferPos = cBufferPos;
|
| - return state;
|
| - }
|
| -
|
| - AtomicString tagName = m_currentToken.tagName;
|
| -
|
| - // Handle <script src="foo"/> like Mozilla/Opera. We have to do this now for Dashboard
|
| - // compatibility.
|
| - bool isSelfClosingScript = m_currentToken.selfClosingTag && m_currentToken.beginTag && m_currentToken.tagName == scriptTag;
|
| - bool beginTag = !m_currentToken.selfClosingTag && m_currentToken.beginTag;
|
| - if (m_currentToken.beginTag && m_currentToken.tagName == scriptTag && !inViewSourceMode() && !m_parser->skipMode()) {
|
| - Attribute* a = 0;
|
| - m_scriptTagSrcAttrValue = String();
|
| - m_scriptTagCharsetAttrValue = String();
|
| - if (m_currentToken.attrs && !m_fragment) {
|
| - if (m_doc->frame() && m_doc->frame()->script()->isEnabled()) {
|
| - if ((a = m_currentToken.attrs->getAttributeItem(srcAttr)))
|
| - m_scriptTagSrcAttrValue = m_doc->completeURL(parseURL(a->value())).string();
|
| - }
|
| - }
|
| - }
|
| -
|
| - RefPtr<Node> n = processToken();
|
| - m_cBufferPos = cBufferPos;
|
| - if (n || inViewSourceMode()) {
|
| - if ((tagName == preTag || tagName == listingTag) && !inViewSourceMode()) {
|
| - if (beginTag)
|
| - state.setDiscardLF(true); // Discard the first LF after we open a pre.
|
| - } else if (tagName == scriptTag) {
|
| - ASSERT(!m_scriptNode);
|
| - m_scriptNode = static_pointer_cast<HTMLScriptElement>(n);
|
| - if (m_scriptNode)
|
| - m_scriptTagCharsetAttrValue = m_scriptNode->scriptCharset();
|
| - if (beginTag) {
|
| - m_searchStopper = scriptEnd;
|
| - m_searchStopperLength = 8;
|
| - state.setInScript(true);
|
| - state = parseSpecial(src, state);
|
| - } else if (isSelfClosingScript) { // Handle <script src="foo"/>
|
| - state.setInScript(true);
|
| - state = scriptHandler(state);
|
| - }
|
| - } else if (tagName == styleTag) {
|
| - if (beginTag) {
|
| - m_searchStopper = styleEnd;
|
| - m_searchStopperLength = 7;
|
| - state.setInStyle(true);
|
| - state = parseSpecial(src, state);
|
| - }
|
| - } else if (tagName == textareaTag) {
|
| - if (beginTag) {
|
| - m_searchStopper = textareaEnd;
|
| - m_searchStopperLength = 10;
|
| - state.setInTextArea(true);
|
| - state = parseSpecial(src, state);
|
| - }
|
| - } else if (tagName == titleTag) {
|
| - if (beginTag) {
|
| - m_searchStopper = titleEnd;
|
| - m_searchStopperLength = 7;
|
| - State savedState = state;
|
| - SegmentedString savedSrc = src;
|
| - long savedLineno = m_lineNumber;
|
| - state.setInTitle(true);
|
| - state = parseSpecial(src, state);
|
| - if (state.inTitle() && src.isEmpty()) {
|
| - // We just ate the rest of the document as the title #text node!
|
| - // Reset the state then retokenize without special title handling.
|
| - // Let the parser clean up the missing </title> tag.
|
| - // FIXME: This is incorrect, because src.isEmpty() doesn't mean we're
|
| - // at the end of the document unless m_noMoreData is also true. We need
|
| - // to detect this case elsewhere, and save the state somewhere other
|
| - // than a local variable.
|
| - state = savedState;
|
| - src = savedSrc;
|
| - m_lineNumber = savedLineno;
|
| - m_scriptCodeSize = 0;
|
| - }
|
| - }
|
| - } else if (tagName == xmpTag) {
|
| - if (beginTag) {
|
| - m_searchStopper = xmpEnd;
|
| - m_searchStopperLength = 5;
|
| - state.setInXmp(true);
|
| - state = parseSpecial(src, state);
|
| - }
|
| - } else if (tagName == iframeTag) {
|
| - if (beginTag) {
|
| - m_searchStopper = iframeEnd;
|
| - m_searchStopperLength = 8;
|
| - state.setInIFrame(true);
|
| - state = parseSpecial(src, state);
|
| - }
|
| - }
|
| - }
|
| - if (tagName == plaintextTag)
|
| - state.setInPlainText(beginTag);
|
| - return state; // Finished parsing tag!
|
| - }
|
| - } // end switch
|
| - }
|
| - m_cBufferPos = cBufferPos;
|
| - return state;
|
| -}
|
| -
|
| -inline bool HTMLTokenizer::continueProcessing(int& processedCount, double startTime, State &state)
|
| -{
|
| - // We don't want to be checking elapsed time with every character, so we only check after we've
|
| - // processed a certain number of characters.
|
| - bool allowedYield = state.allowYield();
|
| - state.setAllowYield(false);
|
| - if (!state.loadingExtScript() && !state.forceSynchronous() && !m_executingScript && (processedCount > m_tokenizerChunkSize || allowedYield)) {
|
| - processedCount = 0;
|
| - if (currentTime() - startTime > m_tokenizerTimeDelay) {
|
| - /* FIXME: We'd like to yield aggressively to give stylesheets the opportunity to
|
| - load, but this hurts overall performance on slower machines. For now turn this
|
| - off.
|
| - || (!m_doc->haveStylesheetsLoaded() &&
|
| - (m_doc->documentElement()->id() != ID_HTML || m_doc->body()))) {*/
|
| - // Schedule the timer to keep processing as soon as possible.
|
| - m_timer.startOneShot(0);
|
| -#ifdef INSTRUMENT_LAYOUT_SCHEDULING
|
| - if (currentTime() - startTime > m_tokenizerTimeDelay)
|
| - printf("Deferring processing of data because 500ms elapsed away from event loop.\n");
|
| -#endif
|
| - return false;
|
| - }
|
| - }
|
| -
|
| - processedCount++;
|
| - return true;
|
| -}
|
| -
|
| -bool HTMLTokenizer::write(const SegmentedString& str, bool appendData)
|
| -{
|
| - if (!m_buffer)
|
| - return false;
|
| -
|
| - if (m_parserStopped)
|
| - return false;
|
| -
|
| - SegmentedString source(str);
|
| - if (m_executingScript)
|
| - source.setExcludeLineNumbers();
|
| -
|
| - if ((m_executingScript && appendData) || !m_pendingScripts.isEmpty()) {
|
| - // don't parse; we will do this later
|
| - if (m_currentPrependingSrc)
|
| - m_currentPrependingSrc->append(source);
|
| - else {
|
| - m_pendingSrc.append(source);
|
| -#if PRELOAD_SCANNER_ENABLED
|
| - if (m_preloadScanner && m_preloadScanner->inProgress() && appendData)
|
| - m_preloadScanner->write(source);
|
| -#endif
|
| - }
|
| - return false;
|
| - }
|
| -
|
| -#if PRELOAD_SCANNER_ENABLED
|
| - if (m_preloadScanner && m_preloadScanner->inProgress() && appendData)
|
| - m_preloadScanner->end();
|
| -#endif
|
| -
|
| - if (!m_src.isEmpty())
|
| - m_src.append(source);
|
| - else
|
| - setSrc(source);
|
| -
|
| - // Once a timer is set, it has control of when the tokenizer continues.
|
| - if (m_timer.isActive())
|
| - return false;
|
| -
|
| - bool wasInWrite = m_inWrite;
|
| - m_inWrite = true;
|
| -
|
| -#ifdef INSTRUMENT_LAYOUT_SCHEDULING
|
| - if (!m_doc->ownerElement())
|
| - printf("Beginning write at time %d\n", m_doc->elapsedTime());
|
| -#endif
|
| -
|
| - int processedCount = 0;
|
| - double startTime = currentTime();
|
| -
|
| - Frame* frame = m_doc->frame();
|
| -
|
| - State state = m_state;
|
| -
|
| - while (!m_src.isEmpty() && (!frame || !frame->loader()->isScheduledLocationChangePending())) {
|
| - if (!continueProcessing(processedCount, startTime, state))
|
| - break;
|
| -
|
| - // do we need to enlarge the buffer?
|
| - checkBuffer();
|
| -
|
| - UChar cc = *m_src;
|
| -
|
| - bool wasSkipLF = state.skipLF();
|
| - if (wasSkipLF)
|
| - state.setSkipLF(false);
|
| -
|
| - if (wasSkipLF && (cc == '\n'))
|
| - m_src.advance();
|
| - else if (state.needsSpecialWriteHandling()) {
|
| - // it's important to keep needsSpecialWriteHandling with the flags this block tests
|
| - if (state.hasEntityState())
|
| - state = parseEntity(m_src, m_dest, state, m_cBufferPos, false, state.hasTagState());
|
| - else if (state.inPlainText())
|
| - state = parseText(m_src, state);
|
| - else if (state.inAnySpecial())
|
| - state = parseSpecial(m_src, state);
|
| - else if (state.inComment())
|
| - state = parseComment(m_src, state);
|
| - else if (state.inDoctype())
|
| - state = parseDoctype(m_src, state);
|
| - else if (state.inServer())
|
| - state = parseServer(m_src, state);
|
| - else if (state.inProcessingInstruction())
|
| - state = parseProcessingInstruction(m_src, state);
|
| - else if (state.hasTagState())
|
| - state = parseTag(m_src, state);
|
| - else if (state.startTag()) {
|
| - state.setStartTag(false);
|
| -
|
| - switch(cc) {
|
| - case '/':
|
| - break;
|
| - case '!': {
|
| - // <!-- comment --> or <!DOCTYPE ...>
|
| - searchCount = 1; // Look for '<!--' sequence to start comment or '<!DOCTYPE' sequence to start doctype
|
| - m_doctypeSearchCount = 1;
|
| - break;
|
| - }
|
| - case '?': {
|
| - // xml processing instruction
|
| - state.setInProcessingInstruction(true);
|
| - tquote = NoQuote;
|
| - state = parseProcessingInstruction(m_src, state);
|
| - continue;
|
| -
|
| - break;
|
| - }
|
| - case '%':
|
| - if (!m_brokenServer) {
|
| - // <% server stuff, handle as comment %>
|
| - state.setInServer(true);
|
| - tquote = NoQuote;
|
| - state = parseServer(m_src, state);
|
| - continue;
|
| - }
|
| - // else fall through
|
| - default: {
|
| - if( ((cc >= 'a') && (cc <= 'z')) || ((cc >= 'A') && (cc <= 'Z'))) {
|
| - // Start of a Start-Tag
|
| - } else {
|
| - // Invalid tag
|
| - // Add as is
|
| - *m_dest = '<';
|
| - m_dest++;
|
| - continue;
|
| - }
|
| - }
|
| - }; // end case
|
| -
|
| - processToken();
|
| -
|
| - m_cBufferPos = 0;
|
| - state.setTagState(TagName);
|
| - state = parseTag(m_src, state);
|
| - }
|
| - } else if (cc == '&' && !m_src.escaped()) {
|
| - m_src.advancePastNonNewline();
|
| - state = parseEntity(m_src, m_dest, state, m_cBufferPos, true, state.hasTagState());
|
| - } else if (cc == '<' && !m_src.escaped()) {
|
| - m_currentTagStartLineNumber = m_lineNumber;
|
| - m_src.advancePastNonNewline();
|
| - state.setStartTag(true);
|
| - state.setDiscardLF(false);
|
| - } else if (cc == '\n' || cc == '\r') {
|
| - if (state.discardLF())
|
| - // Ignore this LF
|
| - state.setDiscardLF(false); // We have discarded 1 LF
|
| - else {
|
| - // Process this LF
|
| - *m_dest++ = '\n';
|
| - if (cc == '\r' && !m_src.excludeLineNumbers())
|
| - m_lineNumber++;
|
| - }
|
| -
|
| - /* Check for MS-DOS CRLF sequence */
|
| - if (cc == '\r')
|
| - state.setSkipLF(true);
|
| - m_src.advance(m_lineNumber);
|
| - } else {
|
| - state.setDiscardLF(false);
|
| - *m_dest++ = cc;
|
| - m_src.advancePastNonNewline();
|
| - }
|
| - }
|
| -
|
| -#ifdef INSTRUMENT_LAYOUT_SCHEDULING
|
| - if (!m_doc->ownerElement())
|
| - printf("Ending write at time %d\n", m_doc->elapsedTime());
|
| -#endif
|
| -
|
| - m_inWrite = wasInWrite;
|
| -
|
| - m_state = state;
|
| -
|
| - if (m_noMoreData && !m_inWrite && !state.loadingExtScript() && !m_executingScript && !m_timer.isActive()) {
|
| - end(); // this actually causes us to be deleted
|
| - return true;
|
| - }
|
| - return false;
|
| -}
|
| -
|
| -void HTMLTokenizer::stopParsing()
|
| -{
|
| - Tokenizer::stopParsing();
|
| - m_timer.stop();
|
| -
|
| - // The part needs to know that the tokenizer has finished with its data,
|
| - // regardless of whether it happened naturally or due to manual intervention.
|
| - if (!m_fragment && m_doc->frame())
|
| - m_doc->frame()->loader()->tokenizerProcessedData();
|
| -}
|
| -
|
| -bool HTMLTokenizer::processingData() const
|
| -{
|
| - return m_timer.isActive() || m_inWrite;
|
| -}
|
| -
|
| -void HTMLTokenizer::timerFired(Timer<HTMLTokenizer>*)
|
| -{
|
| -#ifdef INSTRUMENT_LAYOUT_SCHEDULING
|
| - if (!m_doc->ownerElement())
|
| - printf("Beginning timer write at time %d\n", m_doc->elapsedTime());
|
| -#endif
|
| -
|
| - if (m_doc->view() && m_doc->view()->layoutPending() && !m_doc->minimumLayoutDelay()) {
|
| - // Restart the timer and let layout win. This is basically a way of ensuring that the layout
|
| - // timer has higher priority than our timer.
|
| - m_timer.startOneShot(0);
|
| - return;
|
| - }
|
| -
|
| - // Invoke write() as though more data came in. This might cause us to get deleted.
|
| - write(SegmentedString(), true);
|
| -}
|
| -
|
| -void HTMLTokenizer::end()
|
| -{
|
| - ASSERT(!m_timer.isActive());
|
| - m_timer.stop(); // Only helps if assertion above fires, but do it anyway.
|
| -
|
| - if (m_buffer) {
|
| - // parseTag is using the buffer for different matters
|
| - if (!m_state.hasTagState())
|
| - processToken();
|
| -
|
| - fastFree(m_scriptCode);
|
| - m_scriptCode = 0;
|
| - m_scriptCodeSize = m_scriptCodeCapacity = m_scriptCodeResync = 0;
|
| -
|
| - fastFree(m_buffer);
|
| - m_buffer = 0;
|
| - }
|
| -
|
| - if (!inViewSourceMode())
|
| - m_parser->finished();
|
| - else
|
| - m_doc->finishedParsing();
|
| -}
|
| -
|
| -void HTMLTokenizer::finish()
|
| -{
|
| - // do this as long as we don't find matching comment ends
|
| - while ((m_state.inComment() || m_state.inServer()) && m_scriptCode && m_scriptCodeSize) {
|
| - // we've found an unmatched comment start
|
| - if (m_state.inComment())
|
| - m_brokenComments = true;
|
| - else
|
| - m_brokenServer = true;
|
| - checkScriptBuffer();
|
| - m_scriptCode[m_scriptCodeSize] = 0;
|
| - m_scriptCode[m_scriptCodeSize + 1] = 0;
|
| - int pos;
|
| - String food;
|
| - if (m_state.inScript() || m_state.inStyle() || m_state.inTextArea())
|
| - food = String(m_scriptCode, m_scriptCodeSize);
|
| - else if (m_state.inServer()) {
|
| - food = "<";
|
| - food.append(m_scriptCode, m_scriptCodeSize);
|
| - } else {
|
| - pos = find(m_scriptCode, m_scriptCodeSize, '>');
|
| - food = String(m_scriptCode + pos + 1, m_scriptCodeSize - pos - 1);
|
| - }
|
| - fastFree(m_scriptCode);
|
| - m_scriptCode = 0;
|
| - m_scriptCodeSize = m_scriptCodeCapacity = m_scriptCodeResync = 0;
|
| - m_state.setInComment(false);
|
| - m_state.setInServer(false);
|
| - if (!food.isEmpty())
|
| - write(food, true);
|
| - }
|
| - // this indicates we will not receive any more data... but if we are waiting on
|
| - // an external script to load, we can't finish parsing until that is done
|
| - m_noMoreData = true;
|
| - if (!m_inWrite && !m_state.loadingExtScript() && !m_executingScript && !m_timer.isActive())
|
| - end(); // this actually causes us to be deleted
|
| -}
|
| -
|
| -PassRefPtr<Node> HTMLTokenizer::processToken()
|
| -{
|
| - ScriptController* scriptController = (!m_fragment && m_doc->frame()) ? m_doc->frame()->script() : 0;
|
| - if (scriptController && scriptController->isEnabled())
|
| - // FIXME: Why isn't this m_currentScriptTagStartLineNumber? I suspect this is wrong.
|
| - scriptController->setEventHandlerLineno(m_currentTagStartLineNumber + 1); // Script line numbers are 1 based.
|
| - if (m_dest > m_buffer) {
|
| - m_currentToken.text = StringImpl::createStrippingNullCharacters(m_buffer, m_dest - m_buffer);
|
| - if (m_currentToken.tagName != commentAtom)
|
| - m_currentToken.tagName = textAtom;
|
| - } else if (m_currentToken.tagName == nullAtom) {
|
| - m_currentToken.reset();
|
| - if (scriptController)
|
| - scriptController->setEventHandlerLineno(m_lineNumber + 1); // Script line numbers are 1 based.
|
| - return 0;
|
| - }
|
| -
|
| - m_dest = m_buffer;
|
| -
|
| - RefPtr<Node> n;
|
| -
|
| - if (!m_parserStopped) {
|
| - if (NamedMappedAttrMap* map = m_currentToken.attrs.get())
|
| - map->shrinkToLength();
|
| - if (inViewSourceMode())
|
| - static_cast<HTMLViewSourceDocument*>(m_doc)->addViewSourceToken(&m_currentToken);
|
| - else
|
| - // pass the token over to the parser, the parser DOES NOT delete the token
|
| - n = m_parser->parseToken(&m_currentToken);
|
| - }
|
| - m_currentToken.reset();
|
| - if (scriptController)
|
| - scriptController->setEventHandlerLineno(0);
|
| -
|
| - return n.release();
|
| -}
|
| -
|
| -void HTMLTokenizer::processDoctypeToken()
|
| -{
|
| - if (inViewSourceMode())
|
| - static_cast<HTMLViewSourceDocument*>(m_doc)->addViewSourceDoctypeToken(&m_doctypeToken);
|
| - else
|
| - m_parser->parseDoctypeToken(&m_doctypeToken);
|
| -}
|
| -
|
| -HTMLTokenizer::~HTMLTokenizer()
|
| -{
|
| - ASSERT(!m_inWrite);
|
| - reset();
|
| -}
|
| -
|
| -
|
| -void HTMLTokenizer::enlargeBuffer(int len)
|
| -{
|
| - int newSize = max(m_bufferSize * 2, m_bufferSize + len);
|
| - int oldOffset = m_dest - m_buffer;
|
| - m_buffer = static_cast<UChar*>(fastRealloc(m_buffer, newSize * sizeof(UChar)));
|
| - m_dest = m_buffer + oldOffset;
|
| - m_bufferSize = newSize;
|
| -}
|
| -
|
| -void HTMLTokenizer::enlargeScriptBuffer(int len)
|
| -{
|
| - int newSize = max(m_scriptCodeCapacity * 2, m_scriptCodeCapacity + len);
|
| - m_scriptCode = static_cast<UChar*>(fastRealloc(m_scriptCode, newSize * sizeof(UChar)));
|
| - m_scriptCodeCapacity = newSize;
|
| -}
|
| -
|
| -void HTMLTokenizer::executeScriptsWaitingForStylesheets()
|
| -{
|
| - ASSERT(m_doc->haveStylesheetsLoaded());
|
| -
|
| - if (m_hasScriptsWaitingForStylesheets)
|
| - notifyFinished(0);
|
| -}
|
| -
|
| -void HTMLTokenizer::notifyFinished(CachedResource*)
|
| -{
|
| -#ifdef INSTRUMENT_LAYOUT_SCHEDULING
|
| - if (!m_doc->ownerElement())
|
| - printf("script loaded at %d\n", m_doc->elapsedTime());
|
| -#endif
|
| -
|
| - ASSERT(!m_pendingScripts.isEmpty());
|
| -
|
| - // Make external scripts wait for external stylesheets.
|
| - // FIXME: This needs to be done for inline scripts too.
|
| - m_hasScriptsWaitingForStylesheets = !m_doc->haveStylesheetsLoaded();
|
| - if (m_hasScriptsWaitingForStylesheets)
|
| - return;
|
| -
|
| - bool finished = false;
|
| - while (!finished && m_pendingScripts.first()->isLoaded()) {
|
| - CachedScript* cs = m_pendingScripts.first().get();
|
| - m_pendingScripts.removeFirst();
|
| - ASSERT(cache()->disabled() || cs->accessCount() > 0);
|
| -
|
| - setSrc(SegmentedString());
|
| -
|
| - // make sure we forget about the script before we execute the new one
|
| - // infinite recursion might happen otherwise
|
| - ScriptSourceCode sourceCode(cs);
|
| - bool errorOccurred = cs->errorOccurred();
|
| - cs->removeClient(this);
|
| -
|
| - RefPtr<Node> n = m_scriptNode.release();
|
| -
|
| -#ifdef INSTRUMENT_LAYOUT_SCHEDULING
|
| - if (!m_doc->ownerElement())
|
| - printf("external script beginning execution at %d\n", m_doc->elapsedTime());
|
| -#endif
|
| -
|
| - if (errorOccurred)
|
| - EventTargetNodeCast(n.get())->dispatchEventForType(eventNames().errorEvent, true, false);
|
| - else {
|
| - if (static_cast<HTMLScriptElement*>(n.get())->shouldExecuteAsJavaScript())
|
| - m_state = scriptExecution(sourceCode, m_state);
|
| - EventTargetNodeCast(n.get())->dispatchEventForType(eventNames().loadEvent, false, false);
|
| - }
|
| -
|
| - // The state of m_pendingScripts.isEmpty() can change inside the scriptExecution()
|
| - // call above, so test afterwards.
|
| - finished = m_pendingScripts.isEmpty();
|
| - if (finished) {
|
| - ASSERT(!m_hasScriptsWaitingForStylesheets);
|
| - m_state.setLoadingExtScript(false);
|
| -#ifdef INSTRUMENT_LAYOUT_SCHEDULING
|
| - if (!m_doc->ownerElement())
|
| - printf("external script finished execution at %d\n", m_doc->elapsedTime());
|
| -#endif
|
| - } else if (m_hasScriptsWaitingForStylesheets) {
|
| - // m_hasScriptsWaitingForStylesheets flag might have changed during the script execution.
|
| - // If it did we are now blocked waiting for stylesheets and should not execute more scripts until they arrive.
|
| - finished = true;
|
| - }
|
| -
|
| - // 'm_requestingScript' is true when we are called synchronously from
|
| - // scriptHandler(). In that case scriptHandler() will take care
|
| - // of m_pendingSrc.
|
| - if (!m_requestingScript) {
|
| - SegmentedString rest = m_pendingSrc;
|
| - m_pendingSrc.clear();
|
| - write(rest, false);
|
| - // we might be deleted at this point, do not access any members.
|
| - }
|
| - }
|
| -}
|
| -
|
| -bool HTMLTokenizer::isWaitingForScripts() const
|
| -{
|
| - return m_state.loadingExtScript();
|
| -}
|
| -
|
| -void HTMLTokenizer::setSrc(const SegmentedString& source)
|
| -{
|
| - m_src = source;
|
| -}
|
| -
|
| -void parseHTMLDocumentFragment(const String& source, DocumentFragment* fragment)
|
| -{
|
| - HTMLTokenizer tok(fragment);
|
| - tok.setForceSynchronous(true);
|
| - tok.write(source, true);
|
| - tok.finish();
|
| - ASSERT(!tok.processingData()); // make sure we're done (see 3963151)
|
| -}
|
| -
|
| -UChar decodeNamedEntity(const char* name)
|
| -{
|
| - const Entity* e = findEntity(name, strlen(name));
|
| - return e ? e->code : 0;
|
| -}
|
| -
|
| -}
|
| -
|
| -
|
| +/*
|
| + Copyright (C) 1997 Martin Jones (mjones@kde.org)
|
| + (C) 1997 Torben Weis (weis@kde.org)
|
| + (C) 1998 Waldo Bastian (bastian@kde.org)
|
| + (C) 1999 Lars Knoll (knoll@kde.org)
|
| + (C) 1999 Antti Koivisto (koivisto@kde.org)
|
| + (C) 2001 Dirk Mueller (mueller@kde.org)
|
| + Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
|
| + Copyright (C) 2005, 2006 Alexey Proskuryakov (ap@nypop.com)
|
| +
|
| + This library is free software; you can redistribute it and/or
|
| + modify it under the terms of the GNU Library General Public
|
| + License as published by the Free Software Foundation; either
|
| + version 2 of the License, or (at your option) any later version.
|
| +
|
| + This library is distributed in the hope that it will be useful,
|
| + but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| + Library General Public License for more details.
|
| +
|
| + You should have received a copy of the GNU Library General Public License
|
| + along with this library; see the file COPYING.LIB. If not, write to
|
| + the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
| + Boston, MA 02110-1301, USA.
|
| +*/
|
| +
|
| +#include "config.h"
|
| +#include "HTMLTokenizer.h"
|
| +
|
| +#include "CSSHelper.h"
|
| +#include "Cache.h"
|
| +#include "CachedScript.h"
|
| +#include "DocLoader.h"
|
| +#include "DocumentFragment.h"
|
| +#include "EventNames.h"
|
| +#include "Frame.h"
|
| +#include "FrameLoader.h"
|
| +#include "FrameView.h"
|
| +#include "HTMLElement.h"
|
| +#include "HTMLNames.h"
|
| +#include "HTMLParser.h"
|
| +#include "HTMLScriptElement.h"
|
| +#include "HTMLViewSourceDocument.h"
|
| +#include "Page.h"
|
| +#include "PreloadScanner.h"
|
| +#include "ScriptController.h"
|
| +#include "ScriptSourceCode.h"
|
| +#include "ScriptValue.h"
|
| +#include <wtf/ASCIICType.h>
|
| +#include <wtf/CurrentTime.h>
|
| +
|
| +#include "HTMLEntityNames.c"
|
| +
|
| +#define PRELOAD_SCANNER_ENABLED 1
|
| +// #define INSTRUMENT_LAYOUT_SCHEDULING 1
|
| +
|
| +using namespace WTF;
|
| +using namespace std;
|
| +
|
| +namespace WebCore {
|
| +
|
| +using namespace HTMLNames;
|
| +
|
| +#if MOBILE
|
| +// The mobile device needs to be responsive, as such the tokenizer chunk size is reduced.
|
| +// This value is used to define how many characters the tokenizer will process before
|
| +// yielding control.
|
| +static const int defaultTokenizerChunkSize = 256;
|
| +#else
|
| +static const int defaultTokenizerChunkSize = 4096;
|
| +#endif
|
| +
|
| +#if MOBILE
|
| +// As the chunks are smaller (above), the tokenizer should not yield for as long a period, otherwise
|
| +// it will take way too long to load a page.
|
| +static const double defaultTokenizerTimeDelay = 0.300;
|
| +#else
|
| +// FIXME: We would like this constant to be 200ms.
|
| +// Yielding more aggressively results in increased responsiveness and better incremental rendering.
|
| +// It slows down overall page-load on slower machines, though, so for now we set a value of 500.
|
| +static const double defaultTokenizerTimeDelay = 0.500;
|
| +#endif
|
| +
|
| +static const char commentStart [] = "<!--";
|
| +static const char doctypeStart [] = "<!doctype";
|
| +static const char publicStart [] = "public";
|
| +static const char systemStart [] = "system";
|
| +static const char scriptEnd [] = "</script";
|
| +static const char xmpEnd [] = "</xmp";
|
| +static const char styleEnd [] = "</style";
|
| +static const char textareaEnd [] = "</textarea";
|
| +static const char titleEnd [] = "</title";
|
| +static const char iframeEnd [] = "</iframe";
|
| +
|
| +// Full support for MS Windows extensions to Latin-1.
|
| +// Technically these extensions should only be activated for pages
|
| +// marked "windows-1252" or "cp1252", but
|
| +// in the standard Microsoft way, these extensions infect hundreds of thousands
|
| +// of web pages. Note that people with non-latin-1 Microsoft extensions
|
| +// are SOL.
|
| +//
|
| +// See: http://www.microsoft.com/globaldev/reference/WinCP.asp
|
| +// http://www.bbsinc.com/iso8859.html
|
| +// http://www.obviously.com/
|
| +//
|
| +// There may be better equivalents
|
| +
|
| +// We only need this for entities. For non-entity text, we handle this in the text encoding.
|
| +
|
| +static const UChar windowsLatin1ExtensionArray[32] = {
|
| + 0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, // 80-87
|
| + 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F, // 88-8F
|
| + 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, // 90-97
|
| + 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178 // 98-9F
|
| +};
|
| +
|
| +static inline UChar fixUpChar(UChar c)
|
| +{
|
| + if ((c & ~0x1F) != 0x0080)
|
| + return c;
|
| + return windowsLatin1ExtensionArray[c - 0x80];
|
| +}
|
| +
|
| +static inline bool tagMatch(const char* s1, const UChar* s2, unsigned length)
|
| +{
|
| + for (unsigned i = 0; i != length; ++i) {
|
| + unsigned char c1 = s1[i];
|
| + unsigned char uc1 = toASCIIUpper(static_cast<char>(c1));
|
| + UChar c2 = s2[i];
|
| + if (c1 != c2 && uc1 != c2)
|
| + return false;
|
| + }
|
| + return true;
|
| +}
|
| +
|
| +inline void Token::addAttribute(AtomicString& attrName, const AtomicString& attributeValue, bool viewSourceMode)
|
| +{
|
| + if (!attrName.isEmpty()) {
|
| + ASSERT(!attrName.contains('/'));
|
| + RefPtr<MappedAttribute> a = MappedAttribute::create(attrName, attributeValue);
|
| + if (!attrs) {
|
| + attrs = NamedMappedAttrMap::create();
|
| + attrs->reserveInitialCapacity(10);
|
| + }
|
| + attrs->insertAttribute(a.release(), viewSourceMode);
|
| + }
|
| +
|
| + attrName = emptyAtom;
|
| +}
|
| +
|
| +// ----------------------------------------------------------------------------
|
| +
|
| +HTMLTokenizer::HTMLTokenizer(HTMLDocument* doc, bool reportErrors)
|
| + : Tokenizer()
|
| + , m_buffer(0)
|
| + , m_scriptCode(0)
|
| + , m_scriptCodeSize(0)
|
| + , m_scriptCodeCapacity(0)
|
| + , m_scriptCodeResync(0)
|
| + , m_executingScript(0)
|
| + , m_requestingScript(false)
|
| + , m_hasScriptsWaitingForStylesheets(false)
|
| + , m_timer(this, &HTMLTokenizer::timerFired)
|
| + , m_doc(doc)
|
| + , m_parser(new HTMLParser(doc, reportErrors))
|
| + , m_inWrite(false)
|
| + , m_fragment(false)
|
| +{
|
| + begin();
|
| +}
|
| +
|
| +HTMLTokenizer::HTMLTokenizer(HTMLViewSourceDocument* doc)
|
| + : Tokenizer(true)
|
| + , m_buffer(0)
|
| + , m_scriptCode(0)
|
| + , m_scriptCodeSize(0)
|
| + , m_scriptCodeCapacity(0)
|
| + , m_scriptCodeResync(0)
|
| + , m_executingScript(0)
|
| + , m_requestingScript(false)
|
| + , m_hasScriptsWaitingForStylesheets(false)
|
| + , m_timer(this, &HTMLTokenizer::timerFired)
|
| + , m_doc(doc)
|
| + , m_parser(0)
|
| + , m_inWrite(false)
|
| + , m_fragment(false)
|
| +{
|
| + begin();
|
| +}
|
| +
|
| +HTMLTokenizer::HTMLTokenizer(DocumentFragment* frag)
|
| + : m_buffer(0)
|
| + , m_scriptCode(0)
|
| + , m_scriptCodeSize(0)
|
| + , m_scriptCodeCapacity(0)
|
| + , m_scriptCodeResync(0)
|
| + , m_executingScript(0)
|
| + , m_requestingScript(false)
|
| + , m_hasScriptsWaitingForStylesheets(false)
|
| + , m_timer(this, &HTMLTokenizer::timerFired)
|
| + , m_doc(frag->document())
|
| + , m_parser(new HTMLParser(frag))
|
| + , m_inWrite(false)
|
| + , m_fragment(true)
|
| +{
|
| + begin();
|
| +}
|
| +
|
| +void HTMLTokenizer::reset()
|
| +{
|
| + ASSERT(m_executingScript == 0);
|
| +
|
| + while (!m_pendingScripts.isEmpty()) {
|
| + CachedScript* cs = m_pendingScripts.first().get();
|
| + m_pendingScripts.removeFirst();
|
| + ASSERT(cache()->disabled() || cs->accessCount() > 0);
|
| + cs->removeClient(this);
|
| + }
|
| +
|
| + fastFree(m_buffer);
|
| + m_buffer = m_dest = 0;
|
| + m_bufferSize = 0;
|
| +
|
| + fastFree(m_scriptCode);
|
| + m_scriptCode = 0;
|
| + m_scriptCodeSize = m_scriptCodeCapacity = m_scriptCodeResync = 0;
|
| +
|
| + m_timer.stop();
|
| + m_state.setAllowYield(false);
|
| + m_state.setForceSynchronous(false);
|
| +
|
| + m_currentToken.reset();
|
| + m_doctypeToken.reset();
|
| + m_doctypeSearchCount = 0;
|
| + m_doctypeSecondarySearchCount = 0;
|
| + m_hasScriptsWaitingForStylesheets = false;
|
| +}
|
| +
|
| +void HTMLTokenizer::begin()
|
| +{
|
| + m_executingScript = 0;
|
| + m_requestingScript = false;
|
| + m_hasScriptsWaitingForStylesheets = false;
|
| + m_state.setLoadingExtScript(false);
|
| + reset();
|
| + m_bufferSize = 254;
|
| + m_buffer = static_cast<UChar*>(fastMalloc(sizeof(UChar) * 254));
|
| + m_dest = m_buffer;
|
| + tquote = NoQuote;
|
| + searchCount = 0;
|
| + m_state.setEntityState(NoEntity);
|
| + m_scriptTagSrcAttrValue = String();
|
| + m_pendingSrc.clear();
|
| + m_currentPrependingSrc = 0;
|
| + m_noMoreData = false;
|
| + m_brokenComments = false;
|
| + m_brokenServer = false;
|
| + m_lineNumber = 0;
|
| + m_currentScriptTagStartLineNumber = 0;
|
| + m_currentTagStartLineNumber = 0;
|
| + m_state.setForceSynchronous(false);
|
| +
|
| + Page* page = m_doc->page();
|
| + if (page && page->hasCustomHTMLTokenizerTimeDelay())
|
| + m_tokenizerTimeDelay = page->customHTMLTokenizerTimeDelay();
|
| + else
|
| + m_tokenizerTimeDelay = defaultTokenizerTimeDelay;
|
| +
|
| + if (page && page->hasCustomHTMLTokenizerChunkSize())
|
| + m_tokenizerChunkSize = page->customHTMLTokenizerChunkSize();
|
| + else
|
| + m_tokenizerChunkSize = defaultTokenizerChunkSize;
|
| +}
|
| +
|
| +void HTMLTokenizer::setForceSynchronous(bool force)
|
| +{
|
| + m_state.setForceSynchronous(force);
|
| +}
|
| +
|
| +HTMLTokenizer::State HTMLTokenizer::processListing(SegmentedString list, State state)
|
| +{
|
| + // This function adds the listing 'list' as
|
| + // preformatted text-tokens to the token-collection
|
| + while (!list.isEmpty()) {
|
| + if (state.skipLF()) {
|
| + state.setSkipLF(false);
|
| + if (*list == '\n') {
|
| + list.advance();
|
| + continue;
|
| + }
|
| + }
|
| +
|
| + checkBuffer();
|
| +
|
| + if (*list == '\n' || *list == '\r') {
|
| + if (state.discardLF())
|
| + // Ignore this LF
|
| + state.setDiscardLF(false); // We have discarded 1 LF
|
| + else
|
| + *m_dest++ = '\n';
|
| +
|
| + /* Check for MS-DOS CRLF sequence */
|
| + if (*list == '\r')
|
| + state.setSkipLF(true);
|
| +
|
| + list.advance();
|
| + } else {
|
| + state.setDiscardLF(false);
|
| + *m_dest++ = *list;
|
| + list.advance();
|
| + }
|
| + }
|
| +
|
| + return state;
|
| +}
|
| +
|
| +HTMLTokenizer::State HTMLTokenizer::parseSpecial(SegmentedString& src, State state)
|
| +{
|
| + ASSERT(state.inTextArea() || state.inTitle() || state.inIFrame() || !state.hasEntityState());
|
| + ASSERT(!state.hasTagState());
|
| + ASSERT(state.inXmp() + state.inTextArea() + state.inTitle() + state.inStyle() + state.inScript() + state.inIFrame() == 1 );
|
| + if (state.inScript() && !m_currentScriptTagStartLineNumber)
|
| + m_currentScriptTagStartLineNumber = m_lineNumber;
|
| +
|
| + if (state.inComment())
|
| + state = parseComment(src, state);
|
| +
|
| + int lastDecodedEntityPosition = -1;
|
| + while (!src.isEmpty()) {
|
| + checkScriptBuffer();
|
| + UChar ch = *src;
|
| +
|
| + if (!m_scriptCodeResync && !m_brokenComments &&
|
| + !state.inXmp() && ch == '-' && m_scriptCodeSize >= 3 && !src.escaped() &&
|
| + m_scriptCode[m_scriptCodeSize - 3] == '<' && m_scriptCode[m_scriptCodeSize - 2] == '!' && m_scriptCode[m_scriptCodeSize - 1] == '-' &&
|
| + (lastDecodedEntityPosition < m_scriptCodeSize - 3)) {
|
| + state.setInComment(true);
|
| + state = parseComment(src, state);
|
| + continue;
|
| + }
|
| + if (m_scriptCodeResync && !tquote && ch == '>') {
|
| + src.advancePastNonNewline();
|
| + m_scriptCodeSize = m_scriptCodeResync - 1;
|
| + m_scriptCodeResync = 0;
|
| + m_scriptCode[m_scriptCodeSize] = m_scriptCode[m_scriptCodeSize + 1] = 0;
|
| + if (state.inScript())
|
| + state = scriptHandler(state);
|
| + else {
|
| + state = processListing(SegmentedString(m_scriptCode, m_scriptCodeSize), state);
|
| + processToken();
|
| + if (state.inStyle()) {
|
| + m_currentToken.tagName = styleTag.localName();
|
| + m_currentToken.beginTag = false;
|
| + } else if (state.inTextArea()) {
|
| + m_currentToken.tagName = textareaTag.localName();
|
| + m_currentToken.beginTag = false;
|
| + } else if (state.inTitle()) {
|
| + m_currentToken.tagName = titleTag.localName();
|
| + m_currentToken.beginTag = false;
|
| + } else if (state.inXmp()) {
|
| + m_currentToken.tagName = xmpTag.localName();
|
| + m_currentToken.beginTag = false;
|
| + } else if (state.inIFrame()) {
|
| + m_currentToken.tagName = iframeTag.localName();
|
| + m_currentToken.beginTag = false;
|
| + }
|
| + processToken();
|
| + state.setInStyle(false);
|
| + state.setInScript(false);
|
| + state.setInTextArea(false);
|
| + state.setInTitle(false);
|
| + state.setInXmp(false);
|
| + state.setInIFrame(false);
|
| + tquote = NoQuote;
|
| + m_scriptCodeSize = m_scriptCodeResync = 0;
|
| + }
|
| + return state;
|
| + }
|
| + // possible end of tagname, let's check.
|
| + if (!m_scriptCodeResync && !state.escaped() && !src.escaped() && (ch == '>' || ch == '/' || isASCIISpace(ch)) &&
|
| + m_scriptCodeSize >= m_searchStopperLength &&
|
| + tagMatch(m_searchStopper, m_scriptCode + m_scriptCodeSize - m_searchStopperLength, m_searchStopperLength) &&
|
| + (lastDecodedEntityPosition < m_scriptCodeSize - m_searchStopperLength)) {
|
| + m_scriptCodeResync = m_scriptCodeSize-m_searchStopperLength+1;
|
| + tquote = NoQuote;
|
| + continue;
|
| + }
|
| + if (m_scriptCodeResync && !state.escaped()) {
|
| + if (ch == '\"')
|
| + tquote = (tquote == NoQuote) ? DoubleQuote : ((tquote == SingleQuote) ? SingleQuote : NoQuote);
|
| + else if (ch == '\'')
|
| + tquote = (tquote == NoQuote) ? SingleQuote : (tquote == DoubleQuote) ? DoubleQuote : NoQuote;
|
| + else if (tquote != NoQuote && (ch == '\r' || ch == '\n'))
|
| + tquote = NoQuote;
|
| + }
|
| + state.setEscaped(!state.escaped() && ch == '\\');
|
| + if (!m_scriptCodeResync && (state.inTextArea() || state.inTitle() || state.inIFrame()) && !src.escaped() && ch == '&') {
|
| + UChar* scriptCodeDest = m_scriptCode + m_scriptCodeSize;
|
| + src.advancePastNonNewline();
|
| + state = parseEntity(src, scriptCodeDest, state, m_cBufferPos, true, false);
|
| + if (scriptCodeDest == m_scriptCode + m_scriptCodeSize)
|
| + lastDecodedEntityPosition = m_scriptCodeSize;
|
| + else
|
| + m_scriptCodeSize = scriptCodeDest - m_scriptCode;
|
| + } else {
|
| + m_scriptCode[m_scriptCodeSize++] = ch;
|
| + src.advance(m_lineNumber);
|
| + }
|
| + }
|
| +
|
| + return state;
|
| +}
|
| +
|
| +HTMLTokenizer::State HTMLTokenizer::scriptHandler(State state)
|
| +{
|
| + // We are inside a <script>
|
| + bool doScriptExec = false;
|
| + int startLine = m_currentScriptTagStartLineNumber + 1; // Script line numbers are 1 based, HTMLTokenizer line numbers are 0 based
|
| +
|
| + // Reset m_currentScriptTagStartLineNumber to indicate that we've finished parsing the current script element
|
| + m_currentScriptTagStartLineNumber = 0;
|
| +
|
| + // (Bugzilla 3837) Scripts following a frameset element should not execute or,
|
| + // in the case of extern scripts, even load.
|
| + bool followingFrameset = (m_doc->body() && m_doc->body()->hasTagName(framesetTag));
|
| +
|
| + CachedScript* cs = 0;
|
| + // don't load external scripts for standalone documents (for now)
|
| + if (!inViewSourceMode()) {
|
| + if (!m_scriptTagSrcAttrValue.isEmpty() && m_doc->frame()) {
|
| + // forget what we just got; load from src url instead
|
| + if (!m_parser->skipMode() && !followingFrameset) {
|
| +#ifdef INSTRUMENT_LAYOUT_SCHEDULING
|
| + if (!m_doc->ownerElement())
|
| + printf("Requesting script at time %d\n", m_doc->elapsedTime());
|
| +#endif
|
| + // The parser might have been stopped by for example a window.close call in an earlier script.
|
| + // If so, we don't want to load scripts.
|
| + if (!m_parserStopped && (cs = m_doc->docLoader()->requestScript(m_scriptTagSrcAttrValue, m_scriptTagCharsetAttrValue)))
|
| + m_pendingScripts.append(cs);
|
| + else
|
| + m_scriptNode = 0;
|
| + } else
|
| + m_scriptNode = 0;
|
| + m_scriptTagSrcAttrValue = String();
|
| + } else {
|
| + // Parse m_scriptCode containing <script> info
|
| +#if USE(LOW_BANDWIDTH_DISPLAY)
|
| + if (m_doc->inLowBandwidthDisplay()) {
|
| + // ideal solution is only skipping internal JavaScript if there is external JavaScript.
|
| + // but internal JavaScript can use document.write() to create an external JavaScript,
|
| + // so we have to skip internal JavaScript all the time.
|
| + m_doc->frame()->loader()->needToSwitchOutLowBandwidthDisplay();
|
| + doScriptExec = false;
|
| + } else
|
| +#endif
|
| + doScriptExec = m_scriptNode->shouldExecuteAsJavaScript();
|
| + m_scriptNode = 0;
|
| + }
|
| + }
|
| +
|
| + state = processListing(SegmentedString(m_scriptCode, m_scriptCodeSize), state);
|
| + RefPtr<Node> node = processToken();
|
| + String scriptString = node ? node->textContent() : "";
|
| + m_currentToken.tagName = scriptTag.localName();
|
| + m_currentToken.beginTag = false;
|
| + processToken();
|
| +
|
| + state.setInScript(false);
|
| + m_scriptCodeSize = m_scriptCodeResync = 0;
|
| +
|
| + // FIXME: The script should be syntax highlighted.
|
| + if (inViewSourceMode())
|
| + return state;
|
| +
|
| + SegmentedString* savedPrependingSrc = m_currentPrependingSrc;
|
| + SegmentedString prependingSrc;
|
| + m_currentPrependingSrc = &prependingSrc;
|
| +
|
| + if (!m_parser->skipMode() && !followingFrameset) {
|
| + if (cs) {
|
| + if (savedPrependingSrc)
|
| + savedPrependingSrc->append(m_src);
|
| + else
|
| + m_pendingSrc.prepend(m_src);
|
| + setSrc(SegmentedString());
|
| +
|
| + // the addClient() call below may call notifyFinished if the script is already in cache,
|
| + // and that mucks with the state directly, so we must write it back to the object.
|
| + m_state = state;
|
| + bool savedRequestingScript = m_requestingScript;
|
| + m_requestingScript = true;
|
| + cs->addClient(this);
|
| + m_requestingScript = savedRequestingScript;
|
| + state = m_state;
|
| + // m_pendingScripts will be empty if script was already loaded and addClient() executed it
|
| + if (!m_pendingScripts.isEmpty())
|
| + state.setLoadingExtScript(true);
|
| + } else if (!m_fragment && doScriptExec) {
|
| + if (!m_executingScript)
|
| + m_pendingSrc.prepend(m_src);
|
| + else
|
| + prependingSrc = m_src;
|
| + setSrc(SegmentedString());
|
| + state = scriptExecution(ScriptSourceCode(scriptString, m_doc->frame() ? m_doc->frame()->document()->url() : KURL(), startLine), state);
|
| + }
|
| + }
|
| +
|
| + if (!m_executingScript && !state.loadingExtScript()) {
|
| + m_src.append(m_pendingSrc);
|
| + m_pendingSrc.clear();
|
| + } else if (!prependingSrc.isEmpty()) {
|
| + // restore first so that the write appends in the right place
|
| + // (does not hurt to do it again below)
|
| + m_currentPrependingSrc = savedPrependingSrc;
|
| +
|
| + // we need to do this slightly modified bit of one of the write() cases
|
| + // because we want to prepend to m_pendingSrc rather than appending
|
| + // if there's no previous prependingSrc
|
| + if (!m_pendingScripts.isEmpty()) {
|
| + if (m_currentPrependingSrc)
|
| + m_currentPrependingSrc->append(prependingSrc);
|
| + else
|
| + m_pendingSrc.prepend(prependingSrc);
|
| + } else {
|
| + m_state = state;
|
| + write(prependingSrc, false);
|
| + state = m_state;
|
| + }
|
| + }
|
| +
|
| +#if PRELOAD_SCANNER_ENABLED
|
| + if (!m_pendingScripts.isEmpty() && !m_executingScript) {
|
| + if (!m_preloadScanner)
|
| + m_preloadScanner.set(new PreloadScanner(m_doc));
|
| + if (!m_preloadScanner->inProgress()) {
|
| + m_preloadScanner->begin();
|
| + m_preloadScanner->write(m_pendingSrc);
|
| + }
|
| + }
|
| +#endif
|
| + m_currentPrependingSrc = savedPrependingSrc;
|
| +
|
| + return state;
|
| +}
|
| +
|
| +HTMLTokenizer::State HTMLTokenizer::scriptExecution(const ScriptSourceCode& sourceCode, State state)
|
| +{
|
| + if (m_fragment || !m_doc->frame())
|
| + return state;
|
| + m_executingScript++;
|
| +
|
| + SegmentedString* savedPrependingSrc = m_currentPrependingSrc;
|
| + SegmentedString prependingSrc;
|
| + m_currentPrependingSrc = &prependingSrc;
|
| +
|
| +#ifdef INSTRUMENT_LAYOUT_SCHEDULING
|
| + if (!m_doc->ownerElement())
|
| + printf("beginning script execution at %d\n", m_doc->elapsedTime());
|
| +#endif
|
| +
|
| + m_state = state;
|
| + m_doc->frame()->loader()->executeScript(sourceCode);
|
| + state = m_state;
|
| +
|
| + state.setAllowYield(true);
|
| +
|
| +#ifdef INSTRUMENT_LAYOUT_SCHEDULING
|
| + if (!m_doc->ownerElement())
|
| + printf("ending script execution at %d\n", m_doc->elapsedTime());
|
| +#endif
|
| +
|
| + m_executingScript--;
|
| +
|
| + if (!m_executingScript && !state.loadingExtScript()) {
|
| + m_pendingSrc.prepend(prependingSrc);
|
| + m_src.append(m_pendingSrc);
|
| + m_pendingSrc.clear();
|
| + } else if (!prependingSrc.isEmpty()) {
|
| + // restore first so that the write appends in the right place
|
| + // (does not hurt to do it again below)
|
| + m_currentPrependingSrc = savedPrependingSrc;
|
| +
|
| + // we need to do this slightly modified bit of one of the write() cases
|
| + // because we want to prepend to m_pendingSrc rather than appending
|
| + // if there's no previous prependingSrc
|
| + if (!m_pendingScripts.isEmpty()) {
|
| + if (m_currentPrependingSrc)
|
| + m_currentPrependingSrc->append(prependingSrc);
|
| + else
|
| + m_pendingSrc.prepend(prependingSrc);
|
| +
|
| +#if PRELOAD_SCANNER_ENABLED
|
| + // We are stuck waiting for another script. Let's check the source that
|
| + // was just document.write()n for anything to load.
|
| + PreloadScanner documentWritePreloadScanner(m_doc);
|
| + documentWritePreloadScanner.begin();
|
| + documentWritePreloadScanner.write(prependingSrc);
|
| + documentWritePreloadScanner.end();
|
| +#endif
|
| + } else {
|
| + m_state = state;
|
| + write(prependingSrc, false);
|
| + state = m_state;
|
| + }
|
| + }
|
| +
|
| + m_currentPrependingSrc = savedPrependingSrc;
|
| +
|
| + return state;
|
| +}
|
| +
|
| +HTMLTokenizer::State HTMLTokenizer::parseComment(SegmentedString& src, State state)
|
| +{
|
| + // FIXME: Why does this code even run for comments inside <script> and <style>? This seems bogus.
|
| + checkScriptBuffer(src.length());
|
| + while (!src.isEmpty()) {
|
| + UChar ch = *src;
|
| + m_scriptCode[m_scriptCodeSize++] = ch;
|
| + if (ch == '>') {
|
| + bool handleBrokenComments = m_brokenComments && !(state.inScript() || state.inStyle());
|
| + int endCharsCount = 1; // start off with one for the '>' character
|
| + if (m_scriptCodeSize > 2 && m_scriptCode[m_scriptCodeSize-3] == '-' && m_scriptCode[m_scriptCodeSize-2] == '-') {
|
| + endCharsCount = 3;
|
| + } else if (m_scriptCodeSize > 3 && m_scriptCode[m_scriptCodeSize-4] == '-' && m_scriptCode[m_scriptCodeSize-3] == '-' &&
|
| + m_scriptCode[m_scriptCodeSize-2] == '!') {
|
| + // Other browsers will accept --!> as a close comment, even though it's
|
| + // not technically valid.
|
| + endCharsCount = 4;
|
| + }
|
| + if (handleBrokenComments || endCharsCount > 1) {
|
| + src.advancePastNonNewline();
|
| + if (!(state.inTitle() || state.inScript() || state.inXmp() || state.inTextArea() || state.inStyle() || state.inIFrame())) {
|
| + checkScriptBuffer();
|
| + m_scriptCode[m_scriptCodeSize] = 0;
|
| + m_scriptCode[m_scriptCodeSize + 1] = 0;
|
| + m_currentToken.tagName = commentAtom;
|
| + m_currentToken.beginTag = true;
|
| + state = processListing(SegmentedString(m_scriptCode, m_scriptCodeSize - endCharsCount), state);
|
| + processToken();
|
| + m_currentToken.tagName = commentAtom;
|
| + m_currentToken.beginTag = false;
|
| + processToken();
|
| + m_scriptCodeSize = 0;
|
| + }
|
| + state.setInComment(false);
|
| + return state; // Finished parsing comment
|
| + }
|
| + }
|
| + src.advance(m_lineNumber);
|
| + }
|
| +
|
| + return state;
|
| +}
|
| +
|
| +HTMLTokenizer::State HTMLTokenizer::parseServer(SegmentedString& src, State state)
|
| +{
|
| + checkScriptBuffer(src.length());
|
| + while (!src.isEmpty()) {
|
| + UChar ch = *src;
|
| + m_scriptCode[m_scriptCodeSize++] = ch;
|
| + if (ch == '>' && m_scriptCodeSize > 1 && m_scriptCode[m_scriptCodeSize - 2] == '%') {
|
| + src.advancePastNonNewline();
|
| + state.setInServer(false);
|
| + m_scriptCodeSize = 0;
|
| + return state; // Finished parsing server include
|
| + }
|
| + src.advance(m_lineNumber);
|
| + }
|
| + return state;
|
| +}
|
| +
|
| +HTMLTokenizer::State HTMLTokenizer::parseProcessingInstruction(SegmentedString& src, State state)
|
| +{
|
| + UChar oldchar = 0;
|
| + while (!src.isEmpty()) {
|
| + UChar chbegin = *src;
|
| + if (chbegin == '\'')
|
| + tquote = tquote == SingleQuote ? NoQuote : SingleQuote;
|
| + else if (chbegin == '\"')
|
| + tquote = tquote == DoubleQuote ? NoQuote : DoubleQuote;
|
| + // Look for '?>'
|
| + // Some crappy sites omit the "?" before it, so
|
| + // we look for an unquoted '>' instead. (IE compatible)
|
| + else if (chbegin == '>' && (!tquote || oldchar == '?')) {
|
| + // We got a '?>' sequence
|
| + state.setInProcessingInstruction(false);
|
| + src.advancePastNonNewline();
|
| + state.setDiscardLF(true);
|
| + return state; // Finished parsing processing instruction!
|
| + }
|
| + src.advance(m_lineNumber);
|
| + oldchar = chbegin;
|
| + }
|
| +
|
| + return state;
|
| +}
|
| +
|
| +HTMLTokenizer::State HTMLTokenizer::parseText(SegmentedString& src, State state)
|
| +{
|
| + while (!src.isEmpty()) {
|
| + UChar cc = *src;
|
| +
|
| + if (state.skipLF()) {
|
| + state.setSkipLF(false);
|
| + if (cc == '\n') {
|
| + src.advancePastNewline(m_lineNumber);
|
| + continue;
|
| + }
|
| + }
|
| +
|
| + // do we need to enlarge the buffer?
|
| + checkBuffer();
|
| +
|
| + if (cc == '\r') {
|
| + state.setSkipLF(true);
|
| + *m_dest++ = '\n';
|
| + } else
|
| + *m_dest++ = cc;
|
| + src.advance(m_lineNumber);
|
| + }
|
| +
|
| + return state;
|
| +}
|
| +
|
| +
|
| +// Incrementally decodes a character reference. The visible callers consume the
|
| +// leading '&' before calling; this function re-emits it when the reference
|
| +// turns out to be invalid.
|
| +//
|
| +// src        - input; consumed as the entity is recognised. A successfully
|
| +//              decoded character is pushed back onto src with src.push().
|
| +// dest       - output cursor; receives the literal entity text when the entity
|
| +//              is invalid, or always when in view-source mode.
|
| +// state      - tokenizer state; entityState() records progress so that parsing
|
| +//              can resume (start == false) after src ran out of input.
|
| +// cBufferPos - number of entity characters currently stored in m_cBuffer.
|
| +// start      - true to begin a new entity (resets cBufferPos, the entity state
|
| +//              and EntityUnicodeValue).
|
| +// parsingTag - true when called from tag parsing; enables the IE quirk that
|
| +//              rejects named entities above 255 lacking a ';'.
|
| +//
|
| +// Returns the updated state; entityState() is NoEntity once the entity has
|
| +// been fully handled.
|
| +HTMLTokenizer::State HTMLTokenizer::parseEntity(SegmentedString& src, UChar*& dest, State state, unsigned& cBufferPos, bool start, bool parsingTag)
|
| +{
|
| +    if (start) {
|
| +        cBufferPos = 0;
|
| +        state.setEntityState(SearchEntity);
|
| +        EntityUnicodeValue = 0;
|
| +    }
|
| +
|
| +    while(!src.isEmpty()) {
|
| +        UChar cc = *src;
|
| +        switch(state.entityState()) {
|
| +        case NoEntity:
|
| +            ASSERT(state.entityState() != NoEntity);
|
| +            return state;
|
| +
|
| +        case SearchEntity:
|
| +            // '#' introduces a numeric reference; anything else is tried as a name.
|
| +            if (cc == '#') {
|
| +                m_cBuffer[cBufferPos++] = cc;
|
| +                src.advancePastNonNewline();
|
| +                state.setEntityState(NumericSearch);
|
| +            } else
|
| +                state.setEntityState(EntityName);
|
| +            break;
|
| +
|
| +        case NumericSearch:
|
| +            if (cc == 'x' || cc == 'X') {
|
| +                m_cBuffer[cBufferPos++] = cc;
|
| +                src.advancePastNonNewline();
|
| +                state.setEntityState(Hexadecimal);
|
| +            } else if (cc >= '0' && cc <= '9')
|
| +                state.setEntityState(Decimal);
|
| +            else
|
| +                state.setEntityState(SearchSemicolon);
|
| +            break;
|
| +
|
| +        case Hexadecimal: {
|
| +            // m_cBuffer may grow to 10 characters here: '#', 'x' and 8 hex digits.
|
| +            int ll = min(src.length(), 10 - cBufferPos);
|
| +            while (ll--) {
|
| +                cc = *src;
|
| +                if (!((cc >= '0' && cc <= '9') || (cc >= 'a' && cc <= 'f') || (cc >= 'A' && cc <= 'F'))) {
|
| +                    state.setEntityState(SearchSemicolon);
|
| +                    break;
|
| +                }
|
| +                int digit;
|
| +                if (cc < 'A')
|
| +                    digit = cc - '0';
|
| +                else
|
| +                    digit = (cc - 'A' + 10) & 0xF; // handle both upper and lower case without a branch
|
| +                EntityUnicodeValue = EntityUnicodeValue * 16 + digit;
|
| +                m_cBuffer[cBufferPos++] = cc;
|
| +                src.advancePastNonNewline();
|
| +            }
|
| +            if (cBufferPos == 10)
|
| +                state.setEntityState(SearchSemicolon);
|
| +            break;
|
| +        }
|
| +        case Decimal:
|
| +        {
|
| +            // At most 9 characters are buffered: '#' and 8 decimal digits.
|
| +            int ll = min(src.length(), 9-cBufferPos);
|
| +            while(ll--) {
|
| +                cc = *src;
|
| +
|
| +                if (!(cc >= '0' && cc <= '9')) {
|
| +                    state.setEntityState(SearchSemicolon);
|
| +                    break;
|
| +                }
|
| +
|
| +                EntityUnicodeValue = EntityUnicodeValue * 10 + (cc - '0');
|
| +                m_cBuffer[cBufferPos++] = cc;
|
| +                src.advancePastNonNewline();
|
| +            }
|
| +            if (cBufferPos == 9)
|
| +                state.setEntityState(SearchSemicolon);
|
| +            break;
|
| +        }
|
| +        case EntityName:
|
| +        {
|
| +            int ll = min(src.length(), 9-cBufferPos);
|
| +            while(ll--) {
|
| +                cc = *src;
|
| +
|
| +                if (!((cc >= 'a' && cc <= 'z') || (cc >= '0' && cc <= '9') || (cc >= 'A' && cc <= 'Z'))) {
|
| +                    state.setEntityState(SearchSemicolon);
|
| +                    break;
|
| +                }
|
| +
|
| +                m_cBuffer[cBufferPos++] = cc;
|
| +                src.advancePastNonNewline();
|
| +            }
|
| +            if (cBufferPos == 9)
|
| +                state.setEntityState(SearchSemicolon);
|
| +            if (state.entityState() == SearchSemicolon) {
|
| +                if(cBufferPos > 1) {
|
| +                    // An entity name is at most 9 characters long, so a
|
| +                    // 10-element stack buffer is large enough for the
|
| +                    // narrowed copy. Any character above 0x7e makes the
|
| +                    // name illegal, and the lookup is skipped.
|
| +                    unsigned testedEntityNameLen = 0;
|
| +                    char tmpEntityNameBuffer[10];
|
| +
|
| +                    ASSERT(cBufferPos < 10);
|
| +                    for (; testedEntityNameLen < cBufferPos; ++testedEntityNameLen) {
|
| +                        if (m_cBuffer[testedEntityNameLen] > 0x7e)
|
| +                            break;
|
| +                        tmpEntityNameBuffer[testedEntityNameLen] = m_cBuffer[testedEntityNameLen];
|
| +                    }
|
| +
|
| +                    const Entity *e;
|
| +
|
| +                    if (testedEntityNameLen == cBufferPos)
|
| +                        e = findEntity(tmpEntityNameBuffer, cBufferPos);
|
| +                    else
|
| +                        e = 0;
|
| +
|
| +                    if(e)
|
| +                        EntityUnicodeValue = e->code;
|
| +
|
| +                    // be IE compatible
|
| +                    // NOTE(review): src can be empty here when the loop above consumed
|
| +                    // the last buffered character — confirm *src is safe in that case.
|
| +                    if(parsingTag && EntityUnicodeValue > 255 && *src != ';')
|
| +                        EntityUnicodeValue = 0;
|
| +                }
|
| +            }
|
| +            else
|
| +                break;
|
| +            // Deliberate fall through: the name is complete, so resolve it now.
|
| +        }
|
| +        case SearchSemicolon:
|
| +            // Don't allow values that are more than 21 bits.
|
| +            if (EntityUnicodeValue > 0 && EntityUnicodeValue <= 0x10FFFF) {
|
| +                if (!inViewSourceMode()) {
|
| +                    if (*src == ';')
|
| +                        src.advancePastNonNewline();
|
| +                    if (EntityUnicodeValue <= 0xFFFF) {
|
| +                        checkBuffer();
|
| +                        src.push(fixUpChar(EntityUnicodeValue));
|
| +                    } else {
|
| +                        // Convert to UTF-16, using surrogate code points.
|
| +                        checkBuffer(2);
|
| +                        src.push(U16_LEAD(EntityUnicodeValue));
|
| +                        src.push(U16_TRAIL(EntityUnicodeValue));
|
| +                    }
|
| +                } else {
|
| +                    // FIXME: We should eventually colorize entities by sending them as a special token.
|
| +                    // Up to 12 characters are written below: the leading '&',
|
| +                    // as many as 10 characters from m_cBuffer, and a trailing ';'.
|
| +                    checkBuffer(12);
|
| +                    *dest++ = '&';
|
| +                    for (unsigned i = 0; i < cBufferPos; i++)
|
| +                        dest[i] = m_cBuffer[i];
|
| +                    dest += cBufferPos;
|
| +                    if (*src == ';') {
|
| +                        *dest++ = ';';
|
| +                        src.advancePastNonNewline();
|
| +                    }
|
| +                }
|
| +            } else {
|
| +                // Up to 11 characters are written below: the leading '&' plus
|
| +                // as many as 10 characters from m_cBuffer.
|
| +                checkBuffer(11);
|
| +                // ignore the sequence, add it to the buffer as plaintext
|
| +                *dest++ = '&';
|
| +                for (unsigned i = 0; i < cBufferPos; i++)
|
| +                    dest[i] = m_cBuffer[i];
|
| +                dest += cBufferPos;
|
| +            }
|
| +
|
| +            state.setEntityState(NoEntity);
|
| +            return state;
|
| +        }
|
| +    }
|
| +
|
| +    return state;
|
| +}
|
| +
|
| +HTMLTokenizer::State HTMLTokenizer::parseDoctype(SegmentedString& src, State state) // Doctype state machine: consumes src while state.inDoctype() holds, filling m_doctypeToken; returns the updated state.
|
| +{
|
| + ASSERT(state.inDoctype());
|
| + while (!src.isEmpty() && state.inDoctype()) { // Stops when input runs out (resumable later) or a '>' ends the doctype.
|
| + UChar c = *src;
|
| + bool isWhitespace = c == '\r' || c == '\n' || c == '\t' || c == ' ';
|
| + switch (m_doctypeToken.state()) {
|
| + case DoctypeBegin: { // Entry state: always moves on to DoctypeBeforeName; only whitespace is consumed here.
|
| + m_doctypeToken.setState(DoctypeBeforeName);
|
| + if (isWhitespace) {
|
| + src.advance(m_lineNumber);
|
| + if (inViewSourceMode())
|
| + m_doctypeToken.m_source.append(c);
|
| + }
|
| + break;
|
| + }
|
| + case DoctypeBeforeName: { // Skips whitespace; the first other character (left unconsumed) starts the name.
|
| + if (c == '>') {
|
| + // Malformed. Just exit.
|
| + src.advancePastNonNewline();
|
| + state.setInDoctype(false);
|
| + if (inViewSourceMode())
|
| + processDoctypeToken();
|
| + } else if (isWhitespace) {
|
| + src.advance(m_lineNumber);
|
| + if (inViewSourceMode())
|
| + m_doctypeToken.m_source.append(c);
|
| + } else
|
| + m_doctypeToken.setState(DoctypeName);
|
| + break;
|
| + }
|
| + case DoctypeName: { // Accumulates m_name until whitespace or '>'.
|
| + if (c == '>') {
|
| + // Valid doctype. Emit it.
|
| + src.advancePastNonNewline();
|
| + state.setInDoctype(false);
|
| + processDoctypeToken();
|
| + } else if (isWhitespace) {
|
| + m_doctypeSearchCount = 0; // Used now to scan for PUBLIC
|
| + m_doctypeSecondarySearchCount = 0; // Used now to scan for SYSTEM
|
| + m_doctypeToken.setState(DoctypeAfterName);
|
| + src.advance(m_lineNumber);
|
| + if (inViewSourceMode())
|
| + m_doctypeToken.m_source.append(c);
|
| + } else {
|
| + src.advancePastNonNewline();
|
| + m_doctypeToken.m_name.append(c);
|
| + if (inViewSourceMode())
|
| + m_doctypeToken.m_source.append(c);
|
| + }
|
| + break;
|
| + }
|
| + case DoctypeAfterName: { // Matches the PUBLIC or SYSTEM keyword case-insensitively, one character per iteration.
|
| + if (c == '>') {
|
| + // Valid doctype. Emit it.
|
| + src.advancePastNonNewline();
|
| + state.setInDoctype(false);
|
| + processDoctypeToken();
|
| + } else if (!isWhitespace) {
|
| + src.advancePastNonNewline();
|
| + if (toASCIILower(c) == publicStart[m_doctypeSearchCount]) {
|
| + m_doctypeSearchCount++;
|
| + if (m_doctypeSearchCount == 6)
|
| + // Found 'PUBLIC' sequence
|
| + m_doctypeToken.setState(DoctypeBeforePublicID);
|
| + } else if (m_doctypeSearchCount > 0) { // A partial PUBLIC match that breaks off makes the doctype bogus.
|
| + m_doctypeSearchCount = 0;
|
| + m_doctypeToken.setState(DoctypeBogus);
|
| + } else if (toASCIILower(c) == systemStart[m_doctypeSecondarySearchCount]) {
|
| + m_doctypeSecondarySearchCount++;
|
| + if (m_doctypeSecondarySearchCount == 6)
|
| + // Found 'SYSTEM' sequence
|
| + m_doctypeToken.setState(DoctypeBeforeSystemID);
|
| + } else {
|
| + m_doctypeSecondarySearchCount = 0;
|
| + m_doctypeToken.setState(DoctypeBogus);
|
| + }
|
| + if (inViewSourceMode())
|
| + m_doctypeToken.m_source.append(c);
|
| + } else {
|
| + src.advance(m_lineNumber); // Whitespace keeps us in the after name state.
|
| + if (inViewSourceMode())
|
| + m_doctypeToken.m_source.append(c);
|
| + }
|
| + break;
|
| + }
|
| + case DoctypeBeforePublicID: { // Expects the opening quote of the public identifier.
|
| + if (c == '\"' || c == '\'') {
|
| + tquote = c == '\"' ? DoubleQuote : SingleQuote; // Remember which quote must close the identifier.
|
| + m_doctypeToken.setState(DoctypePublicID);
|
| + src.advancePastNonNewline();
|
| + if (inViewSourceMode())
|
| + m_doctypeToken.m_source.append(c);
|
| + } else if (c == '>') {
|
| + // Considered bogus. Don't process the doctype.
|
| + src.advancePastNonNewline();
|
| + state.setInDoctype(false);
|
| + if (inViewSourceMode())
|
| + processDoctypeToken();
|
| + } else if (isWhitespace) {
|
| + src.advance(m_lineNumber);
|
| + if (inViewSourceMode())
|
| + m_doctypeToken.m_source.append(c);
|
| + } else
|
| + m_doctypeToken.setState(DoctypeBogus);
|
| + break;
|
| + }
|
| + case DoctypePublicID: { // Accumulates m_publicID until the matching closing quote.
|
| + if ((c == '\"' && tquote == DoubleQuote) || (c == '\'' && tquote == SingleQuote)) {
|
| + src.advancePastNonNewline();
|
| + m_doctypeToken.setState(DoctypeAfterPublicID);
|
| + if (inViewSourceMode())
|
| + m_doctypeToken.m_source.append(c);
|
| + } else if (c == '>') {
|
| + // Considered bogus. Don't process the doctype.
|
| + src.advancePastNonNewline();
|
| + state.setInDoctype(false);
|
| + if (inViewSourceMode())
|
| + processDoctypeToken();
|
| + } else {
|
| + m_doctypeToken.m_publicID.append(c);
|
| + src.advance(m_lineNumber);
|
| + if (inViewSourceMode())
|
| + m_doctypeToken.m_source.append(c);
|
| + }
|
| + break;
|
| + }
|
| + case DoctypeAfterPublicID: // A quote here opens an optional system identifier; '>' ends a valid doctype.
|
| + if (c == '\"' || c == '\'') {
|
| + tquote = c == '\"' ? DoubleQuote : SingleQuote;
|
| + m_doctypeToken.setState(DoctypeSystemID);
|
| + src.advancePastNonNewline();
|
| + if (inViewSourceMode())
|
| + m_doctypeToken.m_source.append(c);
|
| + } else if (c == '>') {
|
| + // Valid doctype. Emit it now.
|
| + src.advancePastNonNewline();
|
| + state.setInDoctype(false);
|
| + processDoctypeToken();
|
| + } else if (isWhitespace) {
|
| + src.advance(m_lineNumber);
|
| + if (inViewSourceMode())
|
| + m_doctypeToken.m_source.append(c);
|
| + } else
|
| + m_doctypeToken.setState(DoctypeBogus);
|
| + break;
|
| + case DoctypeBeforeSystemID: // Reached after the SYSTEM keyword; expects the opening quote of the system identifier.
|
| + if (c == '\"' || c == '\'') {
|
| + tquote = c == '\"' ? DoubleQuote : SingleQuote;
|
| + m_doctypeToken.setState(DoctypeSystemID);
|
| + src.advancePastNonNewline();
|
| + if (inViewSourceMode())
|
| + m_doctypeToken.m_source.append(c);
|
| + } else if (c == '>') {
|
| + // Considered bogus. Don't process the doctype.
|
| + src.advancePastNonNewline();
|
| + state.setInDoctype(false); // NOTE(review): unlike the other bogus-'>' branches, this one never calls processDoctypeToken() in view-source mode - confirm whether that is intended.
|
| + } else if (isWhitespace) {
|
| + src.advance(m_lineNumber);
|
| + if (inViewSourceMode())
|
| + m_doctypeToken.m_source.append(c);
|
| + } else
|
| + m_doctypeToken.setState(DoctypeBogus);
|
| + break;
|
| + case DoctypeSystemID: // Accumulates m_systemID until the matching closing quote.
|
| + if ((c == '\"' && tquote == DoubleQuote) || (c == '\'' && tquote == SingleQuote)) {
|
| + src.advancePastNonNewline();
|
| + m_doctypeToken.setState(DoctypeAfterSystemID);
|
| + if (inViewSourceMode())
|
| + m_doctypeToken.m_source.append(c);
|
| + } else if (c == '>') {
|
| + // Considered bogus. Don't process the doctype.
|
| + src.advancePastNonNewline();
|
| + state.setInDoctype(false);
|
| + if (inViewSourceMode())
|
| + processDoctypeToken();
|
| + } else {
|
| + m_doctypeToken.m_systemID.append(c);
|
| + src.advance(m_lineNumber);
|
| + if (inViewSourceMode())
|
| + m_doctypeToken.m_source.append(c);
|
| + }
|
| + break;
|
| + case DoctypeAfterSystemID: // Only whitespace or the closing '>' is acceptable here.
|
| + if (c == '>') {
|
| + // Valid doctype. Emit it now.
|
| + src.advancePastNonNewline();
|
| + state.setInDoctype(false);
|
| + processDoctypeToken();
|
| + } else if (isWhitespace) {
|
| + src.advance(m_lineNumber);
|
| + if (inViewSourceMode())
|
| + m_doctypeToken.m_source.append(c);
|
| + } else
|
| + m_doctypeToken.setState(DoctypeBogus);
|
| + break;
|
| + case DoctypeBogus: // Error state: the token is only processed in view-source mode.
|
| + if (c == '>') {
|
| + // Done with the bogus doctype.
|
| + src.advancePastNonNewline();
|
| + state.setInDoctype(false);
|
| + if (inViewSourceMode())
|
| + processDoctypeToken();
|
| + } else {
|
| + src.advance(m_lineNumber); // Just keep scanning for '>'
|
| + if (inViewSourceMode())
|
| + m_doctypeToken.m_source.append(c);
|
| + }
|
| + break;
|
| + default:
|
| + break;
|
| + }
|
| + }
|
| + return state;
|
| +}
|
| +
|
| +HTMLTokenizer::State HTMLTokenizer::parseTag(SegmentedString& src, State state) // Tag state machine: dispatches on state.tagState() until src is empty or the tag has been handed to processToken(); returns the updated state.
|
| +{
|
| + ASSERT(!state.hasEntityState());
|
| +
|
| + unsigned cBufferPos = m_cBufferPos; // Work on a local copy; it is written back to m_cBufferPos on every return path.
|
| +
|
| + bool lastIsSlash = false; // Set in SearchEqual when the most recently skipped character was '/'.
|
| +
|
| + while (!src.isEmpty()) {
|
| + checkBuffer();
|
| + switch(state.tagState()) {
|
| + case NoTag:
|
| + {
|
| + m_cBufferPos = cBufferPos;
|
| + return state;
|
| + }
|
| + case TagName:
|
| + {
|
| + if (searchCount > 0) { // Still matching the "<!--" comment opener, one character per pass.
|
| + if (*src == commentStart[searchCount]) {
|
| + searchCount++;
|
| + if (searchCount == 2)
|
| + m_doctypeSearchCount++; // A '!' is also part of a doctype, so we are moving through that still as well.
|
| + else
|
| + m_doctypeSearchCount = 0;
|
| + if (searchCount == 4) {
|
| + // Found '<!--' sequence
|
| + src.advancePastNonNewline();
|
| + m_dest = m_buffer; // ignore the previous part of this tag
|
| + state.setInComment(true);
|
| + state.setTagState(NoTag);
|
| +
|
| + // Fix bug 34302 at kde.bugs.org. Go ahead and treat
|
| + // <!--> as a valid comment, since both mozilla and IE on windows
|
| + // can handle this case. Only do this in quirks mode. -dwh
|
| + if (!src.isEmpty() && *src == '>' && m_doc->inCompatMode()) {
|
| + state.setInComment(false);
|
| + src.advancePastNonNewline();
|
| + if (!src.isEmpty())
|
| + m_cBuffer[cBufferPos++] = *src;
|
| + } else
|
| + state = parseComment(src, state);
|
| +
|
| + m_cBufferPos = cBufferPos;
|
| + return state; // Finished parsing tag!
|
| + }
|
| + m_cBuffer[cBufferPos++] = *src;
|
| + src.advancePastNonNewline();
|
| + break;
|
| + } else
|
| + searchCount = 0; // Stop looking for '<!--' sequence
|
| + }
|
| +
|
| + if (m_doctypeSearchCount > 0) { // Still matching "<!DOCTYPE" (case-insensitively).
|
| + if (toASCIILower(*src) == doctypeStart[m_doctypeSearchCount]) {
|
| + m_doctypeSearchCount++;
|
| + m_cBuffer[cBufferPos++] = *src;
|
| + src.advancePastNonNewline();
|
| + if (m_doctypeSearchCount == 9) {
|
| + // Found '<!DOCTYPE' sequence
|
| + state.setInDoctype(true);
|
| + state.setTagState(NoTag);
|
| + m_doctypeToken.reset();
|
| + if (inViewSourceMode())
|
| + m_doctypeToken.m_source.append(m_cBuffer, cBufferPos);
|
| + state = parseDoctype(src, state);
|
| + m_cBufferPos = cBufferPos;
|
| + return state;
|
| + }
|
| + break;
|
| + } else
|
| + m_doctypeSearchCount = 0; // Stop looking for '<!DOCTYPE' sequence
|
| + }
|
| +
|
| + bool finish = false;
|
| + unsigned int ll = min(src.length(), CBUFLEN - cBufferPos); // Buffer at most CBUFLEN characters of tag name in total.
|
| + while (ll--) {
|
| + UChar curchar = *src;
|
| + if (isASCIISpace(curchar) || curchar == '>' || curchar == '<') {
|
| + finish = true;
|
| + break;
|
| + }
|
| +
|
| + // tolower() shows up on profiles. This is faster!
|
| + if (curchar >= 'A' && curchar <= 'Z' && !inViewSourceMode())
|
| + m_cBuffer[cBufferPos++] = curchar + ('a' - 'A');
|
| + else
|
| + m_cBuffer[cBufferPos++] = curchar;
|
| + src.advancePastNonNewline();
|
| + }
|
| +
|
| + // Disadvantage: we add the possible rest of the tag
|
| + // as attribute names. ### judge if this causes problems
|
| + if (finish || CBUFLEN == cBufferPos) { // The name ended at a delimiter, or was cut off at CBUFLEN characters.
|
| + bool beginTag;
|
| + UChar* ptr = m_cBuffer;
|
| + unsigned int len = cBufferPos;
|
| + m_cBuffer[cBufferPos] = '\0';
|
| + if ((cBufferPos > 0) && (*ptr == '/')) {
|
| + // End Tag
|
| + beginTag = false;
|
| + ptr++;
|
| + len--;
|
| + }
|
| + else
|
| + // Start Tag
|
| + beginTag = true;
|
| +
|
| + // Ignore the / in fake xml tags like <br/>. We trim off the "/" so that we'll get "br" as the tag name and not "br/".
|
| + if (len > 1 && ptr[len-1] == '/' && !inViewSourceMode())
|
| + ptr[--len] = '\0';
|
| +
|
| + // Now that we've shaved off any invalid / that might have followed the name), make the tag.
|
| + // FIXME: FireFox and WinIE turn !foo nodes into comments, we ignore comments. (fast/parser/tag-with-exclamation-point.html)
|
| + if (ptr[0] != '!' || inViewSourceMode()) {
|
| + m_currentToken.tagName = AtomicString(ptr);
|
| + m_currentToken.beginTag = beginTag;
|
| + }
|
| + m_dest = m_buffer;
|
| + state.setTagState(SearchAttribute);
|
| + cBufferPos = 0;
|
| + }
|
| + break;
|
| + }
|
| + case SearchAttribute: // Skips whitespace and stray quotes until an attribute name or the end of the tag begins.
|
| + while(!src.isEmpty()) {
|
| + UChar curchar = *src;
|
| + // In this mode just ignore any quotes we encounter and treat them like spaces.
|
| + if (!isASCIISpace(curchar) && curchar != '\'' && curchar != '"') {
|
| + if (curchar == '<' || curchar == '>')
|
| + state.setTagState(SearchEnd);
|
| + else
|
| + state.setTagState(AttributeName);
|
| +
|
| + cBufferPos = 0;
|
| + break;
|
| + }
|
| + if (inViewSourceMode())
|
| + m_currentToken.addViewSourceChar(curchar);
|
| + src.advance(m_lineNumber);
|
| + }
|
| + break;
|
| + case AttributeName:
|
| + {
|
| + int ll = min(src.length(), CBUFLEN - cBufferPos);
|
| + while (ll--) {
|
| + UChar curchar = *src;
|
| + // If we encounter a "/" when scanning an attribute name, treat it as a delimiter. This allows the
|
| + // cases like <input type=checkbox checked/> to work (and accommodates XML-style syntax as per HTML5).
|
| + if (curchar <= '>' && (curchar >= '<' || isASCIISpace(curchar) || curchar == '/')) { // The range '<'..'>' covers '<', '=' and '>'.
|
| + m_cBuffer[cBufferPos] = '\0';
|
| + m_attrName = AtomicString(m_cBuffer);
|
| + m_dest = m_buffer;
|
| + *m_dest++ = 0; // Occupies slot 0; attribute values are later read starting at m_buffer + 1.
|
| + state.setTagState(SearchEqual);
|
| + if (inViewSourceMode())
|
| + m_currentToken.addViewSourceChar('a');
|
| + break;
|
| + }
|
| +
|
| + // tolower() shows up on profiles. This is faster!
|
| + if (curchar >= 'A' && curchar <= 'Z' && !inViewSourceMode())
|
| + m_cBuffer[cBufferPos++] = curchar + ('a' - 'A');
|
| + else
|
| + m_cBuffer[cBufferPos++] = curchar;
|
| +
|
| + src.advance(m_lineNumber);
|
| + }
|
| + if (cBufferPos == CBUFLEN) { // Name cut off at CBUFLEN characters: finish it exactly as if a delimiter had been seen.
|
| + m_cBuffer[cBufferPos] = '\0';
|
| + m_attrName = AtomicString(m_cBuffer);
|
| + m_dest = m_buffer;
|
| + *m_dest++ = 0;
|
| + state.setTagState(SearchEqual);
|
| + if (inViewSourceMode())
|
| + m_currentToken.addViewSourceChar('a');
|
| + }
|
| + break;
|
| + }
|
| + case SearchEqual: // After an attribute name: look for '=', otherwise record the attribute with an empty value.
|
| + while (!src.isEmpty()) {
|
| + UChar curchar = *src;
|
| +
|
| + if (lastIsSlash && curchar == '>') {
|
| + // This is a quirk (with a long sad history). We have to do this
|
| + // since widgets do <script src="foo.js"/> and expect the tag to close.
|
| + if (m_currentToken.tagName == scriptTag)
|
| + m_currentToken.selfClosingTag = true;
|
| + m_currentToken.brokenXMLStyle = true;
|
| + }
|
| +
|
| + // In this mode just ignore any quotes or slashes we encounter and treat them like spaces.
|
| + if (!isASCIISpace(curchar) && curchar != '\'' && curchar != '"' && curchar != '/') {
|
| + if (curchar == '=') {
|
| + state.setTagState(SearchValue);
|
| + if (inViewSourceMode())
|
| + m_currentToken.addViewSourceChar(curchar);
|
| + src.advancePastNonNewline();
|
| + } else {
|
| + m_currentToken.addAttribute(m_attrName, emptyAtom, inViewSourceMode());
|
| + m_dest = m_buffer;
|
| + state.setTagState(SearchAttribute);
|
| + lastIsSlash = false;
|
| + }
|
| + break;
|
| + }
|
| + if (inViewSourceMode())
|
| + m_currentToken.addViewSourceChar(curchar);
|
| +
|
| + lastIsSlash = curchar == '/';
|
| +
|
| + src.advance(m_lineNumber);
|
| + }
|
| + break;
|
| + case SearchValue: // After '=': skip whitespace, then choose between a quoted and an unquoted value.
|
| + while (!src.isEmpty()) {
|
| + UChar curchar = *src;
|
| + if (!isASCIISpace(curchar)) {
|
| + if (curchar == '\'' || curchar == '\"') {
|
| + tquote = curchar == '\"' ? DoubleQuote : SingleQuote;
|
| + state.setTagState(QuotedValue);
|
| + if (inViewSourceMode())
|
| + m_currentToken.addViewSourceChar(curchar);
|
| + src.advancePastNonNewline();
|
| + } else
|
| + state.setTagState(Value);
|
| +
|
| + break;
|
| + }
|
| + if (inViewSourceMode())
|
| + m_currentToken.addViewSourceChar(curchar);
|
| + src.advance(m_lineNumber);
|
| + }
|
| + break;
|
| + case QuotedValue:
|
| + while (!src.isEmpty()) {
|
| + checkBuffer();
|
| +
|
| + UChar curchar = *src;
|
| + if (curchar <= '>' && !src.escaped()) { // Pre-filter: '>', '&' and both quote characters all compare <= '>'.
|
| + if (curchar == '>' && m_attrName.isEmpty()) {
|
| + // Handle a case like <img '>. Just go ahead and be willing
|
| + // to close the whole tag. Don't consume the character and
|
| + // just go back into SearchEnd while ignoring the whole
|
| + // value.
|
| + // FIXME: Note that this is actually not a very good solution.
|
| + // It doesn't handle the general case of
|
| + // unmatched quotes among attributes that have names. -dwh
|
| + while (m_dest > m_buffer + 1 && (m_dest[-1] == '\n' || m_dest[-1] == '\r'))
|
| + m_dest--; // remove trailing newlines
|
| + AtomicString attributeValue(m_buffer + 1, m_dest - m_buffer - 1);
|
| + if (!attributeValue.contains('/'))
|
| + m_attrName = attributeValue; // Just make the name/value match. (FIXME: Is this some WinIE quirk?)
|
| + m_currentToken.addAttribute(m_attrName, attributeValue, inViewSourceMode());
|
| + if (inViewSourceMode())
|
| + m_currentToken.addViewSourceChar('x');
|
| + state.setTagState(SearchAttribute);
|
| + m_dest = m_buffer;
|
| + tquote = NoQuote;
|
| + break;
|
| + }
|
| +
|
| + if (curchar == '&') {
|
| + src.advancePastNonNewline();
|
| + state = parseEntity(src, m_dest, state, cBufferPos, true, true);
|
| + break;
|
| + }
|
| +
|
| + if ((tquote == SingleQuote && curchar == '\'') || (tquote == DoubleQuote && curchar == '\"')) { // The matching closing quote ends the value.
|
| + // some <input type=hidden> rely on trailing spaces. argh
|
| + while (m_dest > m_buffer + 1 && (m_dest[-1] == '\n' || m_dest[-1] == '\r'))
|
| + m_dest--; // remove trailing newlines
|
| + AtomicString attributeValue(m_buffer + 1, m_dest - m_buffer - 1);
|
| + if (m_attrName.isEmpty() && !attributeValue.contains('/')) {
|
| + m_attrName = attributeValue; // Make the name match the value. (FIXME: Is this a WinIE quirk?)
|
| + if (inViewSourceMode())
|
| + m_currentToken.addViewSourceChar('x');
|
| + } else if (inViewSourceMode())
|
| + m_currentToken.addViewSourceChar('v');
|
| + m_currentToken.addAttribute(m_attrName, attributeValue, inViewSourceMode());
|
| + m_dest = m_buffer;
|
| + state.setTagState(SearchAttribute);
|
| + tquote = NoQuote;
|
| + if (inViewSourceMode())
|
| + m_currentToken.addViewSourceChar(curchar);
|
| + src.advancePastNonNewline();
|
| + break;
|
| + }
|
| + }
|
| +
|
| + *m_dest++ = curchar;
|
| + src.advance(m_lineNumber);
|
| + }
|
| + break;
|
| + case Value: // Unquoted value: runs until whitespace or '>'.
|
| + while(!src.isEmpty()) {
|
| + checkBuffer();
|
| + UChar curchar = *src;
|
| + if (curchar <= '>' && !src.escaped()) {
|
| + // parse Entities
|
| + if (curchar == '&') {
|
| + src.advancePastNonNewline();
|
| + state = parseEntity(src, m_dest, state, cBufferPos, true, true);
|
| + break;
|
| + }
|
| + // no quotes. Every space means end of value
|
| + // '/' does not delimit in IE!
|
| + if (isASCIISpace(curchar) || curchar == '>') {
|
| + AtomicString attributeValue(m_buffer + 1, m_dest - m_buffer - 1);
|
| + m_currentToken.addAttribute(m_attrName, attributeValue, inViewSourceMode());
|
| + if (inViewSourceMode())
|
| + m_currentToken.addViewSourceChar('v');
|
| + m_dest = m_buffer;
|
| + state.setTagState(SearchAttribute);
|
| + break;
|
| + }
|
| + }
|
| +
|
| + *m_dest++ = curchar;
|
| + src.advance(m_lineNumber);
|
| + }
|
| + break;
|
| + case SearchEnd: // Skips to the closing '>' (or a new '<'), then processes the finished tag.
|
| + {
|
| + while (!src.isEmpty()) {
|
| + UChar ch = *src;
|
| + if (ch == '>' || ch == '<')
|
| + break;
|
| + if (ch == '/')
|
| + m_currentToken.selfClosingTag = true;
|
| + if (inViewSourceMode())
|
| + m_currentToken.addViewSourceChar(ch);
|
| + src.advance(m_lineNumber);
|
| + }
|
| + if (src.isEmpty()) // Out of input before the tag ended: stay in SearchEnd until more data arrives.
|
| + break;
|
| +
|
| + searchCount = 0; // Stop looking for '<!--' sequence
|
| + state.setTagState(NoTag);
|
| + tquote = NoQuote;
|
| +
|
| + if (*src != '<') // Consume the closing '>', but leave a '<' unconsumed.
|
| + src.advance(m_lineNumber);
|
| +
|
| + if (m_currentToken.tagName == nullAtom) { //stop if tag is unknown
|
| + m_cBufferPos = cBufferPos;
|
| + return state;
|
| + }
|
| +
|
| + AtomicString tagName = m_currentToken.tagName; // Copied before processToken() runs; the checks below use this copy.
|
| +
|
| + // Handle <script src="foo"/> like Mozilla/Opera. We have to do this now for Dashboard
|
| + // compatibility.
|
| + bool isSelfClosingScript = m_currentToken.selfClosingTag && m_currentToken.beginTag && m_currentToken.tagName == scriptTag;
|
| + bool beginTag = !m_currentToken.selfClosingTag && m_currentToken.beginTag;
|
| + if (m_currentToken.beginTag && m_currentToken.tagName == scriptTag && !inViewSourceMode() && !m_parser->skipMode()) {
|
| + Attribute* a = 0;
|
| + m_scriptTagSrcAttrValue = String();
|
| + m_scriptTagCharsetAttrValue = String();
|
| + if (m_currentToken.attrs && !m_fragment) {
|
| + if (m_doc->frame() && m_doc->frame()->script()->isEnabled()) {
|
| + if ((a = m_currentToken.attrs->getAttributeItem(srcAttr)))
|
| + m_scriptTagSrcAttrValue = m_doc->completeURL(parseURL(a->value())).string();
|
| + }
|
| + }
|
| + }
|
| +
|
| + RefPtr<Node> n = processToken();
|
| + m_cBufferPos = cBufferPos;
|
| + if (n || inViewSourceMode()) { // Special content handling applies only when a node was created, or in view-source mode.
|
| + if ((tagName == preTag || tagName == listingTag) && !inViewSourceMode()) {
|
| + if (beginTag)
|
| + state.setDiscardLF(true); // Discard the first LF after we open a pre.
|
| + } else if (tagName == scriptTag) {
|
| + ASSERT(!m_scriptNode);
|
| + m_scriptNode = static_pointer_cast<HTMLScriptElement>(n);
|
| + if (m_scriptNode)
|
| + m_scriptTagCharsetAttrValue = m_scriptNode->scriptCharset();
|
| + if (beginTag) {
|
| + m_searchStopper = scriptEnd;
|
| + m_searchStopperLength = 8; // presumably the length of the scriptEnd string - TODO confirm against its definition.
|
| + state.setInScript(true);
|
| + state = parseSpecial(src, state);
|
| + } else if (isSelfClosingScript) { // Handle <script src="foo"/>
|
| + state.setInScript(true);
|
| + state = scriptHandler(state);
|
| + }
|
| + } else if (tagName == styleTag) {
|
| + if (beginTag) {
|
| + m_searchStopper = styleEnd;
|
| + m_searchStopperLength = 7;
|
| + state.setInStyle(true);
|
| + state = parseSpecial(src, state);
|
| + }
|
| + } else if (tagName == textareaTag) {
|
| + if (beginTag) {
|
| + m_searchStopper = textareaEnd;
|
| + m_searchStopperLength = 10;
|
| + state.setInTextArea(true);
|
| + state = parseSpecial(src, state);
|
| + }
|
| + } else if (tagName == titleTag) {
|
| + if (beginTag) {
|
| + m_searchStopper = titleEnd;
|
| + m_searchStopperLength = 7;
|
| + State savedState = state; // Snapshots taken so the title scan can be rolled back below.
|
| + SegmentedString savedSrc = src;
|
| + long savedLineno = m_lineNumber;
|
| + state.setInTitle(true);
|
| + state = parseSpecial(src, state);
|
| + if (state.inTitle() && src.isEmpty()) {
|
| + // We just ate the rest of the document as the title #text node!
|
| + // Reset the state then retokenize without special title handling.
|
| + // Let the parser clean up the missing </title> tag.
|
| + // FIXME: This is incorrect, because src.isEmpty() doesn't mean we're
|
| + // at the end of the document unless m_noMoreData is also true. We need
|
| + // to detect this case elsewhere, and save the state somewhere other
|
| + // than a local variable.
|
| + state = savedState;
|
| + src = savedSrc;
|
| + m_lineNumber = savedLineno;
|
| + m_scriptCodeSize = 0;
|
| + }
|
| + }
|
| + } else if (tagName == xmpTag) {
|
| + if (beginTag) {
|
| + m_searchStopper = xmpEnd;
|
| + m_searchStopperLength = 5;
|
| + state.setInXmp(true);
|
| + state = parseSpecial(src, state);
|
| + }
|
| + } else if (tagName == iframeTag) {
|
| + if (beginTag) {
|
| + m_searchStopper = iframeEnd;
|
| + m_searchStopperLength = 8;
|
| + state.setInIFrame(true);
|
| + state = parseSpecial(src, state);
|
| + }
|
| + }
|
| + }
|
| + if (tagName == plaintextTag)
|
| + state.setInPlainText(beginTag);
|
| + return state; // Finished parsing tag!
|
| + }
|
| + } // end switch
|
| + }
|
| + m_cBufferPos = cBufferPos;
|
| + return state;
|
| +}
|
| +
|
| +// Called once per iteration of the write() loop to decide whether tokenizing
|
| +// may continue (returns true) or must yield (returns false, after arming
|
| +// m_timer so that processing resumes later).
|
| +//
|
| +// processedCount - running count of iterations since the last clock check;
|
| +//                  reset whenever the clock is consulted.
|
| +// startTime      - currentTime() value sampled when write() started its loop.
|
| +// state          - tokenizer state; its allowYield flag is read and cleared.
|
| +inline bool HTMLTokenizer::continueProcessing(int& processedCount, double startTime, State &state)
|
| +{
|
| +    // The yield request is one-shot: read it, then clear it unconditionally.
|
| +    const bool yieldRequested = state.allowYield();
|
| +    state.setAllowYield(false);
|
| +
|
| +    // Reading the clock for every character would be too costly, so elapsed
|
| +    // time is only checked after a chunk of characters has been processed, or
|
| +    // when a yield was explicitly requested.
|
| +    const bool timeCheckDue = !state.loadingExtScript() && !state.forceSynchronous() && !m_executingScript
|
| +        && (processedCount > m_tokenizerChunkSize || yieldRequested);
|
| +    if (timeCheckDue) {
|
| +        processedCount = 0;
|
| +        const bool outOfTime = currentTime() - startTime > m_tokenizerTimeDelay;
|
| +        /* FIXME: We'd like to yield aggressively to give stylesheets the opportunity to
|
| +           load, but this hurts overall performance on slower machines.  For now this
|
| +           extra condition stays turned off:
|
| +           || (!m_doc->haveStylesheetsLoaded() &&
|
| +           (m_doc->documentElement()->id() != ID_HTML || m_doc->body())) */
|
| +        if (outOfTime) {
|
| +            // Schedule the timer to keep processing as soon as possible.
|
| +            m_timer.startOneShot(0);
|
| +#ifdef INSTRUMENT_LAYOUT_SCHEDULING
|
| +            if (currentTime() - startTime > m_tokenizerTimeDelay)
|
| +                printf("Deferring processing of data because 500ms elapsed away from event loop.\n");
|
| +#endif
|
| +            return false;
|
| +        }
|
| +    }
|
| +
|
| +    processedCount++;
|
| +    return true;
|
| +}
|
| +
|
| +bool HTMLTokenizer::write(const SegmentedString& str, bool appendData) // Feeds str to the tokenizer. Returns true only when end() was called at the bottom of this function; every other path returns false.
|
| +{
|
| + if (!m_buffer)
|
| + return false;
|
| +
|
| + if (m_parserStopped)
|
| + return false;
|
| +
|
| + SegmentedString source(str);
|
| + if (m_executingScript)
|
| + source.setExcludeLineNumbers();
|
| +
|
| + if ((m_executingScript && appendData) || !m_pendingScripts.isEmpty()) { // Appended data arriving while a script executes, or any data while scripts are pending, is queued rather than tokenized.
|
| + // don't parse; we will do this later
|
| + if (m_currentPrependingSrc)
|
| + m_currentPrependingSrc->append(source);
|
| + else {
|
| + m_pendingSrc.append(source);
|
| +#if PRELOAD_SCANNER_ENABLED
|
| + if (m_preloadScanner && m_preloadScanner->inProgress() && appendData)
|
| + m_preloadScanner->write(source);
|
| +#endif
|
| + }
|
| + return false;
|
| + }
|
| +
|
| +#if PRELOAD_SCANNER_ENABLED
|
| + if (m_preloadScanner && m_preloadScanner->inProgress() && appendData)
|
| + m_preloadScanner->end();
|
| +#endif
|
| +
|
| + if (!m_src.isEmpty())
|
| + m_src.append(source);
|
| + else
|
| + setSrc(source);
|
| +
|
| + // Once a timer is set, it has control of when the tokenizer continues.
|
| + if (m_timer.isActive())
|
| + return false;
|
| +
|
| + bool wasInWrite = m_inWrite; // Previous value, restored after the main loop.
|
| + m_inWrite = true;
|
| +
|
| +#ifdef INSTRUMENT_LAYOUT_SCHEDULING
|
| + if (!m_doc->ownerElement())
|
| + printf("Beginning write at time %d\n", m_doc->elapsedTime());
|
| +#endif
|
| +
|
| + int processedCount = 0;
|
| + double startTime = currentTime();
|
| +
|
| + Frame* frame = m_doc->frame();
|
| +
|
| + State state = m_state; // Work on a local copy; it is stored back into m_state after the loop.
|
| +
|
| + while (!m_src.isEmpty() && (!frame || !frame->loader()->isScheduledLocationChangePending())) {
|
| + if (!continueProcessing(processedCount, startTime, state)) // false means the tokenizer should yield now.
|
| + break;
|
| +
|
| + // do we need to enlarge the buffer?
|
| + checkBuffer();
|
| +
|
| + UChar cc = *m_src;
|
| +
|
| + bool wasSkipLF = state.skipLF();
|
| + if (wasSkipLF)
|
| + state.setSkipLF(false);
|
| +
|
| + if (wasSkipLF && (cc == '\n'))
|
| + m_src.advance(); // Drop the '\n' that directly follows an already-handled '\r'.
|
| + else if (state.needsSpecialWriteHandling()) {
|
| + // it's important to keep needsSpecialWriteHandling with the flags this block tests
|
| + if (state.hasEntityState())
|
| + state = parseEntity(m_src, m_dest, state, m_cBufferPos, false, state.hasTagState());
|
| + else if (state.inPlainText())
|
| + state = parseText(m_src, state);
|
| + else if (state.inAnySpecial())
|
| + state = parseSpecial(m_src, state);
|
| + else if (state.inComment())
|
| + state = parseComment(m_src, state);
|
| + else if (state.inDoctype())
|
| + state = parseDoctype(m_src, state);
|
| + else if (state.inServer())
|
| + state = parseServer(m_src, state);
|
| + else if (state.inProcessingInstruction())
|
| + state = parseProcessingInstruction(m_src, state);
|
| + else if (state.hasTagState())
|
| + state = parseTag(m_src, state);
|
| + else if (state.startTag()) { // The previous character was '<'; cc decides what kind of markup follows.
|
| + state.setStartTag(false);
|
| +
|
| + switch(cc) {
|
| + case '/':
|
| + break;
|
| + case '!': {
|
| + // <!-- comment --> or <!DOCTYPE ...>
|
| + searchCount = 1; // Look for '<!--' sequence to start comment or '<!DOCTYPE' sequence to start doctype
|
| + m_doctypeSearchCount = 1;
|
| + break;
|
| + }
|
| + case '?': {
|
| + // xml processing instruction
|
| + state.setInProcessingInstruction(true);
|
| + tquote = NoQuote;
|
| + state = parseProcessingInstruction(m_src, state);
|
| + continue;
|
| +
|
| + break; // Unreachable: follows the continue above.
|
| + }
|
| + case '%':
|
| + if (!m_brokenServer) {
|
| + // <% server stuff, handle as comment %>
|
| + state.setInServer(true);
|
| + tquote = NoQuote;
|
| + state = parseServer(m_src, state);
|
| + continue;
|
| + }
|
| + // else fall through
|
| + default: {
|
| + if( ((cc >= 'a') && (cc <= 'z')) || ((cc >= 'A') && (cc <= 'Z'))) {
|
| + // Start of a Start-Tag
|
| + } else {
|
| + // Invalid tag
|
| + // Add as is
|
| + *m_dest = '<';
|
| + m_dest++;
|
| + continue; // cc itself is not consumed here; it is handled by the next loop iteration.
|
| + }
|
| + }
|
| + }; // end case
|
| +
|
| + processToken(); // Runs before tag parsing starts for '/', '!' and letter-initial tags.
|
| +
|
| + m_cBufferPos = 0;
|
| + state.setTagState(TagName);
|
| + state = parseTag(m_src, state);
|
| + }
|
| + } else if (cc == '&' && !m_src.escaped()) {
|
| + m_src.advancePastNonNewline();
|
| + state = parseEntity(m_src, m_dest, state, m_cBufferPos, true, state.hasTagState());
|
| + } else if (cc == '<' && !m_src.escaped()) {
|
| + m_currentTagStartLineNumber = m_lineNumber;
|
| + m_src.advancePastNonNewline();
|
| + state.setStartTag(true);
|
| + state.setDiscardLF(false);
|
| + } else if (cc == '\n' || cc == '\r') {
|
| + if (state.discardLF())
|
| + // Ignore this LF
|
| + state.setDiscardLF(false); // We have discarded 1 LF
|
| + else {
|
| + // Process this LF
|
| + *m_dest++ = '\n';
|
| + if (cc == '\r' && !m_src.excludeLineNumbers())
|
| + m_lineNumber++; // NOTE(review): a '\r' dropped by discardLF above never reaches this increment - confirm line counting for CR/CRLF in that case.
|
| + }
|
| +
|
| + /* Check for MS-DOS CRLF sequence */
|
| + if (cc == '\r')
|
| + state.setSkipLF(true);
|
| + m_src.advance(m_lineNumber);
|
| + } else {
|
| + state.setDiscardLF(false);
|
| + *m_dest++ = cc;
|
| + m_src.advancePastNonNewline();
|
| + }
|
| + }
|
| +
|
| +#ifdef INSTRUMENT_LAYOUT_SCHEDULING
|
| + if (!m_doc->ownerElement())
|
| + printf("Ending write at time %d\n", m_doc->elapsedTime());
|
| +#endif
|
| +
|
| + m_inWrite = wasInWrite;
|
| +
|
| + m_state = state;
|
| +
|
| + if (m_noMoreData && !m_inWrite && !state.loadingExtScript() && !m_executingScript && !m_timer.isActive()) {
|
| + end(); // this actually causes us to be deleted
|
| + return true;
|
| + }
|
| + return false;
|
| +}
|
| +// Stops tokenizing: forwards to Tokenizer::stopParsing(), stops m_timer, and notifies the frame's loader (skipped when m_fragment is set or the document has no frame).
|
| +void HTMLTokenizer::stopParsing()
|
| +{
|
| + Tokenizer::stopParsing();
|
| + m_timer.stop(); // cancel any scheduled timer callback
|
| +
|
| + // The frame's loader needs to know that the tokenizer has finished with its data,
|
| + // regardless of whether it happened naturally or due to manual intervention.
|
| + if (!m_fragment && m_doc->frame())
|
| + m_doc->frame()->loader()->tokenizerProcessedData();
|
| +}
|
| +// Reports whether the tokenizer is still busy: true while m_timer is active or m_inWrite is set.
|
| +bool HTMLTokenizer::processingData() const
|
| +{
|
| + return m_timer.isActive() || m_inWrite;
|
| +}
|
| +// Timer callback: yields to a pending layout by re-arming m_timer, otherwise resumes tokenizing by calling write() with an empty SegmentedString. The timer argument is unused.
|
| +void HTMLTokenizer::timerFired(Timer<HTMLTokenizer>*)
|
| +{
|
| +#ifdef INSTRUMENT_LAYOUT_SCHEDULING
|
| + if (!m_doc->ownerElement())
|
| + printf("Beginning timer write at time %d\n", m_doc->elapsedTime());
|
| +#endif
|
| +
|
| + if (m_doc->view() && m_doc->view()->layoutPending() && !m_doc->minimumLayoutDelay()) {
|
| + // A layout is pending and has no minimum delay: restart our timer and let layout win.
|
| + // This is basically a way of ensuring that the layout timer has higher priority than our timer.
|
| + m_timer.startOneShot(0);
|
| + return;
|
| + }
|
| +
|
| + // Invoke write() as though more data came in. This might cause us to get deleted, so nothing may follow it.
|
| + write(SegmentedString(), true);
|
| +}
|
| +// Finishes tokenizing: if m_buffer exists, flushes the pending token (unless in a tag state) and frees m_scriptCode and m_buffer; then reports completion to the parser, or to the document in view-source mode.
|
| +void HTMLTokenizer::end()
|
| +{
|
| + ASSERT(!m_timer.isActive());
|
| + m_timer.stop(); // Only helps if assertion above fires, but do it anyway.
|
| +
|
| + if (m_buffer) {
|
| + // While in a tag state parseTag is using the buffer for different matters, so only flush a token otherwise.
|
| + if (!m_state.hasTagState())
|
| + processToken();
|
| +
|
| + fastFree(m_scriptCode);
|
| + m_scriptCode = 0;
|
| + m_scriptCodeSize = m_scriptCodeCapacity = m_scriptCodeResync = 0;
|
| +
|
| + fastFree(m_buffer);
|
| + m_buffer = 0;
|
| + }
|
| + // Report completion: m_parser->finished() normally, m_doc->finishedParsing() in view-source mode.
|
| + if (!inViewSourceMode())
|
| + m_parser->finished();
|
| + else
|
| + m_doc->finishedParsing();
|
| +}
|
| +// Signals end of input: text buffered inside an unterminated comment or server block is written back through write(), then m_noMoreData is set and end() is called unless a write, external script, script execution or timer is still pending.
|
| +void HTMLTokenizer::finish()
|
| +{
|
| + // Repeat for as long as we are still inside a comment or server block that has buffered text,
|
| + while ((m_state.inComment() || m_state.inServer()) && m_scriptCode && m_scriptCodeSize) {
|
| + // We've found an unmatched comment (or server block) start; record which kind before re-tokenizing.
|
| + if (m_state.inComment())
|
| + m_brokenComments = true;
|
| + else
|
| + m_brokenServer = true;
|
| + checkScriptBuffer(); // presumably guarantees room for the two terminators written below - TODO confirm
|
| + m_scriptCode[m_scriptCodeSize] = 0;
|
| + m_scriptCode[m_scriptCodeSize + 1] = 0;
|
| + int pos; // only assigned and read in the final else branch below
|
| + String food; // the text that will be fed back into write()
|
| + if (m_state.inScript() || m_state.inStyle() || m_state.inTextArea())
|
| + food = String(m_scriptCode, m_scriptCodeSize);
|
| + else if (m_state.inServer()) {
|
| + food = "<"; // presumably the buffered text lacks its opening bracket - TODO confirm
|
| + food.append(m_scriptCode, m_scriptCodeSize);
|
| + } else {
|
| + pos = find(m_scriptCode, m_scriptCodeSize, '>');
|
| + food = String(m_scriptCode + pos + 1, m_scriptCodeSize - pos - 1); // keep only what follows that character
|
| + }
|
| + fastFree(m_scriptCode);
|
| + m_scriptCode = 0;
|
| + m_scriptCodeSize = m_scriptCodeCapacity = m_scriptCodeResync = 0;
|
| + m_state.setInComment(false); // both flags are cleared before the text is written again
|
| + m_state.setInServer(false);
|
| + if (!food.isEmpty())
|
| + write(food, true);
|
| + }
|
| + // this indicates we will not receive any more data... but if we are waiting on
|
| + // an external script to load, we can't finish parsing until that is done
|
| + m_noMoreData = true;
|
| + if (!m_inWrite && !m_state.loadingExtScript() && !m_executingScript && !m_timer.isActive())
|
| + end(); // this actually causes us to be deleted
|
| +}
|
| +// Emits m_currentToken, including any text buffered between m_buffer and m_dest, to the parser (or to the view-source document), then resets the token. Returns the node from m_parser->parseToken(), or 0 when there was nothing to emit or nothing was parsed.
|
| +PassRefPtr<Node> HTMLTokenizer::processToken()
|
| +{
|
| + ScriptController* scriptController = (!m_fragment && m_doc->frame()) ? m_doc->frame()->script() : 0;
|
| + if (scriptController && scriptController->isEnabled())
|
| + // FIXME: Why isn't this m_currentScriptTagStartLineNumber? I suspect this is wrong.
|
| + scriptController->setEventHandlerLineno(m_currentTagStartLineNumber + 1); // Script line numbers are 1 based.
|
| + if (m_dest > m_buffer) { // buffered characters exist and become the token text
|
| + m_currentToken.text = StringImpl::createStrippingNullCharacters(m_buffer, m_dest - m_buffer);
|
| + if (m_currentToken.tagName != commentAtom)
|
| + m_currentToken.tagName = textAtom;
|
| + } else if (m_currentToken.tagName == nullAtom) { // nothing buffered and no tag name, so there is no token to emit
|
| + m_currentToken.reset();
|
| + if (scriptController)
|
| + scriptController->setEventHandlerLineno(m_lineNumber + 1); // Script line numbers are 1 based.
|
| + return 0;
|
| + }
|
| +
|
| + m_dest = m_buffer; // rewind the write position to the start of the buffer
|
| +
|
| + RefPtr<Node> n; // stays null unless the parser produces a node below
|
| +
|
| + if (!m_parserStopped) {
|
| + if (NamedMappedAttrMap* map = m_currentToken.attrs.get())
|
| + map->shrinkToLength();
|
| + if (inViewSourceMode())
|
| + static_cast<HTMLViewSourceDocument*>(m_doc)->addViewSourceToken(&m_currentToken);
|
| + else
|
| + // pass the token over to the parser, the parser DOES NOT delete the token
|
| + n = m_parser->parseToken(&m_currentToken);
|
| + }
|
| + m_currentToken.reset();
|
| + if (scriptController)
|
| + scriptController->setEventHandlerLineno(0);
|
| +
|
| + return n.release();
|
| +}
|
| +// Hands m_doctypeToken to the view-source document when in view-source mode, otherwise to the parser.
|
| +void HTMLTokenizer::processDoctypeToken()
|
| +{
|
| + if (inViewSourceMode())
|
| + static_cast<HTMLViewSourceDocument*>(m_doc)->addViewSourceDoctypeToken(&m_doctypeToken);
|
| + else
|
| + m_parser->parseDoctypeToken(&m_doctypeToken);
|
| +}
|
| +// Destructor: asserts that no write() is in progress (m_inWrite is false) and then calls reset().
|
| +HTMLTokenizer::~HTMLTokenizer()
|
| +{
|
| + ASSERT(!m_inWrite);
|
| + reset();
|
| +}
|
| +
|
| +// Grows m_buffer to max(2 * m_bufferSize, m_bufferSize + len) UChars and re-points m_dest at the same offset inside the reallocated buffer.
|
| +void HTMLTokenizer::enlargeBuffer(int len)
|
| +{
|
| + int newSize = max(m_bufferSize * 2, m_bufferSize + len);
|
| + int oldOffset = m_dest - m_buffer; // saved so m_dest can be re-derived after the reallocation
|
| + m_buffer = static_cast<UChar*>(fastRealloc(m_buffer, newSize * sizeof(UChar)));
|
| + m_dest = m_buffer + oldOffset;
|
| + m_bufferSize = newSize;
|
| +}
|
| +// Grows m_scriptCode to max(2 * m_scriptCodeCapacity, m_scriptCodeCapacity + len) UChars and records the new capacity.
|
| +void HTMLTokenizer::enlargeScriptBuffer(int len)
|
| +{
|
| + int newSize = max(m_scriptCodeCapacity * 2, m_scriptCodeCapacity + len);
|
| + m_scriptCode = static_cast<UChar*>(fastRealloc(m_scriptCode, newSize * sizeof(UChar)));
|
| + m_scriptCodeCapacity = newSize;
|
| +}
|
| +// Resumes scripts that were held back until stylesheets loaded: if m_hasScriptsWaitingForStylesheets is set, calls notifyFinished(0). Asserts that the document's stylesheets have loaded.
|
| +void HTMLTokenizer::executeScriptsWaitingForStylesheets()
|
| +{
|
| + ASSERT(m_doc->haveStylesheetsLoaded());
|
| +
|
| + if (m_hasScriptsWaitingForStylesheets)
|
| + notifyFinished(0);
|
| +}
|
| +// Runs pending external scripts in order for as long as the first entry of m_pendingScripts is loaded, and after each one writes m_pendingSrc back through write() unless m_requestingScript is set. Returns early while stylesheets are still loading. The CachedResource argument is unused.
|
| +void HTMLTokenizer::notifyFinished(CachedResource*)
|
| +{
|
| +#ifdef INSTRUMENT_LAYOUT_SCHEDULING
|
| + if (!m_doc->ownerElement())
|
| + printf("script loaded at %d\n", m_doc->elapsedTime());
|
| +#endif
|
| +
|
| + ASSERT(!m_pendingScripts.isEmpty());
|
| +
|
| + // Make external scripts wait for external stylesheets.
|
| + // FIXME: This needs to be done for inline scripts too.
|
| + m_hasScriptsWaitingForStylesheets = !m_doc->haveStylesheetsLoaded();
|
| + if (m_hasScriptsWaitingForStylesheets)
|
| + return;
|
| +
|
| + bool finished = false; // becomes true when no scripts remain or stylesheets block further execution
|
| + while (!finished && m_pendingScripts.first()->isLoaded()) {
|
| + CachedScript* cs = m_pendingScripts.first().get();
|
| + m_pendingScripts.removeFirst();
|
| + ASSERT(cache()->disabled() || cs->accessCount() > 0);
|
| +
|
| + setSrc(SegmentedString()); // replace m_src with an empty SegmentedString
|
| +
|
| + // make sure we forget about the script before we execute the new one
|
| + // infinite recursion might happen otherwise
|
| + ScriptSourceCode sourceCode(cs);
|
| + bool errorOccurred = cs->errorOccurred(); // read before removeClient() is called on cs
|
| + cs->removeClient(this);
|
| +
|
| + RefPtr<Node> n = m_scriptNode.release(); // the node that receives the error or load event below
|
| +
|
| +#ifdef INSTRUMENT_LAYOUT_SCHEDULING
|
| + if (!m_doc->ownerElement())
|
| + printf("external script beginning execution at %d\n", m_doc->elapsedTime());
|
| +#endif
|
| +
|
| + if (errorOccurred)
|
| + n->dispatchEventForType(eventNames().errorEvent, true, false);
|
| + else {
|
| + if (static_cast<HTMLScriptElement*>(n.get())->shouldExecuteAsJavaScript())
|
| + m_state = scriptExecution(sourceCode, m_state);
|
| + n->dispatchEventForType(eventNames().loadEvent, false, false);
|
| + }
|
| +
|
| + // The state of m_pendingScripts.isEmpty() can change inside the scriptExecution()
|
| + // call above, so test afterwards.
|
| + finished = m_pendingScripts.isEmpty();
|
| + if (finished) {
|
| + ASSERT(!m_hasScriptsWaitingForStylesheets);
|
| + m_state.setLoadingExtScript(false);
|
| +#ifdef INSTRUMENT_LAYOUT_SCHEDULING
|
| + if (!m_doc->ownerElement())
|
| + printf("external script finished execution at %d\n", m_doc->elapsedTime());
|
| +#endif
|
| + } else if (m_hasScriptsWaitingForStylesheets) {
|
| + // m_hasScriptsWaitingForStylesheets flag might have changed during the script execution.
|
| + // If it did we are now blocked waiting for stylesheets and should not execute more scripts until they arrive.
|
| + finished = true;
|
| + }
|
| +
|
| + // 'm_requestingScript' is true when we are called synchronously from
|
| + // scriptHandler(). In that case scriptHandler() will take care
|
| + // of m_pendingSrc.
|
| + if (!m_requestingScript) {
|
| + SegmentedString rest = m_pendingSrc; // copy first, because m_pendingSrc is cleared before write() runs
|
| + m_pendingSrc.clear();
|
| + write(rest, false);
|
| + // we might be deleted at this point, do not access any members.
|
| + }
|
| + }
|
| +}
|
| +// Returns the loadingExtScript flag of m_state, i.e. whether the tokenizer is waiting on an external script.
|
| +bool HTMLTokenizer::isWaitingForScripts() const
|
| +{
|
| + return m_state.loadingExtScript();
|
| +}
|
| +// Replaces the tokenizer's current input (m_src) with a copy of source.
|
| +void HTMLTokenizer::setSrc(const SegmentedString& source)
|
| +{
|
| + m_src = source;
|
| +}
|
| +// Parses source into fragment using a temporary HTMLTokenizer that is forced to run synchronously, and asserts that it is no longer processing data afterwards.
|
| +void parseHTMLDocumentFragment(const String& source, DocumentFragment* fragment)
|
| +{
|
| + HTMLTokenizer tok(fragment);
|
| + tok.setForceSynchronous(true);
|
| + tok.write(source, true);
|
| + tok.finish();
|
| + ASSERT(!tok.processingData()); // make sure we're done (see 3963151)
|
| +}
|
| +// Looks up the NUL-terminated entity name with findEntity() and returns the entity's code, or 0 when no such entity exists.
|
| +UChar decodeNamedEntity(const char* name)
|
| +{
|
| + const Entity* e = findEntity(name, strlen(name));
|
| + return e ? e->code : 0; // NOTE(review): the code is converted to UChar here - confirm no entity code needs a wider type
|
| +}
|
| +
|
| +}
|
| +
|
| +
|
|
|