| Index: ios/third_party/blink/src/html_tokenizer.h
|
| diff --git a/ios/third_party/blink/src/html_tokenizer.h b/ios/third_party/blink/src/html_tokenizer.h
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..70623aef6ca7a064f5ede4240a0cab328cda5a35
|
| --- /dev/null
|
| +++ b/ios/third_party/blink/src/html_tokenizer.h
|
| @@ -0,0 +1,157 @@
|
| +/*
|
| + * Copyright (C) 2008 Apple Inc. All Rights Reserved.
|
| + * Copyright (C) 2010 Google, Inc. All Rights Reserved.
|
| + *
|
| + * Redistribution and use in source and binary forms, with or without
|
| + * modification, are permitted provided that the following conditions
|
| + * are met:
|
| + * 1. Redistributions of source code must retain the above copyright
|
| + * notice, this list of conditions and the following disclaimer.
|
| + * 2. Redistributions in binary form must reproduce the above copyright
|
| + * notice, this list of conditions and the following disclaimer in the
|
| + * documentation and/or other materials provided with the distribution.
|
| + *
|
| + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
|
| + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
| + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
| + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
|
| + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
| + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
| + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
| + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
| + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
| + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
| + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
| + */
|
| +
|
| +#ifndef HTMLTokenizer_h
|
| +#define HTMLTokenizer_h
|
| +
|
| +#include "ios/third_party/blink/src/html_input_stream_preprocessor.h"
|
| +#include "ios/third_party/blink/src/html_token.h"
|
| +
|
| +namespace WebCore {
|
| +// Tokenizer state machine: nextToken() reads from a CharacterProvider into a caller-owned HTMLToken.
|
| +class HTMLTokenizer {
|
| + WTF_MAKE_NONCOPYABLE(HTMLTokenizer); // Macro defined elsewhere; presumably disables copying - confirm.
|
| +
|
| +public:
|
| + HTMLTokenizer();
|
| + ~HTMLTokenizer();
|
| + // Defined out of line. Per the nextToken() contract, a caller that resets need not re-supply an in-progress token.
|
| + void reset();
|
| + // Tokenizer states. Entries flagged below are internal additions; the rest presumably mirror the HTML5 spec.
|
| + enum State {
|
| + DataState,
|
| + TagOpenState,
|
| + EndTagOpenState,
|
| + TagNameState,
|
| + BeforeAttributeNameState,
|
| + AttributeNameState,
|
| + AfterAttributeNameState,
|
| + BeforeAttributeValueState,
|
| + AttributeValueDoubleQuotedState,
|
| + AttributeValueSingleQuotedState,
|
| + AttributeValueUnquotedState,
|
| + AfterAttributeValueQuotedState,
|
| + SelfClosingStartTagState,
|
| + BogusCommentState,
|
| + // The ContinueBogusCommentState is not in the HTML5 spec, but we use
|
| + // it internally to keep track of whether we've started the bogus
|
| + // comment token yet.
|
| + ContinueBogusCommentState,
|
| + MarkupDeclarationOpenState,
|
| + CommentStartState,
|
| + CommentStartDashState,
|
| + CommentState,
|
| + CommentEndDashState,
|
| + CommentEndState,
|
| + CommentEndBangState,
|
| + DOCTYPEState,
|
| + BeforeDOCTYPENameState,
|
| + DOCTYPENameState,
|
| + AfterDOCTYPENameState,
|
| + AfterDOCTYPEPublicKeywordState,
|
| + BeforeDOCTYPEPublicIdentifierState,
|
| + DOCTYPEPublicIdentifierDoubleQuotedState,
|
| + DOCTYPEPublicIdentifierSingleQuotedState,
|
| + AfterDOCTYPEPublicIdentifierState,
|
| + BetweenDOCTYPEPublicAndSystemIdentifiersState,
|
| + AfterDOCTYPESystemKeywordState,
|
| + BeforeDOCTYPESystemIdentifierState,
|
| + DOCTYPESystemIdentifierDoubleQuotedState,
|
| + DOCTYPESystemIdentifierSingleQuotedState,
|
| + AfterDOCTYPESystemIdentifierState,
|
| + BogusDOCTYPEState,
|
| + CDATASectionState,
|
| + // These CDATA states are not in the HTML5 spec, but we use them internally.
|
| + CDATASectionRightSquareBracketState,
|
| + CDATASectionDoubleRightSquareBracketState,
|
| + };
|
| +
|
| + // This function returns true if it emits a token. Otherwise, callers
|
| + // must provide the same (in progress) token on the next call (unless
|
| + // they call reset() first).
|
| + bool nextToken(CharacterProvider&, HTMLToken&);
|
| + // Direct accessors for the current state; setState() performs no validation.
|
| + State state() const { return m_state; }
|
| + void setState(State state) { m_state = state; }
|
| + // True only in DataState. NOTE(review): presumably consulted by InputStreamPreprocessor<HTMLTokenizer> - confirm.
|
| + inline bool shouldSkipNullCharacters() const
|
| + {
|
| + return m_state == HTMLTokenizer::DataState;
|
| + }
|
| +
|
| +private:
|
| + inline void parseError(); // Declared only; no definition is visible in this header.
|
| + // Asserts the token is initialized, switches to |state|, advances |source| via next(), and returns true.
|
| + inline bool emitAndResumeIn(CharacterProvider& source, State state)
|
| + {
|
| + ASSERT(m_token->type() != HTMLToken::Uninitialized);
|
| + m_state = state;
|
| + source.next();
|
| + return true;
|
| + }
|
| + // Like emitAndResumeIn(), but does not advance the (unnamed) CharacterProvider argument.
|
| + inline bool emitAndReconsumeIn(CharacterProvider&, State state)
|
| + {
|
| + ASSERT(m_token->type() != HTMLToken::Uninitialized);
|
| + m_state = state;
|
| + return true;
|
| + }
|
| + // Always returns true. Only converts the token to end-of-file when no Character token is pending.
|
| + inline bool emitEndOfFile(CharacterProvider& source)
|
| + {
|
| + if (haveBufferedCharacterToken())
|
| + return true; // State, source and token are left untouched; presumably the pending characters go out first.
|
| + m_state = HTMLTokenizer::DataState;
|
| + source.next();
|
| + m_token->clear();
|
| + m_token->makeEndOfFile();
|
| + return true;
|
| + }
|
| +
|
| + // Return whether we need to emit a character token before dealing with
|
| + // the buffered end tag.
|
| + inline bool flushBufferedEndTag(CharacterProvider&);
|
| + // True when the token currently referenced by m_token has type HTMLToken::Character.
|
| + inline bool haveBufferedCharacterToken()
|
| + {
|
| + return m_token->type() == HTMLToken::Character;
|
| + }
|
| + // Current state; changed by setState() and by the emit helpers above.
|
| + State m_state;
|
| +
|
| + // m_token is owned by the caller. If nextToken is not on the stack,
|
| + // this member might be pointing to unallocated memory.
|
| + HTMLToken* m_token;
|
| +
|
| + // http://www.whatwg.org/specs/web-apps/current-work/#additional-allowed-character
|
| + LChar m_additionalAllowedCharacter;
|
| +
|
| + // http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-input-stream
|
| + InputStreamPreprocessor<HTMLTokenizer> m_inputStreamPreprocessor;
|
| +};
|
| +}
|
| +
|
| +#endif
|
|
|