Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(273)

Side by Side Diff: ios/third_party/blink/src/html_input_stream_preprocessor.h

Issue 1031023002: Upstream ios/web/ HTML tokenizer (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 5 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 /*
2 * Copyright (C) 2008 Apple Inc. All Rights Reserved.
3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
4 * Copyright (C) 2013 Google, Inc. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
16 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
19 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #ifndef InputStreamPreprocessor_h
29 #define InputStreamPreprocessor_h
30
31 #include "html_character_provider.h"
32
33 namespace WebCore {
34
35 // http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-input-stream
36 template <typename Tokenizer>
37 class InputStreamPreprocessor {
38 WTF_MAKE_NONCOPYABLE(InputStreamPreprocessor);
39 public:
40 InputStreamPreprocessor(Tokenizer* tokenizer)
41 : m_tokenizer(tokenizer)
42 {
43 reset();
44 }
45
46 ALWAYS_INLINE UChar nextInputCharacter() const { return m_nextInputCharacter ; }
47
48 // Returns whether we succeeded in peeking at the next character.
49 // The only way we can fail to peek is if there are no more
50 // characters in |source| (after collapsing \r\n, etc).
51 ALWAYS_INLINE bool peek(CharacterProvider& source)
52 {
53 m_nextInputCharacter = source.currentCharacter();
54
55 // Every branch in this function is expensive, so we have a
56 // fast-reject branch for characters that don't require special
57 // handling. Please run the parser benchmark whenever you touch
58 // this function. It's very hot.
59 static const UChar specialCharacterMask = '\n' | '\r' | '\0';
60 if (m_nextInputCharacter & ~specialCharacterMask) {
61 m_skipNextNewLine = false;
62 return true;
63 }
64 return processNextInputCharacter(source);
65 }
66
67 // Returns whether there are more characters in |source| after advancing.
68 ALWAYS_INLINE bool advance(CharacterProvider& source)
69 {
70 source.next();
71 if (source.isEmpty())
72 return false;
73 return peek(source);
74 }
75
76 void reset(bool skipNextNewLine = false)
77 {
78 m_nextInputCharacter = '\0';
79 m_skipNextNewLine = skipNextNewLine;
80 }
81
82 private:
83 bool processNextInputCharacter(CharacterProvider& source)
84 {
85 ProcessAgain:
86 ASSERT(m_nextInputCharacter == source.currentCharacter());
87
88 if (m_nextInputCharacter == '\n' && m_skipNextNewLine) {
89 m_skipNextNewLine = false;
90 source.next();
91 if (source.isEmpty())
92 return false;
93 m_nextInputCharacter = source.currentCharacter();
94 }
95 if (m_nextInputCharacter == '\r') {
96 m_nextInputCharacter = '\n';
97 m_skipNextNewLine = true;
98 } else {
99 m_skipNextNewLine = false;
100 // FIXME: The spec indicates that the surrogate pair range as well a s
101 // a number of specific character values are parse errors and should be replaced
102 // by the replacement character. We suspect this is a problem with t he spec as doing
103 // that filtering breaks surrogate pair handling and causes us not t o match Minefield.
104 if (m_nextInputCharacter == '\0' && !shouldTreatNullAsEndOfFileMarke r(source)) {
105 if (m_tokenizer->shouldSkipNullCharacters()) {
106 source.next();
107 if (source.isEmpty())
108 return false;
109 m_nextInputCharacter = source.currentCharacter();
110 goto ProcessAgain;
111 }
112 m_nextInputCharacter = 0xFFFD;
113 }
114 }
115 return true;
116 }
117
118 bool shouldTreatNullAsEndOfFileMarker(CharacterProvider& source) const
119 {
120 return source.remainingBytes() == 1;
121 }
122
123 Tokenizer* m_tokenizer;
124
125 // http://www.whatwg.org/specs/web-apps/current-work/#next-input-character
126 UChar m_nextInputCharacter;
127 bool m_skipNextNewLine;
128 };
129
130 }
131
132 #endif // InputStreamPreprocessor_h
133
OLDNEW
« no previous file with comments | « ios/third_party/blink/src/html_character_provider.h ('k') | ios/third_party/blink/src/html_markup_tokenizer_inlines.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698