OLD | NEW |
(Empty) | |
| 1 /* |
| 2 * Copyright (C) 2008 Apple Inc. All Rights Reserved. |
| 3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/ |
| 4 * Copyright (C) 2013 Google, Inc. All Rights Reserved. |
| 5 * |
| 6 * Redistribution and use in source and binary forms, with or without |
| 7 * modification, are permitted provided that the following conditions |
| 8 * are met: |
| 9 * 1. Redistributions of source code must retain the above copyright |
| 10 * notice, this list of conditions and the following disclaimer. |
| 11 * 2. Redistributions in binary form must reproduce the above copyright |
| 12 * notice, this list of conditions and the following disclaimer in the |
| 13 * documentation and/or other materials provided with the distribution. |
| 14 * |
| 15 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
| 16 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
| 19 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| 20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| 21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| 22 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
| 23 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 25 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 26 */ |
| 27 |
| 28 #ifndef InputStreamPreprocessor_h |
| 29 #define InputStreamPreprocessor_h |
| 30 |
| 31 #include "html_character_provider.h" |
| 32 |
| 33 namespace WebCore { |
| 34 |
| 35 // http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-input-stream |
| 36 template <typename Tokenizer> |
| 37 class InputStreamPreprocessor { |
| 38 WTF_MAKE_NONCOPYABLE(InputStreamPreprocessor); |
| 39 public: |
| 40 InputStreamPreprocessor(Tokenizer* tokenizer) |
| 41 : m_tokenizer(tokenizer) |
| 42 { |
| 43 reset(); |
| 44 } |
| 45 |
| 46 ALWAYS_INLINE UChar nextInputCharacter() const { return m_nextInputCharacter
; } |
| 47 |
| 48 // Returns whether we succeeded in peeking at the next character. |
| 49 // The only way we can fail to peek is if there are no more |
| 50 // characters in |source| (after collapsing \r\n, etc). |
| 51 ALWAYS_INLINE bool peek(CharacterProvider& source) |
| 52 { |
| 53 m_nextInputCharacter = source.currentCharacter(); |
| 54 |
| 55 // Every branch in this function is expensive, so we have a |
| 56 // fast-reject branch for characters that don't require special |
| 57 // handling. Please run the parser benchmark whenever you touch |
| 58 // this function. It's very hot. |
| 59 static const UChar specialCharacterMask = '\n' | '\r' | '\0'; |
| 60 if (m_nextInputCharacter & ~specialCharacterMask) { |
| 61 m_skipNextNewLine = false; |
| 62 return true; |
| 63 } |
| 64 return processNextInputCharacter(source); |
| 65 } |
| 66 |
| 67 // Returns whether there are more characters in |source| after advancing. |
| 68 ALWAYS_INLINE bool advance(CharacterProvider& source) |
| 69 { |
| 70 source.next(); |
| 71 if (source.isEmpty()) |
| 72 return false; |
| 73 return peek(source); |
| 74 } |
| 75 |
| 76 void reset(bool skipNextNewLine = false) |
| 77 { |
| 78 m_nextInputCharacter = '\0'; |
| 79 m_skipNextNewLine = skipNextNewLine; |
| 80 } |
| 81 |
| 82 private: |
| 83 bool processNextInputCharacter(CharacterProvider& source) |
| 84 { |
| 85 ProcessAgain: |
| 86 ASSERT(m_nextInputCharacter == source.currentCharacter()); |
| 87 |
| 88 if (m_nextInputCharacter == '\n' && m_skipNextNewLine) { |
| 89 m_skipNextNewLine = false; |
| 90 source.next(); |
| 91 if (source.isEmpty()) |
| 92 return false; |
| 93 m_nextInputCharacter = source.currentCharacter(); |
| 94 } |
| 95 if (m_nextInputCharacter == '\r') { |
| 96 m_nextInputCharacter = '\n'; |
| 97 m_skipNextNewLine = true; |
| 98 } else { |
| 99 m_skipNextNewLine = false; |
| 100 // FIXME: The spec indicates that the surrogate pair range as well a
s |
| 101 // a number of specific character values are parse errors and should
be replaced |
| 102 // by the replacement character. We suspect this is a problem with t
he spec as doing |
| 103 // that filtering breaks surrogate pair handling and causes us not t
o match Minefield. |
| 104 if (m_nextInputCharacter == '\0' && !shouldTreatNullAsEndOfFileMarke
r(source)) { |
| 105 if (m_tokenizer->shouldSkipNullCharacters()) { |
| 106 source.next(); |
| 107 if (source.isEmpty()) |
| 108 return false; |
| 109 m_nextInputCharacter = source.currentCharacter(); |
| 110 goto ProcessAgain; |
| 111 } |
| 112 m_nextInputCharacter = 0xFFFD; |
| 113 } |
| 114 } |
| 115 return true; |
| 116 } |
| 117 |
| 118 bool shouldTreatNullAsEndOfFileMarker(CharacterProvider& source) const |
| 119 { |
| 120 return source.remainingBytes() == 1; |
| 121 } |
| 122 |
| 123 Tokenizer* m_tokenizer; |
| 124 |
| 125 // http://www.whatwg.org/specs/web-apps/current-work/#next-input-character |
| 126 UChar m_nextInputCharacter; |
| 127 bool m_skipNextNewLine; |
| 128 }; |
| 129 |
| 130 } |
| 131 |
| 132 #endif // InputStreamPreprocessor_h |
| 133 |
OLD | NEW |