| OLD | NEW |
| (Empty) |
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "sky/engine/core/html/parser/HTMLEntityParser.h" | |
| 6 | |
| 7 #include "sky/engine/wtf/unicode/CharacterNames.h" | |
| 8 | |
| 9 using namespace WTF; | |
| 10 | |
| 11 namespace blink { | |
| 12 | |
| 13 static const UChar32 kInvalidUnicode = -1; | |
| 14 | |
| 15 static UChar asHexDigit(UChar cc) | |
| 16 { | |
| 17 if (cc >= '0' && cc <= '9') | |
| 18 return cc - '0'; | |
| 19 if (cc >= 'a' && cc <= 'f') | |
| 20 return 10 + cc - 'a'; | |
| 21 if (cc >= 'A' && cc <= 'F') | |
| 22 return 10 + cc - 'A'; | |
| 23 ASSERT_NOT_REACHED(); | |
| 24 return 0; | |
| 25 } | |
| 26 | |
| 27 static bool isAlphaNumeric(UChar cc) | |
| 28 { | |
| 29 return (cc >= '0' && cc <= '9') || (cc >= 'a' && cc <= 'z') || (cc >= 'A' &&
cc <= 'Z'); | |
| 30 } | |
| 31 | |
| 32 static bool isHexDigit(UChar cc) | |
| 33 { | |
| 34 return (cc >= '0' && cc <= '9') || (cc >= 'a' && cc <= 'f') || (cc >= 'A' &&
cc <= 'F'); | |
| 35 } | |
| 36 | |
| 37 static UChar decodeEntity(HTMLEntityParser::OutputBuffer buffer) | |
| 38 { | |
| 39 if (equalIgnoringNullity(buffer, "&")) | |
| 40 return '&'; | |
| 41 if (equalIgnoringNullity(buffer, "&apos")) | |
| 42 return '\''; | |
| 43 if (equalIgnoringNullity(buffer, ">")) | |
| 44 return '>'; | |
| 45 if (equalIgnoringNullity(buffer, "<")) | |
| 46 return '<'; | |
| 47 if (equalIgnoringNullity(buffer, """)) | |
| 48 return '"'; | |
| 49 return replacementCharacter; | |
| 50 } | |
| 51 | |
| 52 HTMLEntityParser::HTMLEntityParser() | |
| 53 { | |
| 54 } | |
| 55 | |
| 56 HTMLEntityParser::~HTMLEntityParser() | |
| 57 { | |
| 58 } | |
| 59 | |
| 60 void HTMLEntityParser::reset() | |
| 61 { | |
| 62 m_state = Initial; | |
| 63 m_result = '\0'; | |
| 64 m_buffer.clear(); | |
| 65 m_buffer.append('&'); | |
| 66 } | |
| 67 | |
| 68 bool HTMLEntityParser::parse(SegmentedString& source) | |
| 69 { | |
| 70 while (!source.isEmpty()) { | |
| 71 UChar cc = source.currentChar(); | |
| 72 switch (m_state) { | |
| 73 case Initial: { | |
| 74 if (cc == '#') { | |
| 75 m_state = Numeric; | |
| 76 break; | |
| 77 } | |
| 78 if (isAlphaNumeric(cc)) { | |
| 79 m_state = Named; | |
| 80 continue; | |
| 81 } | |
| 82 return true; | |
| 83 } | |
| 84 case Numeric: { | |
| 85 if (cc == 'x' || cc == 'X') { | |
| 86 m_state = PossiblyHex; | |
| 87 break; | |
| 88 } | |
| 89 if (cc >= '0' && cc <= '9') { | |
| 90 m_state = Decimal; | |
| 91 continue; | |
| 92 } | |
| 93 return true; | |
| 94 } | |
| 95 case PossiblyHex: { | |
| 96 if (isHexDigit(cc)) { | |
| 97 m_state = Hex; | |
| 98 continue; | |
| 99 } | |
| 100 return true; | |
| 101 } | |
| 102 case Hex: { | |
| 103 if (isHexDigit(cc)) { | |
| 104 if (m_result != kInvalidUnicode) | |
| 105 m_result = m_result * 16 + asHexDigit(cc); | |
| 106 break; | |
| 107 } | |
| 108 if (cc == ';') { | |
| 109 source.advanceAndASSERT(cc); | |
| 110 finalizeNumericEntity(); | |
| 111 return true; | |
| 112 } | |
| 113 return true; | |
| 114 } | |
| 115 case Decimal: { | |
| 116 if (cc >= '0' && cc <= '9') { | |
| 117 if (m_result != kInvalidUnicode) | |
| 118 m_result = m_result * 10 + cc - '0'; | |
| 119 break; | |
| 120 } | |
| 121 if (cc == ';') { | |
| 122 source.advanceAndASSERT(cc); | |
| 123 finalizeNumericEntity(); | |
| 124 return true; | |
| 125 } | |
| 126 return true; | |
| 127 } | |
| 128 case Named: { | |
| 129 if (isAlphaNumeric(cc)) | |
| 130 break; | |
| 131 if (cc == ';') { | |
| 132 source.advanceAndASSERT(cc); | |
| 133 finalizeNamedEntity(); | |
| 134 return true; | |
| 135 } | |
| 136 return true; | |
| 137 } | |
| 138 } | |
| 139 | |
| 140 if (m_result > UCHAR_MAX_VALUE) | |
| 141 m_result = kInvalidUnicode; | |
| 142 | |
| 143 m_buffer.append(cc); | |
| 144 source.advanceAndASSERT(cc); | |
| 145 } | |
| 146 ASSERT(source.isEmpty()); | |
| 147 return false; | |
| 148 } | |
| 149 | |
| 150 void HTMLEntityParser::finalizeNumericEntity() | |
| 151 { | |
| 152 m_buffer.clear(); | |
| 153 if (m_result <= 0 || m_result > 0x10FFFF || (m_result >= 0xD800 && m_result
<= 0xDFFF)) { | |
| 154 m_buffer.append(replacementCharacter); | |
| 155 } else if (U_IS_BMP(m_result)) { | |
| 156 m_buffer.append(m_result); | |
| 157 } else { | |
| 158 m_buffer.append(U16_LEAD(m_result)); | |
| 159 m_buffer.append(U16_TRAIL(m_result)); | |
| 160 } | |
| 161 } | |
| 162 | |
| 163 void HTMLEntityParser::finalizeNamedEntity() | |
| 164 { | |
| 165 UChar decodedEntity = decodeEntity(m_buffer); | |
| 166 m_buffer.clear(); | |
| 167 m_buffer.append(decodedEntity); | |
| 168 } | |
| 169 | |
| 170 } // namespace blink | |
| OLD | NEW |