OLD | NEW |
1 /* | 1 /* |
2 * Copyright (C) 2010 Google Inc. All Rights Reserved. | 2 * Copyright (C) 2010 Google Inc. All Rights Reserved. |
3 * | 3 * |
4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
5 * modification, are permitted provided that the following conditions | 5 * modification, are permitted provided that the following conditions |
6 * are met: | 6 * are met: |
7 * 1. Redistributions of source code must retain the above copyright | 7 * 1. Redistributions of source code must retain the above copyright |
8 * notice, this list of conditions and the following disclaimer. | 8 * notice, this list of conditions and the following disclaimer. |
9 * 2. Redistributions in binary form must reproduce the above copyright | 9 * 2. Redistributions in binary form must reproduce the above copyright |
10 * notice, this list of conditions and the following disclaimer in the | 10 * notice, this list of conditions and the following disclaimer in the |
(...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
99 } | 99 } |
100 | 100 |
101 return ""; | 101 return ""; |
102 } | 102 } |
103 | 103 |
104 bool HTMLMetaCharsetParser::processMeta() | 104 bool HTMLMetaCharsetParser::processMeta() |
105 { | 105 { |
106 const HTMLToken::AttributeList& tokenAttributes = m_token.attributes(); | 106 const HTMLToken::AttributeList& tokenAttributes = m_token.attributes(); |
107 AttributeList attributes; | 107 AttributeList attributes; |
108 for (HTMLToken::AttributeList::const_iterator iter = tokenAttributes.begin()
; iter != tokenAttributes.end(); ++iter) { | 108 for (HTMLToken::AttributeList::const_iterator iter = tokenAttributes.begin()
; iter != tokenAttributes.end(); ++iter) { |
109 String attributeName = StringImpl::create8BitIfPossible(iter->name); | 109 HTMLIdentifier attributeName(iter->name, Likely8Bit); |
110 String attributeValue = StringImpl::create8BitIfPossible(iter->value); | 110 String attributeValue = StringImpl::create8BitIfPossible(iter->value); |
111 attributes.append(std::make_pair(attributeName, attributeValue)); | 111 attributes.append(std::make_pair(attributeName, attributeValue)); |
112 } | 112 } |
113 | 113 |
114 m_encoding = encodingFromMetaAttributes(attributes); | 114 m_encoding = encodingFromMetaAttributes(attributes); |
115 return m_encoding.isValid(); | 115 return m_encoding.isValid(); |
116 } | 116 } |
117 | 117 |
118 WTF::TextEncoding HTMLMetaCharsetParser::encodingFromMetaAttributes(const Attrib
uteList& attributes) | 118 WTF::TextEncoding HTMLMetaCharsetParser::encodingFromMetaAttributes(const Attrib
uteList& attributes) |
119 { | 119 { |
120 bool gotPragma = false; | 120 bool gotPragma = false; |
121 Mode mode = None; | 121 Mode mode = None; |
122 String charset; | 122 String charset; |
123 | 123 |
124 for (AttributeList::const_iterator iter = attributes.begin(); iter != attrib
utes.end(); ++iter) { | 124 for (AttributeList::const_iterator iter = attributes.begin(); iter != attrib
utes.end(); ++iter) { |
125 const AtomicString& attributeName = iter->first; | 125 const HTMLIdentifier& attributeName = iter->first; |
126 const String& attributeValue = iter->second; | 126 const String& attributeValue = iter->second; |
127 | 127 |
128 if (attributeName == http_equivAttr) { | 128 if (threadSafeMatch(attributeName, http_equivAttr)) { |
129 if (equalIgnoringCase(attributeValue, "content-type")) | 129 if (equalIgnoringCase(attributeValue, "content-type")) |
130 gotPragma = true; | 130 gotPragma = true; |
131 } else if (charset.isEmpty()) { | 131 } else if (charset.isEmpty()) { |
132 if (attributeName == charsetAttr) { | 132 if (threadSafeMatch(attributeName, charsetAttr)) { |
133 charset = attributeValue; | 133 charset = attributeValue; |
134 mode = Charset; | 134 mode = Charset; |
135 } else if (attributeName == contentAttr) { | 135 } else if (threadSafeMatch(attributeName, contentAttr)) { |
136 charset = extractCharset(attributeValue); | 136 charset = extractCharset(attributeValue); |
137 if (charset.length()) | 137 if (charset.length()) |
138 mode = Pragma; | 138 mode = Pragma; |
139 } | 139 } |
140 } | 140 } |
141 } | 141 } |
142 | 142 |
143 if (mode == Charset || (mode == Pragma && gotPragma)) | 143 if (mode == Charset || (mode == Pragma && gotPragma)) |
144 return WTF::TextEncoding(stripLeadingAndTrailingHTMLSpaces(charset)); | 144 return WTF::TextEncoding(stripLeadingAndTrailingHTMLSpaces(charset)); |
145 | 145 |
(...skipping 25 matching lines...) Expand all Loading... |
171 | 171 |
172 // Since many sites have charset declarations after <body> or other tags | 172 // Since many sites have charset declarations after <body> or other tags |
173 // that are disallowed in <head>, we don't bail out until we've checked at | 173 // that are disallowed in <head>, we don't bail out until we've checked at |
174 // least bytesToCheckUnconditionally bytes of input. | 174 // least bytesToCheckUnconditionally bytes of input. |
175 | 175 |
176 m_input.append(SegmentedString(m_assumedCodec->decode(data, length))); | 176 m_input.append(SegmentedString(m_assumedCodec->decode(data, length))); |
177 | 177 |
178 while (m_tokenizer->nextToken(m_input, m_token)) { | 178 while (m_tokenizer->nextToken(m_input, m_token)) { |
179 bool end = m_token.type() == HTMLToken::EndTag; | 179 bool end = m_token.type() == HTMLToken::EndTag; |
180 if (end || m_token.type() == HTMLToken::StartTag) { | 180 if (end || m_token.type() == HTMLToken::StartTag) { |
181 AtomicString tagName(m_token.name()); | 181 HTMLIdentifier tagName(m_token.name(), Likely8Bit); |
182 if (!end) { | 182 if (!end) { |
183 m_tokenizer->updateStateFor(tagName); | 183 m_tokenizer->updateStateFor(tagName); |
184 if (tagName == metaTag && processMeta()) { | 184 if (tagName == metaTag && processMeta()) { |
185 m_doneChecking = true; | 185 m_doneChecking = true; |
186 return true; | 186 return true; |
187 } | 187 } |
188 } | 188 } |
189 | 189 |
190 if (tagName != scriptTag && tagName != noscriptTag | 190 if (tagName != scriptTag && tagName != noscriptTag |
191 && tagName != styleTag && tagName != linkTag | 191 && tagName != styleTag && tagName != linkTag |
192 && tagName != metaTag && tagName != objectTag | 192 && tagName != metaTag && tagName != objectTag |
193 && tagName != titleTag && tagName != baseTag | 193 && tagName != titleTag && tagName != baseTag |
194 && (end || tagName != htmlTag) && (end || tagName != headTag)) { | 194 && (end || tagName != htmlTag) && (end || tagName != headTag)) { |
195 m_inHeadSection = false; | 195 m_inHeadSection = false; |
196 } | 196 } |
197 } | 197 } |
198 | 198 |
199 if (!m_inHeadSection && m_input.numberOfCharactersConsumed() >= bytesToC
heckUnconditionally) { | 199 if (!m_inHeadSection && m_input.numberOfCharactersConsumed() >= bytesToC
heckUnconditionally) { |
200 m_doneChecking = true; | 200 m_doneChecking = true; |
201 return true; | 201 return true; |
202 } | 202 } |
203 | 203 |
204 m_token.clear(); | 204 m_token.clear(); |
205 } | 205 } |
206 | 206 |
207 return false; | 207 return false; |
208 } | 208 } |
209 | 209 |
210 } | 210 } |
OLD | NEW |