| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (C) 2010 Google Inc. All Rights Reserved. | 2 * Copyright (C) 2010 Google Inc. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
| 5 * modification, are permitted provided that the following conditions | 5 * modification, are permitted provided that the following conditions |
| 6 * are met: | 6 * are met: |
| 7 * 1. Redistributions of source code must retain the above copyright | 7 * 1. Redistributions of source code must retain the above copyright |
| 8 * notice, this list of conditions and the following disclaimer. | 8 * notice, this list of conditions and the following disclaimer. |
| 9 * 2. Redistributions in binary form must reproduce the above copyright | 9 * 2. Redistributions in binary form must reproduce the above copyright |
| 10 * notice, this list of conditions and the following disclaimer in the | 10 * notice, this list of conditions and the following disclaimer in the |
| (...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 99 } | 99 } |
| 100 | 100 |
| 101 return ""; | 101 return ""; |
| 102 } | 102 } |
| 103 | 103 |
| 104 bool HTMLMetaCharsetParser::processMeta() | 104 bool HTMLMetaCharsetParser::processMeta() |
| 105 { | 105 { |
| 106 const HTMLToken::AttributeList& tokenAttributes = m_token.attributes(); | 106 const HTMLToken::AttributeList& tokenAttributes = m_token.attributes(); |
| 107 AttributeList attributes; | 107 AttributeList attributes; |
| 108 for (HTMLToken::AttributeList::const_iterator iter = tokenAttributes.begin()
; iter != tokenAttributes.end(); ++iter) { | 108 for (HTMLToken::AttributeList::const_iterator iter = tokenAttributes.begin()
; iter != tokenAttributes.end(); ++iter) { |
| 109 String attributeName = StringImpl::create8BitIfPossible(iter->name); | 109 HTMLIdentifier attributeName(iter->name, Likely8Bit); |
| 110 String attributeValue = StringImpl::create8BitIfPossible(iter->value); | 110 String attributeValue = StringImpl::create8BitIfPossible(iter->value); |
| 111 attributes.append(std::make_pair(attributeName, attributeValue)); | 111 attributes.append(std::make_pair(attributeName, attributeValue)); |
| 112 } | 112 } |
| 113 | 113 |
| 114 m_encoding = encodingFromMetaAttributes(attributes); | 114 m_encoding = encodingFromMetaAttributes(attributes); |
| 115 return m_encoding.isValid(); | 115 return m_encoding.isValid(); |
| 116 } | 116 } |
| 117 | 117 |
| 118 WTF::TextEncoding HTMLMetaCharsetParser::encodingFromMetaAttributes(const Attrib
uteList& attributes) | 118 WTF::TextEncoding HTMLMetaCharsetParser::encodingFromMetaAttributes(const Attrib
uteList& attributes) |
| 119 { | 119 { |
| 120 bool gotPragma = false; | 120 bool gotPragma = false; |
| 121 Mode mode = None; | 121 Mode mode = None; |
| 122 String charset; | 122 String charset; |
| 123 | 123 |
| 124 for (AttributeList::const_iterator iter = attributes.begin(); iter != attrib
utes.end(); ++iter) { | 124 for (AttributeList::const_iterator iter = attributes.begin(); iter != attrib
utes.end(); ++iter) { |
| 125 const AtomicString& attributeName = iter->first; | 125 const HTMLIdentifier& attributeName = iter->first; |
| 126 const String& attributeValue = iter->second; | 126 const String& attributeValue = iter->second; |
| 127 | 127 |
| 128 if (attributeName == http_equivAttr) { | 128 if (threadSafeMatch(attributeName, http_equivAttr)) { |
| 129 if (equalIgnoringCase(attributeValue, "content-type")) | 129 if (equalIgnoringCase(attributeValue, "content-type")) |
| 130 gotPragma = true; | 130 gotPragma = true; |
| 131 } else if (charset.isEmpty()) { | 131 } else if (charset.isEmpty()) { |
| 132 if (attributeName == charsetAttr) { | 132 if (threadSafeMatch(attributeName, charsetAttr)) { |
| 133 charset = attributeValue; | 133 charset = attributeValue; |
| 134 mode = Charset; | 134 mode = Charset; |
| 135 } else if (attributeName == contentAttr) { | 135 } else if (threadSafeMatch(attributeName, contentAttr)) { |
| 136 charset = extractCharset(attributeValue); | 136 charset = extractCharset(attributeValue); |
| 137 if (charset.length()) | 137 if (charset.length()) |
| 138 mode = Pragma; | 138 mode = Pragma; |
| 139 } | 139 } |
| 140 } | 140 } |
| 141 } | 141 } |
| 142 | 142 |
| 143 if (mode == Charset || (mode == Pragma && gotPragma)) | 143 if (mode == Charset || (mode == Pragma && gotPragma)) |
| 144 return WTF::TextEncoding(stripLeadingAndTrailingHTMLSpaces(charset)); | 144 return WTF::TextEncoding(stripLeadingAndTrailingHTMLSpaces(charset)); |
| 145 | 145 |
| (...skipping 25 matching lines...) Expand all Loading... |
| 171 | 171 |
| 172 // Since many sites have charset declarations after <body> or other tags | 172 // Since many sites have charset declarations after <body> or other tags |
| 173 // that are disallowed in <head>, we don't bail out until we've checked at | 173 // that are disallowed in <head>, we don't bail out until we've checked at |
| 174 // least bytesToCheckUnconditionally bytes of input. | 174 // least bytesToCheckUnconditionally bytes of input. |
| 175 | 175 |
| 176 m_input.append(SegmentedString(m_assumedCodec->decode(data, length))); | 176 m_input.append(SegmentedString(m_assumedCodec->decode(data, length))); |
| 177 | 177 |
| 178 while (m_tokenizer->nextToken(m_input, m_token)) { | 178 while (m_tokenizer->nextToken(m_input, m_token)) { |
| 179 bool end = m_token.type() == HTMLToken::EndTag; | 179 bool end = m_token.type() == HTMLToken::EndTag; |
| 180 if (end || m_token.type() == HTMLToken::StartTag) { | 180 if (end || m_token.type() == HTMLToken::StartTag) { |
| 181 AtomicString tagName(m_token.name()); | 181 HTMLIdentifier tagName(m_token.name(), Likely8Bit); |
| 182 if (!end) { | 182 if (!end) { |
| 183 m_tokenizer->updateStateFor(tagName); | 183 m_tokenizer->updateStateFor(tagName); |
| 184 if (tagName == metaTag && processMeta()) { | 184 if (tagName == metaTag && processMeta()) { |
| 185 m_doneChecking = true; | 185 m_doneChecking = true; |
| 186 return true; | 186 return true; |
| 187 } | 187 } |
| 188 } | 188 } |
| 189 | 189 |
| 190 if (tagName != scriptTag && tagName != noscriptTag | 190 if (tagName != scriptTag && tagName != noscriptTag |
| 191 && tagName != styleTag && tagName != linkTag | 191 && tagName != styleTag && tagName != linkTag |
| 192 && tagName != metaTag && tagName != objectTag | 192 && tagName != metaTag && tagName != objectTag |
| 193 && tagName != titleTag && tagName != baseTag | 193 && tagName != titleTag && tagName != baseTag |
| 194 && (end || tagName != htmlTag) && (end || tagName != headTag)) { | 194 && (end || tagName != htmlTag) && (end || tagName != headTag)) { |
| 195 m_inHeadSection = false; | 195 m_inHeadSection = false; |
| 196 } | 196 } |
| 197 } | 197 } |
| 198 | 198 |
| 199 if (!m_inHeadSection && m_input.numberOfCharactersConsumed() >= bytesToC
heckUnconditionally) { | 199 if (!m_inHeadSection && m_input.numberOfCharactersConsumed() >= bytesToC
heckUnconditionally) { |
| 200 m_doneChecking = true; | 200 m_doneChecking = true; |
| 201 return true; | 201 return true; |
| 202 } | 202 } |
| 203 | 203 |
| 204 m_token.clear(); | 204 m_token.clear(); |
| 205 } | 205 } |
| 206 | 206 |
| 207 return false; | 207 return false; |
| 208 } | 208 } |
| 209 | 209 |
| 210 } | 210 } |
| OLD | NEW |