OLD | NEW |
1 /* | 1 /* |
2 * Copyright (C) 2010 Google Inc. All Rights Reserved. | 2 * Copyright (C) 2010 Google Inc. All Rights Reserved. |
3 * | 3 * |
4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
5 * modification, are permitted provided that the following conditions | 5 * modification, are permitted provided that the following conditions |
6 * are met: | 6 * are met: |
7 * 1. Redistributions of source code must retain the above copyright | 7 * 1. Redistributions of source code must retain the above copyright |
8 * notice, this list of conditions and the following disclaimer. | 8 * notice, this list of conditions and the following disclaimer. |
9 * 2. Redistributions in binary form must reproduce the above copyright | 9 * 2. Redistributions in binary form must reproduce the above copyright |
10 * notice, this list of conditions and the following disclaimer in the | 10 * notice, this list of conditions and the following disclaimer in the |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
52 for (const HTMLToken::Attribute& tokenAttribute : tokenAttributes) { | 52 for (const HTMLToken::Attribute& tokenAttribute : tokenAttributes) { |
53 String attributeName = tokenAttribute.nameAttemptStaticStringCreation(); | 53 String attributeName = tokenAttribute.nameAttemptStaticStringCreation(); |
54 String attributeValue = tokenAttribute.value8BitIfNecessary(); | 54 String attributeValue = tokenAttribute.value8BitIfNecessary(); |
55 attributes.append(std::make_pair(attributeName, attributeValue)); | 55 attributes.append(std::make_pair(attributeName, attributeValue)); |
56 } | 56 } |
57 | 57 |
58 m_encoding = encodingFromMetaAttributes(attributes); | 58 m_encoding = encodingFromMetaAttributes(attributes); |
59 return m_encoding.isValid(); | 59 return m_encoding.isValid(); |
60 } | 60 } |
61 | 61 |
62 static const int bytesToCheckUnconditionally = | 62 // That many input bytes will be checked for meta charset even if <head> section |
63 1024; // That many input bytes will be checked for meta charset even if <head> section is over. | 63 // is over. |
| 64 static const int bytesToCheckUnconditionally = 1024; |
64 | 65 |
65 bool HTMLMetaCharsetParser::checkForMetaCharset(const char* data, | 66 bool HTMLMetaCharsetParser::checkForMetaCharset(const char* data, |
66 size_t length) { | 67 size_t length) { |
67 if (m_doneChecking) | 68 if (m_doneChecking) |
68 return true; | 69 return true; |
69 | 70 |
70 ASSERT(!m_encoding.isValid()); | 71 ASSERT(!m_encoding.isValid()); |
71 | 72 |
72 // We still don't have an encoding, and are in the head. | 73 // We still don't have an encoding, and are in the head. The following tags |
73 // The following tags are allowed in <head>: | 74 // are allowed in <head>: SCRIPT|STYLE|META|LINK|OBJECT|TITLE|BASE |
74 // SCRIPT|STYLE|META|LINK|OBJECT|TITLE|BASE | |
75 | 75 |
76 // We stop scanning when a tag that is not permitted in <head> | 76 // We stop scanning when a tag that is not permitted in <head> is seen, rather |
77 // is seen, rather than when </head> is seen, because that more closely | 77 // than when </head> is seen, because that more closely matches behavior in other |
78 // matches behavior in other browsers; more details in | 78 // browsers; more details in <http://bugs.webkit.org/show_bug.cgi?id=3590>. |
79 // <http://bugs.webkit.org/show_bug.cgi?id=3590>. | |
80 | 79 |
81 // Additionally, we ignore things that look like tags in <title>, <script> | 80 // Additionally, we ignore things that look like tags in <title>, <script> |
82 // and <noscript>; see <http://bugs.webkit.org/show_bug.cgi?id=4560>, | 81 // and <noscript>; see: |
83 // <http://bugs.webkit.org/show_bug.cgi?id=12165> and | 82 // <http://bugs.webkit.org/show_bug.cgi?id=4560> |
84 // <http://bugs.webkit.org/show_bug.cgi?id=12389>. | 83 // <http://bugs.webkit.org/show_bug.cgi?id=12165> |
| 84 // <http://bugs.webkit.org/show_bug.cgi?id=12389> |
85 | 85 |
86 // Since many sites have charset declarations after <body> or other tags | 86 // Since many sites have charset declarations after <body> or other tags that |
87 // that are disallowed in <head>, we don't bail out until we've checked at | 87 // are disallowed in <head>, we don't bail out until we've checked at least |
88 // least bytesToCheckUnconditionally bytes of input. | 88 // bytesToCheckUnconditionally bytes of input. |
89 | 89 |
90 m_input.append(SegmentedString(m_assumedCodec->decode(data, length))); | 90 m_input.append(SegmentedString(m_assumedCodec->decode(data, length))); |
91 | 91 |
92 while (m_tokenizer->nextToken(m_input, m_token)) { | 92 while (m_tokenizer->nextToken(m_input, m_token)) { |
93 bool end = m_token.type() == HTMLToken::EndTag; | 93 bool end = m_token.type() == HTMLToken::EndTag; |
94 if (end || m_token.type() == HTMLToken::StartTag) { | 94 if (end || m_token.type() == HTMLToken::StartTag) { |
95 String tagName = attemptStaticStringCreation(m_token.name(), Likely8Bit); | 95 String tagName = attemptStaticStringCreation(m_token.name(), Likely8Bit); |
96 if (!end) { | 96 if (!end) { |
97 m_tokenizer->updateStateFor(tagName); | 97 m_tokenizer->updateStateFor(tagName); |
98 if (threadSafeMatch(tagName, metaTag) && processMeta()) { | 98 if (threadSafeMatch(tagName, metaTag) && processMeta()) { |
(...skipping 22 matching lines...) Expand all Loading... |
121 return true; | 121 return true; |
122 } | 122 } |
123 | 123 |
124 m_token.clear(); | 124 m_token.clear(); |
125 } | 125 } |
126 | 126 |
127 return false; | 127 return false; |
128 } | 128 } |
129 | 129 |
130 } // namespace blink | 130 } // namespace blink |
OLD | NEW |