OLD | NEW |
1 /* | 1 /* |
2 * Copyright (C) 2008, 2009 Google Inc. All rights reserved. | 2 * Copyright (C) 2008, 2009 Google Inc. All rights reserved. |
3 * | 3 * |
4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
5 * modification, are permitted provided that the following conditions are | 5 * modification, are permitted provided that the following conditions are |
6 * met: | 6 * met: |
7 * | 7 * |
8 * * Redistributions of source code must retain the above copyright | 8 * * Redistributions of source code must retain the above copyright |
9 * notice, this list of conditions and the following disclaimer. | 9 * notice, this list of conditions and the following disclaimer. |
10 * * Redistributions in binary form must reproduce the above | 10 * * Redistributions in binary form must reproduce the above |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
45 Language language; | 45 Language language; |
46 LanguageFromCode(hintUserLanguage, &language); | 46 LanguageFromCode(hintUserLanguage, &language); |
47 int consumedBytes; | 47 int consumedBytes; |
48 bool isReliable; | 48 bool isReliable; |
49 Encoding encoding = CompactEncDet::DetectEncoding( | 49 Encoding encoding = CompactEncDet::DetectEncoding( |
50 data, length, hintUrl, nullptr, nullptr, | 50 data, length, hintUrl, nullptr, nullptr, |
51 EncodingNameAliasToEncoding(hintEncodingName), language, | 51 EncodingNameAliasToEncoding(hintEncodingName), language, |
52 CompactEncDet::WEB_CORPUS, | 52 CompactEncDet::WEB_CORPUS, |
53 false, // Include 7-bit encodings to detect ISO-2022-JP | 53 false, // Include 7-bit encodings to detect ISO-2022-JP |
54 &consumedBytes, &isReliable); | 54 &consumedBytes, &isReliable); |
55 if (encoding == UNKNOWN_ENCODING) | 55 |
| 56 // Return false when the detected encoding is UTF-8 as well. This discourages |
| 57 // modern web sites from neglecting proper encoding labelling and simply |
| 58 // relying on browser-side encoding detection; encoding detection is meant to |
| 59 // work only for web sites that use a legacy encoding. Detection failure leads |
| 60 // |TextResourceDecoder| to use its default encoding, which is determined from |
| 61 // the system locale or TLD. |
| 62 if (encoding == UNKNOWN_ENCODING || encoding == UTF8) |
56 return false; | 63 return false; |
57 | 64 |
58 // 7-bit encodings (except ISO-2022-JP) are not supported in WHATWG encoding | 65 // 7-bit encodings (except ISO-2022-JP) are not supported in WHATWG encoding |
59 // standard. Mark them as ASCII to keep the raw bytes intact. | 66 // standard. Mark them as ASCII to keep the raw bytes intact. |
60 switch (encoding) { | 67 switch (encoding) { |
61 case HZ_GB_2312: | 68 case HZ_GB_2312: |
62 case ISO_2022_KR: | 69 case ISO_2022_KR: |
63 case ISO_2022_CN: | 70 case ISO_2022_CN: |
64 case UTF7: | 71 case UTF7: |
65 encoding = ASCII_7BIT; | 72 encoding = ASCII_7BIT; |
66 break; | 73 break; |
67 default: | 74 default: |
68 break; | 75 break; |
69 } | 76 } |
70 *detectedEncoding = WTF::TextEncoding(MimeEncodingName(encoding)); | 77 *detectedEncoding = WTF::TextEncoding(MimeEncodingName(encoding)); |
71 return true; | 78 return true; |
72 } | 79 } |
73 | 80 |
74 } // namespace blink | 81 } // namespace blink |
OLD | NEW |