| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (C) 2008, 2009 Google Inc. All rights reserved. | 2 * Copyright (C) 2008, 2009 Google Inc. All rights reserved. |
| 3 * | 3 * |
| 4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
| 5 * modification, are permitted provided that the following conditions are | 5 * modification, are permitted provided that the following conditions are |
| 6 * met: | 6 * met: |
| 7 * | 7 * |
| 8 * * Redistributions of source code must retain the above copyright | 8 * * Redistributions of source code must retain the above copyright |
| 9 * notice, this list of conditions and the following disclaimer. | 9 * notice, this list of conditions and the following disclaimer. |
| 10 * * Redistributions in binary form must reproduce the above | 10 * * Redistributions in binary form must reproduce the above |
| (...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 55 | 55 |
| 56 // Should return false if the detected encoding is UTF8. This helps prevent | 56 // Should return false if the detected encoding is UTF8. This helps prevent |
| 57 // modern web sites from neglecting proper encoding labelling and simply | 57 // modern web sites from neglecting proper encoding labelling and simply |
| 58 // relying on browser-side encoding detection. Encoding detection is supposed | 58 // relying on browser-side encoding detection. Encoding detection is supposed |
| 59 // to work for web sites with legacy encoding only. Detection failure leads | 59 // to work for web sites with legacy encoding only. Detection failure leads |
| 60 // |TextResourceDecoder| to use its default encoding determined from system | 60 // |TextResourceDecoder| to use its default encoding determined from system |
| 61 // locale or TLD. | 61 // locale or TLD. |
| 62 if (encoding == UNKNOWN_ENCODING || encoding == UTF8) | 62 if (encoding == UNKNOWN_ENCODING || encoding == UTF8) |
| 63 return false; | 63 return false; |
| 64 | 64 |
| 65 // Map all the Shift-JIS variants to Shift-JIS. | |
| 66 if (hintUserLanguage && !strncmp(hintUserLanguage, "ja", 2) && | |
| 67 IsShiftJisOrVariant(encoding)) { | |
| 68 encoding = JAPANESE_SHIFT_JIS; | |
| 69 } | |
| 70 | |
| 71 // 7-bit encodings (except ISO-2022-JP), and some obscure encodings not | |
| 72 // supported in WHATWG encoding standard are marked as ASCII to keep the raw | |
| 73 // bytes intact. | |
| 74 // TODO(jinsukkim): Put this conversion into CED library, and enable "WHATWG" | |
| 75 // mode. | |
| 76 switch (encoding) { | |
| 77 case HZ_GB_2312: | |
| 78 case ISO_2022_KR: | |
| 79 case ISO_2022_CN: | |
| 80 case UTF7: | |
| 81 | |
| 82 case CHINESE_EUC_DEC: | |
| 83 case CHINESE_CNS: | |
| 84 case CHINESE_BIG5_CP950: | |
| 85 case JAPANESE_CP932: | |
| 86 case MSFT_CP874: | |
| 87 case TSCII: | |
| 88 case TAMIL_MONO: | |
| 89 case TAMIL_BI: | |
| 90 case JAGRAN: | |
| 91 case BHASKAR: | |
| 92 case HTCHANAKYA: | |
| 93 case BINARYENC: | |
| 94 case UTF8UTF8: | |
| 95 case TAM_ELANGO: | |
| 96 case TAM_LTTMBARANI: | |
| 97 case TAM_SHREE: | |
| 98 case TAM_TBOOMIS: | |
| 99 case TAM_TMNEWS: | |
| 100 case TAM_WEBTAMIL: | |
| 101 case KDDI_SHIFT_JIS: | |
| 102 case DOCOMO_SHIFT_JIS: | |
| 103 case SOFTBANK_SHIFT_JIS: | |
| 104 case KDDI_ISO_2022_JP: | |
| 105 case SOFTBANK_ISO_2022_JP: | |
| 106 encoding = ASCII_7BIT; | |
| 107 break; | |
| 108 default: | |
| 109 break; | |
| 110 } | |
| 111 *detectedEncoding = WTF::TextEncoding(MimeEncodingName(encoding)); | 65 *detectedEncoding = WTF::TextEncoding(MimeEncodingName(encoding)); |
| 112 return true; | 66 return true; |
| 113 } | 67 } |
| 114 | 68 |
| 115 } // namespace blink | 69 } // namespace blink |
| OLD | NEW |