| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (C) 2008, 2009 Google Inc. All rights reserved. | 2 * Copyright (C) 2008, 2009 Google Inc. All rights reserved. |
| 3 * | 3 * |
| 4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
| 5 * modification, are permitted provided that the following conditions are | 5 * modification, are permitted provided that the following conditions are |
| 6 * met: | 6 * met: |
| 7 * | 7 * |
| 8 * * Redistributions of source code must retain the above copyright | 8 * * Redistributions of source code must retain the above copyright |
| 9 * notice, this list of conditions and the following disclaimer. | 9 * notice, this list of conditions and the following disclaimer. |
| 10 * * Redistributions in binary form must reproduce the above | 10 * * Redistributions in binary form must reproduce the above |
| (...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 55 | 55 |
| 56 // Should return false if the detected encoding is UTF8. This helps prevent | 56 // Should return false if the detected encoding is UTF8. This helps prevent |
| 57 // modern web sites from neglecting proper encoding labelling and simply | 57 // modern web sites from neglecting proper encoding labelling and simply |
| 58 // relying on browser-side encoding detection. Encoding detection is supposed | 58 // relying on browser-side encoding detection. Encoding detection is supposed |
| 59 // to work for web sites with legacy encoding only. Detection failure leads | 59 // to work for web sites with legacy encoding only. Detection failure leads |
| 60 // |TextResourceDecoder| to use its default encoding determined from system | 60 // |TextResourceDecoder| to use its default encoding determined from system |
| 61 // locale or TLD. | 61 // locale or TLD. |
| 62 if (encoding == UNKNOWN_ENCODING || encoding == UTF8) | 62 if (encoding == UNKNOWN_ENCODING || encoding == UTF8) |
| 63 return false; | 63 return false; |
| 64 | 64 |
| 65 // 7-bit encodings (except ISO-2022-JP) are not supported in WHATWG encoding | 65 // Map all the Shift-JIS variants to Shift-JIS. |
| 66 // standard. Mark them as ASCII to keep the raw bytes intact. | 66 if (hintUserLanguage && !strncmp(hintUserLanguage, "ja", 2) && |
| 67 IsShiftJisOrVariant(encoding)) { |
| 68 encoding = JAPANESE_SHIFT_JIS; |
| 69 } |
| 70 |
| 71 // 7-bit encodings (except ISO-2022-JP) and some obscure encodings that are |
| 72 // not supported by the WHATWG Encoding Standard are marked as ASCII to keep |
| 73 // the raw bytes intact. |
| 74 // TODO(jinsukkim): Put this conversion into CED library, and enable "WHATWG" |
| 75 // mode. |
| 67 switch (encoding) { | 76 switch (encoding) { |
| 68 case HZ_GB_2312: | 77 case HZ_GB_2312: |
| 69 case ISO_2022_KR: | 78 case ISO_2022_KR: |
| 70 case ISO_2022_CN: | 79 case ISO_2022_CN: |
| 71 case UTF7: | 80 case UTF7: |
| 81 |
| 82 case CHINESE_EUC_DEC: |
| 83 case CHINESE_CNS: |
| 84 case CHINESE_BIG5_CP950: |
| 85 case JAPANESE_CP932: |
| 86 case MSFT_CP874: |
| 87 case TSCII: |
| 88 case TAMIL_MONO: |
| 89 case TAMIL_BI: |
| 90 case JAGRAN: |
| 91 case BHASKAR: |
| 92 case HTCHANAKYA: |
| 93 case BINARYENC: |
| 94 case UTF8UTF8: |
| 95 case TAM_ELANGO: |
| 96 case TAM_LTTMBARANI: |
| 97 case TAM_SHREE: |
| 98 case TAM_TBOOMIS: |
| 99 case TAM_TMNEWS: |
| 100 case TAM_WEBTAMIL: |
| 101 case KDDI_SHIFT_JIS: |
| 102 case DOCOMO_SHIFT_JIS: |
| 103 case SOFTBANK_SHIFT_JIS: |
| 104 case KDDI_ISO_2022_JP: |
| 105 case SOFTBANK_ISO_2022_JP: |
| 72 encoding = ASCII_7BIT; | 106 encoding = ASCII_7BIT; |
| 73 break; | 107 break; |
| 74 default: | 108 default: |
| 75 break; | 109 break; |
| 76 } | 110 } |
| 77 *detectedEncoding = WTF::TextEncoding(MimeEncodingName(encoding)); | 111 *detectedEncoding = WTF::TextEncoding(MimeEncodingName(encoding)); |
| 78 return true; | 112 return true; |
| 79 } | 113 } |
| 80 | 114 |
| 81 } // namespace blink | 115 } // namespace blink |
| OLD | NEW |