| OLD | NEW |
| 1 /* | 1 /* |
| 2 ********************************************************************** | 2 ********************************************************************** |
| 3 * Copyright (C) 2005-2012, International Business Machines | 3 * Copyright (C) 2005-2012, International Business Machines |
| 4 * Corporation and others. All Rights Reserved. | 4 * Corporation and others. All Rights Reserved. |
| 5 ********************************************************************** | 5 ********************************************************************** |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #include "unicode/utypes.h" | 8 #include "unicode/utypes.h" |
| 9 | 9 |
| 10 #if !UCONFIG_NO_CONVERSION | 10 #if !UCONFIG_NO_CONVERSION |
| (...skipping 101 matching lines...) |
| 112 {0x1b, 0x24, 0x42, 0x00, 0x00}, // JIS X 208-1983 | 112 {0x1b, 0x24, 0x42, 0x00, 0x00}, // JIS X 208-1983 |
| 113 {0x1b, 0x26, 0x40, 0x00, 0x00}, // JIS X 208 1990, 1997 | 113 {0x1b, 0x26, 0x40, 0x00, 0x00}, // JIS X 208 1990, 1997 |
| 114 {0x1b, 0x28, 0x42, 0x00, 0x00}, // ASCII | 114 {0x1b, 0x28, 0x42, 0x00, 0x00}, // ASCII |
| 115 {0x1b, 0x28, 0x48, 0x00, 0x00}, // JIS-Roman | 115 {0x1b, 0x28, 0x48, 0x00, 0x00}, // JIS-Roman |
| 116 {0x1b, 0x28, 0x49, 0x00, 0x00}, // Half-width katakana | 116 {0x1b, 0x28, 0x49, 0x00, 0x00}, // Half-width katakana |
| 117 {0x1b, 0x28, 0x4a, 0x00, 0x00}, // JIS-Roman | 117 {0x1b, 0x28, 0x4a, 0x00, 0x00}, // JIS-Roman |
| 118 {0x1b, 0x2e, 0x41, 0x00, 0x00}, // ISO 8859-1 | 118 {0x1b, 0x2e, 0x41, 0x00, 0x00}, // ISO 8859-1 |
| 119 {0x1b, 0x2e, 0x46, 0x00, 0x00} // ISO 8859-7 | 119 {0x1b, 0x2e, 0x46, 0x00, 0x00} // ISO 8859-7 |
| 120 }; | 120 }; |
| 121 | 121 |
| 122 #if !UCONFIG_NO_NON_HTML5_CONVERSION |
| 122 static const uint8_t escapeSequences_2022KR[][5] = { | 123 static const uint8_t escapeSequences_2022KR[][5] = { |
| 123 {0x1b, 0x24, 0x29, 0x43, 0x00} | 124 {0x1b, 0x24, 0x29, 0x43, 0x00} |
| 124 }; | 125 }; |
| 125 | 126 |
| 126 static const uint8_t escapeSequences_2022CN[][5] = { | 127 static const uint8_t escapeSequences_2022CN[][5] = { |
| 127 {0x1b, 0x24, 0x29, 0x41, 0x00}, // GB 2312-80 | 128 {0x1b, 0x24, 0x29, 0x41, 0x00}, // GB 2312-80 |
| 128 {0x1b, 0x24, 0x29, 0x47, 0x00}, // CNS 11643-1992 Plane 1 | 129 {0x1b, 0x24, 0x29, 0x47, 0x00}, // CNS 11643-1992 Plane 1 |
| 129 {0x1b, 0x24, 0x2A, 0x48, 0x00}, // CNS 11643-1992 Plane 2 | 130 {0x1b, 0x24, 0x2A, 0x48, 0x00}, // CNS 11643-1992 Plane 2 |
| 130 {0x1b, 0x24, 0x29, 0x45, 0x00}, // ISO-IR-165 | 131 {0x1b, 0x24, 0x29, 0x45, 0x00}, // ISO-IR-165 |
| 131 {0x1b, 0x24, 0x2B, 0x49, 0x00}, // CNS 11643-1992 Plane 3 | 132 {0x1b, 0x24, 0x2B, 0x49, 0x00}, // CNS 11643-1992 Plane 3 |
| 132 {0x1b, 0x24, 0x2B, 0x4A, 0x00}, // CNS 11643-1992 Plane 4 | 133 {0x1b, 0x24, 0x2B, 0x4A, 0x00}, // CNS 11643-1992 Plane 4 |
| 133 {0x1b, 0x24, 0x2B, 0x4B, 0x00}, // CNS 11643-1992 Plane 5 | 134 {0x1b, 0x24, 0x2B, 0x4B, 0x00}, // CNS 11643-1992 Plane 5 |
| 134 {0x1b, 0x24, 0x2B, 0x4C, 0x00}, // CNS 11643-1992 Plane 6 | 135 {0x1b, 0x24, 0x2B, 0x4C, 0x00}, // CNS 11643-1992 Plane 6 |
| 135 {0x1b, 0x24, 0x2B, 0x4D, 0x00}, // CNS 11643-1992 Plane 7 | 136 {0x1b, 0x24, 0x2B, 0x4D, 0x00}, // CNS 11643-1992 Plane 7 |
| 136 {0x1b, 0x4e, 0x00, 0x00, 0x00}, // SS2 | 137 {0x1b, 0x4e, 0x00, 0x00, 0x00}, // SS2 |
| 137 {0x1b, 0x4f, 0x00, 0x00, 0x00}, // SS3 | 138 {0x1b, 0x4f, 0x00, 0x00, 0x00}, // SS3 |
| 138 }; | 139 }; |
| 140 #endif |
| 139 | 141 |
| 140 CharsetRecog_2022JP::~CharsetRecog_2022JP() {} | 142 CharsetRecog_2022JP::~CharsetRecog_2022JP() {} |
| 141 | 143 |
| 142 const char *CharsetRecog_2022JP::getName() const { | 144 const char *CharsetRecog_2022JP::getName() const { |
| 143 return "ISO-2022-JP"; | 145 return "ISO-2022-JP"; |
| 144 } | 146 } |
| 145 | 147 |
| 146 UBool CharsetRecog_2022JP::match(InputText *textIn, CharsetMatch *results) const { | 148 UBool CharsetRecog_2022JP::match(InputText *textIn, CharsetMatch *results) const { |
| 147 int32_t confidence = match_2022(textIn->fInputBytes, | 149 int32_t confidence = match_2022(textIn->fInputBytes, |
| 148 textIn->fInputLen, | 150 textIn->fInputLen, |
| 149 escapeSequences_2022JP, | 151 escapeSequences_2022JP, |
| 150 ARRAY_SIZE(escapeSequences_2022JP)); | 152 ARRAY_SIZE(escapeSequences_2022JP)); |
| 151 results->set(textIn, this, confidence); | 153 results->set(textIn, this, confidence); |
| 152 return (confidence > 0); | 154 return (confidence > 0); |
| 153 } | 155 } |
| 154 | 156 |
| 157 #if !UCONFIG_NO_NON_HTML5_CONVERSION |
| 155 CharsetRecog_2022KR::~CharsetRecog_2022KR() {} | 158 CharsetRecog_2022KR::~CharsetRecog_2022KR() {} |
| 156 | 159 |
| 157 const char *CharsetRecog_2022KR::getName() const { | 160 const char *CharsetRecog_2022KR::getName() const { |
| 158 return "ISO-2022-KR"; | 161 return "ISO-2022-KR"; |
| 159 } | 162 } |
| 160 | 163 |
| 161 UBool CharsetRecog_2022KR::match(InputText *textIn, CharsetMatch *results) const { | 164 UBool CharsetRecog_2022KR::match(InputText *textIn, CharsetMatch *results) const { |
| 162 int32_t confidence = match_2022(textIn->fInputBytes, | 165 int32_t confidence = match_2022(textIn->fInputBytes, |
| 163 textIn->fInputLen, | 166 textIn->fInputLen, |
| 164 escapeSequences_2022KR, | 167 escapeSequences_2022KR, |
| 165 ARRAY_SIZE(escapeSequences_2022KR)); | 168 ARRAY_SIZE(escapeSequences_2022KR)); |
| 166 results->set(textIn, this, confidence); | 169 results->set(textIn, this, confidence); |
| 167 return (confidence > 0); | 170 return (confidence > 0); |
| 168 } | 171 } |
| 169 | 172 |
| 170 CharsetRecog_2022CN::~CharsetRecog_2022CN() {} | 173 CharsetRecog_2022CN::~CharsetRecog_2022CN() {} |
| 171 | 174 |
| 172 const char *CharsetRecog_2022CN::getName() const { | 175 const char *CharsetRecog_2022CN::getName() const { |
| 173 return "ISO-2022-CN"; | 176 return "ISO-2022-CN"; |
| 174 } | 177 } |
| 175 | 178 |
| 176 UBool CharsetRecog_2022CN::match(InputText *textIn, CharsetMatch *results) const { | 179 UBool CharsetRecog_2022CN::match(InputText *textIn, CharsetMatch *results) const { |
| 177 int32_t confidence = match_2022(textIn->fInputBytes, | 180 int32_t confidence = match_2022(textIn->fInputBytes, |
| 178 textIn->fInputLen, | 181 textIn->fInputLen, |
| 179 escapeSequences_2022CN, | 182 escapeSequences_2022CN, |
| 180 ARRAY_SIZE(escapeSequences_2022CN)); | 183 ARRAY_SIZE(escapeSequences_2022CN)); |
| 181 results->set(textIn, this, confidence); | 184 results->set(textIn, this, confidence); |
| 182 return (confidence > 0); | 185 return (confidence > 0); |
| 183 } | 186 } |
| 187 #endif |
| 184 | 188 |
| 185 CharsetRecog_2022::~CharsetRecog_2022() { | 189 CharsetRecog_2022::~CharsetRecog_2022() { |
| 186 // nothing to do | 190 // nothing to do |
| 187 } | 191 } |
| 188 | 192 |
| 189 U_NAMESPACE_END | 193 U_NAMESPACE_END |
| 190 #endif | 194 #endif |
| OLD | NEW |