OLD | NEW |
1 /* | 1 /* |
2 ********************************************************************** | 2 ********************************************************************** |
3 * Copyright (C) 2005-2012, International Business Machines | 3 * Copyright (C) 2005-2015, International Business Machines |
4 * Corporation and others. All Rights Reserved. | 4 * Corporation and others. All Rights Reserved. |
5 ********************************************************************** | 5 ********************************************************************** |
6 */ | 6 */ |
7 | 7 |
8 #include "unicode/utypes.h" | 8 #include "unicode/utypes.h" |
9 | 9 |
10 #if !UCONFIG_NO_CONVERSION | 10 #if !UCONFIG_NO_CONVERSION |
11 | 11 |
12 #include "cstring.h" | 12 #include "cstring.h" |
13 | 13 |
(...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
112 {0x1b, 0x24, 0x42, 0x00, 0x00}, // JIS X 208-1983 | 112 {0x1b, 0x24, 0x42, 0x00, 0x00}, // JIS X 208-1983 |
113 {0x1b, 0x26, 0x40, 0x00, 0x00}, // JIS X 208 1990, 1997 | 113 {0x1b, 0x26, 0x40, 0x00, 0x00}, // JIS X 208 1990, 1997 |
114 {0x1b, 0x28, 0x42, 0x00, 0x00}, // ASCII | 114 {0x1b, 0x28, 0x42, 0x00, 0x00}, // ASCII |
115 {0x1b, 0x28, 0x48, 0x00, 0x00}, // JIS-Roman | 115 {0x1b, 0x28, 0x48, 0x00, 0x00}, // JIS-Roman |
116 {0x1b, 0x28, 0x49, 0x00, 0x00}, // Half-width katakana | 116 {0x1b, 0x28, 0x49, 0x00, 0x00}, // Half-width katakana |
117 {0x1b, 0x28, 0x4a, 0x00, 0x00}, // JIS-Roman | 117 {0x1b, 0x28, 0x4a, 0x00, 0x00}, // JIS-Roman |
118 {0x1b, 0x2e, 0x41, 0x00, 0x00}, // ISO 8859-1 | 118 {0x1b, 0x2e, 0x41, 0x00, 0x00}, // ISO 8859-1 |
119 {0x1b, 0x2e, 0x46, 0x00, 0x00} // ISO 8859-7 | 119 {0x1b, 0x2e, 0x46, 0x00, 0x00} // ISO 8859-7 |
120 }; | 120 }; |
121 | 121 |
122 #if !UCONFIG_NO_NON_HTML5_CONVERSION | 122 #if !UCONFIG_ONLY_HTML_CONVERSION |
123 static const uint8_t escapeSequences_2022KR[][5] = { | 123 static const uint8_t escapeSequences_2022KR[][5] = { |
124 {0x1b, 0x24, 0x29, 0x43, 0x00} | 124 {0x1b, 0x24, 0x29, 0x43, 0x00} |
125 }; | 125 }; |
126 | 126 |
127 static const uint8_t escapeSequences_2022CN[][5] = { | 127 static const uint8_t escapeSequences_2022CN[][5] = { |
128 {0x1b, 0x24, 0x29, 0x41, 0x00}, // GB 2312-80 | 128 {0x1b, 0x24, 0x29, 0x41, 0x00}, // GB 2312-80 |
129 {0x1b, 0x24, 0x29, 0x47, 0x00}, // CNS 11643-1992 Plane 1 | 129 {0x1b, 0x24, 0x29, 0x47, 0x00}, // CNS 11643-1992 Plane 1 |
130 {0x1b, 0x24, 0x2A, 0x48, 0x00}, // CNS 11643-1992 Plane 2 | 130 {0x1b, 0x24, 0x2A, 0x48, 0x00}, // CNS 11643-1992 Plane 2 |
131 {0x1b, 0x24, 0x29, 0x45, 0x00}, // ISO-IR-165 | 131 {0x1b, 0x24, 0x29, 0x45, 0x00}, // ISO-IR-165 |
132 {0x1b, 0x24, 0x2B, 0x49, 0x00}, // CNS 11643-1992 Plane 3 | 132 {0x1b, 0x24, 0x2B, 0x49, 0x00}, // CNS 11643-1992 Plane 3 |
(...skipping 14 matching lines...) Expand all Loading... |
147 | 147 |
148 UBool CharsetRecog_2022JP::match(InputText *textIn, CharsetMatch *results) const
{ | 148 UBool CharsetRecog_2022JP::match(InputText *textIn, CharsetMatch *results) const
{ |
149 int32_t confidence = match_2022(textIn->fInputBytes, | 149 int32_t confidence = match_2022(textIn->fInputBytes, |
150 textIn->fInputLen, | 150 textIn->fInputLen, |
151 escapeSequences_2022JP, | 151 escapeSequences_2022JP, |
152 ARRAY_SIZE(escapeSequences_2022JP)); | 152 ARRAY_SIZE(escapeSequences_2022JP)); |
153 results->set(textIn, this, confidence); | 153 results->set(textIn, this, confidence); |
154 return (confidence > 0); | 154 return (confidence > 0); |
155 } | 155 } |
156 | 156 |
157 #if !UCONFIG_NO_NON_HTML5_CONVERSION | 157 #if !UCONFIG_ONLY_HTML_CONVERSION |
158 CharsetRecog_2022KR::~CharsetRecog_2022KR() {} | 158 CharsetRecog_2022KR::~CharsetRecog_2022KR() {} |
159 | 159 |
160 const char *CharsetRecog_2022KR::getName() const { | 160 const char *CharsetRecog_2022KR::getName() const { |
161 return "ISO-2022-KR"; | 161 return "ISO-2022-KR"; |
162 } | 162 } |
163 | 163 |
164 UBool CharsetRecog_2022KR::match(InputText *textIn, CharsetMatch *results) const
{ | 164 UBool CharsetRecog_2022KR::match(InputText *textIn, CharsetMatch *results) const
{ |
165 int32_t confidence = match_2022(textIn->fInputBytes, | 165 int32_t confidence = match_2022(textIn->fInputBytes, |
166 textIn->fInputLen, | 166 textIn->fInputLen, |
167 escapeSequences_2022KR, | 167 escapeSequences_2022KR, |
(...skipping 17 matching lines...) Expand all Loading... |
185 return (confidence > 0); | 185 return (confidence > 0); |
186 } | 186 } |
187 #endif | 187 #endif |
188 | 188 |
189 CharsetRecog_2022::~CharsetRecog_2022() { | 189 CharsetRecog_2022::~CharsetRecog_2022() { |
190 // nothing to do | 190 // nothing to do |
191 } | 191 } |
192 | 192 |
193 U_NAMESPACE_END | 193 U_NAMESPACE_END |
194 #endif | 194 #endif |
OLD | NEW |