OLD | NEW |
1 /* | 1 /* |
2 ********************************************************************** | 2 ********************************************************************** |
3 * Copyright (C) 2005-2012, International Business Machines | 3 * Copyright (C) 2005-2012, International Business Machines |
4 * Corporation and others. All Rights Reserved. | 4 * Corporation and others. All Rights Reserved. |
5 ********************************************************************** | 5 ********************************************************************** |
6 */ | 6 */ |
7 | 7 |
8 #include "unicode/utypes.h" | 8 #include "unicode/utypes.h" |
9 | 9 |
10 #if !UCONFIG_NO_CONVERSION | 10 #if !UCONFIG_NO_CONVERSION |
(...skipping 101 matching lines...) |
112 {0x1b, 0x24, 0x42, 0x00, 0x00}, // JIS X 208-1983 | 112 {0x1b, 0x24, 0x42, 0x00, 0x00}, // JIS X 208-1983 |
113 {0x1b, 0x26, 0x40, 0x00, 0x00}, // JIS X 208 1990, 1997 | 113 {0x1b, 0x26, 0x40, 0x00, 0x00}, // JIS X 208 1990, 1997 |
114 {0x1b, 0x28, 0x42, 0x00, 0x00}, // ASCII | 114 {0x1b, 0x28, 0x42, 0x00, 0x00}, // ASCII |
115 {0x1b, 0x28, 0x48, 0x00, 0x00}, // JIS-Roman | 115 {0x1b, 0x28, 0x48, 0x00, 0x00}, // JIS-Roman |
116 {0x1b, 0x28, 0x49, 0x00, 0x00}, // Half-width katakana | 116 {0x1b, 0x28, 0x49, 0x00, 0x00}, // Half-width katakana |
117 {0x1b, 0x28, 0x4a, 0x00, 0x00}, // JIS-Roman | 117 {0x1b, 0x28, 0x4a, 0x00, 0x00}, // JIS-Roman |
118 {0x1b, 0x2e, 0x41, 0x00, 0x00}, // ISO 8859-1 | 118 {0x1b, 0x2e, 0x41, 0x00, 0x00}, // ISO 8859-1 |
119 {0x1b, 0x2e, 0x46, 0x00, 0x00} // ISO 8859-7 | 119 {0x1b, 0x2e, 0x46, 0x00, 0x00} // ISO 8859-7 |
120 }; | 120 }; |
121 | 121 |
| 122 #if !UCONFIG_NO_NON_HTML5_CONVERSION |
122 static const uint8_t escapeSequences_2022KR[][5] = { | 123 static const uint8_t escapeSequences_2022KR[][5] = { |
123 {0x1b, 0x24, 0x29, 0x43, 0x00} | 124 {0x1b, 0x24, 0x29, 0x43, 0x00} |
124 }; | 125 }; |
125 | 126 |
126 static const uint8_t escapeSequences_2022CN[][5] = { | 127 static const uint8_t escapeSequences_2022CN[][5] = { |
127 {0x1b, 0x24, 0x29, 0x41, 0x00}, // GB 2312-80 | 128 {0x1b, 0x24, 0x29, 0x41, 0x00}, // GB 2312-80 |
128 {0x1b, 0x24, 0x29, 0x47, 0x00}, // CNS 11643-1992 Plane 1 | 129 {0x1b, 0x24, 0x29, 0x47, 0x00}, // CNS 11643-1992 Plane 1 |
129 {0x1b, 0x24, 0x2A, 0x48, 0x00}, // CNS 11643-1992 Plane 2 | 130 {0x1b, 0x24, 0x2A, 0x48, 0x00}, // CNS 11643-1992 Plane 2 |
130 {0x1b, 0x24, 0x29, 0x45, 0x00}, // ISO-IR-165 | 131 {0x1b, 0x24, 0x29, 0x45, 0x00}, // ISO-IR-165 |
131 {0x1b, 0x24, 0x2B, 0x49, 0x00}, // CNS 11643-1992 Plane 3 | 132 {0x1b, 0x24, 0x2B, 0x49, 0x00}, // CNS 11643-1992 Plane 3 |
132 {0x1b, 0x24, 0x2B, 0x4A, 0x00}, // CNS 11643-1992 Plane 4 | 133 {0x1b, 0x24, 0x2B, 0x4A, 0x00}, // CNS 11643-1992 Plane 4 |
133 {0x1b, 0x24, 0x2B, 0x4B, 0x00}, // CNS 11643-1992 Plane 5 | 134 {0x1b, 0x24, 0x2B, 0x4B, 0x00}, // CNS 11643-1992 Plane 5 |
134 {0x1b, 0x24, 0x2B, 0x4C, 0x00}, // CNS 11643-1992 Plane 6 | 135 {0x1b, 0x24, 0x2B, 0x4C, 0x00}, // CNS 11643-1992 Plane 6 |
135 {0x1b, 0x24, 0x2B, 0x4D, 0x00}, // CNS 11643-1992 Plane 7 | 136 {0x1b, 0x24, 0x2B, 0x4D, 0x00}, // CNS 11643-1992 Plane 7 |
136 {0x1b, 0x4e, 0x00, 0x00, 0x00}, // SS2 | 137 {0x1b, 0x4e, 0x00, 0x00, 0x00}, // SS2 |
137 {0x1b, 0x4f, 0x00, 0x00, 0x00}, // SS3 | 138 {0x1b, 0x4f, 0x00, 0x00, 0x00}, // SS3 |
138 }; | 139 }; |
| 140 #endif |
139 | 141 |
140 CharsetRecog_2022JP::~CharsetRecog_2022JP() {} | 142 CharsetRecog_2022JP::~CharsetRecog_2022JP() {} |
141 | 143 |
142 const char *CharsetRecog_2022JP::getName() const { | 144 const char *CharsetRecog_2022JP::getName() const { |
143 return "ISO-2022-JP"; | 145 return "ISO-2022-JP"; |
144 } | 146 } |
145 | 147 |
146 UBool CharsetRecog_2022JP::match(InputText *textIn, CharsetMatch *results) const { | 148 UBool CharsetRecog_2022JP::match(InputText *textIn, CharsetMatch *results) const { |
147 int32_t confidence = match_2022(textIn->fInputBytes, | 149 int32_t confidence = match_2022(textIn->fInputBytes, |
148 textIn->fInputLen, | 150 textIn->fInputLen, |
149 escapeSequences_2022JP, | 151 escapeSequences_2022JP, |
150 ARRAY_SIZE(escapeSequences_2022JP)); | 152 ARRAY_SIZE(escapeSequences_2022JP)); |
151 results->set(textIn, this, confidence); | 153 results->set(textIn, this, confidence); |
152 return (confidence > 0); | 154 return (confidence > 0); |
153 } | 155 } |
154 | 156 |
| 157 #if !UCONFIG_NO_NON_HTML5_CONVERSION |
155 CharsetRecog_2022KR::~CharsetRecog_2022KR() {} | 158 CharsetRecog_2022KR::~CharsetRecog_2022KR() {} |
156 | 159 |
157 const char *CharsetRecog_2022KR::getName() const { | 160 const char *CharsetRecog_2022KR::getName() const { |
158 return "ISO-2022-KR"; | 161 return "ISO-2022-KR"; |
159 } | 162 } |
160 | 163 |
161 UBool CharsetRecog_2022KR::match(InputText *textIn, CharsetMatch *results) const { | 164 UBool CharsetRecog_2022KR::match(InputText *textIn, CharsetMatch *results) const { |
162 int32_t confidence = match_2022(textIn->fInputBytes, | 165 int32_t confidence = match_2022(textIn->fInputBytes, |
163 textIn->fInputLen, | 166 textIn->fInputLen, |
164 escapeSequences_2022KR, | 167 escapeSequences_2022KR, |
165 ARRAY_SIZE(escapeSequences_2022KR)); | 168 ARRAY_SIZE(escapeSequences_2022KR)); |
166 results->set(textIn, this, confidence); | 169 results->set(textIn, this, confidence); |
167 return (confidence > 0); | 170 return (confidence > 0); |
168 } | 171 } |
169 | 172 |
170 CharsetRecog_2022CN::~CharsetRecog_2022CN() {} | 173 CharsetRecog_2022CN::~CharsetRecog_2022CN() {} |
171 | 174 |
172 const char *CharsetRecog_2022CN::getName() const { | 175 const char *CharsetRecog_2022CN::getName() const { |
173 return "ISO-2022-CN"; | 176 return "ISO-2022-CN"; |
174 } | 177 } |
175 | 178 |
176 UBool CharsetRecog_2022CN::match(InputText *textIn, CharsetMatch *results) const { | 179 UBool CharsetRecog_2022CN::match(InputText *textIn, CharsetMatch *results) const { |
177 int32_t confidence = match_2022(textIn->fInputBytes, | 180 int32_t confidence = match_2022(textIn->fInputBytes, |
178 textIn->fInputLen, | 181 textIn->fInputLen, |
179 escapeSequences_2022CN, | 182 escapeSequences_2022CN, |
180 ARRAY_SIZE(escapeSequences_2022CN)); | 183 ARRAY_SIZE(escapeSequences_2022CN)); |
181 results->set(textIn, this, confidence); | 184 results->set(textIn, this, confidence); |
182 return (confidence > 0); | 185 return (confidence > 0); |
183 } | 186 } |
| 187 #endif |
184 | 188 |
185 CharsetRecog_2022::~CharsetRecog_2022() { | 189 CharsetRecog_2022::~CharsetRecog_2022() { |
186 // nothing to do | 190 // nothing to do |
187 } | 191 } |
188 | 192 |
189 U_NAMESPACE_END | 193 U_NAMESPACE_END |
190 #endif | 194 #endif |
OLD | NEW |