OLD | NEW |
1 /* | 1 /* |
2 ********************************************************************** | 2 ********************************************************************** |
3 * Copyright (C) 2005-2012, International Business Machines | 3 * Copyright (C) 2005-2012, International Business Machines |
4 * Corporation and others. All Rights Reserved. | 4 * Corporation and others. All Rights Reserved. |
5 ********************************************************************** | 5 ********************************************************************** |
6 */ | 6 */ |
7 | 7 |
8 #include "unicode/utypes.h" | 8 #include "unicode/utypes.h" |
9 | 9 |
10 #if !UCONFIG_NO_CONVERSION | 10 #if !UCONFIG_NO_CONVERSION |
(...skipping 101 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
112 {0x1b, 0x24, 0x42, 0x00, 0x00}, // JIS X 208-1983 | 112 {0x1b, 0x24, 0x42, 0x00, 0x00}, // JIS X 208-1983 |
113 {0x1b, 0x26, 0x40, 0x00, 0x00}, // JIS X 208 1990, 1997 | 113 {0x1b, 0x26, 0x40, 0x00, 0x00}, // JIS X 208 1990, 1997 |
114 {0x1b, 0x28, 0x42, 0x00, 0x00}, // ASCII | 114 {0x1b, 0x28, 0x42, 0x00, 0x00}, // ASCII |
115 {0x1b, 0x28, 0x48, 0x00, 0x00}, // JIS-Roman | 115 {0x1b, 0x28, 0x48, 0x00, 0x00}, // JIS-Roman |
116 {0x1b, 0x28, 0x49, 0x00, 0x00}, // Half-width katakana | 116 {0x1b, 0x28, 0x49, 0x00, 0x00}, // Half-width katakana |
117 {0x1b, 0x28, 0x4a, 0x00, 0x00}, // JIS-Roman | 117 {0x1b, 0x28, 0x4a, 0x00, 0x00}, // JIS-Roman |
118 {0x1b, 0x2e, 0x41, 0x00, 0x00}, // ISO 8859-1 | 118 {0x1b, 0x2e, 0x41, 0x00, 0x00}, // ISO 8859-1 |
119 {0x1b, 0x2e, 0x46, 0x00, 0x00} // ISO 8859-7 | 119 {0x1b, 0x2e, 0x46, 0x00, 0x00} // ISO 8859-7 |
120 }; | 120 }; |
121 | 121 |
122 #if !UCONFIG_NO_NON_HTML5_CONVERSION | |
// Escape-sequence table that CharsetRecog_2022KR::match() hands to match_2022().
// Each row is a fixed 5-byte slot. The single entry is 0x1b 0x24 0x29 0x43
// (ESC '$' ')' 'C' when read as ASCII) followed by one 0x00 pad byte.
// NOTE(review): presumably match_2022() treats 0x00 as end-of-sequence; its
// definition is not visible in this excerpt -- confirm.
123 static const uint8_t escapeSequences_2022KR[][5] = { | 122 static const uint8_t escapeSequences_2022KR[][5] = { |
124 {0x1b, 0x24, 0x29, 0x43, 0x00} | 123 {0x1b, 0x24, 0x29, 0x43, 0x00} |
125 }; | 124 }; |
126 | 125 |
// Escape-sequence table that CharsetRecog_2022CN::match() hands to match_2022().
// Each row is a fixed 5-byte slot. The nine character-set rows occupy four
// bytes (0x1b 0x24 plus two more), the SS2 and SS3 rows occupy two bytes, and
// the rest of every slot is filled with 0x00.
// NOTE(review): presumably match_2022() treats 0x00 as end-of-sequence; its
// definition is not visible in this excerpt -- confirm.
127 static const uint8_t escapeSequences_2022CN[][5] = { | 126 static const uint8_t escapeSequences_2022CN[][5] = { |
128 {0x1b, 0x24, 0x29, 0x41, 0x00}, // GB 2312-80 | 127 {0x1b, 0x24, 0x29, 0x41, 0x00}, // GB 2312-80 |
129 {0x1b, 0x24, 0x29, 0x47, 0x00}, // CNS 11643-1992 Plane 1 | 128 {0x1b, 0x24, 0x29, 0x47, 0x00}, // CNS 11643-1992 Plane 1 |
130 {0x1b, 0x24, 0x2A, 0x48, 0x00}, // CNS 11643-1992 Plane 2 | 129 {0x1b, 0x24, 0x2A, 0x48, 0x00}, // CNS 11643-1992 Plane 2 |
131 {0x1b, 0x24, 0x29, 0x45, 0x00}, // ISO-IR-165 | 130 {0x1b, 0x24, 0x29, 0x45, 0x00}, // ISO-IR-165 |
132 {0x1b, 0x24, 0x2B, 0x49, 0x00}, // CNS 11643-1992 Plane 3 | 131 {0x1b, 0x24, 0x2B, 0x49, 0x00}, // CNS 11643-1992 Plane 3 |
133 {0x1b, 0x24, 0x2B, 0x4A, 0x00}, // CNS 11643-1992 Plane 4 | 132 {0x1b, 0x24, 0x2B, 0x4A, 0x00}, // CNS 11643-1992 Plane 4 |
134 {0x1b, 0x24, 0x2B, 0x4B, 0x00}, // CNS 11643-1992 Plane 5 | 133 {0x1b, 0x24, 0x2B, 0x4B, 0x00}, // CNS 11643-1992 Plane 5 |
135 {0x1b, 0x24, 0x2B, 0x4C, 0x00}, // CNS 11643-1992 Plane 6 | 134 {0x1b, 0x24, 0x2B, 0x4C, 0x00}, // CNS 11643-1992 Plane 6 |
136 {0x1b, 0x24, 0x2B, 0x4D, 0x00}, // CNS 11643-1992 Plane 7 | 135 {0x1b, 0x24, 0x2B, 0x4D, 0x00}, // CNS 11643-1992 Plane 7 |
137 {0x1b, 0x4e, 0x00, 0x00, 0x00}, // SS2 | 136 {0x1b, 0x4e, 0x00, 0x00, 0x00}, // SS2 |
138 {0x1b, 0x4f, 0x00, 0x00, 0x00}, // SS3 | 137 {0x1b, 0x4f, 0x00, 0x00, 0x00}, // SS3 |
139 }; | 138 }; |
140 #endif | |
141 | 139 |
// Destructor for the ISO-2022-JP recognizer. The body is empty: no cleanup is performed here.
142 CharsetRecog_2022JP::~CharsetRecog_2022JP() {} | 140 CharsetRecog_2022JP::~CharsetRecog_2022JP() {} |
143 | 141 |
// Returns the charset name this recognizer reports: the string literal "ISO-2022-JP".
144 const char *CharsetRecog_2022JP::getName() const { | 142 const char *CharsetRecog_2022JP::getName() const { |
145 return "ISO-2022-JP"; | 143 return "ISO-2022-JP"; |
146 } | 144 } |
147 | 145 |
// Scores the input against the ISO-2022-JP escape sequences.
// Passes the raw input bytes and their length to match_2022() together with
// the escapeSequences_2022JP table, records the resulting confidence on
// `results`, and returns true only when that confidence is greater than zero.
148 UBool CharsetRecog_2022JP::match(InputText *textIn, CharsetMatch *results) const
{ | 146 UBool CharsetRecog_2022JP::match(InputText *textIn, CharsetMatch *results) const
{ |
149 int32_t confidence = match_2022(textIn->fInputBytes, | 147 int32_t confidence = match_2022(textIn->fInputBytes, |
150 textIn->fInputLen, | 148 textIn->fInputLen, |
151 escapeSequences_2022JP, | 149 escapeSequences_2022JP, |
152 ARRAY_SIZE(escapeSequences_2022JP)); | 150 ARRAY_SIZE(escapeSequences_2022JP)); |
// `results` is updated unconditionally, even when the confidence is not positive.
153 results->set(textIn, this, confidence); | 151 results->set(textIn, this, confidence); |
154 return (confidence > 0); | 152 return (confidence > 0); |
155 } | 153 } |
156 | 154 |
157 #if !UCONFIG_NO_NON_HTML5_CONVERSION | |
// Destructor for the ISO-2022-KR recognizer. The body is empty: no cleanup is performed here.
158 CharsetRecog_2022KR::~CharsetRecog_2022KR() {} | 155 CharsetRecog_2022KR::~CharsetRecog_2022KR() {} |
159 | 156 |
// Returns the charset name this recognizer reports: the string literal "ISO-2022-KR".
160 const char *CharsetRecog_2022KR::getName() const { | 157 const char *CharsetRecog_2022KR::getName() const { |
161 return "ISO-2022-KR"; | 158 return "ISO-2022-KR"; |
162 } | 159 } |
163 | 160 |
// Scores the input against the ISO-2022-KR escape sequences.
// Passes the raw input bytes and their length to match_2022() together with
// the escapeSequences_2022KR table, records the resulting confidence on
// `results`, and returns true only when that confidence is greater than zero.
164 UBool CharsetRecog_2022KR::match(InputText *textIn, CharsetMatch *results) const
{ | 161 UBool CharsetRecog_2022KR::match(InputText *textIn, CharsetMatch *results) const
{ |
165 int32_t confidence = match_2022(textIn->fInputBytes, | 162 int32_t confidence = match_2022(textIn->fInputBytes, |
166 textIn->fInputLen, | 163 textIn->fInputLen, |
167 escapeSequences_2022KR, | 164 escapeSequences_2022KR, |
168 ARRAY_SIZE(escapeSequences_2022KR)); | 165 ARRAY_SIZE(escapeSequences_2022KR)); |
// `results` is updated unconditionally, even when the confidence is not positive.
169 results->set(textIn, this, confidence); | 166 results->set(textIn, this, confidence); |
170 return (confidence > 0); | 167 return (confidence > 0); |
171 } | 168 } |
172 | 169 |
// Destructor for the ISO-2022-CN recognizer. The body is empty: no cleanup is performed here.
173 CharsetRecog_2022CN::~CharsetRecog_2022CN() {} | 170 CharsetRecog_2022CN::~CharsetRecog_2022CN() {} |
174 | 171 |
// Returns the charset name this recognizer reports: the string literal "ISO-2022-CN".
175 const char *CharsetRecog_2022CN::getName() const { | 172 const char *CharsetRecog_2022CN::getName() const { |
176 return "ISO-2022-CN"; | 173 return "ISO-2022-CN"; |
177 } | 174 } |
178 | 175 |
// Scores the input against the ISO-2022-CN escape sequences.
// Passes the raw input bytes and their length to match_2022() together with
// the escapeSequences_2022CN table, records the resulting confidence on
// `results`, and returns true only when that confidence is greater than zero.
179 UBool CharsetRecog_2022CN::match(InputText *textIn, CharsetMatch *results) const
{ | 176 UBool CharsetRecog_2022CN::match(InputText *textIn, CharsetMatch *results) const
{ |
180 int32_t confidence = match_2022(textIn->fInputBytes, | 177 int32_t confidence = match_2022(textIn->fInputBytes, |
181 textIn->fInputLen, | 178 textIn->fInputLen, |
182 escapeSequences_2022CN, | 179 escapeSequences_2022CN, |
183 ARRAY_SIZE(escapeSequences_2022CN)); | 180 ARRAY_SIZE(escapeSequences_2022CN)); |
// `results` is updated unconditionally, even when the confidence is not positive.
184 results->set(textIn, this, confidence); | 181 results->set(textIn, this, confidence); |
185 return (confidence > 0); | 182 return (confidence > 0); |
186 } | 183 } |
187 #endif | |
188 | 184 |
// Destructor for CharsetRecog_2022; it performs no cleanup.
// NOTE(review): presumably this is the common base of the 2022JP/KR/CN
// recognizers above -- the class declarations are not visible here, confirm in the header.
189 CharsetRecog_2022::~CharsetRecog_2022() { | 185 CharsetRecog_2022::~CharsetRecog_2022() { |
190 // nothing to do | 186 // nothing to do |
191 } | 187 } |
192 | 188 |
193 U_NAMESPACE_END | 189 U_NAMESPACE_END |
194 #endif | 190 #endif |
OLD | NEW |