OLD | NEW |
| (Empty) |
1 /* | |
2 ************************************************************************ | |
3 * Copyright (c) 1997-2010, International Business Machines | |
4 * Corporation and others. All Rights Reserved. | |
5 ************************************************************************ | |
6 */ | |
7 | |
8 #include "unicode/utypes.h" | |
9 | |
10 #if !UCONFIG_NO_NORMALIZATION | |
11 | |
12 #include "unicode/uchar.h" | |
13 #include "unicode/normlzr.h" | |
14 #include "unicode/uniset.h" | |
15 #include "unicode/putil.h" | |
16 #include "cstring.h" | |
17 #include "filestrm.h" | |
18 #include "normconf.h" | |
19 #include <stdio.h> | |
20 | |
/**
 * Number of elements in a true array (not a pointer).
 * The argument is parenthesized everywhere it is used so that any
 * array-valued expression can be passed safely.
 */
#define ARRAY_LENGTH(array) (sizeof(array) / sizeof((array)[0]))
22 | |
23 #define CASE(id,test,exec) case id: \ | |
24 name = #test; \ | |
25 if (exec) { \ | |
26 logln(#test "---"); \ | |
27 logln((UnicodeString)""); \ | |
28 test(); \ | |
29 } \ | |
30 break | |
31 | |
32 void NormalizerConformanceTest::runIndexedTest(int32_t index, UBool exec, const
char* &name, char* /*par*/) { | |
33 switch (index) { | |
34 CASE(0, TestConformance, exec); | |
35 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION | |
36 CASE(1, TestConformance32, exec); | |
37 #endif | |
38 // CASE(2, TestCase6); | |
39 default: name = ""; break; | |
40 } | |
41 } | |
42 | |
43 #define FIELD_COUNT 5 | |
44 | |
45 NormalizerConformanceTest::NormalizerConformanceTest() : | |
46 normalizer(UnicodeString(), UNORM_NFC) {} | |
47 | |
48 NormalizerConformanceTest::~NormalizerConformanceTest() {} | |
49 | |
// Extra conformance cases that are not part of the unicode.org
// NormalizationTest.txt. Each entry uses the same layout as a line of that
// file: five semicolon-terminated columns followed by a '#' comment.
static const char *moreCases[]={
    // Markus 2001aug30
    "0061 0332 0308;"       // c1
    "00E4 0332;"            // c2
    "0061 0332 0308;"       // c3
    "00E4 0332;"            // c4
    "0061 0332 0308;"       // c5
    " # Markus 0",

    // Markus 2001oct26 - test edge case for iteration:
    // U+0f73.cc==0 but decomposition.lead.cc==129
    "0061 0301 0F73;"       // c1
    "00E1 0F71 0F72;"       // c2
    "0061 0F71 0F72 0301;"  // c3
    "00E1 0F71 0F72;"       // c4
    "0061 0F71 0F72 0301;"  // c5
    " # Markus 1"
};
58 | |
59 void NormalizerConformanceTest::compare(const UnicodeString& s1, const UnicodeSt
ring& s2){ | |
60 UErrorCode status=U_ZERO_ERROR; | |
61 // TODO: Re-enable this tests after UTC fixes UAX 21 | |
62 if(s1.indexOf((UChar32)0x0345)>=0)return; | |
63 if(Normalizer::compare(s1,s2,U_FOLD_CASE_DEFAULT,status)!=0){ | |
64 errln("Normalizer::compare() failed for s1: " + prettify(s1) + " s2: " +
prettify(s2)); | |
65 } | |
66 } | |
67 | |
68 FileStream * | |
69 NormalizerConformanceTest::openNormalizationTestFile(const char *filename) { | |
70 char unidataPath[2000]; | |
71 const char *folder; | |
72 FileStream *input; | |
73 UErrorCode errorCode; | |
74 | |
75 // look inside ICU_DATA first | |
76 folder=pathToDataDirectory(); | |
77 if(folder!=NULL) { | |
78 strcpy(unidataPath, folder); | |
79 strcat(unidataPath, "unidata" U_FILE_SEP_STRING); | |
80 strcat(unidataPath, filename); | |
81 input=T_FileStream_open(unidataPath, "rb"); | |
82 if(input!=NULL) { | |
83 return input; | |
84 } | |
85 } | |
86 | |
87 // find icu/source/data/unidata relative to the test data | |
88 errorCode=U_ZERO_ERROR; | |
89 folder=loadTestData(errorCode); | |
90 if(U_SUCCESS(errorCode)) { | |
91 strcpy(unidataPath, folder); | |
92 strcat(unidataPath, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." | |
93 U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." | |
94 U_FILE_SEP_STRING "data" U_FILE_SEP_STRING "unidata" U_FILE
_SEP_STRING); | |
95 strcat(unidataPath, filename); | |
96 input=T_FileStream_open(unidataPath, "rb"); | |
97 if(input!=NULL) { | |
98 return input; | |
99 } | |
100 } | |
101 | |
102 // look in icu/source/test/testdata/out/build | |
103 errorCode=U_ZERO_ERROR; | |
104 folder=loadTestData(errorCode); | |
105 if(U_SUCCESS(errorCode)) { | |
106 strcpy(unidataPath, folder); | |
107 strcat(unidataPath, U_FILE_SEP_STRING); | |
108 strcat(unidataPath, filename); | |
109 input=T_FileStream_open(unidataPath, "rb"); | |
110 if(input!=NULL) { | |
111 return input; | |
112 } | |
113 } | |
114 | |
115 // look in icu/source/test/testdata | |
116 errorCode=U_ZERO_ERROR; | |
117 folder=loadTestData(errorCode); | |
118 if(U_SUCCESS(errorCode)) { | |
119 strcpy(unidataPath, folder); | |
120 strcat(unidataPath, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE
_SEP_STRING); | |
121 strcat(unidataPath, filename); | |
122 input=T_FileStream_open(unidataPath, "rb"); | |
123 if(input!=NULL) { | |
124 return input; | |
125 } | |
126 } | |
127 | |
128 // find icu/source/data/unidata relative to U_TOPSRCDIR | |
129 #if defined(U_TOPSRCDIR) | |
130 strcpy(unidataPath, U_TOPSRCDIR U_FILE_SEP_STRING "data" U_FILE_SEP_STRING "
unidata" U_FILE_SEP_STRING); | |
131 strcat(unidataPath, filename); | |
132 input=T_FileStream_open(unidataPath, "rb"); | |
133 if(input!=NULL) { | |
134 return input; | |
135 } | |
136 | |
137 strcpy(unidataPath, U_TOPSRCDIR U_FILE_SEP_STRING "test" U_FILE_SEP_STRING "
testdata" U_FILE_SEP_STRING); | |
138 strcat(unidataPath, filename); | |
139 input=T_FileStream_open(unidataPath, "rb"); | |
140 if(input!=NULL) { | |
141 return input; | |
142 } | |
143 #endif | |
144 | |
145 dataerrln("Failed to open %s", filename); | |
146 return NULL; | |
147 } | |
148 | |
149 /** | |
150 * Test the conformance of Normalizer to | |
151 * http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt | |
152 */ | |
153 void NormalizerConformanceTest::TestConformance() { | |
154 TestConformance(openNormalizationTestFile("NormalizationTest.txt"), 0); | |
155 } | |
156 | |
157 void NormalizerConformanceTest::TestConformance32() { | |
158 TestConformance(openNormalizationTestFile("NormalizationTest-3.2.0.txt"), UN
ORM_UNICODE_3_2); | |
159 } | |
160 | |
161 void NormalizerConformanceTest::TestConformance(FileStream *input, int32_t optio
ns) { | |
162 enum { BUF_SIZE = 1024 }; | |
163 char lineBuf[BUF_SIZE]; | |
164 UnicodeString fields[FIELD_COUNT]; | |
165 UErrorCode status = U_ZERO_ERROR; | |
166 int32_t passCount = 0; | |
167 int32_t failCount = 0; | |
168 UChar32 c; | |
169 | |
170 if(input==NULL) { | |
171 return; | |
172 } | |
173 | |
174 // UnicodeSet for all code points that are not mentioned in NormalizationTes
t.txt | |
175 UnicodeSet other(0, 0x10ffff); | |
176 | |
177 int32_t count, countMoreCases = sizeof(moreCases)/sizeof(moreCases[0]); | |
178 for (count = 1;;++count) { | |
179 if (!T_FileStream_eof(input)) { | |
180 T_FileStream_readLine(input, lineBuf, (int32_t)sizeof(lineBuf)); | |
181 } else { | |
182 // once NormalizationTest.txt is finished, use moreCases[] | |
183 if(count > countMoreCases) { | |
184 count = 0; | |
185 } else if(count == countMoreCases) { | |
186 // all done | |
187 break; | |
188 } | |
189 uprv_strcpy(lineBuf, moreCases[count]); | |
190 } | |
191 if (lineBuf[0] == 0 || lineBuf[0] == '\n' || lineBuf[0] == '\r') continu
e; | |
192 | |
193 // Expect 5 columns of this format: | |
194 // 1E0C;1E0C;0044 0323;1E0C;0044 0323; # <comments> | |
195 | |
196 // Parse out the comment. | |
197 if (lineBuf[0] == '#') continue; | |
198 | |
199 // Read separator lines starting with '@' | |
200 if (lineBuf[0] == '@') { | |
201 logln(lineBuf); | |
202 continue; | |
203 } | |
204 | |
205 // Parse out the fields | |
206 if (!hexsplit(lineBuf, ';', fields, FIELD_COUNT)) { | |
207 errln((UnicodeString)"Unable to parse line " + count); | |
208 break; // Syntax error | |
209 } | |
210 | |
211 // Remove a single code point from the "other" UnicodeSet | |
212 if(fields[0].length()==fields[0].moveIndex32(0, 1)) { | |
213 c=fields[0].char32At(0); | |
214 if(0xac20<=c && c<=0xd73f && quick) { | |
215 // not an exhaustive test run: skip most Hangul syllables | |
216 if(c==0xac20) { | |
217 other.remove(0xac20, 0xd73f); | |
218 } | |
219 continue; | |
220 } | |
221 other.remove(c); | |
222 } | |
223 | |
224 if (checkConformance(fields, lineBuf, options, status)) { | |
225 ++passCount; | |
226 } else { | |
227 ++failCount; | |
228 if(status == U_FILE_ACCESS_ERROR) { | |
229 dataerrln("Something is wrong with the normalizer, skipping the re
st of the test."); | |
230 break; | |
231 } | |
232 } | |
233 if ((count % 1000) == 0) { | |
234 logln("Line %d", count); | |
235 } | |
236 } | |
237 | |
238 T_FileStream_close(input); | |
239 | |
240 /* | |
241 * Test that all characters that are not mentioned | |
242 * as single code points in column 1 | |
243 * do not change under any normalization. | |
244 */ | |
245 | |
246 // remove U+ffff because that is the end-of-iteration sentinel value | |
247 other.remove(0xffff); | |
248 | |
249 for(c=0; c<=0x10ffff; quick ? c+=113 : ++c) { | |
250 if(0x30000<=c && c<0xe0000) { | |
251 c=0xe0000; | |
252 } | |
253 if(!other.contains(c)) { | |
254 continue; | |
255 } | |
256 | |
257 fields[0]=fields[1]=fields[2]=fields[3]=fields[4].setTo(c); | |
258 sprintf(lineBuf, "not mentioned code point U+%04lx", (long)c); | |
259 | |
260 if (checkConformance(fields, lineBuf, options, status)) { | |
261 ++passCount; | |
262 } else { | |
263 ++failCount; | |
264 if(status == U_FILE_ACCESS_ERROR) { | |
265 dataerrln("Something is wrong with the normalizer, skipping the re
st of the test.: %s", u_errorName(status)); | |
266 break; | |
267 } | |
268 } | |
269 if ((c % 0x1000) == 0) { | |
270 logln("Code point U+%04lx", c); | |
271 } | |
272 } | |
273 | |
274 if (failCount != 0) { | |
275 dataerrln((UnicodeString)"Total: " + failCount + " lines/code points fai
led, " + | |
276 passCount + " lines/code points passed"); | |
277 } else { | |
278 logln((UnicodeString)"Total: " + passCount + " lines/code points passed"
); | |
279 } | |
280 } | |
281 | |
282 /** | |
283 * Verify the conformance of the given line of the Unicode | |
284 * normalization (UTR 15) test suite file. For each line, | |
285 * there are five columns, corresponding to field[0]..field[4]. | |
286 * | |
287 * The following invariants must be true for all conformant implementations | |
288 * c2 == NFC(c1) == NFC(c2) == NFC(c3) | |
289 * c3 == NFD(c1) == NFD(c2) == NFD(c3) | |
290 * c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5) | |
291 * c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5) | |
292 * | |
293 * @param field the 5 columns | |
294 * @param line the source line from the test suite file | |
295 * @return true if the test passes | |
296 */ | |
297 UBool NormalizerConformanceTest::checkConformance(const UnicodeString* field, | |
298 const char *line, | |
299 int32_t options, | |
300 UErrorCode &status) { | |
301 UBool pass = TRUE, result; | |
302 //UErrorCode status = U_ZERO_ERROR; | |
303 UnicodeString out, fcd; | |
304 int32_t fieldNum; | |
305 | |
306 for (int32_t i=0; i<FIELD_COUNT; ++i) { | |
307 fieldNum = i+1; | |
308 if (i<3) { | |
309 Normalizer::normalize(field[i], UNORM_NFC, options, out, status); | |
310 if (U_FAILURE(status)) { | |
311 dataerrln("Error running normalize UNORM_NFC: %s", u_errorName(s
tatus)); | |
312 } else { | |
313 pass &= assertEqual("C", field[i], out, field[1], "c2!=C(c", fie
ldNum); | |
314 iterativeNorm(field[i], UNORM_NFC, options, out, +1); | |
315 pass &= assertEqual("C(+1)", field[i], out, field[1], "c2!=C(c",
fieldNum); | |
316 iterativeNorm(field[i], UNORM_NFC, options, out, -1); | |
317 pass &= assertEqual("C(-1)", field[i], out, field[1], "c2!=C(c",
fieldNum); | |
318 } | |
319 | |
320 Normalizer::normalize(field[i], UNORM_NFD, options, out, status); | |
321 if (U_FAILURE(status)) { | |
322 dataerrln("Error running normalize UNORM_NFD: %s", u_errorName(s
tatus)); | |
323 } else { | |
324 pass &= assertEqual("D", field[i], out, field[2], "c3!=D(c", fie
ldNum); | |
325 iterativeNorm(field[i], UNORM_NFD, options, out, +1); | |
326 pass &= assertEqual("D(+1)", field[i], out, field[2], "c3!=D(c",
fieldNum); | |
327 iterativeNorm(field[i], UNORM_NFD, options, out, -1); | |
328 pass &= assertEqual("D(-1)", field[i], out, field[2], "c3!=D(c",
fieldNum); | |
329 } | |
330 } | |
331 Normalizer::normalize(field[i], UNORM_NFKC, options, out, status); | |
332 if (U_FAILURE(status)) { | |
333 dataerrln("Error running normalize UNORM_NFKC: %s", u_errorName(stat
us)); | |
334 } else { | |
335 pass &= assertEqual("KC", field[i], out, field[3], "c4!=KC(c", field
Num); | |
336 iterativeNorm(field[i], UNORM_NFKC, options, out, +1); | |
337 pass &= assertEqual("KC(+1)", field[i], out, field[3], "c4!=KC(c", f
ieldNum); | |
338 iterativeNorm(field[i], UNORM_NFKC, options, out, -1); | |
339 pass &= assertEqual("KC(-1)", field[i], out, field[3], "c4!=KC(c", f
ieldNum); | |
340 } | |
341 | |
342 Normalizer::normalize(field[i], UNORM_NFKD, options, out, status); | |
343 if (U_FAILURE(status)) { | |
344 dataerrln("Error running normalize UNORM_NFKD: %s", u_errorName(stat
us)); | |
345 } else { | |
346 pass &= assertEqual("KD", field[i], out, field[4], "c5!=KD(c", field
Num); | |
347 iterativeNorm(field[i], UNORM_NFKD, options, out, +1); | |
348 pass &= assertEqual("KD(+1)", field[i], out, field[4], "c5!=KD(c", f
ieldNum); | |
349 iterativeNorm(field[i], UNORM_NFKD, options, out, -1); | |
350 pass &= assertEqual("KD(-1)", field[i], out, field[4], "c5!=KD(c", f
ieldNum); | |
351 } | |
352 } | |
353 compare(field[1],field[2]); | |
354 compare(field[0],field[1]); | |
355 // test quick checks | |
356 if(UNORM_NO == Normalizer::quickCheck(field[1], UNORM_NFC, options, status))
{ | |
357 errln("Normalizer error: quickCheck(NFC(s), UNORM_NFC) is UNORM_NO"); | |
358 pass = FALSE; | |
359 } | |
360 if(UNORM_NO == Normalizer::quickCheck(field[2], UNORM_NFD, options, status))
{ | |
361 errln("Normalizer error: quickCheck(NFD(s), UNORM_NFD) is UNORM_NO"); | |
362 pass = FALSE; | |
363 } | |
364 if(UNORM_NO == Normalizer::quickCheck(field[3], UNORM_NFKC, options, status)
) { | |
365 errln("Normalizer error: quickCheck(NFKC(s), UNORM_NFKC) is UNORM_NO"); | |
366 pass = FALSE; | |
367 } | |
368 if(UNORM_NO == Normalizer::quickCheck(field[4], UNORM_NFKD, options, status)
) { | |
369 errln("Normalizer error: quickCheck(NFKD(s), UNORM_NFKD) is UNORM_NO"); | |
370 pass = FALSE; | |
371 } | |
372 | |
373 // branch on options==0 for better code coverage | |
374 if(options==0) { | |
375 result = Normalizer::isNormalized(field[1], UNORM_NFC, status); | |
376 } else { | |
377 result = Normalizer::isNormalized(field[1], UNORM_NFC, options, status); | |
378 } | |
379 if(!result) { | |
380 dataerrln("Normalizer error: isNormalized(NFC(s), UNORM_NFC) is FALSE"); | |
381 pass = FALSE; | |
382 } | |
383 if(field[0]!=field[1] && Normalizer::isNormalized(field[0], UNORM_NFC, optio
ns, status)) { | |
384 errln("Normalizer error: isNormalized(s, UNORM_NFC) is TRUE"); | |
385 pass = FALSE; | |
386 } | |
387 if(!Normalizer::isNormalized(field[3], UNORM_NFKC, options, status)) { | |
388 dataerrln("Normalizer error: isNormalized(NFKC(s), UNORM_NFKC) is FALSE"
); | |
389 pass = FALSE; | |
390 } | |
391 if(field[0]!=field[3] && Normalizer::isNormalized(field[0], UNORM_NFKC, opti
ons, status)) { | |
392 errln("Normalizer error: isNormalized(s, UNORM_NFKC) is TRUE"); | |
393 pass = FALSE; | |
394 } | |
395 | |
396 // test FCD quick check and "makeFCD" | |
397 Normalizer::normalize(field[0], UNORM_FCD, options, fcd, status); | |
398 if(UNORM_NO == Normalizer::quickCheck(fcd, UNORM_FCD, options, status)) { | |
399 errln("Normalizer error: quickCheck(FCD(s), UNORM_FCD) is UNORM_NO"); | |
400 pass = FALSE; | |
401 } | |
402 if(UNORM_NO == Normalizer::quickCheck(field[2], UNORM_FCD, options, status))
{ | |
403 errln("Normalizer error: quickCheck(NFD(s), UNORM_FCD) is UNORM_NO"); | |
404 pass = FALSE; | |
405 } | |
406 if(UNORM_NO == Normalizer::quickCheck(field[4], UNORM_FCD, options, status))
{ | |
407 errln("Normalizer error: quickCheck(NFKD(s), UNORM_FCD) is UNORM_NO"); | |
408 pass = FALSE; | |
409 } | |
410 | |
411 Normalizer::normalize(fcd, UNORM_NFD, options, out, status); | |
412 if(out != field[2]) { | |
413 dataerrln("Normalizer error: NFD(FCD(s))!=NFD(s)"); | |
414 pass = FALSE; | |
415 } | |
416 | |
417 if (U_FAILURE(status)) { | |
418 dataerrln("Normalizer::normalize returned error status: %s", u_errorName
(status)); | |
419 pass = FALSE; | |
420 } | |
421 | |
422 if(field[0]!=field[2]) { | |
423 // two strings that are canonically equivalent must test | |
424 // equal under a canonical caseless match | |
425 // see UAX #21 Case Mappings and Jitterbug 2021 and | |
426 // Unicode Technical Committee meeting consensus 92-C31 | |
427 int32_t rc; | |
428 | |
429 status=U_ZERO_ERROR; | |
430 rc=Normalizer::compare(field[0], field[2], (options<<UNORM_COMPARE_NORM_
OPTIONS_SHIFT)|U_COMPARE_IGNORE_CASE, status); | |
431 if(U_FAILURE(status)) { | |
432 dataerrln("Normalizer::compare(case-insensitive) sets %s", u_errorNa
me(status)); | |
433 pass=FALSE; | |
434 } else if(rc!=0) { | |
435 errln("Normalizer::compare(original, NFD, case-insensitive) returned
%d instead of 0 for equal", rc); | |
436 pass=FALSE; | |
437 } | |
438 } | |
439 | |
440 if (!pass) { | |
441 dataerrln("FAIL: %s", line); | |
442 } | |
443 return pass; | |
444 } | |
445 | |
446 /** | |
447 * Do a normalization using the iterative API in the given direction. | |
448 * @param dir either +1 or -1 | |
449 */ | |
450 void NormalizerConformanceTest::iterativeNorm(const UnicodeString& str, | |
451 UNormalizationMode mode, int32_t o
ptions, | |
452 UnicodeString& result, | |
453 int8_t dir) { | |
454 UErrorCode status = U_ZERO_ERROR; | |
455 normalizer.setText(str, status); | |
456 normalizer.setMode(mode); | |
457 normalizer.setOption(-1, 0); // reset all options | |
458 normalizer.setOption(options, 1); // set desired options | |
459 result.truncate(0); | |
460 if (U_FAILURE(status)) { | |
461 return; | |
462 } | |
463 UChar32 ch; | |
464 if (dir > 0) { | |
465 for (ch = normalizer.first(); ch != Normalizer::DONE; | |
466 ch = normalizer.next()) { | |
467 result.append(ch); | |
468 } | |
469 } else { | |
470 for (ch = normalizer.last(); ch != Normalizer::DONE; | |
471 ch = normalizer.previous()) { | |
472 result.insert(0, ch); | |
473 } | |
474 } | |
475 } | |
476 | |
477 /** | |
478 * @param op name of normalization form, e.g., "KC" | |
479 * @param s string being normalized | |
480 * @param got value received | |
481 * @param exp expected value | |
482 * @param msg description of this test | |
483 * @param return true if got == exp | |
484 */ | |
485 UBool NormalizerConformanceTest::assertEqual(const char *op, | |
486 const UnicodeString& s, | |
487 const UnicodeString& got, | |
488 const UnicodeString& exp, | |
489 const char *msg, | |
490 int32_t field) | |
491 { | |
492 if (exp == got) | |
493 return TRUE; | |
494 | |
495 char *sChars, *gotChars, *expChars; | |
496 UnicodeString sPretty(prettify(s)); | |
497 UnicodeString gotPretty(prettify(got)); | |
498 UnicodeString expPretty(prettify(exp)); | |
499 | |
500 sChars = new char[sPretty.length() + 1]; | |
501 gotChars = new char[gotPretty.length() + 1]; | |
502 expChars = new char[expPretty.length() + 1]; | |
503 | |
504 sPretty.extract(0, sPretty.length(), sChars, sPretty.length() + 1); | |
505 sChars[sPretty.length()] = 0; | |
506 gotPretty.extract(0, gotPretty.length(), gotChars, gotPretty.length() + 1); | |
507 gotChars[gotPretty.length()] = 0; | |
508 expPretty.extract(0, expPretty.length(), expChars, expPretty.length() + 1); | |
509 expChars[expPretty.length()] = 0; | |
510 | |
511 errln(" %s%d)%s(%s)=%s, exp. %s", msg, field, op, sChars, gotChars, expCh
ars); | |
512 | |
513 delete []sChars; | |
514 delete []gotChars; | |
515 delete []expChars; | |
516 return FALSE; | |
517 } | |
518 | |
519 /** | |
520 * Split a string into pieces based on the given delimiter | |
521 * character. Then, parse the resultant fields from hex into | |
522 * characters. That is, "0040 0400;0C00;0899" -> new String[] { | |
523 * "\u0040\u0400", "\u0C00", "\u0899" }. The output is assumed to | |
524 * be of the proper length already, and exactly output.length | |
525 * fields are parsed. If there are too few an exception is | |
526 * thrown. If there are too many the extras are ignored. | |
527 * | |
528 * @return FALSE upon failure | |
529 */ | |
530 UBool NormalizerConformanceTest::hexsplit(const char *s, char delimiter, | |
531 UnicodeString output[], int32_t output
Length) { | |
532 const char *t = s; | |
533 char *end = NULL; | |
534 UChar32 c; | |
535 int32_t i; | |
536 for (i=0; i<outputLength; ++i) { | |
537 // skip whitespace | |
538 while(*t == ' ' || *t == '\t') { | |
539 ++t; | |
540 } | |
541 | |
542 // read a sequence of code points | |
543 output[i].remove(); | |
544 for(;;) { | |
545 c = (UChar32)uprv_strtoul(t, &end, 16); | |
546 | |
547 if( (char *)t == end || | |
548 (uint32_t)c > 0x10ffff || | |
549 (*end != ' ' && *end != '\t' && *end != delimiter) | |
550 ) { | |
551 errln(UnicodeString("Bad field ", "") + (i + 1) + " in " + Unico
deString(s, "")); | |
552 return FALSE; | |
553 } | |
554 | |
555 output[i].append(c); | |
556 | |
557 t = (const char *)end; | |
558 | |
559 // skip whitespace | |
560 while(*t == ' ' || *t == '\t') { | |
561 ++t; | |
562 } | |
563 | |
564 if(*t == delimiter) { | |
565 ++t; | |
566 break; | |
567 } | |
568 if(*t == 0) { | |
569 if((i + 1) == outputLength) { | |
570 return TRUE; | |
571 } else { | |
572 errln(UnicodeString("Missing field(s) in ", "") + s + " only
" + (i + 1) + " out of " + outputLength); | |
573 return FALSE; | |
574 } | |
575 } | |
576 } | |
577 } | |
578 return TRUE; | |
579 } | |
580 | |
581 // Specific tests for debugging. These are generally failures taken from | |
582 // the conformance file, but culled out to make debugging easier. | |
583 | |
584 void NormalizerConformanceTest::TestCase6(void) { | |
585 _testOneLine("0385;0385;00A8 0301;0020 0308 0301;0020 0308 0301;"); | |
586 } | |
587 | |
588 void NormalizerConformanceTest::_testOneLine(const char *line) { | |
589 UErrorCode status = U_ZERO_ERROR; | |
590 UnicodeString fields[FIELD_COUNT]; | |
591 if (!hexsplit(line, ';', fields, FIELD_COUNT)) { | |
592 errln((UnicodeString)"Unable to parse line " + line); | |
593 } else { | |
594 checkConformance(fields, line, 0, status); | |
595 } | |
596 } | |
597 | |
598 #endif /* #if !UCONFIG_NO_NORMALIZATION */ | |
OLD | NEW |