OLD | NEW |
1 /******************************************************************** | 1 /******************************************************************** |
2 * COPYRIGHT: | 2 * COPYRIGHT: |
3 * Copyright (c) 1998-2012, International Business Machines Corporation and | 3 * Copyright (c) 1998-2014, International Business Machines Corporation and |
4 * others. All Rights Reserved. | 4 * others. All Rights Reserved. |
5 ********************************************************************/ | 5 ********************************************************************/ |
6 /* | 6 /* |
7 * File utf8tst.c | 7 * File utf8tst.c |
8 * | 8 * |
9 * Modification History: | 9 * Modification History: |
10 * | 10 * |
11 * Date Name Description | 11 * Date Name Description |
12 * 07/24/2000 Madhu Creation | 12 * 07/24/2000 Madhu Creation |
13 ******************************************************************************* | 13 ******************************************************************************* |
14 */ | 14 */ |
15 | 15 |
16 #include "unicode/utypes.h" | 16 #include "unicode/utypes.h" |
17 #include "unicode/utf8.h" | 17 #include "unicode/utf8.h" |
18 #include "cmemory.h" | 18 #include "cmemory.h" |
19 #include "cintltst.h" | 19 #include "cintltst.h" |
20 | 20 |
21 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) | |
22 | |
23 /* lenient UTF-8 ------------------------------------------------------------ */ | 21 /* lenient UTF-8 ------------------------------------------------------------ */ |
24 | 22 |
25 /* | 23 /* |
26 * Lenient UTF-8 differs from conformant UTF-8 in that it allows surrogate | 24 * Lenient UTF-8 differs from conformant UTF-8 in that it allows surrogate |
27 * code points with their "natural" encoding. | 25 * code points with their "natural" encoding. |
28 * Effectively, this allows a mix of UTF-8 and CESU-8 as well as encodings of | 26 * Effectively, this allows a mix of UTF-8 and CESU-8 as well as encodings of |
29 * single surrogates. | 27 * single surrogates. |
30 * | 28 * |
31 * This is not conformant with UTF-8. | 29 * This is not conformant with UTF-8. |
32 * | 30 * |
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
94 addTest(root, &TestAppendChar, "utf8tst/TestAppendChar"); | 92 addTest(root, &TestAppendChar, "utf8tst/TestAppendChar"); |
95 addTest(root, &TestAppend, "utf8tst/TestAppend"); | 93 addTest(root, &TestAppend, "utf8tst/TestAppend"); |
96 addTest(root, &TestSurrogates, "utf8tst/TestSurrogates"); | 94 addTest(root, &TestSurrogates, "utf8tst/TestSurrogates"); |
97 } | 95 } |
98 | 96 |
99 static void TestCodeUnitValues() | 97 static void TestCodeUnitValues() |
100 { | 98 { |
101 static const uint8_t codeunit[]={0x00, 0x65, 0x7e, 0x7f, 0xc0, 0xc4, 0xf0, 0
xfd, 0x80, 0x81, 0xbc, 0xbe,}; | 99 static const uint8_t codeunit[]={0x00, 0x65, 0x7e, 0x7f, 0xc0, 0xc4, 0xf0, 0
xfd, 0x80, 0x81, 0xbc, 0xbe,}; |
102 | 100 |
103 int16_t i; | 101 int16_t i; |
104 for(i=0; i<LENGTHOF(codeunit); i++){ | 102 for(i=0; i<UPRV_LENGTHOF(codeunit); i++){ |
105 uint8_t c=codeunit[i]; | 103 uint8_t c=codeunit[i]; |
106 log_verbose("Testing code unit value of %x\n", c); | 104 log_verbose("Testing code unit value of %x\n", c); |
107 if(i<4){ | 105 if(i<4){ |
108 if(!UTF8_IS_SINGLE(c) || UTF8_IS_LEAD(c) || UTF8_IS_TRAIL(c) || !U8_
IS_SINGLE(c) || U8_IS_LEAD(c) || U8_IS_TRAIL(c)){ | 106 if(!UTF8_IS_SINGLE(c) || UTF8_IS_LEAD(c) || UTF8_IS_TRAIL(c) || !U8_
IS_SINGLE(c) || U8_IS_LEAD(c) || U8_IS_TRAIL(c)){ |
109 log_err("ERROR: 0x%02x is a single byte but results in single: %
c lead: %c trail: %c\n", | 107 log_err("ERROR: 0x%02x is a single byte but results in single: %
c lead: %c trail: %c\n", |
110 c, UTF8_IS_SINGLE(c) ? 'y' : 'n', UTF8_IS_LEAD(c) ? 'y' : 'n
', UTF8_IS_TRAIL(c) ? 'y' : 'n'); | 108 c, UTF8_IS_SINGLE(c) ? 'y' : 'n', UTF8_IS_LEAD(c) ? 'y' : 'n
', UTF8_IS_TRAIL(c) ? 'y' : 'n'); |
111 } | 109 } |
112 } else if(i< 8){ | 110 } else if(i< 8){ |
113 if(!UTF8_IS_LEAD(c) || UTF8_IS_SINGLE(c) || UTF8_IS_TRAIL(c) || !U8_
IS_LEAD(c) || U8_IS_SINGLE(c) || U8_IS_TRAIL(c)){ | 111 if(!UTF8_IS_LEAD(c) || UTF8_IS_SINGLE(c) || UTF8_IS_TRAIL(c) || !U8_
IS_LEAD(c) || U8_IS_SINGLE(c) || U8_IS_TRAIL(c)){ |
114 log_err("ERROR: 0x%02x is a lead byte but results in single: %c
lead: %c trail: %c\n", | 112 log_err("ERROR: 0x%02x is a lead byte but results in single: %c
lead: %c trail: %c\n", |
(...skipping 22 matching lines...) Expand all Loading... |
137 4, 0x24506, | 135 4, 0x24506, |
138 4, 0x20402, | 136 4, 0x20402, |
139 4, 0x10402, | 137 4, 0x10402, |
140 3, 0xd7ff, | 138 3, 0xd7ff, |
141 3, 0xe000, | 139 3, 0xe000, |
142 | 140 |
143 }; | 141 }; |
144 | 142 |
145 int16_t i; | 143 int16_t i; |
146 UBool multiple; | 144 UBool multiple; |
147 for(i=0; i<LENGTHOF(codepoint); i=(int16_t)(i+2)){ | 145 for(i=0; i<UPRV_LENGTHOF(codepoint); i=(int16_t)(i+2)){ |
148 UChar32 c=codepoint[i+1]; | 146 UChar32 c=codepoint[i+1]; |
149 if(UTF8_CHAR_LENGTH(c) != (uint16_t)codepoint[i] || U8_LENGTH(c) != (uin
t16_t)codepoint[i]){ | 147 if(UTF8_CHAR_LENGTH(c) != (uint16_t)codepoint[i] || U8_LENGTH(c) != (uin
t16_t)codepoint[i]){ |
150 log_err("The no: of code units for %lx:- Expected: %d Got: %d\n",
c, codepoint[i], UTF8_CHAR_LENGTH(c)); | 148 log_err("The no: of code units for %lx:- Expected: %d Got: %d\n",
c, codepoint[i], UTF8_CHAR_LENGTH(c)); |
151 }else{ | 149 }else{ |
152 log_verbose("The no: of code units for %lx is %d\n",c, UTF8_CHAR_L
ENGTH(c)); | 150 log_verbose("The no: of code units for %lx is %d\n",c, UTF8_CHAR_L
ENGTH(c)); |
153 } | 151 } |
154 multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE); | 152 multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE); |
155 if(UTF8_NEED_MULTIPLE_UCHAR(c) != multiple){ | 153 if(UTF8_NEED_MULTIPLE_UCHAR(c) != multiple){ |
156 log_err("ERROR: UTF8_NEED_MULTIPLE_UCHAR failed for %lx\n", c); | 154 log_err("ERROR: UTF8_NEED_MULTIPLE_UCHAR failed for %lx\n", c); |
157 } | 155 } |
(...skipping 377 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
535 } | 533 } |
536 } | 534 } |
537 for(i=0, offset=0; offset<sizeof(input); ++i) { | 535 for(i=0, offset=0; offset<sizeof(input); ++i) { |
538 U8_NEXT_UNSAFE(input, offset, c); | 536 U8_NEXT_UNSAFE(input, offset, c); |
539 if(c != codePoints[i]){ | 537 if(c != codePoints[i]){ |
540 log_err("ERROR: U8_NEXT_UNSAFE failed for offset=%ld. Expected:%lx G
ot:%lx\n", | 538 log_err("ERROR: U8_NEXT_UNSAFE failed for offset=%ld. Expected:%lx G
ot:%lx\n", |
541 offset, codePoints[i], c); | 539 offset, codePoints[i], c); |
542 } | 540 } |
543 } | 541 } |
544 | 542 |
545 for(i=LENGTHOF(codePoints)-1, offset=sizeof(input); offset > 0; --i){ | 543 for(i=UPRV_LENGTHOF(codePoints)-1, offset=sizeof(input); offset > 0; --i){ |
546 UTF8_PREV_CHAR_UNSAFE(input, offset, c); | 544 UTF8_PREV_CHAR_UNSAFE(input, offset, c); |
547 if(c != codePoints[i]){ | 545 if(c != codePoints[i]){ |
548 log_err("ERROR: UTF8_PREV_CHAR_UNSAFE failed for offset=%ld. Expect
ed:%lx Got:%lx\n", | 546 log_err("ERROR: UTF8_PREV_CHAR_UNSAFE failed for offset=%ld. Expect
ed:%lx Got:%lx\n", |
549 offset, codePoints[i], c); | 547 offset, codePoints[i], c); |
550 } | 548 } |
551 } | 549 } |
552 for(i=LENGTHOF(codePoints)-1, offset=sizeof(input); offset > 0; --i){ | 550 for(i=UPRV_LENGTHOF(codePoints)-1, offset=sizeof(input); offset > 0; --i){ |
553 U8_PREV_UNSAFE(input, offset, c); | 551 U8_PREV_UNSAFE(input, offset, c); |
554 if(c != codePoints[i]){ | 552 if(c != codePoints[i]){ |
555 log_err("ERROR: U8_PREV_UNSAFE failed for offset=%ld. Expected:%lx
Got:%lx\n", | 553 log_err("ERROR: U8_PREV_UNSAFE failed for offset=%ld. Expected:%lx
Got:%lx\n", |
556 offset, codePoints[i], c); | 554 offset, codePoints[i], c); |
557 } | 555 } |
558 } | 556 } |
559 } | 557 } |
560 | 558 |
561 static void TestFwdBack() { | 559 static void TestFwdBack() { |
562 static const uint8_t input[]={0x61, 0xF0, 0x90, 0x90, 0x81, 0xff, 0x62, 0xc0
, 0x80, 0x7f, 0x8f, 0xc0, 0x63, 0x81, 0x90, 0x90, 0xF0, 0x00}; | 560 static const uint8_t input[]={0x61, 0xF0, 0x90, 0x90, 0x81, 0xff, 0x62, 0xc0
, 0x80, 0x7f, 0x8f, 0xc0, 0x63, 0x81, 0x90, 0x90, 0xF0, 0x00}; |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
601 offsafe=sizeof(input); | 599 offsafe=sizeof(input); |
602 while(offsafe > 0){ | 600 while(offsafe > 0){ |
603 U8_BACK_1(input, 0, offsafe); | 601 U8_BACK_1(input, 0, offsafe); |
604 if(offsafe != back_safe[i]){ | 602 if(offsafe != back_safe[i]){ |
605 log_err("ERROR: U8_BACK_1 offset expected:%d, Got:%d\n", back_safe[i
], offsafe); | 603 log_err("ERROR: U8_BACK_1 offset expected:%d, Got:%d\n", back_safe[i
], offsafe); |
606 } | 604 } |
607 i++; | 605 i++; |
608 } | 606 } |
609 | 607 |
610 offsafe=0; | 608 offsafe=0; |
611 for(i=0; i<LENGTHOF(Nvalue); i++){ | 609 for(i=0; i<UPRV_LENGTHOF(Nvalue); i++){ |
612 UTF8_FWD_N_SAFE(input, offsafe, sizeof(input), Nvalue[i]); | 610 UTF8_FWD_N_SAFE(input, offsafe, sizeof(input), Nvalue[i]); |
613 if(offsafe != fwd_N_safe[i]){ | 611 if(offsafe != fwd_N_safe[i]){ |
614 log_err("ERROR: Forward_N_safe offset=%d expected:%d, Got:%d\n", i,
fwd_N_safe[i], offsafe); | 612 log_err("ERROR: Forward_N_safe offset=%d expected:%d, Got:%d\n", i,
fwd_N_safe[i], offsafe); |
615 } | 613 } |
616 | 614 |
617 } | 615 } |
618 | 616 |
619 offsafe=0; | 617 offsafe=0; |
620 for(i=0; i<LENGTHOF(Nvalue); i++){ | 618 for(i=0; i<UPRV_LENGTHOF(Nvalue); i++){ |
621 U8_FWD_N(input, offsafe, sizeof(input), Nvalue[i]); | 619 U8_FWD_N(input, offsafe, sizeof(input), Nvalue[i]); |
622 if(offsafe != fwd_N_safe[i]){ | 620 if(offsafe != fwd_N_safe[i]){ |
623 log_err("ERROR: U8_FWD_N offset=%d expected:%d, Got:%d\n", i, fwd_N_
safe[i], offsafe); | 621 log_err("ERROR: U8_FWD_N offset=%d expected:%d, Got:%d\n", i, fwd_N_
safe[i], offsafe); |
624 } | 622 } |
625 | 623 |
626 } | 624 } |
627 | 625 |
628 offsafe=sizeof(input); | 626 offsafe=sizeof(input); |
629 for(i=0; i<LENGTHOF(Nvalue); i++){ | 627 for(i=0; i<UPRV_LENGTHOF(Nvalue); i++){ |
630 UTF8_BACK_N_SAFE(input, 0, offsafe, Nvalue[i]); | 628 UTF8_BACK_N_SAFE(input, 0, offsafe, Nvalue[i]); |
631 if(offsafe != back_N_safe[i]){ | 629 if(offsafe != back_N_safe[i]){ |
632 log_err("ERROR: backward_N_safe offset=%d expected:%d, Got:%ld\n", i
, back_N_safe[i], offsafe); | 630 log_err("ERROR: backward_N_safe offset=%d expected:%d, Got:%ld\n", i
, back_N_safe[i], offsafe); |
633 } | 631 } |
634 } | 632 } |
635 | 633 |
636 offsafe=sizeof(input); | 634 offsafe=sizeof(input); |
637 for(i=0; i<LENGTHOF(Nvalue); i++){ | 635 for(i=0; i<UPRV_LENGTHOF(Nvalue); i++){ |
638 U8_BACK_N(input, 0, offsafe, Nvalue[i]); | 636 U8_BACK_N(input, 0, offsafe, Nvalue[i]); |
639 if(offsafe != back_N_safe[i]){ | 637 if(offsafe != back_N_safe[i]){ |
640 log_err("ERROR: U8_BACK_N offset=%d expected:%d, Got:%ld\n", i, back
_N_safe[i], offsafe); | 638 log_err("ERROR: U8_BACK_N offset=%d expected:%d, Got:%ld\n", i, back
_N_safe[i], offsafe); |
641 } | 639 } |
642 } | 640 } |
643 } | 641 } |
644 | 642 |
645 static void TestFwdBackUnsafe() { | 643 static void TestFwdBackUnsafe() { |
646 /* | 644 /* |
647 * Use a (mostly) well-formed UTF-8 string and test at code point boundaries
. | 645 * Use a (mostly) well-formed UTF-8 string and test at code point boundaries
. |
648 * The behavior of _UNSAFE macros for ill-formed strings is undefined. | 646 * The behavior of _UNSAFE macros for ill-formed strings is undefined. |
649 */ | 647 */ |
650 static const uint8_t input[]={ | 648 static const uint8_t input[]={ |
651 0x61, | 649 0x61, |
652 0xf0, 0x90, 0x90, 0x81, | 650 0xf0, 0x90, 0x90, 0x81, |
653 0xc0, 0x80, /* non-shortest form */ | 651 0xc0, 0x80, /* non-shortest form */ |
654 0xe2, 0x82, 0xac, | 652 0xe2, 0x82, 0xac, |
655 0xc2, 0xa1, | 653 0xc2, 0xa1, |
656 0xf4, 0x8f, 0xbf, 0xbf, | 654 0xf4, 0x8f, 0xbf, 0xbf, |
657 0x00 | 655 0x00 |
658 }; | 656 }; |
659 static const int8_t boundaries[]={ 0, 1, 5, 7, 10, 12, 16, 17 }; | 657 static const int8_t boundaries[]={ 0, 1, 5, 7, 10, 12, 16, 17 }; |
660 | 658 |
661 int32_t offset; | 659 int32_t offset; |
662 int32_t i; | 660 int32_t i; |
663 for(i=1, offset=0; offset<LENGTHOF(input); ++i) { | 661 for(i=1, offset=0; offset<UPRV_LENGTHOF(input); ++i) { |
664 UTF8_FWD_1_UNSAFE(input, offset); | 662 UTF8_FWD_1_UNSAFE(input, offset); |
665 if(offset != boundaries[i]){ | 663 if(offset != boundaries[i]){ |
666 log_err("ERROR: UTF8_FWD_1_UNSAFE offset expected:%d, Got:%d\n", bou
ndaries[i], offset); | 664 log_err("ERROR: UTF8_FWD_1_UNSAFE offset expected:%d, Got:%d\n", bou
ndaries[i], offset); |
667 } | 665 } |
668 } | 666 } |
669 for(i=1, offset=0; offset<LENGTHOF(input); ++i) { | 667 for(i=1, offset=0; offset<UPRV_LENGTHOF(input); ++i) { |
670 U8_FWD_1_UNSAFE(input, offset); | 668 U8_FWD_1_UNSAFE(input, offset); |
671 if(offset != boundaries[i]){ | 669 if(offset != boundaries[i]){ |
672 log_err("ERROR: U8_FWD_1_UNSAFE offset expected:%d, Got:%d\n", bound
aries[i], offset); | 670 log_err("ERROR: U8_FWD_1_UNSAFE offset expected:%d, Got:%d\n", bound
aries[i], offset); |
673 } | 671 } |
674 } | 672 } |
675 | 673 |
676 for(i=LENGTHOF(boundaries)-2, offset=LENGTHOF(input); offset>0; --i) { | 674 for(i=UPRV_LENGTHOF(boundaries)-2, offset=UPRV_LENGTHOF(input); offset>0; --
i) { |
677 UTF8_BACK_1_UNSAFE(input, offset); | 675 UTF8_BACK_1_UNSAFE(input, offset); |
678 if(offset != boundaries[i]){ | 676 if(offset != boundaries[i]){ |
679 log_err("ERROR: UTF8_BACK_1_UNSAFE offset expected:%d, Got:%d\n", bo
undaries[i], offset); | 677 log_err("ERROR: UTF8_BACK_1_UNSAFE offset expected:%d, Got:%d\n", bo
undaries[i], offset); |
680 } | 678 } |
681 } | 679 } |
682 for(i=LENGTHOF(boundaries)-2, offset=LENGTHOF(input); offset>0; --i) { | 680 for(i=UPRV_LENGTHOF(boundaries)-2, offset=UPRV_LENGTHOF(input); offset>0; --
i) { |
683 U8_BACK_1_UNSAFE(input, offset); | 681 U8_BACK_1_UNSAFE(input, offset); |
684 if(offset != boundaries[i]){ | 682 if(offset != boundaries[i]){ |
685 log_err("ERROR: U8_BACK_1_UNSAFE offset expected:%d, Got:%d\n", boun
daries[i], offset); | 683 log_err("ERROR: U8_BACK_1_UNSAFE offset expected:%d, Got:%d\n", boun
daries[i], offset); |
686 } | 684 } |
687 } | 685 } |
688 | 686 |
689 for(i=0; i<LENGTHOF(boundaries); ++i) { | 687 for(i=0; i<UPRV_LENGTHOF(boundaries); ++i) { |
690 offset=0; | 688 offset=0; |
691 UTF8_FWD_N_UNSAFE(input, offset, i); | 689 UTF8_FWD_N_UNSAFE(input, offset, i); |
692 if(offset != boundaries[i]) { | 690 if(offset != boundaries[i]) { |
693 log_err("ERROR: UTF8_FWD_N_UNSAFE offset expected:%d, Got:%d\n", bou
ndaries[i], offset); | 691 log_err("ERROR: UTF8_FWD_N_UNSAFE offset expected:%d, Got:%d\n", bou
ndaries[i], offset); |
694 } | 692 } |
695 } | 693 } |
696 for(i=0; i<LENGTHOF(boundaries); ++i) { | 694 for(i=0; i<UPRV_LENGTHOF(boundaries); ++i) { |
697 offset=0; | 695 offset=0; |
698 U8_FWD_N_UNSAFE(input, offset, i); | 696 U8_FWD_N_UNSAFE(input, offset, i); |
699 if(offset != boundaries[i]) { | 697 if(offset != boundaries[i]) { |
700 log_err("ERROR: U8_FWD_N_UNSAFE offset expected:%d, Got:%d\n", bound
aries[i], offset); | 698 log_err("ERROR: U8_FWD_N_UNSAFE offset expected:%d, Got:%d\n", bound
aries[i], offset); |
701 } | 699 } |
702 } | 700 } |
703 | 701 |
704 for(i=0; i<LENGTHOF(boundaries); ++i) { | 702 for(i=0; i<UPRV_LENGTHOF(boundaries); ++i) { |
705 int32_t j=LENGTHOF(boundaries)-1-i; | 703 int32_t j=UPRV_LENGTHOF(boundaries)-1-i; |
706 offset=LENGTHOF(input); | 704 offset=UPRV_LENGTHOF(input); |
707 UTF8_BACK_N_UNSAFE(input, offset, i); | 705 UTF8_BACK_N_UNSAFE(input, offset, i); |
708 if(offset != boundaries[j]) { | 706 if(offset != boundaries[j]) { |
709 log_err("ERROR: UTF8_BACK_N_UNSAFE offset expected:%d, Got:%d\n", bo
undaries[j], offset); | 707 log_err("ERROR: UTF8_BACK_N_UNSAFE offset expected:%d, Got:%d\n", bo
undaries[j], offset); |
710 } | 708 } |
711 } | 709 } |
712 for(i=0; i<LENGTHOF(boundaries); ++i) { | 710 for(i=0; i<UPRV_LENGTHOF(boundaries); ++i) { |
713 int32_t j=LENGTHOF(boundaries)-1-i; | 711 int32_t j=UPRV_LENGTHOF(boundaries)-1-i; |
714 offset=LENGTHOF(input); | 712 offset=UPRV_LENGTHOF(input); |
715 U8_BACK_N_UNSAFE(input, offset, i); | 713 U8_BACK_N_UNSAFE(input, offset, i); |
716 if(offset != boundaries[j]) { | 714 if(offset != boundaries[j]) { |
717 log_err("ERROR: U8_BACK_N_UNSAFE offset expected:%d, Got:%d\n", boun
daries[j], offset); | 715 log_err("ERROR: U8_BACK_N_UNSAFE offset expected:%d, Got:%d\n", boun
daries[j], offset); |
718 } | 716 } |
719 } | 717 } |
720 } | 718 } |
721 | 719 |
722 static void TestSetChar() { | 720 static void TestSetChar() { |
723 static const uint8_t input[] | 721 static const uint8_t input[] |
724 = {0x61, 0xe4, 0xba, 0x8c, 0x7f, 0xfe, 0x62, 0xc5, 0x7f, 0x61, 0x80, 0x8
0, 0xe0, 0x00 }; | 722 = {0x61, 0xe4, 0xba, 0x8c, 0x7f, 0xfe, 0x62, 0xc5, 0x7f, 0x61, 0x80, 0x8
0, 0xe0, 0x00 }; |
725 static const int16_t start_safe[] | 723 static const int16_t start_safe[] |
726 = {0, 1, 1, 1, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14 }; | 724 = {0, 1, 1, 1, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14 }; |
727 static const int16_t limit_safe[] | 725 static const int16_t limit_safe[] |
728 = {0, 1, 4, 4, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14 }; | 726 = {0, 1, 4, 4, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14 }; |
729 | 727 |
730 uint32_t i=0; | 728 uint32_t i=0; |
731 int32_t offset=0, setOffset=0; | 729 int32_t offset=0, setOffset=0; |
732 for(offset=0; offset<=LENGTHOF(input); offset++){ | 730 for(offset=0; offset<=UPRV_LENGTHOF(input); offset++){ |
733 if (offset<LENGTHOF(input)){ | 731 if (offset<UPRV_LENGTHOF(input)){ |
734 setOffset=offset; | 732 setOffset=offset; |
735 UTF8_SET_CHAR_START_SAFE(input, 0, setOffset); | 733 UTF8_SET_CHAR_START_SAFE(input, 0, setOffset); |
736 if(setOffset != start_safe[i]){ | 734 if(setOffset != start_safe[i]){ |
737 log_err("ERROR: UTF8_SET_CHAR_START_SAFE failed for offset=%ld.
Expected:%ld Got:%ld\n", offset, start_safe[i], setOffset); | 735 log_err("ERROR: UTF8_SET_CHAR_START_SAFE failed for offset=%ld.
Expected:%ld Got:%ld\n", offset, start_safe[i], setOffset); |
738 } | 736 } |
739 | 737 |
740 setOffset=offset; | 738 setOffset=offset; |
741 U8_SET_CP_START(input, 0, setOffset); | 739 U8_SET_CP_START(input, 0, setOffset); |
742 if(setOffset != start_safe[i]){ | 740 if(setOffset != start_safe[i]){ |
743 log_err("ERROR: U8_SET_CP_START failed for offset=%ld. Expected:
%ld Got:%ld\n", offset, start_safe[i], setOffset); | 741 log_err("ERROR: U8_SET_CP_START failed for offset=%ld. Expected:
%ld Got:%ld\n", offset, start_safe[i], setOffset); |
(...skipping 19 matching lines...) Expand all Loading... |
763 static void TestSetCharUnsafe() { | 761 static void TestSetCharUnsafe() { |
764 static const uint8_t input[] | 762 static const uint8_t input[] |
765 = {0x61, 0xe4, 0xba, 0x8c, 0x7f, 0x2e, 0x62, 0xc5, 0x7f, 0x61, 0x80, 0x8
0, 0xe0, 0x80, 0x80, 0x00 }; | 763 = {0x61, 0xe4, 0xba, 0x8c, 0x7f, 0x2e, 0x62, 0xc5, 0x7f, 0x61, 0x80, 0x8
0, 0xe0, 0x80, 0x80, 0x00 }; |
766 static const int16_t start_unsafe[] | 764 static const int16_t start_unsafe[] |
767 = {0, 1, 1, 1, 4, 5, 6, 7, 8, 9, 9, 9,
12, 12, 12, 15 }; | 765 = {0, 1, 1, 1, 4, 5, 6, 7, 8, 9, 9, 9,
12, 12, 12, 15 }; |
768 static const int16_t limit_unsafe[] | 766 static const int16_t limit_unsafe[] |
769 = {0, 1, 4, 4, 4, 5, 6, 7, 9, 9, 10, 10,
10, 15, 15, 15, 16 }; | 767 = {0, 1, 4, 4, 4, 5, 6, 7, 9, 9, 10, 10,
10, 15, 15, 15, 16 }; |
770 | 768 |
771 uint32_t i=0; | 769 uint32_t i=0; |
772 int32_t offset=0, setOffset=0; | 770 int32_t offset=0, setOffset=0; |
773 for(offset=0; offset<=LENGTHOF(input); offset++){ | 771 for(offset=0; offset<=UPRV_LENGTHOF(input); offset++){ |
774 if (offset<LENGTHOF(input)){ | 772 if (offset<UPRV_LENGTHOF(input)){ |
775 setOffset=offset; | 773 setOffset=offset; |
776 UTF8_SET_CHAR_START_UNSAFE(input, setOffset); | 774 UTF8_SET_CHAR_START_UNSAFE(input, setOffset); |
777 if(setOffset != start_unsafe[i]){ | 775 if(setOffset != start_unsafe[i]){ |
778 log_err("ERROR: UTF8_SET_CHAR_START_UNSAFE failed for offset=%ld
. Expected:%ld Got:%ld\n", offset, start_unsafe[i], setOffset); | 776 log_err("ERROR: UTF8_SET_CHAR_START_UNSAFE failed for offset=%ld
. Expected:%ld Got:%ld\n", offset, start_unsafe[i], setOffset); |
779 } | 777 } |
780 | 778 |
781 setOffset=offset; | 779 setOffset=offset; |
782 U8_SET_CP_START_UNSAFE(input, setOffset); | 780 U8_SET_CP_START_UNSAFE(input, setOffset); |
783 if(setOffset != start_unsafe[i]){ | 781 if(setOffset != start_unsafe[i]){ |
784 log_err("ERROR: U8_SET_CP_START_UNSAFE failed for offset=%ld. Ex
pected:%ld Got:%ld\n", offset, start_unsafe[i], setOffset); | 782 log_err("ERROR: U8_SET_CP_START_UNSAFE failed for offset=%ld. Ex
pected:%ld Got:%ld\n", offset, start_unsafe[i], setOffset); |
(...skipping 119 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
904 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0x81, 0x69, 0x6a, 0x00}, | 902 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0x81, 0x69, 0x6a, 0x00}, |
905 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0xbf, 0x69, 0x6a, 0x00}, | 903 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0xbf, 0x69, 0x6a, 0x00}, |
906 | 904 |
907 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0xc3, 0xbe, 0x6a, 0x00}, | 905 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0xc3, 0xbe, 0x6a, 0x00}, |
908 | 906 |
909 }; | 907 }; |
910 uint16_t i, count=0; | 908 uint16_t i, count=0; |
911 uint8_t str[12]; | 909 uint8_t str[12]; |
912 uint32_t offset; | 910 uint32_t offset; |
913 /* UChar32 c=0;*/ | 911 /* UChar32 c=0;*/ |
914 uint16_t size=LENGTHOF(s); | 912 uint16_t size=UPRV_LENGTHOF(s); |
915 for(i=0; i<LENGTHOF(test); i=(uint16_t)(i+2)){ | 913 for(i=0; i<UPRV_LENGTHOF(test); i=(uint16_t)(i+2)){ |
916 uprv_memcpy(str, s, size); | 914 uprv_memcpy(str, s, size); |
917 offset=test[i]; | 915 offset=test[i]; |
918 if(count<13){ | 916 if(count<13){ |
919 UTF8_APPEND_CHAR_UNSAFE(str, offset, test[i+1]); | 917 UTF8_APPEND_CHAR_UNSAFE(str, offset, test[i+1]); |
920 if(offset != movedOffset[count]){ | 918 if(offset != movedOffset[count]){ |
921 log_err("ERROR: UTF8_APPEND_CHAR_UNSAFE failed to move the offse
t correctly for count=%d.\nExpectedOffset=%d currentOffset=%d\n", | 919 log_err("ERROR: UTF8_APPEND_CHAR_UNSAFE failed to move the offse
t correctly for count=%d.\nExpectedOffset=%d currentOffset=%d\n", |
922 count, movedOffset[count], offset); | 920 count, movedOffset[count], offset); |
923 | 921 |
924 } | 922 } |
925 if(uprv_memcmp(str, result[count], size) !=0){ | 923 if(uprv_memcmp(str, result[count], size) !=0){ |
(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
996 /* none from this line */ | 994 /* none from this line */ |
997 0, 0xd0, 0x80 | 995 0, 0xd0, 0x80 |
998 }; | 996 }; |
999 | 997 |
1000 uint8_t buffer[100]; | 998 uint8_t buffer[100]; |
1001 UChar32 c; | 999 UChar32 c; |
1002 int32_t i, length; | 1000 int32_t i, length; |
1003 UBool isError, expectIsError, wrongIsError; | 1001 UBool isError, expectIsError, wrongIsError; |
1004 | 1002 |
1005 length=0; | 1003 length=0; |
1006 for(i=0; i<LENGTHOF(codePoints); ++i) { | 1004 for(i=0; i<UPRV_LENGTHOF(codePoints); ++i) { |
1007 c=codePoints[i]; | 1005 c=codePoints[i]; |
1008 if(c<0 || 0x10ffff<c) { | 1006 if(c<0 || 0x10ffff<c) { |
1009 continue; /* skip non-code points for U8_APPEND_UNSAFE */ | 1007 continue; /* skip non-code points for U8_APPEND_UNSAFE */ |
1010 } | 1008 } |
1011 | 1009 |
1012 U8_APPEND_UNSAFE(buffer, length, c); | 1010 U8_APPEND_UNSAFE(buffer, length, c); |
1013 } | 1011 } |
1014 if(length!=LENGTHOF(expectUnsafe) || 0!=memcmp(buffer, expectUnsafe, length)
) { | 1012 if(length!=UPRV_LENGTHOF(expectUnsafe) || 0!=memcmp(buffer, expectUnsafe, le
ngth)) { |
1015 log_err("U8_APPEND_UNSAFE did not generate the expected output\n"); | 1013 log_err("U8_APPEND_UNSAFE did not generate the expected output\n"); |
1016 } | 1014 } |
1017 | 1015 |
1018 length=0; | 1016 length=0; |
1019 wrongIsError=FALSE; | 1017 wrongIsError=FALSE; |
1020 for(i=0; i<LENGTHOF(codePoints); ++i) { | 1018 for(i=0; i<UPRV_LENGTHOF(codePoints); ++i) { |
1021 c=codePoints[i]; | 1019 c=codePoints[i]; |
1022 expectIsError= c<0 || 0x10ffff<c || U_IS_SURROGATE(c); | 1020 expectIsError= c<0 || 0x10ffff<c || U_IS_SURROGATE(c); |
1023 isError=FALSE; | 1021 isError=FALSE; |
1024 | 1022 |
1025 U8_APPEND(buffer, length, LENGTHOF(buffer), c, isError); | 1023 U8_APPEND(buffer, length, UPRV_LENGTHOF(buffer), c, isError); |
1026 wrongIsError|= isError!=expectIsError; | 1024 wrongIsError|= isError!=expectIsError; |
1027 } | 1025 } |
1028 if(wrongIsError) { | 1026 if(wrongIsError) { |
1029 log_err("U8_APPEND did not set isError correctly\n"); | 1027 log_err("U8_APPEND did not set isError correctly\n"); |
1030 } | 1028 } |
1031 if(length!=LENGTHOF(expectSafe) || 0!=memcmp(buffer, expectSafe, length)) { | 1029 if(length!=UPRV_LENGTHOF(expectSafe) || 0!=memcmp(buffer, expectSafe, length
)) { |
1032 log_err("U8_APPEND did not generate the expected output\n"); | 1030 log_err("U8_APPEND did not generate the expected output\n"); |
1033 } | 1031 } |
1034 } | 1032 } |
1035 | 1033 |
1036 static void | 1034 static void |
1037 TestSurrogates() { | 1035 TestSurrogates() { |
1038 static const uint8_t b[]={ | 1036 static const uint8_t b[]={ |
1039 0xc3, 0x9f, /* 00DF */ | 1037 0xc3, 0x9f, /* 00DF */ |
1040 0xed, 0x9f, 0xbf, /* D7FF */ | 1038 0xed, 0x9f, 0xbf, /* D7FF */ |
1041 0xed, 0xa0, 0x81, /* D801 */ | 1039 0xed, 0xa0, 0x81, /* D801 */ |
1042 0xed, 0xbf, 0xbe, /* DFFE */ | 1040 0xed, 0xbf, 0xbe, /* DFFE */ |
1043 0xee, 0x80, 0x80, /* E000 */ | 1041 0xee, 0x80, 0x80, /* E000 */ |
1044 0xf0, 0x97, 0xbf, 0xbe /* 17FFE */ | 1042 0xf0, 0x97, 0xbf, 0xbe /* 17FFE */ |
1045 }; | 1043 }; |
1046 static const UChar32 cp[]={ | 1044 static const UChar32 cp[]={ |
1047 0xdf, 0xd7ff, 0xd801, 0xdffe, 0xe000, 0x17ffe | 1045 0xdf, 0xd7ff, 0xd801, 0xdffe, 0xe000, 0x17ffe |
1048 }; | 1046 }; |
1049 | 1047 |
1050 UChar32 cu, cs, cl; | 1048 UChar32 cu, cs, cl; |
1051 int32_t i, j, k, iu, is, il, length; | 1049 int32_t i, j, k, iu, is, il, length; |
1052 | 1050 |
1053 k=0; /* index into cp[] */ | 1051 k=0; /* index into cp[] */ |
1054 length=LENGTHOF(b); | 1052 length=UPRV_LENGTHOF(b); |
1055 for(i=0; i<length;) { | 1053 for(i=0; i<length;) { |
1056 j=i; | 1054 j=i; |
1057 U8_NEXT_UNSAFE(b, j, cu); | 1055 U8_NEXT_UNSAFE(b, j, cu); |
1058 iu=j; | 1056 iu=j; |
1059 | 1057 |
1060 j=i; | 1058 j=i; |
1061 U8_NEXT(b, j, length, cs); | 1059 U8_NEXT(b, j, length, cs); |
1062 is=j; | 1060 is=j; |
1063 | 1061 |
1064 j=i; | 1062 j=i; |
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1123 i=iu; /* go back by one UTF-8 sequence */ | 1121 i=iu; /* go back by one UTF-8 sequence */ |
1124 } | 1122 } |
1125 } | 1123 } |
1126 | 1124 |
1127 static void printUChars(const uint8_t *uchars, int16_t len){ | 1125 static void printUChars(const uint8_t *uchars, int16_t len){ |
1128 int16_t i=0; | 1126 int16_t i=0; |
1129 for(i=0; i<len; i++){ | 1127 for(i=0; i<len; i++){ |
1130 log_err("0x%02x ", *(uchars+i)); | 1128 log_err("0x%02x ", *(uchars+i)); |
1131 } | 1129 } |
1132 } | 1130 } |
OLD | NEW |