| OLD | NEW |
| 1 /******************************************************************** | 1 /******************************************************************** |
| 2 * COPYRIGHT: | 2 * COPYRIGHT: |
| 3 * Copyright (c) 1998-2012, International Business Machines Corporation and | 3 * Copyright (c) 1998-2014, International Business Machines Corporation and |
| 4 * others. All Rights Reserved. | 4 * others. All Rights Reserved. |
| 5 ********************************************************************/ | 5 ********************************************************************/ |
| 6 /* | 6 /* |
| 7 * File utf8tst.c | 7 * File utf8tst.c |
| 8 * | 8 * |
| 9 * Modification History: | 9 * Modification History: |
| 10 * | 10 * |
| 11 * Date Name Description | 11 * Date Name Description |
| 12 * 07/24/2000 Madhu Creation | 12 * 07/24/2000 Madhu Creation |
| 13 ******************************************************************************* | 13 ******************************************************************************* |
| 14 */ | 14 */ |
| 15 | 15 |
| 16 #include "unicode/utypes.h" | 16 #include "unicode/utypes.h" |
| 17 #include "unicode/utf8.h" | 17 #include "unicode/utf8.h" |
| 18 #include "cmemory.h" | 18 #include "cmemory.h" |
| 19 #include "cintltst.h" | 19 #include "cintltst.h" |
| 20 | 20 |
| 21 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) | |
| 22 | |
| 23 /* lenient UTF-8 ------------------------------------------------------------ */ | 21 /* lenient UTF-8 ------------------------------------------------------------ */ |
| 24 | 22 |
| 25 /* | 23 /* |
| 26 * Lenient UTF-8 differs from conformant UTF-8 in that it allows surrogate | 24 * Lenient UTF-8 differs from conformant UTF-8 in that it allows surrogate |
| 27 * code points with their "natural" encoding. | 25 * code points with their "natural" encoding. |
| 28 * Effectively, this allows a mix of UTF-8 and CESU-8 as well as encodings of | 26 * Effectively, this allows a mix of UTF-8 and CESU-8 as well as encodings of |
| 29 * single surrogates. | 27 * single surrogates. |
| 30 * | 28 * |
| 31 * This is not conformant with UTF-8. | 29 * This is not conformant with UTF-8. |
| 32 * | 30 * |
| (...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 94 addTest(root, &TestAppendChar, "utf8tst/TestAppendChar"); | 92 addTest(root, &TestAppendChar, "utf8tst/TestAppendChar"); |
| 95 addTest(root, &TestAppend, "utf8tst/TestAppend"); | 93 addTest(root, &TestAppend, "utf8tst/TestAppend"); |
| 96 addTest(root, &TestSurrogates, "utf8tst/TestSurrogates"); | 94 addTest(root, &TestSurrogates, "utf8tst/TestSurrogates"); |
| 97 } | 95 } |
| 98 | 96 |
| 99 static void TestCodeUnitValues() | 97 static void TestCodeUnitValues() |
| 100 { | 98 { |
| 101 static const uint8_t codeunit[]={0x00, 0x65, 0x7e, 0x7f, 0xc0, 0xc4, 0xf0, 0
xfd, 0x80, 0x81, 0xbc, 0xbe,}; | 99 static const uint8_t codeunit[]={0x00, 0x65, 0x7e, 0x7f, 0xc0, 0xc4, 0xf0, 0
xfd, 0x80, 0x81, 0xbc, 0xbe,}; |
| 102 | 100 |
| 103 int16_t i; | 101 int16_t i; |
| 104 for(i=0; i<LENGTHOF(codeunit); i++){ | 102 for(i=0; i<UPRV_LENGTHOF(codeunit); i++){ |
| 105 uint8_t c=codeunit[i]; | 103 uint8_t c=codeunit[i]; |
| 106 log_verbose("Testing code unit value of %x\n", c); | 104 log_verbose("Testing code unit value of %x\n", c); |
| 107 if(i<4){ | 105 if(i<4){ |
| 108 if(!UTF8_IS_SINGLE(c) || UTF8_IS_LEAD(c) || UTF8_IS_TRAIL(c) || !U8_
IS_SINGLE(c) || U8_IS_LEAD(c) || U8_IS_TRAIL(c)){ | 106 if(!UTF8_IS_SINGLE(c) || UTF8_IS_LEAD(c) || UTF8_IS_TRAIL(c) || !U8_
IS_SINGLE(c) || U8_IS_LEAD(c) || U8_IS_TRAIL(c)){ |
| 109 log_err("ERROR: 0x%02x is a single byte but results in single: %
c lead: %c trail: %c\n", | 107 log_err("ERROR: 0x%02x is a single byte but results in single: %
c lead: %c trail: %c\n", |
| 110 c, UTF8_IS_SINGLE(c) ? 'y' : 'n', UTF8_IS_LEAD(c) ? 'y' : 'n
', UTF8_IS_TRAIL(c) ? 'y' : 'n'); | 108 c, UTF8_IS_SINGLE(c) ? 'y' : 'n', UTF8_IS_LEAD(c) ? 'y' : 'n
', UTF8_IS_TRAIL(c) ? 'y' : 'n'); |
| 111 } | 109 } |
| 112 } else if(i< 8){ | 110 } else if(i< 8){ |
| 113 if(!UTF8_IS_LEAD(c) || UTF8_IS_SINGLE(c) || UTF8_IS_TRAIL(c) || !U8_
IS_LEAD(c) || U8_IS_SINGLE(c) || U8_IS_TRAIL(c)){ | 111 if(!UTF8_IS_LEAD(c) || UTF8_IS_SINGLE(c) || UTF8_IS_TRAIL(c) || !U8_
IS_LEAD(c) || U8_IS_SINGLE(c) || U8_IS_TRAIL(c)){ |
| 114 log_err("ERROR: 0x%02x is a lead byte but results in single: %c
lead: %c trail: %c\n", | 112 log_err("ERROR: 0x%02x is a lead byte but results in single: %c
lead: %c trail: %c\n", |
| (...skipping 22 matching lines...) Expand all Loading... |
| 137 4, 0x24506, | 135 4, 0x24506, |
| 138 4, 0x20402, | 136 4, 0x20402, |
| 139 4, 0x10402, | 137 4, 0x10402, |
| 140 3, 0xd7ff, | 138 3, 0xd7ff, |
| 141 3, 0xe000, | 139 3, 0xe000, |
| 142 | 140 |
| 143 }; | 141 }; |
| 144 | 142 |
| 145 int16_t i; | 143 int16_t i; |
| 146 UBool multiple; | 144 UBool multiple; |
| 147 for(i=0; i<LENGTHOF(codepoint); i=(int16_t)(i+2)){ | 145 for(i=0; i<UPRV_LENGTHOF(codepoint); i=(int16_t)(i+2)){ |
| 148 UChar32 c=codepoint[i+1]; | 146 UChar32 c=codepoint[i+1]; |
| 149 if(UTF8_CHAR_LENGTH(c) != (uint16_t)codepoint[i] || U8_LENGTH(c) != (uin
t16_t)codepoint[i]){ | 147 if(UTF8_CHAR_LENGTH(c) != (uint16_t)codepoint[i] || U8_LENGTH(c) != (uin
t16_t)codepoint[i]){ |
| 150 log_err("The no: of code units for %lx:- Expected: %d Got: %d\n",
c, codepoint[i], UTF8_CHAR_LENGTH(c)); | 148 log_err("The no: of code units for %lx:- Expected: %d Got: %d\n",
c, codepoint[i], UTF8_CHAR_LENGTH(c)); |
| 151 }else{ | 149 }else{ |
| 152 log_verbose("The no: of code units for %lx is %d\n",c, UTF8_CHAR_L
ENGTH(c)); | 150 log_verbose("The no: of code units for %lx is %d\n",c, UTF8_CHAR_L
ENGTH(c)); |
| 153 } | 151 } |
| 154 multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE); | 152 multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE); |
| 155 if(UTF8_NEED_MULTIPLE_UCHAR(c) != multiple){ | 153 if(UTF8_NEED_MULTIPLE_UCHAR(c) != multiple){ |
| 156 log_err("ERROR: UTF8_NEED_MULTIPLE_UCHAR failed for %lx\n", c); | 154 log_err("ERROR: UTF8_NEED_MULTIPLE_UCHAR failed for %lx\n", c); |
| 157 } | 155 } |
| (...skipping 377 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 535 } | 533 } |
| 536 } | 534 } |
| 537 for(i=0, offset=0; offset<sizeof(input); ++i) { | 535 for(i=0, offset=0; offset<sizeof(input); ++i) { |
| 538 U8_NEXT_UNSAFE(input, offset, c); | 536 U8_NEXT_UNSAFE(input, offset, c); |
| 539 if(c != codePoints[i]){ | 537 if(c != codePoints[i]){ |
| 540 log_err("ERROR: U8_NEXT_UNSAFE failed for offset=%ld. Expected:%lx G
ot:%lx\n", | 538 log_err("ERROR: U8_NEXT_UNSAFE failed for offset=%ld. Expected:%lx G
ot:%lx\n", |
| 541 offset, codePoints[i], c); | 539 offset, codePoints[i], c); |
| 542 } | 540 } |
| 543 } | 541 } |
| 544 | 542 |
| 545 for(i=LENGTHOF(codePoints)-1, offset=sizeof(input); offset > 0; --i){ | 543 for(i=UPRV_LENGTHOF(codePoints)-1, offset=sizeof(input); offset > 0; --i){ |
| 546 UTF8_PREV_CHAR_UNSAFE(input, offset, c); | 544 UTF8_PREV_CHAR_UNSAFE(input, offset, c); |
| 547 if(c != codePoints[i]){ | 545 if(c != codePoints[i]){ |
| 548 log_err("ERROR: UTF8_PREV_CHAR_UNSAFE failed for offset=%ld. Expect
ed:%lx Got:%lx\n", | 546 log_err("ERROR: UTF8_PREV_CHAR_UNSAFE failed for offset=%ld. Expect
ed:%lx Got:%lx\n", |
| 549 offset, codePoints[i], c); | 547 offset, codePoints[i], c); |
| 550 } | 548 } |
| 551 } | 549 } |
| 552 for(i=LENGTHOF(codePoints)-1, offset=sizeof(input); offset > 0; --i){ | 550 for(i=UPRV_LENGTHOF(codePoints)-1, offset=sizeof(input); offset > 0; --i){ |
| 553 U8_PREV_UNSAFE(input, offset, c); | 551 U8_PREV_UNSAFE(input, offset, c); |
| 554 if(c != codePoints[i]){ | 552 if(c != codePoints[i]){ |
| 555 log_err("ERROR: U8_PREV_UNSAFE failed for offset=%ld. Expected:%lx
Got:%lx\n", | 553 log_err("ERROR: U8_PREV_UNSAFE failed for offset=%ld. Expected:%lx
Got:%lx\n", |
| 556 offset, codePoints[i], c); | 554 offset, codePoints[i], c); |
| 557 } | 555 } |
| 558 } | 556 } |
| 559 } | 557 } |
| 560 | 558 |
| 561 static void TestFwdBack() { | 559 static void TestFwdBack() { |
| 562 static const uint8_t input[]={0x61, 0xF0, 0x90, 0x90, 0x81, 0xff, 0x62, 0xc0
, 0x80, 0x7f, 0x8f, 0xc0, 0x63, 0x81, 0x90, 0x90, 0xF0, 0x00}; | 560 static const uint8_t input[]={0x61, 0xF0, 0x90, 0x90, 0x81, 0xff, 0x62, 0xc0
, 0x80, 0x7f, 0x8f, 0xc0, 0x63, 0x81, 0x90, 0x90, 0xF0, 0x00}; |
| (...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 601 offsafe=sizeof(input); | 599 offsafe=sizeof(input); |
| 602 while(offsafe > 0){ | 600 while(offsafe > 0){ |
| 603 U8_BACK_1(input, 0, offsafe); | 601 U8_BACK_1(input, 0, offsafe); |
| 604 if(offsafe != back_safe[i]){ | 602 if(offsafe != back_safe[i]){ |
| 605 log_err("ERROR: U8_BACK_1 offset expected:%d, Got:%d\n", back_safe[i
], offsafe); | 603 log_err("ERROR: U8_BACK_1 offset expected:%d, Got:%d\n", back_safe[i
], offsafe); |
| 606 } | 604 } |
| 607 i++; | 605 i++; |
| 608 } | 606 } |
| 609 | 607 |
| 610 offsafe=0; | 608 offsafe=0; |
| 611 for(i=0; i<LENGTHOF(Nvalue); i++){ | 609 for(i=0; i<UPRV_LENGTHOF(Nvalue); i++){ |
| 612 UTF8_FWD_N_SAFE(input, offsafe, sizeof(input), Nvalue[i]); | 610 UTF8_FWD_N_SAFE(input, offsafe, sizeof(input), Nvalue[i]); |
| 613 if(offsafe != fwd_N_safe[i]){ | 611 if(offsafe != fwd_N_safe[i]){ |
| 614 log_err("ERROR: Forward_N_safe offset=%d expected:%d, Got:%d\n", i,
fwd_N_safe[i], offsafe); | 612 log_err("ERROR: Forward_N_safe offset=%d expected:%d, Got:%d\n", i,
fwd_N_safe[i], offsafe); |
| 615 } | 613 } |
| 616 | 614 |
| 617 } | 615 } |
| 618 | 616 |
| 619 offsafe=0; | 617 offsafe=0; |
| 620 for(i=0; i<LENGTHOF(Nvalue); i++){ | 618 for(i=0; i<UPRV_LENGTHOF(Nvalue); i++){ |
| 621 U8_FWD_N(input, offsafe, sizeof(input), Nvalue[i]); | 619 U8_FWD_N(input, offsafe, sizeof(input), Nvalue[i]); |
| 622 if(offsafe != fwd_N_safe[i]){ | 620 if(offsafe != fwd_N_safe[i]){ |
| 623 log_err("ERROR: U8_FWD_N offset=%d expected:%d, Got:%d\n", i, fwd_N_
safe[i], offsafe); | 621 log_err("ERROR: U8_FWD_N offset=%d expected:%d, Got:%d\n", i, fwd_N_
safe[i], offsafe); |
| 624 } | 622 } |
| 625 | 623 |
| 626 } | 624 } |
| 627 | 625 |
| 628 offsafe=sizeof(input); | 626 offsafe=sizeof(input); |
| 629 for(i=0; i<LENGTHOF(Nvalue); i++){ | 627 for(i=0; i<UPRV_LENGTHOF(Nvalue); i++){ |
| 630 UTF8_BACK_N_SAFE(input, 0, offsafe, Nvalue[i]); | 628 UTF8_BACK_N_SAFE(input, 0, offsafe, Nvalue[i]); |
| 631 if(offsafe != back_N_safe[i]){ | 629 if(offsafe != back_N_safe[i]){ |
| 632 log_err("ERROR: backward_N_safe offset=%d expected:%d, Got:%ld\n", i
, back_N_safe[i], offsafe); | 630 log_err("ERROR: backward_N_safe offset=%d expected:%d, Got:%ld\n", i
, back_N_safe[i], offsafe); |
| 633 } | 631 } |
| 634 } | 632 } |
| 635 | 633 |
| 636 offsafe=sizeof(input); | 634 offsafe=sizeof(input); |
| 637 for(i=0; i<LENGTHOF(Nvalue); i++){ | 635 for(i=0; i<UPRV_LENGTHOF(Nvalue); i++){ |
| 638 U8_BACK_N(input, 0, offsafe, Nvalue[i]); | 636 U8_BACK_N(input, 0, offsafe, Nvalue[i]); |
| 639 if(offsafe != back_N_safe[i]){ | 637 if(offsafe != back_N_safe[i]){ |
| 640 log_err("ERROR: U8_BACK_N offset=%d expected:%d, Got:%ld\n", i, back
_N_safe[i], offsafe); | 638 log_err("ERROR: U8_BACK_N offset=%d expected:%d, Got:%ld\n", i, back
_N_safe[i], offsafe); |
| 641 } | 639 } |
| 642 } | 640 } |
| 643 } | 641 } |
| 644 | 642 |
| 645 static void TestFwdBackUnsafe() { | 643 static void TestFwdBackUnsafe() { |
| 646 /* | 644 /* |
| 647 * Use a (mostly) well-formed UTF-8 string and test at code point boundaries
. | 645 * Use a (mostly) well-formed UTF-8 string and test at code point boundaries
. |
| 648 * The behavior of _UNSAFE macros for ill-formed strings is undefined. | 646 * The behavior of _UNSAFE macros for ill-formed strings is undefined. |
| 649 */ | 647 */ |
| 650 static const uint8_t input[]={ | 648 static const uint8_t input[]={ |
| 651 0x61, | 649 0x61, |
| 652 0xf0, 0x90, 0x90, 0x81, | 650 0xf0, 0x90, 0x90, 0x81, |
| 653 0xc0, 0x80, /* non-shortest form */ | 651 0xc0, 0x80, /* non-shortest form */ |
| 654 0xe2, 0x82, 0xac, | 652 0xe2, 0x82, 0xac, |
| 655 0xc2, 0xa1, | 653 0xc2, 0xa1, |
| 656 0xf4, 0x8f, 0xbf, 0xbf, | 654 0xf4, 0x8f, 0xbf, 0xbf, |
| 657 0x00 | 655 0x00 |
| 658 }; | 656 }; |
| 659 static const int8_t boundaries[]={ 0, 1, 5, 7, 10, 12, 16, 17 }; | 657 static const int8_t boundaries[]={ 0, 1, 5, 7, 10, 12, 16, 17 }; |
| 660 | 658 |
| 661 int32_t offset; | 659 int32_t offset; |
| 662 int32_t i; | 660 int32_t i; |
| 663 for(i=1, offset=0; offset<LENGTHOF(input); ++i) { | 661 for(i=1, offset=0; offset<UPRV_LENGTHOF(input); ++i) { |
| 664 UTF8_FWD_1_UNSAFE(input, offset); | 662 UTF8_FWD_1_UNSAFE(input, offset); |
| 665 if(offset != boundaries[i]){ | 663 if(offset != boundaries[i]){ |
| 666 log_err("ERROR: UTF8_FWD_1_UNSAFE offset expected:%d, Got:%d\n", bou
ndaries[i], offset); | 664 log_err("ERROR: UTF8_FWD_1_UNSAFE offset expected:%d, Got:%d\n", bou
ndaries[i], offset); |
| 667 } | 665 } |
| 668 } | 666 } |
| 669 for(i=1, offset=0; offset<LENGTHOF(input); ++i) { | 667 for(i=1, offset=0; offset<UPRV_LENGTHOF(input); ++i) { |
| 670 U8_FWD_1_UNSAFE(input, offset); | 668 U8_FWD_1_UNSAFE(input, offset); |
| 671 if(offset != boundaries[i]){ | 669 if(offset != boundaries[i]){ |
| 672 log_err("ERROR: U8_FWD_1_UNSAFE offset expected:%d, Got:%d\n", bound
aries[i], offset); | 670 log_err("ERROR: U8_FWD_1_UNSAFE offset expected:%d, Got:%d\n", bound
aries[i], offset); |
| 673 } | 671 } |
| 674 } | 672 } |
| 675 | 673 |
| 676 for(i=LENGTHOF(boundaries)-2, offset=LENGTHOF(input); offset>0; --i) { | 674 for(i=UPRV_LENGTHOF(boundaries)-2, offset=UPRV_LENGTHOF(input); offset>0; --
i) { |
| 677 UTF8_BACK_1_UNSAFE(input, offset); | 675 UTF8_BACK_1_UNSAFE(input, offset); |
| 678 if(offset != boundaries[i]){ | 676 if(offset != boundaries[i]){ |
| 679 log_err("ERROR: UTF8_BACK_1_UNSAFE offset expected:%d, Got:%d\n", bo
undaries[i], offset); | 677 log_err("ERROR: UTF8_BACK_1_UNSAFE offset expected:%d, Got:%d\n", bo
undaries[i], offset); |
| 680 } | 678 } |
| 681 } | 679 } |
| 682 for(i=LENGTHOF(boundaries)-2, offset=LENGTHOF(input); offset>0; --i) { | 680 for(i=UPRV_LENGTHOF(boundaries)-2, offset=UPRV_LENGTHOF(input); offset>0; --
i) { |
| 683 U8_BACK_1_UNSAFE(input, offset); | 681 U8_BACK_1_UNSAFE(input, offset); |
| 684 if(offset != boundaries[i]){ | 682 if(offset != boundaries[i]){ |
| 685 log_err("ERROR: U8_BACK_1_UNSAFE offset expected:%d, Got:%d\n", boun
daries[i], offset); | 683 log_err("ERROR: U8_BACK_1_UNSAFE offset expected:%d, Got:%d\n", boun
daries[i], offset); |
| 686 } | 684 } |
| 687 } | 685 } |
| 688 | 686 |
| 689 for(i=0; i<LENGTHOF(boundaries); ++i) { | 687 for(i=0; i<UPRV_LENGTHOF(boundaries); ++i) { |
| 690 offset=0; | 688 offset=0; |
| 691 UTF8_FWD_N_UNSAFE(input, offset, i); | 689 UTF8_FWD_N_UNSAFE(input, offset, i); |
| 692 if(offset != boundaries[i]) { | 690 if(offset != boundaries[i]) { |
| 693 log_err("ERROR: UTF8_FWD_N_UNSAFE offset expected:%d, Got:%d\n", bou
ndaries[i], offset); | 691 log_err("ERROR: UTF8_FWD_N_UNSAFE offset expected:%d, Got:%d\n", bou
ndaries[i], offset); |
| 694 } | 692 } |
| 695 } | 693 } |
| 696 for(i=0; i<LENGTHOF(boundaries); ++i) { | 694 for(i=0; i<UPRV_LENGTHOF(boundaries); ++i) { |
| 697 offset=0; | 695 offset=0; |
| 698 U8_FWD_N_UNSAFE(input, offset, i); | 696 U8_FWD_N_UNSAFE(input, offset, i); |
| 699 if(offset != boundaries[i]) { | 697 if(offset != boundaries[i]) { |
| 700 log_err("ERROR: U8_FWD_N_UNSAFE offset expected:%d, Got:%d\n", bound
aries[i], offset); | 698 log_err("ERROR: U8_FWD_N_UNSAFE offset expected:%d, Got:%d\n", bound
aries[i], offset); |
| 701 } | 699 } |
| 702 } | 700 } |
| 703 | 701 |
| 704 for(i=0; i<LENGTHOF(boundaries); ++i) { | 702 for(i=0; i<UPRV_LENGTHOF(boundaries); ++i) { |
| 705 int32_t j=LENGTHOF(boundaries)-1-i; | 703 int32_t j=UPRV_LENGTHOF(boundaries)-1-i; |
| 706 offset=LENGTHOF(input); | 704 offset=UPRV_LENGTHOF(input); |
| 707 UTF8_BACK_N_UNSAFE(input, offset, i); | 705 UTF8_BACK_N_UNSAFE(input, offset, i); |
| 708 if(offset != boundaries[j]) { | 706 if(offset != boundaries[j]) { |
| 709 log_err("ERROR: UTF8_BACK_N_UNSAFE offset expected:%d, Got:%d\n", bo
undaries[j], offset); | 707 log_err("ERROR: UTF8_BACK_N_UNSAFE offset expected:%d, Got:%d\n", bo
undaries[j], offset); |
| 710 } | 708 } |
| 711 } | 709 } |
| 712 for(i=0; i<LENGTHOF(boundaries); ++i) { | 710 for(i=0; i<UPRV_LENGTHOF(boundaries); ++i) { |
| 713 int32_t j=LENGTHOF(boundaries)-1-i; | 711 int32_t j=UPRV_LENGTHOF(boundaries)-1-i; |
| 714 offset=LENGTHOF(input); | 712 offset=UPRV_LENGTHOF(input); |
| 715 U8_BACK_N_UNSAFE(input, offset, i); | 713 U8_BACK_N_UNSAFE(input, offset, i); |
| 716 if(offset != boundaries[j]) { | 714 if(offset != boundaries[j]) { |
| 717 log_err("ERROR: U8_BACK_N_UNSAFE offset expected:%d, Got:%d\n", boun
daries[j], offset); | 715 log_err("ERROR: U8_BACK_N_UNSAFE offset expected:%d, Got:%d\n", boun
daries[j], offset); |
| 718 } | 716 } |
| 719 } | 717 } |
| 720 } | 718 } |
| 721 | 719 |
| 722 static void TestSetChar() { | 720 static void TestSetChar() { |
| 723 static const uint8_t input[] | 721 static const uint8_t input[] |
| 724 = {0x61, 0xe4, 0xba, 0x8c, 0x7f, 0xfe, 0x62, 0xc5, 0x7f, 0x61, 0x80, 0x8
0, 0xe0, 0x00 }; | 722 = {0x61, 0xe4, 0xba, 0x8c, 0x7f, 0xfe, 0x62, 0xc5, 0x7f, 0x61, 0x80, 0x8
0, 0xe0, 0x00 }; |
| 725 static const int16_t start_safe[] | 723 static const int16_t start_safe[] |
| 726 = {0, 1, 1, 1, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14 }; | 724 = {0, 1, 1, 1, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14 }; |
| 727 static const int16_t limit_safe[] | 725 static const int16_t limit_safe[] |
| 728 = {0, 1, 4, 4, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14 }; | 726 = {0, 1, 4, 4, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14 }; |
| 729 | 727 |
| 730 uint32_t i=0; | 728 uint32_t i=0; |
| 731 int32_t offset=0, setOffset=0; | 729 int32_t offset=0, setOffset=0; |
| 732 for(offset=0; offset<=LENGTHOF(input); offset++){ | 730 for(offset=0; offset<=UPRV_LENGTHOF(input); offset++){ |
| 733 if (offset<LENGTHOF(input)){ | 731 if (offset<UPRV_LENGTHOF(input)){ |
| 734 setOffset=offset; | 732 setOffset=offset; |
| 735 UTF8_SET_CHAR_START_SAFE(input, 0, setOffset); | 733 UTF8_SET_CHAR_START_SAFE(input, 0, setOffset); |
| 736 if(setOffset != start_safe[i]){ | 734 if(setOffset != start_safe[i]){ |
| 737 log_err("ERROR: UTF8_SET_CHAR_START_SAFE failed for offset=%ld.
Expected:%ld Got:%ld\n", offset, start_safe[i], setOffset); | 735 log_err("ERROR: UTF8_SET_CHAR_START_SAFE failed for offset=%ld.
Expected:%ld Got:%ld\n", offset, start_safe[i], setOffset); |
| 738 } | 736 } |
| 739 | 737 |
| 740 setOffset=offset; | 738 setOffset=offset; |
| 741 U8_SET_CP_START(input, 0, setOffset); | 739 U8_SET_CP_START(input, 0, setOffset); |
| 742 if(setOffset != start_safe[i]){ | 740 if(setOffset != start_safe[i]){ |
| 743 log_err("ERROR: U8_SET_CP_START failed for offset=%ld. Expected:
%ld Got:%ld\n", offset, start_safe[i], setOffset); | 741 log_err("ERROR: U8_SET_CP_START failed for offset=%ld. Expected:
%ld Got:%ld\n", offset, start_safe[i], setOffset); |
| (...skipping 19 matching lines...) Expand all Loading... |
| 763 static void TestSetCharUnsafe() { | 761 static void TestSetCharUnsafe() { |
| 764 static const uint8_t input[] | 762 static const uint8_t input[] |
| 765 = {0x61, 0xe4, 0xba, 0x8c, 0x7f, 0x2e, 0x62, 0xc5, 0x7f, 0x61, 0x80, 0x8
0, 0xe0, 0x80, 0x80, 0x00 }; | 763 = {0x61, 0xe4, 0xba, 0x8c, 0x7f, 0x2e, 0x62, 0xc5, 0x7f, 0x61, 0x80, 0x8
0, 0xe0, 0x80, 0x80, 0x00 }; |
| 766 static const int16_t start_unsafe[] | 764 static const int16_t start_unsafe[] |
| 767 = {0, 1, 1, 1, 4, 5, 6, 7, 8, 9, 9, 9,
12, 12, 12, 15 }; | 765 = {0, 1, 1, 1, 4, 5, 6, 7, 8, 9, 9, 9,
12, 12, 12, 15 }; |
| 768 static const int16_t limit_unsafe[] | 766 static const int16_t limit_unsafe[] |
| 769 = {0, 1, 4, 4, 4, 5, 6, 7, 9, 9, 10, 10,
10, 15, 15, 15, 16 }; | 767 = {0, 1, 4, 4, 4, 5, 6, 7, 9, 9, 10, 10,
10, 15, 15, 15, 16 }; |
| 770 | 768 |
| 771 uint32_t i=0; | 769 uint32_t i=0; |
| 772 int32_t offset=0, setOffset=0; | 770 int32_t offset=0, setOffset=0; |
| 773 for(offset=0; offset<=LENGTHOF(input); offset++){ | 771 for(offset=0; offset<=UPRV_LENGTHOF(input); offset++){ |
| 774 if (offset<LENGTHOF(input)){ | 772 if (offset<UPRV_LENGTHOF(input)){ |
| 775 setOffset=offset; | 773 setOffset=offset; |
| 776 UTF8_SET_CHAR_START_UNSAFE(input, setOffset); | 774 UTF8_SET_CHAR_START_UNSAFE(input, setOffset); |
| 777 if(setOffset != start_unsafe[i]){ | 775 if(setOffset != start_unsafe[i]){ |
| 778 log_err("ERROR: UTF8_SET_CHAR_START_UNSAFE failed for offset=%ld
. Expected:%ld Got:%ld\n", offset, start_unsafe[i], setOffset); | 776 log_err("ERROR: UTF8_SET_CHAR_START_UNSAFE failed for offset=%ld
. Expected:%ld Got:%ld\n", offset, start_unsafe[i], setOffset); |
| 779 } | 777 } |
| 780 | 778 |
| 781 setOffset=offset; | 779 setOffset=offset; |
| 782 U8_SET_CP_START_UNSAFE(input, setOffset); | 780 U8_SET_CP_START_UNSAFE(input, setOffset); |
| 783 if(setOffset != start_unsafe[i]){ | 781 if(setOffset != start_unsafe[i]){ |
| 784 log_err("ERROR: U8_SET_CP_START_UNSAFE failed for offset=%ld. Ex
pected:%ld Got:%ld\n", offset, start_unsafe[i], setOffset); | 782 log_err("ERROR: U8_SET_CP_START_UNSAFE failed for offset=%ld. Ex
pected:%ld Got:%ld\n", offset, start_unsafe[i], setOffset); |
| (...skipping 119 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 904 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0x81, 0x69, 0x6a, 0x00}, | 902 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0x81, 0x69, 0x6a, 0x00}, |
| 905 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0xbf, 0x69, 0x6a, 0x00}, | 903 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0xbf, 0x69, 0x6a, 0x00}, |
| 906 | 904 |
| 907 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0xc3, 0xbe, 0x6a, 0x00}, | 905 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0xc3, 0xbe, 0x6a, 0x00}, |
| 908 | 906 |
| 909 }; | 907 }; |
| 910 uint16_t i, count=0; | 908 uint16_t i, count=0; |
| 911 uint8_t str[12]; | 909 uint8_t str[12]; |
| 912 uint32_t offset; | 910 uint32_t offset; |
| 913 /* UChar32 c=0;*/ | 911 /* UChar32 c=0;*/ |
| 914 uint16_t size=LENGTHOF(s); | 912 uint16_t size=UPRV_LENGTHOF(s); |
| 915 for(i=0; i<LENGTHOF(test); i=(uint16_t)(i+2)){ | 913 for(i=0; i<UPRV_LENGTHOF(test); i=(uint16_t)(i+2)){ |
| 916 uprv_memcpy(str, s, size); | 914 uprv_memcpy(str, s, size); |
| 917 offset=test[i]; | 915 offset=test[i]; |
| 918 if(count<13){ | 916 if(count<13){ |
| 919 UTF8_APPEND_CHAR_UNSAFE(str, offset, test[i+1]); | 917 UTF8_APPEND_CHAR_UNSAFE(str, offset, test[i+1]); |
| 920 if(offset != movedOffset[count]){ | 918 if(offset != movedOffset[count]){ |
| 921 log_err("ERROR: UTF8_APPEND_CHAR_UNSAFE failed to move the offse
t correctly for count=%d.\nExpectedOffset=%d currentOffset=%d\n", | 919 log_err("ERROR: UTF8_APPEND_CHAR_UNSAFE failed to move the offse
t correctly for count=%d.\nExpectedOffset=%d currentOffset=%d\n", |
| 922 count, movedOffset[count], offset); | 920 count, movedOffset[count], offset); |
| 923 | 921 |
| 924 } | 922 } |
| 925 if(uprv_memcmp(str, result[count], size) !=0){ | 923 if(uprv_memcmp(str, result[count], size) !=0){ |
| (...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 996 /* none from this line */ | 994 /* none from this line */ |
| 997 0, 0xd0, 0x80 | 995 0, 0xd0, 0x80 |
| 998 }; | 996 }; |
| 999 | 997 |
| 1000 uint8_t buffer[100]; | 998 uint8_t buffer[100]; |
| 1001 UChar32 c; | 999 UChar32 c; |
| 1002 int32_t i, length; | 1000 int32_t i, length; |
| 1003 UBool isError, expectIsError, wrongIsError; | 1001 UBool isError, expectIsError, wrongIsError; |
| 1004 | 1002 |
| 1005 length=0; | 1003 length=0; |
| 1006 for(i=0; i<LENGTHOF(codePoints); ++i) { | 1004 for(i=0; i<UPRV_LENGTHOF(codePoints); ++i) { |
| 1007 c=codePoints[i]; | 1005 c=codePoints[i]; |
| 1008 if(c<0 || 0x10ffff<c) { | 1006 if(c<0 || 0x10ffff<c) { |
| 1009 continue; /* skip non-code points for U8_APPEND_UNSAFE */ | 1007 continue; /* skip non-code points for U8_APPEND_UNSAFE */ |
| 1010 } | 1008 } |
| 1011 | 1009 |
| 1012 U8_APPEND_UNSAFE(buffer, length, c); | 1010 U8_APPEND_UNSAFE(buffer, length, c); |
| 1013 } | 1011 } |
| 1014 if(length!=LENGTHOF(expectUnsafe) || 0!=memcmp(buffer, expectUnsafe, length)
) { | 1012 if(length!=UPRV_LENGTHOF(expectUnsafe) || 0!=memcmp(buffer, expectUnsafe, le
ngth)) { |
| 1015 log_err("U8_APPEND_UNSAFE did not generate the expected output\n"); | 1013 log_err("U8_APPEND_UNSAFE did not generate the expected output\n"); |
| 1016 } | 1014 } |
| 1017 | 1015 |
| 1018 length=0; | 1016 length=0; |
| 1019 wrongIsError=FALSE; | 1017 wrongIsError=FALSE; |
| 1020 for(i=0; i<LENGTHOF(codePoints); ++i) { | 1018 for(i=0; i<UPRV_LENGTHOF(codePoints); ++i) { |
| 1021 c=codePoints[i]; | 1019 c=codePoints[i]; |
| 1022 expectIsError= c<0 || 0x10ffff<c || U_IS_SURROGATE(c); | 1020 expectIsError= c<0 || 0x10ffff<c || U_IS_SURROGATE(c); |
| 1023 isError=FALSE; | 1021 isError=FALSE; |
| 1024 | 1022 |
| 1025 U8_APPEND(buffer, length, LENGTHOF(buffer), c, isError); | 1023 U8_APPEND(buffer, length, UPRV_LENGTHOF(buffer), c, isError); |
| 1026 wrongIsError|= isError!=expectIsError; | 1024 wrongIsError|= isError!=expectIsError; |
| 1027 } | 1025 } |
| 1028 if(wrongIsError) { | 1026 if(wrongIsError) { |
| 1029 log_err("U8_APPEND did not set isError correctly\n"); | 1027 log_err("U8_APPEND did not set isError correctly\n"); |
| 1030 } | 1028 } |
| 1031 if(length!=LENGTHOF(expectSafe) || 0!=memcmp(buffer, expectSafe, length)) { | 1029 if(length!=UPRV_LENGTHOF(expectSafe) || 0!=memcmp(buffer, expectSafe, length
)) { |
| 1032 log_err("U8_APPEND did not generate the expected output\n"); | 1030 log_err("U8_APPEND did not generate the expected output\n"); |
| 1033 } | 1031 } |
| 1034 } | 1032 } |
| 1035 | 1033 |
| 1036 static void | 1034 static void |
| 1037 TestSurrogates() { | 1035 TestSurrogates() { |
| 1038 static const uint8_t b[]={ | 1036 static const uint8_t b[]={ |
| 1039 0xc3, 0x9f, /* 00DF */ | 1037 0xc3, 0x9f, /* 00DF */ |
| 1040 0xed, 0x9f, 0xbf, /* D7FF */ | 1038 0xed, 0x9f, 0xbf, /* D7FF */ |
| 1041 0xed, 0xa0, 0x81, /* D801 */ | 1039 0xed, 0xa0, 0x81, /* D801 */ |
| 1042 0xed, 0xbf, 0xbe, /* DFFE */ | 1040 0xed, 0xbf, 0xbe, /* DFFE */ |
| 1043 0xee, 0x80, 0x80, /* E000 */ | 1041 0xee, 0x80, 0x80, /* E000 */ |
| 1044 0xf0, 0x97, 0xbf, 0xbe /* 17FFE */ | 1042 0xf0, 0x97, 0xbf, 0xbe /* 17FFE */ |
| 1045 }; | 1043 }; |
| 1046 static const UChar32 cp[]={ | 1044 static const UChar32 cp[]={ |
| 1047 0xdf, 0xd7ff, 0xd801, 0xdffe, 0xe000, 0x17ffe | 1045 0xdf, 0xd7ff, 0xd801, 0xdffe, 0xe000, 0x17ffe |
| 1048 }; | 1046 }; |
| 1049 | 1047 |
| 1050 UChar32 cu, cs, cl; | 1048 UChar32 cu, cs, cl; |
| 1051 int32_t i, j, k, iu, is, il, length; | 1049 int32_t i, j, k, iu, is, il, length; |
| 1052 | 1050 |
| 1053 k=0; /* index into cp[] */ | 1051 k=0; /* index into cp[] */ |
| 1054 length=LENGTHOF(b); | 1052 length=UPRV_LENGTHOF(b); |
| 1055 for(i=0; i<length;) { | 1053 for(i=0; i<length;) { |
| 1056 j=i; | 1054 j=i; |
| 1057 U8_NEXT_UNSAFE(b, j, cu); | 1055 U8_NEXT_UNSAFE(b, j, cu); |
| 1058 iu=j; | 1056 iu=j; |
| 1059 | 1057 |
| 1060 j=i; | 1058 j=i; |
| 1061 U8_NEXT(b, j, length, cs); | 1059 U8_NEXT(b, j, length, cs); |
| 1062 is=j; | 1060 is=j; |
| 1063 | 1061 |
| 1064 j=i; | 1062 j=i; |
| (...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1123 i=iu; /* go back by one UTF-8 sequence */ | 1121 i=iu; /* go back by one UTF-8 sequence */ |
| 1124 } | 1122 } |
| 1125 } | 1123 } |
| 1126 | 1124 |
| 1127 static void printUChars(const uint8_t *uchars, int16_t len){ | 1125 static void printUChars(const uint8_t *uchars, int16_t len){ |
| 1128 int16_t i=0; | 1126 int16_t i=0; |
| 1129 for(i=0; i<len; i++){ | 1127 for(i=0; i<len; i++){ |
| 1130 log_err("0x%02x ", *(uchars+i)); | 1128 log_err("0x%02x ", *(uchars+i)); |
| 1131 } | 1129 } |
| 1132 } | 1130 } |
| OLD | NEW |