source/test/cintltst/utf8tst.c - Issue 845603002: Update ICU to 54.1 step 1

Side by Side Diff: source/test/cintltst/utf8tst.c

Issue 845603002: Update ICU to 54.1 step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master

Patch Set: remove unused directories Created 5 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /********************************************************************	1 /********************************************************************

2 * COPYRIGHT:	2 * COPYRIGHT:

3 * Copyright (c) 1998-2012, International Business Machines Corporation and	3 * Copyright (c) 1998-2014, International Business Machines Corporation and

4 * others. All Rights Reserved.	4 * others. All Rights Reserved.

5 ********************************************************************/	5 ********************************************************************/

6 /*	6 /*

7 * File utf8tst.c	7 * File utf8tst.c

8 *	8 *

9 * Modification History:	9 * Modification History:

10 *	10 *

11 * Date Name Description	11 * Date Name Description

12 * 07/24/2000 Madhu Creation	12 * 07/24/2000 Madhu Creation

13 *******************************************************************************	13 *******************************************************************************

14 */	14 */

15	15

16 #include "unicode/utypes.h"	16 #include "unicode/utypes.h"

17 #include "unicode/utf8.h"	17 #include "unicode/utf8.h"

18 #include "cmemory.h"	18 #include "cmemory.h"

19 #include "cintltst.h"	19 #include "cintltst.h"

20	20

21 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))

22

23 /* lenient UTF-8 ------------------------------------------------------------ */	21 /* lenient UTF-8 ------------------------------------------------------------ */

24	22

25 /*	23 /*

26 * Lenient UTF-8 differs from conformant UTF-8 in that it allows surrogate	24 * Lenient UTF-8 differs from conformant UTF-8 in that it allows surrogate

27 * code points with their "natural" encoding.	25 * code points with their "natural" encoding.

28 * Effectively, this allows a mix of UTF-8 and CESU-8 as well as encodings of	26 * Effectively, this allows a mix of UTF-8 and CESU-8 as well as encodings of

29 * single surrogates.	27 * single surrogates.

30 *	28 *

31 * This is not conformant with UTF-8.	29 * This is not conformant with UTF-8.

32 *	30 *

(...skipping 61 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
94 addTest(root, &TestAppendChar, "utf8tst/TestAppendChar");	92 addTest(root, &TestAppendChar, "utf8tst/TestAppendChar");

95 addTest(root, &TestAppend, "utf8tst/TestAppend");	93 addTest(root, &TestAppend, "utf8tst/TestAppend");

96 addTest(root, &TestSurrogates, "utf8tst/TestSurrogates");	94 addTest(root, &TestSurrogates, "utf8tst/TestSurrogates");

97 }	95 }

98	96

99 static void TestCodeUnitValues()	97 static void TestCodeUnitValues()

100 {	98 {

101 static const uint8_t codeunit[]={0x00, 0x65, 0x7e, 0x7f, 0xc0, 0xc4, 0xf0, 0xfd, 0x80, 0x81, 0xbc, 0xbe,};	99 static const uint8_t codeunit[]={0x00, 0x65, 0x7e, 0x7f, 0xc0, 0xc4, 0xf0, 0xfd, 0x80, 0x81, 0xbc, 0xbe,};

102	100

103 int16_t i;	101 int16_t i;

104 for(i=0; i<LENGTHOF(codeunit); i++){	102 for(i=0; i<UPRV_LENGTHOF(codeunit); i++){

105 uint8_t c=codeunit[i];	103 uint8_t c=codeunit[i];

106 log_verbose("Testing code unit value of %x\n", c);	104 log_verbose("Testing code unit value of %x\n", c);

107 if(i<4){	105 if(i<4){

108 if(!UTF8_IS_SINGLE(c) \|\| UTF8_IS_LEAD(c) \|\| UTF8_IS_TRAIL(c) \|\| !U8_IS_SINGLE(c) \|\| U8_IS_LEAD(c) \|\| U8_IS_TRAIL(c)){	106 if(!UTF8_IS_SINGLE(c) \|\| UTF8_IS_LEAD(c) \|\| UTF8_IS_TRAIL(c) \|\| !U8_IS_SINGLE(c) \|\| U8_IS_LEAD(c) \|\| U8_IS_TRAIL(c)){

109 log_err("ERROR: 0x%02x is a single byte but results in single: %c lead: %c trail: %c\n",	107 log_err("ERROR: 0x%02x is a single byte but results in single: %c lead: %c trail: %c\n",

110 c, UTF8_IS_SINGLE(c) ? 'y' : 'n', UTF8_IS_LEAD(c) ? 'y' : 'n', UTF8_IS_TRAIL(c) ? 'y' : 'n');	108 c, UTF8_IS_SINGLE(c) ? 'y' : 'n', UTF8_IS_LEAD(c) ? 'y' : 'n', UTF8_IS_TRAIL(c) ? 'y' : 'n');

111 }	109 }

112 } else if(i< 8){	110 } else if(i< 8){

113 if(!UTF8_IS_LEAD(c) \|\| UTF8_IS_SINGLE(c) \|\| UTF8_IS_TRAIL(c) \|\| !U8_IS_LEAD(c) \|\| U8_IS_SINGLE(c) \|\| U8_IS_TRAIL(c)){	111 if(!UTF8_IS_LEAD(c) \|\| UTF8_IS_SINGLE(c) \|\| UTF8_IS_TRAIL(c) \|\| !U8_IS_LEAD(c) \|\| U8_IS_SINGLE(c) \|\| U8_IS_TRAIL(c)){

114 log_err("ERROR: 0x%02x is a lead byte but results in single: %c lead: %c trail: %c\n",	112 log_err("ERROR: 0x%02x is a lead byte but results in single: %c lead: %c trail: %c\n",

(...skipping 22 matching lines...) Expand all Loading...
137 4, 0x24506,	135 4, 0x24506,

138 4, 0x20402,	136 4, 0x20402,

139 4, 0x10402,	137 4, 0x10402,

140 3, 0xd7ff,	138 3, 0xd7ff,

141 3, 0xe000,	139 3, 0xe000,

142	140

143 };	141 };

144	142

145 int16_t i;	143 int16_t i;

146 UBool multiple;	144 UBool multiple;

147 for(i=0; i<LENGTHOF(codepoint); i=(int16_t)(i+2)){	145 for(i=0; i<UPRV_LENGTHOF(codepoint); i=(int16_t)(i+2)){

148 UChar32 c=codepoint[i+1];	146 UChar32 c=codepoint[i+1];

149 if(UTF8_CHAR_LENGTH(c) != (uint16_t)codepoint[i] \|\| U8_LENGTH(c) != (uin t16_t)codepoint[i]){	147 if(UTF8_CHAR_LENGTH(c) != (uint16_t)codepoint[i] \|\| U8_LENGTH(c) != (uin t16_t)codepoint[i]){

150 log_err("The no: of code units for %lx:- Expected: %d Got: %d\n", c, codepoint[i], UTF8_CHAR_LENGTH(c));	148 log_err("The no: of code units for %lx:- Expected: %d Got: %d\n", c, codepoint[i], UTF8_CHAR_LENGTH(c));

151 }else{	149 }else{

152 log_verbose("The no: of code units for %lx is %d\n",c, UTF8_CHAR_L ENGTH(c));	150 log_verbose("The no: of code units for %lx is %d\n",c, UTF8_CHAR_L ENGTH(c));

153 }	151 }

154 multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE);	152 multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE);

155 if(UTF8_NEED_MULTIPLE_UCHAR(c) != multiple){	153 if(UTF8_NEED_MULTIPLE_UCHAR(c) != multiple){

156 log_err("ERROR: UTF8_NEED_MULTIPLE_UCHAR failed for %lx\n", c);	154 log_err("ERROR: UTF8_NEED_MULTIPLE_UCHAR failed for %lx\n", c);

157 }	155 }

(...skipping 377 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
535 }	533 }

536 }	534 }

537 for(i=0, offset=0; offset<sizeof(input); ++i) {	535 for(i=0, offset=0; offset<sizeof(input); ++i) {

538 U8_NEXT_UNSAFE(input, offset, c);	536 U8_NEXT_UNSAFE(input, offset, c);

539 if(c != codePoints[i]){	537 if(c != codePoints[i]){

540 log_err("ERROR: U8_NEXT_UNSAFE failed for offset=%ld. Expected:%lx G ot:%lx\n",	538 log_err("ERROR: U8_NEXT_UNSAFE failed for offset=%ld. Expected:%lx G ot:%lx\n",

541 offset, codePoints[i], c);	539 offset, codePoints[i], c);

542 }	540 }

543 }	541 }

544	542

545 for(i=LENGTHOF(codePoints)-1, offset=sizeof(input); offset > 0; --i){	543 for(i=UPRV_LENGTHOF(codePoints)-1, offset=sizeof(input); offset > 0; --i){

546 UTF8_PREV_CHAR_UNSAFE(input, offset, c);	544 UTF8_PREV_CHAR_UNSAFE(input, offset, c);

547 if(c != codePoints[i]){	545 if(c != codePoints[i]){

548 log_err("ERROR: UTF8_PREV_CHAR_UNSAFE failed for offset=%ld. Expect ed:%lx Got:%lx\n",	546 log_err("ERROR: UTF8_PREV_CHAR_UNSAFE failed for offset=%ld. Expect ed:%lx Got:%lx\n",

549 offset, codePoints[i], c);	547 offset, codePoints[i], c);

550 }	548 }

551 }	549 }

552 for(i=LENGTHOF(codePoints)-1, offset=sizeof(input); offset > 0; --i){	550 for(i=UPRV_LENGTHOF(codePoints)-1, offset=sizeof(input); offset > 0; --i){

553 U8_PREV_UNSAFE(input, offset, c);	551 U8_PREV_UNSAFE(input, offset, c);

554 if(c != codePoints[i]){	552 if(c != codePoints[i]){

555 log_err("ERROR: U8_PREV_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n",	553 log_err("ERROR: U8_PREV_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n",

556 offset, codePoints[i], c);	554 offset, codePoints[i], c);

557 }	555 }

558 }	556 }

559 }	557 }

560	558

561 static void TestFwdBack() {	559 static void TestFwdBack() {

562 static const uint8_t input[]={0x61, 0xF0, 0x90, 0x90, 0x81, 0xff, 0x62, 0xc0 , 0x80, 0x7f, 0x8f, 0xc0, 0x63, 0x81, 0x90, 0x90, 0xF0, 0x00};	560 static const uint8_t input[]={0x61, 0xF0, 0x90, 0x90, 0x81, 0xff, 0x62, 0xc0 , 0x80, 0x7f, 0x8f, 0xc0, 0x63, 0x81, 0x90, 0x90, 0xF0, 0x00};

(...skipping 38 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
601 offsafe=sizeof(input);	599 offsafe=sizeof(input);

602 while(offsafe > 0){	600 while(offsafe > 0){

603 U8_BACK_1(input, 0, offsafe);	601 U8_BACK_1(input, 0, offsafe);

604 if(offsafe != back_safe[i]){	602 if(offsafe != back_safe[i]){

605 log_err("ERROR: U8_BACK_1 offset expected:%d, Got:%d\n", back_safe[i ], offsafe);	603 log_err("ERROR: U8_BACK_1 offset expected:%d, Got:%d\n", back_safe[i ], offsafe);

606 }	604 }

607 i++;	605 i++;

608 }	606 }

609	607

610 offsafe=0;	608 offsafe=0;

611 for(i=0; i<LENGTHOF(Nvalue); i++){	609 for(i=0; i<UPRV_LENGTHOF(Nvalue); i++){

612 UTF8_FWD_N_SAFE(input, offsafe, sizeof(input), Nvalue[i]);	610 UTF8_FWD_N_SAFE(input, offsafe, sizeof(input), Nvalue[i]);

613 if(offsafe != fwd_N_safe[i]){	611 if(offsafe != fwd_N_safe[i]){

614 log_err("ERROR: Forward_N_safe offset=%d expected:%d, Got:%d\n", i, fwd_N_safe[i], offsafe);	612 log_err("ERROR: Forward_N_safe offset=%d expected:%d, Got:%d\n", i, fwd_N_safe[i], offsafe);

615 }	613 }

616	614

617 }	615 }

618	616

619 offsafe=0;	617 offsafe=0;

620 for(i=0; i<LENGTHOF(Nvalue); i++){	618 for(i=0; i<UPRV_LENGTHOF(Nvalue); i++){

621 U8_FWD_N(input, offsafe, sizeof(input), Nvalue[i]);	619 U8_FWD_N(input, offsafe, sizeof(input), Nvalue[i]);

622 if(offsafe != fwd_N_safe[i]){	620 if(offsafe != fwd_N_safe[i]){

623 log_err("ERROR: U8_FWD_N offset=%d expected:%d, Got:%d\n", i, fwd_N_safe[i], offsafe);	621 log_err("ERROR: U8_FWD_N offset=%d expected:%d, Got:%d\n", i, fwd_N_safe[i], offsafe);

624 }	622 }

625	623

626 }	624 }

627	625

628 offsafe=sizeof(input);	626 offsafe=sizeof(input);

629 for(i=0; i<LENGTHOF(Nvalue); i++){	627 for(i=0; i<UPRV_LENGTHOF(Nvalue); i++){

630 UTF8_BACK_N_SAFE(input, 0, offsafe, Nvalue[i]);	628 UTF8_BACK_N_SAFE(input, 0, offsafe, Nvalue[i]);

631 if(offsafe != back_N_safe[i]){	629 if(offsafe != back_N_safe[i]){

632 log_err("ERROR: backward_N_safe offset=%d expected:%d, Got:%ld\n", i , back_N_safe[i], offsafe);	630 log_err("ERROR: backward_N_safe offset=%d expected:%d, Got:%ld\n", i , back_N_safe[i], offsafe);

633 }	631 }

634 }	632 }

635	633

636 offsafe=sizeof(input);	634 offsafe=sizeof(input);

637 for(i=0; i<LENGTHOF(Nvalue); i++){	635 for(i=0; i<UPRV_LENGTHOF(Nvalue); i++){

638 U8_BACK_N(input, 0, offsafe, Nvalue[i]);	636 U8_BACK_N(input, 0, offsafe, Nvalue[i]);

639 if(offsafe != back_N_safe[i]){	637 if(offsafe != back_N_safe[i]){

640 log_err("ERROR: U8_BACK_N offset=%d expected:%d, Got:%ld\n", i, back_N_safe[i], offsafe);	638 log_err("ERROR: U8_BACK_N offset=%d expected:%d, Got:%ld\n", i, back_N_safe[i], offsafe);

641 }	639 }

642 }	640 }

643 }	641 }

644	642

645 static void TestFwdBackUnsafe() {	643 static void TestFwdBackUnsafe() {

646 /*	644 /*

647 * Use a (mostly) well-formed UTF-8 string and test at code point boundaries .	645 * Use a (mostly) well-formed UTF-8 string and test at code point boundaries .

648 * The behavior of _UNSAFE macros for ill-formed strings is undefined.	646 * The behavior of _UNSAFE macros for ill-formed strings is undefined.

649 */	647 */

650 static const uint8_t input[]={	648 static const uint8_t input[]={

651 0x61,	649 0x61,

652 0xf0, 0x90, 0x90, 0x81,	650 0xf0, 0x90, 0x90, 0x81,

653 0xc0, 0x80, /* non-shortest form */	651 0xc0, 0x80, /* non-shortest form */

654 0xe2, 0x82, 0xac,	652 0xe2, 0x82, 0xac,

655 0xc2, 0xa1,	653 0xc2, 0xa1,

656 0xf4, 0x8f, 0xbf, 0xbf,	654 0xf4, 0x8f, 0xbf, 0xbf,

657 0x00	655 0x00

658 };	656 };

659 static const int8_t boundaries[]={ 0, 1, 5, 7, 10, 12, 16, 17 };	657 static const int8_t boundaries[]={ 0, 1, 5, 7, 10, 12, 16, 17 };

660	658

661 int32_t offset;	659 int32_t offset;

662 int32_t i;	660 int32_t i;

663 for(i=1, offset=0; offset<LENGTHOF(input); ++i) {	661 for(i=1, offset=0; offset<UPRV_LENGTHOF(input); ++i) {

664 UTF8_FWD_1_UNSAFE(input, offset);	662 UTF8_FWD_1_UNSAFE(input, offset);

665 if(offset != boundaries[i]){	663 if(offset != boundaries[i]){

666 log_err("ERROR: UTF8_FWD_1_UNSAFE offset expected:%d, Got:%d\n", bou ndaries[i], offset);	664 log_err("ERROR: UTF8_FWD_1_UNSAFE offset expected:%d, Got:%d\n", bou ndaries[i], offset);

667 }	665 }

668 }	666 }

669 for(i=1, offset=0; offset<LENGTHOF(input); ++i) {	667 for(i=1, offset=0; offset<UPRV_LENGTHOF(input); ++i) {

670 U8_FWD_1_UNSAFE(input, offset);	668 U8_FWD_1_UNSAFE(input, offset);

671 if(offset != boundaries[i]){	669 if(offset != boundaries[i]){

672 log_err("ERROR: U8_FWD_1_UNSAFE offset expected:%d, Got:%d\n", bound aries[i], offset);	670 log_err("ERROR: U8_FWD_1_UNSAFE offset expected:%d, Got:%d\n", bound aries[i], offset);

673 }	671 }

674 }	672 }

675	673

676 for(i=LENGTHOF(boundaries)-2, offset=LENGTHOF(input); offset>0; --i) {	674 for(i=UPRV_LENGTHOF(boundaries)-2, offset=UPRV_LENGTHOF(input); offset>0; --i) {

677 UTF8_BACK_1_UNSAFE(input, offset);	675 UTF8_BACK_1_UNSAFE(input, offset);

678 if(offset != boundaries[i]){	676 if(offset != boundaries[i]){

679 log_err("ERROR: UTF8_BACK_1_UNSAFE offset expected:%d, Got:%d\n", bo undaries[i], offset);	677 log_err("ERROR: UTF8_BACK_1_UNSAFE offset expected:%d, Got:%d\n", bo undaries[i], offset);

680 }	678 }

681 }	679 }

682 for(i=LENGTHOF(boundaries)-2, offset=LENGTHOF(input); offset>0; --i) {	680 for(i=UPRV_LENGTHOF(boundaries)-2, offset=UPRV_LENGTHOF(input); offset>0; --i) {

683 U8_BACK_1_UNSAFE(input, offset);	681 U8_BACK_1_UNSAFE(input, offset);

684 if(offset != boundaries[i]){	682 if(offset != boundaries[i]){

685 log_err("ERROR: U8_BACK_1_UNSAFE offset expected:%d, Got:%d\n", boun daries[i], offset);	683 log_err("ERROR: U8_BACK_1_UNSAFE offset expected:%d, Got:%d\n", boun daries[i], offset);

686 }	684 }

687 }	685 }

688	686

689 for(i=0; i<LENGTHOF(boundaries); ++i) {	687 for(i=0; i<UPRV_LENGTHOF(boundaries); ++i) {

690 offset=0;	688 offset=0;

691 UTF8_FWD_N_UNSAFE(input, offset, i);	689 UTF8_FWD_N_UNSAFE(input, offset, i);

692 if(offset != boundaries[i]) {	690 if(offset != boundaries[i]) {

693 log_err("ERROR: UTF8_FWD_N_UNSAFE offset expected:%d, Got:%d\n", bou ndaries[i], offset);	691 log_err("ERROR: UTF8_FWD_N_UNSAFE offset expected:%d, Got:%d\n", bou ndaries[i], offset);

694 }	692 }

695 }	693 }

696 for(i=0; i<LENGTHOF(boundaries); ++i) {	694 for(i=0; i<UPRV_LENGTHOF(boundaries); ++i) {

697 offset=0;	695 offset=0;

698 U8_FWD_N_UNSAFE(input, offset, i);	696 U8_FWD_N_UNSAFE(input, offset, i);

699 if(offset != boundaries[i]) {	697 if(offset != boundaries[i]) {

700 log_err("ERROR: U8_FWD_N_UNSAFE offset expected:%d, Got:%d\n", bound aries[i], offset);	698 log_err("ERROR: U8_FWD_N_UNSAFE offset expected:%d, Got:%d\n", bound aries[i], offset);

701 }	699 }

702 }	700 }

703	701

704 for(i=0; i<LENGTHOF(boundaries); ++i) {	702 for(i=0; i<UPRV_LENGTHOF(boundaries); ++i) {

705 int32_t j=LENGTHOF(boundaries)-1-i;	703 int32_t j=UPRV_LENGTHOF(boundaries)-1-i;

706 offset=LENGTHOF(input);	704 offset=UPRV_LENGTHOF(input);

707 UTF8_BACK_N_UNSAFE(input, offset, i);	705 UTF8_BACK_N_UNSAFE(input, offset, i);

708 if(offset != boundaries[j]) {	706 if(offset != boundaries[j]) {

709 log_err("ERROR: UTF8_BACK_N_UNSAFE offset expected:%d, Got:%d\n", bo undaries[j], offset);	707 log_err("ERROR: UTF8_BACK_N_UNSAFE offset expected:%d, Got:%d\n", bo undaries[j], offset);

710 }	708 }

711 }	709 }

712 for(i=0; i<LENGTHOF(boundaries); ++i) {	710 for(i=0; i<UPRV_LENGTHOF(boundaries); ++i) {

713 int32_t j=LENGTHOF(boundaries)-1-i;	711 int32_t j=UPRV_LENGTHOF(boundaries)-1-i;

714 offset=LENGTHOF(input);	712 offset=UPRV_LENGTHOF(input);

715 U8_BACK_N_UNSAFE(input, offset, i);	713 U8_BACK_N_UNSAFE(input, offset, i);

716 if(offset != boundaries[j]) {	714 if(offset != boundaries[j]) {

717 log_err("ERROR: U8_BACK_N_UNSAFE offset expected:%d, Got:%d\n", boun daries[j], offset);	715 log_err("ERROR: U8_BACK_N_UNSAFE offset expected:%d, Got:%d\n", boun daries[j], offset);

718 }	716 }

719 }	717 }

720 }	718 }

721	719

722 static void TestSetChar() {	720 static void TestSetChar() {

723 static const uint8_t input[]	721 static const uint8_t input[]

724 = {0x61, 0xe4, 0xba, 0x8c, 0x7f, 0xfe, 0x62, 0xc5, 0x7f, 0x61, 0x80, 0x80, 0xe0, 0x00 };	722 = {0x61, 0xe4, 0xba, 0x8c, 0x7f, 0xfe, 0x62, 0xc5, 0x7f, 0x61, 0x80, 0x80, 0xe0, 0x00 };

725 static const int16_t start_safe[]	723 static const int16_t start_safe[]

726 = {0, 1, 1, 1, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 };	724 = {0, 1, 1, 1, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 };

727 static const int16_t limit_safe[]	725 static const int16_t limit_safe[]

728 = {0, 1, 4, 4, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 };	726 = {0, 1, 4, 4, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 };

729	727

730 uint32_t i=0;	728 uint32_t i=0;

731 int32_t offset=0, setOffset=0;	729 int32_t offset=0, setOffset=0;

732 for(offset=0; offset<=LENGTHOF(input); offset++){	730 for(offset=0; offset<=UPRV_LENGTHOF(input); offset++){

733 if (offset<LENGTHOF(input)){	731 if (offset<UPRV_LENGTHOF(input)){

734 setOffset=offset;	732 setOffset=offset;

735 UTF8_SET_CHAR_START_SAFE(input, 0, setOffset);	733 UTF8_SET_CHAR_START_SAFE(input, 0, setOffset);

736 if(setOffset != start_safe[i]){	734 if(setOffset != start_safe[i]){

737 log_err("ERROR: UTF8_SET_CHAR_START_SAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset, start_safe[i], setOffset);	735 log_err("ERROR: UTF8_SET_CHAR_START_SAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset, start_safe[i], setOffset);

738 }	736 }

739	737

740 setOffset=offset;	738 setOffset=offset;

741 U8_SET_CP_START(input, 0, setOffset);	739 U8_SET_CP_START(input, 0, setOffset);

742 if(setOffset != start_safe[i]){	740 if(setOffset != start_safe[i]){

743 log_err("ERROR: U8_SET_CP_START failed for offset=%ld. Expected: %ld Got:%ld\n", offset, start_safe[i], setOffset);	741 log_err("ERROR: U8_SET_CP_START failed for offset=%ld. Expected: %ld Got:%ld\n", offset, start_safe[i], setOffset);

(...skipping 19 matching lines...) Expand all Loading...
763 static void TestSetCharUnsafe() {	761 static void TestSetCharUnsafe() {

764 static const uint8_t input[]	762 static const uint8_t input[]

765 = {0x61, 0xe4, 0xba, 0x8c, 0x7f, 0x2e, 0x62, 0xc5, 0x7f, 0x61, 0x80, 0x80, 0xe0, 0x80, 0x80, 0x00 };	763 = {0x61, 0xe4, 0xba, 0x8c, 0x7f, 0x2e, 0x62, 0xc5, 0x7f, 0x61, 0x80, 0x80, 0xe0, 0x80, 0x80, 0x00 };

766 static const int16_t start_unsafe[]	764 static const int16_t start_unsafe[]

767 = {0, 1, 1, 1, 4, 5, 6, 7, 8, 9, 9, 9, 12, 12, 12, 15 };	765 = {0, 1, 1, 1, 4, 5, 6, 7, 8, 9, 9, 9, 12, 12, 12, 15 };

768 static const int16_t limit_unsafe[]	766 static const int16_t limit_unsafe[]

769 = {0, 1, 4, 4, 4, 5, 6, 7, 9, 9, 10, 10, 10, 15, 15, 15, 16 };	767 = {0, 1, 4, 4, 4, 5, 6, 7, 9, 9, 10, 10, 10, 15, 15, 15, 16 };

770	768

771 uint32_t i=0;	769 uint32_t i=0;

772 int32_t offset=0, setOffset=0;	770 int32_t offset=0, setOffset=0;

773 for(offset=0; offset<=LENGTHOF(input); offset++){	771 for(offset=0; offset<=UPRV_LENGTHOF(input); offset++){

774 if (offset<LENGTHOF(input)){	772 if (offset<UPRV_LENGTHOF(input)){

775 setOffset=offset;	773 setOffset=offset;

776 UTF8_SET_CHAR_START_UNSAFE(input, setOffset);	774 UTF8_SET_CHAR_START_UNSAFE(input, setOffset);

777 if(setOffset != start_unsafe[i]){	775 if(setOffset != start_unsafe[i]){

778 log_err("ERROR: UTF8_SET_CHAR_START_UNSAFE failed for offset=%ld . Expected:%ld Got:%ld\n", offset, start_unsafe[i], setOffset);	776 log_err("ERROR: UTF8_SET_CHAR_START_UNSAFE failed for offset=%ld . Expected:%ld Got:%ld\n", offset, start_unsafe[i], setOffset);

779 }	777 }

780	778

781 setOffset=offset;	779 setOffset=offset;

782 U8_SET_CP_START_UNSAFE(input, setOffset);	780 U8_SET_CP_START_UNSAFE(input, setOffset);

783 if(setOffset != start_unsafe[i]){	781 if(setOffset != start_unsafe[i]){

784 log_err("ERROR: U8_SET_CP_START_UNSAFE failed for offset=%ld. Ex pected:%ld Got:%ld\n", offset, start_unsafe[i], setOffset);	782 log_err("ERROR: U8_SET_CP_START_UNSAFE failed for offset=%ld. Ex pected:%ld Got:%ld\n", offset, start_unsafe[i], setOffset);

(...skipping 119 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
904 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0x81, 0x69, 0x6a, 0x00},	902 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0x81, 0x69, 0x6a, 0x00},

905 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0xbf, 0x69, 0x6a, 0x00},	903 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0xbf, 0x69, 0x6a, 0x00},

906	904

907 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0xc3, 0xbe, 0x6a, 0x00},	905 {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0xc3, 0xbe, 0x6a, 0x00},

908	906

909 };	907 };

910 uint16_t i, count=0;	908 uint16_t i, count=0;

911 uint8_t str[12];	909 uint8_t str[12];

912 uint32_t offset;	910 uint32_t offset;

913 /* UChar32 c=0;*/	911 /* UChar32 c=0;*/

914 uint16_t size=LENGTHOF(s);	912 uint16_t size=UPRV_LENGTHOF(s);

915 for(i=0; i<LENGTHOF(test); i=(uint16_t)(i+2)){	913 for(i=0; i<UPRV_LENGTHOF(test); i=(uint16_t)(i+2)){

916 uprv_memcpy(str, s, size);	914 uprv_memcpy(str, s, size);

917 offset=test[i];	915 offset=test[i];

918 if(count<13){	916 if(count<13){

919 UTF8_APPEND_CHAR_UNSAFE(str, offset, test[i+1]);	917 UTF8_APPEND_CHAR_UNSAFE(str, offset, test[i+1]);

920 if(offset != movedOffset[count]){	918 if(offset != movedOffset[count]){

921 log_err("ERROR: UTF8_APPEND_CHAR_UNSAFE failed to move the offse t correctly for count=%d.\nExpectedOffset=%d currentOffset=%d\n",	919 log_err("ERROR: UTF8_APPEND_CHAR_UNSAFE failed to move the offse t correctly for count=%d.\nExpectedOffset=%d currentOffset=%d\n",

922 count, movedOffset[count], offset);	920 count, movedOffset[count], offset);

923	921

924 }	922 }

925 if(uprv_memcmp(str, result[count], size) !=0){	923 if(uprv_memcmp(str, result[count], size) !=0){

(...skipping 70 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
996 /* none from this line */	994 /* none from this line */

997 0, 0xd0, 0x80	995 0, 0xd0, 0x80

998 };	996 };

999	997

1000 uint8_t buffer[100];	998 uint8_t buffer[100];

1001 UChar32 c;	999 UChar32 c;

1002 int32_t i, length;	1000 int32_t i, length;

1003 UBool isError, expectIsError, wrongIsError;	1001 UBool isError, expectIsError, wrongIsError;

1004	1002

1005 length=0;	1003 length=0;

1006 for(i=0; i<LENGTHOF(codePoints); ++i) {	1004 for(i=0; i<UPRV_LENGTHOF(codePoints); ++i) {

1007 c=codePoints[i];	1005 c=codePoints[i];

1008 if(c<0 \|\| 0x10ffff<c) {	1006 if(c<0 \|\| 0x10ffff<c) {

1009 continue; /* skip non-code points for U8_APPEND_UNSAFE */	1007 continue; /* skip non-code points for U8_APPEND_UNSAFE */

1010 }	1008 }

1011	1009

1012 U8_APPEND_UNSAFE(buffer, length, c);	1010 U8_APPEND_UNSAFE(buffer, length, c);

1013 }	1011 }

1014 if(length!=LENGTHOF(expectUnsafe) \|\| 0!=memcmp(buffer, expectUnsafe, length)) {	1012 if(length!=UPRV_LENGTHOF(expectUnsafe) \|\| 0!=memcmp(buffer, expectUnsafe, length)) {

1015 log_err("U8_APPEND_UNSAFE did not generate the expected output\n");	1013 log_err("U8_APPEND_UNSAFE did not generate the expected output\n");

1016 }	1014 }

1017	1015

1018 length=0;	1016 length=0;

1019 wrongIsError=FALSE;	1017 wrongIsError=FALSE;

1020 for(i=0; i<LENGTHOF(codePoints); ++i) {	1018 for(i=0; i<UPRV_LENGTHOF(codePoints); ++i) {

1021 c=codePoints[i];	1019 c=codePoints[i];

1022 expectIsError= c<0 \|\| 0x10ffff<c \|\| U_IS_SURROGATE(c);	1020 expectIsError= c<0 \|\| 0x10ffff<c \|\| U_IS_SURROGATE(c);

1023 isError=FALSE;	1021 isError=FALSE;

1024	1022

1025 U8_APPEND(buffer, length, LENGTHOF(buffer), c, isError);	1023 U8_APPEND(buffer, length, UPRV_LENGTHOF(buffer), c, isError);

1026 wrongIsError\|= isError!=expectIsError;	1024 wrongIsError\|= isError!=expectIsError;

1027 }	1025 }

1028 if(wrongIsError) {	1026 if(wrongIsError) {

1029 log_err("U8_APPEND did not set isError correctly\n");	1027 log_err("U8_APPEND did not set isError correctly\n");

1030 }	1028 }

1031 if(length!=LENGTHOF(expectSafe) \|\| 0!=memcmp(buffer, expectSafe, length)) {	1029 if(length!=UPRV_LENGTHOF(expectSafe) \|\| 0!=memcmp(buffer, expectSafe, length)) {

1032 log_err("U8_APPEND did not generate the expected output\n");	1030 log_err("U8_APPEND did not generate the expected output\n");

1033 }	1031 }

1034 }	1032 }

1035	1033

1036 static void	1034 static void

1037 TestSurrogates() {	1035 TestSurrogates() {

1038 static const uint8_t b[]={	1036 static const uint8_t b[]={

1039 0xc3, 0x9f, /* 00DF */	1037 0xc3, 0x9f, /* 00DF */

1040 0xed, 0x9f, 0xbf, /* D7FF */	1038 0xed, 0x9f, 0xbf, /* D7FF */

1041 0xed, 0xa0, 0x81, /* D801 */	1039 0xed, 0xa0, 0x81, /* D801 */

1042 0xed, 0xbf, 0xbe, /* DFFE */	1040 0xed, 0xbf, 0xbe, /* DFFE */

1043 0xee, 0x80, 0x80, /* E000 */	1041 0xee, 0x80, 0x80, /* E000 */

1044 0xf0, 0x97, 0xbf, 0xbe /* 17FFE */	1042 0xf0, 0x97, 0xbf, 0xbe /* 17FFE */

1045 };	1043 };

1046 static const UChar32 cp[]={	1044 static const UChar32 cp[]={

1047 0xdf, 0xd7ff, 0xd801, 0xdffe, 0xe000, 0x17ffe	1045 0xdf, 0xd7ff, 0xd801, 0xdffe, 0xe000, 0x17ffe

1048 };	1046 };

1049	1047

1050 UChar32 cu, cs, cl;	1048 UChar32 cu, cs, cl;

1051 int32_t i, j, k, iu, is, il, length;	1049 int32_t i, j, k, iu, is, il, length;

1052	1050

1053 k=0; /* index into cp[] */	1051 k=0; /* index into cp[] */

1054 length=LENGTHOF(b);	1052 length=UPRV_LENGTHOF(b);

1055 for(i=0; i<length;) {	1053 for(i=0; i<length;) {

1056 j=i;	1054 j=i;

1057 U8_NEXT_UNSAFE(b, j, cu);	1055 U8_NEXT_UNSAFE(b, j, cu);

1058 iu=j;	1056 iu=j;

1059	1057

1060 j=i;	1058 j=i;

1061 U8_NEXT(b, j, length, cs);	1059 U8_NEXT(b, j, length, cs);

1062 is=j;	1060 is=j;

1063	1061

1064 j=i;	1062 j=i;

(...skipping 58 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1123 i=iu; /* go back by one UTF-8 sequence */	1121 i=iu; /* go back by one UTF-8 sequence */

1124 }	1122 }

1125 }	1123 }

1126	1124

1127 static void printUChars(const uint8_t *uchars, int16_t len){	1125 static void printUChars(const uint8_t *uchars, int16_t len){

1128 int16_t i=0;	1126 int16_t i=0;

1129 for(i=0; i<len; i++){	1127 for(i=0; i<len; i++){

1130 log_err("0x%02x ", *(uchars+i));	1128 log_err("0x%02x ", *(uchars+i));

1131 }	1129 }

1132 }	1130 }

OLD	NEW

« no previous file with comments | « source/test/cintltst/utf16tst.c ('k') | source/test/cintltst/utmstest.c » ('j') | no next file with comments »