Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(51)

Side by Side Diff: source/test/cintltst/cmsccoll.c

Issue 845603002: Update ICU to 54.1 step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master
Patch Set: remove unused directories Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/test/cintltst/cloctst.c ('k') | source/test/cintltst/cmsgtst.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 1
2 /******************************************************************** 2 /********************************************************************
3 * COPYRIGHT: 3 * COPYRIGHT:
4 * Copyright (c) 2001-2013, International Business Machines Corporation and 4 * Copyright (c) 2001-2014, International Business Machines Corporation and
5 * others. All Rights Reserved. 5 * others. All Rights Reserved.
6 ********************************************************************/ 6 ********************************************************************/
7 /******************************************************************************* 7 /*******************************************************************************
8 * 8 *
9 * File cmsccoll.C 9 * File cmsccoll.C
10 * 10 *
11 *******************************************************************************/ 11 *******************************************************************************/
12 /** 12 /**
13 * These are the tests specific to ICU 1.8 and above, that I didn't know where 13 * These are the tests specific to ICU 1.8 and above, that I didn't know where
14 * to fit. 14 * to fit.
15 */ 15 */
16 16
17 #include <stdio.h> 17 #include <stdio.h>
18 18
19 #include "unicode/utypes.h" 19 #include "unicode/utypes.h"
20 20
21 #if !UCONFIG_NO_COLLATION 21 #if !UCONFIG_NO_COLLATION
22 22
23 #include "unicode/ucol.h" 23 #include "unicode/ucol.h"
24 #include "unicode/ucoleitr.h" 24 #include "unicode/ucoleitr.h"
25 #include "unicode/uloc.h" 25 #include "unicode/uloc.h"
26 #include "cintltst.h" 26 #include "cintltst.h"
27 #include "ccolltst.h" 27 #include "ccolltst.h"
28 #include "callcoll.h" 28 #include "callcoll.h"
29 #include "unicode/ustring.h" 29 #include "unicode/ustring.h"
30 #include "string.h" 30 #include "string.h"
31 #include "ucol_imp.h" 31 #include "ucol_imp.h"
32 #include "ucol_tok.h"
33 #include "cmemory.h" 32 #include "cmemory.h"
34 #include "cstring.h" 33 #include "cstring.h"
35 #include "uassert.h" 34 #include "uassert.h"
36 #include "unicode/parseerr.h" 35 #include "unicode/parseerr.h"
37 #include "unicode/ucnv.h" 36 #include "unicode/ucnv.h"
38 #include "unicode/ures.h" 37 #include "unicode/ures.h"
39 #include "unicode/uscript.h" 38 #include "unicode/uscript.h"
40 #include "unicode/utf16.h" 39 #include "unicode/utf16.h"
41 #include "uparse.h" 40 #include "uparse.h"
42 #include "putilimp.h" 41 #include "putilimp.h"
(...skipping 363 matching lines...) Expand 10 before | Expand all | Expand 10 after
406 log_err("Wrong locale for French Collation Data, expecte d \"fr\" got %s", lp); 405 log_err("Wrong locale for French Collation Data, expecte d \"fr\" got %s", lp);
407 } 406 }
408 } 407 }
409 } 408 }
410 ures_close(cr); 409 ures_close(cr);
411 } 410 }
412 ures_close(lr); 411 ures_close(lr);
413 } 412 }
414 } 413 }
415 414
416 static void testPrimary(UCollator* col, const UChar* p,const UChar* q){
417 UChar source[256] = { '\0'};
418 UChar target[256] = { '\0'};
419 UChar preP = 0x31a3;
420 UChar preQ = 0x310d;
421 /*
422 UChar preP = (*p>0x0400 && *p<0x0500)?0x00e1:0x491;
423 UChar preQ = (*p>0x0400 && *p<0x0500)?0x0041:0x413;
424 */
425 /*log_verbose("Testing primary\n");*/
426
427 doTest(col, p, q, UCOL_LESS);
428 /*
429 UCollationResult result = ucol_strcoll(col,p,u_strlen(p),q,u_strlen(q));
430
431 if(result!=UCOL_LESS){
432 aescstrdup(p,utfSource,256);
433 aescstrdup(q,utfTarget,256);
434 fprintf(file,"Primary failed source: %s target: %s \n", utfSource,utfTar get);
435 }
436 */
437 source[0] = preP;
438 u_strcpy(source+1,p);
439 target[0] = preQ;
440 u_strcpy(target+1,q);
441 doTest(col, source, target, UCOL_LESS);
442 /*
443 fprintf(file,"Primary swamps 2nd failed source: %s target: %s \n", utfSourc e,utfTarget);
444 */
445 }
446
447 static void testSecondary(UCollator* col, const UChar* p,const UChar* q){
448 UChar source[256] = { '\0'};
449 UChar target[256] = { '\0'};
450
451 /*log_verbose("Testing secondary\n");*/
452
453 doTest(col, p, q, UCOL_LESS);
454 /*
455 fprintf(file,"secondary failed source: %s target: %s \n", utfSource,utfTarg et);
456 */
457 source[0] = 0x0053;
458 u_strcpy(source+1,p);
459 target[0]= 0x0073;
460 u_strcpy(target+1,q);
461
462 doTest(col, source, target, UCOL_LESS);
463 /*
464 fprintf(file,"secondary swamps 3rd failed source: %s target: %s \n",utfSour ce,utfTarget);
465 */
466
467
468 u_strcpy(source,p);
469 source[u_strlen(p)] = 0x62;
470 source[u_strlen(p)+1] = 0;
471
472
473 u_strcpy(target,q);
474 target[u_strlen(q)] = 0x61;
475 target[u_strlen(q)+1] = 0;
476
477 doTest(col, source, target, UCOL_GREATER);
478
479 /*
480 fprintf(file,"secondary is swamped by 1 failed source: %s target: %s \n",u tfSource,utfTarget);
481 */
482 }
483
484 static void testTertiary(UCollator* col, const UChar* p,const UChar* q){
485 UChar source[256] = { '\0'};
486 UChar target[256] = { '\0'};
487
488 /*log_verbose("Testing tertiary\n");*/
489
490 doTest(col, p, q, UCOL_LESS);
491 /*
492 fprintf(file,"Tertiary failed source: %s target: %s \n",utfSource,utfTarget );
493 */
494 source[0] = 0x0020;
495 u_strcpy(source+1,p);
496 target[0]= 0x002D;
497 u_strcpy(target+1,q);
498
499 doTest(col, source, target, UCOL_LESS);
500 /*
501 fprintf(file,"Tertiary swamps 4th failed source: %s target: %s \n", utfSour ce,utfTarget);
502 */
503
504 u_strcpy(source,p);
505 source[u_strlen(p)] = 0xE0;
506 source[u_strlen(p)+1] = 0;
507
508 u_strcpy(target,q);
509 target[u_strlen(q)] = 0x61;
510 target[u_strlen(q)+1] = 0;
511
512 doTest(col, source, target, UCOL_GREATER);
513
514 /*
515 fprintf(file,"Tertiary is swamped by 3rd failed source: %s target: %s \n",u tfSource,utfTarget);
516 */
517 }
518
519 static void testEquality(UCollator* col, const UChar* p,const UChar* q){
520 /*
521 UChar source[256] = { '\0'};
522 UChar target[256] = { '\0'};
523 */
524
525 doTest(col, p, q, UCOL_EQUAL);
526 /*
527 fprintf(file,"Primary failed source: %s target: %s \n", utfSource,utfTarget );
528 */
529 }
530
531 static void testCollator(UCollator *coll, UErrorCode *status) {
532 const UChar *rules = NULL, *current = NULL;
533 int32_t ruleLen = 0;
534 uint32_t strength = 0;
535 uint32_t chOffset = 0; uint32_t chLen = 0;
536 uint32_t exOffset = 0; uint32_t exLen = 0;
537 uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
538 uint32_t firstEx = 0;
539 /* uint32_t rExpsLen = 0; */
540 uint32_t firstLen = 0;
541 UBool varT = FALSE; UBool top_ = TRUE;
542 uint16_t specs = 0;
543 UBool startOfRules = TRUE;
544 UBool lastReset = FALSE;
545 UBool before = FALSE;
546 uint32_t beforeStrength = 0;
547 UColTokenParser src;
548 UColOptionSet opts;
549
550 UChar first[256];
551 UChar second[256];
552 UChar tempB[256];
553 uint32_t tempLen;
554 UChar *rulesCopy = NULL;
555 UParseError parseError;
556
557 uprv_memset(&src, 0, sizeof(UColTokenParser));
558
559 src.opts = &opts;
560
561 rules = ucol_getRules(coll, &ruleLen);
562 if(U_SUCCESS(*status) && ruleLen > 0) {
563 rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*si zeof(UChar));
564 uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
565 src.current = src.source = rulesCopy;
566 src.end = rulesCopy+ruleLen;
567 src.extraCurrent = src.end;
568 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
569 *first = *second = 0;
570
571 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToke n can cause the pointer to
572 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
573 while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, st atus)) != NULL) {
574 strength = src.parsedToken.strength;
575 chOffset = src.parsedToken.charsOffset;
576 chLen = src.parsedToken.charsLen;
577 exOffset = src.parsedToken.extensionOffset;
578 exLen = src.parsedToken.extensionLen;
579 prefixOffset = src.parsedToken.prefixOffset;
580 prefixLen = src.parsedToken.prefixLen;
581 specs = src.parsedToken.flags;
582
583 startOfRules = FALSE;
584 varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
585 (void)varT; /* Suppress set but not used warning. */
586 top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
587 if(top_) { /* if reset is on top, the sequence is broken. We should have a n empty string */
588 second[0] = 0;
589 } else {
590 u_strncpy(second,src.source+chOffset, chLen);
591 second[chLen] = 0;
592
593 if(exLen > 0 && firstEx == 0) {
594 u_strncat(first, src.source+exOffset, exLen);
595 first[firstLen+exLen] = 0;
596 }
597
598 if(lastReset == TRUE && prefixLen != 0) {
599 u_strncpy(first+prefixLen, first, firstLen);
600 u_strncpy(first, src.source+prefixOffset, prefixLen);
601 first[firstLen+prefixLen] = 0;
602 firstLen = firstLen+prefixLen;
603 }
604
605 if(before == TRUE) { /* swap first and second */
606 u_strcpy(tempB, first);
607 u_strcpy(first, second);
608 u_strcpy(second, tempB);
609
610 tempLen = firstLen;
611 firstLen = chLen;
612 chLen = tempLen;
613
614 tempLen = firstEx;
615 firstEx = exLen;
616 exLen = tempLen;
617 if(beforeStrength < strength) {
618 strength = beforeStrength;
619 }
620 }
621 }
622 lastReset = FALSE;
623
624 switch(strength){
625 case UCOL_IDENTICAL:
626 testEquality(coll,first,second);
627 break;
628 case UCOL_PRIMARY:
629 testPrimary(coll,first,second);
630 break;
631 case UCOL_SECONDARY:
632 testSecondary(coll,first,second);
633 break;
634 case UCOL_TERTIARY:
635 testTertiary(coll,first,second);
636 break;
637 case UCOL_TOK_RESET:
638 lastReset = TRUE;
639 before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
640 if(before) {
641 beforeStrength = (specs & UCOL_TOK_BEFORE)-1;
642 }
643 break;
644 default:
645 break;
646 }
647
648 if(before == TRUE && strength != UCOL_TOK_RESET) { /* first and second wer e swapped */
649 before = FALSE;
650 } else {
651 firstLen = chLen;
652 firstEx = exLen;
653 u_strcpy(first, second);
654 }
655 }
656 uprv_free(src.source);
657 uprv_free(src.reorderCodes);
658 }
659 }
660
661 static UCollationResult ucaTest(void *collator, const int object, const UChar *s ource, const int sLen, const UChar *target, const int tLen) {
662 UCollator *UCA = (UCollator *)collator;
663 return ucol_strcoll(UCA, source, sLen, target, tLen);
664 }
665
666 /*
667 static UCollationResult winTest(void *collator, const int object, const UChar *s ource, const int sLen, const UChar *target, const int tLen) {
668 #if U_PLATFORM_HAS_WIN32_API
669 LCID lcid = (LCID)collator;
670 return (UCollationResult)CompareString(lcid, 0, source, sLen, target, tLen);
671 #else
672 return 0;
673 #endif
674 }
675 */
676
677 static UCollationResult swampEarlier(tst_strcoll* func, void *collator, int opts ,
678 UChar s1, UChar s2,
679 const UChar *s, const uint32_t sLen,
680 const UChar *t, const uint32_t tLen) {
681 UChar source[256] = {0};
682 UChar target[256] = {0};
683
684 source[0] = s1;
685 u_strcpy(source+1, s);
686 target[0] = s2;
687 u_strcpy(target+1, t);
688
689 return func(collator, opts, source, sLen+1, target, tLen+1);
690 }
691
692 static UCollationResult swampLater(tst_strcoll* func, void *collator, int opts,
693 UChar s1, UChar s2,
694 const UChar *s, const uint32_t sLen,
695 const UChar *t, const uint32_t tLen) {
696 UChar source[256] = {0};
697 UChar target[256] = {0};
698
699 u_strcpy(source, s);
700 source[sLen] = s1;
701 u_strcpy(target, t);
702 target[tLen] = s2;
703
704 return func(collator, opts, source, sLen+1, target, tLen+1);
705 }
706
707 static uint32_t probeStrength(tst_strcoll* func, void *collator, int opts,
708 const UChar *s, const uint32_t sLen,
709 const UChar *t, const uint32_t tLen,
710 UCollationResult result) {
711 /*UChar fPrimary = 0x6d;*/
712 /*UChar sPrimary = 0x6e;*/
713 UChar fSecondary = 0x310d;
714 UChar sSecondary = 0x31a3;
715 UChar fTertiary = 0x310f;
716 UChar sTertiary = 0x31b7;
717
718 UCollationResult oposite;
719 if(result == UCOL_EQUAL) {
720 return UCOL_IDENTICAL;
721 } else if(result == UCOL_GREATER) {
722 oposite = UCOL_LESS;
723 } else {
724 oposite = UCOL_GREATER;
725 }
726
727 if(swampEarlier(func, collator, opts, sSecondary, fSecondary, s, sLen, t, tLen ) == result) {
728 return UCOL_PRIMARY;
729 } else if((swampEarlier(func, collator, opts, sTertiary, 0x310f, s, sLen, t, t Len) == result) &&
730 (swampEarlier(func, collator, opts, 0x310f, sTertiary, s, sLen, t, tLen) == result)) {
731 return UCOL_SECONDARY;
732 } else if((swampLater(func, collator, opts, sTertiary, fTertiary, s, sLen, t, tLen) == result) &&
733 (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == result)) {
734 return UCOL_TERTIARY;
735 } else if((swampLater(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLe n) == oposite) &&
736 (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == oposite)) {
737 return UCOL_QUATERNARY;
738 } else {
739 return UCOL_IDENTICAL;
740 }
741 }
742
743 static char *getRelationSymbol(UCollationResult res, uint32_t strength, char *bu ffer) {
744 uint32_t i = 0;
745
746 if(res == UCOL_EQUAL || strength == 0xdeadbeef) {
747 buffer[0] = '=';
748 buffer[1] = '=';
749 buffer[2] = '\0';
750 } else if(res == UCOL_GREATER) {
751 for(i = 0; i<strength+1; i++) {
752 buffer[i] = '>';
753 }
754 buffer[strength+1] = '\0';
755 } else {
756 for(i = 0; i<strength+1; i++) {
757 buffer[i] = '<';
758 }
759 buffer[strength+1] = '\0';
760 }
761
762 return buffer;
763 }
764
765
766
767 static void logFailure (const char *platform, const char *test,
768 const UChar *source, const uint32_t sLen,
769 const UChar *target, const uint32_t tLen,
770 UCollationResult realRes, uint32_t realStrength,
771 UCollationResult expRes, uint32_t expStrength, UBool err or) {
772
773 uint32_t i = 0;
774
775 char sEsc[256], s[256], tEsc[256], t[256], b[256], output[512], relation[256];
776 static int32_t maxOutputLength = 0;
777 int32_t outputLength;
778
779 *sEsc = *tEsc = *s = *t = 0;
780 if(error == TRUE) {
781 log_err("Difference between expected and generated order. Run test with -v f or more info\n");
782 } else if(getTestOption(VERBOSITY_OPTION) == 0) {
783 return;
784 }
785 for(i = 0; i<sLen; i++) {
786 sprintf(b, "%04X", source[i]);
787 strcat(sEsc, "\\u");
788 strcat(sEsc, b);
789 strcat(s, b);
790 strcat(s, " ");
791 if(source[i] < 0x80) {
792 sprintf(b, "(%c)", source[i]);
793 strcat(sEsc, b);
794 }
795 }
796 for(i = 0; i<tLen; i++) {
797 sprintf(b, "%04X", target[i]);
798 strcat(tEsc, "\\u");
799 strcat(tEsc, b);
800 strcat(t, b);
801 strcat(t, " ");
802 if(target[i] < 0x80) {
803 sprintf(b, "(%c)", target[i]);
804 strcat(tEsc, b);
805 }
806 }
807 /*
808 strcpy(output, "[[ ");
809 strcat(output, sEsc);
810 strcat(output, getRelationSymbol(expRes, expStrength, relation));
811 strcat(output, tEsc);
812
813 strcat(output, " : ");
814
815 strcat(output, sEsc);
816 strcat(output, getRelationSymbol(realRes, realStrength, relation));
817 strcat(output, tEsc);
818 strcat(output, " ]] ");
819
820 log_verbose("%s", output);
821 */
822
823
824 strcpy(output, "DIFF: ");
825
826 strcat(output, s);
827 strcat(output, " : ");
828 strcat(output, t);
829
830 strcat(output, test);
831 strcat(output, ": ");
832
833 strcat(output, sEsc);
834 strcat(output, getRelationSymbol(expRes, expStrength, relation));
835 strcat(output, tEsc);
836
837 strcat(output, " ");
838
839 strcat(output, platform);
840 strcat(output, ": ");
841
842 strcat(output, sEsc);
843 strcat(output, getRelationSymbol(realRes, realStrength, relation));
844 strcat(output, tEsc);
845
846 outputLength = (int32_t)strlen(output);
847 if(outputLength > maxOutputLength) {
848 maxOutputLength = outputLength;
849 U_ASSERT(outputLength < sizeof(output));
850 }
851
852 log_verbose("%s\n", output);
853
854 }
855
856 /*
857 static void printOutRules(const UChar *rules) {
858 uint32_t len = u_strlen(rules);
859 uint32_t i = 0;
860 char toPrint;
861 uint32_t line = 0;
862
863 fprintf(stdout, "Rules:");
864
865 for(i = 0; i<len; i++) {
866 if(rules[i]<0x7f && rules[i]>=0x20) {
867 toPrint = (char)rules[i];
868 if(toPrint == '&') {
869 line = 1;
870 fprintf(stdout, "\n&");
871 } else if(toPrint == ';') {
872 fprintf(stdout, "<<");
873 line+=2;
874 } else if(toPrint == ',') {
875 fprintf(stdout, "<<<");
876 line+=3;
877 } else {
878 fprintf(stdout, "%c", toPrint);
879 line++;
880 }
881 } else if(rules[i]<0x3400 || rules[i]>=0xa000) {
882 fprintf(stdout, "\\u%04X", rules[i]);
883 line+=6;
884 }
885 if(line>72) {
886 fprintf(stdout, "\n");
887 line = 0;
888 }
889 }
890
891 log_verbose("\n");
892
893 }
894 */
895
896 static uint32_t testSwitch(tst_strcoll* func, void *collator, int opts, uint32_t strength, const UChar *first, const UChar *second, const char* msg, UBool error ) {
897 uint32_t diffs = 0;
898 UCollationResult realResult;
899 uint32_t realStrength;
900
901 uint32_t sLen = u_strlen(first);
902 uint32_t tLen = u_strlen(second);
903
904 realResult = func(collator, opts, first, sLen, second, tLen);
905 realStrength = probeStrength(func, collator, opts, first, sLen, second, tLen, realResult);
906
907 if(strength == UCOL_IDENTICAL && realResult != UCOL_EQUAL) {
908 logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStre ngth, UCOL_EQUAL, strength, error);
909 diffs++;
910 } else if(realResult != UCOL_LESS || realStrength != strength) {
911 logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStre ngth, UCOL_LESS, strength, error);
912 diffs++;
913 }
914 return diffs;
915 }
916
917
918 static void testAgainstUCA(UCollator *coll, UCollator *UCA, const char *refName, UBool error, UErrorCode *status) {
919 const UChar *rules = NULL, *current = NULL;
920 int32_t ruleLen = 0;
921 uint32_t strength = 0;
922 uint32_t chOffset = 0; uint32_t chLen = 0;
923 uint32_t exOffset = 0; uint32_t exLen = 0;
924 uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
925 /* uint32_t rExpsLen = 0; */
926 uint32_t firstLen = 0, secondLen = 0;
927 UBool varT = FALSE; UBool top_ = TRUE;
928 uint16_t specs = 0;
929 UBool startOfRules = TRUE;
930 UColTokenParser src;
931 UColOptionSet opts;
932
933 UChar first[256];
934 UChar second[256];
935 UChar *rulesCopy = NULL;
936
937 uint32_t UCAdiff = 0;
938 uint32_t Windiff = 1;
939 UParseError parseError;
940
941 (void)top_; /* Suppress set but not used warnings. */
942 (void)varT;
943 (void)secondLen;
944 (void)prefixLen;
945 (void)prefixOffset;
946
947 uprv_memset(&src, 0, sizeof(UColTokenParser));
948 src.opts = &opts;
949
950 rules = ucol_getRules(coll, &ruleLen);
951
952 /*printOutRules(rules);*/
953
954 if(U_SUCCESS(*status) && ruleLen > 0) {
955 rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*si zeof(UChar));
956 uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
957 src.current = src.source = rulesCopy;
958 src.end = rulesCopy+ruleLen;
959 src.extraCurrent = src.end;
960 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
961 *first = *second = 0;
962
963 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken ca n cause the pointer to
964 the rules copy in src.source to get reallocated, freeing the original poi nter in rulesCopy */
965 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,st atus)) != NULL) {
966 strength = src.parsedToken.strength;
967 chOffset = src.parsedToken.charsOffset;
968 chLen = src.parsedToken.charsLen;
969 exOffset = src.parsedToken.extensionOffset;
970 exLen = src.parsedToken.extensionLen;
971 prefixOffset = src.parsedToken.prefixOffset;
972 prefixLen = src.parsedToken.prefixLen;
973 specs = src.parsedToken.flags;
974
975 startOfRules = FALSE;
976 varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
977 top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
978
979 u_strncpy(second,src.source+chOffset, chLen);
980 second[chLen] = 0;
981 secondLen = chLen;
982
983 if(exLen > 0) {
984 u_strncat(first, src.source+exOffset, exLen);
985 first[firstLen+exLen] = 0;
986 firstLen += exLen;
987 }
988
989 if(strength != UCOL_TOK_RESET) {
990 if((*first<0x3400 || *first>=0xa000) && (*second<0x3400 || *second>=0xa0 00)) {
991 UCAdiff += testSwitch(&ucaTest, (void *)UCA, 0, strength, first, secon d, refName, error);
992 /*Windiff += testSwitch(&winTest, (void *)lcid, 0, strength, first, se cond, "Win32");*/
993 }
994 }
995
996
997 firstLen = chLen;
998 u_strcpy(first, second);
999
1000 }
1001 if(UCAdiff != 0 && Windiff != 0) {
1002 log_verbose("\n");
1003 }
1004 if(UCAdiff == 0) {
1005 log_verbose("No immediate difference with %s!\n", refName);
1006 }
1007 if(Windiff == 0) {
1008 log_verbose("No immediate difference with Win32!\n");
1009 }
1010 uprv_free(src.source);
1011 uprv_free(src.reorderCodes);
1012 }
1013 }
1014
/*
 * Compares two collation elements, each given as a lead CE plus a
 * continuation CE, level by level: the upper 16 bits of both words first,
 * then bits 8-15 of both words, then the low 8 bits of both words. At each
 * level the lead CE's bits are more significant than the continuation's.
 *
 * s1/s2 are the lead and continuation of the first element, t1/t2 those of
 * the second.
 *
 * Returns 0 when both words are equal, -1 when the first element orders
 * before the second, and 1 otherwise.
 */
static int32_t compareCEs(uint32_t s1, uint32_t s2,
                          uint32_t t1, uint32_t t2) {
    uint32_t lhs;
    uint32_t rhs;

    if(s1 == t1 && s2 == t2) {
        return 0;
    }

    /* Upper 16 bits of lead and continuation. */
    lhs = (s1 & 0xFFFF0000) | ((s2 & 0xFFFF0000) >> 16);
    rhs = (t1 & 0xFFFF0000) | ((t2 & 0xFFFF0000) >> 16);
    if(lhs != rhs) {
        return (lhs < rhs) ? -1 : 1;
    }

    /* Bits 8-15 of lead and continuation. */
    lhs = (s1 & 0x0000FF00) | ((s2 & 0x0000FF00) >> 8);
    rhs = (t1 & 0x0000FF00) | ((t2 & 0x0000FF00) >> 8);
    if(lhs != rhs) {
        return (lhs < rhs) ? -1 : 1;
    }

    /* Low 8 bits of lead and continuation. */
    lhs = ((s1 & 0x000000FF) << 8) | (s2 & 0x000000FF);
    rhs = ((t1 & 0x000000FF) << 8) | (t2 & 0x000000FF);
    return (lhs < rhs) ? -1 : 1;
}
1051
/* One entry of the indirect-position table: a start CE pair and a limit CE pair. */
typedef struct {
    uint32_t startCE;       /* first word of the start pair */
    uint32_t startContCE;   /* second word of the start pair */
    uint32_t limitCE;       /* first word of the limit pair, 0 when no limit was given */
    uint32_t limitContCE;   /* second word of the limit pair, 0 when no limit was given */
} indirectBoundaries;
1058
1059 /* these values are used for finding CE values for indirect positioning. */
1060 /* Indirect positioning is a mechanism for allowing resets on symbolic */
1061 /* values. It only works for resets and you cannot tailor indirect names */
1062 /* An indirect name can define either an anchor point or a range. An */
1063 /* anchor point behaves in exactly the same way as a code point in reset */
1064 /* would, except that it cannot be tailored. A range (we currently only */
1065 /* know for the [top] range will explicitly set the upper bound for */
1066 /* generated CEs, thus allowing for better control over how many CEs can */
1067 /* be squeezed between in the range without performance penalty. */
1068 /* In that respect, we use [top] for tailoring of locales that use CJK */
1069 /* characters. Other indirect values are currently a pure convenience, */
1070 /* they can be used to assure that the CEs will be always positioned in */
1071 /* the same place relative to a point with known properties (e.g. first */
1072 /* primary ignorable). */
1073 static indirectBoundaries ucolIndirectBoundaries[15];
1074 static UBool indirectBoundariesSet = FALSE;
1075 static void setIndirectBoundaries(uint32_t indexR, uint32_t *start, uint32_t *en d) {
1076 /* Set values for the top - TODO: once we have values for all the indirects, we are going */
1077 /* to initalize here. */
1078 ucolIndirectBoundaries[indexR].startCE = start[0];
1079 ucolIndirectBoundaries[indexR].startContCE = start[1];
1080 if(end) {
1081 ucolIndirectBoundaries[indexR].limitCE = end[0];
1082 ucolIndirectBoundaries[indexR].limitContCE = end[1];
1083 } else {
1084 ucolIndirectBoundaries[indexR].limitCE = 0;
1085 ucolIndirectBoundaries[indexR].limitContCE = 0;
1086 }
1087 }
1088
1089 static void testCEs(UCollator *coll, UErrorCode *status) {
1090 const UChar *rules = NULL, *current = NULL;
1091 int32_t ruleLen = 0;
1092
1093 uint32_t strength = 0;
1094 uint32_t maxStrength = UCOL_IDENTICAL;
1095 uint32_t baseCE, baseContCE, nextCE, nextContCE, currCE, currContCE;
1096 uint32_t lastCE;
1097 uint32_t lastContCE;
1098
1099 int32_t result = 0;
1100 uint32_t chOffset = 0; uint32_t chLen = 0;
1101 uint32_t exOffset = 0; uint32_t exLen = 0;
1102 uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
1103 uint32_t oldOffset = 0;
1104
1105 /* uint32_t rExpsLen = 0; */
1106 /* uint32_t firstLen = 0; */
1107 uint16_t specs = 0;
1108 UBool varT = FALSE; UBool top_ = TRUE;
1109 UBool startOfRules = TRUE;
1110 UBool before = FALSE;
1111 UColTokenParser src;
1112 UColOptionSet opts;
1113 UParseError parseError;
1114 UChar *rulesCopy = NULL;
1115 collIterate *c = uprv_new_collIterate(status);
1116 UCAConstants *consts = NULL;
1117 uint32_t UCOL_RESET_TOP_VALUE, /*UCOL_RESET_TOP_CONT, */
1118 UCOL_NEXT_TOP_VALUE, UCOL_NEXT_TOP_CONT;
1119 const char *colLoc;
1120 UCollator *UCA = ucol_open("root", status);
1121
1122 (void)varT; /* Suppress set but not used warnings. */
1123 (void)prefixLen;
1124 (void)prefixOffset;
1125 (void)exLen;
1126 (void)exOffset;
1127
1128 if (U_FAILURE(*status)) {
1129 log_err("Could not open root collator %s\n", u_errorName(*status));
1130 uprv_delete_collIterate(c);
1131 return;
1132 }
1133
1134 colLoc = ucol_getLocaleByType(coll, ULOC_ACTUAL_LOCALE, status);
1135 if (U_FAILURE(*status)) {
1136 log_err("Could not get collator name: %s\n", u_errorName(*status));
1137 ucol_close(UCA);
1138 uprv_delete_collIterate(c);
1139 return;
1140 }
1141
1142 uprv_memset(&src, 0, sizeof(UColTokenParser));
1143
1144 consts = (UCAConstants *)((uint8_t *)UCA->image + UCA->image->UCAConsts);
1145 UCOL_RESET_TOP_VALUE = consts->UCA_LAST_NON_VARIABLE[0];
1146 /*UCOL_RESET_TOP_CONT = consts->UCA_LAST_NON_VARIABLE[1]; */
1147 UCOL_NEXT_TOP_VALUE = consts->UCA_FIRST_IMPLICIT[0];
1148 UCOL_NEXT_TOP_CONT = consts->UCA_FIRST_IMPLICIT[1];
1149
1150 baseCE=baseContCE=nextCE=nextContCE=currCE=currContCE=lastCE=lastContCE = UC OL_NOT_FOUND;
1151
1152 src.opts = &opts;
1153
1154 rules = ucol_getRules(coll, &ruleLen);
1155
1156 src.invUCA = ucol_initInverseUCA(status);
1157
1158 if(indirectBoundariesSet == FALSE) {
1159 /* UCOL_RESET_TOP_VALUE */
1160 setIndirectBoundaries(0, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRS T_IMPLICIT);
1161 /* UCOL_FIRST_PRIMARY_IGNORABLE */
1162 setIndirectBoundaries(1, consts->UCA_FIRST_PRIMARY_IGNORABLE, 0);
1163 /* UCOL_LAST_PRIMARY_IGNORABLE */
1164 setIndirectBoundaries(2, consts->UCA_LAST_PRIMARY_IGNORABLE, 0);
1165 /* UCOL_FIRST_SECONDARY_IGNORABLE */
1166 setIndirectBoundaries(3, consts->UCA_FIRST_SECONDARY_IGNORABLE, 0);
1167 /* UCOL_LAST_SECONDARY_IGNORABLE */
1168 setIndirectBoundaries(4, consts->UCA_LAST_SECONDARY_IGNORABLE, 0);
1169 /* UCOL_FIRST_TERTIARY_IGNORABLE */
1170 setIndirectBoundaries(5, consts->UCA_FIRST_TERTIARY_IGNORABLE, 0);
1171 /* UCOL_LAST_TERTIARY_IGNORABLE */
1172 setIndirectBoundaries(6, consts->UCA_LAST_TERTIARY_IGNORABLE, 0);
1173 /* UCOL_FIRST_VARIABLE */
1174 setIndirectBoundaries(7, consts->UCA_FIRST_VARIABLE, 0);
1175 /* UCOL_LAST_VARIABLE */
1176 setIndirectBoundaries(8, consts->UCA_LAST_VARIABLE, 0);
1177 /* UCOL_FIRST_NON_VARIABLE */
1178 setIndirectBoundaries(9, consts->UCA_FIRST_NON_VARIABLE, 0);
1179 /* UCOL_LAST_NON_VARIABLE */
1180 setIndirectBoundaries(10, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIR ST_IMPLICIT);
1181 /* UCOL_FIRST_IMPLICIT */
1182 setIndirectBoundaries(11, consts->UCA_FIRST_IMPLICIT, 0);
1183 /* UCOL_LAST_IMPLICIT */
1184 setIndirectBoundaries(12, consts->UCA_LAST_IMPLICIT, consts->UCA_FIRST_T RAILING);
1185 /* UCOL_FIRST_TRAILING */
1186 setIndirectBoundaries(13, consts->UCA_FIRST_TRAILING, 0);
1187 /* UCOL_LAST_TRAILING */
1188 setIndirectBoundaries(14, consts->UCA_LAST_TRAILING, 0);
1189 ucolIndirectBoundaries[14].limitCE = (consts->UCA_PRIMARY_SPECIAL_MIN<<2 4);
1190 indirectBoundariesSet = TRUE;
1191 }
1192
1193
1194 if(U_SUCCESS(*status) && ruleLen > 0) {
1195 rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE )*sizeof(UChar));
1196 uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
1197 src.current = src.source = rulesCopy;
1198 src.end = rulesCopy+ruleLen;
1199 src.extraCurrent = src.end;
1200 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
1201
1202 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNext Token can cause the pointer to
1203 the rules copy in src.source to get reallocated, freeing the orig inal pointer in rulesCopy */
1204 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseErro r,status)) != NULL) {
1205 strength = src.parsedToken.strength;
1206 chOffset = src.parsedToken.charsOffset;
1207 chLen = src.parsedToken.charsLen;
1208 exOffset = src.parsedToken.extensionOffset;
1209 exLen = src.parsedToken.extensionLen;
1210 prefixOffset = src.parsedToken.prefixOffset;
1211 prefixLen = src.parsedToken.prefixLen;
1212 specs = src.parsedToken.flags;
1213
1214 startOfRules = FALSE;
1215 varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
1216 top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
1217
1218 uprv_init_collIterate(coll, src.source+chOffset, chLen, c, status);
1219
1220 currCE = ucol_getNextCE(coll, c, status);
1221 if(currCE == 0 && UCOL_ISTHAIPREVOWEL(*(src.source+chOffset))) {
1222 log_verbose("Thai prevowel detected. Will pick next CE\n");
1223 currCE = ucol_getNextCE(coll, c, status);
1224 }
1225
1226 currContCE = ucol_getNextCE(coll, c, status);
1227 if(!isContinuation(currContCE)) {
1228 currContCE = 0;
1229 }
1230
1231 /* we need to repack CEs here */
1232
1233 if(strength == UCOL_TOK_RESET) {
1234 before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
1235 if(top_ == TRUE) {
1236 int32_t tokenIndex = src.parsedToken.indirectIndex;
1237
1238 nextCE = baseCE = currCE = ucolIndirectBoundaries[tokenIndex ].startCE;
1239 nextContCE = baseContCE = currContCE = ucolIndirectBoundarie s[tokenIndex].startContCE;
1240 } else {
1241 nextCE = baseCE = currCE;
1242 nextContCE = baseContCE = currContCE;
1243 }
1244 maxStrength = UCOL_IDENTICAL;
1245 } else {
1246 if(strength < maxStrength) {
1247 maxStrength = strength;
1248 if(baseCE == UCOL_RESET_TOP_VALUE) {
1249 log_verbose("Resetting to [top]\n");
1250 nextCE = UCOL_NEXT_TOP_VALUE;
1251 nextContCE = UCOL_NEXT_TOP_CONT;
1252 } else {
1253 result = ucol_inv_getNextCE(&src, baseCE & 0xFFFFFF3F, b aseContCE, &nextCE, &nextContCE, maxStrength);
1254 }
1255 if(result < 0) {
1256 if(ucol_isTailored(coll, *(src.source+oldOffset), status )) {
1257 log_verbose("Reset is tailored codepoint %04X, don't know how to continue, taking next test\n", *(src.source+oldOffset));
1258 return;
1259 } else {
1260 log_err("%s: couldn't find the CE\n", colLoc);
1261 return;
1262 }
1263 }
1264 }
1265
1266 currCE &= 0xFFFFFF3F;
1267 currContCE &= 0xFFFFFFBF;
1268
1269 if(maxStrength == UCOL_IDENTICAL) {
1270 if(baseCE != currCE || baseContCE != currContCE) {
1271 log_err("%s: current CE (initial strength UCOL_EQUAL)\n ", colLoc);
1272 }
1273 } else {
1274 if(strength == UCOL_IDENTICAL) {
1275 if(lastCE != currCE || lastContCE != currContCE) {
1276 log_err("%s: current CE (initial strength UCOL_EQUA L)\n", colLoc);
1277 }
1278 } else {
1279 if(compareCEs(currCE, currContCE, nextCE, nextContCE) > 0) {
1280 /*if(currCE > nextCE || (currCE == nextCE && currCon tCE >= nextContCE)) {*/
1281 log_err("%s: current CE is not less than base CE\n", colLoc);
1282 }
1283 if(!before) {
1284 if(compareCEs(currCE, currContCE, lastCE, lastContCE ) < 0) {
1285 /*if(currCE < lastCE || (currCE == lastCE && cur rContCE <= lastContCE)) {*/
1286 log_err("%s: sequence of generated CEs is broken \n", colLoc);
1287 }
1288 } else {
1289 before = FALSE;
1290 if(compareCEs(currCE, currContCE, lastCE, lastContCE ) > 0) {
1291 /*if(currCE < lastCE || (currCE == lastCE && cur rContCE <= lastContCE)) {*/
1292 log_err("%s: sequence of generated CEs is broken \n", colLoc);
1293 }
1294 }
1295 }
1296 }
1297
1298 }
1299
1300 oldOffset = chOffset;
1301 lastCE = currCE & 0xFFFFFF3F;
1302 lastContCE = currContCE & 0xFFFFFFBF;
1303 }
1304 uprv_free(src.source);
1305 uprv_free(src.reorderCodes);
1306 }
1307 ucol_close(UCA);
1308 uprv_delete_collIterate(c);
1309 }
1310
#if 0
/*
 * Fixed list of locales that used to drive the locale loops.
 * Kept for reference only: these locales are now picked from index RB.
 */
static const char* localesToTest[] = {
    "ar",
    "bg",
    "ca", "cs",
    "da",
    "el", "en_BE", "en_US_POSIX", "es", "et",
    "fi", "fr",
    "hi", "hr", "hu",
    "is", "iw",
    "ja",
    "ko",
    "lt", "lv",
    "mk", "mt",
    "nb", "nn", "nn_NO",
    "pl",
    "ro", "ru",
    "sh", "sk", "sl", "sq", "sr", "sv",
    "th", "tr",
    "uk",
    "vi",
    "zh", "zh_TW"
};
#endif
1325
/*
 * Hand-written tailoring rules, stored in escaped form (they are passed
 * through u_unescape() before use).  The comment after each "[top]" rule
 * shows the earlier formulation of the same rule.
 */
static const char* rulesToTest[] = {
    /* Funky fa rule */
    "&\\u0622 < \\u0627 << \\u0671 < \\u0621",

    /*"& Z < p, P",*/

    /* Cui Mins rules */
    "&[top]<o,O<p,P<q,Q<'?'/u<r,R<u,U",
        /*"<o,O<p,P<q,Q<r,R<u,U & Qu<'?'",*/
    "&[top]<o,O<p,P<q,Q;'?'/u<r,R<u,U",
        /*"<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
    "&[top]<o,O<p,P<q,Q,'?'/u<r,R<u,U",
        /*"<o,O<p,P<q,Q<r,R<u,U&'Qu','?'",*/
    "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/u<r,R<u,U",
        /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
    "&[top]<'?';Qu<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U",
        /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qu",*/
    "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/um<r,R<u,U",
        /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qum;'?'",*/
    "&[top]<'?';Qum<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U"
        /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qum"*/
};
1339
1340
1341 static void TestCollations(void) {
1342 int32_t noOfLoc = uloc_countAvailable();
1343 int32_t i = 0, j = 0;
1344
1345 UErrorCode status = U_ZERO_ERROR;
1346 char cName[256];
1347 UChar name[256];
1348 int32_t nameSize;
1349
1350
1351 const char *locName = NULL;
1352 UCollator *coll = NULL;
1353 UCollator *UCA = ucol_open("", &status);
1354 UColAttributeValue oldStrength = ucol_getAttribute(UCA, UCOL_STRENGTH, &stat us);
1355 if (U_FAILURE(status)) {
1356 log_err_status(status, "Could not open UCA collator %s\n", u_errorName(s tatus));
1357 return;
1358 }
1359 ucol_setAttribute(UCA, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
1360
1361 for(i = 0; i<noOfLoc; i++) {
1362 status = U_ZERO_ERROR;
1363 locName = uloc_getAvailable(i);
1364 if(uprv_strcmp("ja", locName) == 0) {
1365 log_verbose("Don't know how to test prefixes\n");
1366 continue;
1367 }
1368 if(hasCollationElements(locName)) {
1369 nameSize = uloc_getDisplayName(locName, NULL, name, 256, &status);
1370 for(j = 0; j<nameSize; j++) {
1371 cName[j] = (char)name[j];
1372 }
1373 cName[nameSize] = 0;
1374 log_verbose("\nTesting locale %s (%s)\n", locName, cName);
1375 coll = ucol_open(locName, &status);
1376 if(U_SUCCESS(status)) {
1377 testAgainstUCA(coll, UCA, "UCA", FALSE, &status);
1378 ucol_close(coll);
1379 } else {
1380 log_err("Couldn't instantiate collator for locale %s, error: %s\ n", locName, u_errorName(status));
1381 status = U_ZERO_ERROR;
1382 }
1383 }
1384 }
1385 ucol_setAttribute(UCA, UCOL_STRENGTH, oldStrength, &status);
1386 ucol_close(UCA);
1387 }
1388
1389 static void RamsRulesTest(void) {
1390 UErrorCode status = U_ZERO_ERROR;
1391 int32_t i = 0;
1392 UCollator *coll = NULL;
1393 UChar rule[2048];
1394 uint32_t ruleLen;
1395 int32_t noOfLoc = uloc_countAvailable();
1396 const char *locName = NULL;
1397
1398 log_verbose("RamsRulesTest\n");
1399
1400 if (uprv_strcmp("km", uloc_getDefault())==0 || uprv_strcmp("km_KH", uloc_get Default())==0) {
1401 /* This test will fail if the default locale is "km" or "km_KH". Enable after trac#6040. */
1402 return;
1403 }
1404
1405 for(i = 0; i<noOfLoc; i++) {
1406 locName = uloc_getAvailable(i);
1407 if(hasCollationElements(locName)) {
1408 if (uprv_strcmp("ja", locName)==0) {
1409 log_verbose("Don't know how to test Japanese because of prefixes \n");
1410 continue;
1411 }
1412 if (uprv_strcmp("de__PHONEBOOK", locName)==0) {
1413 log_verbose("Don't know how to test Phonebook because the reset is on an expanding character\n");
1414 continue;
1415 }
1416 if (uprv_strcmp("bn", locName)==0 ||
1417 uprv_strcmp("bs", locName)==0 || /* Add due to import per cldrbug 5647 */
1418 uprv_strcmp("bs_Cyrl", locName)==0 || /* Add due to import per cldrbug 5647 */
1419 uprv_strcmp("en_US_POSIX", locName)==0 ||
1420 uprv_strcmp("fa", locName)==0 || /* Add in #10222 wit h CLDR 24 integration */
1421 uprv_strcmp("fa_AF", locName)==0 || /* Add due to import per cldrbug 5647 */
1422 uprv_strcmp("gl", locName)==0 || /* Add due to import per cldrbug 5647 */
1423 uprv_strcmp("gl_ES", locName)==0 || /* Add due to import per cldrbug 5647 */
1424 uprv_strcmp("he", locName)==0 || /* Add due to new ta iloring of \u05F3 vs \u0027 per cldrbug 5576 */
1425 uprv_strcmp("he_IL", locName)==0 || /* Add due to new ta iloring of \u05F3 vs \u0027 per cldrbug 5576 */
1426 uprv_strcmp("km", locName)==0 ||
1427 uprv_strcmp("km_KH", locName)==0 ||
1428 uprv_strcmp("my", locName)==0 ||
1429 uprv_strcmp("ps", locName)==0 || /* Add in #10222 wit h CLDR 24 integration */
1430 uprv_strcmp("si", locName)==0 ||
1431 uprv_strcmp("si_LK", locName)==0 ||
1432 uprv_strcmp("sr_Latn", locName)==0 || /* Add due to import per cldrbug 5647 */
1433 uprv_strcmp("th", locName)==0 ||
1434 uprv_strcmp("th_TH", locName)==0 ||
1435 uprv_strcmp("zh", locName)==0 ||
1436 uprv_strcmp("zh_Hant", locName)==0
1437 ) {
1438 if(log_knownIssue("6040", NULL)) {
1439 log_verbose("Can't test %s - TODO: Fix ticket #6040 and reenable RamsRulesTest for this locale.\n", locName);
1440 continue;
1441 }
1442 }
1443 log_verbose("Testing locale %s\n", locName);
1444 status = U_ZERO_ERROR;
1445 coll = ucol_open(locName, &status);
1446 if(U_SUCCESS(status)) {
1447 if((status != U_USING_DEFAULT_WARNING) && (status != U_USING_FALLB ACK_WARNING)) {
1448 if(coll->image->jamoSpecial == TRUE) {
1449 log_err("%s has special JAMOs\n", locName);
1450 }
1451 ucol_setAttribute(coll, UCOL_CASE_FIRST, UCOL_OFF, &status);
1452 testCollator(coll, &status);
1453 testCEs(coll, &status);
1454 } else {
1455 log_verbose("Skipping %s: %s\n", locName, u_errorName(status));
1456 }
1457 ucol_close(coll);
1458 } else {
1459 log_err("Could not open %s: %s\n", locName, u_errorName(status));
1460 }
1461 }
1462 }
1463
1464 for(i = 0; i<sizeof(rulesToTest)/sizeof(rulesToTest[0]); i++) {
1465 log_verbose("Testing rule: %s\n", rulesToTest[i]);
1466 ruleLen = u_unescape(rulesToTest[i], rule, 2048);
1467 status = U_ZERO_ERROR;
1468 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&stat us);
1469 if(U_SUCCESS(status)) {
1470 testCollator(coll, &status);
1471 testCEs(coll, &status);
1472 ucol_close(coll);
1473 } else {
1474 log_err_status(status, "Could not test rule: %s: '%s'\n", u_errorName( status), rulesToTest[i]);
1475 }
1476 }
1477
1478 }
1479
1480 static void IsTailoredTest(void) {
1481 UErrorCode status = U_ZERO_ERROR;
1482 uint32_t i = 0;
1483 UCollator *coll = NULL;
1484 UChar rule[2048];
1485 UChar tailored[2048];
1486 UChar notTailored[2048];
1487 uint32_t ruleLen, tailoredLen, notTailoredLen;
1488
1489 log_verbose("IsTailoredTest\n");
1490
1491 u_uastrcpy(rule, "&Z < A, B, C;c < d");
1492 ruleLen = u_strlen(rule);
1493
1494 u_uastrcpy(tailored, "ABCcd");
1495 tailoredLen = u_strlen(tailored);
1496
1497 u_uastrcpy(notTailored, "ZabD");
1498 notTailoredLen = u_strlen(notTailored);
1499
1500 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1501 if(U_SUCCESS(status)) {
1502 for(i = 0; i<tailoredLen; i++) {
1503 if(!ucol_isTailored(coll, tailored[i], &status)) {
1504 log_err("%i: %04X should be tailored - it is reported as not\n", i, tailored[i]);
1505 }
1506 }
1507 for(i = 0; i<notTailoredLen; i++) {
1508 if(ucol_isTailored(coll, notTailored[i], &status)) {
1509 log_err("%i: %04X should not be tailored - it is reported as it is\n", i, notTailored[i]);
1510 }
1511 }
1512 ucol_close(coll);
1513 }
1514 else {
1515 log_err_status(status, "Can't tailor rules\n");
1516 }
1517 /* Code coverage */
1518 status = U_ZERO_ERROR;
1519 coll = ucol_open("ja", &status);
1520 if(!ucol_isTailored(coll, 0x4E9C, &status)) {
1521 log_err_status(status, "0x4E9C should be tailored - it is reported as no t\n");
1522 }
1523 ucol_close(coll);
1524 }
1525
1526
1527 const static char chTest[][20] = { 415 const static char chTest[][20] = {
1528 "c", 416 "c",
1529 "C", 417 "C",
1530 "ca", "cb", "cx", "cy", "CZ", 418 "ca", "cb", "cx", "cy", "CZ",
1531 "c\\u030C", "C\\u030C", 419 "c\\u030C", "C\\u030C",
1532 "h", 420 "h",
1533 "H", 421 "H",
1534 "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY", 422 "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
1535 "ch", "cH", "Ch", "CH", 423 "ch", "cH", "Ch", "CH",
1536 "cha", "charly", "che", "chh", "chch", "chr", 424 "cha", "charly", "che", "chh", "chch", "chr",
(...skipping 29 matching lines...) Expand all
1566 } 454 }
1567 else { 455 else {
1568 log_data_err("Can't open collator"); 456 log_data_err("Can't open collator");
1569 } 457 }
1570 ucol_close(coll); 458 ucol_close(coll);
1571 } 459 }
1572 460
1573 461
1574 462
1575 463
464 /*
1576 const static char impTest[][20] = { 465 const static char impTest[][20] = {
1577 "\\u4e00", 466 "\\u4e00",
1578 "a", 467 "a",
1579 "A", 468 "A",
1580 "b", 469 "b",
1581 "B", 470 "B",
1582 "\\u4e01" 471 "\\u4e01"
1583 }; 472 };
473 */
1584 474
1585 475
1586 static void TestImplicitTailoring(void) { 476 static void TestImplicitTailoring(void) {
1587 static const struct { 477 static const struct {
1588 const char *rules; 478 const char *rules;
1589 const char *data[10]; 479 const char *data[10];
1590 const uint32_t len; 480 const uint32_t len;
1591 } tests[] = { 481 } tests[] = {
1592 { "&[before 1]\\u4e00 < b < c &[before 1]\\u4e00 < d < e", { "d", "e", "b" , "c", "\\u4e00"}, 5 }, 482 {
483 /* Tailor b and c before U+4E00. */
484 "&[before 1]\\u4e00 < b < c "
485 /* Now, before U+4E00 is c; put d and e after that. */
486 "&[before 1]\\u4e00 < d < e",
487 { "b", "c", "d", "e", "\\u4e00"}, 5 },
1593 { "&\\u4e00 < a <<< A < b <<< B", { "\\u4e00", "a", "A", "b", "B", "\\u4 e01"}, 6 }, 488 { "&\\u4e00 < a <<< A < b <<< B", { "\\u4e00", "a", "A", "b", "B", "\\u4 e01"}, 6 },
1594 { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e 00"}, 3}, 489 { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e 00"}, 3},
1595 { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e 01"}, 3} 490 { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e 01"}, 3}
1596 }; 491 };
1597 492
1598 int32_t i = 0; 493 int32_t i = 0;
1599 494
1600 for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) { 495 for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
1601 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len); 496 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
1602 } 497 }
(...skipping 324 matching lines...) Expand 10 before | Expand all | Expand 10 after
1927 "af", 822 "af",
1928 "Af", 823 "Af",
1929 "b", 824 "b",
1930 "B" 825 "B"
1931 }; 826 };
1932 genericLocaleStarter("fr", data, sizeof(data)/sizeof(data[0])); 827 genericLocaleStarter("fr", data, sizeof(data)/sizeof(data[0]));
1933 genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", data, sizeof(dat a)/sizeof(data[0])); 828 genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", data, sizeof(dat a)/sizeof(data[0]));
1934 } 829 }
1935 830
1936 831
1937 /*
1938 "& a < b < c < d& r < c", "& a < b < d& r < c" ,
1939 "& a < b < c < d& c < m", "& a < b < c < m < d ",
1940 "& a < b < c < d& a < m", "& a < m < b < c < d ",
1941 "& a <<< b << c < d& a < m", "& a <<< b << c < m < d",
1942 "& a < b < c < d& [before 1] c < m", "& a < b < m < c < d ",
1943 "& a < b <<< c << d <<< e& [before 3] e <<< x", "& a < b <<< c << d < << x <<< e",
1944 "& a < b <<< c << d <<< e& [before 2] e <<< x", "& a < b <<< c <<< x << d <<< e",
1945 "& a < b <<< c << d <<< e& [before 1] e <<< x", "& a <<< x < b <<< c << d <<< e",
1946 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x", "& a < b <<< c << d < << e <<< f < x < g",
1947 */
1948 static void TestRedundantRules(void) {
1949 int32_t i;
1950
1951 static const struct {
1952 const char *rules;
1953 const char *expectedRules;
1954 const char *testdata[8];
1955 uint32_t testdatalen;
1956 } tests[] = {
1957 /* this test conflicts with positioning of CODAN placeholder */
1958 /*{
1959 "& a <<< b <<< c << d <<< e& [before 1] e <<< x",
1960 "&\\u2089<<<x",
1961 {"\\u2089", "x"}, 2
1962 }, */
1963 /* this test conflicts with the [before x] syntax tightening */
1964 /*{
1965 "& b <<< c <<< d << e <<< f& [before 1] f <<< x",
1966 "&\\u0252<<<x",
1967 {"\\u0252", "x"}, 2
1968 }, */
1969 /* this test conflicts with the [before x] syntax tightening */
1970 /*{
1971 "& a < b <<< c << d <<< e& [before 1] e <<< x",
1972 "& a <<< x < b <<< c << d <<< e",
1973 {"a", "x", "b", "c", "d", "e"}, 6
1974 }, */
1975 {
1976 "& a < b < c < d& [before 1] c < m",
1977 "& a < b < m < c < d",
1978 {"a", "b", "m", "c", "d"}, 5
1979 },
1980 {
1981 "& a < b <<< c << d <<< e& [before 3] e <<< x",
1982 "& a < b <<< c << d <<< x <<< e",
1983 {"a", "b", "c", "d", "x", "e"}, 6
1984 },
1985 /* this test conflicts with the [before x] syntax tightening */
1986 /* {
1987 "& a < b <<< c << d <<< e& [before 2] e <<< x",
1988 "& a < b <<< c <<< x << d <<< e",
1989 {"a", "b", "c", "x", "d", "e"},, 6
1990 }, */
1991 {
1992 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",
1993 "& a < b <<< c << d <<< e <<< f < x < g",
1994 {"a", "b", "c", "d", "e", "f", "x", "g"}, 8
1995 },
1996 {
1997 "& a <<< b << c < d& a < m",
1998 "& a <<< b << c < m < d",
1999 {"a", "b", "c", "m", "d"}, 5
2000 },
2001 {
2002 "&a<b<<b\\u0301 &z<b",
2003 "&a<b\\u0301 &z<b",
2004 {"a", "b\\u0301", "z", "b"}, 4
2005 },
2006 {
2007 "&z<m<<<q<<<m",
2008 "&z<q<<<m",
2009 {"z", "q", "m"},3
2010 },
2011 {
2012 "&z<<<m<q<<<m",
2013 "&z<q<<<m",
2014 {"z", "q", "m"}, 3
2015 },
2016 {
2017 "& a < b < c < d& r < c",
2018 "& a < b < d& r < c",
2019 {"a", "b", "d"}, 3
2020 },
2021 {
2022 "& a < b < c < d& r < c",
2023 "& a < b < d& r < c",
2024 {"r", "c"}, 2
2025 },
2026 {
2027 "& a < b < c < d& c < m",
2028 "& a < b < c < m < d",
2029 {"a", "b", "c", "m", "d"}, 5
2030 },
2031 {
2032 "& a < b < c < d& a < m",
2033 "& a < m < b < c < d",
2034 {"a", "m", "b", "c", "d"}, 5
2035 }
2036 };
2037
2038
2039 UCollator *credundant = NULL;
2040 UCollator *cresulting = NULL;
2041 UErrorCode status = U_ZERO_ERROR;
2042 UChar rlz[2048] = { 0 };
2043 uint32_t rlen = 0;
2044
2045 for(i = 0; i<sizeof(tests)/sizeof(tests[0]); i++) {
2046 log_verbose("testing rule %s, expected to be %s\n", tests[i].rules, tests[i] .expectedRules);
2047 rlen = u_unescape(tests[i].rules, rlz, 2048);
2048
2049 credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&sta tus);
2050 if(status == U_FILE_ACCESS_ERROR) {
2051 log_data_err("Is your data around?\n");
2052 return;
2053 } else if(U_FAILURE(status)) {
2054 log_err("Error opening collator\n");
2055 return;
2056 }
2057
2058 rlen = u_unescape(tests[i].expectedRules, rlz, 2048);
2059 cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&sta tus);
2060
2061 testAgainstUCA(cresulting, credundant, "expected", TRUE, &status);
2062
2063 ucol_close(credundant);
2064 ucol_close(cresulting);
2065
2066 log_verbose("testing using data\n");
2067
2068 genericRulesStarter(tests[i].rules, tests[i].testdata, tests[i].testdatalen) ;
2069 }
2070
2071 }
2072
2073 static void TestExpansionSyntax(void) {
2074 int32_t i;
2075
2076 const static char *rules[] = {
2077 "&AE <<< a << b <<< c &d <<< f",
2078 "&AE <<< a <<< b << c << d < e < f <<< g",
2079 "&AE <<< B <<< C / D <<< F"
2080 };
2081
2082 const static char *expectedRules[] = {
2083 "&A <<< a / E << b / E <<< c /E &d <<< f",
2084 "&A <<< a / E <<< b / E << c / E << d / E < e < f <<< g",
2085 "&A <<< B / E <<< C / ED <<< F / E"
2086 };
2087
2088 const static char *testdata[][8] = {
2089 {"AE", "a", "b", "c"},
2090 {"AE", "a", "b", "c", "d", "e", "f", "g"},
2091 {"AE", "B", "C"} /* / ED <<< F / E"},*/
2092 };
2093
2094 const static uint32_t testdatalen[] = {
2095 4,
2096 8,
2097 3
2098 };
2099
2100
2101
2102 UCollator *credundant = NULL;
2103 UCollator *cresulting = NULL;
2104 UErrorCode status = U_ZERO_ERROR;
2105 UChar rlz[2048] = { 0 };
2106 uint32_t rlen = 0;
2107
2108 for(i = 0; i<sizeof(rules)/sizeof(rules[0]); i++) {
2109 log_verbose("testing rule %s, expected to be %s\n", rules[i], expectedRules[ i]);
2110 rlen = u_unescape(rules[i], rlz, 2048);
2111
2112 credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &st atus);
2113 if(status == U_FILE_ACCESS_ERROR) {
2114 log_data_err("Is your data around?\n");
2115 return;
2116 } else if(U_FAILURE(status)) {
2117 log_err("Error opening collator\n");
2118 return;
2119 }
2120 rlen = u_unescape(expectedRules[i], rlz, 2048);
2121 cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&sta tus);
2122
2123 /* testAgainstUCA still doesn't handle expansions correctly, so this is not run */
2124 /* as a hard error test, but only in information mode */
2125 testAgainstUCA(cresulting, credundant, "expected", FALSE, &status);
2126
2127 ucol_close(credundant);
2128 ucol_close(cresulting);
2129
2130 log_verbose("testing using data\n");
2131
2132 genericRulesStarter(rules[i], testdata[i], testdatalen[i]);
2133 }
2134 }
2135
2136 static void TestCase(void) 832 static void TestCase(void)
2137 { 833 {
2138 const static UChar gRules[MAX_TOKEN_LEN] = 834 const static UChar gRules[MAX_TOKEN_LEN] =
2139 /*" & 0 < 1,\u2461<a,A"*/ 835 /*" & 0 < 1,\u2461<a,A"*/
2140 { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x 0041, 0x0000 }; 836 { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x 0041, 0x0000 };
2141 837
2142 const static UChar testCase[][MAX_TOKEN_LEN] = 838 const static UChar testCase[][MAX_TOKEN_LEN] =
2143 { 839 {
2144 /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000}, 840 /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
2145 /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000}, 841 /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
(...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after
2239 "CHa", 935 "CHa",
2240 "ChA", 936 "ChA",
2241 "Cha", 937 "Cha",
2242 "chA", 938 "chA",
2243 "cha", 939 "cha",
2244 "I", 940 "I",
2245 "i" 941 "i"
2246 }; 942 };
2247 log_verbose("mixed case test\n"); 943 log_verbose("mixed case test\n");
2248 log_verbose("lower first, case level off\n"); 944 log_verbose("lower first, case level off\n");
2249 genericRulesStarter("[casefirst lower]&H<ch<<<Ch<<<CH", lowerFirst, sizeof (lowerFirst)/sizeof(lowerFirst[0])); 945 genericRulesStarter("[caseFirst lower]&H<ch<<<Ch<<<CH", lowerFirst, sizeof (lowerFirst)/sizeof(lowerFirst[0]));
2250 log_verbose("upper first, case level off\n"); 946 log_verbose("upper first, case level off\n");
2251 genericRulesStarter("[casefirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof (upperFirst)/sizeof(upperFirst[0])); 947 genericRulesStarter("[caseFirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof (upperFirst)/sizeof(upperFirst[0]));
2252 log_verbose("lower first, case level on\n"); 948 log_verbose("lower first, case level on\n");
2253 genericRulesStarter("[casefirst lower][caselevel on]&H<ch<<<Ch<<<CH", lowe rFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0])); 949 genericRulesStarter("[caseFirst lower][caseLevel on]&H<ch<<<Ch<<<CH", lowe rFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
2254 log_verbose("upper first, case level on\n"); 950 log_verbose("upper first, case level on\n");
2255 genericRulesStarter("[casefirst upper][caselevel on]&H<ch<<<Ch<<<CH", uppe rFirst, sizeof(upperFirst)/sizeof(upperFirst[0])); 951 genericRulesStarter("[caseFirst upper][caseLevel on]&H<ch<<<Ch<<<CH", uppe rFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
2256 } 952 }
2257 953
2258 } 954 }
2259 955
2260 static void TestIncrementalNormalize(void) { 956 static void TestIncrementalNormalize(void) {
2261 957
2262 /*UChar baseA =0x61;*/ 958 /*UChar baseA =0x61;*/
2263 UChar baseA =0x41; 959 UChar baseA =0x41;
2264 /* UChar baseB = 0x42;*/ 960 /* UChar baseB = 0x42;*/
2265 static const UChar ccMix[] = {0x316, 0x321, 0x300}; 961 static const UChar ccMix[] = {0x316, 0x321, 0x300};
(...skipping 287 matching lines...) Expand 10 before | Expand all | Expand 10 after
2553 } 1249 }
2554 1250
2555 log_verbose("Using start of korean rules\n"); 1251 log_verbose("Using start of korean rules\n");
2556 1252
2557 if(U_SUCCESS(status)) { 1253 if(U_SUCCESS(status)) {
2558 genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0 ])); 1254 genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0 ]));
2559 } else { 1255 } else {
2560 log_err("Unable to open collator with rules %s\n", rules); 1256 log_err("Unable to open collator with rules %s\n", rules);
2561 } 1257 }
2562 1258
2563 log_verbose("Setting jamoSpecial to TRUE and testing once more\n");
2564 ((UCATableHeader *)coll->image)->jamoSpecial = TRUE; /* don't try this at home */
2565 genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]) );
2566
2567 ucol_close(coll); 1259 ucol_close(coll);
2568 1260
2569 log_verbose("Using ko__LOTUS locale\n"); 1261 log_verbose("Using ko__LOTUS locale\n");
2570 genericLocaleStarter("ko__LOTUS", koreanData, sizeof(koreanData)/sizeof(korean Data[0])); 1262 genericLocaleStarter("ko__LOTUS", koreanData, sizeof(koreanData)/sizeof(korean Data[0]));
2571 } 1263 }
2572 1264
1265 /*
1266 * The secondary/tertiary compression middle byte
1267 * as used by the current implementation.
1268 * Subject to change as the sort key compression changes.
1269 * See class CollationKeys.
1270 */
1271 enum {
1272 SEC_COMMON_MIDDLE = 0x25, /* range 05..45 */
1273 TER_ONLY_COMMON_MIDDLE = 0x65 /* range 05..C5 */
1274 };
1275
2573 static void TestCompressOverlap(void) { 1276 static void TestCompressOverlap(void) {
2574 UChar secstr[150]; 1277 UChar secstr[150];
2575 UChar tertstr[150]; 1278 UChar tertstr[150];
2576 UErrorCode status = U_ZERO_ERROR; 1279 UErrorCode status = U_ZERO_ERROR;
2577 UCollator *coll; 1280 UCollator *coll;
2578 char result[200]; 1281 uint8_t result[500];
2579 uint32_t resultlen; 1282 uint32_t resultlen;
2580 int count = 0; 1283 int count = 0;
2581 char *tempptr; 1284 uint8_t *tempptr;
2582 1285
2583 coll = ucol_open("", &status); 1286 coll = ucol_open("", &status);
2584 1287
2585 if (U_FAILURE(status)) { 1288 if (U_FAILURE(status)) {
2586 log_err_status(status, "Collator can't be created -> %s\n", u_errorName( status)); 1289 log_err_status(status, "Collator can't be created -> %s\n", u_errorName( status));
2587 return; 1290 return;
2588 } 1291 }
2589 while (count < 149) { 1292 while (count < 149) {
2590 secstr[count] = 0x0020; /* [06, 05, 05] */ 1293 secstr[count] = 0x0020; /* [06, 05, 05] */
2591 tertstr[count] = 0x0020; 1294 tertstr[count] = 0x0020;
2592 count ++; 1295 count ++;
2593 } 1296 }
2594 1297
2595 /* top down compression ----------------------------------- */ 1298 /* top down compression ----------------------------------- */
2596 secstr[count] = 0x0332; /* [, 87, 05] */ 1299 secstr[count] = 0x0332; /* [, 87, 05] */
2597 tertstr[count] = 0x3000; /* [06, 05, 07] */ 1300 tertstr[count] = 0x3000; /* [06, 05, 07] */
2598 1301
2599 /* no compression secstr should have 150 secondary bytes, tertstr should 1302 /* no compression secstr should have 150 secondary bytes, tertstr should
2600 have 150 tertiary bytes. 1303 have 150 tertiary bytes.
2601 with correct overlapping compression, secstr should have 4 secondary 1304 with correct compression, secstr should have 6 secondary
2602 bytes, tertstr should have > 2 tertiary bytes */ 1305 bytes (149/33 rounded up + accent), tertstr should have > 2 tertiary bytes * /
2603 resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250); 1306 resultlen = ucol_getSortKey(coll, secstr, 150, result, LEN(result));
2604 (void)resultlen; /* Suppress set but not used warning. */ 1307 (void)resultlen; /* Suppress set but not used warning. */
2605 tempptr = uprv_strchr(result, 1) + 1; 1308 tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;
2606 while (*(tempptr + 1) != 1) { 1309 while (*(tempptr + 1) != 1) {
2607 /* the last secondary collation element is not checked since it is not 1310 /* the last secondary collation element is not checked since it is not
2608 part of the compression */ 1311 part of the compression */
2609 if (*tempptr < UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2) { 1312 if (*tempptr < SEC_COMMON_MIDDLE) {
2610 log_err("Secondary compression overlapped\n"); 1313 log_err("Secondary top down compression overlapped\n");
2611 } 1314 }
2612 tempptr ++; 1315 tempptr ++;
2613 } 1316 }
2614 1317
2615 /* tertiary top/bottom/common for en_US is similar to the secondary 1318 /* tertiary top/bottom/common for en_US is similar to the secondary
2616 top/bottom/common */ 1319 top/bottom/common */
2617 resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250); 1320 resultlen = ucol_getSortKey(coll, tertstr, 150, result, LEN(result));
2618 tempptr = uprv_strrchr(result, 1) + 1; 1321 tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;
2619 while (*(tempptr + 1) != 0) { 1322 while (*(tempptr + 1) != 0) {
2620 /* the last secondary collation element is not checked since it is not 1323 /* the last secondary collation element is not checked since it is not
2621 part of the compression */ 1324 part of the compression */
2622 if (*tempptr < coll->tertiaryTop - coll->tertiaryTopCount) { 1325 if (*tempptr < TER_ONLY_COMMON_MIDDLE) {
2623 log_err("Tertiary compression overlapped\n"); 1326 log_err("Tertiary top down compression overlapped\n");
2624 } 1327 }
2625 tempptr ++; 1328 tempptr ++;
2626 } 1329 }
2627 1330
2628 /* bottom up compression ------------------------------------- */ 1331 /* bottom up compression ------------------------------------- */
2629 secstr[count] = 0; 1332 secstr[count] = 0;
2630 tertstr[count] = 0; 1333 tertstr[count] = 0;
2631 resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250); 1334 resultlen = ucol_getSortKey(coll, secstr, 150, result, LEN(result));
2632 tempptr = uprv_strchr(result, 1) + 1; 1335 tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;
2633 while (*(tempptr + 1) != 1) { 1336 while (*(tempptr + 1) != 1) {
2634 /* the last secondary collation element is not checked since it is not 1337 /* the last secondary collation element is not checked since it is not
2635 part of the compression */ 1338 part of the compression */
2636 if (*tempptr > UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2) { 1339 if (*tempptr > SEC_COMMON_MIDDLE) {
2637 log_err("Secondary compression overlapped\n"); 1340 log_err("Secondary bottom up compression overlapped\n");
2638 } 1341 }
2639 tempptr ++; 1342 tempptr ++;
2640 } 1343 }
2641 1344
2642 /* tertiary top/bottom/common for en_US is similar to the secondary 1345 /* tertiary top/bottom/common for en_US is similar to the secondary
2643 top/bottom/common */ 1346 top/bottom/common */
2644 resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250); 1347 resultlen = ucol_getSortKey(coll, tertstr, 150, result, LEN(result));
2645 tempptr = uprv_strrchr(result, 1) + 1; 1348 tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;
2646 while (*(tempptr + 1) != 0) { 1349 while (*(tempptr + 1) != 0) {
2647 /* the last secondary collation element is not checked since it is not 1350 /* the last secondary collation element is not checked since it is not
2648 part of the compression */ 1351 part of the compression */
2649 if (*tempptr > coll->tertiaryBottom + coll->tertiaryBottomCount) { 1352 if (*tempptr > TER_ONLY_COMMON_MIDDLE) {
2650 log_err("Tertiary compression overlapped\n"); 1353 log_err("Tertiary bottom up compression overlapped\n");
2651 } 1354 }
2652 tempptr ++; 1355 tempptr ++;
2653 } 1356 }
2654 1357
2655 ucol_close(coll); 1358 ucol_close(coll);
2656 } 1359 }
2657 1360
2658 static void TestCyrillicTailoring(void) { 1361 static void TestCyrillicTailoring(void) {
2659 static const char *test[] = { 1362 static const char *test[] = {
2660 "\\u0410b", 1363 "\\u0410b",
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
2700 const static UChar testdata[][2] = { 1403 const static UChar testdata[][2] = {
2701 {0x0041 /* 'A' */, 0x0042 /* 'B' */}, 1404 {0x0041 /* 'A' */, 0x0042 /* 'B' */},
2702 {0x0041 /* 'A' */, 0x0306 /* combining breve */}, 1405 {0x0041 /* 'A' */, 0x0306 /* combining breve */},
2703 {0x0063 /* 'c' */, 0x0068 /* 'h' */} 1406 {0x0063 /* 'c' */, 0x0068 /* 'h' */}
2704 }; 1407 };
2705 const static UChar testdata2[][2] = { 1408 const static UChar testdata2[][2] = {
2706 {0x0063 /* 'c' */, 0x0067 /* 'g' */}, 1409 {0x0063 /* 'c' */, 0x0067 /* 'g' */},
2707 {0x0063 /* 'c' */, 0x0068 /* 'h' */}, 1410 {0x0063 /* 'c' */, 0x0068 /* 'h' */},
2708 {0x0063 /* 'c' */, 0x006C /* 'l' */} 1411 {0x0063 /* 'c' */, 0x006C /* 'l' */}
2709 }; 1412 };
1413 #if 0
1414 /*
1415 * These pairs of rule strings are not guaranteed to yield the very same map pings.
1416 * In fact, LDML 24 recommends an improved way of creating mappings
1417 * which always yields different mappings for such pairs. See
1418 * http://www.unicode.org/reports/tr35/tr35-33/tr35-collation.html#Orderings
1419 */
2710 const static char *testrules3[] = { 1420 const static char *testrules3[] = {
2711 "&z < xyz &xyzw << B", 1421 "&z < xyz &xyzw << B",
2712 "&z < xyz &xyz << B / w", 1422 "&z < xyz &xyz << B / w",
2713 "&z < ch &achm << B", 1423 "&z < ch &achm << B",
2714 "&z < ch &a << B / chm", 1424 "&z < ch &a << B / chm",
2715 "&\\ud800\\udc00w << B", 1425 "&\\ud800\\udc00w << B",
2716 "&\\ud800\\udc00 << B / w", 1426 "&\\ud800\\udc00 << B / w",
2717 "&a\\ud800\\udc00m << B", 1427 "&a\\ud800\\udc00m << B",
2718 "&a << B / \\ud800\\udc00m", 1428 "&a << B / \\ud800\\udc00m",
2719 }; 1429 };
1430 #endif
2720 1431
2721 UErrorCode status = U_ZERO_ERROR; 1432 UErrorCode status = U_ZERO_ERROR;
2722 UCollator *coll; 1433 UCollator *coll;
2723 UChar rule[256] = {0}; 1434 UChar rule[256] = {0};
2724 uint32_t rlen = 0; 1435 uint32_t rlen = 0;
2725 int i; 1436 int i;
2726 1437
2727 for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) { 1438 for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
2728 UCollationElements *iter1; 1439 UCollationElements *iter1;
2729 int j = 0; 1440 int j = 0;
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after
2775 testdata2[1][1]); 1486 testdata2[1][1]);
2776 return; 1487 return;
2777 } 1488 }
2778 if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) { 1489 if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {
2779 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n", 1490 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
2780 testdata2[1][0], testdata2[1][1], testdata2[2][0], 1491 testdata2[1][0], testdata2[1][1], testdata2[2][0],
2781 testdata2[2][1]); 1492 testdata2[2][1]);
2782 return; 1493 return;
2783 } 1494 }
2784 ucol_close(coll); 1495 ucol_close(coll);
2785 1496 #if 0 /* see above */
2786 for (i = 0; i < sizeof(testrules3) / sizeof(testrules3[0]); i += 2) { 1497 for (i = 0; i < sizeof(testrules3) / sizeof(testrules3[0]); i += 2) {
1498 log_verbose("testrules3 i==%d \"%s\" vs. \"%s\"\n", i, testrules3[i], t estrules3[i + 1]);
2787 UCollator *coll1, 1499 UCollator *coll1,
2788 *coll2; 1500 *coll2;
2789 UCollationElements *iter1, 1501 UCollationElements *iter1,
2790 *iter2; 1502 *iter2;
2791 UChar ch = 0x0042 /* 'B' */; 1503 UChar ch = 0x0042 /* 'B' */;
2792 uint32_t ce; 1504 uint32_t ce;
2793 rlen = u_unescape(testrules3[i], rule, 32); 1505 rlen = u_unescape(testrules3[i], rule, 32);
2794 coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status) ; 1506 coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status) ;
2795 rlen = u_unescape(testrules3[i + 1], rule, 32); 1507 rlen = u_unescape(testrules3[i + 1], rule, 32);
2796 coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status) ; 1508 coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status) ;
2797 if (U_FAILURE(status)) { 1509 if (U_FAILURE(status)) {
2798 log_err("Collator creation failed %s\n", testrules[i]); 1510 log_err("Collator creation failed %s\n", testrules[i]);
2799 return; 1511 return;
2800 } 1512 }
2801 iter1 = ucol_openElements(coll1, &ch, 1, &status); 1513 iter1 = ucol_openElements(coll1, &ch, 1, &status);
2802 iter2 = ucol_openElements(coll2, &ch, 1, &status); 1514 iter2 = ucol_openElements(coll2, &ch, 1, &status);
2803 if (U_FAILURE(status)) { 1515 if (U_FAILURE(status)) {
2804 log_err("Collation iterator creation failed\n"); 1516 log_err("Collation iterator creation failed\n");
2805 return; 1517 return;
2806 } 1518 }
2807 ce = ucol_next(iter1, &status); 1519 ce = ucol_next(iter1, &status);
2808 if (U_FAILURE(status)) { 1520 if (U_FAILURE(status)) {
2809 log_err("Retrieving ces failed\n"); 1521 log_err("Retrieving ces failed\n");
2810 return; 1522 return;
2811 } 1523 }
2812 while (ce != UCOL_NULLORDER) { 1524 while (ce != UCOL_NULLORDER) {
2813 if (ce != (uint32_t)ucol_next(iter2, &status)) { 1525 uint32_t ce2 = (uint32_t)ucol_next(iter2, &status);
2814 log_err("CEs does not match\n"); 1526 if (ce == ce2) {
1527 log_verbose("CEs match: %08x\n", ce);
1528 } else {
1529 log_err("CEs do not match: %08x vs. %08x\n", ce, ce2);
2815 return; 1530 return;
2816 } 1531 }
2817 ce = ucol_next(iter1, &status); 1532 ce = ucol_next(iter1, &status);
2818 if (U_FAILURE(status)) { 1533 if (U_FAILURE(status)) {
2819 log_err("Retrieving ces failed\n"); 1534 log_err("Retrieving ces failed\n");
2820 return; 1535 return;
2821 } 1536 }
2822 } 1537 }
2823 if (ucol_next(iter2, &status) != UCOL_NULLORDER) { 1538 if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
2824 log_err("CEs not exhausted\n"); 1539 log_err("CEs not exhausted\n");
2825 return; 1540 return;
2826 } 1541 }
2827 ucol_closeElements(iter1); 1542 ucol_closeElements(iter1);
2828 ucol_closeElements(iter2); 1543 ucol_closeElements(iter2);
2829 ucol_close(coll1); 1544 ucol_close(coll1);
2830 ucol_close(coll2); 1545 ucol_close(coll2);
2831 } 1546 }
1547 #endif
2832 } 1548 }
2833 1549
2834 static void TestExpansion(void) { 1550 static void TestExpansion(void) {
2835 const static char *testrules[] = { 1551 const static char *testrules[] = {
1552 #if 0
1553 /*
1554 * This seems to have tested that M was not mapped to an expansion.
1555 * I believe the old builder just did that because it computed the extension CEs
1556 * at the very end, which was a bug.
1557 * Among other problems, it violated the core tailoring principle
1558 * by making an earlier rule depend on a later one.
1559 * And, of course, if M did not get an expansion, then it was primary different from K,
1560 * unlike what the rule &K<<M says.
1561 */
2836 "&J << K / B & K << M", 1562 "&J << K / B & K << M",
1563 #endif
2837 "&J << K / B << M" 1564 "&J << K / B << M"
2838 }; 1565 };
2839 const static UChar testdata[][3] = { 1566 const static UChar testdata[][3] = {
2840 {0x004A /*'J'*/, 0x0041 /*'A'*/, 0}, 1567 {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
2841 {0x004D /*'M'*/, 0x0041 /*'A'*/, 0}, 1568 {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
2842 {0x004B /*'K'*/, 0x0041 /*'A'*/, 0}, 1569 {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
2843 {0x004B /*'K'*/, 0x0043 /*'C'*/, 0}, 1570 {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
2844 {0x004A /*'J'*/, 0x0043 /*'C'*/, 0}, 1571 {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
2845 {0x004D /*'M'*/, 0x0043 /*'C'*/, 0} 1572 {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
2846 }; 1573 };
(...skipping 129 matching lines...) Expand 10 before | Expand all | Expand 10 after
2976 (void)klen; /* Suppress set but not used warning. */ 1703 (void)klen; /* Suppress set but not used warning. */
2977 1704
2978 ucol_close(coll); 1705 ucol_close(coll);
2979 } else { 1706 } else {
2980 log_data_err("Couldn't open UCA\n"); 1707 log_data_err("Couldn't open UCA\n");
2981 } 1708 }
2982 } 1709 }
2983 1710
2984 static void TestVariableTopSetting(void) { 1711 static void TestVariableTopSetting(void) {
2985 UErrorCode status = U_ZERO_ERROR; 1712 UErrorCode status = U_ZERO_ERROR;
2986 const UChar *current = NULL;
2987 uint32_t varTopOriginal = 0, varTop1, varTop2; 1713 uint32_t varTopOriginal = 0, varTop1, varTop2;
2988 UCollator *coll = ucol_open("", &status); 1714 UCollator *coll = ucol_open("", &status);
2989 if(U_SUCCESS(status)) { 1715 if(U_SUCCESS(status)) {
2990 1716
2991 uint32_t strength = 0; 1717 static const UChar nul = 0;
2992 uint16_t specs = 0; 1718 static const UChar space = 0x20;
2993 uint32_t chOffset = 0; 1719 static const UChar dot = 0x2e; /* punctuation */
2994 uint32_t chLen = 0; 1720 static const UChar degree = 0xb0; /* symbol */
2995 uint32_t exOffset = 0; 1721 static const UChar dollar = 0x24; /* currency symbol */
2996 uint32_t exLen = 0; 1722 static const UChar zero = 0x30; /* digit */
2997 uint32_t oldChOffset = 0;
2998 uint32_t oldChLen = 0;
2999 uint32_t oldExOffset = 0;
3000 uint32_t oldExLen = 0;
3001 uint32_t prefixOffset = 0;
3002 uint32_t prefixLen = 0;
3003 1723
3004 UBool startOfRules = TRUE; 1724 varTopOriginal = ucol_getVariableTop(coll, &status);
3005 UColTokenParser src; 1725 log_verbose("ucol_getVariableTop(root) -> %08x\n", varTopOriginal);
3006 UColOptionSet opts; 1726 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
3007 1727
3008 UChar *rulesCopy = NULL; 1728 varTop1 = ucol_setVariableTop(coll, &space, 1, &status);
3009 uint32_t rulesLen; 1729 varTop2 = ucol_getVariableTop(coll, &status);
3010 1730 log_verbose("ucol_setVariableTop(space) -> %08x\n", varTop1);
3011 UCollationResult result; 1731 if(U_FAILURE(status) || varTop1 != varTop2 ||
3012 1732 !ucol_equal(coll, &nul, 0, &space, 1) ||
3013 UChar first[256] = { 0 }; 1733 ucol_equal(coll, &nul, 0, &dot, 1) ||
3014 UChar second[256] = { 0 }; 1734 ucol_equal(coll, &nul, 0, &degree, 1) ||
3015 UParseError parseError; 1735 ucol_equal(coll, &nul, 0, &dollar, 1) ||
3016 int32_t myQ = getTestOption(QUICK_OPTION); 1736 ucol_equal(coll, &nul, 0, &zero, 1) ||
3017 1737 ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {
3018 (void)prefixLen; /* Suppress set but not used warnings. */ 1738 log_err("ucol_setVariableTop(space) did not work - %s\n", u_errorName(status ));
3019 (void)prefixOffset;
3020 (void)specs;
3021
3022 uprv_memset(&src, 0, sizeof(UColTokenParser));
3023
3024 src.opts = &opts;
3025
3026 if(getTestOption(QUICK_OPTION) <= 0) {
3027 setTestOption(QUICK_OPTION, 1);
3028 } 1739 }
3029 1740
3030 /* this test will fail when normalization is turned on */ 1741 varTop1 = ucol_setVariableTop(coll, &dot, 1, &status);
3031 /* therefore we always turn off exhaustive mode for it */ 1742 varTop2 = ucol_getVariableTop(coll, &status);
3032 { /* QUICK > 0*/ 1743 log_verbose("ucol_setVariableTop(dot) -> %08x\n", varTop1);
3033 log_verbose("Slide variable top over UCARules\n"); 1744 if(U_FAILURE(status) || varTop1 != varTop2 ||
3034 rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, 0); 1745 !ucol_equal(coll, &nul, 0, &space, 1) ||
3035 rulesCopy = (UChar *)uprv_malloc((rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*s izeof(UChar)); 1746 !ucol_equal(coll, &nul, 0, &dot, 1) ||
3036 rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, rulesLen+UCOL_T OK_EXTRA_RULE_SPACE_SIZE); 1747 ucol_equal(coll, &nul, 0, &degree, 1) ||
3037 1748 ucol_equal(coll, &nul, 0, &dollar, 1) ||
3038 if(U_SUCCESS(status) && rulesLen > 0) { 1749 ucol_equal(coll, &nul, 0, &zero, 1) ||
3039 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); 1750 ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) {
3040 src.current = src.source = rulesCopy; 1751 log_err("ucol_setVariableTop(dot) did not work - %s\n", u_errorName(status)) ;
3041 src.end = rulesCopy+rulesLen;
3042 src.extraCurrent = src.end;
3043 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
3044
3045 » /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
3046 » the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
3047 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError, &status)) != NULL) {
3048 strength = src.parsedToken.strength;
3049 chOffset = src.parsedToken.charsOffset;
3050 chLen = src.parsedToken.charsLen;
3051 exOffset = src.parsedToken.extensionOffset;
3052 exLen = src.parsedToken.extensionLen;
3053 prefixOffset = src.parsedToken.prefixOffset;
3054 prefixLen = src.parsedToken.prefixLen;
3055 specs = src.parsedToken.flags;
3056
3057 startOfRules = FALSE;
3058 {
3059 log_verbose("%04X %d ", *(src.source+chOffset), chLen);
3060 }
3061 if(strength == UCOL_PRIMARY) {
3062 status = U_ZERO_ERROR;
3063 varTopOriginal = ucol_getVariableTop(coll, &status);
3064 varTop1 = ucol_setVariableTop(coll, src.source+oldChOffset, oldChLen, &status);
3065 if(U_FAILURE(status)) {
3066 char buffer[256];
3067 char *buf = buffer;
3068 uint32_t i = 0, j;
3069 uint32_t CE = UCOL_NO_MORE_CES;
3070
3071 /* before we start screaming, let's see if there is a problem with t he rules */
3072 UErrorCode collIterateStatus = U_ZERO_ERROR;
3073 collIterate *s = uprv_new_collIterate(&collIterateStatus);
3074 uprv_init_collIterate(coll, src.source+oldChOffset, oldChLen, s, &co llIterateStatus);
3075
3076 CE = ucol_getNextCE(coll, s, &status);
3077 (void)CE; /* Suppress set but not used warning. */
3078
3079 for(i = 0; i < oldChLen; i++) {
3080 j = sprintf(buf, "%04X ", *(src.source+oldChOffset+i));
3081 buf += j;
3082 }
3083 if(status == U_PRIMARY_TOO_LONG_ERROR) {
3084 log_verbose("= Expected failure for %s =", buffer);
3085 } else {
3086 if(uprv_collIterateAtEnd(s)) {
3087 log_err("Unexpected failure setting variable top at offset %d. E rror %s. Codepoints: %s\n",
3088 oldChOffset, u_errorName(status), buffer);
3089 } else {
3090 log_verbose("There is a goofy contraction in UCA rules that does not appear in the fractional UCA. Codepoints: %s\n",
3091 buffer);
3092 }
3093 }
3094 uprv_delete_collIterate(s);
3095 }
3096 varTop2 = ucol_getVariableTop(coll, &status);
3097 if((varTop1 & 0xFFFF0000) != (varTop2 & 0xFFFF0000)) {
3098 log_err("cannot retrieve set varTop value!\n");
3099 continue;
3100 }
3101
3102 if((varTop1 & 0xFFFF0000) > 0 && oldExLen == 0) {
3103
3104 u_strncpy(first, src.source+oldChOffset, oldChLen);
3105 u_strncpy(first+oldChLen, src.source+chOffset, chLen);
3106 u_strncpy(first+oldChLen+chLen, src.source+oldChOffset, oldChLen);
3107 first[2*oldChLen+chLen] = 0;
3108
3109 if(oldExLen == 0) {
3110 u_strncpy(second, src.source+chOffset, chLen);
3111 second[chLen] = 0;
3112 } else { /* This is skipped momentarily, but should work once UCARul es are fully UCA conformant */
3113 u_strncpy(second, src.source+oldExOffset, oldExLen);
3114 u_strncpy(second+oldChLen, src.source+chOffset, chLen);
3115 u_strncpy(second+oldChLen+chLen, src.source+oldExOffset, oldExLen) ;
3116 second[2*oldExLen+chLen] = 0;
3117 }
3118 result = ucol_strcoll(coll, first, -1, second, -1);
3119 if(result == UCOL_EQUAL) {
3120 doTest(coll, first, second, UCOL_EQUAL);
3121 } else {
3122 log_verbose("Suspicious strcoll result for %04X and %04X\n", *(src .source+oldChOffset), *(src.source+chOffset));
3123 }
3124 }
3125 }
3126 if(strength != UCOL_TOK_RESET) {
3127 oldChOffset = chOffset;
3128 oldChLen = chLen;
3129 oldExOffset = exOffset;
3130 oldExLen = exLen;
3131 }
3132 }
3133 status = U_ZERO_ERROR;
3134 }
3135 else {
3136 log_err("Unexpected failure getting rules %s\n", u_errorName(status));
3137 return;
3138 }
3139 if (U_FAILURE(status)) {
3140 log_err("Error parsing rules %s\n", u_errorName(status));
3141 return;
3142 }
3143 status = U_ZERO_ERROR;
3144 } 1752 }
3145 1753
3146 setTestOption(QUICK_OPTION, myQ); 1754 varTop1 = ucol_setVariableTop(coll, &degree, 1, &status);
1755 varTop2 = ucol_getVariableTop(coll, &status);
1756 log_verbose("ucol_setVariableTop(degree) -> %08x\n", varTop1);
1757 if(U_FAILURE(status) || varTop1 != varTop2 ||
1758 !ucol_equal(coll, &nul, 0, &space, 1) ||
1759 !ucol_equal(coll, &nul, 0, &dot, 1) ||
1760 !ucol_equal(coll, &nul, 0, &degree, 1) ||
1761 ucol_equal(coll, &nul, 0, &dollar, 1) ||
1762 ucol_equal(coll, &nul, 0, &zero, 1) ||
1763 ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) {
1764 log_err("ucol_setVariableTop(degree) did not work - %s\n", u_errorName(statu s));
1765 }
1766
1767 varTop1 = ucol_setVariableTop(coll, &dollar, 1, &status);
1768 varTop2 = ucol_getVariableTop(coll, &status);
1769 log_verbose("ucol_setVariableTop(dollar) -> %08x\n", varTop1);
1770 if(U_FAILURE(status) || varTop1 != varTop2 ||
1771 !ucol_equal(coll, &nul, 0, &space, 1) ||
1772 !ucol_equal(coll, &nul, 0, &dot, 1) ||
1773 !ucol_equal(coll, &nul, 0, &degree, 1) ||
1774 !ucol_equal(coll, &nul, 0, &dollar, 1) ||
1775 ucol_equal(coll, &nul, 0, &zero, 1) ||
1776 ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {
1777 log_err("ucol_setVariableTop(dollar) did not work - %s\n", u_errorName(statu s));
1778 }
3147 1779
3148 log_verbose("Testing setting variable top to contractions\n"); 1780 log_verbose("Testing setting variable top to contractions\n");
3149 { 1781 {
3150 UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->contractionUC ACombos); 1782 UChar first[4] = { 0 };
3151 int32_t maxUCAContractionLength = coll->image->contractionUCACombosWidth;
3152 while(*conts != 0) {
3153 /*
3154 * A continuation is NUL-terminated and NUL-padded
3155 * except if it has the maximum length.
3156 */
3157 int32_t contractionLength = maxUCAContractionLength;
3158 while(contractionLength > 0 && conts[contractionLength - 1] == 0) {
3159 --contractionLength;
3160 }
3161 if(*(conts+1)==0) { /* pre-context */
3162 varTop1 = ucol_setVariableTop(coll, conts, 1, &status);
3163 } else {
3164 varTop1 = ucol_setVariableTop(coll, conts, contractionLength, &status);
3165 }
3166 if(U_FAILURE(status)) {
3167 if(status == U_PRIMARY_TOO_LONG_ERROR) {
3168 /* ucol_setVariableTop() is documented to not accept 3-byte primaries,
3169 * therefore it is not an error when it complains about them. */
3170 log_verbose("Couldn't set variable top to a contraction %04X %04X %04X - U_PRIMARY_TOO_LONG_ERROR\n",
3171 *conts, *(conts+1), *(conts+2));
3172 } else {
3173 log_err("Couldn't set variable top to a contraction %04X %04X %04X - % s\n",
3174 *conts, *(conts+1), *(conts+2), u_errorName(status));
3175 }
3176 status = U_ZERO_ERROR;
3177 }
3178 conts+=maxUCAContractionLength;
3179 }
3180
3181 status = U_ZERO_ERROR;
3182
3183 first[0] = 0x0040; 1783 first[0] = 0x0040;
3184 first[1] = 0x0050; 1784 first[1] = 0x0050;
3185 first[2] = 0x0000; 1785 first[2] = 0x0000;
3186 1786
1787 status = U_ZERO_ERROR;
3187 ucol_setVariableTop(coll, first, -1, &status); 1788 ucol_setVariableTop(coll, first, -1, &status);
3188 1789
3189 if(U_SUCCESS(status)) { 1790 if(U_SUCCESS(status)) {
3190 log_err("Invalid contraction succeded in setting variable top!\n"); 1791 log_err("Invalid contraction succeded in setting variable top!\n");
3191 } 1792 }
3192 1793
3193 } 1794 }
3194 1795
3195 log_verbose("Test restoring variable top\n"); 1796 log_verbose("Test restoring variable top\n");
3196 1797
3197 status = U_ZERO_ERROR; 1798 status = U_ZERO_ERROR;
3198 ucol_restoreVariableTop(coll, varTopOriginal, &status); 1799 ucol_restoreVariableTop(coll, varTopOriginal, &status);
3199 if(varTopOriginal != ucol_getVariableTop(coll, &status)) { 1800 if(varTopOriginal != ucol_getVariableTop(coll, &status)) {
3200 log_err("Couldn't restore old variable top\n"); 1801 log_err("Couldn't restore old variable top\n");
3201 } 1802 }
3202 1803
3203 log_verbose("Testing calling with error set\n"); 1804 log_verbose("Testing calling with error set\n");
3204 1805
3205 status = U_INTERNAL_PROGRAM_ERROR; 1806 status = U_INTERNAL_PROGRAM_ERROR;
3206 varTop1 = ucol_setVariableTop(coll, first, 1, &status); 1807 varTop1 = ucol_setVariableTop(coll, &space, 1, &status);
3207 varTop2 = ucol_getVariableTop(coll, &status); 1808 varTop2 = ucol_getVariableTop(coll, &status);
3208 ucol_restoreVariableTop(coll, varTop2, &status); 1809 ucol_restoreVariableTop(coll, varTop2, &status);
3209 varTop1 = ucol_setVariableTop(NULL, first, 1, &status); 1810 varTop1 = ucol_setVariableTop(NULL, &dot, 1, &status);
3210 varTop2 = ucol_getVariableTop(NULL, &status); 1811 varTop2 = ucol_getVariableTop(NULL, &status);
3211 ucol_restoreVariableTop(NULL, varTop2, &status); 1812 ucol_restoreVariableTop(NULL, varTop2, &status);
3212 if(status != U_INTERNAL_PROGRAM_ERROR) { 1813 if(status != U_INTERNAL_PROGRAM_ERROR) {
3213 log_err("Bad reaction to passed error!\n"); 1814 log_err("Bad reaction to passed error!\n");
3214 } 1815 }
3215 uprv_free(src.source);
3216 ucol_close(coll); 1816 ucol_close(coll);
3217 } else { 1817 } else {
3218 log_data_err("Couldn't open UCA collator\n"); 1818 log_data_err("Couldn't open UCA collator\n");
3219 } 1819 }
1820 }
3220 1821
1822 static void TestMaxVariable() {
1823 UErrorCode status = U_ZERO_ERROR;
1824 UColReorderCode oldMax, max;
1825 UCollator *coll;
1826
1827 static const UChar nul = 0;
1828 static const UChar space = 0x20;
1829 static const UChar dot = 0x2e; /* punctuation */
1830 static const UChar degree = 0xb0; /* symbol */
1831 static const UChar dollar = 0x24; /* currency symbol */
1832 static const UChar zero = 0x30; /* digit */
1833
1834 coll = ucol_open("", &status);
1835 if(U_FAILURE(status)) {
1836 log_data_err("Couldn't open root collator\n");
1837 return;
1838 }
1839
1840 oldMax = ucol_getMaxVariable(coll);
1841 log_verbose("ucol_getMaxVariable(root) -> %04x\n", oldMax);
1842 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
1843
1844 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
1845 max = ucol_getMaxVariable(coll);
1846 log_verbose("ucol_setMaxVariable(space) -> %04x\n", max);
1847 if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SPACE ||
1848 !ucol_equal(coll, &nul, 0, &space, 1) ||
1849 ucol_equal(coll, &nul, 0, &dot, 1) ||
1850 ucol_equal(coll, &nul, 0, &degree, 1) ||
1851 ucol_equal(coll, &nul, 0, &dollar, 1) ||
1852 ucol_equal(coll, &nul, 0, &zero, 1) ||
1853 ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {
1854 log_err("ucol_setMaxVariable(space) did not work - %s\n", u_errorName(status ));
1855 }
1856
1857 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_PUNCTUATION, &status);
1858 max = ucol_getMaxVariable(coll);
1859 log_verbose("ucol_setMaxVariable(punctuation) -> %04x\n", max);
1860 if(U_FAILURE(status) || max != UCOL_REORDER_CODE_PUNCTUATION ||
1861 !ucol_equal(coll, &nul, 0, &space, 1) ||
1862 !ucol_equal(coll, &nul, 0, &dot, 1) ||
1863 ucol_equal(coll, &nul, 0, &degree, 1) ||
1864 ucol_equal(coll, &nul, 0, &dollar, 1) ||
1865 ucol_equal(coll, &nul, 0, &zero, 1) ||
1866 ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) {
1867 log_err("ucol_setMaxVariable(punctuation) did not work - %s\n", u_errorName( status));
1868 }
1869
1870 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SYMBOL, &status);
1871 max = ucol_getMaxVariable(coll);
1872 log_verbose("ucol_setMaxVariable(symbol) -> %04x\n", max);
1873 if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SYMBOL ||
1874 !ucol_equal(coll, &nul, 0, &space, 1) ||
1875 !ucol_equal(coll, &nul, 0, &dot, 1) ||
1876 !ucol_equal(coll, &nul, 0, &degree, 1) ||
1877 ucol_equal(coll, &nul, 0, &dollar, 1) ||
1878 ucol_equal(coll, &nul, 0, &zero, 1) ||
1879 ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) {
1880 log_err("ucol_setMaxVariable(symbol) did not work - %s\n", u_errorName(statu s));
1881 }
1882
1883 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_CURRENCY, &status);
1884 max = ucol_getMaxVariable(coll);
1885 log_verbose("ucol_setMaxVariable(currency) -> %04x\n", max);
1886 if(U_FAILURE(status) || max != UCOL_REORDER_CODE_CURRENCY ||
1887 !ucol_equal(coll, &nul, 0, &space, 1) ||
1888 !ucol_equal(coll, &nul, 0, &dot, 1) ||
1889 !ucol_equal(coll, &nul, 0, &degree, 1) ||
1890 !ucol_equal(coll, &nul, 0, &dollar, 1) ||
1891 ucol_equal(coll, &nul, 0, &zero, 1) ||
1892 ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {
1893 log_err("ucol_setMaxVariable(currency) did not work - %s\n", u_errorName(sta tus));
1894 }
1895
1896 log_verbose("Test restoring maxVariable\n");
1897 status = U_ZERO_ERROR;
1898 ucol_setMaxVariable(coll, oldMax, &status);
1899 if(oldMax != ucol_getMaxVariable(coll)) {
1900 log_err("Couldn't restore old maxVariable\n");
1901 }
1902
1903 log_verbose("Testing calling with error set\n");
1904 status = U_INTERNAL_PROGRAM_ERROR;
1905 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
1906 max = ucol_getMaxVariable(coll);
1907 if(max != oldMax || status != U_INTERNAL_PROGRAM_ERROR) {
1908 log_err("Bad reaction to passed error!\n");
1909 }
1910 ucol_close(coll);
3221 } 1911 }
3222 1912
3223 static void TestNonChars(void) { 1913 static void TestNonChars(void) {
3224 static const char *test[] = { 1914 static const char *test[] = {
3225 "\\u0000", /* ignorable */ 1915 "\\u0000", /* ignorable */
3226 "\\uFFFE", /* special merge-sort character with minimum non-ignorable wei ghts */ 1916 "\\uFFFE", /* special merge-sort character with minimum non-ignorable wei ghts */
3227 "\\uFDD0", "\\uFDEF", 1917 "\\uFDD0", "\\uFDEF",
3228 "\\U0001FFFE", "\\U0001FFFF", /* UCA 6.0: noncharacters are treated like unassigned, */ 1918 "\\U0001FFFE", "\\U0001FFFF", /* UCA 6.0: noncharacters are treated like unassigned, */
3229 "\\U0002FFFE", "\\U0002FFFF", /* not like ignorable. */ 1919 "\\U0002FFFE", "\\U0002FFFF", /* not like ignorable. */
3230 "\\U0003FFFE", "\\U0003FFFF", 1920 "\\U0003FFFE", "\\U0003FFFF",
(...skipping 464 matching lines...) Expand 10 before | Expand all | Expand 10 after
3695 * is not the weight of any character or string, 2385 * is not the weight of any character or string,
3696 * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular]. 2386 * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular].
3697 */ 2387 */
3698 #define LAST_REGULAR_CHAR_STRING "\\U0001342E" 2388 #define LAST_REGULAR_CHAR_STRING "\\U0001342E"
3699 2389
3700 static const struct { 2390 static const struct {
3701 const char *rules; 2391 const char *rules;
3702 const char *data[10]; 2392 const char *data[10];
3703 const uint32_t len; 2393 const uint32_t len;
3704 } tests[] = { 2394 } tests[] = {
2395 #if 0
2396 /* "you cannot go before ...": The parser now sets an error for such nonsensical rules. */
3705 /* - all befores here amount to zero */ 2397 /* - all befores here amount to zero */
3706 { "&[before 3][first tertiary ignorable]<<<a", 2398 { "&[before 3][first tertiary ignorable]<<<a",
3707 { "\\u0000", "a"}, 2 2399 { "\\u0000", "a"}, 2
3708 }, /* you cannot go before first tertiary ignorable */ 2400 }, /* you cannot go before first tertiary ignorable */
3709 2401
3710 { "&[before 3][last tertiary ignorable]<<<a", 2402 { "&[before 3][last tertiary ignorable]<<<a",
3711 { "\\u0000", "a"}, 2 2403 { "\\u0000", "a"}, 2
3712 }, /* you cannot go before last tertiary ignorable */ 2404 }, /* you cannot go before last tertiary ignorable */
3713 2405 #endif
2406 /*
2407 * However, there is a real secondary ignorable (artificial addition in FractionalUCA.txt),
2408 * and it *is* possible to "go before" that.
2409 */
3714 { "&[before 3][first secondary ignorable]<<<a", 2410 { "&[before 3][first secondary ignorable]<<<a",
3715 { "\\u0000", "a"}, 2 2411 { "\\u0000", "a"}, 2
3716 }, /* you cannot go before first secondary ignorable */ 2412 },
3717 2413
3718 { "&[before 3][last secondary ignorable]<<<a", 2414 { "&[before 3][last secondary ignorable]<<<a",
3719 { "\\u0000", "a"}, 2 2415 { "\\u0000", "a"}, 2
3720 }, /* you cannot go before first secondary ignorable */ 2416 },
3721 2417
3722 /* 'normal' befores */ 2418 /* 'normal' befores */
3723 2419
3724 { "&[before 3][first primary ignorable]<<<c<<<b &[first primary ignorable]<a ", 2420 /*
2421 * Note: With a "SPACE first primary" boundary CE in FractionalUCA.txt,
2422 * it is not possible to tailor &[first primary ignorable]<a or &[last primary ignorable]<a
2423 * because there is no tailoring space before that boundary.
2424 * Made the tests work by tailoring to a space instead.
2425 */
2426 { "&[before 3][first primary ignorable]<<<c<<<b &' '<a", /* was &[first pri mary ignorable]<a */
3725 { "c", "b", "\\u0332", "a" }, 4 2427 { "c", "b", "\\u0332", "a" }, 4
3726 }, 2428 },
3727 2429
3728 /* we don't have a code point that corresponds to 2430 /* we don't have a code point that corresponds to
3729 * the last primary ignorable 2431 * the last primary ignorable
3730 */ 2432 */
3731 { "&[before 3][last primary ignorable]<<<c<<<b &[last primary ignorable]<a", 2433 { "&[before 3][last primary ignorable]<<<c<<<b &' '<a", /* was &[last prima ry ignorable]<a */
3732 { "\\u0332", "\\u20e3", "c", "b", "a" }, 5 2434 { "\\u0332", "\\u20e3", "c", "b", "a" }, 5
3733 }, 2435 },
3734 2436
3735 { "&[before 3][first variable]<<<c<<<b &[first variable]<a", 2437 { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
3736 { "c", "b", "\\u0009", "a", "\\u000a" }, 5 2438 { "c", "b", "\\u0009", "a", "\\u000a" }, 5
3737 }, 2439 },
3738 2440
3739 { "&[last variable]<a &[before 3][last variable]<<<c<<<b ", 2441 { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
3740 { LAST_VARIABLE_CHAR_STRING, "c", "b", /* [last variable] */ "a", FIRST_ REGULAR_CHAR_STRING }, 5 2442 { LAST_VARIABLE_CHAR_STRING, "c", "b", /* [last variable] */ "a", FIRST_ REGULAR_CHAR_STRING }, 5
3741 }, 2443 },
3742 2444
3743 { "&[first regular]<a" 2445 { "&[first regular]<a"
3744 "&[before 1][first regular]<b", 2446 "&[before 1][first regular]<b",
3745 { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING }, 4 2447 { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING }, 4
3746 }, 2448 },
3747 2449
3748 { "&[before 1][last regular]<b" 2450 { "&[before 1][last regular]<b"
3749 "&[last regular]<a", 2451 "&[last regular]<a",
3750 { LAST_REGULAR_CHAR_STRING, "b", /* [last regular] */ "a", "\\u4e00" }, 4 2452 { LAST_REGULAR_CHAR_STRING, "b", /* [last regular] */ "a", "\\u4e00" }, 4
3751 }, 2453 },
3752 2454
3753 { "&[before 1][first implicit]<b" 2455 { "&[before 1][first implicit]<b"
3754 "&[first implicit]<a", 2456 "&[first implicit]<a",
3755 { "b", "\\u4e00", "a", "\\u4e01"}, 4 2457 { "b", "\\u4e00", "a", "\\u4e01"}, 4
3756 }, 2458 },
3757 2459 #if 0 /* The current builder does not support tailoring to unassigned-implicit CEs (seems unnecessary, adds complexity). */
3758 { "&[before 1][last implicit]<b" 2460 { "&[before 1][last implicit]<b"
3759 "&[last implicit]<a", 2461 "&[last implicit]<a",
3760 { "b", "\\U0010FFFD", "a" }, 3 2462 { "b", "\\U0010FFFD", "a" }, 3
3761 }, 2463 },
3762 2464 #endif
3763 { "&[last variable]<z" 2465 { "&[last variable]<z"
3764 "&[last primary ignorable]<x" 2466 "&' '<x" /* was &[last primary ignorable]<x, see above */
3765 "&[last secondary ignorable]<<y" 2467 "&[last secondary ignorable]<<y"
3766 "&[last tertiary ignorable]<<<w" 2468 "&[last tertiary ignorable]<<<w"
3767 "&[top]<u", 2469 "&[top]<u",
3768 {"\\ufffb", "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u" }, 7 2470 {"\\ufffb", "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u" }, 7
3769 } 2471 }
3770 2472
3771 }; 2473 };
3772 uint32_t i; 2474 uint32_t i;
3773 2475
3774 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) { 2476 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
(...skipping 225 matching lines...) Expand 10 before | Expand all | Expand 10 after
4000 ucol_close(coll); 2702 ucol_close(coll);
4001 2703
4002 } 2704 }
4003 2705
4004 static void TestPartialSortKeyTermination(void) { 2706 static void TestPartialSortKeyTermination(void) {
4005 static const char* cases[] = { 2707 static const char* cases[] = {
4006 "\\u1234\\u1234\\udc00", 2708 "\\u1234\\u1234\\udc00",
4007 "\\udc00\\ud800\\ud800" 2709 "\\udc00\\ud800\\ud800"
4008 }; 2710 };
4009 2711
4010 int32_t i = sizeof(UCollator); 2712 int32_t i;
4011 2713
4012 UErrorCode status = U_ZERO_ERROR; 2714 UErrorCode status = U_ZERO_ERROR;
4013 2715
4014 UCollator *coll = ucol_open("", &status); 2716 UCollator *coll = ucol_open("", &status);
4015 2717
4016 UCharIterator iter; 2718 UCharIterator iter;
4017 2719
4018 UChar currCase[256]; 2720 UChar currCase[256];
4019 int32_t length = 0; 2721 int32_t length = 0;
4020 int32_t pKeyLen = 0; 2722 int32_t pKeyLen = 0;
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after
4074 const UChar *sourceRules = NULL; 2776 const UChar *sourceRules = NULL;
4075 int32_t sourceRulesLen = 0; 2777 int32_t sourceRulesLen = 0;
4076 UParseError parseError; 2778 UParseError parseError;
4077 UColAttributeValue french = UCOL_OFF; 2779 UColAttributeValue french = UCOL_OFF;
4078 2780
4079 if(!ucol_equals(source, target)) { 2781 if(!ucol_equals(source, target)) {
4080 log_err("Same collators, different address not equal\n"); 2782 log_err("Same collators, different address not equal\n");
4081 errorNo++; 2783 errorNo++;
4082 } 2784 }
4083 ucol_close(target); 2785 ucol_close(target);
4084 if(uprv_strcmp(ucol_getLocaleByType(source, ULOC_REQUESTED_LOCALE, &status), ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) { 2786 if(uprv_strcmp(locName, ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &st atus)) == 0) {
4085 target = ucol_safeClone(source, NULL, NULL, &status); 2787 target = ucol_safeClone(source, NULL, NULL, &status);
4086 if(U_FAILURE(status)) { 2788 if(U_FAILURE(status)) {
4087 log_err("Error creating clone\n"); 2789 log_err("Error creating clone\n");
4088 errorNo++; 2790 errorNo++;
4089 return errorNo; 2791 return errorNo;
4090 } 2792 }
4091 if(!ucol_equals(source, target)) { 2793 if(!ucol_equals(source, target)) {
4092 log_err("Collator different from it's clone\n"); 2794 log_err("Collator different from it's clone\n");
4093 errorNo++; 2795 errorNo++;
4094 } 2796 }
(...skipping 14 matching lines...) Expand all
4109 } 2811 }
4110 ucol_close(target); 2812 ucol_close(target);
4111 2813
4112 sourceRules = ucol_getRules(source, &sourceRulesLen); 2814 sourceRules = ucol_getRules(source, &sourceRulesLen);
4113 target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_ DEFAULT, &parseError, &status); 2815 target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_ DEFAULT, &parseError, &status);
4114 if(U_FAILURE(status)) { 2816 if(U_FAILURE(status)) {
4115 log_err("Error instantiating target from rules - %s\n", u_errorName( status)); 2817 log_err("Error instantiating target from rules - %s\n", u_errorName( status));
4116 errorNo++; 2818 errorNo++;
4117 return errorNo; 2819 return errorNo;
4118 } 2820 }
4119 if(!ucol_equals(source, target)) { 2821 /* Note: The tailoring rule string is an optional data item. */
2822 if(!ucol_equals(source, target) && sourceRulesLen != 0) {
4120 log_err("Collator different from collator that was created from the same rules\n"); 2823 log_err("Collator different from collator that was created from the same rules\n");
4121 errorNo++; 2824 errorNo++;
4122 } 2825 }
4123 ucol_close(target); 2826 ucol_close(target);
4124 } 2827 }
4125 return errorNo; 2828 return errorNo;
4126 } 2829 }
4127 2830
4128 2831
4129 static void TestEquals(void) { 2832 static void TestEquals(void) {
4130 /* ucol_equals is not currently a public API. There is a chance that it will become 2833 /* ucol_equals is not currently a public API. There is a chance that it will become
4131 * something like this, but currently it is only used by RuleBasedCollator::operator== 2834 * something like this.
4132 */ 2835 */
4133 /* test whether the two collators instantiated from the same locale are equal */ 2836 /* test whether the two collators instantiated from the same locale are equal */
4134 UErrorCode status = U_ZERO_ERROR; 2837 UErrorCode status = U_ZERO_ERROR;
4135 UParseError parseError; 2838 UParseError parseError;
4136 int32_t noOfLoc = uloc_countAvailable(); 2839 int32_t noOfLoc = uloc_countAvailable();
4137 const char *locName = NULL; 2840 const char *locName = NULL;
4138 UCollator *source = NULL, *target = NULL; 2841 UCollator *source = NULL, *target = NULL;
4139 int32_t i = 0; 2842 int32_t i = 0;
4140 2843
4141 const char* rules[] = { 2844 const char* rules[] = {
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
4176 } 2879 }
4177 ucol_close(source); 2880 ucol_close(source);
4178 ucol_close(target); 2881 ucol_close(target);
4179 2882
4180 source = ucol_open("root", &status); 2883 source = ucol_open("root", &status);
4181 target = ucol_open("root", &status); 2884 target = ucol_open("root", &status);
4182 log_verbose("Testing root\n"); 2885 log_verbose("Testing root\n");
4183 if(!ucol_equals(source, source)) { 2886 if(!ucol_equals(source, source)) {
4184 log_err("Same collator not equal\n"); 2887 log_err("Same collator not equal\n");
4185 } 2888 }
4186 if(TestEqualsForCollator(locName, source, target)) { 2889 if(TestEqualsForCollator("root", source, target)) {
4187 log_err("Errors for root\n", locName); 2890 log_err("Errors for root\n");
4188 } 2891 }
4189 ucol_close(source); 2892 ucol_close(source);
4190 2893
4191 for(i = 0; i<noOfLoc; i++) { 2894 for(i = 0; i<noOfLoc; i++) {
4192 status = U_ZERO_ERROR; 2895 status = U_ZERO_ERROR;
4193 locName = uloc_getAvailable(i); 2896 locName = uloc_getAvailable(i);
4194 /*if(hasCollationElements(locName)) {*/ 2897 /*if(hasCollationElements(locName)) {*/
4195 log_verbose("Testing equality for locale %s\n", locName); 2898 log_verbose("Testing equality for locale %s\n", locName);
4196 source = ucol_open(locName, &status); 2899 source = ucol_open(locName, &status);
4197 target = ucol_open(locName, &status); 2900 target = ucol_open(locName, &status);
(...skipping 194 matching lines...) Expand 10 before | Expand all | Expand 10 after
4392 ucol_close(coll); 3095 ucol_close(coll);
4393 3096
4394 genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL); 3097 genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL);
4395 } 3098 }
4396 3099
4397 static void TestPinyinProblem(void) { 3100 static void TestPinyinProblem(void) {
4398 static const char *test[] = { "\\u4E56\\u4E56\\u7761", "\\u4E56\\u5B69\\u5B5 0" }; 3101 static const char *test[] = { "\\u4E56\\u4E56\\u7761", "\\u4E56\\u5B69\\u5B5 0" };
4399 genericLocaleStarter("zh__PINYIN", test, sizeof(test)/sizeof(test[0])); 3102 genericLocaleStarter("zh__PINYIN", test, sizeof(test)/sizeof(test[0]));
4400 } 3103 }
4401 3104
4402 #define TST_UCOL_MAX_INPUT 0x220001
4403 #define topByte 0xFF000000;
4404 #define bottomByte 0xFF;
4405 #define fourBytes 0xFFFFFFFF;
4406
4407
4408 static void showImplicit(UChar32 i) {
4409 if (i >= 0 && i <= TST_UCOL_MAX_INPUT) {
4410 log_verbose("%08X\t%08X\n", i, uprv_uca_getImplicitFromRaw(i));
4411 }
4412 }
4413
4414 static void TestImplicitGeneration(void) {
4415 UErrorCode status = U_ZERO_ERROR;
4416 UChar32 last = 0;
4417 UChar32 current;
4418 UChar32 i = 0, j = 0;
4419 UChar32 roundtrip = 0;
4420 UChar32 lastBottom = 0;
4421 UChar32 currentBottom = 0;
4422 UChar32 lastTop = 0;
4423 UChar32 currentTop = 0;
4424
4425 UCollator *coll = ucol_open("root", &status);
4426 if(U_FAILURE(status)) {
4427 log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status)) ;
4428 return;
4429 }
4430
4431 uprv_uca_getRawFromImplicit(0xE20303E7);
4432
4433 for (i = 0; i <= TST_UCOL_MAX_INPUT; ++i) {
4434 current = uprv_uca_getImplicitFromRaw(i) & fourBytes;
4435
4436 /* check that it round-trips AND that all intervening ones are illegal*/
4437 roundtrip = uprv_uca_getRawFromImplicit(current);
4438 if (roundtrip != i) {
4439 log_err("No roundtrip %08X\n", i);
4440 }
4441 if (last != 0) {
4442 for (j = last + 1; j < current; ++j) {
4443 roundtrip = uprv_uca_getRawFromImplicit(j);
4444 /* raise an error if it *doesn't* find an error*/
4445 if (roundtrip != -1) {
4446 log_err("Fails to recognize illegal %08X\n", j);
4447 }
4448 }
4449 }
4450 /* now do other consistency checks*/
4451 lastBottom = last & bottomByte;
4452 currentBottom = current & bottomByte;
4453 lastTop = last & topByte;
4454 currentTop = current & topByte;
4455 (void)lastBottom; /* Suppress set but not used warnings. */
4456 (void)currentBottom;
4457
4458 /* print out some values for spot-checking*/
4459 if (lastTop != currentTop || i == 0x10000 || i == 0x110000) {
4460 showImplicit(i-3);
4461 showImplicit(i-2);
4462 showImplicit(i-1);
4463 showImplicit(i);
4464 showImplicit(i+1);
4465 showImplicit(i+2);
4466 }
4467 last = current;
4468
4469 if(uprv_uca_getCodePointFromRaw(uprv_uca_getRawFromCodePoint(i)) != i) {
4470 log_err("No raw <-> code point roundtrip for 0x%08X\n", i);
4471 }
4472 }
4473 showImplicit(TST_UCOL_MAX_INPUT-2);
4474 showImplicit(TST_UCOL_MAX_INPUT-1);
4475 showImplicit(TST_UCOL_MAX_INPUT);
4476 ucol_close(coll);
4477 }
4478
4479 /** 3105 /**
4480 * Iterate through the given iterator, checking to see that all the strings 3106 * Iterate through the given iterator, checking to see that all the strings
4481 * in the expected array are present. 3107 * in the expected array are present.
4482 * @param expected array of strings we expect to see, or NULL 3108 * @param expected array of strings we expect to see, or NULL
4483 * @param expectedCount number of elements of expected, or 0 3109 * @param expectedCount number of elements of expected, or 0
4484 */ 3110 */
4485 static int32_t checkUEnumeration(const char* msg, 3111 static int32_t checkUEnumeration(const char* msg,
4486 UEnumeration* iter, 3112 UEnumeration* iter,
4487 const char** expected, 3113 const char** expected,
4488 int32_t expectedCount) { 3114 int32_t expectedCount) {
(...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after
4595 if (assertSuccess("getFunctionalEquivalent", &ec)) { 3221 if (assertSuccess("getFunctionalEquivalent", &ec)) {
4596 assertEquals("getFunctionalEquivalent(de)", "root", loc); 3222 assertEquals("getFunctionalEquivalent(de)", "root", loc);
4597 assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE", 3223 assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE",
4598 isAvailable == TRUE); 3224 isAvailable == TRUE);
4599 } 3225 }
4600 3226
4601 n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE", 3227 n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE",
4602 &isAvailable, &ec); 3228 &isAvailable, &ec);
4603 if (assertSuccess("getFunctionalEquivalent", &ec)) { 3229 if (assertSuccess("getFunctionalEquivalent", &ec)) {
4604 assertEquals("getFunctionalEquivalent(de_DE)", "root", loc); 3230 assertEquals("getFunctionalEquivalent(de_DE)", "root", loc);
4605 assertTrue("getFunctionalEquivalent(de_DE).isAvailable==TRUE", 3231 assertTrue("getFunctionalEquivalent(de_DE).isAvailable==FALSE",
4606 isAvailable == TRUE); 3232 isAvailable == FALSE);
4607 } 3233 }
4608 } 3234 }
4609 3235
4610 /* supersedes TestJ784 */ 3236 /* supersedes TestJ784 */
4611 static void TestBeforePinyin(void) { 3237 static void TestBeforePinyin(void) {
4612 const static char rules[] = { 3238 const static char rules[] = {
4613 "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<< \\u00E0<<<\\u00C0" 3239 "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<< \\u00E0<<<\\u00C0"
4614 "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<< \\u00E8<<<\\u00C8" 3240 "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<< \\u00E8<<<\\u00C8"
4615 "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<< \\u00EC<<<\\u00CC" 3241 "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<< \\u00EC<<<\\u00CC"
4616 "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<< \\u00F2<<<\\u00D2" 3242 "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<< \\u00F2<<<\\u00D2"
(...skipping 331 matching lines...) Expand 10 before | Expand all | Expand 10 after
4948 } 3574 }
4949 3575
4950 static void 3576 static void
4951 TestVI5913(void) 3577 TestVI5913(void)
4952 { 3578 {
4953 UErrorCode status = U_ZERO_ERROR; 3579 UErrorCode status = U_ZERO_ERROR;
4954 int32_t i, j; 3580 int32_t i, j;
4955 UCollator *coll =NULL; 3581 UCollator *coll =NULL;
4956 uint8_t resColl[100], expColl[100]; 3582 uint8_t resColl[100], expColl[100];
4957 int32_t rLen, tLen, ruleLen, sLen, kLen; 3583 int32_t rLen, tLen, ruleLen, sLen, kLen;
4958 UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0}; /* &a<0x1FF3-omega with Ypogegrammeni*/ 3584 UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0}; /* &b<0x1FF3-omega with Ypogegrammeni*/
4959 UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0}; /* &z<s with caron*/ 3585 UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0}; /* &z<s with caron*/
4960 UChar rule3[256]={0x26, 0x7a, 0x3c, 0x0061, 0x00ea, 0}; /* &z<a+e with circ umflex.*/ 3586 /*
3587 * Note: Just tailoring &z<ae^ does not work as expected:
3588 * The UCA spec requires for discontiguous contractions that they
3589 * extend an *existing match* by one combining mark at a time.
3590 * Therefore, ae must be a contraction so that the builder finds
3591 * discontiguous contractions for ae^, for example with an intervening under dot.
3592 * Only then do we get the expected tail closure with a\u1EC7, a\u1EB9\u0302 , etc.
3593 */
3594 UChar rule3[256]={
3595 0x26, 0x78, 0x3c, 0x61, 0x65, /* &x<ae */
3596 0x26, 0x7a, 0x3c, 0x0061, 0x00ea, /* &z<a+e with circumflex.*/
3597 0};
4961 static const UChar tData[][20]={ 3598 static const UChar tData[][20]={
4962 {0x1EAC, 0}, 3599 {0x1EAC, 0},
4963 {0x0041, 0x0323, 0x0302, 0}, 3600 {0x0041, 0x0323, 0x0302, 0},
4964 {0x1EA0, 0x0302, 0}, 3601 {0x1EA0, 0x0302, 0},
4965 {0x00C2, 0x0323, 0}, 3602 {0x00C2, 0x0323, 0},
4966 {0x1ED8, 0}, /* O with dot and circumflex */ 3603 {0x1ED8, 0}, /* O with dot and circumflex */
4967 {0x1ECC, 0x0302, 0}, 3604 {0x1ECC, 0x0302, 0},
4968 {0x1EB7, 0}, 3605 {0x1EB7, 0},
4969 {0x1EA1, 0x0306, 0}, 3606 {0x1EA1, 0x0306, 0},
4970 }; 3607 };
(...skipping 120 matching lines...) Expand 10 before | Expand all | Expand 10 after
5091 } 3728 }
5092 } 3729 }
5093 } 3730 }
5094 ucol_close(coll); 3731 ucol_close(coll);
5095 3732
5096 log_verbose("\n\nTailoring test for &z< ae with circumflex:"); 3733 log_verbose("\n\nTailoring test for &z< ae with circumflex:");
5097 ruleLen = u_strlen(rule3); 3734 ruleLen = u_strlen(rule3);
5098 coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status) ; 3735 coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status) ;
5099 tLen = u_strlen(tailorData3[3]); 3736 tLen = u_strlen(tailorData3[3]);
5100 kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100); 3737 kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100);
3738 log_verbose("\n Test Data[3] :%s \tlen: %d key: ", aescstrdup(tailorData3[3 ], tLen), tLen);
3739 for(i = 0; i<kLen; i++) {
3740 log_verbose(" %02X", expColl[i]);
3741 }
5101 for (j=4; j<6; j++) { 3742 for (j=4; j<6; j++) {
5102 tLen = u_strlen(tailorData3[j]); 3743 tLen = u_strlen(tailorData3[j]);
5103 rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100); 3744 rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100);
5104 3745
5105 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!= 0 ) { 3746 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!= 0 ) {
5106 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, tailo rData[j], tLen); 3747 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, aescs trdup(tailorData3[j], tLen), tLen);
5107 for(i = 0; i<rLen; i++) { 3748 for(i = 0; i<rLen; i++) {
5108 log_err(" %02X", resColl[i]); 3749 log_err(" %02X", resColl[i]);
5109 } 3750 }
5110 } 3751 }
5111 3752
5112 log_verbose("\n Test Data[%d] :%s \tlen: %d key: ", j, tailorData[j], t Len); 3753 log_verbose("\n Test Data[%d] :%s \tlen: %d key: ", j, aescstrdup(tailo rData3[j], tLen), tLen);
5113 for(i = 0; i<rLen; i++) { 3754 for(i = 0; i<rLen; i++) {
5114 log_verbose(" %02X", resColl[i]); 3755 log_verbose(" %02X", resColl[i]);
5115 } 3756 }
5116 } 3757 }
5117 ucol_close(coll); 3758 ucol_close(coll);
5118 } 3759 }
5119 3760
5120 static void 3761 static void
5121 TestTailor6179(void) 3762 TestTailor6179(void)
5122 { 3763 {
(...skipping 23 matching lines...) Expand all
5146 }; 3787 };
5147 static const UChar tData2[][4]={ 3788 static const UChar tData2[][4]={
5148 {0x61, 0}, 3789 {0x61, 0},
5149 {0x62, 0}, 3790 {0x62, 0},
5150 { 0xFDD0,0x009E, 0} 3791 { 0xFDD0,0x009E, 0}
5151 }; 3792 };
5152 3793
5153 /* 3794 /*
5154 * These values from FractionalUCA.txt will change, 3795 * These values from FractionalUCA.txt will change,
5155 * and need to be updated here. 3796 * and need to be updated here.
3797 * TODO: Make this not check for particular sort keys.
3798 * Instead, test that we get CEs before & after other ignorables; see ticket #6179.
5156 */ 3799 */
5157 static const uint8_t firstPrimaryIgnCE[]={1, 0x88, 1, 5, 0}; 3800 static const uint8_t firstPrimaryIgnCE[]={1, 0x83, 1, 5, 0};
5158 static const uint8_t lastPrimaryIgnCE[]={1, 0xE3, 1, 5, 0}; 3801 static const uint8_t lastPrimaryIgnCE[]={1, 0xFC, 1, 5, 0};
5159 static const uint8_t firstSecondaryIgnCE[]={1, 1, 0xbf, 0x04, 0}; 3802 static const uint8_t firstSecondaryIgnCE[]={1, 1, 0xfe, 0};
5160 static const uint8_t lastSecondaryIgnCE[]={1, 1, 0xbf, 0x04, 0}; 3803 static const uint8_t lastSecondaryIgnCE[]={1, 1, 0xff, 0};
3804
3805 UParseError parseError;
5161 3806
5162 /* Test [Last Primary ignorable] */ 3807 /* Test [Last Primary ignorable] */
5163 3808
5164 log_verbose("Tailoring test: &[last primary ignorable]<<a &[first primary i gnorable]<<b\n"); 3809 log_verbose("Tailoring test: &[last primary ignorable]<<a &[first primary i gnorable]<<b\n");
5165 ruleLen = u_strlen(rule1); 3810 ruleLen = u_strlen(rule1);
5166 coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status) ; 3811 coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status) ;
5167 if (U_FAILURE(status)) { 3812 if (U_FAILURE(status)) {
5168 log_err_status(status, "Tailoring test: &[last primary ignorable] failed ! -> %s\n", u_errorName(status)); 3813 log_err_status(status, "Tailoring test: &[last primary ignorable] failed ! -> %s\n", u_errorName(status));
5169 return; 3814 return;
5170 } 3815 }
(...skipping 13 matching lines...) Expand all
5184 for(i = 0; i<rLen; i++) { 3829 for(i = 0; i<rLen; i++) {
5185 log_err(" %02X", resColl[i]); 3830 log_err(" %02X", resColl[i]);
5186 } 3831 }
5187 log_err("\n"); 3832 log_err("\n");
5188 } 3833 }
5189 ucol_close(coll); 3834 ucol_close(coll);
5190 3835
5191 3836
5192 /* Test [Last Secondary ignorable] */ 3837 /* Test [Last Secondary ignorable] */
5193 log_verbose("Tailoring test: &[last secondary ignorable]<<<a &[first second ary ignorable]<<<b\n"); 3838 log_verbose("Tailoring test: &[last secondary ignorable]<<<a &[first second ary ignorable]<<<b\n");
5194 ruleLen = u_strlen(rule1); 3839 ruleLen = u_strlen(rule2);
5195 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status) ; 3840 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, &parseError, &status);
5196 if (U_FAILURE(status)) { 3841 if (U_FAILURE(status)) {
5197 log_err("Tailoring test: &[last secondary ignorable] failed! -> %s\n", u _errorName(status)); 3842 log_err("Tailoring test: &[last secondary ignorable] failed! -> %s\n", u _errorName(status));
3843 log_info(" offset=%d \"%s\" | \"%s\"\n",
3844 parseError.offset, aescstrdup(parseError.preContext, -1), aescs trdup(parseError.postContext, -1));
5198 return; 3845 return;
5199 } 3846 }
5200 tLen = u_strlen(tData2[0]); 3847 tLen = u_strlen(tData2[0]);
5201 rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100); 3848 rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100);
5202 if (rLen != LEN(lastSecondaryIgnCE) || uprv_memcmp(resColl, lastSecondaryIgn CE, rLen) != 0) { 3849 if (rLen != LEN(lastSecondaryIgnCE) || uprv_memcmp(resColl, lastSecondaryIgn CE, rLen) != 0) {
5203 log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 0 , tData2[0], rLen); 3850 log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 0 , tData2[0], rLen);
5204 for(i = 0; i<rLen; i++) { 3851 for(i = 0; i<rLen; i++) {
5205 log_err(" %02X", resColl[i]); 3852 log_err(" %02X", resColl[i]);
5206 } 3853 }
5207 log_err("\n"); 3854 log_err("\n");
5208 } 3855 }
5209 if(!log_knownIssue("8982", "debug and fix")) { /* TODO: debug & fix, see tic ket #8982 */ 3856 tLen = u_strlen(tData2[1]);
5210 tLen = u_strlen(tData2[1]); 3857 rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100);
5211 rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100); 3858 if (rLen != LEN(firstSecondaryIgnCE) || uprv_memcmp(resColl, firstSecondaryI gnCE, rLen) != 0) {
5212 if (rLen != LEN(firstSecondaryIgnCE) || uprv_memcmp(resColl, firstSecondar yIgnCE, rLen) != 0) { 3859 log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 1, tData2[1], rLen);
5213 log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 1 , tData2[1], rLen); 3860 for(i = 0; i<rLen; i++) {
5214 for(i = 0; i<rLen; i++) { 3861 log_err(" %02X", resColl[i]);
5215 log_err(" %02X", resColl[i]);
5216 }
5217 log_err("\n");
5218 } 3862 }
3863 log_err("\n");
5219 } 3864 }
5220 ucol_close(coll); 3865 ucol_close(coll);
5221 } 3866 }
5222 3867
5223 static void 3868 static void
5224 TestUCAPrecontext(void) 3869 TestUCAPrecontext(void)
5225 { 3870 {
5226 UErrorCode status = U_ZERO_ERROR; 3871 UErrorCode status = U_ZERO_ERROR;
5227 int32_t i, j; 3872 int32_t i, j;
5228 UCollator *coll =NULL; 3873 UCollator *coll =NULL;
(...skipping 346 matching lines...) Expand 10 before | Expand all | Expand 10 after
5575 for (rule_no = 0; rule_no < n_rules; ++rule_no) { 4220 for (rule_no = 0; rule_no < n_rules; ++rule_no) {
5576 4221
5577 length = u_unescape(str_rules[rule_no], rule, 500); 4222 length = u_unescape(str_rules[rule_no], rule, 500);
5578 if (length == 0) { 4223 if (length == 0) {
5579 log_err("ERROR: The rule cannot be unescaped: %s\n"); 4224 log_err("ERROR: The rule cannot be unescaped: %s\n");
5580 return; 4225 return;
5581 } 4226 }
5582 myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_er ror, &status); 4227 myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_er ror, &status);
5583 if(U_FAILURE(status)){ 4228 if(U_FAILURE(status)){
5584 log_err_status(status, "ERROR: in creation of rule based collator: %s\n" , myErrorName(status)); 4229 log_err_status(status, "ERROR: in creation of rule based collator: %s\n" , myErrorName(status));
4230 log_info(" offset=%d \"%s\" | \"%s\"\n",
4231 parse_error.offset,
4232 aescstrdup(parse_error.preContext, -1),
4233 aescstrdup(parse_error.postContext, -1));
5585 return; 4234 return;
5586 } 4235 }
5587 log_verbose("Testing the <<* syntax\n"); 4236 log_verbose("Testing the <<* syntax\n");
5588 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 4237 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
5589 ucol_setStrength(myCollation, UCOL_TERTIARY); 4238 ucol_setStrength(myCollation, UCOL_TERTIARY);
5590 for (testcase_no = 0; testcase_no < n_testcases; ++testcase_no) { 4239 for (testcase_no = 0; testcase_no < n_testcases; ++testcase_no) {
5591 doTest(myCollation, 4240 doTest(myCollation,
5592 testcases[testcase_no].source, 4241 testcases[testcase_no].source,
5593 testcases[testcase_no].target, 4242 testcases[testcase_no].target,
5594 testcases[testcase_no].result 4243 testcases[testcase_no].result
(...skipping 25 matching lines...) Expand all
5620 { {0x0061}, {0x0066}, UCOL _LESS }, /* "a" < "f" */ 4269 { {0x0061}, {0x0066}, UCOL _LESS }, /* "a" < "f" */
5621 { {0x006c, 0x0061}, {0x006b, 0x0062}, UCOL _LESS }, /* "la" < "kb" */ 4270 { {0x006c, 0x0061}, {0x006b, 0x0062}, UCOL _LESS }, /* "la" < "kb" */
5622 { {0x0061, 0x0061, 0x0061}, {0x0031, 0x0032, 0x0033}, UCOL _EQUAL }, /* "aaa" = "123" */ 4271 { {0x0061, 0x0061, 0x0061}, {0x0031, 0x0032, 0x0033}, UCOL _EQUAL }, /* "aaa" = "123" */
5623 { {0x0062}, {0x007a}, UCOL _LESS }, /* "b" < "z" */ 4272 { {0x0062}, {0x007a}, UCOL _LESS }, /* "b" < "z" */
5624 { {0x0061, 0x007a, 0x0062}, {0x0032, 0x0079, 0x006d}, UCOL _LESS }, /* "azb" < "2ym" */ 4273 { {0x0061, 0x007a, 0x0062}, {0x0032, 0x0079, 0x006d}, UCOL _LESS }, /* "azb" < "2ym" */
5625 }; 4274 };
5626 4275
5627 static int nRangeTestcases = LEN(rangeTestcases); 4276 static int nRangeTestcases = LEN(rangeTestcases);
5628 4277
5629 const static OneTestCase rangeTestcasesSupplemental[] = { 4278 const static OneTestCase rangeTestcasesSupplemental[] = {
5630 { {0xfffe}, {0xffff}, UCOL _LESS }, /* U+FFFE < U+FFFF */ 4279 { {0x4e00}, {0xfffb}, UCOL _LESS }, /* U+4E00 < U+FFFB */
5631 { {0xffff}, {0xd800, 0xdc00}, UCOL _LESS }, /* U+FFFF < U+10000 */ 4280 { {0xfffb}, {0xd800, 0xdc00}, UCOL _LESS }, /* U+FFFB < U+10000 */
5632 { {0xd800, 0xdc00}, {0xd800, 0xdc01}, UCOL _LESS }, /* U+10000 < U+10001 */ 4281 { {0xd800, 0xdc00}, {0xd800, 0xdc01}, UCOL _LESS }, /* U+10000 < U+10001 */
5633 { {0xfffe}, {0xd800, 0xdc01}, UCOL _LESS }, /* U+FFFE < U+10001 */ 4282 { {0x4e00}, {0xd800, 0xdc01}, UCOL _LESS }, /* U+4E00 < U+10001 */
5634 { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL _LESS }, /* U+10001 < U+10002 */ 4283 { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL _LESS }, /* U+10001 < U+10002 */
5635 { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL _LESS }, /* U+10001 < U+10002 */ 4284 { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL _LESS }, /* U+10001 < U+10002 */
5636 { {0xfffe}, {0xd800, 0xdc02}, UCOL _LESS }, /* U+FFFE < U+10002 */ 4285 { {0x4e00}, {0xd800, 0xdc02}, UCOL _LESS }, /* U+4E00 < U+10002 */
5637 }; 4286 };
5638 4287
5639 static int nRangeTestcasesSupplemental = LEN(rangeTestcasesSupplemental); 4288 static int nRangeTestcasesSupplemental = LEN(rangeTestcasesSupplemental);
5640 4289
5641 const static OneTestCase rangeTestcasesQwerty[] = { 4290 const static OneTestCase rangeTestcasesQwerty[] = {
5642 { {0x0071}, {0x0077}, UCOL _LESS }, /* "q" < "w" */ 4291 { {0x0071}, {0x0077}, UCOL _LESS }, /* "q" < "w" */
5643 { {0x0077}, {0x0065}, UCOL _LESS }, /* "w" < "e" */ 4292 { {0x0077}, {0x0065}, UCOL _LESS }, /* "w" < "e" */
5644 4293
5645 { {0x0079}, {0x0075}, UCOL _LESS }, /* "y" < "u" */ 4294 { {0x0079}, {0x0075}, UCOL _LESS }, /* "y" < "u" */
5646 { {0x0071}, {0x0075}, UCOL _LESS }, /* "q" << "u" */ 4295 { {0x0071}, {0x0075}, UCOL _LESS }, /* "q" << "u" */
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
5683 4332
5684 "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz &y<*fghe &a=*\\u0031\\u0032\\ u0033", 4333 "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz &y<*fghe &a=*\\u0031\\u0032\\ u0033",
5685 "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz &y<*fghe &a=*'\\u0031\\ u0032\\u0033'", 4334 "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz &y<*fghe &a=*'\\u0031\\ u0032\\u0033'",
5686 }; 4335 };
5687 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules)); 4336 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
5688 } 4337 }
5689 4338
5690 static void TestSameStrengthListSupplemental(void) 4339 static void TestSameStrengthListSupplemental(void)
5691 { 4340 {
5692 const char* strRules[] = { 4341 const char* strRules[] = {
5693 "&\\ufffe<\\uffff<\\U00010000<\\U00010001<\\U00010002", 4342 "&\\u4e00<\\ufffb<\\U00010000<\\U00010001<\\U00010002",
5694 "&\\ufffe<\\uffff<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02", 4343 "&\\u4e00<\\ufffb<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",
5695 "&\\ufffe<*\\uffff\\U00010000\\U00010001\\U00010002", 4344 "&\\u4e00<*\\ufffb\\U00010000\\U00010001\\U00010002",
5696 "&\\ufffe<*\\uffff\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02", 4345 "&\\u4e00<*\\ufffb\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",
5697 }; 4346 };
5698 doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, str Rules, LEN(strRules)); 4347 doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, str Rules, LEN(strRules));
5699 } 4348 }
5700 4349
5701 static void TestSameStrengthListQwerty(void) 4350 static void TestSameStrengthListQwerty(void)
5702 { 4351 {
5703 const char* strRules[] = { 4352 const char* strRules[] = {
5704 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */ 4353 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */
5705 "&q<*wer &w<<*tyu &t<<<*iop &o=*asd", /* Lists */ 4354 "&q<*wer &w<<*tyu &t<<<*iop &o=*asd", /* Lists */
5706 "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u00 74<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064", 4355 "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u00 74<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064",
(...skipping 27 matching lines...) Expand all
5734 { 4383 {
5735 const char* strRules[] = { 4384 const char* strRules[] = {
5736 "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3", 4385 "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3",
5737 }; 4386 };
5738 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules)); 4387 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
5739 } 4388 }
5740 4389
5741 static void TestSameStrengthListSupplementalRanges(void) 4390 static void TestSameStrengthListSupplementalRanges(void)
5742 { 4391 {
5743 const char* strRules[] = { 4392 const char* strRules[] = {
5744 "&\\ufffe<*\\uffff-\\U00010002", 4393 /* Note: U+FFFD..U+FFFF are not tailorable, so a range cannot include them. */
4394 "&\\u4e00<*\\ufffb\\U00010000-\\U00010002",
5745 }; 4395 };
5746 doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, str Rules, LEN(strRules)); 4396 doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, str Rules, LEN(strRules));
5747 } 4397 }
5748 4398
5749 static void TestSpecialCharacters(void) 4399 static void TestSpecialCharacters(void)
5750 { 4400 {
5751 const char* strRules[] = { 4401 const char* strRules[] = {
5752 /* Normal */ 4402 /* Normal */
5753 "&';'<'+'<','<'-'<'&'<'*'", 4403 "&';'<'+'<','<'-'<'&'<'*'",
5754 4404
(...skipping 279 matching lines...) Expand 10 before | Expand all | Expand 10 after
6034 /* 4684 /*
6035 * Test reordering API. 4685 * Test reordering API.
6036 */ 4686 */
6037 static void TestReorderingAPI(void) 4687 static void TestReorderingAPI(void)
6038 { 4688 {
6039 UErrorCode status = U_ZERO_ERROR; 4689 UErrorCode status = U_ZERO_ERROR;
6040 UCollator *myCollation; 4690 UCollator *myCollation;
6041 int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUN CTUATION}; 4691 int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUN CTUATION};
6042 int32_t duplicateReorderCodes[] = {USCRIPT_CUNEIFORM, USCRIPT_GREEK, UCOL_RE ORDER_CODE_CURRENCY, USCRIPT_EGYPTIAN_HIEROGLYPHS}; 4692 int32_t duplicateReorderCodes[] = {USCRIPT_CUNEIFORM, USCRIPT_GREEK, UCOL_RE ORDER_CODE_CURRENCY, USCRIPT_EGYPTIAN_HIEROGLYPHS};
6043 int32_t reorderCodesStartingWithDefault[] = {UCOL_REORDER_CODE_DEFAULT, USCR IPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION}; 4693 int32_t reorderCodesStartingWithDefault[] = {UCOL_REORDER_CODE_DEFAULT, USCR IPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
4694 int32_t reorderCodeNone = UCOL_REORDER_CODE_NONE;
6044 UCollationResult collResult; 4695 UCollationResult collResult;
6045 int32_t retrievedReorderCodesLength; 4696 int32_t retrievedReorderCodesLength;
6046 int32_t retrievedReorderCodes[10]; 4697 int32_t retrievedReorderCodes[10];
6047 UChar greekString[] = { 0x03b1 }; 4698 UChar greekString[] = { 0x03b1 };
6048 UChar punctuationString[] = { 0x203e }; 4699 UChar punctuationString[] = { 0x203e };
6049 int loopIndex; 4700 int loopIndex;
6050 4701
6051 log_verbose("Testing non-lead bytes in a sort key with and without reorderin g\n"); 4702 log_verbose("Testing non-lead bytes in a sort key with and without reorderin g\n");
6052 4703
6053 /* build collator tertiary */ 4704 /* build collator tertiary */
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after
6111 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0); 4762 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
6112 return; 4763 return;
6113 } 4764 }
6114 4765
6115 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctu ationString, LEN(punctuationString)); 4766 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctu ationString, LEN(punctuationString));
6116 if (collResult != UCOL_GREATER) { 4767 if (collResult != UCOL_GREATER) {
6117 log_err_status(status, "ERROR: collation result should have been UCOL_GR EATER\n"); 4768 log_err_status(status, "ERROR: collation result should have been UCOL_GR EATER\n");
6118 return; 4769 return;
6119 } 4770 }
6120 4771
4772 /* clear the reordering using [NONE] */
4773 ucol_setReorderCodes(myCollation, &reorderCodeNone, 1, &status);
4774 if (U_FAILURE(status)) {
4775 log_err_status(status, "ERROR: setting reorder codes to [NONE]: %s\n", m yErrorName(status));
4776 return;
4777 }
4778
4779 /* get the reordering again */
4780 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &st atus);
4781 if (retrievedReorderCodesLength != 0) {
4782 log_err_status(status,
4783 "ERROR: [NONE] retrieved reorder codes length was %d but should have been 0\n",
4784 retrievedReorderCodesLength);
4785 return;
4786 }
4787
6121 /* test for error condition on duplicate reorder codes */ 4788 /* test for error condition on duplicate reorder codes */
6122 ucol_setReorderCodes(myCollation, duplicateReorderCodes, LEN(duplicateReorde rCodes), &status); 4789 ucol_setReorderCodes(myCollation, duplicateReorderCodes, LEN(duplicateReorde rCodes), &status);
6123 if (!U_FAILURE(status)) { 4790 if (!U_FAILURE(status)) {
6124 log_err_status(status, "ERROR: setting duplicate reorder codes did not g enerate a failure\n"); 4791 log_err_status(status, "ERROR: setting duplicate reorder codes did not g enerate a failure\n");
6125 return; 4792 return;
6126 } 4793 }
6127 4794
6128 status = U_ZERO_ERROR; 4795 status = U_ZERO_ERROR;
6129 /* test for reorder codes after a reset code */ 4796 /* test for reorder codes after a reset code */
6130 ucol_setReorderCodes(myCollation, reorderCodesStartingWithDefault, LEN(reord erCodesStartingWithDefault), &status); 4797 ucol_setReorderCodes(myCollation, reorderCodesStartingWithDefault, LEN(reord erCodesStartingWithDefault), &status);
(...skipping 134 matching lines...) Expand 10 before | Expand all | Expand 10 after
6265 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) { 4932 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
6266 if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) { 4933 if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
6267 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex); 4934 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
6268 return; 4935 return;
6269 } 4936 }
6270 } 4937 }
6271 4938
6272 ucol_close(myCollation); 4939 ucol_close(myCollation);
6273 } 4940 }
6274 4941
6275 static int compareUScriptCodes(const void * a, const void * b) 4942 static UBool containsExpectedScript(const int32_t scripts[], int32_t length, int 32_t expectedScript) {
6276 { 4943 int32_t i;
6277 return ( *(int32_t*)a - *(int32_t*)b ); 4944 for (i = 0; i < length; ++i) {
4945 if (expectedScript == scripts[i]) { return TRUE; }
4946 }
4947 return FALSE;
6278 } 4948 }
6279 4949
6280 static void TestEquivalentReorderingScripts(void) { 4950 static void TestEquivalentReorderingScripts(void) {
6281 UErrorCode status = U_ZERO_ERROR; 4951 UErrorCode status = U_ZERO_ERROR;
6282 int32_t equivalentScripts[50]; 4952 int32_t equivalentScripts[100];
6283 int32_t equivalentScriptsLength; 4953 int32_t length;
6284 int loopIndex; 4954 int i;
6285 int32_t equivalentScriptsResult[] = { 4955 int32_t prevScript;
4956 /* At least these scripts are expected to be equivalent. There may be more. */
4957 static const int32_t expectedScripts[] = {
6286 USCRIPT_BOPOMOFO, 4958 USCRIPT_BOPOMOFO,
6287 USCRIPT_LISU, 4959 USCRIPT_LISU,
6288 USCRIPT_LYCIAN, 4960 USCRIPT_LYCIAN,
6289 USCRIPT_CARIAN, 4961 USCRIPT_CARIAN,
6290 USCRIPT_LYDIAN, 4962 USCRIPT_LYDIAN,
6291 USCRIPT_YI, 4963 USCRIPT_YI,
6292 USCRIPT_OLD_ITALIC, 4964 USCRIPT_OLD_ITALIC,
6293 USCRIPT_GOTHIC, 4965 USCRIPT_GOTHIC,
6294 USCRIPT_DESERET, 4966 USCRIPT_DESERET,
6295 USCRIPT_SHAVIAN, 4967 USCRIPT_SHAVIAN,
6296 USCRIPT_OSMANYA, 4968 USCRIPT_OSMANYA,
6297 USCRIPT_LINEAR_B, 4969 USCRIPT_LINEAR_B,
6298 USCRIPT_CYPRIOT, 4970 USCRIPT_CYPRIOT,
6299 USCRIPT_OLD_SOUTH_ARABIAN, 4971 USCRIPT_OLD_SOUTH_ARABIAN,
6300 USCRIPT_AVESTAN, 4972 USCRIPT_AVESTAN,
6301 USCRIPT_IMPERIAL_ARAMAIC, 4973 USCRIPT_IMPERIAL_ARAMAIC,
6302 USCRIPT_INSCRIPTIONAL_PARTHIAN, 4974 USCRIPT_INSCRIPTIONAL_PARTHIAN,
6303 USCRIPT_INSCRIPTIONAL_PAHLAVI, 4975 USCRIPT_INSCRIPTIONAL_PAHLAVI,
6304 USCRIPT_UGARITIC, 4976 USCRIPT_UGARITIC,
6305 USCRIPT_OLD_PERSIAN, 4977 USCRIPT_OLD_PERSIAN,
6306 USCRIPT_CUNEIFORM, 4978 USCRIPT_CUNEIFORM,
6307 USCRIPT_EGYPTIAN_HIEROGLYPHS, 4979 USCRIPT_EGYPTIAN_HIEROGLYPHS,
6308 USCRIPT_PHONETIC_POLLARD, 4980 USCRIPT_PHONETIC_POLLARD,
6309 USCRIPT_SORA_SOMPENG, 4981 USCRIPT_SORA_SOMPENG,
6310 USCRIPT_MEROITIC_CURSIVE, 4982 USCRIPT_MEROITIC_CURSIVE,
6311 USCRIPT_MEROITIC_HIEROGLYPHS 4983 USCRIPT_MEROITIC_HIEROGLYPHS
6312 }; 4984 };
6313 4985
6314 qsort(equivalentScriptsResult, LEN(equivalentScriptsResult), sizeof(int32_t) , compareUScriptCodes);
6315
6316 /* UScript.GOTHIC */ 4986 /* UScript.GOTHIC */
6317 equivalentScriptsLength = ucol_getEquivalentReorderCodes(USCRIPT_GOTHIC, equ ivalentScripts, LEN(equivalentScripts), &status); 4987 length = ucol_getEquivalentReorderCodes(
4988 USCRIPT_GOTHIC, equivalentScripts, LEN(equivalentScripts), &status);
6318 if (U_FAILURE(status)) { 4989 if (U_FAILURE(status)) {
6319 log_err_status(status, "ERROR: retrieving equivalent reorder codes: %s\n ", myErrorName(status)); 4990 log_err_status(status, "ERROR/Gothic: retrieving equivalent reorder code s: %s\n", myErrorName(status));
6320 return; 4991 return;
6321 } 4992 }
6322 /* 4993 if (length < LEN(expectedScripts)) {
6323 fprintf(stdout, "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n"); 4994 log_err("ERROR/Gothic: retrieved equivalent script length wrong: "
6324 fprintf(stdout, "equivalentScriptsLength = %d\n", equivalentScriptsLength); 4995 "expected at least %d, was = %d\n",
6325 for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) { 4996 LEN(expectedScripts), length);
6326 fprintf(stdout, "%d = %x\n", loopIndex, equivalentScripts[loopIndex]);
6327 } 4997 }
6328 */ 4998 prevScript = -1;
6329 if (equivalentScriptsLength != LEN(equivalentScriptsResult)) { 4999 for (i = 0; i < length; ++i) {
6330 log_err_status(status, "ERROR: retrieved equivalent script length wrong: expected = %d, was = %d\n", LEN(equivalentScriptsResult), equivalentScriptsLeng th); 5000 int32_t script = equivalentScripts[i];
6331 return; 5001 if (script <= prevScript) {
5002 log_err("ERROR/Gothic: equivalent scripts out of order at index %d\n ", i);
5003 }
5004 prevScript = script;
6332 } 5005 }
6333 for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) { 5006 for (i = 0; i < LEN(expectedScripts); i++) {
6334 if (equivalentScriptsResult[loopIndex] != equivalentScripts[loopIndex]) { 5007 if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i ])) {
6335 log_err_status(status, "ERROR: equivalent scripts results don't matc h: expected = %d, was = %d\n", equivalentScriptsResult[loopIndex], equivalentScr ipts[loopIndex]); 5008 log_err("ERROR/Gothic: equivalent scripts do not contain %d\n",
6336 return; 5009 expectedScripts[i]);
6337 } 5010 }
6338 } 5011 }
6339 5012
6340 /* UScript.SHAVIAN */ 5013 /* UScript.SHAVIAN */
6341 equivalentScriptsLength = ucol_getEquivalentReorderCodes(USCRIPT_SHAVIAN, eq uivalentScripts, LEN(equivalentScripts), &status); 5014 length = ucol_getEquivalentReorderCodes(
5015 USCRIPT_SHAVIAN, equivalentScripts, LEN(equivalentScripts), &status) ;
6342 if (U_FAILURE(status)) { 5016 if (U_FAILURE(status)) {
6343 log_err_status(status, "ERROR: retrieving equivalent reorder codes: %s\n ", myErrorName(status)); 5017 log_err_status(status, "ERROR/Shavian: retrieving equivalent reorder cod es: %s\n", myErrorName(status));
6344 return; 5018 return;
6345 } 5019 }
6346 if (equivalentScriptsLength != LEN(equivalentScriptsResult)) { 5020 if (length < LEN(expectedScripts)) {
6347 log_err_status(status, "ERROR: retrieved equivalent script length wrong: expected = %d, was = %d\n", LEN(equivalentScriptsResult), equivalentScriptsLeng th); 5021 log_err("ERROR/Shavian: retrieved equivalent script length wrong: "
6348 return; 5022 "expected at least %d, was = %d\n",
5023 LEN(expectedScripts), length);
6349 } 5024 }
6350 for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) { 5025 for (i = 0; i < LEN(expectedScripts); i++) {
6351 if (equivalentScriptsResult[loopIndex] != equivalentScripts[loopIndex]) { 5026 if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i ])) {
6352 log_err_status(status, "ERROR: equivalent scripts results don't matc h: expected = %d, was = %d\n", equivalentScriptsResult[loopIndex], equivalentScr ipts[loopIndex]); 5027 log_err("ERROR/Shavian: equivalent scripts do not contain %d\n",
6353 return; 5028 expectedScripts[i]);
6354 } 5029 }
6355 } 5030 }
6356 } 5031 }
6357 5032
6358 static void TestReorderingAcrossCloning(void) 5033 static void TestReorderingAcrossCloning(void)
6359 { 5034 {
6360 UErrorCode status = U_ZERO_ERROR; 5035 UErrorCode status = U_ZERO_ERROR;
6361 UCollator *myCollation; 5036 UCollator *myCollation;
6362 int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUN CTUATION}; 5037 int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUN CTUATION};
6363 UCollator *clonedCollation; 5038 UCollator *clonedCollation;
(...skipping 483 matching lines...) Expand 10 before | Expand all | Expand 10 after
6847 USet* importTailoredSet; 5522 USet* importTailoredSet;
6848 5523
6849 5524
6850 vicoll = ucol_open("vi", &status); 5525 vicoll = ucol_open("vi", &status);
6851 if(U_FAILURE(status)){ 5526 if(U_FAILURE(status)){
6852 log_err_status(status, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErr orName(status)); 5527 log_err_status(status, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErr orName(status));
6853 return; 5528 return;
6854 } 5529 }
6855 5530
6856 virules = (UChar*) ucol_getRules(vicoll, &viruleslength); 5531 virules = (UChar*) ucol_getRules(vicoll, &viruleslength);
5532 if(viruleslength == 0) {
5533 log_data_err("missing vi tailoring rule string\n");
5534 ucol_close(vicoll);
5535 return;
5536 }
6857 escoll = ucol_open("es", &status); 5537 escoll = ucol_open("es", &status);
6858 esrules = (UChar*) ucol_getRules(escoll, &esruleslength); 5538 esrules = (UChar*) ucol_getRules(escoll, &esruleslength);
6859 viesrules = (UChar*)uprv_malloc((viruleslength+esruleslength+1)*sizeof(UChar *)); 5539 viesrules = (UChar*)uprv_malloc((viruleslength+esruleslength+1)*sizeof(UChar *));
6860 viesrules[0] = 0; 5540 viesrules[0] = 0;
6861 u_strcat(viesrules, virules); 5541 u_strcat(viesrules, virules);
6862 u_strcat(viesrules, esrules); 5542 u_strcat(viesrules, esrules);
6863 viesruleslength = viruleslength + esruleslength; 5543 viesruleslength = viruleslength + esruleslength;
6864 viescoll = ucol_openRules(viesrules, viesruleslength, UCOL_ON, UCOL_TERTIARY , &error, &status); 5544 viescoll = ucol_openRules(viesrules, viesruleslength, UCOL_ON, UCOL_TERTIARY , &error, &status);
6865 5545
6866 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */ 5546 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after
6946 5626
6947 USet* tailoredSet; 5627 USet* tailoredSet;
6948 USet* importTailoredSet; 5628 USet* importTailoredSet;
6949 5629
6950 vicoll = ucol_open("vi", &status); 5630 vicoll = ucol_open("vi", &status);
6951 if(U_FAILURE(status)){ 5631 if(U_FAILURE(status)){
6952 log_err_status(status, "ERROR: in creation of rule based collator: %s\n" , myErrorName(status)); 5632 log_err_status(status, "ERROR: in creation of rule based collator: %s\n" , myErrorName(status));
6953 return; 5633 return;
6954 } 5634 }
6955 virules = ucol_getRules(vicoll, &viruleslength); 5635 virules = ucol_getRules(vicoll, &viruleslength);
5636 if(viruleslength == 0) {
5637 log_data_err("missing vi tailoring rule string\n");
5638 ucol_close(vicoll);
5639 return;
5640 }
6956 /* decoll = ucol_open("de@collation=phonebook", &status); */ 5641 /* decoll = ucol_open("de@collation=phonebook", &status); */
6957 decoll = ucol_open("de-u-co-phonebk", &status); 5642 decoll = ucol_open("de-u-co-phonebk", &status);
6958 if(U_FAILURE(status)){ 5643 if(U_FAILURE(status)){
6959 log_err_status(status, "ERROR: in creation of rule based collator: %s\n" , myErrorName(status)); 5644 log_err_status(status, "ERROR: in creation of rule based collator: %s\n" , myErrorName(status));
6960 return; 5645 return;
6961 } 5646 }
6962 5647
6963 5648
6964 derules = ucol_getRules(decoll, &deruleslength); 5649 derules = ucol_getRules(decoll, &deruleslength);
6965 viderules = (UChar*)uprv_malloc((viruleslength+deruleslength+1)*sizeof(UChar *)); 5650 viderules = (UChar*)uprv_malloc((viruleslength+deruleslength+1)*sizeof(UChar *));
(...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after
7069 } LongUpperStrItem; 5754 } LongUpperStrItem;
7070 5755
7071 /* String pointers must be in reverse collation order of the corresponding strings */ 5756 /* String pointers must be in reverse collation order of the corresponding strings */
7072 static const LongUpperStrItem longUpperStrItems[] = { 5757 static const LongUpperStrItem longUpperStrItems[] = {
7073 { longUpperStr1, MY_ARRAY_LEN(longUpperStr1) }, 5758 { longUpperStr1, MY_ARRAY_LEN(longUpperStr1) },
7074 { longUpperStr2, MY_ARRAY_LEN(longUpperStr2) }, 5759 { longUpperStr2, MY_ARRAY_LEN(longUpperStr2) },
7075 { longUpperStr3, MY_ARRAY_LEN(longUpperStr3) }, 5760 { longUpperStr3, MY_ARRAY_LEN(longUpperStr3) },
7076 { NULL, 0 } 5761 { NULL, 0 }
7077 }; 5762 };
7078 5763
7079 enum { kCollKeyLenMax = 800 }; /* longest expected is 749, but may change with collation changes */ 5764 enum { kCollKeyLenMax = 850 }; /* may change with collation changes */
7080 5765
7081 /* Text fix for #8445; without fix, could have crash due to stack or heap corruption */ 5766 /* Text fix for #8445; without fix, could have crash due to stack or heap corruption */
7082 static void TestCaseLevelBufferOverflow(void) 5767 static void TestCaseLevelBufferOverflow(void)
7083 { 5768 {
7084 UErrorCode status = U_ZERO_ERROR; 5769 UErrorCode status = U_ZERO_ERROR;
7085 UCollator * ucol = ucol_open("root", &status); 5770 UCollator * ucol = ucol_open("root", &status);
7086 if ( U_SUCCESS(status) ) { 5771 if ( U_SUCCESS(status) ) {
7087 ucol_setAttribute(ucol, UCOL_CASE_LEVEL, UCOL_ON, &status); 5772 ucol_setAttribute(ucol, UCOL_CASE_LEVEL, UCOL_ON, &status);
7088 if ( U_SUCCESS(status) ) { 5773 if ( U_SUCCESS(status) ) {
7089 const LongUpperStrItem * itemPtr; 5774 const LongUpperStrItem * itemPtr;
(...skipping 17 matching lines...) Expand all
7107 } 5792 }
7108 } else { 5793 } else {
7109 log_err_status(status, "ERROR in ucol_setAttribute UCOL_CASE_LEVEL o n: %s\n", myErrorName(status)); 5794 log_err_status(status, "ERROR in ucol_setAttribute UCOL_CASE_LEVEL o n: %s\n", myErrorName(status));
7110 } 5795 }
7111 ucol_close(ucol); 5796 ucol_close(ucol);
7112 } else { 5797 } else {
7113 log_err_status(status, "ERROR in ucol_open for root: %s\n", myErrorName( status)); 5798 log_err_status(status, "ERROR in ucol_open for root: %s\n", myErrorName( status));
7114 } 5799 }
7115 } 5800 }
7116 5801
5802 /* Test for #10595 */
5803 static const UChar testJapaneseName[] = {0x4F50, 0x3005, 0x6728, 0x002C, 0x6B66, 0}; /* Sa sa Ki, Takeshi */
5804 #define KEY_PART_SIZE 16
5805
5806 static void TestNextSortKeyPartJaIdentical(void)
5807 {
5808 UErrorCode status = U_ZERO_ERROR;
5809 UCollator *coll;
5810 uint8_t keyPart[KEY_PART_SIZE];
5811 UCharIterator iter;
5812 uint32_t state[2] = {0, 0};
5813 int32_t keyPartLen;
5814
5815 coll = ucol_open("ja", &status);
5816 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
5817 if (U_FAILURE(status)) {
5818 log_err_status(status, "ERROR: in creation of Japanese collator with ide ntical strength: %s\n", myErrorName(status));
5819 return;
5820 }
5821
5822 uiter_setString(&iter, testJapaneseName, 5);
5823 keyPartLen = KEY_PART_SIZE;
5824 while (keyPartLen == KEY_PART_SIZE) {
5825 keyPartLen = ucol_nextSortKeyPart(coll, &iter, state, keyPart, KEY_PART_ SIZE, &status);
5826 if (U_FAILURE(status)) {
5827 log_err_status(status, "ERROR: in iterating next sort key part: %s\n ", myErrorName(status));
5828 break;
5829 }
5830 }
5831
5832 ucol_close(coll);
5833 }
7117 5834
7118 #define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x) 5835 #define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)
7119 5836
7120 void addMiscCollTest(TestNode** root) 5837 void addMiscCollTest(TestNode** root)
7121 { 5838 {
7122 TEST(TestRuleOptions); 5839 TEST(TestRuleOptions);
7123 TEST(TestBeforePrefixFailure); 5840 TEST(TestBeforePrefixFailure);
7124 TEST(TestContractionClosure); 5841 TEST(TestContractionClosure);
7125 TEST(TestPrefixCompose); 5842 TEST(TestPrefixCompose);
7126 TEST(TestStrCollIdenticalPrefix); 5843 TEST(TestStrCollIdenticalPrefix);
7127 TEST(TestPrefix); 5844 TEST(TestPrefix);
7128 TEST(TestNewJapanese); 5845 TEST(TestNewJapanese);
7129 /*TEST(TestLimitations);*/ 5846 /*TEST(TestLimitations);*/
7130 TEST(TestNonChars); 5847 TEST(TestNonChars);
7131 TEST(TestExtremeCompression); 5848 TEST(TestExtremeCompression);
7132 TEST(TestSurrogates); 5849 TEST(TestSurrogates);
7133 TEST(TestVariableTopSetting); 5850 TEST(TestVariableTopSetting);
5851 TEST(TestMaxVariable);
7134 TEST(TestBocsuCoverage); 5852 TEST(TestBocsuCoverage);
7135 TEST(TestCyrillicTailoring); 5853 TEST(TestCyrillicTailoring);
7136 TEST(TestCase); 5854 TEST(TestCase);
7137 TEST(IncompleteCntTest); 5855 TEST(IncompleteCntTest);
7138 TEST(BlackBirdTest); 5856 TEST(BlackBirdTest);
7139 TEST(FunkyATest); 5857 TEST(FunkyATest);
7140 TEST(BillFairmanTest); 5858 TEST(BillFairmanTest);
7141 TEST(RamsRulesTest);
7142 TEST(IsTailoredTest);
7143 TEST(TestCollations);
7144 TEST(TestChMove); 5859 TEST(TestChMove);
7145 TEST(TestImplicitTailoring); 5860 TEST(TestImplicitTailoring);
7146 TEST(TestFCDProblem); 5861 TEST(TestFCDProblem);
7147 TEST(TestEmptyRule); 5862 TEST(TestEmptyRule);
7148 /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */ 5863 /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
7149 TEST(TestJ815); 5864 TEST(TestJ815);
7150 /*TEST(TestJ831);*/ /* we changed lv locale */ 5865 /*TEST(TestJ831);*/ /* we changed lv locale */
7151 TEST(TestBefore); 5866 TEST(TestBefore);
7152 TEST(TestRedundantRules);
7153 TEST(TestExpansionSyntax);
7154 TEST(TestHangulTailoring); 5867 TEST(TestHangulTailoring);
7155 TEST(TestUCARules); 5868 TEST(TestUCARules);
7156 TEST(TestIncrementalNormalize); 5869 TEST(TestIncrementalNormalize);
7157 TEST(TestComposeDecompose); 5870 TEST(TestComposeDecompose);
7158 TEST(TestCompressOverlap); 5871 TEST(TestCompressOverlap);
7159 TEST(TestContraction); 5872 TEST(TestContraction);
7160 TEST(TestExpansion); 5873 TEST(TestExpansion);
7161 /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */ 5874 /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
7162 /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */ 5875 /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
7163 TEST(TestOptimize); 5876 TEST(TestOptimize);
7164 TEST(TestSuppressContractions); 5877 TEST(TestSuppressContractions);
7165 TEST(Alexis2); 5878 TEST(Alexis2);
7166 TEST(TestHebrewUCA); 5879 TEST(TestHebrewUCA);
7167 TEST(TestPartialSortKeyTermination); 5880 TEST(TestPartialSortKeyTermination);
7168 TEST(TestSettings); 5881 TEST(TestSettings);
7169 TEST(TestEquals); 5882 TEST(TestEquals);
7170 TEST(TestJ2726); 5883 TEST(TestJ2726);
7171 TEST(NullRule); 5884 TEST(NullRule);
7172 TEST(TestNumericCollation); 5885 TEST(TestNumericCollation);
7173 TEST(TestTibetanConformance); 5886 TEST(TestTibetanConformance);
7174 TEST(TestPinyinProblem); 5887 TEST(TestPinyinProblem);
7175 TEST(TestImplicitGeneration);
7176 TEST(TestSeparateTrees); 5888 TEST(TestSeparateTrees);
7177 TEST(TestBeforePinyin); 5889 TEST(TestBeforePinyin);
7178 TEST(TestBeforeTightening); 5890 TEST(TestBeforeTightening);
7179 /*TEST(TestMoreBefore);*/ 5891 /*TEST(TestMoreBefore);*/
7180 TEST(TestTailorNULL); 5892 TEST(TestTailorNULL);
7181 TEST(TestUpperFirstQuaternary); 5893 TEST(TestUpperFirstQuaternary);
7182 TEST(TestJ4960); 5894 TEST(TestJ4960);
7183 TEST(TestJ5223); 5895 TEST(TestJ5223);
7184 TEST(TestJ5232); 5896 TEST(TestJ5232);
7185 TEST(TestJ5367); 5897 TEST(TestJ5367);
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
7217 TEST(TestGreekFirstReorder); 5929 TEST(TestGreekFirstReorder);
7218 TEST(TestGreekLastReorder); 5930 TEST(TestGreekLastReorder);
7219 TEST(TestNonScriptReorder); 5931 TEST(TestNonScriptReorder);
7220 TEST(TestHaniReorder); 5932 TEST(TestHaniReorder);
7221 TEST(TestHaniReorderWithOtherRules); 5933 TEST(TestHaniReorderWithOtherRules);
7222 TEST(TestMultipleReorder); 5934 TEST(TestMultipleReorder);
7223 TEST(TestReorderingAcrossCloning); 5935 TEST(TestReorderingAcrossCloning);
7224 TEST(TestReorderWithNumericCollation); 5936 TEST(TestReorderWithNumericCollation);
7225 5937
7226 TEST(TestCaseLevelBufferOverflow); 5938 TEST(TestCaseLevelBufferOverflow);
5939 TEST(TestNextSortKeyPartJaIdentical);
7227 } 5940 }
7228 5941
7229 #endif /* #if !UCONFIG_NO_COLLATION */ 5942 #endif /* #if !UCONFIG_NO_COLLATION */
OLDNEW
« no previous file with comments | « source/test/cintltst/cloctst.c ('k') | source/test/cintltst/cmsgtst.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698