OLD | NEW |
1 | 1 |
2 /******************************************************************** | 2 /******************************************************************** |
3 * COPYRIGHT: | 3 * COPYRIGHT: |
4 * Copyright (c) 2001-2013, International Business Machines Corporation and | 4 * Copyright (c) 2001-2014, International Business Machines Corporation and |
5 * others. All Rights Reserved. | 5 * others. All Rights Reserved. |
6 ********************************************************************/ | 6 ********************************************************************/ |
7 /******************************************************************************* | 7 /******************************************************************************* |
8 * | 8 * |
9 * File cmsccoll.C | 9 * File cmsccoll.C |
10 * | 10 * |
11 *******************************************************************************/ | 11 *******************************************************************************/ |
12 /** | 12 /** |
13 * These are the tests specific to ICU 1.8 and above, that I didn't know where | 13 * These are the tests specific to ICU 1.8 and above, that I didn't know where |
14 * to fit. | 14 * to fit. |
15 */ | 15 */ |
16 | 16 |
17 #include <stdio.h> | 17 #include <stdio.h> |
18 | 18 |
19 #include "unicode/utypes.h" | 19 #include "unicode/utypes.h" |
20 | 20 |
21 #if !UCONFIG_NO_COLLATION | 21 #if !UCONFIG_NO_COLLATION |
22 | 22 |
23 #include "unicode/ucol.h" | 23 #include "unicode/ucol.h" |
24 #include "unicode/ucoleitr.h" | 24 #include "unicode/ucoleitr.h" |
25 #include "unicode/uloc.h" | 25 #include "unicode/uloc.h" |
26 #include "cintltst.h" | 26 #include "cintltst.h" |
27 #include "ccolltst.h" | 27 #include "ccolltst.h" |
28 #include "callcoll.h" | 28 #include "callcoll.h" |
29 #include "unicode/ustring.h" | 29 #include "unicode/ustring.h" |
30 #include "string.h" | 30 #include "string.h" |
31 #include "ucol_imp.h" | 31 #include "ucol_imp.h" |
32 #include "ucol_tok.h" | |
33 #include "cmemory.h" | 32 #include "cmemory.h" |
34 #include "cstring.h" | 33 #include "cstring.h" |
35 #include "uassert.h" | 34 #include "uassert.h" |
36 #include "unicode/parseerr.h" | 35 #include "unicode/parseerr.h" |
37 #include "unicode/ucnv.h" | 36 #include "unicode/ucnv.h" |
38 #include "unicode/ures.h" | 37 #include "unicode/ures.h" |
39 #include "unicode/uscript.h" | 38 #include "unicode/uscript.h" |
40 #include "unicode/utf16.h" | 39 #include "unicode/utf16.h" |
41 #include "uparse.h" | 40 #include "uparse.h" |
42 #include "putilimp.h" | 41 #include "putilimp.h" |
(...skipping 363 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
406 log_err("Wrong locale for French Collation Data, expecte
d \"fr\" got %s", lp); | 405 log_err("Wrong locale for French Collation Data, expecte
d \"fr\" got %s", lp); |
407 } | 406 } |
408 } | 407 } |
409 } | 408 } |
410 ures_close(cr); | 409 ures_close(cr); |
411 } | 410 } |
412 ures_close(lr); | 411 ures_close(lr); |
413 } | 412 } |
414 } | 413 } |
415 | 414 |
416 static void testPrimary(UCollator* col, const UChar* p,const UChar* q){ | |
417 UChar source[256] = { '\0'}; | |
418 UChar target[256] = { '\0'}; | |
419 UChar preP = 0x31a3; | |
420 UChar preQ = 0x310d; | |
421 /* | |
422 UChar preP = (*p>0x0400 && *p<0x0500)?0x00e1:0x491; | |
423 UChar preQ = (*p>0x0400 && *p<0x0500)?0x0041:0x413; | |
424 */ | |
425 /*log_verbose("Testing primary\n");*/ | |
426 | |
427 doTest(col, p, q, UCOL_LESS); | |
428 /* | |
429 UCollationResult result = ucol_strcoll(col,p,u_strlen(p),q,u_strlen(q)); | |
430 | |
431 if(result!=UCOL_LESS){ | |
432 aescstrdup(p,utfSource,256); | |
433 aescstrdup(q,utfTarget,256); | |
434 fprintf(file,"Primary failed source: %s target: %s \n", utfSource,utfTar
get); | |
435 } | |
436 */ | |
437 source[0] = preP; | |
438 u_strcpy(source+1,p); | |
439 target[0] = preQ; | |
440 u_strcpy(target+1,q); | |
441 doTest(col, source, target, UCOL_LESS); | |
442 /* | |
443 fprintf(file,"Primary swamps 2nd failed source: %s target: %s \n", utfSourc
e,utfTarget); | |
444 */ | |
445 } | |
446 | |
447 static void testSecondary(UCollator* col, const UChar* p,const UChar* q){ | |
448 UChar source[256] = { '\0'}; | |
449 UChar target[256] = { '\0'}; | |
450 | |
451 /*log_verbose("Testing secondary\n");*/ | |
452 | |
453 doTest(col, p, q, UCOL_LESS); | |
454 /* | |
455 fprintf(file,"secondary failed source: %s target: %s \n", utfSource,utfTarg
et); | |
456 */ | |
457 source[0] = 0x0053; | |
458 u_strcpy(source+1,p); | |
459 target[0]= 0x0073; | |
460 u_strcpy(target+1,q); | |
461 | |
462 doTest(col, source, target, UCOL_LESS); | |
463 /* | |
464 fprintf(file,"secondary swamps 3rd failed source: %s target: %s \n",utfSour
ce,utfTarget); | |
465 */ | |
466 | |
467 | |
468 u_strcpy(source,p); | |
469 source[u_strlen(p)] = 0x62; | |
470 source[u_strlen(p)+1] = 0; | |
471 | |
472 | |
473 u_strcpy(target,q); | |
474 target[u_strlen(q)] = 0x61; | |
475 target[u_strlen(q)+1] = 0; | |
476 | |
477 doTest(col, source, target, UCOL_GREATER); | |
478 | |
479 /* | |
480 fprintf(file,"secondary is swamped by 1 failed source: %s target: %s \n",u
tfSource,utfTarget); | |
481 */ | |
482 } | |
483 | |
484 static void testTertiary(UCollator* col, const UChar* p,const UChar* q){ | |
485 UChar source[256] = { '\0'}; | |
486 UChar target[256] = { '\0'}; | |
487 | |
488 /*log_verbose("Testing tertiary\n");*/ | |
489 | |
490 doTest(col, p, q, UCOL_LESS); | |
491 /* | |
492 fprintf(file,"Tertiary failed source: %s target: %s \n",utfSource,utfTarget
); | |
493 */ | |
494 source[0] = 0x0020; | |
495 u_strcpy(source+1,p); | |
496 target[0]= 0x002D; | |
497 u_strcpy(target+1,q); | |
498 | |
499 doTest(col, source, target, UCOL_LESS); | |
500 /* | |
501 fprintf(file,"Tertiary swamps 4th failed source: %s target: %s \n", utfSour
ce,utfTarget); | |
502 */ | |
503 | |
504 u_strcpy(source,p); | |
505 source[u_strlen(p)] = 0xE0; | |
506 source[u_strlen(p)+1] = 0; | |
507 | |
508 u_strcpy(target,q); | |
509 target[u_strlen(q)] = 0x61; | |
510 target[u_strlen(q)+1] = 0; | |
511 | |
512 doTest(col, source, target, UCOL_GREATER); | |
513 | |
514 /* | |
515 fprintf(file,"Tertiary is swamped by 3rd failed source: %s target: %s \n",u
tfSource,utfTarget); | |
516 */ | |
517 } | |
518 | |
519 static void testEquality(UCollator* col, const UChar* p,const UChar* q){ | |
520 /* | |
521 UChar source[256] = { '\0'}; | |
522 UChar target[256] = { '\0'}; | |
523 */ | |
524 | |
525 doTest(col, p, q, UCOL_EQUAL); | |
526 /* | |
527 fprintf(file,"Primary failed source: %s target: %s \n", utfSource,utfTarget
); | |
528 */ | |
529 } | |
530 | |
531 static void testCollator(UCollator *coll, UErrorCode *status) { | |
532 const UChar *rules = NULL, *current = NULL; | |
533 int32_t ruleLen = 0; | |
534 uint32_t strength = 0; | |
535 uint32_t chOffset = 0; uint32_t chLen = 0; | |
536 uint32_t exOffset = 0; uint32_t exLen = 0; | |
537 uint32_t prefixOffset = 0; uint32_t prefixLen = 0; | |
538 uint32_t firstEx = 0; | |
539 /* uint32_t rExpsLen = 0; */ | |
540 uint32_t firstLen = 0; | |
541 UBool varT = FALSE; UBool top_ = TRUE; | |
542 uint16_t specs = 0; | |
543 UBool startOfRules = TRUE; | |
544 UBool lastReset = FALSE; | |
545 UBool before = FALSE; | |
546 uint32_t beforeStrength = 0; | |
547 UColTokenParser src; | |
548 UColOptionSet opts; | |
549 | |
550 UChar first[256]; | |
551 UChar second[256]; | |
552 UChar tempB[256]; | |
553 uint32_t tempLen; | |
554 UChar *rulesCopy = NULL; | |
555 UParseError parseError; | |
556 | |
557 uprv_memset(&src, 0, sizeof(UColTokenParser)); | |
558 | |
559 src.opts = &opts; | |
560 | |
561 rules = ucol_getRules(coll, &ruleLen); | |
562 if(U_SUCCESS(*status) && ruleLen > 0) { | |
563 rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*si
zeof(UChar)); | |
564 uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar)); | |
565 src.current = src.source = rulesCopy; | |
566 src.end = rulesCopy+ruleLen; | |
567 src.extraCurrent = src.end; | |
568 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE; | |
569 *first = *second = 0; | |
570 | |
571 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToke
n can cause the pointer to | |
572 the rules copy in src.source to get reallocated, freeing the original
pointer in rulesCopy */ | |
573 while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, st
atus)) != NULL) { | |
574 strength = src.parsedToken.strength; | |
575 chOffset = src.parsedToken.charsOffset; | |
576 chLen = src.parsedToken.charsLen; | |
577 exOffset = src.parsedToken.extensionOffset; | |
578 exLen = src.parsedToken.extensionLen; | |
579 prefixOffset = src.parsedToken.prefixOffset; | |
580 prefixLen = src.parsedToken.prefixLen; | |
581 specs = src.parsedToken.flags; | |
582 | |
583 startOfRules = FALSE; | |
584 varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0); | |
585 (void)varT; /* Suppress set but not used warning. */ | |
586 top_ = (UBool)((specs & UCOL_TOK_TOP) != 0); | |
587 if(top_) { /* if reset is on top, the sequence is broken. We should have a
n empty string */ | |
588 second[0] = 0; | |
589 } else { | |
590 u_strncpy(second,src.source+chOffset, chLen); | |
591 second[chLen] = 0; | |
592 | |
593 if(exLen > 0 && firstEx == 0) { | |
594 u_strncat(first, src.source+exOffset, exLen); | |
595 first[firstLen+exLen] = 0; | |
596 } | |
597 | |
598 if(lastReset == TRUE && prefixLen != 0) { | |
599 u_strncpy(first+prefixLen, first, firstLen); | |
600 u_strncpy(first, src.source+prefixOffset, prefixLen); | |
601 first[firstLen+prefixLen] = 0; | |
602 firstLen = firstLen+prefixLen; | |
603 } | |
604 | |
605 if(before == TRUE) { /* swap first and second */ | |
606 u_strcpy(tempB, first); | |
607 u_strcpy(first, second); | |
608 u_strcpy(second, tempB); | |
609 | |
610 tempLen = firstLen; | |
611 firstLen = chLen; | |
612 chLen = tempLen; | |
613 | |
614 tempLen = firstEx; | |
615 firstEx = exLen; | |
616 exLen = tempLen; | |
617 if(beforeStrength < strength) { | |
618 strength = beforeStrength; | |
619 } | |
620 } | |
621 } | |
622 lastReset = FALSE; | |
623 | |
624 switch(strength){ | |
625 case UCOL_IDENTICAL: | |
626 testEquality(coll,first,second); | |
627 break; | |
628 case UCOL_PRIMARY: | |
629 testPrimary(coll,first,second); | |
630 break; | |
631 case UCOL_SECONDARY: | |
632 testSecondary(coll,first,second); | |
633 break; | |
634 case UCOL_TERTIARY: | |
635 testTertiary(coll,first,second); | |
636 break; | |
637 case UCOL_TOK_RESET: | |
638 lastReset = TRUE; | |
639 before = (UBool)((specs & UCOL_TOK_BEFORE) != 0); | |
640 if(before) { | |
641 beforeStrength = (specs & UCOL_TOK_BEFORE)-1; | |
642 } | |
643 break; | |
644 default: | |
645 break; | |
646 } | |
647 | |
648 if(before == TRUE && strength != UCOL_TOK_RESET) { /* first and second wer
e swapped */ | |
649 before = FALSE; | |
650 } else { | |
651 firstLen = chLen; | |
652 firstEx = exLen; | |
653 u_strcpy(first, second); | |
654 } | |
655 } | |
656 uprv_free(src.source); | |
657 uprv_free(src.reorderCodes); | |
658 } | |
659 } | |
660 | |
661 static UCollationResult ucaTest(void *collator, const int object, const UChar *s
ource, const int sLen, const UChar *target, const int tLen) { | |
662 UCollator *UCA = (UCollator *)collator; | |
663 return ucol_strcoll(UCA, source, sLen, target, tLen); | |
664 } | |
665 | |
666 /* | |
667 static UCollationResult winTest(void *collator, const int object, const UChar *s
ource, const int sLen, const UChar *target, const int tLen) { | |
668 #if U_PLATFORM_HAS_WIN32_API | |
669 LCID lcid = (LCID)collator; | |
670 return (UCollationResult)CompareString(lcid, 0, source, sLen, target, tLen); | |
671 #else | |
672 return 0; | |
673 #endif | |
674 } | |
675 */ | |
676 | |
677 static UCollationResult swampEarlier(tst_strcoll* func, void *collator, int opts
, | |
678 UChar s1, UChar s2, | |
679 const UChar *s, const uint32_t sLen, | |
680 const UChar *t, const uint32_t tLen) { | |
681 UChar source[256] = {0}; | |
682 UChar target[256] = {0}; | |
683 | |
684 source[0] = s1; | |
685 u_strcpy(source+1, s); | |
686 target[0] = s2; | |
687 u_strcpy(target+1, t); | |
688 | |
689 return func(collator, opts, source, sLen+1, target, tLen+1); | |
690 } | |
691 | |
692 static UCollationResult swampLater(tst_strcoll* func, void *collator, int opts, | |
693 UChar s1, UChar s2, | |
694 const UChar *s, const uint32_t sLen, | |
695 const UChar *t, const uint32_t tLen) { | |
696 UChar source[256] = {0}; | |
697 UChar target[256] = {0}; | |
698 | |
699 u_strcpy(source, s); | |
700 source[sLen] = s1; | |
701 u_strcpy(target, t); | |
702 target[tLen] = s2; | |
703 | |
704 return func(collator, opts, source, sLen+1, target, tLen+1); | |
705 } | |
706 | |
707 static uint32_t probeStrength(tst_strcoll* func, void *collator, int opts, | |
708 const UChar *s, const uint32_t sLen, | |
709 const UChar *t, const uint32_t tLen, | |
710 UCollationResult result) { | |
711 /*UChar fPrimary = 0x6d;*/ | |
712 /*UChar sPrimary = 0x6e;*/ | |
713 UChar fSecondary = 0x310d; | |
714 UChar sSecondary = 0x31a3; | |
715 UChar fTertiary = 0x310f; | |
716 UChar sTertiary = 0x31b7; | |
717 | |
718 UCollationResult oposite; | |
719 if(result == UCOL_EQUAL) { | |
720 return UCOL_IDENTICAL; | |
721 } else if(result == UCOL_GREATER) { | |
722 oposite = UCOL_LESS; | |
723 } else { | |
724 oposite = UCOL_GREATER; | |
725 } | |
726 | |
727 if(swampEarlier(func, collator, opts, sSecondary, fSecondary, s, sLen, t, tLen
) == result) { | |
728 return UCOL_PRIMARY; | |
729 } else if((swampEarlier(func, collator, opts, sTertiary, 0x310f, s, sLen, t, t
Len) == result) && | |
730 (swampEarlier(func, collator, opts, 0x310f, sTertiary, s, sLen, t, tLen) ==
result)) { | |
731 return UCOL_SECONDARY; | |
732 } else if((swampLater(func, collator, opts, sTertiary, fTertiary, s, sLen, t,
tLen) == result) && | |
733 (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) ==
result)) { | |
734 return UCOL_TERTIARY; | |
735 } else if((swampLater(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLe
n) == oposite) && | |
736 (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) ==
oposite)) { | |
737 return UCOL_QUATERNARY; | |
738 } else { | |
739 return UCOL_IDENTICAL; | |
740 } | |
741 } | |
742 | |
743 static char *getRelationSymbol(UCollationResult res, uint32_t strength, char *bu
ffer) { | |
744 uint32_t i = 0; | |
745 | |
746 if(res == UCOL_EQUAL || strength == 0xdeadbeef) { | |
747 buffer[0] = '='; | |
748 buffer[1] = '='; | |
749 buffer[2] = '\0'; | |
750 } else if(res == UCOL_GREATER) { | |
751 for(i = 0; i<strength+1; i++) { | |
752 buffer[i] = '>'; | |
753 } | |
754 buffer[strength+1] = '\0'; | |
755 } else { | |
756 for(i = 0; i<strength+1; i++) { | |
757 buffer[i] = '<'; | |
758 } | |
759 buffer[strength+1] = '\0'; | |
760 } | |
761 | |
762 return buffer; | |
763 } | |
764 | |
765 | |
766 | |
767 static void logFailure (const char *platform, const char *test, | |
768 const UChar *source, const uint32_t sLen, | |
769 const UChar *target, const uint32_t tLen, | |
770 UCollationResult realRes, uint32_t realStrength, | |
771 UCollationResult expRes, uint32_t expStrength, UBool err
or) { | |
772 | |
773 uint32_t i = 0; | |
774 | |
775 char sEsc[256], s[256], tEsc[256], t[256], b[256], output[512], relation[256]; | |
776 static int32_t maxOutputLength = 0; | |
777 int32_t outputLength; | |
778 | |
779 *sEsc = *tEsc = *s = *t = 0; | |
780 if(error == TRUE) { | |
781 log_err("Difference between expected and generated order. Run test with -v f
or more info\n"); | |
782 } else if(getTestOption(VERBOSITY_OPTION) == 0) { | |
783 return; | |
784 } | |
785 for(i = 0; i<sLen; i++) { | |
786 sprintf(b, "%04X", source[i]); | |
787 strcat(sEsc, "\\u"); | |
788 strcat(sEsc, b); | |
789 strcat(s, b); | |
790 strcat(s, " "); | |
791 if(source[i] < 0x80) { | |
792 sprintf(b, "(%c)", source[i]); | |
793 strcat(sEsc, b); | |
794 } | |
795 } | |
796 for(i = 0; i<tLen; i++) { | |
797 sprintf(b, "%04X", target[i]); | |
798 strcat(tEsc, "\\u"); | |
799 strcat(tEsc, b); | |
800 strcat(t, b); | |
801 strcat(t, " "); | |
802 if(target[i] < 0x80) { | |
803 sprintf(b, "(%c)", target[i]); | |
804 strcat(tEsc, b); | |
805 } | |
806 } | |
807 /* | |
808 strcpy(output, "[[ "); | |
809 strcat(output, sEsc); | |
810 strcat(output, getRelationSymbol(expRes, expStrength, relation)); | |
811 strcat(output, tEsc); | |
812 | |
813 strcat(output, " : "); | |
814 | |
815 strcat(output, sEsc); | |
816 strcat(output, getRelationSymbol(realRes, realStrength, relation)); | |
817 strcat(output, tEsc); | |
818 strcat(output, " ]] "); | |
819 | |
820 log_verbose("%s", output); | |
821 */ | |
822 | |
823 | |
824 strcpy(output, "DIFF: "); | |
825 | |
826 strcat(output, s); | |
827 strcat(output, " : "); | |
828 strcat(output, t); | |
829 | |
830 strcat(output, test); | |
831 strcat(output, ": "); | |
832 | |
833 strcat(output, sEsc); | |
834 strcat(output, getRelationSymbol(expRes, expStrength, relation)); | |
835 strcat(output, tEsc); | |
836 | |
837 strcat(output, " "); | |
838 | |
839 strcat(output, platform); | |
840 strcat(output, ": "); | |
841 | |
842 strcat(output, sEsc); | |
843 strcat(output, getRelationSymbol(realRes, realStrength, relation)); | |
844 strcat(output, tEsc); | |
845 | |
846 outputLength = (int32_t)strlen(output); | |
847 if(outputLength > maxOutputLength) { | |
848 maxOutputLength = outputLength; | |
849 U_ASSERT(outputLength < sizeof(output)); | |
850 } | |
851 | |
852 log_verbose("%s\n", output); | |
853 | |
854 } | |
855 | |
856 /* | |
857 static void printOutRules(const UChar *rules) { | |
858 uint32_t len = u_strlen(rules); | |
859 uint32_t i = 0; | |
860 char toPrint; | |
861 uint32_t line = 0; | |
862 | |
863 fprintf(stdout, "Rules:"); | |
864 | |
865 for(i = 0; i<len; i++) { | |
866 if(rules[i]<0x7f && rules[i]>=0x20) { | |
867 toPrint = (char)rules[i]; | |
868 if(toPrint == '&') { | |
869 line = 1; | |
870 fprintf(stdout, "\n&"); | |
871 } else if(toPrint == ';') { | |
872 fprintf(stdout, "<<"); | |
873 line+=2; | |
874 } else if(toPrint == ',') { | |
875 fprintf(stdout, "<<<"); | |
876 line+=3; | |
877 } else { | |
878 fprintf(stdout, "%c", toPrint); | |
879 line++; | |
880 } | |
881 } else if(rules[i]<0x3400 || rules[i]>=0xa000) { | |
882 fprintf(stdout, "\\u%04X", rules[i]); | |
883 line+=6; | |
884 } | |
885 if(line>72) { | |
886 fprintf(stdout, "\n"); | |
887 line = 0; | |
888 } | |
889 } | |
890 | |
891 log_verbose("\n"); | |
892 | |
893 } | |
894 */ | |
895 | |
896 static uint32_t testSwitch(tst_strcoll* func, void *collator, int opts, uint32_t
strength, const UChar *first, const UChar *second, const char* msg, UBool error
) { | |
897 uint32_t diffs = 0; | |
898 UCollationResult realResult; | |
899 uint32_t realStrength; | |
900 | |
901 uint32_t sLen = u_strlen(first); | |
902 uint32_t tLen = u_strlen(second); | |
903 | |
904 realResult = func(collator, opts, first, sLen, second, tLen); | |
905 realStrength = probeStrength(func, collator, opts, first, sLen, second, tLen,
realResult); | |
906 | |
907 if(strength == UCOL_IDENTICAL && realResult != UCOL_EQUAL) { | |
908 logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStre
ngth, UCOL_EQUAL, strength, error); | |
909 diffs++; | |
910 } else if(realResult != UCOL_LESS || realStrength != strength) { | |
911 logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStre
ngth, UCOL_LESS, strength, error); | |
912 diffs++; | |
913 } | |
914 return diffs; | |
915 } | |
916 | |
917 | |
918 static void testAgainstUCA(UCollator *coll, UCollator *UCA, const char *refName,
UBool error, UErrorCode *status) { | |
919 const UChar *rules = NULL, *current = NULL; | |
920 int32_t ruleLen = 0; | |
921 uint32_t strength = 0; | |
922 uint32_t chOffset = 0; uint32_t chLen = 0; | |
923 uint32_t exOffset = 0; uint32_t exLen = 0; | |
924 uint32_t prefixOffset = 0; uint32_t prefixLen = 0; | |
925 /* uint32_t rExpsLen = 0; */ | |
926 uint32_t firstLen = 0, secondLen = 0; | |
927 UBool varT = FALSE; UBool top_ = TRUE; | |
928 uint16_t specs = 0; | |
929 UBool startOfRules = TRUE; | |
930 UColTokenParser src; | |
931 UColOptionSet opts; | |
932 | |
933 UChar first[256]; | |
934 UChar second[256]; | |
935 UChar *rulesCopy = NULL; | |
936 | |
937 uint32_t UCAdiff = 0; | |
938 uint32_t Windiff = 1; | |
939 UParseError parseError; | |
940 | |
941 (void)top_; /* Suppress set but not used warnings. */ | |
942 (void)varT; | |
943 (void)secondLen; | |
944 (void)prefixLen; | |
945 (void)prefixOffset; | |
946 | |
947 uprv_memset(&src, 0, sizeof(UColTokenParser)); | |
948 src.opts = &opts; | |
949 | |
950 rules = ucol_getRules(coll, &ruleLen); | |
951 | |
952 /*printOutRules(rules);*/ | |
953 | |
954 if(U_SUCCESS(*status) && ruleLen > 0) { | |
955 rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*si
zeof(UChar)); | |
956 uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar)); | |
957 src.current = src.source = rulesCopy; | |
958 src.end = rulesCopy+ruleLen; | |
959 src.extraCurrent = src.end; | |
960 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE; | |
961 *first = *second = 0; | |
962 | |
963 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken ca
n cause the pointer to | |
964 the rules copy in src.source to get reallocated, freeing the original poi
nter in rulesCopy */ | |
965 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,st
atus)) != NULL) { | |
966 strength = src.parsedToken.strength; | |
967 chOffset = src.parsedToken.charsOffset; | |
968 chLen = src.parsedToken.charsLen; | |
969 exOffset = src.parsedToken.extensionOffset; | |
970 exLen = src.parsedToken.extensionLen; | |
971 prefixOffset = src.parsedToken.prefixOffset; | |
972 prefixLen = src.parsedToken.prefixLen; | |
973 specs = src.parsedToken.flags; | |
974 | |
975 startOfRules = FALSE; | |
976 varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0); | |
977 top_ = (UBool)((specs & UCOL_TOK_TOP) != 0); | |
978 | |
979 u_strncpy(second,src.source+chOffset, chLen); | |
980 second[chLen] = 0; | |
981 secondLen = chLen; | |
982 | |
983 if(exLen > 0) { | |
984 u_strncat(first, src.source+exOffset, exLen); | |
985 first[firstLen+exLen] = 0; | |
986 firstLen += exLen; | |
987 } | |
988 | |
989 if(strength != UCOL_TOK_RESET) { | |
990 if((*first<0x3400 || *first>=0xa000) && (*second<0x3400 || *second>=0xa0
00)) { | |
991 UCAdiff += testSwitch(&ucaTest, (void *)UCA, 0, strength, first, secon
d, refName, error); | |
992 /*Windiff += testSwitch(&winTest, (void *)lcid, 0, strength, first, se
cond, "Win32");*/ | |
993 } | |
994 } | |
995 | |
996 | |
997 firstLen = chLen; | |
998 u_strcpy(first, second); | |
999 | |
1000 } | |
1001 if(UCAdiff != 0 && Windiff != 0) { | |
1002 log_verbose("\n"); | |
1003 } | |
1004 if(UCAdiff == 0) { | |
1005 log_verbose("No immediate difference with %s!\n", refName); | |
1006 } | |
1007 if(Windiff == 0) { | |
1008 log_verbose("No immediate difference with Win32!\n"); | |
1009 } | |
1010 uprv_free(src.source); | |
1011 uprv_free(src.reorderCodes); | |
1012 } | |
1013 } | |
1014 | |
/*
 * Orders two collation elements, each given as a lead CE plus a
 * continuation CE, level by level.
 *
 * The upper 16 bits of lead and continuation form the primary key, bits 8-15
 * of each form the secondary key, and the low 8 bits of each form the
 * tertiary key. Keys are compared in that order and the first difference
 * decides the result.
 *
 * Returns 0 if both CE pairs are bitwise equal, -1 if (s1, s2) sorts before
 * (t1, t2), and 1 otherwise.
 */
static int32_t compareCEs(uint32_t s1, uint32_t s2,
                          uint32_t t1, uint32_t t2) {
    uint32_t sourceKey;
    uint32_t targetKey;

    if(s1 == t1 && s2 == t2) {
        return 0;
    }

    /* primary: high halves of lead and continuation, lead first */
    sourceKey = (s1 & 0xFFFF0000) | ((s2 & 0xFFFF0000) >> 16);
    targetKey = (t1 & 0xFFFF0000) | ((t2 & 0xFFFF0000) >> 16);
    if(sourceKey != targetKey) {
        return (sourceKey < targetKey) ? -1 : 1;
    }

    /* secondary: bits 8-15 of lead and continuation, lead first */
    sourceKey = (s1 & 0x0000FF00) | ((s2 & 0x0000FF00) >> 8);
    targetKey = (t1 & 0x0000FF00) | ((t2 & 0x0000FF00) >> 8);
    if(sourceKey != targetKey) {
        return (sourceKey < targetKey) ? -1 : 1;
    }

    /* tertiary: low bytes of lead and continuation, lead first */
    sourceKey = ((s1 & 0x000000FF) << 8) | (s2 & 0x000000FF);
    targetKey = ((t1 & 0x000000FF) << 8) | (t2 & 0x000000FF);
    return (sourceKey < targetKey) ? -1 : 1;
}
1051 | |
/* One table entry for indirect positioning: a start CE and a limit CE,
   each stored as a lead CE plus its continuation CE. */
typedef struct {
    uint32_t startCE;      /* lead CE of the start point */
    uint32_t startContCE;  /* continuation CE of the start point */
    uint32_t limitCE;      /* lead CE of the limit; 0 when no limit was given */
    uint32_t limitContCE;  /* continuation CE of the limit; 0 when no limit was given */
} indirectBoundaries;
1058 | |
1059 /* these values are used for finding CE values for indirect positioning. */ | |
1060 /* Indirect positioning is a mechanism for allowing resets on symbolic */ | |
1061 /* values. It only works for resets and you cannot tailor indirect names */ | |
1062 /* An indirect name can define either an anchor point or a range. An */ | |
1063 /* anchor point behaves in exactly the same way as a code point in reset */ | |
1064 /* would, except that it cannot be tailored. A range (we currently only */ | |
1065 /* know for the [top] range will explicitly set the upper bound for */ | |
1066 /* generated CEs, thus allowing for better control over how many CEs can */ | |
1067 /* be squeezed between in the range without performance penalty. */ | |
1068 /* In that respect, we use [top] for tailoring of locales that use CJK */ | |
1069 /* characters. Other indirect values are currently a pure convenience, */ | |
1070 /* they can be used to assure that the CEs will be always positioned in */ | |
1071 /* the same place relative to a point with known properties (e.g. first */ | |
1072 /* primary ignorable). */ | |
1073 static indirectBoundaries ucolIndirectBoundaries[15]; | |
1074 static UBool indirectBoundariesSet = FALSE; | |
1075 static void setIndirectBoundaries(uint32_t indexR, uint32_t *start, uint32_t *en
d) { | |
1076 /* Set values for the top - TODO: once we have values for all the indirects,
we are going */ | |
1077 /* to initalize here. */ | |
1078 ucolIndirectBoundaries[indexR].startCE = start[0]; | |
1079 ucolIndirectBoundaries[indexR].startContCE = start[1]; | |
1080 if(end) { | |
1081 ucolIndirectBoundaries[indexR].limitCE = end[0]; | |
1082 ucolIndirectBoundaries[indexR].limitContCE = end[1]; | |
1083 } else { | |
1084 ucolIndirectBoundaries[indexR].limitCE = 0; | |
1085 ucolIndirectBoundaries[indexR].limitContCE = 0; | |
1086 } | |
1087 } | |
1088 | |
1089 static void testCEs(UCollator *coll, UErrorCode *status) { | |
1090 const UChar *rules = NULL, *current = NULL; | |
1091 int32_t ruleLen = 0; | |
1092 | |
1093 uint32_t strength = 0; | |
1094 uint32_t maxStrength = UCOL_IDENTICAL; | |
1095 uint32_t baseCE, baseContCE, nextCE, nextContCE, currCE, currContCE; | |
1096 uint32_t lastCE; | |
1097 uint32_t lastContCE; | |
1098 | |
1099 int32_t result = 0; | |
1100 uint32_t chOffset = 0; uint32_t chLen = 0; | |
1101 uint32_t exOffset = 0; uint32_t exLen = 0; | |
1102 uint32_t prefixOffset = 0; uint32_t prefixLen = 0; | |
1103 uint32_t oldOffset = 0; | |
1104 | |
1105 /* uint32_t rExpsLen = 0; */ | |
1106 /* uint32_t firstLen = 0; */ | |
1107 uint16_t specs = 0; | |
1108 UBool varT = FALSE; UBool top_ = TRUE; | |
1109 UBool startOfRules = TRUE; | |
1110 UBool before = FALSE; | |
1111 UColTokenParser src; | |
1112 UColOptionSet opts; | |
1113 UParseError parseError; | |
1114 UChar *rulesCopy = NULL; | |
1115 collIterate *c = uprv_new_collIterate(status); | |
1116 UCAConstants *consts = NULL; | |
1117 uint32_t UCOL_RESET_TOP_VALUE, /*UCOL_RESET_TOP_CONT, */ | |
1118 UCOL_NEXT_TOP_VALUE, UCOL_NEXT_TOP_CONT; | |
1119 const char *colLoc; | |
1120 UCollator *UCA = ucol_open("root", status); | |
1121 | |
1122 (void)varT; /* Suppress set but not used warnings. */ | |
1123 (void)prefixLen; | |
1124 (void)prefixOffset; | |
1125 (void)exLen; | |
1126 (void)exOffset; | |
1127 | |
1128 if (U_FAILURE(*status)) { | |
1129 log_err("Could not open root collator %s\n", u_errorName(*status)); | |
1130 uprv_delete_collIterate(c); | |
1131 return; | |
1132 } | |
1133 | |
1134 colLoc = ucol_getLocaleByType(coll, ULOC_ACTUAL_LOCALE, status); | |
1135 if (U_FAILURE(*status)) { | |
1136 log_err("Could not get collator name: %s\n", u_errorName(*status)); | |
1137 ucol_close(UCA); | |
1138 uprv_delete_collIterate(c); | |
1139 return; | |
1140 } | |
1141 | |
1142 uprv_memset(&src, 0, sizeof(UColTokenParser)); | |
1143 | |
1144 consts = (UCAConstants *)((uint8_t *)UCA->image + UCA->image->UCAConsts); | |
1145 UCOL_RESET_TOP_VALUE = consts->UCA_LAST_NON_VARIABLE[0]; | |
1146 /*UCOL_RESET_TOP_CONT = consts->UCA_LAST_NON_VARIABLE[1]; */ | |
1147 UCOL_NEXT_TOP_VALUE = consts->UCA_FIRST_IMPLICIT[0]; | |
1148 UCOL_NEXT_TOP_CONT = consts->UCA_FIRST_IMPLICIT[1]; | |
1149 | |
1150 baseCE=baseContCE=nextCE=nextContCE=currCE=currContCE=lastCE=lastContCE = UC
OL_NOT_FOUND; | |
1151 | |
1152 src.opts = &opts; | |
1153 | |
1154 rules = ucol_getRules(coll, &ruleLen); | |
1155 | |
1156 src.invUCA = ucol_initInverseUCA(status); | |
1157 | |
1158 if(indirectBoundariesSet == FALSE) { | |
1159 /* UCOL_RESET_TOP_VALUE */ | |
1160 setIndirectBoundaries(0, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRS
T_IMPLICIT); | |
1161 /* UCOL_FIRST_PRIMARY_IGNORABLE */ | |
1162 setIndirectBoundaries(1, consts->UCA_FIRST_PRIMARY_IGNORABLE, 0); | |
1163 /* UCOL_LAST_PRIMARY_IGNORABLE */ | |
1164 setIndirectBoundaries(2, consts->UCA_LAST_PRIMARY_IGNORABLE, 0); | |
1165 /* UCOL_FIRST_SECONDARY_IGNORABLE */ | |
1166 setIndirectBoundaries(3, consts->UCA_FIRST_SECONDARY_IGNORABLE, 0); | |
1167 /* UCOL_LAST_SECONDARY_IGNORABLE */ | |
1168 setIndirectBoundaries(4, consts->UCA_LAST_SECONDARY_IGNORABLE, 0); | |
1169 /* UCOL_FIRST_TERTIARY_IGNORABLE */ | |
1170 setIndirectBoundaries(5, consts->UCA_FIRST_TERTIARY_IGNORABLE, 0); | |
1171 /* UCOL_LAST_TERTIARY_IGNORABLE */ | |
1172 setIndirectBoundaries(6, consts->UCA_LAST_TERTIARY_IGNORABLE, 0); | |
1173 /* UCOL_FIRST_VARIABLE */ | |
1174 setIndirectBoundaries(7, consts->UCA_FIRST_VARIABLE, 0); | |
1175 /* UCOL_LAST_VARIABLE */ | |
1176 setIndirectBoundaries(8, consts->UCA_LAST_VARIABLE, 0); | |
1177 /* UCOL_FIRST_NON_VARIABLE */ | |
1178 setIndirectBoundaries(9, consts->UCA_FIRST_NON_VARIABLE, 0); | |
1179 /* UCOL_LAST_NON_VARIABLE */ | |
1180 setIndirectBoundaries(10, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIR
ST_IMPLICIT); | |
1181 /* UCOL_FIRST_IMPLICIT */ | |
1182 setIndirectBoundaries(11, consts->UCA_FIRST_IMPLICIT, 0); | |
1183 /* UCOL_LAST_IMPLICIT */ | |
1184 setIndirectBoundaries(12, consts->UCA_LAST_IMPLICIT, consts->UCA_FIRST_T
RAILING); | |
1185 /* UCOL_FIRST_TRAILING */ | |
1186 setIndirectBoundaries(13, consts->UCA_FIRST_TRAILING, 0); | |
1187 /* UCOL_LAST_TRAILING */ | |
1188 setIndirectBoundaries(14, consts->UCA_LAST_TRAILING, 0); | |
1189 ucolIndirectBoundaries[14].limitCE = (consts->UCA_PRIMARY_SPECIAL_MIN<<2
4); | |
1190 indirectBoundariesSet = TRUE; | |
1191 } | |
1192 | |
1193 | |
1194 if(U_SUCCESS(*status) && ruleLen > 0) { | |
1195 rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE
)*sizeof(UChar)); | |
1196 uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar)); | |
1197 src.current = src.source = rulesCopy; | |
1198 src.end = rulesCopy+ruleLen; | |
1199 src.extraCurrent = src.end; | |
1200 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE; | |
1201 | |
1202 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNext
Token can cause the pointer to | |
1203 the rules copy in src.source to get reallocated, freeing the orig
inal pointer in rulesCopy */ | |
1204 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseErro
r,status)) != NULL) { | |
1205 strength = src.parsedToken.strength; | |
1206 chOffset = src.parsedToken.charsOffset; | |
1207 chLen = src.parsedToken.charsLen; | |
1208 exOffset = src.parsedToken.extensionOffset; | |
1209 exLen = src.parsedToken.extensionLen; | |
1210 prefixOffset = src.parsedToken.prefixOffset; | |
1211 prefixLen = src.parsedToken.prefixLen; | |
1212 specs = src.parsedToken.flags; | |
1213 | |
1214 startOfRules = FALSE; | |
1215 varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0); | |
1216 top_ = (UBool)((specs & UCOL_TOK_TOP) != 0); | |
1217 | |
1218 uprv_init_collIterate(coll, src.source+chOffset, chLen, c, status); | |
1219 | |
1220 currCE = ucol_getNextCE(coll, c, status); | |
1221 if(currCE == 0 && UCOL_ISTHAIPREVOWEL(*(src.source+chOffset))) { | |
1222 log_verbose("Thai prevowel detected. Will pick next CE\n"); | |
1223 currCE = ucol_getNextCE(coll, c, status); | |
1224 } | |
1225 | |
1226 currContCE = ucol_getNextCE(coll, c, status); | |
1227 if(!isContinuation(currContCE)) { | |
1228 currContCE = 0; | |
1229 } | |
1230 | |
1231 /* we need to repack CEs here */ | |
1232 | |
1233 if(strength == UCOL_TOK_RESET) { | |
1234 before = (UBool)((specs & UCOL_TOK_BEFORE) != 0); | |
1235 if(top_ == TRUE) { | |
1236 int32_t tokenIndex = src.parsedToken.indirectIndex; | |
1237 | |
1238 nextCE = baseCE = currCE = ucolIndirectBoundaries[tokenIndex
].startCE; | |
1239 nextContCE = baseContCE = currContCE = ucolIndirectBoundarie
s[tokenIndex].startContCE; | |
1240 } else { | |
1241 nextCE = baseCE = currCE; | |
1242 nextContCE = baseContCE = currContCE; | |
1243 } | |
1244 maxStrength = UCOL_IDENTICAL; | |
1245 } else { | |
1246 if(strength < maxStrength) { | |
1247 maxStrength = strength; | |
1248 if(baseCE == UCOL_RESET_TOP_VALUE) { | |
1249 log_verbose("Resetting to [top]\n"); | |
1250 nextCE = UCOL_NEXT_TOP_VALUE; | |
1251 nextContCE = UCOL_NEXT_TOP_CONT; | |
1252 } else { | |
1253 result = ucol_inv_getNextCE(&src, baseCE & 0xFFFFFF3F, b
aseContCE, &nextCE, &nextContCE, maxStrength); | |
1254 } | |
1255 if(result < 0) { | |
1256 if(ucol_isTailored(coll, *(src.source+oldOffset), status
)) { | |
1257 log_verbose("Reset is tailored codepoint %04X, don't
know how to continue, taking next test\n", *(src.source+oldOffset)); | |
1258 return; | |
1259 } else { | |
1260 log_err("%s: couldn't find the CE\n", colLoc); | |
1261 return; | |
1262 } | |
1263 } | |
1264 } | |
1265 | |
1266 currCE &= 0xFFFFFF3F; | |
1267 currContCE &= 0xFFFFFFBF; | |
1268 | |
1269 if(maxStrength == UCOL_IDENTICAL) { | |
1270 if(baseCE != currCE || baseContCE != currContCE) { | |
1271 log_err("%s: current CE (initial strength UCOL_EQUAL)\n
", colLoc); | |
1272 } | |
1273 } else { | |
1274 if(strength == UCOL_IDENTICAL) { | |
1275 if(lastCE != currCE || lastContCE != currContCE) { | |
1276 log_err("%s: current CE (initial strength UCOL_EQUA
L)\n", colLoc); | |
1277 } | |
1278 } else { | |
1279 if(compareCEs(currCE, currContCE, nextCE, nextContCE) >
0) { | |
1280 /*if(currCE > nextCE || (currCE == nextCE && currCon
tCE >= nextContCE)) {*/ | |
1281 log_err("%s: current CE is not less than base CE\n",
colLoc); | |
1282 } | |
1283 if(!before) { | |
1284 if(compareCEs(currCE, currContCE, lastCE, lastContCE
) < 0) { | |
1285 /*if(currCE < lastCE || (currCE == lastCE && cur
rContCE <= lastContCE)) {*/ | |
1286 log_err("%s: sequence of generated CEs is broken
\n", colLoc); | |
1287 } | |
1288 } else { | |
1289 before = FALSE; | |
1290 if(compareCEs(currCE, currContCE, lastCE, lastContCE
) > 0) { | |
1291 /*if(currCE < lastCE || (currCE == lastCE && cur
rContCE <= lastContCE)) {*/ | |
1292 log_err("%s: sequence of generated CEs is broken
\n", colLoc); | |
1293 } | |
1294 } | |
1295 } | |
1296 } | |
1297 | |
1298 } | |
1299 | |
1300 oldOffset = chOffset; | |
1301 lastCE = currCE & 0xFFFFFF3F; | |
1302 lastContCE = currContCE & 0xFFFFFFBF; | |
1303 } | |
1304 uprv_free(src.source); | |
1305 uprv_free(src.reorderCodes); | |
1306 } | |
1307 ucol_close(UCA); | |
1308 uprv_delete_collIterate(c); | |
1309 } | |
1310 | |
#if 0
/* these locales are now picked from index RB */
/*
 * Disabled legacy table of locale IDs. It is compiled out by the surrounding
 * "#if 0"; the locale tests in this file enumerate locales at run time with
 * uloc_countAvailable()/uloc_getAvailable() instead.
 */
static const char* localesToTest[] = {
"ar", "bg", "ca", "cs", "da",
"el", "en_BE", "en_US_POSIX",
"es", "et", "fi", "fr", "hi",
"hr", "hu", "is", "iw", "ja",
"ko", "lt", "lv", "mk", "mt",
"nb", "nn", "nn_NO", "pl", "ro",
"ru", "sh", "sk", "sl", "sq",
"sr", "sv", "th", "tr", "uk",
"vi", "zh", "zh_TW"
};
#endif
1325 | |
/*
 * Escaped tailoring rule strings exercised by RamsRulesTest(): each entry is
 * passed through u_unescape() and then opened with ucol_openRules().
 * The trailing comment on each line keeps an earlier spelling of the rule.
 */
static const char* rulesToTest[] = {
    /* Funky fa rule */
    "&\\u0622 < \\u0627 << \\u0671 < \\u0621",
    /*"& Z < p, P",*/
    /* Cui Mins rules */
    "&[top]<o,O<p,P<q,Q<'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu<'?'",*/
    "&[top]<o,O<p,P<q,Q;'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
    "&[top]<o,O<p,P<q,Q,'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U&'Qu','?'",*/
    "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/u<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
    "&[top]<'?';Qu<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qu",*/
    "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/um<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qum;'?'",*/
    "&[top]<'?';Qum<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U" /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qum"*/
};
1339 | |
1340 | |
1341 static void TestCollations(void) { | |
1342 int32_t noOfLoc = uloc_countAvailable(); | |
1343 int32_t i = 0, j = 0; | |
1344 | |
1345 UErrorCode status = U_ZERO_ERROR; | |
1346 char cName[256]; | |
1347 UChar name[256]; | |
1348 int32_t nameSize; | |
1349 | |
1350 | |
1351 const char *locName = NULL; | |
1352 UCollator *coll = NULL; | |
1353 UCollator *UCA = ucol_open("", &status); | |
1354 UColAttributeValue oldStrength = ucol_getAttribute(UCA, UCOL_STRENGTH, &stat
us); | |
1355 if (U_FAILURE(status)) { | |
1356 log_err_status(status, "Could not open UCA collator %s\n", u_errorName(s
tatus)); | |
1357 return; | |
1358 } | |
1359 ucol_setAttribute(UCA, UCOL_STRENGTH, UCOL_QUATERNARY, &status); | |
1360 | |
1361 for(i = 0; i<noOfLoc; i++) { | |
1362 status = U_ZERO_ERROR; | |
1363 locName = uloc_getAvailable(i); | |
1364 if(uprv_strcmp("ja", locName) == 0) { | |
1365 log_verbose("Don't know how to test prefixes\n"); | |
1366 continue; | |
1367 } | |
1368 if(hasCollationElements(locName)) { | |
1369 nameSize = uloc_getDisplayName(locName, NULL, name, 256, &status); | |
1370 for(j = 0; j<nameSize; j++) { | |
1371 cName[j] = (char)name[j]; | |
1372 } | |
1373 cName[nameSize] = 0; | |
1374 log_verbose("\nTesting locale %s (%s)\n", locName, cName); | |
1375 coll = ucol_open(locName, &status); | |
1376 if(U_SUCCESS(status)) { | |
1377 testAgainstUCA(coll, UCA, "UCA", FALSE, &status); | |
1378 ucol_close(coll); | |
1379 } else { | |
1380 log_err("Couldn't instantiate collator for locale %s, error: %s\
n", locName, u_errorName(status)); | |
1381 status = U_ZERO_ERROR; | |
1382 } | |
1383 } | |
1384 } | |
1385 ucol_setAttribute(UCA, UCOL_STRENGTH, oldStrength, &status); | |
1386 ucol_close(UCA); | |
1387 } | |
1388 | |
1389 static void RamsRulesTest(void) { | |
1390 UErrorCode status = U_ZERO_ERROR; | |
1391 int32_t i = 0; | |
1392 UCollator *coll = NULL; | |
1393 UChar rule[2048]; | |
1394 uint32_t ruleLen; | |
1395 int32_t noOfLoc = uloc_countAvailable(); | |
1396 const char *locName = NULL; | |
1397 | |
1398 log_verbose("RamsRulesTest\n"); | |
1399 | |
1400 if (uprv_strcmp("km", uloc_getDefault())==0 || uprv_strcmp("km_KH", uloc_get
Default())==0) { | |
1401 /* This test will fail if the default locale is "km" or "km_KH". Enable
after trac#6040. */ | |
1402 return; | |
1403 } | |
1404 | |
1405 for(i = 0; i<noOfLoc; i++) { | |
1406 locName = uloc_getAvailable(i); | |
1407 if(hasCollationElements(locName)) { | |
1408 if (uprv_strcmp("ja", locName)==0) { | |
1409 log_verbose("Don't know how to test Japanese because of prefixes
\n"); | |
1410 continue; | |
1411 } | |
1412 if (uprv_strcmp("de__PHONEBOOK", locName)==0) { | |
1413 log_verbose("Don't know how to test Phonebook because the reset
is on an expanding character\n"); | |
1414 continue; | |
1415 } | |
1416 if (uprv_strcmp("bn", locName)==0 || | |
1417 uprv_strcmp("bs", locName)==0 || /* Add due to import
per cldrbug 5647 */ | |
1418 uprv_strcmp("bs_Cyrl", locName)==0 || /* Add due to import
per cldrbug 5647 */ | |
1419 uprv_strcmp("en_US_POSIX", locName)==0 || | |
1420 uprv_strcmp("fa", locName)==0 || /* Add in #10222 wit
h CLDR 24 integration */ | |
1421 uprv_strcmp("fa_AF", locName)==0 || /* Add due to import
per cldrbug 5647 */ | |
1422 uprv_strcmp("gl", locName)==0 || /* Add due to import
per cldrbug 5647 */ | |
1423 uprv_strcmp("gl_ES", locName)==0 || /* Add due to import
per cldrbug 5647 */ | |
1424 uprv_strcmp("he", locName)==0 || /* Add due to new ta
iloring of \u05F3 vs \u0027 per cldrbug 5576 */ | |
1425 uprv_strcmp("he_IL", locName)==0 || /* Add due to new ta
iloring of \u05F3 vs \u0027 per cldrbug 5576 */ | |
1426 uprv_strcmp("km", locName)==0 || | |
1427 uprv_strcmp("km_KH", locName)==0 || | |
1428 uprv_strcmp("my", locName)==0 || | |
1429 uprv_strcmp("ps", locName)==0 || /* Add in #10222 wit
h CLDR 24 integration */ | |
1430 uprv_strcmp("si", locName)==0 || | |
1431 uprv_strcmp("si_LK", locName)==0 || | |
1432 uprv_strcmp("sr_Latn", locName)==0 || /* Add due to import
per cldrbug 5647 */ | |
1433 uprv_strcmp("th", locName)==0 || | |
1434 uprv_strcmp("th_TH", locName)==0 || | |
1435 uprv_strcmp("zh", locName)==0 || | |
1436 uprv_strcmp("zh_Hant", locName)==0 | |
1437 ) { | |
1438 if(log_knownIssue("6040", NULL)) { | |
1439 log_verbose("Can't test %s - TODO: Fix ticket #6040 and reenable
RamsRulesTest for this locale.\n", locName); | |
1440 continue; | |
1441 } | |
1442 } | |
1443 log_verbose("Testing locale %s\n", locName); | |
1444 status = U_ZERO_ERROR; | |
1445 coll = ucol_open(locName, &status); | |
1446 if(U_SUCCESS(status)) { | |
1447 if((status != U_USING_DEFAULT_WARNING) && (status != U_USING_FALLB
ACK_WARNING)) { | |
1448 if(coll->image->jamoSpecial == TRUE) { | |
1449 log_err("%s has special JAMOs\n", locName); | |
1450 } | |
1451 ucol_setAttribute(coll, UCOL_CASE_FIRST, UCOL_OFF, &status); | |
1452 testCollator(coll, &status); | |
1453 testCEs(coll, &status); | |
1454 } else { | |
1455 log_verbose("Skipping %s: %s\n", locName, u_errorName(status)); | |
1456 } | |
1457 ucol_close(coll); | |
1458 } else { | |
1459 log_err("Could not open %s: %s\n", locName, u_errorName(status)); | |
1460 } | |
1461 } | |
1462 } | |
1463 | |
1464 for(i = 0; i<sizeof(rulesToTest)/sizeof(rulesToTest[0]); i++) { | |
1465 log_verbose("Testing rule: %s\n", rulesToTest[i]); | |
1466 ruleLen = u_unescape(rulesToTest[i], rule, 2048); | |
1467 status = U_ZERO_ERROR; | |
1468 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&stat
us); | |
1469 if(U_SUCCESS(status)) { | |
1470 testCollator(coll, &status); | |
1471 testCEs(coll, &status); | |
1472 ucol_close(coll); | |
1473 } else { | |
1474 log_err_status(status, "Could not test rule: %s: '%s'\n", u_errorName(
status), rulesToTest[i]); | |
1475 } | |
1476 } | |
1477 | |
1478 } | |
1479 | |
1480 static void IsTailoredTest(void) { | |
1481 UErrorCode status = U_ZERO_ERROR; | |
1482 uint32_t i = 0; | |
1483 UCollator *coll = NULL; | |
1484 UChar rule[2048]; | |
1485 UChar tailored[2048]; | |
1486 UChar notTailored[2048]; | |
1487 uint32_t ruleLen, tailoredLen, notTailoredLen; | |
1488 | |
1489 log_verbose("IsTailoredTest\n"); | |
1490 | |
1491 u_uastrcpy(rule, "&Z < A, B, C;c < d"); | |
1492 ruleLen = u_strlen(rule); | |
1493 | |
1494 u_uastrcpy(tailored, "ABCcd"); | |
1495 tailoredLen = u_strlen(tailored); | |
1496 | |
1497 u_uastrcpy(notTailored, "ZabD"); | |
1498 notTailoredLen = u_strlen(notTailored); | |
1499 | |
1500 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); | |
1501 if(U_SUCCESS(status)) { | |
1502 for(i = 0; i<tailoredLen; i++) { | |
1503 if(!ucol_isTailored(coll, tailored[i], &status)) { | |
1504 log_err("%i: %04X should be tailored - it is reported as not\n",
i, tailored[i]); | |
1505 } | |
1506 } | |
1507 for(i = 0; i<notTailoredLen; i++) { | |
1508 if(ucol_isTailored(coll, notTailored[i], &status)) { | |
1509 log_err("%i: %04X should not be tailored - it is reported as it
is\n", i, notTailored[i]); | |
1510 } | |
1511 } | |
1512 ucol_close(coll); | |
1513 } | |
1514 else { | |
1515 log_err_status(status, "Can't tailor rules\n"); | |
1516 } | |
1517 /* Code coverage */ | |
1518 status = U_ZERO_ERROR; | |
1519 coll = ucol_open("ja", &status); | |
1520 if(!ucol_isTailored(coll, 0x4E9C, &status)) { | |
1521 log_err_status(status, "0x4E9C should be tailored - it is reported as no
t\n"); | |
1522 } | |
1523 ucol_close(coll); | |
1524 } | |
1525 | |
1526 | |
1527 const static char chTest[][20] = { | 415 const static char chTest[][20] = { |
1528 "c", | 416 "c", |
1529 "C", | 417 "C", |
1530 "ca", "cb", "cx", "cy", "CZ", | 418 "ca", "cb", "cx", "cy", "CZ", |
1531 "c\\u030C", "C\\u030C", | 419 "c\\u030C", "C\\u030C", |
1532 "h", | 420 "h", |
1533 "H", | 421 "H", |
1534 "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY", | 422 "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY", |
1535 "ch", "cH", "Ch", "CH", | 423 "ch", "cH", "Ch", "CH", |
1536 "cha", "charly", "che", "chh", "chch", "chr", | 424 "cha", "charly", "che", "chh", "chch", "chr", |
(...skipping 29 matching lines...) Expand all Loading... |
1566 } | 454 } |
1567 else { | 455 else { |
1568 log_data_err("Can't open collator"); | 456 log_data_err("Can't open collator"); |
1569 } | 457 } |
1570 ucol_close(coll); | 458 ucol_close(coll); |
1571 } | 459 } |
1572 | 460 |
1573 | 461 |
1574 | 462 |
1575 | 463 |
| 464 /* |
1576 const static char impTest[][20] = { | 465 const static char impTest[][20] = { |
1577 "\\u4e00", | 466 "\\u4e00", |
1578 "a", | 467 "a", |
1579 "A", | 468 "A", |
1580 "b", | 469 "b", |
1581 "B", | 470 "B", |
1582 "\\u4e01" | 471 "\\u4e01" |
1583 }; | 472 }; |
| 473 */ |
1584 | 474 |
1585 | 475 |
1586 static void TestImplicitTailoring(void) { | 476 static void TestImplicitTailoring(void) { |
1587 static const struct { | 477 static const struct { |
1588 const char *rules; | 478 const char *rules; |
1589 const char *data[10]; | 479 const char *data[10]; |
1590 const uint32_t len; | 480 const uint32_t len; |
1591 } tests[] = { | 481 } tests[] = { |
1592 { "&[before 1]\\u4e00 < b < c &[before 1]\\u4e00 < d < e", { "d", "e", "b"
, "c", "\\u4e00"}, 5 }, | 482 { |
| 483 /* Tailor b and c before U+4E00. */ |
| 484 "&[before 1]\\u4e00 < b < c " |
| 485 /* Now, before U+4E00 is c; put d and e after that. */ |
| 486 "&[before 1]\\u4e00 < d < e", |
| 487 { "b", "c", "d", "e", "\\u4e00"}, 5 }, |
1593 { "&\\u4e00 < a <<< A < b <<< B", { "\\u4e00", "a", "A", "b", "B", "\\u4
e01"}, 6 }, | 488 { "&\\u4e00 < a <<< A < b <<< B", { "\\u4e00", "a", "A", "b", "B", "\\u4
e01"}, 6 }, |
1594 { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e
00"}, 3}, | 489 { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e
00"}, 3}, |
1595 { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e
01"}, 3} | 490 { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e
01"}, 3} |
1596 }; | 491 }; |
1597 | 492 |
1598 int32_t i = 0; | 493 int32_t i = 0; |
1599 | 494 |
1600 for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) { | 495 for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) { |
1601 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len); | 496 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len); |
1602 } | 497 } |
(...skipping 324 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1927 "af", | 822 "af", |
1928 "Af", | 823 "Af", |
1929 "b", | 824 "b", |
1930 "B" | 825 "B" |
1931 }; | 826 }; |
1932 genericLocaleStarter("fr", data, sizeof(data)/sizeof(data[0])); | 827 genericLocaleStarter("fr", data, sizeof(data)/sizeof(data[0])); |
1933 genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", data, sizeof(dat
a)/sizeof(data[0])); | 828 genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", data, sizeof(dat
a)/sizeof(data[0])); |
1934 } | 829 } |
1935 | 830 |
1936 | 831 |
1937 /* | |
1938 "& a < b < c < d& r < c", "& a < b < d& r < c"
, | |
1939 "& a < b < c < d& c < m", "& a < b < c < m < d
", | |
1940 "& a < b < c < d& a < m", "& a < m < b < c < d
", | |
1941 "& a <<< b << c < d& a < m", "& a <<< b << c < m
< d", | |
1942 "& a < b < c < d& [before 1] c < m", "& a < b < m < c < d
", | |
1943 "& a < b <<< c << d <<< e& [before 3] e <<< x", "& a < b <<< c << d <
<< x <<< e", | |
1944 "& a < b <<< c << d <<< e& [before 2] e <<< x", "& a < b <<< c <<< x
<< d <<< e", | |
1945 "& a < b <<< c << d <<< e& [before 1] e <<< x", "& a <<< x < b <<< c
<< d <<< e", | |
1946 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x", "& a < b <<< c << d <
<< e <<< f < x < g", | |
1947 */ | |
1948 static void TestRedundantRules(void) { | |
1949 int32_t i; | |
1950 | |
1951 static const struct { | |
1952 const char *rules; | |
1953 const char *expectedRules; | |
1954 const char *testdata[8]; | |
1955 uint32_t testdatalen; | |
1956 } tests[] = { | |
1957 /* this test conflicts with positioning of CODAN placeholder */ | |
1958 /*{ | |
1959 "& a <<< b <<< c << d <<< e& [before 1] e <<< x", | |
1960 "&\\u2089<<<x", | |
1961 {"\\u2089", "x"}, 2 | |
1962 }, */ | |
1963 /* this test conflicts with the [before x] syntax tightening */ | |
1964 /*{ | |
1965 "& b <<< c <<< d << e <<< f& [before 1] f <<< x", | |
1966 "&\\u0252<<<x", | |
1967 {"\\u0252", "x"}, 2 | |
1968 }, */ | |
1969 /* this test conflicts with the [before x] syntax tightening */ | |
1970 /*{ | |
1971 "& a < b <<< c << d <<< e& [before 1] e <<< x", | |
1972 "& a <<< x < b <<< c << d <<< e", | |
1973 {"a", "x", "b", "c", "d", "e"}, 6 | |
1974 }, */ | |
1975 { | |
1976 "& a < b < c < d& [before 1] c < m", | |
1977 "& a < b < m < c < d", | |
1978 {"a", "b", "m", "c", "d"}, 5 | |
1979 }, | |
1980 { | |
1981 "& a < b <<< c << d <<< e& [before 3] e <<< x", | |
1982 "& a < b <<< c << d <<< x <<< e", | |
1983 {"a", "b", "c", "d", "x", "e"}, 6 | |
1984 }, | |
1985 /* this test conflicts with the [before x] syntax tightening */ | |
1986 /* { | |
1987 "& a < b <<< c << d <<< e& [before 2] e <<< x", | |
1988 "& a < b <<< c <<< x << d <<< e", | |
1989 {"a", "b", "c", "x", "d", "e"},, 6 | |
1990 }, */ | |
1991 { | |
1992 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x", | |
1993 "& a < b <<< c << d <<< e <<< f < x < g", | |
1994 {"a", "b", "c", "d", "e", "f", "x", "g"}, 8 | |
1995 }, | |
1996 { | |
1997 "& a <<< b << c < d& a < m", | |
1998 "& a <<< b << c < m < d", | |
1999 {"a", "b", "c", "m", "d"}, 5 | |
2000 }, | |
2001 { | |
2002 "&a<b<<b\\u0301 &z<b", | |
2003 "&a<b\\u0301 &z<b", | |
2004 {"a", "b\\u0301", "z", "b"}, 4 | |
2005 }, | |
2006 { | |
2007 "&z<m<<<q<<<m", | |
2008 "&z<q<<<m", | |
2009 {"z", "q", "m"},3 | |
2010 }, | |
2011 { | |
2012 "&z<<<m<q<<<m", | |
2013 "&z<q<<<m", | |
2014 {"z", "q", "m"}, 3 | |
2015 }, | |
2016 { | |
2017 "& a < b < c < d& r < c", | |
2018 "& a < b < d& r < c", | |
2019 {"a", "b", "d"}, 3 | |
2020 }, | |
2021 { | |
2022 "& a < b < c < d& r < c", | |
2023 "& a < b < d& r < c", | |
2024 {"r", "c"}, 2 | |
2025 }, | |
2026 { | |
2027 "& a < b < c < d& c < m", | |
2028 "& a < b < c < m < d", | |
2029 {"a", "b", "c", "m", "d"}, 5 | |
2030 }, | |
2031 { | |
2032 "& a < b < c < d& a < m", | |
2033 "& a < m < b < c < d", | |
2034 {"a", "m", "b", "c", "d"}, 5 | |
2035 } | |
2036 }; | |
2037 | |
2038 | |
2039 UCollator *credundant = NULL; | |
2040 UCollator *cresulting = NULL; | |
2041 UErrorCode status = U_ZERO_ERROR; | |
2042 UChar rlz[2048] = { 0 }; | |
2043 uint32_t rlen = 0; | |
2044 | |
2045 for(i = 0; i<sizeof(tests)/sizeof(tests[0]); i++) { | |
2046 log_verbose("testing rule %s, expected to be %s\n", tests[i].rules, tests[i]
.expectedRules); | |
2047 rlen = u_unescape(tests[i].rules, rlz, 2048); | |
2048 | |
2049 credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&sta
tus); | |
2050 if(status == U_FILE_ACCESS_ERROR) { | |
2051 log_data_err("Is your data around?\n"); | |
2052 return; | |
2053 } else if(U_FAILURE(status)) { | |
2054 log_err("Error opening collator\n"); | |
2055 return; | |
2056 } | |
2057 | |
2058 rlen = u_unescape(tests[i].expectedRules, rlz, 2048); | |
2059 cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&sta
tus); | |
2060 | |
2061 testAgainstUCA(cresulting, credundant, "expected", TRUE, &status); | |
2062 | |
2063 ucol_close(credundant); | |
2064 ucol_close(cresulting); | |
2065 | |
2066 log_verbose("testing using data\n"); | |
2067 | |
2068 genericRulesStarter(tests[i].rules, tests[i].testdata, tests[i].testdatalen)
; | |
2069 } | |
2070 | |
2071 } | |
2072 | |
2073 static void TestExpansionSyntax(void) { | |
2074 int32_t i; | |
2075 | |
2076 const static char *rules[] = { | |
2077 "&AE <<< a << b <<< c &d <<< f", | |
2078 "&AE <<< a <<< b << c << d < e < f <<< g", | |
2079 "&AE <<< B <<< C / D <<< F" | |
2080 }; | |
2081 | |
2082 const static char *expectedRules[] = { | |
2083 "&A <<< a / E << b / E <<< c /E &d <<< f", | |
2084 "&A <<< a / E <<< b / E << c / E << d / E < e < f <<< g", | |
2085 "&A <<< B / E <<< C / ED <<< F / E" | |
2086 }; | |
2087 | |
2088 const static char *testdata[][8] = { | |
2089 {"AE", "a", "b", "c"}, | |
2090 {"AE", "a", "b", "c", "d", "e", "f", "g"}, | |
2091 {"AE", "B", "C"} /* / ED <<< F / E"},*/ | |
2092 }; | |
2093 | |
2094 const static uint32_t testdatalen[] = { | |
2095 4, | |
2096 8, | |
2097 3 | |
2098 }; | |
2099 | |
2100 | |
2101 | |
2102 UCollator *credundant = NULL; | |
2103 UCollator *cresulting = NULL; | |
2104 UErrorCode status = U_ZERO_ERROR; | |
2105 UChar rlz[2048] = { 0 }; | |
2106 uint32_t rlen = 0; | |
2107 | |
2108 for(i = 0; i<sizeof(rules)/sizeof(rules[0]); i++) { | |
2109 log_verbose("testing rule %s, expected to be %s\n", rules[i], expectedRules[
i]); | |
2110 rlen = u_unescape(rules[i], rlz, 2048); | |
2111 | |
2112 credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &st
atus); | |
2113 if(status == U_FILE_ACCESS_ERROR) { | |
2114 log_data_err("Is your data around?\n"); | |
2115 return; | |
2116 } else if(U_FAILURE(status)) { | |
2117 log_err("Error opening collator\n"); | |
2118 return; | |
2119 } | |
2120 rlen = u_unescape(expectedRules[i], rlz, 2048); | |
2121 cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&sta
tus); | |
2122 | |
2123 /* testAgainstUCA still doesn't handle expansions correctly, so this is not
run */ | |
2124 /* as a hard error test, but only in information mode */ | |
2125 testAgainstUCA(cresulting, credundant, "expected", FALSE, &status); | |
2126 | |
2127 ucol_close(credundant); | |
2128 ucol_close(cresulting); | |
2129 | |
2130 log_verbose("testing using data\n"); | |
2131 | |
2132 genericRulesStarter(rules[i], testdata[i], testdatalen[i]); | |
2133 } | |
2134 } | |
2135 | |
2136 static void TestCase(void) | 832 static void TestCase(void) |
2137 { | 833 { |
2138 const static UChar gRules[MAX_TOKEN_LEN] = | 834 const static UChar gRules[MAX_TOKEN_LEN] = |
2139 /*" & 0 < 1,\u2461<a,A"*/ | 835 /*" & 0 < 1,\u2461<a,A"*/ |
2140 { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x
0041, 0x0000 }; | 836 { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x
0041, 0x0000 }; |
2141 | 837 |
2142 const static UChar testCase[][MAX_TOKEN_LEN] = | 838 const static UChar testCase[][MAX_TOKEN_LEN] = |
2143 { | 839 { |
2144 /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000}, | 840 /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000}, |
2145 /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000}, | 841 /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000}, |
(...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2239 "CHa", | 935 "CHa", |
2240 "ChA", | 936 "ChA", |
2241 "Cha", | 937 "Cha", |
2242 "chA", | 938 "chA", |
2243 "cha", | 939 "cha", |
2244 "I", | 940 "I", |
2245 "i" | 941 "i" |
2246 }; | 942 }; |
2247 log_verbose("mixed case test\n"); | 943 log_verbose("mixed case test\n"); |
2248 log_verbose("lower first, case level off\n"); | 944 log_verbose("lower first, case level off\n"); |
2249 genericRulesStarter("[casefirst lower]&H<ch<<<Ch<<<CH", lowerFirst, sizeof
(lowerFirst)/sizeof(lowerFirst[0])); | 945 genericRulesStarter("[caseFirst lower]&H<ch<<<Ch<<<CH", lowerFirst, sizeof
(lowerFirst)/sizeof(lowerFirst[0])); |
2250 log_verbose("upper first, case level off\n"); | 946 log_verbose("upper first, case level off\n"); |
2251 genericRulesStarter("[casefirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof
(upperFirst)/sizeof(upperFirst[0])); | 947 genericRulesStarter("[caseFirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof
(upperFirst)/sizeof(upperFirst[0])); |
2252 log_verbose("lower first, case level on\n"); | 948 log_verbose("lower first, case level on\n"); |
2253 genericRulesStarter("[casefirst lower][caselevel on]&H<ch<<<Ch<<<CH", lowe
rFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0])); | 949 genericRulesStarter("[caseFirst lower][caseLevel on]&H<ch<<<Ch<<<CH", lowe
rFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0])); |
2254 log_verbose("upper first, case level on\n"); | 950 log_verbose("upper first, case level on\n"); |
2255 genericRulesStarter("[casefirst upper][caselevel on]&H<ch<<<Ch<<<CH", uppe
rFirst, sizeof(upperFirst)/sizeof(upperFirst[0])); | 951 genericRulesStarter("[caseFirst upper][caseLevel on]&H<ch<<<Ch<<<CH", uppe
rFirst, sizeof(upperFirst)/sizeof(upperFirst[0])); |
2256 } | 952 } |
2257 | 953 |
2258 } | 954 } |
2259 | 955 |
2260 static void TestIncrementalNormalize(void) { | 956 static void TestIncrementalNormalize(void) { |
2261 | 957 |
2262 /*UChar baseA =0x61;*/ | 958 /*UChar baseA =0x61;*/ |
2263 UChar baseA =0x41; | 959 UChar baseA =0x41; |
2264 /* UChar baseB = 0x42;*/ | 960 /* UChar baseB = 0x42;*/ |
2265 static const UChar ccMix[] = {0x316, 0x321, 0x300}; | 961 static const UChar ccMix[] = {0x316, 0x321, 0x300}; |
(...skipping 287 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2553 } | 1249 } |
2554 | 1250 |
2555 log_verbose("Using start of korean rules\n"); | 1251 log_verbose("Using start of korean rules\n"); |
2556 | 1252 |
2557 if(U_SUCCESS(status)) { | 1253 if(U_SUCCESS(status)) { |
2558 genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0
])); | 1254 genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0
])); |
2559 } else { | 1255 } else { |
2560 log_err("Unable to open collator with rules %s\n", rules); | 1256 log_err("Unable to open collator with rules %s\n", rules); |
2561 } | 1257 } |
2562 | 1258 |
2563 log_verbose("Setting jamoSpecial to TRUE and testing once more\n"); | |
2564 ((UCATableHeader *)coll->image)->jamoSpecial = TRUE; /* don't try this at home
*/ | |
2565 genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0])
); | |
2566 | |
2567 ucol_close(coll); | 1259 ucol_close(coll); |
2568 | 1260 |
2569 log_verbose("Using ko__LOTUS locale\n"); | 1261 log_verbose("Using ko__LOTUS locale\n"); |
2570 genericLocaleStarter("ko__LOTUS", koreanData, sizeof(koreanData)/sizeof(korean
Data[0])); | 1262 genericLocaleStarter("ko__LOTUS", koreanData, sizeof(koreanData)/sizeof(korean
Data[0])); |
2571 } | 1263 } |
2572 | 1264 |
| 1265 /* |
| 1266 * The secondary/tertiary compression middle byte |
| 1267 * as used by the current implementation. |
| 1268 * Subject to change as the sort key compression changes. |
| 1269 * See class CollationKeys. |
| 1270 */ |
| 1271 enum { |
| 1272 SEC_COMMON_MIDDLE = 0x25, /* range 05..45 */ |
| 1273 TER_ONLY_COMMON_MIDDLE = 0x65 /* range 05..C5 */ |
| 1274 }; |
| 1275 |
2573 static void TestCompressOverlap(void) { | 1276 static void TestCompressOverlap(void) { |
2574 UChar secstr[150]; | 1277 UChar secstr[150]; |
2575 UChar tertstr[150]; | 1278 UChar tertstr[150]; |
2576 UErrorCode status = U_ZERO_ERROR; | 1279 UErrorCode status = U_ZERO_ERROR; |
2577 UCollator *coll; | 1280 UCollator *coll; |
2578 char result[200]; | 1281 uint8_t result[500]; |
2579 uint32_t resultlen; | 1282 uint32_t resultlen; |
2580 int count = 0; | 1283 int count = 0; |
2581 char *tempptr; | 1284 uint8_t *tempptr; |
2582 | 1285 |
2583 coll = ucol_open("", &status); | 1286 coll = ucol_open("", &status); |
2584 | 1287 |
2585 if (U_FAILURE(status)) { | 1288 if (U_FAILURE(status)) { |
2586 log_err_status(status, "Collator can't be created -> %s\n", u_errorName(
status)); | 1289 log_err_status(status, "Collator can't be created -> %s\n", u_errorName(
status)); |
2587 return; | 1290 return; |
2588 } | 1291 } |
2589 while (count < 149) { | 1292 while (count < 149) { |
2590 secstr[count] = 0x0020; /* [06, 05, 05] */ | 1293 secstr[count] = 0x0020; /* [06, 05, 05] */ |
2591 tertstr[count] = 0x0020; | 1294 tertstr[count] = 0x0020; |
2592 count ++; | 1295 count ++; |
2593 } | 1296 } |
2594 | 1297 |
2595 /* top down compression ----------------------------------- */ | 1298 /* top down compression ----------------------------------- */ |
2596 secstr[count] = 0x0332; /* [, 87, 05] */ | 1299 secstr[count] = 0x0332; /* [, 87, 05] */ |
2597 tertstr[count] = 0x3000; /* [06, 05, 07] */ | 1300 tertstr[count] = 0x3000; /* [06, 05, 07] */ |
2598 | 1301 |
2599 /* no compression secstr should have 150 secondary bytes, tertstr should | 1302 /* no compression secstr should have 150 secondary bytes, tertstr should |
2600 have 150 tertiary bytes. | 1303 have 150 tertiary bytes. |
2601 with correct overlapping compression, secstr should have 4 secondary | 1304 with correct compression, secstr should have 6 secondary |
2602 bytes, tertstr should have > 2 tertiary bytes */ | 1305 bytes (149/33 rounded up + accent), tertstr should have > 2 tertiary bytes *
/ |
2603 resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250); | 1306 resultlen = ucol_getSortKey(coll, secstr, 150, result, LEN(result)); |
2604 (void)resultlen; /* Suppress set but not used warning. */ | 1307 (void)resultlen; /* Suppress set but not used warning. */ |
2605 tempptr = uprv_strchr(result, 1) + 1; | 1308 tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1; |
2606 while (*(tempptr + 1) != 1) { | 1309 while (*(tempptr + 1) != 1) { |
2607 /* the last secondary collation element is not checked since it is not | 1310 /* the last secondary collation element is not checked since it is not |
2608 part of the compression */ | 1311 part of the compression */ |
2609 if (*tempptr < UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2) { | 1312 if (*tempptr < SEC_COMMON_MIDDLE) { |
2610 log_err("Secondary compression overlapped\n"); | 1313 log_err("Secondary top down compression overlapped\n"); |
2611 } | 1314 } |
2612 tempptr ++; | 1315 tempptr ++; |
2613 } | 1316 } |
2614 | 1317 |
2615 /* tertiary top/bottom/common for en_US is similar to the secondary | 1318 /* tertiary top/bottom/common for en_US is similar to the secondary |
2616 top/bottom/common */ | 1319 top/bottom/common */ |
2617 resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250); | 1320 resultlen = ucol_getSortKey(coll, tertstr, 150, result, LEN(result)); |
2618 tempptr = uprv_strrchr(result, 1) + 1; | 1321 tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1; |
2619 while (*(tempptr + 1) != 0) { | 1322 while (*(tempptr + 1) != 0) { |
2620 /* the last secondary collation element is not checked since it is not | 1323 /* the last secondary collation element is not checked since it is not |
2621 part of the compression */ | 1324 part of the compression */ |
2622 if (*tempptr < coll->tertiaryTop - coll->tertiaryTopCount) { | 1325 if (*tempptr < TER_ONLY_COMMON_MIDDLE) { |
2623 log_err("Tertiary compression overlapped\n"); | 1326 log_err("Tertiary top down compression overlapped\n"); |
2624 } | 1327 } |
2625 tempptr ++; | 1328 tempptr ++; |
2626 } | 1329 } |
2627 | 1330 |
2628 /* bottom up compression ------------------------------------- */ | 1331 /* bottom up compression ------------------------------------- */ |
2629 secstr[count] = 0; | 1332 secstr[count] = 0; |
2630 tertstr[count] = 0; | 1333 tertstr[count] = 0; |
2631 resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250); | 1334 resultlen = ucol_getSortKey(coll, secstr, 150, result, LEN(result)); |
2632 tempptr = uprv_strchr(result, 1) + 1; | 1335 tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1; |
2633 while (*(tempptr + 1) != 1) { | 1336 while (*(tempptr + 1) != 1) { |
2634 /* the last secondary collation element is not checked since it is not | 1337 /* the last secondary collation element is not checked since it is not |
2635 part of the compression */ | 1338 part of the compression */ |
2636 if (*tempptr > UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2) { | 1339 if (*tempptr > SEC_COMMON_MIDDLE) { |
2637 log_err("Secondary compression overlapped\n"); | 1340 log_err("Secondary bottom up compression overlapped\n"); |
2638 } | 1341 } |
2639 tempptr ++; | 1342 tempptr ++; |
2640 } | 1343 } |
2641 | 1344 |
2642 /* tertiary top/bottom/common for en_US is similar to the secondary | 1345 /* tertiary top/bottom/common for en_US is similar to the secondary |
2643 top/bottom/common */ | 1346 top/bottom/common */ |
2644 resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250); | 1347 resultlen = ucol_getSortKey(coll, tertstr, 150, result, LEN(result)); |
2645 tempptr = uprv_strrchr(result, 1) + 1; | 1348 tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1; |
2646 while (*(tempptr + 1) != 0) { | 1349 while (*(tempptr + 1) != 0) { |
2647 /* the last secondary collation element is not checked since it is not | 1350 /* the last secondary collation element is not checked since it is not |
2648 part of the compression */ | 1351 part of the compression */ |
2649 if (*tempptr > coll->tertiaryBottom + coll->tertiaryBottomCount) { | 1352 if (*tempptr > TER_ONLY_COMMON_MIDDLE) { |
2650 log_err("Tertiary compression overlapped\n"); | 1353 log_err("Tertiary bottom up compression overlapped\n"); |
2651 } | 1354 } |
2652 tempptr ++; | 1355 tempptr ++; |
2653 } | 1356 } |
2654 | 1357 |
2655 ucol_close(coll); | 1358 ucol_close(coll); |
2656 } | 1359 } |
2657 | 1360 |
2658 static void TestCyrillicTailoring(void) { | 1361 static void TestCyrillicTailoring(void) { |
2659 static const char *test[] = { | 1362 static const char *test[] = { |
2660 "\\u0410b", | 1363 "\\u0410b", |
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2700 const static UChar testdata[][2] = { | 1403 const static UChar testdata[][2] = { |
2701 {0x0041 /* 'A' */, 0x0042 /* 'B' */}, | 1404 {0x0041 /* 'A' */, 0x0042 /* 'B' */}, |
2702 {0x0041 /* 'A' */, 0x0306 /* combining breve */}, | 1405 {0x0041 /* 'A' */, 0x0306 /* combining breve */}, |
2703 {0x0063 /* 'c' */, 0x0068 /* 'h' */} | 1406 {0x0063 /* 'c' */, 0x0068 /* 'h' */} |
2704 }; | 1407 }; |
2705 const static UChar testdata2[][2] = { | 1408 const static UChar testdata2[][2] = { |
2706 {0x0063 /* 'c' */, 0x0067 /* 'g' */}, | 1409 {0x0063 /* 'c' */, 0x0067 /* 'g' */}, |
2707 {0x0063 /* 'c' */, 0x0068 /* 'h' */}, | 1410 {0x0063 /* 'c' */, 0x0068 /* 'h' */}, |
2708 {0x0063 /* 'c' */, 0x006C /* 'l' */} | 1411 {0x0063 /* 'c' */, 0x006C /* 'l' */} |
2709 }; | 1412 }; |
| 1413 #if 0 |
| 1414 /* |
| 1415 * These pairs of rule strings are not guaranteed to yield the very same map
pings. |
| 1416 * In fact, LDML 24 recommends an improved way of creating mappings |
| 1417 * which always yields different mappings for such pairs. See |
| 1418 * http://www.unicode.org/reports/tr35/tr35-33/tr35-collation.html#Orderings |
| 1419 */ |
2710 const static char *testrules3[] = { | 1420 const static char *testrules3[] = { |
2711 "&z < xyz &xyzw << B", | 1421 "&z < xyz &xyzw << B", |
2712 "&z < xyz &xyz << B / w", | 1422 "&z < xyz &xyz << B / w", |
2713 "&z < ch &achm << B", | 1423 "&z < ch &achm << B", |
2714 "&z < ch &a << B / chm", | 1424 "&z < ch &a << B / chm", |
2715 "&\\ud800\\udc00w << B", | 1425 "&\\ud800\\udc00w << B", |
2716 "&\\ud800\\udc00 << B / w", | 1426 "&\\ud800\\udc00 << B / w", |
2717 "&a\\ud800\\udc00m << B", | 1427 "&a\\ud800\\udc00m << B", |
2718 "&a << B / \\ud800\\udc00m", | 1428 "&a << B / \\ud800\\udc00m", |
2719 }; | 1429 }; |
| 1430 #endif |
2720 | 1431 |
2721 UErrorCode status = U_ZERO_ERROR; | 1432 UErrorCode status = U_ZERO_ERROR; |
2722 UCollator *coll; | 1433 UCollator *coll; |
2723 UChar rule[256] = {0}; | 1434 UChar rule[256] = {0}; |
2724 uint32_t rlen = 0; | 1435 uint32_t rlen = 0; |
2725 int i; | 1436 int i; |
2726 | 1437 |
2727 for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) { | 1438 for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) { |
2728 UCollationElements *iter1; | 1439 UCollationElements *iter1; |
2729 int j = 0; | 1440 int j = 0; |
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2775 testdata2[1][1]); | 1486 testdata2[1][1]); |
2776 return; | 1487 return; |
2777 } | 1488 } |
2778 if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) { | 1489 if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) { |
2779 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n", | 1490 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n", |
2780 testdata2[1][0], testdata2[1][1], testdata2[2][0], | 1491 testdata2[1][0], testdata2[1][1], testdata2[2][0], |
2781 testdata2[2][1]); | 1492 testdata2[2][1]); |
2782 return; | 1493 return; |
2783 } | 1494 } |
2784 ucol_close(coll); | 1495 ucol_close(coll); |
2785 | 1496 #if 0 /* see above */ |
2786 for (i = 0; i < sizeof(testrules3) / sizeof(testrules3[0]); i += 2) { | 1497 for (i = 0; i < sizeof(testrules3) / sizeof(testrules3[0]); i += 2) { |
| 1498 log_verbose("testrules3 i==%d \"%s\" vs. \"%s\"\n", i, testrules3[i], t
estrules3[i + 1]); |
2787 UCollator *coll1, | 1499 UCollator *coll1, |
2788 *coll2; | 1500 *coll2; |
2789 UCollationElements *iter1, | 1501 UCollationElements *iter1, |
2790 *iter2; | 1502 *iter2; |
2791 UChar ch = 0x0042 /* 'B' */; | 1503 UChar ch = 0x0042 /* 'B' */; |
2792 uint32_t ce; | 1504 uint32_t ce; |
2793 rlen = u_unescape(testrules3[i], rule, 32); | 1505 rlen = u_unescape(testrules3[i], rule, 32); |
2794 coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status)
; | 1506 coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status)
; |
2795 rlen = u_unescape(testrules3[i + 1], rule, 32); | 1507 rlen = u_unescape(testrules3[i + 1], rule, 32); |
2796 coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status)
; | 1508 coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status)
; |
2797 if (U_FAILURE(status)) { | 1509 if (U_FAILURE(status)) { |
2798 log_err("Collator creation failed %s\n", testrules[i]); | 1510 log_err("Collator creation failed %s\n", testrules[i]); |
2799 return; | 1511 return; |
2800 } | 1512 } |
2801 iter1 = ucol_openElements(coll1, &ch, 1, &status); | 1513 iter1 = ucol_openElements(coll1, &ch, 1, &status); |
2802 iter2 = ucol_openElements(coll2, &ch, 1, &status); | 1514 iter2 = ucol_openElements(coll2, &ch, 1, &status); |
2803 if (U_FAILURE(status)) { | 1515 if (U_FAILURE(status)) { |
2804 log_err("Collation iterator creation failed\n"); | 1516 log_err("Collation iterator creation failed\n"); |
2805 return; | 1517 return; |
2806 } | 1518 } |
2807 ce = ucol_next(iter1, &status); | 1519 ce = ucol_next(iter1, &status); |
2808 if (U_FAILURE(status)) { | 1520 if (U_FAILURE(status)) { |
2809 log_err("Retrieving ces failed\n"); | 1521 log_err("Retrieving ces failed\n"); |
2810 return; | 1522 return; |
2811 } | 1523 } |
2812 while (ce != UCOL_NULLORDER) { | 1524 while (ce != UCOL_NULLORDER) { |
2813 if (ce != (uint32_t)ucol_next(iter2, &status)) { | 1525 uint32_t ce2 = (uint32_t)ucol_next(iter2, &status); |
2814 log_err("CEs does not match\n"); | 1526 if (ce == ce2) { |
| 1527 log_verbose("CEs match: %08x\n", ce); |
| 1528 } else { |
| 1529 log_err("CEs do not match: %08x vs. %08x\n", ce, ce2); |
2815 return; | 1530 return; |
2816 } | 1531 } |
2817 ce = ucol_next(iter1, &status); | 1532 ce = ucol_next(iter1, &status); |
2818 if (U_FAILURE(status)) { | 1533 if (U_FAILURE(status)) { |
2819 log_err("Retrieving ces failed\n"); | 1534 log_err("Retrieving ces failed\n"); |
2820 return; | 1535 return; |
2821 } | 1536 } |
2822 } | 1537 } |
2823 if (ucol_next(iter2, &status) != UCOL_NULLORDER) { | 1538 if (ucol_next(iter2, &status) != UCOL_NULLORDER) { |
2824 log_err("CEs not exhausted\n"); | 1539 log_err("CEs not exhausted\n"); |
2825 return; | 1540 return; |
2826 } | 1541 } |
2827 ucol_closeElements(iter1); | 1542 ucol_closeElements(iter1); |
2828 ucol_closeElements(iter2); | 1543 ucol_closeElements(iter2); |
2829 ucol_close(coll1); | 1544 ucol_close(coll1); |
2830 ucol_close(coll2); | 1545 ucol_close(coll2); |
2831 } | 1546 } |
| 1547 #endif |
2832 } | 1548 } |
2833 | 1549 |
2834 static void TestExpansion(void) { | 1550 static void TestExpansion(void) { |
2835 const static char *testrules[] = { | 1551 const static char *testrules[] = { |
| 1552 #if 0 |
| 1553 /* |
| 1554 * This seems to have tested that M was not mapped to an expansion. |
| 1555 * I believe the old builder just did that because it computed the exten
sion CEs |
| 1556 * at the very end, which was a bug. |
| 1557 * Among other problems, it violated the core tailoring principle |
| 1558 * by making an earlier rule depend on a later one. |
| 1559 * And, of course, if M did not get an expansion, then it was primary di
fferent from K, |
| 1560 * unlike what the rule &K<<M says. |
| 1561 */ |
2836 "&J << K / B & K << M", | 1562 "&J << K / B & K << M", |
| 1563 #endif |
2837 "&J << K / B << M" | 1564 "&J << K / B << M" |
2838 }; | 1565 }; |
2839 const static UChar testdata[][3] = { | 1566 const static UChar testdata[][3] = { |
2840 {0x004A /*'J'*/, 0x0041 /*'A'*/, 0}, | 1567 {0x004A /*'J'*/, 0x0041 /*'A'*/, 0}, |
2841 {0x004D /*'M'*/, 0x0041 /*'A'*/, 0}, | 1568 {0x004D /*'M'*/, 0x0041 /*'A'*/, 0}, |
2842 {0x004B /*'K'*/, 0x0041 /*'A'*/, 0}, | 1569 {0x004B /*'K'*/, 0x0041 /*'A'*/, 0}, |
2843 {0x004B /*'K'*/, 0x0043 /*'C'*/, 0}, | 1570 {0x004B /*'K'*/, 0x0043 /*'C'*/, 0}, |
2844 {0x004A /*'J'*/, 0x0043 /*'C'*/, 0}, | 1571 {0x004A /*'J'*/, 0x0043 /*'C'*/, 0}, |
2845 {0x004D /*'M'*/, 0x0043 /*'C'*/, 0} | 1572 {0x004D /*'M'*/, 0x0043 /*'C'*/, 0} |
2846 }; | 1573 }; |
(...skipping 129 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2976 (void)klen; /* Suppress set but not used warning. */ | 1703 (void)klen; /* Suppress set but not used warning. */ |
2977 | 1704 |
2978 ucol_close(coll); | 1705 ucol_close(coll); |
2979 } else { | 1706 } else { |
2980 log_data_err("Couldn't open UCA\n"); | 1707 log_data_err("Couldn't open UCA\n"); |
2981 } | 1708 } |
2982 } | 1709 } |
2983 | 1710 |
2984 static void TestVariableTopSetting(void) { | 1711 static void TestVariableTopSetting(void) { |
2985 UErrorCode status = U_ZERO_ERROR; | 1712 UErrorCode status = U_ZERO_ERROR; |
2986 const UChar *current = NULL; | |
2987 uint32_t varTopOriginal = 0, varTop1, varTop2; | 1713 uint32_t varTopOriginal = 0, varTop1, varTop2; |
2988 UCollator *coll = ucol_open("", &status); | 1714 UCollator *coll = ucol_open("", &status); |
2989 if(U_SUCCESS(status)) { | 1715 if(U_SUCCESS(status)) { |
2990 | 1716 |
2991 uint32_t strength = 0; | 1717 static const UChar nul = 0; |
2992 uint16_t specs = 0; | 1718 static const UChar space = 0x20; |
2993 uint32_t chOffset = 0; | 1719 static const UChar dot = 0x2e; /* punctuation */ |
2994 uint32_t chLen = 0; | 1720 static const UChar degree = 0xb0; /* symbol */ |
2995 uint32_t exOffset = 0; | 1721 static const UChar dollar = 0x24; /* currency symbol */ |
2996 uint32_t exLen = 0; | 1722 static const UChar zero = 0x30; /* digit */ |
2997 uint32_t oldChOffset = 0; | |
2998 uint32_t oldChLen = 0; | |
2999 uint32_t oldExOffset = 0; | |
3000 uint32_t oldExLen = 0; | |
3001 uint32_t prefixOffset = 0; | |
3002 uint32_t prefixLen = 0; | |
3003 | 1723 |
3004 UBool startOfRules = TRUE; | 1724 varTopOriginal = ucol_getVariableTop(coll, &status); |
3005 UColTokenParser src; | 1725 log_verbose("ucol_getVariableTop(root) -> %08x\n", varTopOriginal); |
3006 UColOptionSet opts; | 1726 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); |
3007 | 1727 |
3008 UChar *rulesCopy = NULL; | 1728 varTop1 = ucol_setVariableTop(coll, &space, 1, &status); |
3009 uint32_t rulesLen; | 1729 varTop2 = ucol_getVariableTop(coll, &status); |
3010 | 1730 log_verbose("ucol_setVariableTop(space) -> %08x\n", varTop1); |
3011 UCollationResult result; | 1731 if(U_FAILURE(status) || varTop1 != varTop2 || |
3012 | 1732 !ucol_equal(coll, &nul, 0, &space, 1) || |
3013 UChar first[256] = { 0 }; | 1733 ucol_equal(coll, &nul, 0, &dot, 1) || |
3014 UChar second[256] = { 0 }; | 1734 ucol_equal(coll, &nul, 0, &degree, 1) || |
3015 UParseError parseError; | 1735 ucol_equal(coll, &nul, 0, &dollar, 1) || |
3016 int32_t myQ = getTestOption(QUICK_OPTION); | 1736 ucol_equal(coll, &nul, 0, &zero, 1) || |
3017 | 1737 ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) { |
3018 (void)prefixLen; /* Suppress set but not used warnings. */ | 1738 log_err("ucol_setVariableTop(space) did not work - %s\n", u_errorName(status
)); |
3019 (void)prefixOffset; | |
3020 (void)specs; | |
3021 | |
3022 uprv_memset(&src, 0, sizeof(UColTokenParser)); | |
3023 | |
3024 src.opts = &opts; | |
3025 | |
3026 if(getTestOption(QUICK_OPTION) <= 0) { | |
3027 setTestOption(QUICK_OPTION, 1); | |
3028 } | 1739 } |
3029 | 1740 |
3030 /* this test will fail when normalization is turned on */ | 1741 varTop1 = ucol_setVariableTop(coll, &dot, 1, &status); |
3031 /* therefore we always turn off exhaustive mode for it */ | 1742 varTop2 = ucol_getVariableTop(coll, &status); |
3032 { /* QUICK > 0*/ | 1743 log_verbose("ucol_setVariableTop(dot) -> %08x\n", varTop1); |
3033 log_verbose("Slide variable top over UCARules\n"); | 1744 if(U_FAILURE(status) || varTop1 != varTop2 || |
3034 rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, 0); | 1745 !ucol_equal(coll, &nul, 0, &space, 1) || |
3035 rulesCopy = (UChar *)uprv_malloc((rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*s
izeof(UChar)); | 1746 !ucol_equal(coll, &nul, 0, &dot, 1) || |
3036 rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, rulesLen+UCOL_T
OK_EXTRA_RULE_SPACE_SIZE); | 1747 ucol_equal(coll, &nul, 0, &degree, 1) || |
3037 | 1748 ucol_equal(coll, &nul, 0, &dollar, 1) || |
3038 if(U_SUCCESS(status) && rulesLen > 0) { | 1749 ucol_equal(coll, &nul, 0, &zero, 1) || |
3039 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); | 1750 ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) { |
3040 src.current = src.source = rulesCopy; | 1751 log_err("ucol_setVariableTop(dot) did not work - %s\n", u_errorName(status))
; |
3041 src.end = rulesCopy+rulesLen; | |
3042 src.extraCurrent = src.end; | |
3043 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE; | |
3044 | |
3045 » /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextTo
ken can cause the pointer to | |
3046 » the rules copy in src.source to get reallocated, freeing the original
pointer in rulesCopy */ | |
3047 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,
&status)) != NULL) { | |
3048 strength = src.parsedToken.strength; | |
3049 chOffset = src.parsedToken.charsOffset; | |
3050 chLen = src.parsedToken.charsLen; | |
3051 exOffset = src.parsedToken.extensionOffset; | |
3052 exLen = src.parsedToken.extensionLen; | |
3053 prefixOffset = src.parsedToken.prefixOffset; | |
3054 prefixLen = src.parsedToken.prefixLen; | |
3055 specs = src.parsedToken.flags; | |
3056 | |
3057 startOfRules = FALSE; | |
3058 { | |
3059 log_verbose("%04X %d ", *(src.source+chOffset), chLen); | |
3060 } | |
3061 if(strength == UCOL_PRIMARY) { | |
3062 status = U_ZERO_ERROR; | |
3063 varTopOriginal = ucol_getVariableTop(coll, &status); | |
3064 varTop1 = ucol_setVariableTop(coll, src.source+oldChOffset, oldChLen,
&status); | |
3065 if(U_FAILURE(status)) { | |
3066 char buffer[256]; | |
3067 char *buf = buffer; | |
3068 uint32_t i = 0, j; | |
3069 uint32_t CE = UCOL_NO_MORE_CES; | |
3070 | |
3071 /* before we start screaming, let's see if there is a problem with t
he rules */ | |
3072 UErrorCode collIterateStatus = U_ZERO_ERROR; | |
3073 collIterate *s = uprv_new_collIterate(&collIterateStatus); | |
3074 uprv_init_collIterate(coll, src.source+oldChOffset, oldChLen, s, &co
llIterateStatus); | |
3075 | |
3076 CE = ucol_getNextCE(coll, s, &status); | |
3077 (void)CE; /* Suppress set but not used warning. */ | |
3078 | |
3079 for(i = 0; i < oldChLen; i++) { | |
3080 j = sprintf(buf, "%04X ", *(src.source+oldChOffset+i)); | |
3081 buf += j; | |
3082 } | |
3083 if(status == U_PRIMARY_TOO_LONG_ERROR) { | |
3084 log_verbose("= Expected failure for %s =", buffer); | |
3085 } else { | |
3086 if(uprv_collIterateAtEnd(s)) { | |
3087 log_err("Unexpected failure setting variable top at offset %d. E
rror %s. Codepoints: %s\n", | |
3088 oldChOffset, u_errorName(status), buffer); | |
3089 } else { | |
3090 log_verbose("There is a goofy contraction in UCA rules that does
not appear in the fractional UCA. Codepoints: %s\n", | |
3091 buffer); | |
3092 } | |
3093 } | |
3094 uprv_delete_collIterate(s); | |
3095 } | |
3096 varTop2 = ucol_getVariableTop(coll, &status); | |
3097 if((varTop1 & 0xFFFF0000) != (varTop2 & 0xFFFF0000)) { | |
3098 log_err("cannot retrieve set varTop value!\n"); | |
3099 continue; | |
3100 } | |
3101 | |
3102 if((varTop1 & 0xFFFF0000) > 0 && oldExLen == 0) { | |
3103 | |
3104 u_strncpy(first, src.source+oldChOffset, oldChLen); | |
3105 u_strncpy(first+oldChLen, src.source+chOffset, chLen); | |
3106 u_strncpy(first+oldChLen+chLen, src.source+oldChOffset, oldChLen); | |
3107 first[2*oldChLen+chLen] = 0; | |
3108 | |
3109 if(oldExLen == 0) { | |
3110 u_strncpy(second, src.source+chOffset, chLen); | |
3111 second[chLen] = 0; | |
3112 } else { /* This is skipped momentarily, but should work once UCARul
es are fully UCA conformant */ | |
3113 u_strncpy(second, src.source+oldExOffset, oldExLen); | |
3114 u_strncpy(second+oldChLen, src.source+chOffset, chLen); | |
3115 u_strncpy(second+oldChLen+chLen, src.source+oldExOffset, oldExLen)
; | |
3116 second[2*oldExLen+chLen] = 0; | |
3117 } | |
3118 result = ucol_strcoll(coll, first, -1, second, -1); | |
3119 if(result == UCOL_EQUAL) { | |
3120 doTest(coll, first, second, UCOL_EQUAL); | |
3121 } else { | |
3122 log_verbose("Suspicious strcoll result for %04X and %04X\n", *(src
.source+oldChOffset), *(src.source+chOffset)); | |
3123 } | |
3124 } | |
3125 } | |
3126 if(strength != UCOL_TOK_RESET) { | |
3127 oldChOffset = chOffset; | |
3128 oldChLen = chLen; | |
3129 oldExOffset = exOffset; | |
3130 oldExLen = exLen; | |
3131 } | |
3132 } | |
3133 status = U_ZERO_ERROR; | |
3134 } | |
3135 else { | |
3136 log_err("Unexpected failure getting rules %s\n", u_errorName(status)); | |
3137 return; | |
3138 } | |
3139 if (U_FAILURE(status)) { | |
3140 log_err("Error parsing rules %s\n", u_errorName(status)); | |
3141 return; | |
3142 } | |
3143 status = U_ZERO_ERROR; | |
3144 } | 1752 } |
3145 | 1753 |
3146 setTestOption(QUICK_OPTION, myQ); | 1754 varTop1 = ucol_setVariableTop(coll, &degree, 1, &status); |
| 1755 varTop2 = ucol_getVariableTop(coll, &status); |
| 1756 log_verbose("ucol_setVariableTop(degree) -> %08x\n", varTop1); |
| 1757 if(U_FAILURE(status) || varTop1 != varTop2 || |
| 1758 !ucol_equal(coll, &nul, 0, &space, 1) || |
| 1759 !ucol_equal(coll, &nul, 0, &dot, 1) || |
| 1760 !ucol_equal(coll, &nul, 0, &degree, 1) || |
| 1761 ucol_equal(coll, &nul, 0, &dollar, 1) || |
| 1762 ucol_equal(coll, &nul, 0, &zero, 1) || |
| 1763 ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) { |
| 1764 log_err("ucol_setVariableTop(degree) did not work - %s\n", u_errorName(statu
s)); |
| 1765 } |
| 1766 |
| 1767 varTop1 = ucol_setVariableTop(coll, &dollar, 1, &status); |
| 1768 varTop2 = ucol_getVariableTop(coll, &status); |
| 1769 log_verbose("ucol_setVariableTop(dollar) -> %08x\n", varTop1); |
| 1770 if(U_FAILURE(status) || varTop1 != varTop2 || |
| 1771 !ucol_equal(coll, &nul, 0, &space, 1) || |
| 1772 !ucol_equal(coll, &nul, 0, &dot, 1) || |
| 1773 !ucol_equal(coll, &nul, 0, &degree, 1) || |
| 1774 !ucol_equal(coll, &nul, 0, &dollar, 1) || |
| 1775 ucol_equal(coll, &nul, 0, &zero, 1) || |
| 1776 ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) { |
| 1777 log_err("ucol_setVariableTop(dollar) did not work - %s\n", u_errorName(statu
s)); |
| 1778 } |
3147 | 1779 |
3148 log_verbose("Testing setting variable top to contractions\n"); | 1780 log_verbose("Testing setting variable top to contractions\n"); |
3149 { | 1781 { |
3150 UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->contractionUC
ACombos); | 1782 UChar first[4] = { 0 }; |
3151 int32_t maxUCAContractionLength = coll->image->contractionUCACombosWidth; | |
3152 while(*conts != 0) { | |
3153 /* | |
3154 * A continuation is NUL-terminated and NUL-padded | |
3155 * except if it has the maximum length. | |
3156 */ | |
3157 int32_t contractionLength = maxUCAContractionLength; | |
3158 while(contractionLength > 0 && conts[contractionLength - 1] == 0) { | |
3159 --contractionLength; | |
3160 } | |
3161 if(*(conts+1)==0) { /* pre-context */ | |
3162 varTop1 = ucol_setVariableTop(coll, conts, 1, &status); | |
3163 } else { | |
3164 varTop1 = ucol_setVariableTop(coll, conts, contractionLength, &status); | |
3165 } | |
3166 if(U_FAILURE(status)) { | |
3167 if(status == U_PRIMARY_TOO_LONG_ERROR) { | |
3168 /* ucol_setVariableTop() is documented to not accept 3-byte primaries, | |
3169 * therefore it is not an error when it complains about them. */ | |
3170 log_verbose("Couldn't set variable top to a contraction %04X %04X %04X
- U_PRIMARY_TOO_LONG_ERROR\n", | |
3171 *conts, *(conts+1), *(conts+2)); | |
3172 } else { | |
3173 log_err("Couldn't set variable top to a contraction %04X %04X %04X - %
s\n", | |
3174 *conts, *(conts+1), *(conts+2), u_errorName(status)); | |
3175 } | |
3176 status = U_ZERO_ERROR; | |
3177 } | |
3178 conts+=maxUCAContractionLength; | |
3179 } | |
3180 | |
3181 status = U_ZERO_ERROR; | |
3182 | |
3183 first[0] = 0x0040; | 1783 first[0] = 0x0040; |
3184 first[1] = 0x0050; | 1784 first[1] = 0x0050; |
3185 first[2] = 0x0000; | 1785 first[2] = 0x0000; |
3186 | 1786 |
| 1787 status = U_ZERO_ERROR; |
3187 ucol_setVariableTop(coll, first, -1, &status); | 1788 ucol_setVariableTop(coll, first, -1, &status); |
3188 | 1789 |
3189 if(U_SUCCESS(status)) { | 1790 if(U_SUCCESS(status)) { |
3190 log_err("Invalid contraction succeded in setting variable top!\n"); | 1791 log_err("Invalid contraction succeded in setting variable top!\n"); |
3191 } | 1792 } |
3192 | 1793 |
3193 } | 1794 } |
3194 | 1795 |
3195 log_verbose("Test restoring variable top\n"); | 1796 log_verbose("Test restoring variable top\n"); |
3196 | 1797 |
3197 status = U_ZERO_ERROR; | 1798 status = U_ZERO_ERROR; |
3198 ucol_restoreVariableTop(coll, varTopOriginal, &status); | 1799 ucol_restoreVariableTop(coll, varTopOriginal, &status); |
3199 if(varTopOriginal != ucol_getVariableTop(coll, &status)) { | 1800 if(varTopOriginal != ucol_getVariableTop(coll, &status)) { |
3200 log_err("Couldn't restore old variable top\n"); | 1801 log_err("Couldn't restore old variable top\n"); |
3201 } | 1802 } |
3202 | 1803 |
3203 log_verbose("Testing calling with error set\n"); | 1804 log_verbose("Testing calling with error set\n"); |
3204 | 1805 |
3205 status = U_INTERNAL_PROGRAM_ERROR; | 1806 status = U_INTERNAL_PROGRAM_ERROR; |
3206 varTop1 = ucol_setVariableTop(coll, first, 1, &status); | 1807 varTop1 = ucol_setVariableTop(coll, &space, 1, &status); |
3207 varTop2 = ucol_getVariableTop(coll, &status); | 1808 varTop2 = ucol_getVariableTop(coll, &status); |
3208 ucol_restoreVariableTop(coll, varTop2, &status); | 1809 ucol_restoreVariableTop(coll, varTop2, &status); |
3209 varTop1 = ucol_setVariableTop(NULL, first, 1, &status); | 1810 varTop1 = ucol_setVariableTop(NULL, &dot, 1, &status); |
3210 varTop2 = ucol_getVariableTop(NULL, &status); | 1811 varTop2 = ucol_getVariableTop(NULL, &status); |
3211 ucol_restoreVariableTop(NULL, varTop2, &status); | 1812 ucol_restoreVariableTop(NULL, varTop2, &status); |
3212 if(status != U_INTERNAL_PROGRAM_ERROR) { | 1813 if(status != U_INTERNAL_PROGRAM_ERROR) { |
3213 log_err("Bad reaction to passed error!\n"); | 1814 log_err("Bad reaction to passed error!\n"); |
3214 } | 1815 } |
3215 uprv_free(src.source); | |
3216 ucol_close(coll); | 1816 ucol_close(coll); |
3217 } else { | 1817 } else { |
3218 log_data_err("Couldn't open UCA collator\n"); | 1818 log_data_err("Couldn't open UCA collator\n"); |
3219 } | 1819 } |
| 1820 } |
3220 | 1821 |
| 1822 static void TestMaxVariable() { |
| 1823 UErrorCode status = U_ZERO_ERROR; |
| 1824 UColReorderCode oldMax, max; |
| 1825 UCollator *coll; |
| 1826 |
| 1827 static const UChar nul = 0; |
| 1828 static const UChar space = 0x20; |
| 1829 static const UChar dot = 0x2e; /* punctuation */ |
| 1830 static const UChar degree = 0xb0; /* symbol */ |
| 1831 static const UChar dollar = 0x24; /* currency symbol */ |
| 1832 static const UChar zero = 0x30; /* digit */ |
| 1833 |
| 1834 coll = ucol_open("", &status); |
| 1835 if(U_FAILURE(status)) { |
| 1836 log_data_err("Couldn't open root collator\n"); |
| 1837 return; |
| 1838 } |
| 1839 |
| 1840 oldMax = ucol_getMaxVariable(coll); |
| 1841 log_verbose("ucol_getMaxVariable(root) -> %04x\n", oldMax); |
| 1842 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); |
| 1843 |
| 1844 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status); |
| 1845 max = ucol_getMaxVariable(coll); |
| 1846 log_verbose("ucol_setMaxVariable(space) -> %04x\n", max); |
| 1847 if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SPACE || |
| 1848 !ucol_equal(coll, &nul, 0, &space, 1) || |
| 1849 ucol_equal(coll, &nul, 0, &dot, 1) || |
| 1850 ucol_equal(coll, &nul, 0, &degree, 1) || |
| 1851 ucol_equal(coll, &nul, 0, &dollar, 1) || |
| 1852 ucol_equal(coll, &nul, 0, &zero, 1) || |
| 1853 ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) { |
| 1854 log_err("ucol_setMaxVariable(space) did not work - %s\n", u_errorName(status
)); |
| 1855 } |
| 1856 |
| 1857 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_PUNCTUATION, &status); |
| 1858 max = ucol_getMaxVariable(coll); |
| 1859 log_verbose("ucol_setMaxVariable(punctuation) -> %04x\n", max); |
| 1860 if(U_FAILURE(status) || max != UCOL_REORDER_CODE_PUNCTUATION || |
| 1861 !ucol_equal(coll, &nul, 0, &space, 1) || |
| 1862 !ucol_equal(coll, &nul, 0, &dot, 1) || |
| 1863 ucol_equal(coll, &nul, 0, &degree, 1) || |
| 1864 ucol_equal(coll, &nul, 0, &dollar, 1) || |
| 1865 ucol_equal(coll, &nul, 0, &zero, 1) || |
| 1866 ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) { |
| 1867 log_err("ucol_setMaxVariable(punctuation) did not work - %s\n", u_errorName(
status)); |
| 1868 } |
| 1869 |
| 1870 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SYMBOL, &status); |
| 1871 max = ucol_getMaxVariable(coll); |
| 1872 log_verbose("ucol_setMaxVariable(symbol) -> %04x\n", max); |
| 1873 if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SYMBOL || |
| 1874 !ucol_equal(coll, &nul, 0, &space, 1) || |
| 1875 !ucol_equal(coll, &nul, 0, &dot, 1) || |
| 1876 !ucol_equal(coll, &nul, 0, &degree, 1) || |
| 1877 ucol_equal(coll, &nul, 0, &dollar, 1) || |
| 1878 ucol_equal(coll, &nul, 0, &zero, 1) || |
| 1879 ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) { |
| 1880 log_err("ucol_setMaxVariable(symbol) did not work - %s\n", u_errorName(statu
s)); |
| 1881 } |
| 1882 |
| 1883 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_CURRENCY, &status); |
| 1884 max = ucol_getMaxVariable(coll); |
| 1885 log_verbose("ucol_setMaxVariable(currency) -> %04x\n", max); |
| 1886 if(U_FAILURE(status) || max != UCOL_REORDER_CODE_CURRENCY || |
| 1887 !ucol_equal(coll, &nul, 0, &space, 1) || |
| 1888 !ucol_equal(coll, &nul, 0, &dot, 1) || |
| 1889 !ucol_equal(coll, &nul, 0, &degree, 1) || |
| 1890 !ucol_equal(coll, &nul, 0, &dollar, 1) || |
| 1891 ucol_equal(coll, &nul, 0, &zero, 1) || |
| 1892 ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) { |
| 1893 log_err("ucol_setMaxVariable(currency) did not work - %s\n", u_errorName(sta
tus)); |
| 1894 } |
| 1895 |
| 1896 log_verbose("Test restoring maxVariable\n"); |
| 1897 status = U_ZERO_ERROR; |
| 1898 ucol_setMaxVariable(coll, oldMax, &status); |
| 1899 if(oldMax != ucol_getMaxVariable(coll)) { |
| 1900 log_err("Couldn't restore old maxVariable\n"); |
| 1901 } |
| 1902 |
| 1903 log_verbose("Testing calling with error set\n"); |
| 1904 status = U_INTERNAL_PROGRAM_ERROR; |
| 1905 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status); |
| 1906 max = ucol_getMaxVariable(coll); |
| 1907 if(max != oldMax || status != U_INTERNAL_PROGRAM_ERROR) { |
| 1908 log_err("Bad reaction to passed error!\n"); |
| 1909 } |
| 1910 ucol_close(coll); |
3221 } | 1911 } |
3222 | 1912 |
3223 static void TestNonChars(void) { | 1913 static void TestNonChars(void) { |
3224 static const char *test[] = { | 1914 static const char *test[] = { |
3225 "\\u0000", /* ignorable */ | 1915 "\\u0000", /* ignorable */ |
3226 "\\uFFFE", /* special merge-sort character with minimum non-ignorable wei
ghts */ | 1916 "\\uFFFE", /* special merge-sort character with minimum non-ignorable wei
ghts */ |
3227 "\\uFDD0", "\\uFDEF", | 1917 "\\uFDD0", "\\uFDEF", |
3228 "\\U0001FFFE", "\\U0001FFFF", /* UCA 6.0: noncharacters are treated like
unassigned, */ | 1918 "\\U0001FFFE", "\\U0001FFFF", /* UCA 6.0: noncharacters are treated like
unassigned, */ |
3229 "\\U0002FFFE", "\\U0002FFFF", /* not like ignorable. */ | 1919 "\\U0002FFFE", "\\U0002FFFF", /* not like ignorable. */ |
3230 "\\U0003FFFE", "\\U0003FFFF", | 1920 "\\U0003FFFE", "\\U0003FFFF", |
(...skipping 464 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3695 * is not the weight of any character or string, | 2385 * is not the weight of any character or string, |
3696 * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular]. | 2386 * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular]. |
3697 */ | 2387 */ |
3698 #define LAST_REGULAR_CHAR_STRING "\\U0001342E" | 2388 #define LAST_REGULAR_CHAR_STRING "\\U0001342E" |
3699 | 2389 |
3700 static const struct { | 2390 static const struct { |
3701 const char *rules; | 2391 const char *rules; |
3702 const char *data[10]; | 2392 const char *data[10]; |
3703 const uint32_t len; | 2393 const uint32_t len; |
3704 } tests[] = { | 2394 } tests[] = { |
| 2395 #if 0 |
| 2396 /* "you cannot go before ...": The parser now sets an error for such nonsens
ical rules. */ |
3705 /* - all befores here amount to zero */ | 2397 /* - all befores here amount to zero */ |
3706 { "&[before 3][first tertiary ignorable]<<<a", | 2398 { "&[before 3][first tertiary ignorable]<<<a", |
3707 { "\\u0000", "a"}, 2 | 2399 { "\\u0000", "a"}, 2 |
3708 }, /* you cannot go before first tertiary ignorable */ | 2400 }, /* you cannot go before first tertiary ignorable */ |
3709 | 2401 |
3710 { "&[before 3][last tertiary ignorable]<<<a", | 2402 { "&[before 3][last tertiary ignorable]<<<a", |
3711 { "\\u0000", "a"}, 2 | 2403 { "\\u0000", "a"}, 2 |
3712 }, /* you cannot go before last tertiary ignorable */ | 2404 }, /* you cannot go before last tertiary ignorable */ |
3713 | 2405 #endif |
| 2406 /* |
| 2407 * However, there is a real secondary ignorable (artificial addition in Frac
tionalUCA.txt), |
| 2408 * and it *is* possible to "go before" that. |
| 2409 */ |
3714 { "&[before 3][first secondary ignorable]<<<a", | 2410 { "&[before 3][first secondary ignorable]<<<a", |
3715 { "\\u0000", "a"}, 2 | 2411 { "\\u0000", "a"}, 2 |
3716 }, /* you cannot go before first secondary ignorable */ | 2412 }, |
3717 | 2413 |
3718 { "&[before 3][last secondary ignorable]<<<a", | 2414 { "&[before 3][last secondary ignorable]<<<a", |
3719 { "\\u0000", "a"}, 2 | 2415 { "\\u0000", "a"}, 2 |
3720 }, /* you cannot go before first secondary ignorable */ | 2416 }, |
3721 | 2417 |
3722 /* 'normal' befores */ | 2418 /* 'normal' befores */ |
3723 | 2419 |
3724 { "&[before 3][first primary ignorable]<<<c<<<b &[first primary ignorable]<a
", | 2420 /* |
| 2421 * Note: With a "SPACE first primary" boundary CE in FractionalUCA.txt, |
| 2422 * it is not possible to tailor &[first primary ignorable]<a or &[last prima
ry ignorable]<a |
| 2423 * because there is no tailoring space before that boundary. |
| 2424 * Made the tests work by tailoring to a space instead. |
| 2425 */ |
| 2426 { "&[before 3][first primary ignorable]<<<c<<<b &' '<a", /* was &[first pri
mary ignorable]<a */ |
3725 { "c", "b", "\\u0332", "a" }, 4 | 2427 { "c", "b", "\\u0332", "a" }, 4 |
3726 }, | 2428 }, |
3727 | 2429 |
3728 /* we don't have a code point that corresponds to | 2430 /* we don't have a code point that corresponds to |
3729 * the last primary ignorable | 2431 * the last primary ignorable |
3730 */ | 2432 */ |
3731 { "&[before 3][last primary ignorable]<<<c<<<b &[last primary ignorable]<a", | 2433 { "&[before 3][last primary ignorable]<<<c<<<b &' '<a", /* was &[last prima
ry ignorable]<a */ |
3732 { "\\u0332", "\\u20e3", "c", "b", "a" }, 5 | 2434 { "\\u0332", "\\u20e3", "c", "b", "a" }, 5 |
3733 }, | 2435 }, |
3734 | 2436 |
3735 { "&[before 3][first variable]<<<c<<<b &[first variable]<a", | 2437 { "&[before 3][first variable]<<<c<<<b &[first variable]<a", |
3736 { "c", "b", "\\u0009", "a", "\\u000a" }, 5 | 2438 { "c", "b", "\\u0009", "a", "\\u000a" }, 5 |
3737 }, | 2439 }, |
3738 | 2440 |
3739 { "&[last variable]<a &[before 3][last variable]<<<c<<<b ", | 2441 { "&[last variable]<a &[before 3][last variable]<<<c<<<b ", |
3740 { LAST_VARIABLE_CHAR_STRING, "c", "b", /* [last variable] */ "a", FIRST_
REGULAR_CHAR_STRING }, 5 | 2442 { LAST_VARIABLE_CHAR_STRING, "c", "b", /* [last variable] */ "a", FIRST_
REGULAR_CHAR_STRING }, 5 |
3741 }, | 2443 }, |
3742 | 2444 |
3743 { "&[first regular]<a" | 2445 { "&[first regular]<a" |
3744 "&[before 1][first regular]<b", | 2446 "&[before 1][first regular]<b", |
3745 { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING }, 4 | 2447 { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING }, 4 |
3746 }, | 2448 }, |
3747 | 2449 |
3748 { "&[before 1][last regular]<b" | 2450 { "&[before 1][last regular]<b" |
3749 "&[last regular]<a", | 2451 "&[last regular]<a", |
3750 { LAST_REGULAR_CHAR_STRING, "b", /* [last regular] */ "a", "\\u4e00" },
4 | 2452 { LAST_REGULAR_CHAR_STRING, "b", /* [last regular] */ "a", "\\u4e00" },
4 |
3751 }, | 2453 }, |
3752 | 2454 |
3753 { "&[before 1][first implicit]<b" | 2455 { "&[before 1][first implicit]<b" |
3754 "&[first implicit]<a", | 2456 "&[first implicit]<a", |
3755 { "b", "\\u4e00", "a", "\\u4e01"}, 4 | 2457 { "b", "\\u4e00", "a", "\\u4e01"}, 4 |
3756 }, | 2458 }, |
3757 | 2459 #if 0 /* The current builder does not support tailoring to unassigned-implicit
CEs (seems unnecessary, adds complexity). */ |
3758 { "&[before 1][last implicit]<b" | 2460 { "&[before 1][last implicit]<b" |
3759 "&[last implicit]<a", | 2461 "&[last implicit]<a", |
3760 { "b", "\\U0010FFFD", "a" }, 3 | 2462 { "b", "\\U0010FFFD", "a" }, 3 |
3761 }, | 2463 }, |
3762 | 2464 #endif |
3763 { "&[last variable]<z" | 2465 { "&[last variable]<z" |
3764 "&[last primary ignorable]<x" | 2466 "&' '<x" /* was &[last primary ignorable]<x, see above */ |
3765 "&[last secondary ignorable]<<y" | 2467 "&[last secondary ignorable]<<y" |
3766 "&[last tertiary ignorable]<<<w" | 2468 "&[last tertiary ignorable]<<<w" |
3767 "&[top]<u", | 2469 "&[top]<u", |
3768 {"\\ufffb", "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u"
}, 7 | 2470 {"\\ufffb", "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u"
}, 7 |
3769 } | 2471 } |
3770 | 2472 |
3771 }; | 2473 }; |
3772 uint32_t i; | 2474 uint32_t i; |
3773 | 2475 |
3774 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) { | 2476 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) { |
(...skipping 225 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4000 ucol_close(coll); | 2702 ucol_close(coll); |
4001 | 2703 |
4002 } | 2704 } |
4003 | 2705 |
4004 static void TestPartialSortKeyTermination(void) { | 2706 static void TestPartialSortKeyTermination(void) { |
4005 static const char* cases[] = { | 2707 static const char* cases[] = { |
4006 "\\u1234\\u1234\\udc00", | 2708 "\\u1234\\u1234\\udc00", |
4007 "\\udc00\\ud800\\ud800" | 2709 "\\udc00\\ud800\\ud800" |
4008 }; | 2710 }; |
4009 | 2711 |
4010 int32_t i = sizeof(UCollator); | 2712 int32_t i; |
4011 | 2713 |
4012 UErrorCode status = U_ZERO_ERROR; | 2714 UErrorCode status = U_ZERO_ERROR; |
4013 | 2715 |
4014 UCollator *coll = ucol_open("", &status); | 2716 UCollator *coll = ucol_open("", &status); |
4015 | 2717 |
4016 UCharIterator iter; | 2718 UCharIterator iter; |
4017 | 2719 |
4018 UChar currCase[256]; | 2720 UChar currCase[256]; |
4019 int32_t length = 0; | 2721 int32_t length = 0; |
4020 int32_t pKeyLen = 0; | 2722 int32_t pKeyLen = 0; |
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4074 const UChar *sourceRules = NULL; | 2776 const UChar *sourceRules = NULL; |
4075 int32_t sourceRulesLen = 0; | 2777 int32_t sourceRulesLen = 0; |
4076 UParseError parseError; | 2778 UParseError parseError; |
4077 UColAttributeValue french = UCOL_OFF; | 2779 UColAttributeValue french = UCOL_OFF; |
4078 | 2780 |
4079 if(!ucol_equals(source, target)) { | 2781 if(!ucol_equals(source, target)) { |
4080 log_err("Same collators, different address not equal\n"); | 2782 log_err("Same collators, different address not equal\n"); |
4081 errorNo++; | 2783 errorNo++; |
4082 } | 2784 } |
4083 ucol_close(target); | 2785 ucol_close(target); |
4084 if(uprv_strcmp(ucol_getLocaleByType(source, ULOC_REQUESTED_LOCALE, &status),
ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) { | 2786 if(uprv_strcmp(locName, ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &st
atus)) == 0) { |
4085 target = ucol_safeClone(source, NULL, NULL, &status); | 2787 target = ucol_safeClone(source, NULL, NULL, &status); |
4086 if(U_FAILURE(status)) { | 2788 if(U_FAILURE(status)) { |
4087 log_err("Error creating clone\n"); | 2789 log_err("Error creating clone\n"); |
4088 errorNo++; | 2790 errorNo++; |
4089 return errorNo; | 2791 return errorNo; |
4090 } | 2792 } |
4091 if(!ucol_equals(source, target)) { | 2793 if(!ucol_equals(source, target)) { |
4092 log_err("Collator different from it's clone\n"); | 2794 log_err("Collator different from it's clone\n"); |
4093 errorNo++; | 2795 errorNo++; |
4094 } | 2796 } |
(...skipping 14 matching lines...) Expand all Loading... |
4109 } | 2811 } |
4110 ucol_close(target); | 2812 ucol_close(target); |
4111 | 2813 |
4112 sourceRules = ucol_getRules(source, &sourceRulesLen); | 2814 sourceRules = ucol_getRules(source, &sourceRulesLen); |
4113 target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_
DEFAULT, &parseError, &status); | 2815 target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_
DEFAULT, &parseError, &status); |
4114 if(U_FAILURE(status)) { | 2816 if(U_FAILURE(status)) { |
4115 log_err("Error instantiating target from rules - %s\n", u_errorName(
status)); | 2817 log_err("Error instantiating target from rules - %s\n", u_errorName(
status)); |
4116 errorNo++; | 2818 errorNo++; |
4117 return errorNo; | 2819 return errorNo; |
4118 } | 2820 } |
4119 if(!ucol_equals(source, target)) { | 2821 /* Note: The tailoring rule string is an optional data item. */ |
| 2822 if(!ucol_equals(source, target) && sourceRulesLen != 0) { |
4120 log_err("Collator different from collator that was created from the
same rules\n"); | 2823 log_err("Collator different from collator that was created from the
same rules\n"); |
4121 errorNo++; | 2824 errorNo++; |
4122 } | 2825 } |
4123 ucol_close(target); | 2826 ucol_close(target); |
4124 } | 2827 } |
4125 return errorNo; | 2828 return errorNo; |
4126 } | 2829 } |
4127 | 2830 |
4128 | 2831 |
4129 static void TestEquals(void) { | 2832 static void TestEquals(void) { |
4130 /* ucol_equals is not currently a public API. There is a chance that it will
become | 2833 /* ucol_equals is not currently a public API. There is a chance that it will
become |
4131 * something like this, but currently it is only used by RuleBasedCollator::o
perator== | 2834 * something like this. |
4132 */ | 2835 */ |
4133 /* test whether the two collators instantiated from the same locale are equa
l */ | 2836 /* test whether the two collators instantiated from the same locale are equa
l */ |
4134 UErrorCode status = U_ZERO_ERROR; | 2837 UErrorCode status = U_ZERO_ERROR; |
4135 UParseError parseError; | 2838 UParseError parseError; |
4136 int32_t noOfLoc = uloc_countAvailable(); | 2839 int32_t noOfLoc = uloc_countAvailable(); |
4137 const char *locName = NULL; | 2840 const char *locName = NULL; |
4138 UCollator *source = NULL, *target = NULL; | 2841 UCollator *source = NULL, *target = NULL; |
4139 int32_t i = 0; | 2842 int32_t i = 0; |
4140 | 2843 |
4141 const char* rules[] = { | 2844 const char* rules[] = { |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4176 } | 2879 } |
4177 ucol_close(source); | 2880 ucol_close(source); |
4178 ucol_close(target); | 2881 ucol_close(target); |
4179 | 2882 |
4180 source = ucol_open("root", &status); | 2883 source = ucol_open("root", &status); |
4181 target = ucol_open("root", &status); | 2884 target = ucol_open("root", &status); |
4182 log_verbose("Testing root\n"); | 2885 log_verbose("Testing root\n"); |
4183 if(!ucol_equals(source, source)) { | 2886 if(!ucol_equals(source, source)) { |
4184 log_err("Same collator not equal\n"); | 2887 log_err("Same collator not equal\n"); |
4185 } | 2888 } |
4186 if(TestEqualsForCollator(locName, source, target)) { | 2889 if(TestEqualsForCollator("root", source, target)) { |
4187 log_err("Errors for root\n", locName); | 2890 log_err("Errors for root\n"); |
4188 } | 2891 } |
4189 ucol_close(source); | 2892 ucol_close(source); |
4190 | 2893 |
4191 for(i = 0; i<noOfLoc; i++) { | 2894 for(i = 0; i<noOfLoc; i++) { |
4192 status = U_ZERO_ERROR; | 2895 status = U_ZERO_ERROR; |
4193 locName = uloc_getAvailable(i); | 2896 locName = uloc_getAvailable(i); |
4194 /*if(hasCollationElements(locName)) {*/ | 2897 /*if(hasCollationElements(locName)) {*/ |
4195 log_verbose("Testing equality for locale %s\n", locName); | 2898 log_verbose("Testing equality for locale %s\n", locName); |
4196 source = ucol_open(locName, &status); | 2899 source = ucol_open(locName, &status); |
4197 target = ucol_open(locName, &status); | 2900 target = ucol_open(locName, &status); |
(...skipping 194 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4392 ucol_close(coll); | 3095 ucol_close(coll); |
4393 | 3096 |
4394 genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL); | 3097 genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL); |
4395 } | 3098 } |
4396 | 3099 |
4397 static void TestPinyinProblem(void) { | 3100 static void TestPinyinProblem(void) { |
4398 static const char *test[] = { "\\u4E56\\u4E56\\u7761", "\\u4E56\\u5B69\\u5B5
0" }; | 3101 static const char *test[] = { "\\u4E56\\u4E56\\u7761", "\\u4E56\\u5B69\\u5B5
0" }; |
4399 genericLocaleStarter("zh__PINYIN", test, sizeof(test)/sizeof(test[0])); | 3102 genericLocaleStarter("zh__PINYIN", test, sizeof(test)/sizeof(test[0])); |
4400 } | 3103 } |
4401 | 3104 |
4402 #define TST_UCOL_MAX_INPUT 0x220001 | |
4403 #define topByte 0xFF000000; | |
4404 #define bottomByte 0xFF; | |
4405 #define fourBytes 0xFFFFFFFF; | |
4406 | |
4407 | |
4408 static void showImplicit(UChar32 i) { | |
4409 if (i >= 0 && i <= TST_UCOL_MAX_INPUT) { | |
4410 log_verbose("%08X\t%08X\n", i, uprv_uca_getImplicitFromRaw(i)); | |
4411 } | |
4412 } | |
4413 | |
4414 static void TestImplicitGeneration(void) { | |
4415 UErrorCode status = U_ZERO_ERROR; | |
4416 UChar32 last = 0; | |
4417 UChar32 current; | |
4418 UChar32 i = 0, j = 0; | |
4419 UChar32 roundtrip = 0; | |
4420 UChar32 lastBottom = 0; | |
4421 UChar32 currentBottom = 0; | |
4422 UChar32 lastTop = 0; | |
4423 UChar32 currentTop = 0; | |
4424 | |
4425 UCollator *coll = ucol_open("root", &status); | |
4426 if(U_FAILURE(status)) { | |
4427 log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status))
; | |
4428 return; | |
4429 } | |
4430 | |
4431 uprv_uca_getRawFromImplicit(0xE20303E7); | |
4432 | |
4433 for (i = 0; i <= TST_UCOL_MAX_INPUT; ++i) { | |
4434 current = uprv_uca_getImplicitFromRaw(i) & fourBytes; | |
4435 | |
4436 /* check that it round-trips AND that all intervening ones are illegal*/ | |
4437 roundtrip = uprv_uca_getRawFromImplicit(current); | |
4438 if (roundtrip != i) { | |
4439 log_err("No roundtrip %08X\n", i); | |
4440 } | |
4441 if (last != 0) { | |
4442 for (j = last + 1; j < current; ++j) { | |
4443 roundtrip = uprv_uca_getRawFromImplicit(j); | |
4444 /* raise an error if it *doesn't* find an error*/ | |
4445 if (roundtrip != -1) { | |
4446 log_err("Fails to recognize illegal %08X\n", j); | |
4447 } | |
4448 } | |
4449 } | |
4450 /* now do other consistency checks*/ | |
4451 lastBottom = last & bottomByte; | |
4452 currentBottom = current & bottomByte; | |
4453 lastTop = last & topByte; | |
4454 currentTop = current & topByte; | |
4455 (void)lastBottom; /* Suppress set but not used warnings. */ | |
4456 (void)currentBottom; | |
4457 | |
4458 /* print out some values for spot-checking*/ | |
4459 if (lastTop != currentTop || i == 0x10000 || i == 0x110000) { | |
4460 showImplicit(i-3); | |
4461 showImplicit(i-2); | |
4462 showImplicit(i-1); | |
4463 showImplicit(i); | |
4464 showImplicit(i+1); | |
4465 showImplicit(i+2); | |
4466 } | |
4467 last = current; | |
4468 | |
4469 if(uprv_uca_getCodePointFromRaw(uprv_uca_getRawFromCodePoint(i)) != i) { | |
4470 log_err("No raw <-> code point roundtrip for 0x%08X\n", i); | |
4471 } | |
4472 } | |
4473 showImplicit(TST_UCOL_MAX_INPUT-2); | |
4474 showImplicit(TST_UCOL_MAX_INPUT-1); | |
4475 showImplicit(TST_UCOL_MAX_INPUT); | |
4476 ucol_close(coll); | |
4477 } | |
4478 | |
4479 /** | 3105 /** |
4480 * Iterate through the given iterator, checking to see that all the strings | 3106 * Iterate through the given iterator, checking to see that all the strings |
4481 * in the expected array are present. | 3107 * in the expected array are present. |
4482 * @param expected array of strings we expect to see, or NULL | 3108 * @param expected array of strings we expect to see, or NULL |
4483 * @param expectedCount number of elements of expected, or 0 | 3109 * @param expectedCount number of elements of expected, or 0 |
4484 */ | 3110 */ |
4485 static int32_t checkUEnumeration(const char* msg, | 3111 static int32_t checkUEnumeration(const char* msg, |
4486 UEnumeration* iter, | 3112 UEnumeration* iter, |
4487 const char** expected, | 3113 const char** expected, |
4488 int32_t expectedCount) { | 3114 int32_t expectedCount) { |
(...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4595 if (assertSuccess("getFunctionalEquivalent", &ec)) { | 3221 if (assertSuccess("getFunctionalEquivalent", &ec)) { |
4596 assertEquals("getFunctionalEquivalent(de)", "root", loc); | 3222 assertEquals("getFunctionalEquivalent(de)", "root", loc); |
4597 assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE", | 3223 assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE", |
4598 isAvailable == TRUE); | 3224 isAvailable == TRUE); |
4599 } | 3225 } |
4600 | 3226 |
4601 n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE", | 3227 n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE", |
4602 &isAvailable, &ec); | 3228 &isAvailable, &ec); |
4603 if (assertSuccess("getFunctionalEquivalent", &ec)) { | 3229 if (assertSuccess("getFunctionalEquivalent", &ec)) { |
4604 assertEquals("getFunctionalEquivalent(de_DE)", "root", loc); | 3230 assertEquals("getFunctionalEquivalent(de_DE)", "root", loc); |
4605 assertTrue("getFunctionalEquivalent(de_DE).isAvailable==TRUE", | 3231 assertTrue("getFunctionalEquivalent(de_DE).isAvailable==FALSE", |
4606 isAvailable == TRUE); | 3232 isAvailable == FALSE); |
4607 } | 3233 } |
4608 } | 3234 } |
4609 | 3235 |
4610 /* supercedes TestJ784 */ | 3236 /* supersedes TestJ784 */ |
4611 static void TestBeforePinyin(void) { | 3237 static void TestBeforePinyin(void) { |
4612 const static char rules[] = { | 3238 const static char rules[] = { |
4613 "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<
\\u00E0<<<\\u00C0" | 3239 "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<
\\u00E0<<<\\u00C0" |
4614 "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<
\\u00E8<<<\\u00C8" | 3240 "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<
\\u00E8<<<\\u00C8" |
4615 "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<
\\u00EC<<<\\u00CC" | 3241 "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<
\\u00EC<<<\\u00CC" |
4616 "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<
\\u00F2<<<\\u00D2" | 3242 "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<
\\u00F2<<<\\u00D2" |
(...skipping 331 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4948 } | 3574 } |
4949 | 3575 |
4950 static void | 3576 static void |
4951 TestVI5913(void) | 3577 TestVI5913(void) |
4952 { | 3578 { |
4953 UErrorCode status = U_ZERO_ERROR; | 3579 UErrorCode status = U_ZERO_ERROR; |
4954 int32_t i, j; | 3580 int32_t i, j; |
4955 UCollator *coll =NULL; | 3581 UCollator *coll =NULL; |
4956 uint8_t resColl[100], expColl[100]; | 3582 uint8_t resColl[100], expColl[100]; |
4957 int32_t rLen, tLen, ruleLen, sLen, kLen; | 3583 int32_t rLen, tLen, ruleLen, sLen, kLen; |
4958 UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0}; /* &a<0x1FF3-omega with Ypog
egrammeni*/ | 3584 UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0}; /* &b<0x1FF3-omega with Ypog
egrammeni*/ |
4959 UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0}; /* &z<s with caron*/ | 3585 UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0}; /* &z<s with caron*/ |
4960 UChar rule3[256]={0x26, 0x7a, 0x3c, 0x0061, 0x00ea, 0}; /* &z<a+e with circ
umflex.*/ | 3586 /* |
| 3587 * Note: Just tailoring &z<ae^ does not work as expected: |
| 3588 * The UCA spec requires for discontiguous contractions that they |
| 3589 * extend an *existing match* by one combining mark at a time. |
| 3590 * Therefore, ae must be a contraction so that the builder finds |
| 3591 * discontiguous contractions for ae^, for example with an intervening under
dot. |
| 3592 * Only then do we get the expected tail closure with a\u1EC7, a\u1EB9\u0302
, etc. |
| 3593 */ |
| 3594 UChar rule3[256]={ |
| 3595 0x26, 0x78, 0x3c, 0x61, 0x65, /* &x<ae */ |
| 3596 0x26, 0x7a, 0x3c, 0x0061, 0x00ea, /* &z<a+e with circumflex.*/ |
| 3597 0}; |
4961 static const UChar tData[][20]={ | 3598 static const UChar tData[][20]={ |
4962 {0x1EAC, 0}, | 3599 {0x1EAC, 0}, |
4963 {0x0041, 0x0323, 0x0302, 0}, | 3600 {0x0041, 0x0323, 0x0302, 0}, |
4964 {0x1EA0, 0x0302, 0}, | 3601 {0x1EA0, 0x0302, 0}, |
4965 {0x00C2, 0x0323, 0}, | 3602 {0x00C2, 0x0323, 0}, |
4966 {0x1ED8, 0}, /* O with dot and circumflex */ | 3603 {0x1ED8, 0}, /* O with dot and circumflex */ |
4967 {0x1ECC, 0x0302, 0}, | 3604 {0x1ECC, 0x0302, 0}, |
4968 {0x1EB7, 0}, | 3605 {0x1EB7, 0}, |
4969 {0x1EA1, 0x0306, 0}, | 3606 {0x1EA1, 0x0306, 0}, |
4970 }; | 3607 }; |
(...skipping 120 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5091 } | 3728 } |
5092 } | 3729 } |
5093 } | 3730 } |
5094 ucol_close(coll); | 3731 ucol_close(coll); |
5095 | 3732 |
5096 log_verbose("\n\nTailoring test for &z< ae with circumflex:"); | 3733 log_verbose("\n\nTailoring test for &z< ae with circumflex:"); |
5097 ruleLen = u_strlen(rule3); | 3734 ruleLen = u_strlen(rule3); |
5098 coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status)
; | 3735 coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status)
; |
5099 tLen = u_strlen(tailorData3[3]); | 3736 tLen = u_strlen(tailorData3[3]); |
5100 kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100); | 3737 kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100); |
| 3738 log_verbose("\n Test Data[3] :%s \tlen: %d key: ", aescstrdup(tailorData3[3
], tLen), tLen); |
| 3739 for(i = 0; i<kLen; i++) { |
| 3740 log_verbose(" %02X", expColl[i]); |
| 3741 } |
5101 for (j=4; j<6; j++) { | 3742 for (j=4; j<6; j++) { |
5102 tLen = u_strlen(tailorData3[j]); | 3743 tLen = u_strlen(tailorData3[j]); |
5103 rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100); | 3744 rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100); |
5104 | 3745 |
5105 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=
0 ) { | 3746 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=
0 ) { |
5106 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, tailo
rData[j], tLen); | 3747 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, aescs
trdup(tailorData3[j], tLen), tLen); |
5107 for(i = 0; i<rLen; i++) { | 3748 for(i = 0; i<rLen; i++) { |
5108 log_err(" %02X", resColl[i]); | 3749 log_err(" %02X", resColl[i]); |
5109 } | 3750 } |
5110 } | 3751 } |
5111 | 3752 |
5112 log_verbose("\n Test Data[%d] :%s \tlen: %d key: ", j, tailorData[j], t
Len); | 3753 log_verbose("\n Test Data[%d] :%s \tlen: %d key: ", j, aescstrdup(tailo
rData3[j], tLen), tLen); |
5113 for(i = 0; i<rLen; i++) { | 3754 for(i = 0; i<rLen; i++) { |
5114 log_verbose(" %02X", resColl[i]); | 3755 log_verbose(" %02X", resColl[i]); |
5115 } | 3756 } |
5116 } | 3757 } |
5117 ucol_close(coll); | 3758 ucol_close(coll); |
5118 } | 3759 } |
5119 | 3760 |
5120 static void | 3761 static void |
5121 TestTailor6179(void) | 3762 TestTailor6179(void) |
5122 { | 3763 { |
(...skipping 23 matching lines...) Expand all Loading... |
5146 }; | 3787 }; |
5147 static const UChar tData2[][4]={ | 3788 static const UChar tData2[][4]={ |
5148 {0x61, 0}, | 3789 {0x61, 0}, |
5149 {0x62, 0}, | 3790 {0x62, 0}, |
5150 { 0xFDD0,0x009E, 0} | 3791 { 0xFDD0,0x009E, 0} |
5151 }; | 3792 }; |
5152 | 3793 |
5153 /* | 3794 /* |
5154 * These values from FractionalUCA.txt will change, | 3795 * These values from FractionalUCA.txt will change, |
5155 * and need to be updated here. | 3796 * and need to be updated here. |
| 3797 * TODO: Make this not check for particular sort keys. |
| 3798 * Instead, test that we get CEs before & after other ignorables; see ticket
#6179. |
5156 */ | 3799 */ |
5157 static const uint8_t firstPrimaryIgnCE[]={1, 0x88, 1, 5, 0}; | 3800 static const uint8_t firstPrimaryIgnCE[]={1, 0x83, 1, 5, 0}; |
5158 static const uint8_t lastPrimaryIgnCE[]={1, 0xE3, 1, 5, 0}; | 3801 static const uint8_t lastPrimaryIgnCE[]={1, 0xFC, 1, 5, 0}; |
5159 static const uint8_t firstSecondaryIgnCE[]={1, 1, 0xbf, 0x04, 0}; | 3802 static const uint8_t firstSecondaryIgnCE[]={1, 1, 0xfe, 0}; |
5160 static const uint8_t lastSecondaryIgnCE[]={1, 1, 0xbf, 0x04, 0}; | 3803 static const uint8_t lastSecondaryIgnCE[]={1, 1, 0xff, 0}; |
| 3804 |
| 3805 UParseError parseError; |
5161 | 3806 |
5162 /* Test [Last Primary ignorable] */ | 3807 /* Test [Last Primary ignorable] */ |
5163 | 3808 |
5164 log_verbose("Tailoring test: &[last primary ignorable]<<a &[first primary i
gnorable]<<b\n"); | 3809 log_verbose("Tailoring test: &[last primary ignorable]<<a &[first primary i
gnorable]<<b\n"); |
5165 ruleLen = u_strlen(rule1); | 3810 ruleLen = u_strlen(rule1); |
5166 coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status)
; | 3811 coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status)
; |
5167 if (U_FAILURE(status)) { | 3812 if (U_FAILURE(status)) { |
5168 log_err_status(status, "Tailoring test: &[last primary ignorable] failed
! -> %s\n", u_errorName(status)); | 3813 log_err_status(status, "Tailoring test: &[last primary ignorable] failed
! -> %s\n", u_errorName(status)); |
5169 return; | 3814 return; |
5170 } | 3815 } |
(...skipping 13 matching lines...) Expand all Loading... |
5184 for(i = 0; i<rLen; i++) { | 3829 for(i = 0; i<rLen; i++) { |
5185 log_err(" %02X", resColl[i]); | 3830 log_err(" %02X", resColl[i]); |
5186 } | 3831 } |
5187 log_err("\n"); | 3832 log_err("\n"); |
5188 } | 3833 } |
5189 ucol_close(coll); | 3834 ucol_close(coll); |
5190 | 3835 |
5191 | 3836 |
5192 /* Test [Last Secondary ignorable] */ | 3837 /* Test [Last Secondary ignorable] */ |
5193 log_verbose("Tailoring test: &[last secondary ignorable]<<<a &[first second
ary ignorable]<<<b\n"); | 3838 log_verbose("Tailoring test: &[last secondary ignorable]<<<a &[first second
ary ignorable]<<<b\n"); |
5194 ruleLen = u_strlen(rule1); | 3839 ruleLen = u_strlen(rule2); |
5195 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status)
; | 3840 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, &parseError,
&status); |
5196 if (U_FAILURE(status)) { | 3841 if (U_FAILURE(status)) { |
5197 log_err("Tailoring test: &[last secondary ignorable] failed! -> %s\n", u
_errorName(status)); | 3842 log_err("Tailoring test: &[last secondary ignorable] failed! -> %s\n", u
_errorName(status)); |
| 3843 log_info(" offset=%d \"%s\" | \"%s\"\n", |
| 3844 parseError.offset, aescstrdup(parseError.preContext, -1), aescs
trdup(parseError.postContext, -1)); |
5198 return; | 3845 return; |
5199 } | 3846 } |
5200 tLen = u_strlen(tData2[0]); | 3847 tLen = u_strlen(tData2[0]); |
5201 rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100); | 3848 rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100); |
5202 if (rLen != LEN(lastSecondaryIgnCE) || uprv_memcmp(resColl, lastSecondaryIgn
CE, rLen) != 0) { | 3849 if (rLen != LEN(lastSecondaryIgnCE) || uprv_memcmp(resColl, lastSecondaryIgn
CE, rLen) != 0) { |
5203 log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 0
, tData2[0], rLen); | 3850 log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 0
, tData2[0], rLen); |
5204 for(i = 0; i<rLen; i++) { | 3851 for(i = 0; i<rLen; i++) { |
5205 log_err(" %02X", resColl[i]); | 3852 log_err(" %02X", resColl[i]); |
5206 } | 3853 } |
5207 log_err("\n"); | 3854 log_err("\n"); |
5208 } | 3855 } |
5209 if(!log_knownIssue("8982", "debug and fix")) { /* TODO: debug & fix, see tic
ket #8982 */ | 3856 tLen = u_strlen(tData2[1]); |
5210 tLen = u_strlen(tData2[1]); | 3857 rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100); |
5211 rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100); | 3858 if (rLen != LEN(firstSecondaryIgnCE) || uprv_memcmp(resColl, firstSecondaryI
gnCE, rLen) != 0) { |
5212 if (rLen != LEN(firstSecondaryIgnCE) || uprv_memcmp(resColl, firstSecondar
yIgnCE, rLen) != 0) { | 3859 log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 1,
tData2[1], rLen); |
5213 log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 1
, tData2[1], rLen); | 3860 for(i = 0; i<rLen; i++) { |
5214 for(i = 0; i<rLen; i++) { | 3861 log_err(" %02X", resColl[i]); |
5215 log_err(" %02X", resColl[i]); | |
5216 } | |
5217 log_err("\n"); | |
5218 } | 3862 } |
| 3863 log_err("\n"); |
5219 } | 3864 } |
5220 ucol_close(coll); | 3865 ucol_close(coll); |
5221 } | 3866 } |
5222 | 3867 |
5223 static void | 3868 static void |
5224 TestUCAPrecontext(void) | 3869 TestUCAPrecontext(void) |
5225 { | 3870 { |
5226 UErrorCode status = U_ZERO_ERROR; | 3871 UErrorCode status = U_ZERO_ERROR; |
5227 int32_t i, j; | 3872 int32_t i, j; |
5228 UCollator *coll =NULL; | 3873 UCollator *coll =NULL; |
(...skipping 346 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5575 for (rule_no = 0; rule_no < n_rules; ++rule_no) { | 4220 for (rule_no = 0; rule_no < n_rules; ++rule_no) { |
5576 | 4221 |
5577 length = u_unescape(str_rules[rule_no], rule, 500); | 4222 length = u_unescape(str_rules[rule_no], rule, 500); |
5578 if (length == 0) { | 4223 if (length == 0) { |
5579 log_err("ERROR: The rule cannot be unescaped: %s\n"); | 4224 log_err("ERROR: The rule cannot be unescaped: %s\n"); |
5580 return; | 4225 return; |
5581 } | 4226 } |
5582 myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_er
ror, &status); | 4227 myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_er
ror, &status); |
5583 if(U_FAILURE(status)){ | 4228 if(U_FAILURE(status)){ |
5584 log_err_status(status, "ERROR: in creation of rule based collator: %s\n"
, myErrorName(status)); | 4229 log_err_status(status, "ERROR: in creation of rule based collator: %s\n"
, myErrorName(status)); |
| 4230 log_info(" offset=%d \"%s\" | \"%s\"\n", |
| 4231 parse_error.offset, |
| 4232 aescstrdup(parse_error.preContext, -1), |
| 4233 aescstrdup(parse_error.postContext, -1)); |
5585 return; | 4234 return; |
5586 } | 4235 } |
5587 log_verbose("Testing the <<* syntax\n"); | 4236 log_verbose("Testing the <<* syntax\n"); |
5588 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); | 4237 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); |
5589 ucol_setStrength(myCollation, UCOL_TERTIARY); | 4238 ucol_setStrength(myCollation, UCOL_TERTIARY); |
5590 for (testcase_no = 0; testcase_no < n_testcases; ++testcase_no) { | 4239 for (testcase_no = 0; testcase_no < n_testcases; ++testcase_no) { |
5591 doTest(myCollation, | 4240 doTest(myCollation, |
5592 testcases[testcase_no].source, | 4241 testcases[testcase_no].source, |
5593 testcases[testcase_no].target, | 4242 testcases[testcase_no].target, |
5594 testcases[testcase_no].result | 4243 testcases[testcase_no].result |
(...skipping 25 matching lines...) Expand all Loading... |
5620 { {0x0061}, {0x0066}, UCOL
_LESS }, /* "a" < "f" */ | 4269 { {0x0061}, {0x0066}, UCOL
_LESS }, /* "a" < "f" */ |
5621 { {0x006c, 0x0061}, {0x006b, 0x0062}, UCOL
_LESS }, /* "la" < "123" */ | 4270 { {0x006c, 0x0061}, {0x006b, 0x0062}, UCOL
_LESS }, /* "la" < "kb" */ |
5622 { {0x0061, 0x0061, 0x0061}, {0x0031, 0x0032, 0x0033}, UCOL
_EQUAL }, /* "aaa" = "123" */ | 4271 { {0x0061, 0x0061, 0x0061}, {0x0031, 0x0032, 0x0033}, UCOL
_EQUAL }, /* "aaa" = "123" */ |
5623 { {0x0062}, {0x007a}, UCOL
_LESS }, /* "b" < "z" */ | 4272 { {0x0062}, {0x007a}, UCOL
_LESS }, /* "b" < "z" */ |
5624 { {0x0061, 0x007a, 0x0062}, {0x0032, 0x0079, 0x006d}, UCOL
_LESS }, /* "azm" = "2yc" */ | 4273 { {0x0061, 0x007a, 0x0062}, {0x0032, 0x0079, 0x006d}, UCOL
_LESS }, /* "azb" < "2ym" */ |
5625 }; | 4274 }; |
5626 | 4275 |
5627 static int nRangeTestcases = LEN(rangeTestcases); | 4276 static int nRangeTestcases = LEN(rangeTestcases); |
5628 | 4277 |
5629 const static OneTestCase rangeTestcasesSupplemental[] = { | 4278 const static OneTestCase rangeTestcasesSupplemental[] = { |
5630 { {0xfffe}, {0xffff}, UCOL
_LESS }, /* U+FFFE < U+FFFF */ | 4279 { {0x4e00}, {0xfffb}, UCOL
_LESS }, /* U+4E00 < U+FFFB */ |
5631 { {0xffff}, {0xd800, 0xdc00}, UCOL
_LESS }, /* U+FFFF < U+10000 */ | 4280 { {0xfffb}, {0xd800, 0xdc00}, UCOL
_LESS }, /* U+FFFB < U+10000 */ |
5632 { {0xd800, 0xdc00}, {0xd800, 0xdc01}, UCOL
_LESS }, /* U+10000 < U+10001 */ | 4281 { {0xd800, 0xdc00}, {0xd800, 0xdc01}, UCOL
_LESS }, /* U+10000 < U+10001 */ |
5633 { {0xfffe}, {0xd800, 0xdc01}, UCOL
_LESS }, /* U+FFFE < U+10001 */ | 4282 { {0x4e00}, {0xd800, 0xdc01}, UCOL
_LESS }, /* U+4E00 < U+10001 */ |
5634 { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL
_LESS }, /* U+10000 < U+10001 */ | 4283 { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL
_LESS }, /* U+10001 < U+10002 */ |
5635 { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL
_LESS }, /* U+10000 < U+10001 */ | 4284 { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL
_LESS }, /* U+10001 < U+10002 */ |
5636 { {0xfffe}, {0xd800, 0xdc02}, UCOL
_LESS }, /* U+FFFE < U+10001 */ | 4285 { {0x4e00}, {0xd800, 0xdc02}, UCOL
_LESS }, /* U+4E00 < U+10002 */ |
5637 }; | 4286 }; |
5638 | 4287 |
5639 static int nRangeTestcasesSupplemental = LEN(rangeTestcasesSupplemental); | 4288 static int nRangeTestcasesSupplemental = LEN(rangeTestcasesSupplemental); |
5640 | 4289 |
5641 const static OneTestCase rangeTestcasesQwerty[] = { | 4290 const static OneTestCase rangeTestcasesQwerty[] = { |
5642 { {0x0071}, {0x0077}, UCOL
_LESS }, /* "q" < "w" */ | 4291 { {0x0071}, {0x0077}, UCOL
_LESS }, /* "q" < "w" */ |
5643 { {0x0077}, {0x0065}, UCOL
_LESS }, /* "w" < "e" */ | 4292 { {0x0077}, {0x0065}, UCOL
_LESS }, /* "w" < "e" */ |
5644 | 4293 |
5645 { {0x0079}, {0x0075}, UCOL
_LESS }, /* "y" < "u" */ | 4294 { {0x0079}, {0x0075}, UCOL
_LESS }, /* "y" < "u" */ |
5646 { {0x0071}, {0x0075}, UCOL
_LESS }, /* "q" << "u" */ | 4295 { {0x0071}, {0x0075}, UCOL
_LESS }, /* "q" << "u" */ |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5683 | 4332 |
5684 "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz &y<*fghe &a=*\\u0031\\u0032\\
u0033", | 4333 "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz &y<*fghe &a=*\\u0031\\u0032\\
u0033", |
5685 "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz &y<*fghe &a=*'\\u0031\\
u0032\\u0033'", | 4334 "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz &y<*fghe &a=*'\\u0031\\
u0032\\u0033'", |
5686 }; | 4335 }; |
5687 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules)); | 4336 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules)); |
5688 } | 4337 } |
5689 | 4338 |
5690 static void TestSameStrengthListSupplemental(void) | 4339 static void TestSameStrengthListSupplemental(void) |
5691 { | 4340 { |
5692 const char* strRules[] = { | 4341 const char* strRules[] = { |
5693 "&\\ufffe<\\uffff<\\U00010000<\\U00010001<\\U00010002", | 4342 "&\\u4e00<\\ufffb<\\U00010000<\\U00010001<\\U00010002", |
5694 "&\\ufffe<\\uffff<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02", | 4343 "&\\u4e00<\\ufffb<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02", |
5695 "&\\ufffe<*\\uffff\\U00010000\\U00010001\\U00010002", | 4344 "&\\u4e00<*\\ufffb\\U00010000\\U00010001\\U00010002", |
5696 "&\\ufffe<*\\uffff\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02", | 4345 "&\\u4e00<*\\ufffb\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02", |
5697 }; | 4346 }; |
5698 doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, str
Rules, LEN(strRules)); | 4347 doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, str
Rules, LEN(strRules)); |
5699 } | 4348 } |
5700 | 4349 |
5701 static void TestSameStrengthListQwerty(void) | 4350 static void TestSameStrengthListQwerty(void) |
5702 { | 4351 { |
5703 const char* strRules[] = { | 4352 const char* strRules[] = { |
5704 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */ | 4353 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */ |
5705 "&q<*wer &w<<*tyu &t<<<*iop &o=*asd", /* Lists */ | 4354 "&q<*wer &w<<*tyu &t<<<*iop &o=*asd", /* Lists */ |
5706 "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u00
74<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064", | 4355 "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u00
74<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064", |
(...skipping 27 matching lines...) Expand all Loading... |
5734 { | 4383 { |
5735 const char* strRules[] = { | 4384 const char* strRules[] = { |
5736 "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3", | 4385 "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3", |
5737 }; | 4386 }; |
5738 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules)); | 4387 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules)); |
5739 } | 4388 } |
5740 | 4389 |
5741 static void TestSameStrengthListSupplementalRanges(void) | 4390 static void TestSameStrengthListSupplementalRanges(void) |
5742 { | 4391 { |
5743 const char* strRules[] = { | 4392 const char* strRules[] = { |
5744 "&\\ufffe<*\\uffff-\\U00010002", | 4393 /* Note: U+FFFD..U+FFFF are not tailorable, so a range cannot include them.
*/ |
| 4394 "&\\u4e00<*\\ufffb\\U00010000-\\U00010002", |
5745 }; | 4395 }; |
5746 doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, str
Rules, LEN(strRules)); | 4396 doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, str
Rules, LEN(strRules)); |
5747 } | 4397 } |
5748 | 4398 |
5749 static void TestSpecialCharacters(void) | 4399 static void TestSpecialCharacters(void) |
5750 { | 4400 { |
5751 const char* strRules[] = { | 4401 const char* strRules[] = { |
5752 /* Normal */ | 4402 /* Normal */ |
5753 "&';'<'+'<','<'-'<'&'<'*'", | 4403 "&';'<'+'<','<'-'<'&'<'*'", |
5754 | 4404 |
(...skipping 279 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6034 /* | 4684 /* |
6035 * Test reordering API. | 4685 * Test reordering API. |
6036 */ | 4686 */ |
6037 static void TestReorderingAPI(void) | 4687 static void TestReorderingAPI(void) |
6038 { | 4688 { |
6039 UErrorCode status = U_ZERO_ERROR; | 4689 UErrorCode status = U_ZERO_ERROR; |
6040 UCollator *myCollation; | 4690 UCollator *myCollation; |
6041 int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUN
CTUATION}; | 4691 int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUN
CTUATION}; |
6042 int32_t duplicateReorderCodes[] = {USCRIPT_CUNEIFORM, USCRIPT_GREEK, UCOL_RE
ORDER_CODE_CURRENCY, USCRIPT_EGYPTIAN_HIEROGLYPHS}; | 4692 int32_t duplicateReorderCodes[] = {USCRIPT_CUNEIFORM, USCRIPT_GREEK, UCOL_RE
ORDER_CODE_CURRENCY, USCRIPT_EGYPTIAN_HIEROGLYPHS}; |
6043 int32_t reorderCodesStartingWithDefault[] = {UCOL_REORDER_CODE_DEFAULT, USCR
IPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION}; | 4693 int32_t reorderCodesStartingWithDefault[] = {UCOL_REORDER_CODE_DEFAULT, USCR
IPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION}; |
| 4694 int32_t reorderCodeNone = UCOL_REORDER_CODE_NONE; |
6044 UCollationResult collResult; | 4695 UCollationResult collResult; |
6045 int32_t retrievedReorderCodesLength; | 4696 int32_t retrievedReorderCodesLength; |
6046 int32_t retrievedReorderCodes[10]; | 4697 int32_t retrievedReorderCodes[10]; |
6047 UChar greekString[] = { 0x03b1 }; | 4698 UChar greekString[] = { 0x03b1 }; |
6048 UChar punctuationString[] = { 0x203e }; | 4699 UChar punctuationString[] = { 0x203e }; |
6049 int loopIndex; | 4700 int loopIndex; |
6050 | 4701 |
6051 log_verbose("Testing non-lead bytes in a sort key with and without reorderin
g\n"); | 4702 log_verbose("Testing non-lead bytes in a sort key with and without reorderin
g\n"); |
6052 | 4703 |
6053 /* build collator tertiary */ | 4704 /* build collator tertiary */ |
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6111 log_err_status(status, "ERROR: retrieved reorder codes length was %d but
should have been %d\n", retrievedReorderCodesLength, 0); | 4762 log_err_status(status, "ERROR: retrieved reorder codes length was %d but
should have been %d\n", retrievedReorderCodesLength, 0); |
6112 return; | 4763 return; |
6113 } | 4764 } |
6114 | 4765 |
6115 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctu
ationString, LEN(punctuationString)); | 4766 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctu
ationString, LEN(punctuationString)); |
6116 if (collResult != UCOL_GREATER) { | 4767 if (collResult != UCOL_GREATER) { |
6117 log_err_status(status, "ERROR: collation result should have been UCOL_GR
EATER\n"); | 4768 log_err_status(status, "ERROR: collation result should have been UCOL_GR
EATER\n"); |
6118 return; | 4769 return; |
6119 } | 4770 } |
6120 | 4771 |
| 4772 /* clear the reordering using [NONE] */ |
| 4773 ucol_setReorderCodes(myCollation, &reorderCodeNone, 1, &status); |
| 4774 if (U_FAILURE(status)) { |
| 4775 log_err_status(status, "ERROR: setting reorder codes to [NONE]: %s\n", m
yErrorName(status)); |
| 4776 return; |
| 4777 } |
| 4778 |
| 4779 /* get the reordering again */ |
| 4780 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &st
atus); |
| 4781 if (retrievedReorderCodesLength != 0) { |
| 4782 log_err_status(status, |
| 4783 "ERROR: [NONE] retrieved reorder codes length was %d but
should have been 0\n", |
| 4784 retrievedReorderCodesLength); |
| 4785 return; |
| 4786 } |
| 4787 |
6121 /* test for error condition on duplicate reorder codes */ | 4788 /* test for error condition on duplicate reorder codes */ |
6122 ucol_setReorderCodes(myCollation, duplicateReorderCodes, LEN(duplicateReorde
rCodes), &status); | 4789 ucol_setReorderCodes(myCollation, duplicateReorderCodes, LEN(duplicateReorde
rCodes), &status); |
6123 if (!U_FAILURE(status)) { | 4790 if (!U_FAILURE(status)) { |
6124 log_err_status(status, "ERROR: setting duplicate reorder codes did not g
enerate a failure\n"); | 4791 log_err_status(status, "ERROR: setting duplicate reorder codes did not g
enerate a failure\n"); |
6125 return; | 4792 return; |
6126 } | 4793 } |
6127 | 4794 |
6128 status = U_ZERO_ERROR; | 4795 status = U_ZERO_ERROR; |
6129 /* test for reorder codes after a reset code */ | 4796 /* test for reorder codes after a reset code */ |
6130 ucol_setReorderCodes(myCollation, reorderCodesStartingWithDefault, LEN(reord
erCodesStartingWithDefault), &status); | 4797 ucol_setReorderCodes(myCollation, reorderCodesStartingWithDefault, LEN(reord
erCodesStartingWithDefault), &status); |
(...skipping 134 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6265 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) { | 4932 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) { |
6266 if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) { | 4933 if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) { |
6267 log_err_status(status, "ERROR: retrieved reorder code doesn't match
set reorder code at index %d\n", loopIndex); | 4934 log_err_status(status, "ERROR: retrieved reorder code doesn't match
set reorder code at index %d\n", loopIndex); |
6268 return; | 4935 return; |
6269 } | 4936 } |
6270 } | 4937 } |
6271 | 4938 |
6272 ucol_close(myCollation); | 4939 ucol_close(myCollation); |
6273 } | 4940 } |
6274 | 4941 |
6275 static int compareUScriptCodes(const void * a, const void * b) | 4942 static UBool containsExpectedScript(const int32_t scripts[], int32_t length, int
32_t expectedScript) { |
6276 { | 4943 int32_t i; |
6277 return ( *(int32_t*)a - *(int32_t*)b ); | 4944 for (i = 0; i < length; ++i) { |
| 4945 if (expectedScript == scripts[i]) { return TRUE; } |
| 4946 } |
| 4947 return FALSE; |
6278 } | 4948 } |
6279 | 4949 |
6280 static void TestEquivalentReorderingScripts(void) { | 4950 static void TestEquivalentReorderingScripts(void) { |
6281 UErrorCode status = U_ZERO_ERROR; | 4951 UErrorCode status = U_ZERO_ERROR; |
6282 int32_t equivalentScripts[50]; | 4952 int32_t equivalentScripts[100]; |
6283 int32_t equivalentScriptsLength; | 4953 int32_t length; |
6284 int loopIndex; | 4954 int i; |
6285 int32_t equivalentScriptsResult[] = { | 4955 int32_t prevScript; |
| 4956 /* At least these scripts are expected to be equivalent. There may be more.
*/ |
| 4957 static const int32_t expectedScripts[] = { |
6286 USCRIPT_BOPOMOFO, | 4958 USCRIPT_BOPOMOFO, |
6287 USCRIPT_LISU, | 4959 USCRIPT_LISU, |
6288 USCRIPT_LYCIAN, | 4960 USCRIPT_LYCIAN, |
6289 USCRIPT_CARIAN, | 4961 USCRIPT_CARIAN, |
6290 USCRIPT_LYDIAN, | 4962 USCRIPT_LYDIAN, |
6291 USCRIPT_YI, | 4963 USCRIPT_YI, |
6292 USCRIPT_OLD_ITALIC, | 4964 USCRIPT_OLD_ITALIC, |
6293 USCRIPT_GOTHIC, | 4965 USCRIPT_GOTHIC, |
6294 USCRIPT_DESERET, | 4966 USCRIPT_DESERET, |
6295 USCRIPT_SHAVIAN, | 4967 USCRIPT_SHAVIAN, |
6296 USCRIPT_OSMANYA, | 4968 USCRIPT_OSMANYA, |
6297 USCRIPT_LINEAR_B, | 4969 USCRIPT_LINEAR_B, |
6298 USCRIPT_CYPRIOT, | 4970 USCRIPT_CYPRIOT, |
6299 USCRIPT_OLD_SOUTH_ARABIAN, | 4971 USCRIPT_OLD_SOUTH_ARABIAN, |
6300 USCRIPT_AVESTAN, | 4972 USCRIPT_AVESTAN, |
6301 USCRIPT_IMPERIAL_ARAMAIC, | 4973 USCRIPT_IMPERIAL_ARAMAIC, |
6302 USCRIPT_INSCRIPTIONAL_PARTHIAN, | 4974 USCRIPT_INSCRIPTIONAL_PARTHIAN, |
6303 USCRIPT_INSCRIPTIONAL_PAHLAVI, | 4975 USCRIPT_INSCRIPTIONAL_PAHLAVI, |
6304 USCRIPT_UGARITIC, | 4976 USCRIPT_UGARITIC, |
6305 USCRIPT_OLD_PERSIAN, | 4977 USCRIPT_OLD_PERSIAN, |
6306 USCRIPT_CUNEIFORM, | 4978 USCRIPT_CUNEIFORM, |
6307 USCRIPT_EGYPTIAN_HIEROGLYPHS, | 4979 USCRIPT_EGYPTIAN_HIEROGLYPHS, |
6308 USCRIPT_PHONETIC_POLLARD, | 4980 USCRIPT_PHONETIC_POLLARD, |
6309 USCRIPT_SORA_SOMPENG, | 4981 USCRIPT_SORA_SOMPENG, |
6310 USCRIPT_MEROITIC_CURSIVE, | 4982 USCRIPT_MEROITIC_CURSIVE, |
6311 USCRIPT_MEROITIC_HIEROGLYPHS | 4983 USCRIPT_MEROITIC_HIEROGLYPHS |
6312 }; | 4984 }; |
6313 | 4985 |
6314 qsort(equivalentScriptsResult, LEN(equivalentScriptsResult), sizeof(int32_t)
, compareUScriptCodes); | |
6315 | |
6316 /* UScript.GOTHIC */ | 4986 /* UScript.GOTHIC */ |
6317 equivalentScriptsLength = ucol_getEquivalentReorderCodes(USCRIPT_GOTHIC, equ
ivalentScripts, LEN(equivalentScripts), &status); | 4987 length = ucol_getEquivalentReorderCodes( |
| 4988 USCRIPT_GOTHIC, equivalentScripts, LEN(equivalentScripts), &status); |
6318 if (U_FAILURE(status)) { | 4989 if (U_FAILURE(status)) { |
6319 log_err_status(status, "ERROR: retrieving equivalent reorder codes: %s\n
", myErrorName(status)); | 4990 log_err_status(status, "ERROR/Gothic: retrieving equivalent reorder code
s: %s\n", myErrorName(status)); |
6320 return; | 4991 return; |
6321 } | 4992 } |
6322 /* | 4993 if (length < LEN(expectedScripts)) { |
6323 fprintf(stdout, "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n"); | 4994 log_err("ERROR/Gothic: retrieved equivalent script length wrong: " |
6324 fprintf(stdout, "equivalentScriptsLength = %d\n", equivalentScriptsLength); | 4995 "expected at least %d, was = %d\n", |
6325 for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) { | 4996 LEN(expectedScripts), length); |
6326 fprintf(stdout, "%d = %x\n", loopIndex, equivalentScripts[loopIndex]); | |
6327 } | 4997 } |
6328 */ | 4998 prevScript = -1; |
6329 if (equivalentScriptsLength != LEN(equivalentScriptsResult)) { | 4999 for (i = 0; i < length; ++i) { |
6330 log_err_status(status, "ERROR: retrieved equivalent script length wrong:
expected = %d, was = %d\n", LEN(equivalentScriptsResult), equivalentScriptsLeng
th); | 5000 int32_t script = equivalentScripts[i]; |
6331 return; | 5001 if (script <= prevScript) { |
| 5002 log_err("ERROR/Gothic: equivalent scripts out of order at index %d\n
", i); |
| 5003 } |
| 5004 prevScript = script; |
6332 } | 5005 } |
6333 for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) { | 5006 for (i = 0; i < LEN(expectedScripts); i++) { |
6334 if (equivalentScriptsResult[loopIndex] != equivalentScripts[loopIndex])
{ | 5007 if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i
])) { |
6335 log_err_status(status, "ERROR: equivalent scripts results don't matc
h: expected = %d, was = %d\n", equivalentScriptsResult[loopIndex], equivalentScr
ipts[loopIndex]); | 5008 log_err("ERROR/Gothic: equivalent scripts do not contain %d\n", |
6336 return; | 5009 expectedScripts[i]); |
6337 } | 5010 } |
6338 } | 5011 } |
6339 | 5012 |
6340 /* UScript.SHAVIAN */ | 5013 /* UScript.SHAVIAN */ |
6341 equivalentScriptsLength = ucol_getEquivalentReorderCodes(USCRIPT_SHAVIAN, eq
uivalentScripts, LEN(equivalentScripts), &status); | 5014 length = ucol_getEquivalentReorderCodes( |
| 5015 USCRIPT_SHAVIAN, equivalentScripts, LEN(equivalentScripts), &status)
; |
6342 if (U_FAILURE(status)) { | 5016 if (U_FAILURE(status)) { |
6343 log_err_status(status, "ERROR: retrieving equivalent reorder codes: %s\n
", myErrorName(status)); | 5017 log_err_status(status, "ERROR/Shavian: retrieving equivalent reorder cod
es: %s\n", myErrorName(status)); |
6344 return; | 5018 return; |
6345 } | 5019 } |
6346 if (equivalentScriptsLength != LEN(equivalentScriptsResult)) { | 5020 if (length < LEN(expectedScripts)) { |
6347 log_err_status(status, "ERROR: retrieved equivalent script length wrong:
expected = %d, was = %d\n", LEN(equivalentScriptsResult), equivalentScriptsLeng
th); | 5021 log_err("ERROR/Shavian: retrieved equivalent script length wrong: " |
6348 return; | 5022 "expected at least %d, was = %d\n", |
| 5023 LEN(expectedScripts), length); |
6349 } | 5024 } |
6350 for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) { | 5025 for (i = 0; i < LEN(expectedScripts); i++) { |
6351 if (equivalentScriptsResult[loopIndex] != equivalentScripts[loopIndex])
{ | 5026 if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i
])) { |
6352 log_err_status(status, "ERROR: equivalent scripts results don't matc
h: expected = %d, was = %d\n", equivalentScriptsResult[loopIndex], equivalentScr
ipts[loopIndex]); | 5027 log_err("ERROR/Shavian: equivalent scripts do not contain %d\n", |
6353 return; | 5028 expectedScripts[i]); |
6354 } | 5029 } |
6355 } | 5030 } |
6356 } | 5031 } |
6357 | 5032 |
6358 static void TestReorderingAcrossCloning(void) | 5033 static void TestReorderingAcrossCloning(void) |
6359 { | 5034 { |
6360 UErrorCode status = U_ZERO_ERROR; | 5035 UErrorCode status = U_ZERO_ERROR; |
6361 UCollator *myCollation; | 5036 UCollator *myCollation; |
6362 int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUN
CTUATION}; | 5037 int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUN
CTUATION}; |
6363 UCollator *clonedCollation; | 5038 UCollator *clonedCollation; |
(...skipping 483 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6847 USet* importTailoredSet; | 5522 USet* importTailoredSet; |
6848 | 5523 |
6849 | 5524 |
6850 vicoll = ucol_open("vi", &status); | 5525 vicoll = ucol_open("vi", &status); |
6851 if(U_FAILURE(status)){ | 5526 if(U_FAILURE(status)){ |
6852 log_err_status(status, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErr
orName(status)); | 5527 log_err_status(status, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErr
orName(status)); |
6853 return; | 5528 return; |
6854 } | 5529 } |
6855 | 5530 |
6856 virules = (UChar*) ucol_getRules(vicoll, &viruleslength); | 5531 virules = (UChar*) ucol_getRules(vicoll, &viruleslength); |
| 5532 if(viruleslength == 0) { |
| 5533 log_data_err("missing vi tailoring rule string\n"); |
| 5534 ucol_close(vicoll); |
| 5535 return; |
| 5536 } |
6857 escoll = ucol_open("es", &status); | 5537 escoll = ucol_open("es", &status); |
6858 esrules = (UChar*) ucol_getRules(escoll, &esruleslength); | 5538 esrules = (UChar*) ucol_getRules(escoll, &esruleslength); |
6859 viesrules = (UChar*)uprv_malloc((viruleslength+esruleslength+1)*sizeof(UChar
*)); | 5539 viesrules = (UChar*)uprv_malloc((viruleslength+esruleslength+1)*sizeof(UChar
*)); |
6860 viesrules[0] = 0; | 5540 viesrules[0] = 0; |
6861 u_strcat(viesrules, virules); | 5541 u_strcat(viesrules, virules); |
6862 u_strcat(viesrules, esrules); | 5542 u_strcat(viesrules, esrules); |
6863 viesruleslength = viruleslength + esruleslength; | 5543 viesruleslength = viruleslength + esruleslength; |
6864 viescoll = ucol_openRules(viesrules, viesruleslength, UCOL_ON, UCOL_TERTIARY
, &error, &status); | 5544 viescoll = ucol_openRules(viesrules, viesruleslength, UCOL_ON, UCOL_TERTIARY
, &error, &status); |
6865 | 5545 |
6866 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */ | 5546 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */ |
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6946 | 5626 |
6947 USet* tailoredSet; | 5627 USet* tailoredSet; |
6948 USet* importTailoredSet; | 5628 USet* importTailoredSet; |
6949 | 5629 |
6950 vicoll = ucol_open("vi", &status); | 5630 vicoll = ucol_open("vi", &status); |
6951 if(U_FAILURE(status)){ | 5631 if(U_FAILURE(status)){ |
6952 log_err_status(status, "ERROR: in creation of rule based collator: %s\n"
, myErrorName(status)); | 5632 log_err_status(status, "ERROR: in creation of rule based collator: %s\n"
, myErrorName(status)); |
6953 return; | 5633 return; |
6954 } | 5634 } |
6955 virules = ucol_getRules(vicoll, &viruleslength); | 5635 virules = ucol_getRules(vicoll, &viruleslength); |
| 5636 if(viruleslength == 0) { |
| 5637 log_data_err("missing vi tailoring rule string\n"); |
| 5638 ucol_close(vicoll); |
| 5639 return; |
| 5640 } |
6956 /* decoll = ucol_open("de@collation=phonebook", &status); */ | 5641 /* decoll = ucol_open("de@collation=phonebook", &status); */ |
6957 decoll = ucol_open("de-u-co-phonebk", &status); | 5642 decoll = ucol_open("de-u-co-phonebk", &status); |
6958 if(U_FAILURE(status)){ | 5643 if(U_FAILURE(status)){ |
6959 log_err_status(status, "ERROR: in creation of rule based collator: %s\n"
, myErrorName(status)); | 5644 log_err_status(status, "ERROR: in creation of rule based collator: %s\n"
, myErrorName(status)); |
6960 return; | 5645 return; |
6961 } | 5646 } |
6962 | 5647 |
6963 | 5648 |
6964 derules = ucol_getRules(decoll, &deruleslength); | 5649 derules = ucol_getRules(decoll, &deruleslength); |
6965 viderules = (UChar*)uprv_malloc((viruleslength+deruleslength+1)*sizeof(UChar
*)); | 5650 viderules = (UChar*)uprv_malloc((viruleslength+deruleslength+1)*sizeof(UChar
*)); |
(...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
7069 } LongUpperStrItem; | 5754 } LongUpperStrItem; |
7070 | 5755 |
7071 /* String pointers must be in reverse collation order of the corresponding strin
gs */ | 5756 /* String pointers must be in reverse collation order of the corresponding strin
gs */ |
7072 static const LongUpperStrItem longUpperStrItems[] = { | 5757 static const LongUpperStrItem longUpperStrItems[] = { |
7073 { longUpperStr1, MY_ARRAY_LEN(longUpperStr1) }, | 5758 { longUpperStr1, MY_ARRAY_LEN(longUpperStr1) }, |
7074 { longUpperStr2, MY_ARRAY_LEN(longUpperStr2) }, | 5759 { longUpperStr2, MY_ARRAY_LEN(longUpperStr2) }, |
7075 { longUpperStr3, MY_ARRAY_LEN(longUpperStr3) }, | 5760 { longUpperStr3, MY_ARRAY_LEN(longUpperStr3) }, |
7076 { NULL, 0 } | 5761 { NULL, 0 } |
7077 }; | 5762 }; |
7078 | 5763 |
7079 enum { kCollKeyLenMax = 800 }; /* longest expected is 749, but may change with c
ollation changes */ | 5764 enum { kCollKeyLenMax = 850 }; /* may change with collation changes */ |
7080 | 5765 |
7081 /* Text fix for #8445; without fix, could have crash due to stack or heap corrup
tion */ | 5766 /* Text fix for #8445; without fix, could have crash due to stack or heap corrup
tion */ |
7082 static void TestCaseLevelBufferOverflow(void) | 5767 static void TestCaseLevelBufferOverflow(void) |
7083 { | 5768 { |
7084 UErrorCode status = U_ZERO_ERROR; | 5769 UErrorCode status = U_ZERO_ERROR; |
7085 UCollator * ucol = ucol_open("root", &status); | 5770 UCollator * ucol = ucol_open("root", &status); |
7086 if ( U_SUCCESS(status) ) { | 5771 if ( U_SUCCESS(status) ) { |
7087 ucol_setAttribute(ucol, UCOL_CASE_LEVEL, UCOL_ON, &status); | 5772 ucol_setAttribute(ucol, UCOL_CASE_LEVEL, UCOL_ON, &status); |
7088 if ( U_SUCCESS(status) ) { | 5773 if ( U_SUCCESS(status) ) { |
7089 const LongUpperStrItem * itemPtr; | 5774 const LongUpperStrItem * itemPtr; |
(...skipping 17 matching lines...) Expand all Loading... |
7107 } | 5792 } |
7108 } else { | 5793 } else { |
7109 log_err_status(status, "ERROR in ucol_setAttribute UCOL_CASE_LEVEL o
n: %s\n", myErrorName(status)); | 5794 log_err_status(status, "ERROR in ucol_setAttribute UCOL_CASE_LEVEL o
n: %s\n", myErrorName(status)); |
7110 } | 5795 } |
7111 ucol_close(ucol); | 5796 ucol_close(ucol); |
7112 } else { | 5797 } else { |
7113 log_err_status(status, "ERROR in ucol_open for root: %s\n", myErrorName(
status)); | 5798 log_err_status(status, "ERROR in ucol_open for root: %s\n", myErrorName(
status)); |
7114 } | 5799 } |
7115 } | 5800 } |
7116 | 5801 |
| 5802 /* Test for #10595 */ |
| 5803 static const UChar testJapaneseName[] = {0x4F50, 0x3005, 0x6728, 0x002C, 0x6B66,
0}; /* Sa sa Ki, Takeshi */ |
| 5804 #define KEY_PART_SIZE 16 |
| 5805 |
| 5806 static void TestNextSortKeyPartJaIdentical(void) |
| 5807 { |
| 5808 UErrorCode status = U_ZERO_ERROR; |
| 5809 UCollator *coll; |
| 5810 uint8_t keyPart[KEY_PART_SIZE]; |
| 5811 UCharIterator iter; |
| 5812 uint32_t state[2] = {0, 0}; |
| 5813 int32_t keyPartLen; |
| 5814 |
| 5815 coll = ucol_open("ja", &status); |
| 5816 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status); |
| 5817 if (U_FAILURE(status)) { |
| 5818 log_err_status(status, "ERROR: in creation of Japanese collator with ide
ntical strength: %s\n", myErrorName(status)); |
| 5819 return; |
| 5820 } |
| 5821 |
| 5822 uiter_setString(&iter, testJapaneseName, 5); |
| 5823 keyPartLen = KEY_PART_SIZE; |
| 5824 while (keyPartLen == KEY_PART_SIZE) { |
| 5825 keyPartLen = ucol_nextSortKeyPart(coll, &iter, state, keyPart, KEY_PART_
SIZE, &status); |
| 5826 if (U_FAILURE(status)) { |
| 5827 log_err_status(status, "ERROR: in iterating next sort key part: %s\n
", myErrorName(status)); |
| 5828 break; |
| 5829 } |
| 5830 } |
| 5831 |
| 5832 ucol_close(coll); |
| 5833 } |
7117 | 5834 |
7118 #define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x) | 5835 #define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x) |
7119 | 5836 |
7120 void addMiscCollTest(TestNode** root) | 5837 void addMiscCollTest(TestNode** root) |
7121 { | 5838 { |
7122 TEST(TestRuleOptions); | 5839 TEST(TestRuleOptions); |
7123 TEST(TestBeforePrefixFailure); | 5840 TEST(TestBeforePrefixFailure); |
7124 TEST(TestContractionClosure); | 5841 TEST(TestContractionClosure); |
7125 TEST(TestPrefixCompose); | 5842 TEST(TestPrefixCompose); |
7126 TEST(TestStrCollIdenticalPrefix); | 5843 TEST(TestStrCollIdenticalPrefix); |
7127 TEST(TestPrefix); | 5844 TEST(TestPrefix); |
7128 TEST(TestNewJapanese); | 5845 TEST(TestNewJapanese); |
7129 /*TEST(TestLimitations);*/ | 5846 /*TEST(TestLimitations);*/ |
7130 TEST(TestNonChars); | 5847 TEST(TestNonChars); |
7131 TEST(TestExtremeCompression); | 5848 TEST(TestExtremeCompression); |
7132 TEST(TestSurrogates); | 5849 TEST(TestSurrogates); |
7133 TEST(TestVariableTopSetting); | 5850 TEST(TestVariableTopSetting); |
| 5851 TEST(TestMaxVariable); |
7134 TEST(TestBocsuCoverage); | 5852 TEST(TestBocsuCoverage); |
7135 TEST(TestCyrillicTailoring); | 5853 TEST(TestCyrillicTailoring); |
7136 TEST(TestCase); | 5854 TEST(TestCase); |
7137 TEST(IncompleteCntTest); | 5855 TEST(IncompleteCntTest); |
7138 TEST(BlackBirdTest); | 5856 TEST(BlackBirdTest); |
7139 TEST(FunkyATest); | 5857 TEST(FunkyATest); |
7140 TEST(BillFairmanTest); | 5858 TEST(BillFairmanTest); |
7141 TEST(RamsRulesTest); | |
7142 TEST(IsTailoredTest); | |
7143 TEST(TestCollations); | |
7144 TEST(TestChMove); | 5859 TEST(TestChMove); |
7145 TEST(TestImplicitTailoring); | 5860 TEST(TestImplicitTailoring); |
7146 TEST(TestFCDProblem); | 5861 TEST(TestFCDProblem); |
7147 TEST(TestEmptyRule); | 5862 TEST(TestEmptyRule); |
7148 /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by
TestBeforePinyin */ | 5863 /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by
TestBeforePinyin */ |
7149 TEST(TestJ815); | 5864 TEST(TestJ815); |
7150 /*TEST(TestJ831);*/ /* we changed lv locale */ | 5865 /*TEST(TestJ831);*/ /* we changed lv locale */ |
7151 TEST(TestBefore); | 5866 TEST(TestBefore); |
7152 TEST(TestRedundantRules); | |
7153 TEST(TestExpansionSyntax); | |
7154 TEST(TestHangulTailoring); | 5867 TEST(TestHangulTailoring); |
7155 TEST(TestUCARules); | 5868 TEST(TestUCARules); |
7156 TEST(TestIncrementalNormalize); | 5869 TEST(TestIncrementalNormalize); |
7157 TEST(TestComposeDecompose); | 5870 TEST(TestComposeDecompose); |
7158 TEST(TestCompressOverlap); | 5871 TEST(TestCompressOverlap); |
7159 TEST(TestContraction); | 5872 TEST(TestContraction); |
7160 TEST(TestExpansion); | 5873 TEST(TestExpansion); |
7161 /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys *
/ | 5874 /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys *
/ |
7162 /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported
*/ | 5875 /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported
*/ |
7163 TEST(TestOptimize); | 5876 TEST(TestOptimize); |
7164 TEST(TestSuppressContractions); | 5877 TEST(TestSuppressContractions); |
7165 TEST(Alexis2); | 5878 TEST(Alexis2); |
7166 TEST(TestHebrewUCA); | 5879 TEST(TestHebrewUCA); |
7167 TEST(TestPartialSortKeyTermination); | 5880 TEST(TestPartialSortKeyTermination); |
7168 TEST(TestSettings); | 5881 TEST(TestSettings); |
7169 TEST(TestEquals); | 5882 TEST(TestEquals); |
7170 TEST(TestJ2726); | 5883 TEST(TestJ2726); |
7171 TEST(NullRule); | 5884 TEST(NullRule); |
7172 TEST(TestNumericCollation); | 5885 TEST(TestNumericCollation); |
7173 TEST(TestTibetanConformance); | 5886 TEST(TestTibetanConformance); |
7174 TEST(TestPinyinProblem); | 5887 TEST(TestPinyinProblem); |
7175 TEST(TestImplicitGeneration); | |
7176 TEST(TestSeparateTrees); | 5888 TEST(TestSeparateTrees); |
7177 TEST(TestBeforePinyin); | 5889 TEST(TestBeforePinyin); |
7178 TEST(TestBeforeTightening); | 5890 TEST(TestBeforeTightening); |
7179 /*TEST(TestMoreBefore);*/ | 5891 /*TEST(TestMoreBefore);*/ |
7180 TEST(TestTailorNULL); | 5892 TEST(TestTailorNULL); |
7181 TEST(TestUpperFirstQuaternary); | 5893 TEST(TestUpperFirstQuaternary); |
7182 TEST(TestJ4960); | 5894 TEST(TestJ4960); |
7183 TEST(TestJ5223); | 5895 TEST(TestJ5223); |
7184 TEST(TestJ5232); | 5896 TEST(TestJ5232); |
7185 TEST(TestJ5367); | 5897 TEST(TestJ5367); |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
7217 TEST(TestGreekFirstReorder); | 5929 TEST(TestGreekFirstReorder); |
7218 TEST(TestGreekLastReorder); | 5930 TEST(TestGreekLastReorder); |
7219 TEST(TestNonScriptReorder); | 5931 TEST(TestNonScriptReorder); |
7220 TEST(TestHaniReorder); | 5932 TEST(TestHaniReorder); |
7221 TEST(TestHaniReorderWithOtherRules); | 5933 TEST(TestHaniReorderWithOtherRules); |
7222 TEST(TestMultipleReorder); | 5934 TEST(TestMultipleReorder); |
7223 TEST(TestReorderingAcrossCloning); | 5935 TEST(TestReorderingAcrossCloning); |
7224 TEST(TestReorderWithNumericCollation); | 5936 TEST(TestReorderWithNumericCollation); |
7225 | 5937 |
7226 TEST(TestCaseLevelBufferOverflow); | 5938 TEST(TestCaseLevelBufferOverflow); |
| 5939 TEST(TestNextSortKeyPartJaIdentical); |
7227 } | 5940 } |
7228 | 5941 |
7229 #endif /* #if !UCONFIG_NO_COLLATION */ | 5942 #endif /* #if !UCONFIG_NO_COLLATION */ |
OLD | NEW |