OLD | NEW |
1 /* | 1 /* |
2 ******************************************************************************* | 2 ******************************************************************************* |
3 * | 3 * |
4 * Copyright (C) 1998-2013, International Business Machines | 4 * Copyright (C) 1998-2014, International Business Machines |
5 * Corporation and others. All Rights Reserved. | 5 * Corporation and others. All Rights Reserved. |
6 * | 6 * |
7 ******************************************************************************* | 7 ******************************************************************************* |
8 * | 8 * |
9 * File parse.cpp | 9 * File parse.cpp |
10 * | 10 * |
11 * Modification History: | 11 * Modification History: |
12 * | 12 * |
13 * Date Name Description | 13 * Date Name Description |
14 * 05/26/99 stephen Creation. | 14 * 05/26/99 stephen Creation. |
15 * 02/25/00 weiv Overhaul to write udata | 15 * 02/25/00 weiv Overhaul to write udata |
16 * 5/10/01 Ram removed ustdio dependency | 16 * 5/10/01 Ram removed ustdio dependency |
17 * 06/10/2001 Dominic Ludlam <dom@recoil.org> Rewritten | 17 * 06/10/2001 Dominic Ludlam <dom@recoil.org> Rewritten |
18 ******************************************************************************* | 18 ******************************************************************************* |
19 */ | 19 */ |
20 | 20 |
21 #include "ucol_imp.h" | 21 // Safer use of UnicodeString. |
| 22 #ifndef UNISTR_FROM_CHAR_EXPLICIT |
| 23 # define UNISTR_FROM_CHAR_EXPLICIT explicit |
| 24 #endif |
| 25 |
| 26 // Less important, but still a good idea. |
| 27 #ifndef UNISTR_FROM_STRING_EXPLICIT |
| 28 # define UNISTR_FROM_STRING_EXPLICIT explicit |
| 29 #endif |
| 30 |
22 #include "parse.h" | 31 #include "parse.h" |
23 #include "errmsg.h" | 32 #include "errmsg.h" |
24 #include "uhash.h" | 33 #include "uhash.h" |
25 #include "cmemory.h" | 34 #include "cmemory.h" |
26 #include "cstring.h" | 35 #include "cstring.h" |
27 #include "uinvchar.h" | 36 #include "uinvchar.h" |
28 #include "read.h" | 37 #include "read.h" |
29 #include "ustr.h" | 38 #include "ustr.h" |
30 #include "reslist.h" | 39 #include "reslist.h" |
31 #include "rbt_pars.h" | 40 #include "rbt_pars.h" |
32 #include "genrb.h" | 41 #include "genrb.h" |
33 #include "unicode/ustring.h" | 42 #include "unicode/ustring.h" |
34 #include "unicode/uscript.h" | 43 #include "unicode/uscript.h" |
| 44 #include "unicode/utf16.h" |
35 #include "unicode/putil.h" | 45 #include "unicode/putil.h" |
| 46 #include "collationbuilder.h" |
| 47 #include "collationdata.h" |
| 48 #include "collationdatareader.h" |
| 49 #include "collationdatawriter.h" |
| 50 #include "collationfastlatinbuilder.h" |
| 51 #include "collationinfo.h" |
| 52 #include "collationroot.h" |
| 53 #include "collationruleparser.h" |
| 54 #include "collationtailoring.h" |
36 #include <stdio.h> | 55 #include <stdio.h> |
37 | 56 |
38 /* Number of tokens to read ahead of the current stream position */ | 57 /* Number of tokens to read ahead of the current stream position */ |
39 #define MAX_LOOKAHEAD 3 | 58 #define MAX_LOOKAHEAD 3 |
40 | 59 |
41 #define CR 0x000D | 60 #define CR 0x000D |
42 #define LF 0x000A | 61 #define LF 0x000A |
43 #define SPACE 0x0020 | 62 #define SPACE 0x0020 |
44 #define TAB 0x0009 | 63 #define TAB 0x0009 |
45 #define ESCAPE 0x005C | 64 #define ESCAPE 0x005C |
46 #define HASH 0x0023 | 65 #define HASH 0x0023 |
47 #define QUOTE 0x0027 | 66 #define QUOTE 0x0027 |
48 #define ZERO 0x0030 | 67 #define ZERO 0x0030 |
49 #define STARTCOMMAND 0x005B | 68 #define STARTCOMMAND 0x005B |
50 #define ENDCOMMAND 0x005D | 69 #define ENDCOMMAND 0x005D |
51 #define OPENSQBRACKET 0x005B | 70 #define OPENSQBRACKET 0x005B |
52 #define CLOSESQBRACKET 0x005D | 71 #define CLOSESQBRACKET 0x005D |
53 | 72 |
| 73 using icu::LocalPointer; |
| 74 using icu::UnicodeString; |
| 75 |
54 struct Lookahead | 76 struct Lookahead |
55 { | 77 { |
56 enum ETokenType type; | 78 enum ETokenType type; |
57 struct UString value; | 79 struct UString value; |
58 struct UString comment; | 80 struct UString comment; |
59 uint32_t line; | 81 uint32_t line; |
60 }; | 82 }; |
61 | 83 |
62 /* keep in sync with token defines in read.h */ | 84 /* keep in sync with token defines in read.h */ |
63 const char *tokenNames[TOK_TOKEN_COUNT] = | 85 const char *tokenNames[TOK_TOKEN_COUNT] = |
(...skipping 13 matching lines...) Expand all Loading... |
77 | 99 |
78 typedef struct { | 100 typedef struct { |
79 struct Lookahead lookahead[MAX_LOOKAHEAD + 1]; | 101 struct Lookahead lookahead[MAX_LOOKAHEAD + 1]; |
80 uint32_t lookaheadPosition; | 102 uint32_t lookaheadPosition; |
81 UCHARBUF *buffer; | 103 UCHARBUF *buffer; |
82 struct SRBRoot *bundle; | 104 struct SRBRoot *bundle; |
83 const char *inputdir; | 105 const char *inputdir; |
84 uint32_t inputdirLength; | 106 uint32_t inputdirLength; |
85 const char *outputdir; | 107 const char *outputdir; |
86 uint32_t outputdirLength; | 108 uint32_t outputdirLength; |
| 109 const char *filename; |
87 UBool makeBinaryCollation; | 110 UBool makeBinaryCollation; |
| 111 UBool omitCollationRules; |
88 } ParseState; | 112 } ParseState; |
89 | 113 |
90 static UBool gOmitCollationRules = FALSE; | |
91 | |
92 typedef struct SResource * | 114 typedef struct SResource * |
93 ParseResourceFunction(ParseState* state, char *tag, uint32_t startline, const st
ruct UString* comment, UErrorCode *status); | 115 ParseResourceFunction(ParseState* state, char *tag, uint32_t startline, const st
ruct UString* comment, UErrorCode *status); |
94 | 116 |
95 static struct SResource *parseResource(ParseState* state, char *tag, const struc
t UString *comment, UErrorCode *status); | 117 static struct SResource *parseResource(ParseState* state, char *tag, const struc
t UString *comment, UErrorCode *status); |
96 | 118 |
97 /* The nature of the lookahead buffer: | 119 /* The nature of the lookahead buffer: |
98 There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer. This provides | 120 There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer. This provides |
99 MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value. | 121 MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value. |
100 When getToken is called, the current pointer is moved to the next slot and th
e | 122 When getToken is called, the current pointer is moved to the next slot and th
e |
101 old slot is filled with the next token from the reader by calling getNextToke
n. | 123 old slot is filled with the next token from the reader by calling getNextToke
n. |
(...skipping 214 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
316 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength); | 338 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength); |
317 | 339 |
318 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); | 340 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); |
319 | 341 |
320 if (U_FAILURE(*status)) | 342 if (U_FAILURE(*status)) |
321 { | 343 { |
322 return NULL; | 344 return NULL; |
323 } | 345 } |
324 uprv_strcat(filename, cs); | 346 uprv_strcat(filename, cs); |
325 | 347 |
326 if(gOmitCollationRules) { | 348 if(state->omitCollationRules) { |
327 return res_none(); | 349 return res_none(); |
328 } | 350 } |
329 | 351 |
330 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status); | 352 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status); |
331 | 353 |
332 if (U_FAILURE(*status)) { | 354 if (U_FAILURE(*status)) { |
333 error(line, "An error occured while opening the input file %s\n", filena
me); | 355 error(line, "An error occured while opening the input file %s\n", filena
me); |
334 return NULL; | 356 return NULL; |
335 } | 357 } |
336 | 358 |
(...skipping 280 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
617 if (U_FAILURE(*status)) | 639 if (U_FAILURE(*status)) |
618 { | 640 { |
619 res_close(result); | 641 res_close(result); |
620 return NULL; | 642 return NULL; |
621 } | 643 } |
622 } | 644 } |
623 | 645 |
624 return result; | 646 return result; |
625 } | 647 } |
626 | 648 |
627 typedef struct{ | 649 #if !UCONFIG_NO_COLLATION |
628 const char* inputDir; | 650 |
629 const char* outputDir; | 651 namespace { |
630 } GenrbData; | |
631 | 652 |
632 static struct SResource* resLookup(struct SResource* res, const char* key){ | 653 static struct SResource* resLookup(struct SResource* res, const char* key){ |
633 struct SResource *current = NULL; | 654 struct SResource *current = NULL; |
634 struct SResTable *list; | 655 struct SResTable *list; |
635 if (res == res_none()) { | 656 if (res == res_none()) { |
636 return NULL; | 657 return NULL; |
637 } | 658 } |
638 | 659 |
639 list = &(res->u.fTable); | 660 list = &(res->u.fTable); |
640 | 661 |
641 current = list->fFirst; | 662 current = list->fFirst; |
642 while (current != NULL) { | 663 while (current != NULL) { |
643 if (uprv_strcmp(((list->fRoot->fKeys) + (current->fKey)), key) == 0) { | 664 if (uprv_strcmp(((list->fRoot->fKeys) + (current->fKey)), key) == 0) { |
644 return current; | 665 return current; |
645 } | 666 } |
646 current = current->fNext; | 667 current = current->fNext; |
647 } | 668 } |
648 return NULL; | 669 return NULL; |
649 } | 670 } |
650 | 671 |
651 static const UChar* importFromDataFile(void* context, const char* locale, const
char* type, int32_t* pLength, UErrorCode* status){ | 672 class GenrbImporter : public icu::CollationRuleParser::Importer { |
| 673 public: |
| 674 GenrbImporter(const char *in, const char *out) : inputDir(in), outputDir(out
) {} |
| 675 virtual ~GenrbImporter(); |
| 676 virtual void getRules( |
| 677 const char *localeID, const char *collationType, |
| 678 UnicodeString &rules, |
| 679 const char *&errorReason, UErrorCode &errorCode); |
| 680 |
| 681 private: |
| 682 const char *inputDir; |
| 683 const char *outputDir; |
| 684 }; |
| 685 |
| 686 GenrbImporter::~GenrbImporter() {} |
| 687 |
| 688 void |
| 689 GenrbImporter::getRules( |
| 690 const char *localeID, const char *collationType, |
| 691 UnicodeString &rules, |
| 692 const char *& /*errorReason*/, UErrorCode &errorCode) { |
652 struct SRBRoot *data = NULL; | 693 struct SRBRoot *data = NULL; |
653 UCHARBUF *ucbuf = NULL; | 694 UCHARBUF *ucbuf = NULL; |
654 GenrbData* genrbdata = (GenrbData*) context; | 695 int localeLength = strlen(localeID); |
655 int localeLength = strlen(locale); | |
656 char* filename = (char*)uprv_malloc(localeLength+5); | 696 char* filename = (char*)uprv_malloc(localeLength+5); |
657 char *inputDirBuf = NULL; | 697 char *inputDirBuf = NULL; |
658 char *openFileName = NULL; | 698 char *openFileName = NULL; |
659 const char* cp = ""; | 699 const char* cp = ""; |
660 UChar* urules = NULL; | |
661 int32_t urulesLength = 0; | |
662 int32_t i = 0; | 700 int32_t i = 0; |
663 int32_t dirlen = 0; | 701 int32_t dirlen = 0; |
664 int32_t filelen = 0; | 702 int32_t filelen = 0; |
665 struct SResource* root; | 703 struct SResource* root; |
666 struct SResource* collations; | 704 struct SResource* collations; |
667 struct SResource* collation; | 705 struct SResource* collation; |
668 struct SResource* sequence; | 706 struct SResource* sequence; |
669 | 707 |
670 memcpy(filename, locale, localeLength); | 708 memcpy(filename, localeID, localeLength); |
671 for(i = 0; i < localeLength; i++){ | 709 for(i = 0; i < localeLength; i++){ |
672 if(filename[i] == '-'){ | 710 if(filename[i] == '-'){ |
673 filename[i] = '_'; | 711 filename[i] = '_'; |
674 } | 712 } |
675 } | 713 } |
676 filename[localeLength] = '.'; | 714 filename[localeLength] = '.'; |
677 filename[localeLength+1] = 't'; | 715 filename[localeLength+1] = 't'; |
678 filename[localeLength+2] = 'x'; | 716 filename[localeLength+2] = 'x'; |
679 filename[localeLength+3] = 't'; | 717 filename[localeLength+3] = 't'; |
680 filename[localeLength+4] = 0; | 718 filename[localeLength+4] = 0; |
681 | 719 |
682 | 720 |
683 if (status==NULL || U_FAILURE(*status)) { | 721 if (U_FAILURE(errorCode)) { |
684 return NULL; | 722 return; |
685 } | 723 } |
686 if(filename==NULL){ | 724 if(filename==NULL){ |
687 *status=U_ILLEGAL_ARGUMENT_ERROR; | 725 errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
688 return NULL; | 726 return; |
689 }else{ | 727 }else{ |
690 filelen = (int32_t)uprv_strlen(filename); | 728 filelen = (int32_t)uprv_strlen(filename); |
691 } | 729 } |
692 if(genrbdata->inputDir == NULL) { | 730 if(inputDir == NULL) { |
693 const char *filenameBegin = uprv_strrchr(filename, U_FILE_SEP_CHAR); | 731 const char *filenameBegin = uprv_strrchr(filename, U_FILE_SEP_CHAR); |
694 openFileName = (char *) uprv_malloc(dirlen + filelen + 2); | 732 openFileName = (char *) uprv_malloc(dirlen + filelen + 2); |
695 openFileName[0] = '\0'; | 733 openFileName[0] = '\0'; |
696 if (filenameBegin != NULL) { | 734 if (filenameBegin != NULL) { |
697 /* | 735 /* |
698 * When a filename ../../../data/root.txt is specified, | 736 * When a filename ../../../data/root.txt is specified, |
699 * we presume that the input directory is ../../../data | 737 * we presume that the input directory is ../../../data |
700 * This is very important when the resource file includes | 738 * This is very important when the resource file includes |
701 * another file, like UCARules.txt or thaidict.brk. | 739 * another file, like UCARules.txt or thaidict.brk. |
702 */ | 740 */ |
703 int32_t filenameSize = (int32_t)(filenameBegin - filename + 1); | 741 int32_t filenameSize = (int32_t)(filenameBegin - filename + 1); |
704 inputDirBuf = uprv_strncpy((char *)uprv_malloc(filenameSize), filena
me, filenameSize); | 742 inputDirBuf = (char *)uprv_malloc(filenameSize); |
705 | 743 |
706 /* test for NULL */ | 744 /* test for NULL */ |
707 if(inputDirBuf == NULL) { | 745 if(inputDirBuf == NULL) { |
708 *status = U_MEMORY_ALLOCATION_ERROR; | 746 errorCode = U_MEMORY_ALLOCATION_ERROR; |
709 goto finish; | 747 goto finish; |
710 } | 748 } |
711 | 749 |
| 750 uprv_strncpy(inputDirBuf, filename, filenameSize); |
712 inputDirBuf[filenameSize - 1] = 0; | 751 inputDirBuf[filenameSize - 1] = 0; |
713 genrbdata->inputDir = inputDirBuf; | 752 inputDir = inputDirBuf; |
714 dirlen = (int32_t)uprv_strlen(genrbdata->inputDir); | 753 dirlen = (int32_t)uprv_strlen(inputDir); |
715 } | 754 } |
716 }else{ | 755 }else{ |
717 dirlen = (int32_t)uprv_strlen(genrbdata->inputDir); | 756 dirlen = (int32_t)uprv_strlen(inputDir); |
718 | 757 |
719 if(genrbdata->inputDir[dirlen-1] != U_FILE_SEP_CHAR) { | 758 if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) { |
720 openFileName = (char *) uprv_malloc(dirlen + filelen + 2); | 759 openFileName = (char *) uprv_malloc(dirlen + filelen + 2); |
721 | 760 |
722 /* test for NULL */ | 761 /* test for NULL */ |
723 if(openFileName == NULL) { | 762 if(openFileName == NULL) { |
724 *status = U_MEMORY_ALLOCATION_ERROR; | 763 errorCode = U_MEMORY_ALLOCATION_ERROR; |
725 goto finish; | 764 goto finish; |
726 } | 765 } |
727 | 766 |
728 openFileName[0] = '\0'; | 767 openFileName[0] = '\0'; |
729 /* | 768 /* |
730 * append the input dir to openFileName if the first char in | 769 * append the input dir to openFileName if the first char in |
731 * filename is not file seperation char and the last char input dire
ctory is not '.'. | 770 * filename is not file seperation char and the last char input dire
ctory is not '.'. |
732 * This is to support : | 771 * This is to support : |
733 * genrb -s. /home/icu/data | 772 * genrb -s. /home/icu/data |
734 * genrb -s. icu/data | 773 * genrb -s. icu/data |
735 * The user cannot mix notations like | 774 * The user cannot mix notations like |
736 * genrb -s. /icu/data --- the absolute path specified. -s redundant | 775 * genrb -s. /icu/data --- the absolute path specified. -s redundant |
737 * user should use | 776 * user should use |
738 * genrb -s. icu/data --- start from CWD and look in icu/data dir | 777 * genrb -s. icu/data --- start from CWD and look in icu/data dir |
739 */ | 778 */ |
740 if( (filename[0] != U_FILE_SEP_CHAR) && (genrbdata->inputDir[dirlen-
1] !='.')){ | 779 if( (filename[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')){ |
741 uprv_strcpy(openFileName, genrbdata->inputDir); | 780 uprv_strcpy(openFileName, inputDir); |
742 openFileName[dirlen] = U_FILE_SEP_CHAR; | 781 openFileName[dirlen] = U_FILE_SEP_CHAR; |
743 } | 782 } |
744 openFileName[dirlen + 1] = '\0'; | 783 openFileName[dirlen + 1] = '\0'; |
745 } else { | 784 } else { |
746 openFileName = (char *) uprv_malloc(dirlen + filelen + 1); | 785 openFileName = (char *) uprv_malloc(dirlen + filelen + 1); |
747 | 786 |
748 /* test for NULL */ | 787 /* test for NULL */ |
749 if(openFileName == NULL) { | 788 if(openFileName == NULL) { |
750 *status = U_MEMORY_ALLOCATION_ERROR; | 789 errorCode = U_MEMORY_ALLOCATION_ERROR; |
751 goto finish; | 790 goto finish; |
752 } | 791 } |
753 | 792 |
754 uprv_strcpy(openFileName, genrbdata->inputDir); | 793 uprv_strcpy(openFileName, inputDir); |
755 | 794 |
756 } | 795 } |
757 } | 796 } |
758 uprv_strcat(openFileName, filename); | 797 uprv_strcat(openFileName, filename); |
759 /* printf("%s\n", openFileName); */ | 798 /* printf("%s\n", openFileName); */ |
760 *status = U_ZERO_ERROR; | 799 errorCode = U_ZERO_ERROR; |
761 ucbuf = ucbuf_open(openFileName, &cp,getShowWarning(),TRUE, status); | 800 ucbuf = ucbuf_open(openFileName, &cp,getShowWarning(),TRUE, &errorCode); |
762 | 801 |
763 if(*status == U_FILE_ACCESS_ERROR) { | 802 if(errorCode == U_FILE_ACCESS_ERROR) { |
764 | 803 |
765 fprintf(stderr, "couldn't open file %s\n", openFileName == NULL ? filena
me : openFileName); | 804 fprintf(stderr, "couldn't open file %s\n", openFileName == NULL ? filena
me : openFileName); |
766 goto finish; | 805 goto finish; |
767 } | 806 } |
768 if (ucbuf == NULL || U_FAILURE(*status)) { | 807 if (ucbuf == NULL || U_FAILURE(errorCode)) { |
769 fprintf(stderr, "An error occured processing file %s. Error: %s\n", open
FileName == NULL ? filename : openFileName,u_errorName(*status)); | 808 fprintf(stderr, "An error occured processing file %s. Error: %s\n", open
FileName == NULL ? filename : openFileName,u_errorName(errorCode)); |
770 goto finish; | 809 goto finish; |
771 } | 810 } |
772 | 811 |
773 /* Parse the data into an SRBRoot */ | 812 /* Parse the data into an SRBRoot */ |
774 data = parse(ucbuf, genrbdata->inputDir, genrbdata->outputDir, FALSE, status
); | 813 data = parse(ucbuf, inputDir, outputDir, filename, FALSE, FALSE, &errorCode)
; |
| 814 if (U_FAILURE(errorCode)) { |
| 815 goto finish; |
| 816 } |
775 | 817 |
776 root = data->fRoot; | 818 root = data->fRoot; |
777 collations = resLookup(root, "collations"); | 819 collations = resLookup(root, "collations"); |
778 if (collations != NULL) { | 820 if (collations != NULL) { |
779 collation = resLookup(collations, type); | 821 collation = resLookup(collations, collationType); |
780 if (collation != NULL) { | 822 if (collation != NULL) { |
781 sequence = resLookup(collation, "Sequence"); | 823 sequence = resLookup(collation, "Sequence"); |
782 if (sequence != NULL) { | 824 if (sequence != NULL) { |
783 urules = sequence->u.fString.fChars; | 825 // No string pointer aliasing so that we need not hold onto the resour
ce bundle. |
784 urulesLength = sequence->u.fString.fLength; | 826 rules.setTo(sequence->u.fString.fChars, sequence->u.fString.fLength); |
785 *pLength = urulesLength; | |
786 } | 827 } |
787 } | 828 } |
788 } | 829 } |
789 | 830 |
790 finish: | 831 finish: |
791 if (inputDirBuf != NULL) { | 832 if (inputDirBuf != NULL) { |
792 uprv_free(inputDirBuf); | 833 uprv_free(inputDirBuf); |
793 } | 834 } |
794 | 835 |
795 if (openFileName != NULL) { | 836 if (openFileName != NULL) { |
796 uprv_free(openFileName); | 837 uprv_free(openFileName); |
797 } | 838 } |
798 | 839 |
799 if(ucbuf) { | 840 if(ucbuf) { |
800 ucbuf_close(ucbuf); | 841 ucbuf_close(ucbuf); |
801 } | 842 } |
802 | |
803 return urules; | |
804 } | 843 } |
805 | 844 |
806 // Quick-and-dirty escaping function. | 845 // Quick-and-dirty escaping function. |
807 // Assumes that we are on an ASCII-based platform. | 846 // Assumes that we are on an ASCII-based platform. |
808 static void | 847 static void |
809 escape(const UChar *s, char *buffer) { | 848 escape(const UChar *s, char *buffer) { |
810 int32_t length = u_strlen(s); | 849 int32_t length = u_strlen(s); |
811 int32_t i = 0; | 850 int32_t i = 0; |
812 for (;;) { | 851 for (;;) { |
813 UChar32 c; | 852 UChar32 c; |
814 U16_NEXT(s, i, length, c); | 853 U16_NEXT(s, i, length, c); |
815 if (c == 0) { | 854 if (c == 0) { |
816 *buffer = 0; | 855 *buffer = 0; |
817 return; | 856 return; |
818 } else if (0x20 <= c && c <= 0x7e) { | 857 } else if (0x20 <= c && c <= 0x7e) { |
819 // printable ASCII | 858 // printable ASCII |
820 *buffer++ = (char)c; // assumes ASCII-based platform | 859 *buffer++ = (char)c; // assumes ASCII-based platform |
821 } else { | 860 } else { |
822 buffer += sprintf(buffer, "\\u%04X", (int)c); | 861 buffer += sprintf(buffer, "\\u%04X", (int)c); |
823 } | 862 } |
824 } | 863 } |
825 } | 864 } |
826 | 865 |
| 866 } // namespace |
| 867 |
| 868 #endif // !UCONFIG_NO_COLLATION |
| 869 |
827 static struct SResource * | 870 static struct SResource * |
828 addCollation(ParseState* state, struct SResource *result, uint32_t startline, U
ErrorCode *status) | 871 addCollation(ParseState* state, struct SResource *result, const char *collation
Type, |
| 872 uint32_t startline, UErrorCode *status) |
829 { | 873 { |
| 874 // TODO: Use LocalPointer for result, or make caller close it when there is
a failure. |
830 struct SResource *member = NULL; | 875 struct SResource *member = NULL; |
831 struct UString *tokenValue; | 876 struct UString *tokenValue; |
832 struct UString comment; | 877 struct UString comment; |
833 enum ETokenType token; | 878 enum ETokenType token; |
834 char subtag[1024]; | 879 char subtag[1024]; |
| 880 UnicodeString rules; |
| 881 UBool haveRules = FALSE; |
835 UVersionInfo version; | 882 UVersionInfo version; |
836 uint32_t line; | 883 uint32_t line; |
837 GenrbData genrbdata; | 884 |
838 /* '{' . (name resource)* '}' */ | 885 /* '{' . (name resource)* '}' */ |
839 version[0]=0; version[1]=0; version[2]=0; version[3]=0; | 886 version[0]=0; version[1]=0; version[2]=0; version[3]=0; |
840 | 887 |
841 for (;;) | 888 for (;;) |
842 { | 889 { |
843 ustr_init(&comment); | 890 ustr_init(&comment); |
844 token = getToken(state, &tokenValue, &comment, &line, status); | 891 token = getToken(state, &tokenValue, &comment, &line, status); |
845 | 892 |
846 if (token == TOK_CLOSE_BRACE) | 893 if (token == TOK_CLOSE_BRACE) |
847 { | 894 { |
848 return result; | 895 break; |
849 } | 896 } |
850 | 897 |
851 if (token != TOK_STRING) | 898 if (token != TOK_STRING) |
852 { | 899 { |
853 res_close(result); | 900 res_close(result); |
854 *status = U_INVALID_FORMAT_ERROR; | 901 *status = U_INVALID_FORMAT_ERROR; |
855 | 902 |
856 if (token == TOK_EOF) | 903 if (token == TOK_EOF) |
857 { | 904 { |
858 error(startline, "unterminated table"); | 905 error(startline, "unterminated table"); |
(...skipping 14 matching lines...) Expand all Loading... |
873 return NULL; | 920 return NULL; |
874 } | 921 } |
875 | 922 |
876 member = parseResource(state, subtag, NULL, status); | 923 member = parseResource(state, subtag, NULL, status); |
877 | 924 |
878 if (U_FAILURE(*status)) | 925 if (U_FAILURE(*status)) |
879 { | 926 { |
880 res_close(result); | 927 res_close(result); |
881 return NULL; | 928 return NULL; |
882 } | 929 } |
883 | 930 if (result == NULL) |
884 if (uprv_strcmp(subtag, "Version") == 0) | 931 { |
| 932 // Ignore the parsed resources, continue parsing. |
| 933 } |
| 934 else if (uprv_strcmp(subtag, "Version") == 0) |
885 { | 935 { |
886 char ver[40]; | 936 char ver[40]; |
887 int32_t length = member->u.fString.fLength; | 937 int32_t length = member->u.fString.fLength; |
888 | 938 |
889 if (length >= (int32_t) sizeof(ver)) | 939 if (length >= (int32_t) sizeof(ver)) |
890 { | 940 { |
891 length = (int32_t) sizeof(ver) - 1; | 941 length = (int32_t) sizeof(ver) - 1; |
892 } | 942 } |
893 | 943 |
894 u_UCharsToChars(member->u.fString.fChars, ver, length + 1); /* +1 fo
r copying NULL */ | 944 u_UCharsToChars(member->u.fString.fChars, ver, length + 1); /* +1 fo
r copying NULL */ |
895 u_versionFromString(version, ver); | 945 u_versionFromString(version, ver); |
896 | 946 |
897 table_add(result, member, line, status); | 947 table_add(result, member, line, status); |
898 | 948 member = NULL; |
899 } | |
900 else if (uprv_strcmp(subtag, "Override") == 0) | |
901 { | |
902 // UBool override = (u_strncmp(member->u.fString.fChars, trueValue,
u_strlen(trueValue)) == 0); | |
903 table_add(result, member, line, status); | |
904 | |
905 } | 949 } |
906 else if(uprv_strcmp(subtag, "%%CollationBin")==0) | 950 else if(uprv_strcmp(subtag, "%%CollationBin")==0) |
907 { | 951 { |
908 /* discard duplicate %%CollationBin if any*/ | 952 /* discard duplicate %%CollationBin if any*/ |
909 } | 953 } |
910 else if (uprv_strcmp(subtag, "Sequence") == 0) | 954 else if (uprv_strcmp(subtag, "Sequence") == 0) |
911 { | 955 { |
912 #if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO | 956 rules.setTo(member->u.fString.fChars, member->u.fString.fLength); |
913 warning(line, "Not building collation elements because of UCONFIG_NO
_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h"); | 957 haveRules = TRUE; |
914 #else | 958 // Defer building the collator until we have seen |
915 if(state->makeBinaryCollation) { | 959 // all sub-elements of the collation table, including the Version. |
916 | |
917 /* do the collation elements */ | |
918 int32_t len = 0; | |
919 uint8_t *data = NULL; | |
920 UCollator *coll = NULL; | |
921 int32_t reorderCodes[USCRIPT_CODE_LIMIT + (UCOL_REORDER_CODE_LIM
IT - UCOL_REORDER_CODE_FIRST)]; | |
922 int32_t reorderCodeCount; | |
923 int32_t reorderCodeIndex; | |
924 UParseError parseError; | |
925 | |
926 genrbdata.inputDir = state->inputdir; | |
927 genrbdata.outputDir = state->outputdir; | |
928 | |
929 UErrorCode intStatus = U_ZERO_ERROR; | |
930 uprv_memset(&parseError, 0, sizeof(parseError)); | |
931 coll = ucol_openRulesForImport(member->u.fString.fChars, member-
>u.fString.fLength, | |
932 UCOL_OFF, UCOL_DEFAULT_STRENGTH,&
parseError, importFromDataFile, &genrbdata, &intStatus); | |
933 | |
934 if (U_SUCCESS(intStatus) && coll != NULL) | |
935 { | |
936 len = ucol_cloneBinary(coll, NULL, 0, &intStatus); | |
937 data = (uint8_t *)uprv_malloc(len); | |
938 intStatus = U_ZERO_ERROR; | |
939 len = ucol_cloneBinary(coll, data, len, &intStatus); | |
940 | |
941 /* tailoring rules version */ | |
942 /* This is wrong! */ | |
943 /*coll->dataInfo.dataVersion[1] = version[0];*/ | |
944 /* Copy tailoring version. Builder version already */ | |
945 /* set in ucol_openRules */ | |
946 ((UCATableHeader *)data)->version[1] = version[0]; | |
947 ((UCATableHeader *)data)->version[2] = version[1]; | |
948 ((UCATableHeader *)data)->version[3] = version[2]; | |
949 | |
950 if (U_SUCCESS(intStatus) && data != NULL) | |
951 { | |
952 struct SResource *collationBin = bin_open(state->bundle,
"%%CollationBin", len, data, NULL, NULL, status); | |
953 table_add(result, collationBin, line, status); | |
954 uprv_free(data); | |
955 | |
956 reorderCodeCount = ucol_getReorderCodes( | |
957 coll, reorderCodes, USCRIPT_CODE_LIMIT + (UCOL_REORD
ER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST), &intStatus); | |
958 if (U_SUCCESS(intStatus) && reorderCodeCount > 0) { | |
959 struct SResource *reorderCodeRes = intvector_open(st
ate->bundle, "%%ReorderCodes", NULL, status); | |
960 for (reorderCodeIndex = 0; reorderCodeIndex < reorde
rCodeCount; reorderCodeIndex++) { | |
961 intvector_add(reorderCodeRes, reorderCodes[reord
erCodeIndex], status); | |
962 } | |
963 table_add(result, reorderCodeRes, line, status); | |
964 } | |
965 } | |
966 else | |
967 { | |
968 warning(line, "could not obtain rules from collator"); | |
969 if(isStrict()){ | |
970 *status = U_INVALID_FORMAT_ERROR; | |
971 return NULL; | |
972 } | |
973 } | |
974 | |
975 ucol_close(coll); | |
976 } | |
977 else | |
978 { | |
979 if(intStatus == U_FILE_ACCESS_ERROR) { | |
980 error(startline, "Collation could not be built- U_FILE_A
CCESS_ERROR. Make sure ICU's data has been built and is loading properly."); | |
981 *status = intStatus; | |
982 return NULL; | |
983 } | |
984 char preBuffer[100], postBuffer[100]; | |
985 escape(parseError.preContext, preBuffer); | |
986 escape(parseError.postContext, postBuffer); | |
987 warning(line, | |
988 "%%%%CollationBin could not be constructed from Coll
ationElements\n" | |
989 " check context, check that the FractionalUCA.txt U
CA version " | |
990 "matches the current UCD version\n" | |
991 " UErrorCode=%s UParseError={ line=%d offset=%d pr
e=<> post=<> }", | |
992 u_errorName(intStatus), | |
993 parseError.line, | |
994 parseError.offset, | |
995 preBuffer, | |
996 postBuffer); | |
997 if(isStrict()){ | |
998 *status = intStatus; | |
999 return NULL; | |
1000 } | |
1001 } | |
1002 } else { | |
1003 if(isVerbose()) { | |
1004 printf("Not building Collation binary\n"); | |
1005 } | |
1006 } | |
1007 #endif | |
1008 /* in order to achieve smaller data files, we can direct genrb */ | 960 /* in order to achieve smaller data files, we can direct genrb */ |
1009 /* to omit collation rules */ | 961 /* to omit collation rules */ |
1010 if(gOmitCollationRules) { | 962 if(!state->omitCollationRules) { |
1011 bundle_closeString(state->bundle, member); | |
1012 } else { | |
1013 table_add(result, member, line, status); | 963 table_add(result, member, line, status); |
| 964 member = NULL; |
1014 } | 965 } |
1015 } | 966 } |
| 967 else // Just copy non-special items. |
| 968 { |
| 969 table_add(result, member, line, status); |
| 970 member = NULL; |
| 971 } |
| 972 res_close(member); // TODO: use LocalPointer |
1016 if (U_FAILURE(*status)) | 973 if (U_FAILURE(*status)) |
1017 { | 974 { |
1018 res_close(result); | 975 res_close(result); |
1019 return NULL; | 976 return NULL; |
1020 } | 977 } |
1021 } | 978 } |
1022 | 979 |
1023 // Reached the end without a TOK_CLOSE_BRACE. Should be an error. | 980 if (!haveRules) { return result; } |
1024 *status = U_INTERNAL_PROGRAM_ERROR; | 981 |
1025 return NULL; | 982 #if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO |
| 983 warning(line, "Not building collation elements because of UCONFIG_NO_COLLATI
ON and/or UCONFIG_NO_FILE_IO, see uconfig.h"); |
| 984 (void)collationType; |
| 985 #else |
| 986 // CLDR ticket #3949, ICU ticket #8082: |
| 987 // Do not build collation binary data for for-import-only "private" collatio
n rule strings. |
| 988 if (uprv_strncmp(collationType, "private-", 8) == 0) { |
| 989 if(isVerbose()) { |
| 990 printf("Not building %s~%s collation binary\n", state->filename, col
lationType); |
| 991 } |
| 992 return result; |
| 993 } |
| 994 |
| 995 if(!state->makeBinaryCollation) { |
| 996 if(isVerbose()) { |
| 997 printf("Not building %s~%s collation binary\n", state->filename, col
lationType); |
| 998 } |
| 999 return result; |
| 1000 } |
| 1001 UErrorCode intStatus = U_ZERO_ERROR; |
| 1002 UParseError parseError; |
| 1003 uprv_memset(&parseError, 0, sizeof(parseError)); |
| 1004 GenrbImporter importer(state->inputdir, state->outputdir); |
| 1005 const icu::CollationTailoring *base = icu::CollationRoot::getRoot(intStatus)
; |
| 1006 if(U_FAILURE(intStatus)) { |
| 1007 error(line, "failed to load root collator (ucadata.icu) - %s", u_errorNa
me(intStatus)); |
| 1008 res_close(result); |
| 1009 return NULL; // TODO: use LocalUResourceBundlePointer for result |
| 1010 } |
| 1011 icu::CollationBuilder builder(base, intStatus); |
| 1012 if(uprv_strncmp(collationType, "search", 6) == 0) { |
| 1013 builder.disableFastLatin(); // build fast-Latin table unless search col
lator |
| 1014 } |
| 1015 LocalPointer<icu::CollationTailoring> t( |
| 1016 builder.parseAndBuild(rules, version, &importer, &parseError, intSta
tus)); |
| 1017 if(U_FAILURE(intStatus)) { |
| 1018 const char *reason = builder.getErrorReason(); |
| 1019 if(reason == NULL) { reason = ""; } |
| 1020 error(line, "CollationBuilder failed at %s~%s/Sequence rule offset %ld:
%s %s", |
| 1021 state->filename, collationType, |
| 1022 (long)parseError.offset, u_errorName(intStatus), reason); |
| 1023 if(parseError.preContext[0] != 0 || parseError.postContext[0] != 0) { |
| 1024 // Print pre- and post-context. |
| 1025 char preBuffer[100], postBuffer[100]; |
| 1026 escape(parseError.preContext, preBuffer); |
| 1027 escape(parseError.postContext, postBuffer); |
| 1028 error(line, " error context: \"...%s\" ! \"%s...\"", preBuffer, pos
tBuffer); |
| 1029 } |
| 1030 if(isStrict()) { |
| 1031 *status = intStatus; |
| 1032 res_close(result); |
| 1033 return NULL; |
| 1034 } |
| 1035 } |
| 1036 icu::LocalMemory<uint8_t> buffer; |
| 1037 int32_t capacity = 100000; |
| 1038 uint8_t *dest = buffer.allocateInsteadAndCopy(capacity); |
| 1039 if(dest == NULL) { |
| 1040 fprintf(stderr, "memory allocation (%ld bytes) for file contents failed\
n", |
| 1041 (long)capacity); |
| 1042 *status = U_MEMORY_ALLOCATION_ERROR; |
| 1043 res_close(result); |
| 1044 return NULL; |
| 1045 } |
| 1046 int32_t indexes[icu::CollationDataReader::IX_TOTAL_SIZE + 1]; |
| 1047 int32_t totalSize = icu::CollationDataWriter::writeTailoring( |
| 1048 *t, *t->settings, indexes, dest, capacity, intStatus); |
| 1049 if(intStatus == U_BUFFER_OVERFLOW_ERROR) { |
| 1050 intStatus = U_ZERO_ERROR; |
| 1051 capacity = totalSize; |
| 1052 dest = buffer.allocateInsteadAndCopy(capacity); |
| 1053 if(dest == NULL) { |
| 1054 fprintf(stderr, "memory allocation (%ld bytes) for file contents fai
led\n", |
| 1055 (long)capacity); |
| 1056 *status = U_MEMORY_ALLOCATION_ERROR; |
| 1057 res_close(result); |
| 1058 return NULL; |
| 1059 } |
| 1060 totalSize = icu::CollationDataWriter::writeTailoring( |
| 1061 *t, *t->settings, indexes, dest, capacity, intStatus); |
| 1062 } |
| 1063 if(U_FAILURE(intStatus)) { |
| 1064 fprintf(stderr, "CollationDataWriter::writeTailoring() failed: %s\n", |
| 1065 u_errorName(intStatus)); |
| 1066 res_close(result); |
| 1067 return NULL; |
| 1068 } |
| 1069 if(isVerbose()) { |
| 1070 printf("%s~%s collation tailoring part sizes:\n", state->filename, colla
tionType); |
| 1071 icu::CollationInfo::printSizes(totalSize, indexes); |
| 1072 } |
| 1073 struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", t
otalSize, dest, NULL, NULL, status); |
| 1074 table_add(result, collationBin, line, status); |
| 1075 if (U_FAILURE(*status)) { |
| 1076 res_close(result); |
| 1077 return NULL; |
| 1078 } |
| 1079 #endif |
| 1080 return result; |
| 1081 } |
| 1082 |
| 1083 static UBool |
| 1084 keepCollationType(const char * /*type*/) { |
| 1085 return TRUE; |
1026 } | 1086 } |
1027 | 1087 |
1028 static struct SResource * | 1088 static struct SResource * |
1029 parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool n
ewCollation, UErrorCode *status) | 1089 parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool n
ewCollation, UErrorCode *status) |
1030 { | 1090 { |
1031 struct SResource *result = NULL; | 1091 struct SResource *result = NULL; |
1032 struct SResource *member = NULL; | 1092 struct SResource *member = NULL; |
1033 struct SResource *collationRes = NULL; | 1093 struct SResource *collationRes = NULL; |
1034 struct UString *tokenValue; | 1094 struct UString *tokenValue; |
1035 struct UString comment; | 1095 struct UString comment; |
1036 enum ETokenType token; | 1096 enum ETokenType token; |
1037 char subtag[1024], typeKeyword[1024]; | 1097 char subtag[1024], typeKeyword[1024]; |
1038 uint32_t line; | 1098 uint32_t line; |
1039 | 1099 |
1040 result = table_open(state->bundle, tag, NULL, status); | 1100 result = table_open(state->bundle, tag, NULL, status); |
1041 | 1101 |
1042 if (result == NULL || U_FAILURE(*status)) | 1102 if (result == NULL || U_FAILURE(*status)) |
1043 { | 1103 { |
1044 return NULL; | 1104 return NULL; |
1045 } | 1105 } |
1046 if(isVerbose()){ | 1106 if(isVerbose()){ |
1047 printf(" collation elements %s at line %i \n", (tag == NULL) ? "(null)"
: tag, (int)startline); | 1107 printf(" collation elements %s at line %i \n", (tag == NULL) ? "(null)"
: tag, (int)startline); |
1048 } | 1108 } |
1049 if(!newCollation) { | 1109 if(!newCollation) { |
1050 return addCollation(state, result, startline, status); | 1110 return addCollation(state, result, "(no type)", startline, status); |
1051 } | 1111 } |
1052 else { | 1112 else { |
1053 for(;;) { | 1113 for(;;) { |
1054 ustr_init(&comment); | 1114 ustr_init(&comment); |
1055 token = getToken(state, &tokenValue, &comment, &line, status); | 1115 token = getToken(state, &tokenValue, &comment, &line, status); |
1056 | 1116 |
1057 if (token == TOK_CLOSE_BRACE) | 1117 if (token == TOK_CLOSE_BRACE) |
1058 { | 1118 { |
1059 return result; | 1119 return result; |
1060 } | 1120 } |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1097 table_add(result, member, line, status); | 1157 table_add(result, member, line, status); |
1098 } | 1158 } |
1099 else | 1159 else |
1100 { | 1160 { |
1101 token = peekToken(state, 0, &tokenValue, &line, &comment, status
); | 1161 token = peekToken(state, 0, &tokenValue, &line, &comment, status
); |
1102 /* this probably needs to be refactored or recursively use the p
arser */ | 1162 /* this probably needs to be refactored or recursively use the p
arser */ |
1103 /* first we assume that our collation table won't have the expli
cit type */ | 1163 /* first we assume that our collation table won't have the expli
cit type */ |
1104 /* then, we cannot handle aliases */ | 1164 /* then, we cannot handle aliases */ |
1105 if(token == TOK_OPEN_BRACE) { | 1165 if(token == TOK_OPEN_BRACE) { |
1106 token = getToken(state, &tokenValue, &comment, &line, status
); | 1166 token = getToken(state, &tokenValue, &comment, &line, status
); |
1107 collationRes = table_open(state->bundle, subtag, NULL, statu
s); | 1167 if (keepCollationType(subtag)) { |
1108 collationRes = addCollation(state, collationRes, startline,
status); /* need to parse the collation data regardless */ | 1168 collationRes = table_open(state->bundle, subtag, NULL, s
tatus); |
1109 if (gIncludeUnihanColl || uprv_strcmp(subtag, "unihan") != 0
) { | 1169 } else { |
| 1170 collationRes = NULL; |
| 1171 } |
| 1172 // need to parse the collation data regardless |
| 1173 collationRes = addCollation(state, collationRes, subtag, sta
rtline, status); |
| 1174 if (collationRes != NULL) { |
1110 table_add(result, collationRes, startline, status); | 1175 table_add(result, collationRes, startline, status); |
1111 } | 1176 } |
1112 } else if(token == TOK_COLON) { /* right now, we'll just try to
see if we have aliases */ | 1177 } else if(token == TOK_COLON) { /* right now, we'll just try to
see if we have aliases */ |
1113 /* we could have a table too */ | 1178 /* we could have a table too */ |
1114 token = peekToken(state, 1, &tokenValue, &line, &comment, st
atus); | 1179 token = peekToken(state, 1, &tokenValue, &line, &comment, st
atus); |
1115 u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(to
kenValue->fChars) + 1); | 1180 u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(to
kenValue->fChars) + 1); |
1116 if(uprv_strcmp(typeKeyword, "alias") == 0) { | 1181 if(uprv_strcmp(typeKeyword, "alias") == 0) { |
1117 member = parseResource(state, subtag, NULL, status); | 1182 member = parseResource(state, subtag, NULL, status); |
1118 if (U_FAILURE(*status)) | 1183 if (U_FAILURE(*status)) |
1119 { | 1184 { |
(...skipping 671 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1791 U_STRING_DECL(k_type_include, "include", 7); | 1856 U_STRING_DECL(k_type_include, "include", 7); |
1792 | 1857 |
1793 /* Various non-standard processing plugins that create one or more special resou
rces. */ | 1858 /* Various non-standard processing plugins that create one or more special resou
rces. */ |
1794 U_STRING_DECL(k_type_plugin_uca_rules, "process(uca_rules)", 18); | 1859 U_STRING_DECL(k_type_plugin_uca_rules, "process(uca_rules)", 18); |
1795 U_STRING_DECL(k_type_plugin_collation, "process(collation)", 18); | 1860 U_STRING_DECL(k_type_plugin_collation, "process(collation)", 18); |
1796 U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)", 23); | 1861 U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)", 23); |
1797 U_STRING_DECL(k_type_plugin_dependency, "process(dependency)", 19); | 1862 U_STRING_DECL(k_type_plugin_dependency, "process(dependency)", 19); |
1798 | 1863 |
1799 typedef enum EResourceType | 1864 typedef enum EResourceType |
1800 { | 1865 { |
1801 RT_UNKNOWN, | 1866 RESTYPE_UNKNOWN, |
1802 RT_STRING, | 1867 RESTYPE_STRING, |
1803 RT_BINARY, | 1868 RESTYPE_BINARY, |
1804 RT_TABLE, | 1869 RESTYPE_TABLE, |
1805 RT_TABLE_NO_FALLBACK, | 1870 RESTYPE_TABLE_NO_FALLBACK, |
1806 RT_INTEGER, | 1871 RESTYPE_INTEGER, |
1807 RT_ARRAY, | 1872 RESTYPE_ARRAY, |
1808 RT_ALIAS, | 1873 RESTYPE_ALIAS, |
1809 RT_INTVECTOR, | 1874 RESTYPE_INTVECTOR, |
1810 RT_IMPORT, | 1875 RESTYPE_IMPORT, |
1811 RT_INCLUDE, | 1876 RESTYPE_INCLUDE, |
1812 RT_PROCESS_UCA_RULES, | 1877 RESTYPE_PROCESS_UCA_RULES, |
1813 RT_PROCESS_COLLATION, | 1878 RESTYPE_PROCESS_COLLATION, |
1814 RT_PROCESS_TRANSLITERATOR, | 1879 RESTYPE_PROCESS_TRANSLITERATOR, |
1815 RT_PROCESS_DEPENDENCY, | 1880 RESTYPE_PROCESS_DEPENDENCY, |
1816 RT_RESERVED | 1881 RESTYPE_RESERVED |
1817 } EResourceType; | 1882 } EResourceType; |
1818 | 1883 |
1819 static struct { | 1884 static struct { |
1820 const char *nameChars; /* only used for debugging */ | 1885 const char *nameChars; /* only used for debugging */ |
1821 const UChar *nameUChars; | 1886 const UChar *nameUChars; |
1822 ParseResourceFunction *parseFunction; | 1887 ParseResourceFunction *parseFunction; |
1823 } gResourceTypes[] = { | 1888 } gResourceTypes[] = { |
1824 {"Unknown", NULL, NULL}, | 1889 {"Unknown", NULL, NULL}, |
1825 {"string", k_type_string, parseString}, | 1890 {"string", k_type_string, parseString}, |
1826 {"binary", k_type_binary, parseBinary}, | 1891 {"binary", k_type_binary, parseBinary}, |
1827 {"table", k_type_table, parseTable}, | 1892 {"table", k_type_table, parseTable}, |
1828 {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will
never be called */ | 1893 {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will
never be called */ |
1829 {"integer", k_type_integer, parseInteger}, | 1894 {"integer", k_type_integer, parseInteger}, |
1830 {"array", k_type_array, parseArray}, | 1895 {"array", k_type_array, parseArray}, |
1831 {"alias", k_type_alias, parseAlias}, | 1896 {"alias", k_type_alias, parseAlias}, |
1832 {"intvector", k_type_intvector, parseIntVector}, | 1897 {"intvector", k_type_intvector, parseIntVector}, |
1833 {"import", k_type_import, parseImport}, | 1898 {"import", k_type_import, parseImport}, |
1834 {"include", k_type_include, parseInclude}, | 1899 {"include", k_type_include, parseInclude}, |
1835 {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules}, | 1900 {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules}, |
1836 {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet
*/}, | 1901 {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet
*/}, |
1837 {"process(transliterator)", k_type_plugin_transliterator, parseTransliterato
r}, | 1902 {"process(transliterator)", k_type_plugin_transliterator, parseTransliterato
r}, |
1838 {"process(dependency)", k_type_plugin_dependency, parseDependency}, | 1903 {"process(dependency)", k_type_plugin_dependency, parseDependency}, |
1839 {"reserved", NULL, NULL} | 1904 {"reserved", NULL, NULL} |
1840 }; | 1905 }; |
1841 | 1906 |
1842 void initParser(UBool omitCollationRules) | 1907 void initParser() |
1843 { | 1908 { |
1844 U_STRING_INIT(k_type_string, "string", 6); | 1909 U_STRING_INIT(k_type_string, "string", 6); |
1845 U_STRING_INIT(k_type_binary, "binary", 6); | 1910 U_STRING_INIT(k_type_binary, "binary", 6); |
1846 U_STRING_INIT(k_type_bin, "bin", 3); | 1911 U_STRING_INIT(k_type_bin, "bin", 3); |
1847 U_STRING_INIT(k_type_table, "table", 5); | 1912 U_STRING_INIT(k_type_table, "table", 5); |
1848 U_STRING_INIT(k_type_table_no_fallback, "table(nofallback)", 17)
; | 1913 U_STRING_INIT(k_type_table_no_fallback, "table(nofallback)", 17)
; |
1849 U_STRING_INIT(k_type_int, "int", 3); | 1914 U_STRING_INIT(k_type_int, "int", 3); |
1850 U_STRING_INIT(k_type_integer, "integer", 7); | 1915 U_STRING_INIT(k_type_integer, "integer", 7); |
1851 U_STRING_INIT(k_type_array, "array", 5); | 1916 U_STRING_INIT(k_type_array, "array", 5); |
1852 U_STRING_INIT(k_type_alias, "alias", 5); | 1917 U_STRING_INIT(k_type_alias, "alias", 5); |
1853 U_STRING_INIT(k_type_intvector, "intvector", 9); | 1918 U_STRING_INIT(k_type_intvector, "intvector", 9); |
1854 U_STRING_INIT(k_type_import, "import", 6); | 1919 U_STRING_INIT(k_type_import, "import", 6); |
1855 U_STRING_INIT(k_type_include, "include", 7); | 1920 U_STRING_INIT(k_type_include, "include", 7); |
1856 | 1921 |
1857 U_STRING_INIT(k_type_plugin_uca_rules, "process(uca_rules)", 18)
; | 1922 U_STRING_INIT(k_type_plugin_uca_rules, "process(uca_rules)", 18)
; |
1858 U_STRING_INIT(k_type_plugin_collation, "process(collation)", 18)
; | 1923 U_STRING_INIT(k_type_plugin_collation, "process(collation)", 18)
; |
1859 U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)", 23)
; | 1924 U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)", 23)
; |
1860 U_STRING_INIT(k_type_plugin_dependency, "process(dependency)", 19)
; | 1925 U_STRING_INIT(k_type_plugin_dependency, "process(dependency)", 19)
; |
1861 | |
1862 gOmitCollationRules = omitCollationRules; | |
1863 } | 1926 } |
1864 | 1927 |
1865 static inline UBool isTable(enum EResourceType type) { | 1928 static inline UBool isTable(enum EResourceType type) { |
1866 return (UBool)(type==RT_TABLE || type==RT_TABLE_NO_FALLBACK); | 1929 return (UBool)(type==RESTYPE_TABLE || type==RESTYPE_TABLE_NO_FALLBACK); |
1867 } | 1930 } |
1868 | 1931 |
1869 static enum EResourceType | 1932 static enum EResourceType |
1870 parseResourceType(ParseState* state, UErrorCode *status) | 1933 parseResourceType(ParseState* state, UErrorCode *status) |
1871 { | 1934 { |
1872 struct UString *tokenValue; | 1935 struct UString *tokenValue; |
1873 struct UString comment; | 1936 struct UString comment; |
1874 enum EResourceType result = RT_UNKNOWN; | 1937 enum EResourceType result = RESTYPE_UNKNOWN; |
1875 uint32_t line=0; | 1938 uint32_t line=0; |
1876 ustr_init(&comment); | 1939 ustr_init(&comment); |
1877 expect(state, TOK_STRING, &tokenValue, &comment, &line, status); | 1940 expect(state, TOK_STRING, &tokenValue, &comment, &line, status); |
1878 | 1941 |
1879 if (U_FAILURE(*status)) | 1942 if (U_FAILURE(*status)) |
1880 { | 1943 { |
1881 return RT_UNKNOWN; | 1944 return RESTYPE_UNKNOWN; |
1882 } | 1945 } |
1883 | 1946 |
1884 *status = U_ZERO_ERROR; | 1947 *status = U_ZERO_ERROR; |
1885 | 1948 |
1886 /* Search for normal types */ | 1949 /* Search for normal types */ |
1887 result=RT_UNKNOWN; | 1950 result=RESTYPE_UNKNOWN; |
1888 while ((result=(EResourceType)(result+1)) < RT_RESERVED) { | 1951 while ((result=(EResourceType)(result+1)) < RESTYPE_RESERVED) { |
1889 if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0
) { | 1952 if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0
) { |
1890 break; | 1953 break; |
1891 } | 1954 } |
1892 } | 1955 } |
1893 /* Now search for the aliases */ | 1956 /* Now search for the aliases */ |
1894 if (u_strcmp(tokenValue->fChars, k_type_int) == 0) { | 1957 if (u_strcmp(tokenValue->fChars, k_type_int) == 0) { |
1895 result = RT_INTEGER; | 1958 result = RESTYPE_INTEGER; |
1896 } | 1959 } |
1897 else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) { | 1960 else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) { |
1898 result = RT_BINARY; | 1961 result = RESTYPE_BINARY; |
1899 } | 1962 } |
1900 else if (result == RT_RESERVED) { | 1963 else if (result == RESTYPE_RESERVED) { |
1901 char tokenBuffer[1024]; | 1964 char tokenBuffer[1024]; |
1902 u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer)); | 1965 u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer)); |
1903 tokenBuffer[sizeof(tokenBuffer) - 1] = 0; | 1966 tokenBuffer[sizeof(tokenBuffer) - 1] = 0; |
1904 *status = U_INVALID_FORMAT_ERROR; | 1967 *status = U_INVALID_FORMAT_ERROR; |
1905 error(line, "unknown resource type '%s'", tokenBuffer); | 1968 error(line, "unknown resource type '%s'", tokenBuffer); |
1906 } | 1969 } |
1907 | 1970 |
1908 return result; | 1971 return result; |
1909 } | 1972 } |
1910 | 1973 |
1911 /* parse a non-top-level resource */ | 1974 /* parse a non-top-level resource */ |
1912 static struct SResource * | 1975 static struct SResource * |
1913 parseResource(ParseState* state, char *tag, const struct UString *comment, UErro
rCode *status) | 1976 parseResource(ParseState* state, char *tag, const struct UString *comment, UErro
rCode *status) |
1914 { | 1977 { |
1915 enum ETokenType token; | 1978 enum ETokenType token; |
1916 enum EResourceType resType = RT_UNKNOWN; | 1979 enum EResourceType resType = RESTYPE_UNKNOWN; |
1917 ParseResourceFunction *parseFunction = NULL; | 1980 ParseResourceFunction *parseFunction = NULL; |
1918 struct UString *tokenValue; | 1981 struct UString *tokenValue; |
1919 uint32_t startline; | 1982 uint32_t startline; |
1920 uint32_t line; | 1983 uint32_t line; |
1921 | 1984 |
1922 | 1985 |
1923 token = getToken(state, &tokenValue, NULL, &startline, status); | 1986 token = getToken(state, &tokenValue, NULL, &startline, status); |
1924 | 1987 |
1925 if(isVerbose()){ | 1988 if(isVerbose()){ |
1926 printf(" resource %s at line %i \n", (tag == NULL) ? "(null)" : tag, (i
nt)startline); | 1989 printf(" resource %s at line %i \n", (tag == NULL) ? "(null)" : tag, (i
nt)startline); |
(...skipping 28 matching lines...) Expand all Loading... |
1955 case TOK_OPEN_BRACE: | 2018 case TOK_OPEN_BRACE: |
1956 break; | 2019 break; |
1957 | 2020 |
1958 default: | 2021 default: |
1959 *status = U_INVALID_FORMAT_ERROR; | 2022 *status = U_INVALID_FORMAT_ERROR; |
1960 error(startline, "syntax error while reading a resource, expected '{' or
':'"); | 2023 error(startline, "syntax error while reading a resource, expected '{' or
':'"); |
1961 return NULL; | 2024 return NULL; |
1962 } | 2025 } |
1963 | 2026 |
1964 | 2027 |
1965 if (resType == RT_UNKNOWN) | 2028 if (resType == RESTYPE_UNKNOWN) |
1966 { | 2029 { |
1967 /* No explicit type, so try to work it out. At this point, we've read t
he first '{'. | 2030 /* No explicit type, so try to work it out. At this point, we've read t
he first '{'. |
1968 We could have any of the following: | 2031 We could have any of the following: |
1969 { { => array (nested) | 2032 { { => array (nested) |
1970 { :/} => array | 2033 { :/} => array |
1971 { string , => string array | 2034 { string , => string array |
1972 | 2035 |
1973 { string { => table | 2036 { string { => table |
1974 | 2037 |
1975 { string :/{ => table | 2038 { string :/{ => table |
1976 { string } => string | 2039 { string } => string |
1977 */ | 2040 */ |
1978 | 2041 |
1979 token = peekToken(state, 0, NULL, &line, NULL,status); | 2042 token = peekToken(state, 0, NULL, &line, NULL,status); |
1980 | 2043 |
1981 if (U_FAILURE(*status)) | 2044 if (U_FAILURE(*status)) |
1982 { | 2045 { |
1983 return NULL; | 2046 return NULL; |
1984 } | 2047 } |
1985 | 2048 |
1986 if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BR
ACE ) | 2049 if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BR
ACE ) |
1987 { | 2050 { |
1988 resType = RT_ARRAY; | 2051 resType = RESTYPE_ARRAY; |
1989 } | 2052 } |
1990 else if (token == TOK_STRING) | 2053 else if (token == TOK_STRING) |
1991 { | 2054 { |
1992 token = peekToken(state, 1, NULL, &line, NULL, status); | 2055 token = peekToken(state, 1, NULL, &line, NULL, status); |
1993 | 2056 |
1994 if (U_FAILURE(*status)) | 2057 if (U_FAILURE(*status)) |
1995 { | 2058 { |
1996 return NULL; | 2059 return NULL; |
1997 } | 2060 } |
1998 | 2061 |
1999 switch (token) | 2062 switch (token) |
2000 { | 2063 { |
2001 case TOK_COMMA: resType = RT_ARRAY; break; | 2064 case TOK_COMMA: resType = RESTYPE_ARRAY; break; |
2002 case TOK_OPEN_BRACE: resType = RT_TABLE; break; | 2065 case TOK_OPEN_BRACE: resType = RESTYPE_TABLE; break; |
2003 case TOK_CLOSE_BRACE: resType = RT_STRING; break; | 2066 case TOK_CLOSE_BRACE: resType = RESTYPE_STRING; break; |
2004 case TOK_COLON: resType = RT_TABLE; break; | 2067 case TOK_COLON: resType = RESTYPE_TABLE; break; |
2005 default: | 2068 default: |
2006 *status = U_INVALID_FORMAT_ERROR; | 2069 *status = U_INVALID_FORMAT_ERROR; |
2007 error(line, "Unexpected token after string, expected ',', '{' or
'}'"); | 2070 error(line, "Unexpected token after string, expected ',', '{' or
'}'"); |
2008 return NULL; | 2071 return NULL; |
2009 } | 2072 } |
2010 } | 2073 } |
2011 else | 2074 else |
2012 { | 2075 { |
2013 *status = U_INVALID_FORMAT_ERROR; | 2076 *status = U_INVALID_FORMAT_ERROR; |
2014 error(line, "Unexpected token after '{'"); | 2077 error(line, "Unexpected token after '{'"); |
2015 return NULL; | 2078 return NULL; |
2016 } | 2079 } |
2017 | 2080 |
2018 /* printf("Type guessed as %s\n", resourceNames[resType]); */ | 2081 /* printf("Type guessed as %s\n", resourceNames[resType]); */ |
2019 } else if(resType == RT_TABLE_NO_FALLBACK) { | 2082 } else if(resType == RESTYPE_TABLE_NO_FALLBACK) { |
2020 *status = U_INVALID_FORMAT_ERROR; | 2083 *status = U_INVALID_FORMAT_ERROR; |
2021 error(startline, "error: %s resource type not valid except on top bundle
level", gResourceTypes[resType].nameChars); | 2084 error(startline, "error: %s resource type not valid except on top bundle
level", gResourceTypes[resType].nameChars); |
2022 return NULL; | 2085 return NULL; |
2023 } | 2086 } |
2024 | 2087 |
2025 | 2088 |
2026 /* We should now know what we need to parse next, so call the appropriate pa
rser | 2089 /* We should now know what we need to parse next, so call the appropriate pa
rser |
2027 function and return. */ | 2090 function and return. */ |
2028 parseFunction = gResourceTypes[resType].parseFunction; | 2091 parseFunction = gResourceTypes[resType].parseFunction; |
2029 if (parseFunction != NULL) { | 2092 if (parseFunction != NULL) { |
2030 return parseFunction(state, tag, startline, comment, status); | 2093 return parseFunction(state, tag, startline, comment, status); |
2031 } | 2094 } |
2032 else { | 2095 else { |
2033 *status = U_INTERNAL_PROGRAM_ERROR; | 2096 *status = U_INTERNAL_PROGRAM_ERROR; |
2034 error(startline, "internal error: %s resource type found and not handled
", gResourceTypes[resType].nameChars); | 2097 error(startline, "internal error: %s resource type found and not handled
", gResourceTypes[resType].nameChars); |
2035 } | 2098 } |
2036 | 2099 |
2037 return NULL; | 2100 return NULL; |
2038 } | 2101 } |
2039 | 2102 |
2040 /* parse the top-level resource */ | 2103 /* parse the top-level resource */ |
2041 struct SRBRoot * | 2104 struct SRBRoot * |
2042 parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, UBool makeBina
ryCollation, | 2105 parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, const char *fi
lename, |
2043 UErrorCode *status) | 2106 UBool makeBinaryCollation, UBool omitCollationRules, UErrorCode *status) |
2044 { | 2107 { |
2045 struct UString *tokenValue; | 2108 struct UString *tokenValue; |
2046 struct UString comment; | 2109 struct UString comment; |
2047 uint32_t line; | 2110 uint32_t line; |
2048 enum EResourceType bundleType; | 2111 enum EResourceType bundleType; |
2049 enum ETokenType token; | 2112 enum ETokenType token; |
2050 ParseState state; | 2113 ParseState state; |
2051 uint32_t i; | 2114 uint32_t i; |
2052 | 2115 |
2053 | 2116 |
2054 for (i = 0; i < MAX_LOOKAHEAD + 1; i++) | 2117 for (i = 0; i < MAX_LOOKAHEAD + 1; i++) |
2055 { | 2118 { |
2056 ustr_init(&state.lookahead[i].value); | 2119 ustr_init(&state.lookahead[i].value); |
2057 ustr_init(&state.lookahead[i].comment); | 2120 ustr_init(&state.lookahead[i].comment); |
2058 } | 2121 } |
2059 | 2122 |
2060 initLookahead(&state, buf, status); | 2123 initLookahead(&state, buf, status); |
2061 | 2124 |
2062 state.inputdir = inputDir; | 2125 state.inputdir = inputDir; |
2063 state.inputdirLength = (state.inputdir != NULL) ? (uint32_t)uprv_strlen(stat
e.inputdir) : 0; | 2126 state.inputdirLength = (state.inputdir != NULL) ? (uint32_t)uprv_strlen(stat
e.inputdir) : 0; |
2064 state.outputdir = outputDir; | 2127 state.outputdir = outputDir; |
2065 state.outputdirLength = (state.outputdir != NULL) ? (uint32_t)uprv_strlen(st
ate.outputdir) : 0; | 2128 state.outputdirLength = (state.outputdir != NULL) ? (uint32_t)uprv_strlen(st
ate.outputdir) : 0; |
| 2129 state.filename = filename; |
2066 state.makeBinaryCollation = makeBinaryCollation; | 2130 state.makeBinaryCollation = makeBinaryCollation; |
| 2131 state.omitCollationRules = omitCollationRules; |
2067 | 2132 |
2068 ustr_init(&comment); | 2133 ustr_init(&comment); |
2069 expect(&state, TOK_STRING, &tokenValue, &comment, NULL, status); | 2134 expect(&state, TOK_STRING, &tokenValue, &comment, NULL, status); |
2070 | 2135 |
2071 state.bundle = bundle_open(&comment, FALSE, status); | 2136 state.bundle = bundle_open(&comment, FALSE, status); |
2072 | 2137 |
2073 if (state.bundle == NULL || U_FAILURE(*status)) | 2138 if (state.bundle == NULL || U_FAILURE(*status)) |
2074 { | 2139 { |
2075 return NULL; | 2140 return NULL; |
2076 } | 2141 } |
(...skipping 16 matching lines...) Expand all Loading... |
2093 *status=U_PARSE_ERROR; | 2158 *status=U_PARSE_ERROR; |
2094 error(line, "parse error. Stopped parsing with %s", u_errorName(*st
atus)); | 2159 error(line, "parse error. Stopped parsing with %s", u_errorName(*st
atus)); |
2095 } | 2160 } |
2096 } | 2161 } |
2097 else | 2162 else |
2098 { | 2163 { |
2099 /* not a colon */ | 2164 /* not a colon */ |
2100 if(token==TOK_OPEN_BRACE) | 2165 if(token==TOK_OPEN_BRACE) |
2101 { | 2166 { |
2102 *status=U_ZERO_ERROR; | 2167 *status=U_ZERO_ERROR; |
2103 bundleType=RT_TABLE; | 2168 bundleType=RESTYPE_TABLE; |
2104 } | 2169 } |
2105 else | 2170 else |
2106 { | 2171 { |
2107 /* neither colon nor open brace */ | 2172 /* neither colon nor open brace */ |
2108 *status=U_PARSE_ERROR; | 2173 *status=U_PARSE_ERROR; |
2109 bundleType=RT_UNKNOWN; | 2174 bundleType=RESTYPE_UNKNOWN; |
2110 error(line, "parse error, did not find open-brace '{' or colon ':',
stopped with %s", u_errorName(*status)); | 2175 error(line, "parse error, did not find open-brace '{' or colon ':',
stopped with %s", u_errorName(*status)); |
2111 } | 2176 } |
2112 } | 2177 } |
2113 | 2178 |
2114 if (U_FAILURE(*status)) | 2179 if (U_FAILURE(*status)) |
2115 { | 2180 { |
2116 bundle_close(state.bundle, status); | 2181 bundle_close(state.bundle, status); |
2117 return NULL; | 2182 return NULL; |
2118 } | 2183 } |
2119 | 2184 |
2120 if(bundleType==RT_TABLE_NO_FALLBACK) { | 2185 if(bundleType==RESTYPE_TABLE_NO_FALLBACK) { |
2121 /* | 2186 /* |
2122 * Parse a top-level table with the table(nofallback) declaration. | 2187 * Parse a top-level table with the table(nofallback) declaration. |
2123 * This is the same as a regular table, but also sets the | 2188 * This is the same as a regular table, but also sets the |
2124 * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] . | 2189 * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] . |
2125 */ | 2190 */ |
2126 state.bundle->noFallback=TRUE; | 2191 state.bundle->noFallback=TRUE; |
2127 } | 2192 } |
2128 /* top-level tables need not handle special table names like "collations" */ | 2193 /* top-level tables need not handle special table names like "collations" */ |
2129 realParseTable(&state, state.bundle->fRoot, NULL, line, status); | 2194 realParseTable(&state, state.bundle->fRoot, NULL, line, status); |
2130 if(dependencyArray!=NULL){ | 2195 if(dependencyArray!=NULL){ |
(...skipping 13 matching lines...) Expand all Loading... |
2144 if(isStrict()){ | 2209 if(isStrict()){ |
2145 *status = U_INVALID_FORMAT_ERROR; | 2210 *status = U_INVALID_FORMAT_ERROR; |
2146 return NULL; | 2211 return NULL; |
2147 } | 2212 } |
2148 } | 2213 } |
2149 | 2214 |
2150 cleanupLookahead(&state); | 2215 cleanupLookahead(&state); |
2151 ustr_deinit(&comment); | 2216 ustr_deinit(&comment); |
2152 return state.bundle; | 2217 return state.bundle; |
2153 } | 2218 } |
OLD | NEW |