Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(49)

Side by Side Diff: source/tools/genrb/parse.cpp

Issue 845603002: Update ICU to 54.1 step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master
Patch Set: remove unusued directories Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/tools/genrb/parse.h ('k') | source/tools/genrb/prscmnts.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 ******************************************************************************* 2 *******************************************************************************
3 * 3 *
4 * Copyright (C) 1998-2013, International Business Machines 4 * Copyright (C) 1998-2014, International Business Machines
5 * Corporation and others. All Rights Reserved. 5 * Corporation and others. All Rights Reserved.
6 * 6 *
7 ******************************************************************************* 7 *******************************************************************************
8 * 8 *
9 * File parse.cpp 9 * File parse.cpp
10 * 10 *
11 * Modification History: 11 * Modification History:
12 * 12 *
13 * Date Name Description 13 * Date Name Description
14 * 05/26/99 stephen Creation. 14 * 05/26/99 stephen Creation.
15 * 02/25/00 weiv Overhaul to write udata 15 * 02/25/00 weiv Overhaul to write udata
16 * 5/10/01 Ram removed ustdio dependency 16 * 5/10/01 Ram removed ustdio dependency
17 * 06/10/2001 Dominic Ludlam <dom@recoil.org> Rewritten 17 * 06/10/2001 Dominic Ludlam <dom@recoil.org> Rewritten
18 ******************************************************************************* 18 *******************************************************************************
19 */ 19 */
20 20
21 #include "ucol_imp.h" 21 // Safer use of UnicodeString.
22 #ifndef UNISTR_FROM_CHAR_EXPLICIT
23 # define UNISTR_FROM_CHAR_EXPLICIT explicit
24 #endif
25
26 // Less important, but still a good idea.
27 #ifndef UNISTR_FROM_STRING_EXPLICIT
28 # define UNISTR_FROM_STRING_EXPLICIT explicit
29 #endif
30
22 #include "parse.h" 31 #include "parse.h"
23 #include "errmsg.h" 32 #include "errmsg.h"
24 #include "uhash.h" 33 #include "uhash.h"
25 #include "cmemory.h" 34 #include "cmemory.h"
26 #include "cstring.h" 35 #include "cstring.h"
27 #include "uinvchar.h" 36 #include "uinvchar.h"
28 #include "read.h" 37 #include "read.h"
29 #include "ustr.h" 38 #include "ustr.h"
30 #include "reslist.h" 39 #include "reslist.h"
31 #include "rbt_pars.h" 40 #include "rbt_pars.h"
32 #include "genrb.h" 41 #include "genrb.h"
33 #include "unicode/ustring.h" 42 #include "unicode/ustring.h"
34 #include "unicode/uscript.h" 43 #include "unicode/uscript.h"
44 #include "unicode/utf16.h"
35 #include "unicode/putil.h" 45 #include "unicode/putil.h"
46 #include "collationbuilder.h"
47 #include "collationdata.h"
48 #include "collationdatareader.h"
49 #include "collationdatawriter.h"
50 #include "collationfastlatinbuilder.h"
51 #include "collationinfo.h"
52 #include "collationroot.h"
53 #include "collationruleparser.h"
54 #include "collationtailoring.h"
36 #include <stdio.h> 55 #include <stdio.h>
37 56
38 /* Number of tokens to read ahead of the current stream position */ 57 /* Number of tokens to read ahead of the current stream position */
39 #define MAX_LOOKAHEAD 3 58 #define MAX_LOOKAHEAD 3
40 59
41 #define CR 0x000D 60 #define CR 0x000D
42 #define LF 0x000A 61 #define LF 0x000A
43 #define SPACE 0x0020 62 #define SPACE 0x0020
44 #define TAB 0x0009 63 #define TAB 0x0009
45 #define ESCAPE 0x005C 64 #define ESCAPE 0x005C
46 #define HASH 0x0023 65 #define HASH 0x0023
47 #define QUOTE 0x0027 66 #define QUOTE 0x0027
48 #define ZERO 0x0030 67 #define ZERO 0x0030
49 #define STARTCOMMAND 0x005B 68 #define STARTCOMMAND 0x005B
50 #define ENDCOMMAND 0x005D 69 #define ENDCOMMAND 0x005D
51 #define OPENSQBRACKET 0x005B 70 #define OPENSQBRACKET 0x005B
52 #define CLOSESQBRACKET 0x005D 71 #define CLOSESQBRACKET 0x005D
53 72
73 using icu::LocalPointer;
74 using icu::UnicodeString;
75
54 struct Lookahead 76 struct Lookahead
55 { 77 {
56 enum ETokenType type; 78 enum ETokenType type;
57 struct UString value; 79 struct UString value;
58 struct UString comment; 80 struct UString comment;
59 uint32_t line; 81 uint32_t line;
60 }; 82 };
61 83
62 /* keep in sync with token defines in read.h */ 84 /* keep in sync with token defines in read.h */
63 const char *tokenNames[TOK_TOKEN_COUNT] = 85 const char *tokenNames[TOK_TOKEN_COUNT] =
(...skipping 13 matching lines...) Expand all
77 99
78 typedef struct { 100 typedef struct {
79 struct Lookahead lookahead[MAX_LOOKAHEAD + 1]; 101 struct Lookahead lookahead[MAX_LOOKAHEAD + 1];
80 uint32_t lookaheadPosition; 102 uint32_t lookaheadPosition;
81 UCHARBUF *buffer; 103 UCHARBUF *buffer;
82 struct SRBRoot *bundle; 104 struct SRBRoot *bundle;
83 const char *inputdir; 105 const char *inputdir;
84 uint32_t inputdirLength; 106 uint32_t inputdirLength;
85 const char *outputdir; 107 const char *outputdir;
86 uint32_t outputdirLength; 108 uint32_t outputdirLength;
109 const char *filename;
87 UBool makeBinaryCollation; 110 UBool makeBinaryCollation;
111 UBool omitCollationRules;
88 } ParseState; 112 } ParseState;
89 113
90 static UBool gOmitCollationRules = FALSE;
91
92 typedef struct SResource * 114 typedef struct SResource *
93 ParseResourceFunction(ParseState* state, char *tag, uint32_t startline, const st ruct UString* comment, UErrorCode *status); 115 ParseResourceFunction(ParseState* state, char *tag, uint32_t startline, const st ruct UString* comment, UErrorCode *status);
94 116
95 static struct SResource *parseResource(ParseState* state, char *tag, const struc t UString *comment, UErrorCode *status); 117 static struct SResource *parseResource(ParseState* state, char *tag, const struc t UString *comment, UErrorCode *status);
96 118
97 /* The nature of the lookahead buffer: 119 /* The nature of the lookahead buffer:
98 There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer. This provides 120 There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer. This provides
99 MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value. 121 MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value.
100 When getToken is called, the current pointer is moved to the next slot and the 122 When getToken is called, the current pointer is moved to the next slot and the
101 old slot is filled with the next token from the reader by calling getNextToken. 123 old slot is filled with the next token from the reader by calling getNextToken.
(...skipping 214 matching lines...) Expand 10 before | Expand all | Expand 10 after
316 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength); 338 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
317 339
318 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 340 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
319 341
320 if (U_FAILURE(*status)) 342 if (U_FAILURE(*status))
321 { 343 {
322 return NULL; 344 return NULL;
323 } 345 }
324 uprv_strcat(filename, cs); 346 uprv_strcat(filename, cs);
325 347
326 if(gOmitCollationRules) { 348 if(state->omitCollationRules) {
327 return res_none(); 349 return res_none();
328 } 350 }
329 351
330 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status); 352 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
331 353
332 if (U_FAILURE(*status)) { 354 if (U_FAILURE(*status)) {
333 error(line, "An error occured while opening the input file %s\n", filena me); 355 error(line, "An error occured while opening the input file %s\n", filena me);
334 return NULL; 356 return NULL;
335 } 357 }
336 358
(...skipping 280 matching lines...) Expand 10 before | Expand all | Expand 10 after
617 if (U_FAILURE(*status)) 639 if (U_FAILURE(*status))
618 { 640 {
619 res_close(result); 641 res_close(result);
620 return NULL; 642 return NULL;
621 } 643 }
622 } 644 }
623 645
624 return result; 646 return result;
625 } 647 }
626 648
627 typedef struct{ 649 #if !UCONFIG_NO_COLLATION
628 const char* inputDir; 650
629 const char* outputDir; 651 namespace {
630 } GenrbData;
631 652
632 static struct SResource* resLookup(struct SResource* res, const char* key){ 653 static struct SResource* resLookup(struct SResource* res, const char* key){
633 struct SResource *current = NULL; 654 struct SResource *current = NULL;
634 struct SResTable *list; 655 struct SResTable *list;
635 if (res == res_none()) { 656 if (res == res_none()) {
636 return NULL; 657 return NULL;
637 } 658 }
638 659
639 list = &(res->u.fTable); 660 list = &(res->u.fTable);
640 661
641 current = list->fFirst; 662 current = list->fFirst;
642 while (current != NULL) { 663 while (current != NULL) {
643 if (uprv_strcmp(((list->fRoot->fKeys) + (current->fKey)), key) == 0) { 664 if (uprv_strcmp(((list->fRoot->fKeys) + (current->fKey)), key) == 0) {
644 return current; 665 return current;
645 } 666 }
646 current = current->fNext; 667 current = current->fNext;
647 } 668 }
648 return NULL; 669 return NULL;
649 } 670 }
650 671
651 static const UChar* importFromDataFile(void* context, const char* locale, const char* type, int32_t* pLength, UErrorCode* status){ 672 class GenrbImporter : public icu::CollationRuleParser::Importer {
673 public:
674 GenrbImporter(const char *in, const char *out) : inputDir(in), outputDir(out ) {}
675 virtual ~GenrbImporter();
676 virtual void getRules(
677 const char *localeID, const char *collationType,
678 UnicodeString &rules,
679 const char *&errorReason, UErrorCode &errorCode);
680
681 private:
682 const char *inputDir;
683 const char *outputDir;
684 };
685
686 GenrbImporter::~GenrbImporter() {}
687
688 void
689 GenrbImporter::getRules(
690 const char *localeID, const char *collationType,
691 UnicodeString &rules,
692 const char *& /*errorReason*/, UErrorCode &errorCode) {
652 struct SRBRoot *data = NULL; 693 struct SRBRoot *data = NULL;
653 UCHARBUF *ucbuf = NULL; 694 UCHARBUF *ucbuf = NULL;
654 GenrbData* genrbdata = (GenrbData*) context; 695 int localeLength = strlen(localeID);
655 int localeLength = strlen(locale);
656 char* filename = (char*)uprv_malloc(localeLength+5); 696 char* filename = (char*)uprv_malloc(localeLength+5);
657 char *inputDirBuf = NULL; 697 char *inputDirBuf = NULL;
658 char *openFileName = NULL; 698 char *openFileName = NULL;
659 const char* cp = ""; 699 const char* cp = "";
660 UChar* urules = NULL;
661 int32_t urulesLength = 0;
662 int32_t i = 0; 700 int32_t i = 0;
663 int32_t dirlen = 0; 701 int32_t dirlen = 0;
664 int32_t filelen = 0; 702 int32_t filelen = 0;
665 struct SResource* root; 703 struct SResource* root;
666 struct SResource* collations; 704 struct SResource* collations;
667 struct SResource* collation; 705 struct SResource* collation;
668 struct SResource* sequence; 706 struct SResource* sequence;
669 707
670 memcpy(filename, locale, localeLength); 708 memcpy(filename, localeID, localeLength);
671 for(i = 0; i < localeLength; i++){ 709 for(i = 0; i < localeLength; i++){
672 if(filename[i] == '-'){ 710 if(filename[i] == '-'){
673 filename[i] = '_'; 711 filename[i] = '_';
674 } 712 }
675 } 713 }
676 filename[localeLength] = '.'; 714 filename[localeLength] = '.';
677 filename[localeLength+1] = 't'; 715 filename[localeLength+1] = 't';
678 filename[localeLength+2] = 'x'; 716 filename[localeLength+2] = 'x';
679 filename[localeLength+3] = 't'; 717 filename[localeLength+3] = 't';
680 filename[localeLength+4] = 0; 718 filename[localeLength+4] = 0;
681 719
682 720
683 if (status==NULL || U_FAILURE(*status)) { 721 if (U_FAILURE(errorCode)) {
684 return NULL; 722 return;
685 } 723 }
686 if(filename==NULL){ 724 if(filename==NULL){
687 *status=U_ILLEGAL_ARGUMENT_ERROR; 725 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
688 return NULL; 726 return;
689 }else{ 727 }else{
690 filelen = (int32_t)uprv_strlen(filename); 728 filelen = (int32_t)uprv_strlen(filename);
691 } 729 }
692 if(genrbdata->inputDir == NULL) { 730 if(inputDir == NULL) {
693 const char *filenameBegin = uprv_strrchr(filename, U_FILE_SEP_CHAR); 731 const char *filenameBegin = uprv_strrchr(filename, U_FILE_SEP_CHAR);
694 openFileName = (char *) uprv_malloc(dirlen + filelen + 2); 732 openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
695 openFileName[0] = '\0'; 733 openFileName[0] = '\0';
696 if (filenameBegin != NULL) { 734 if (filenameBegin != NULL) {
697 /* 735 /*
698 * When a filename ../../../data/root.txt is specified, 736 * When a filename ../../../data/root.txt is specified,
699 * we presume that the input directory is ../../../data 737 * we presume that the input directory is ../../../data
700 * This is very important when the resource file includes 738 * This is very important when the resource file includes
701 * another file, like UCARules.txt or thaidict.brk. 739 * another file, like UCARules.txt or thaidict.brk.
702 */ 740 */
703 int32_t filenameSize = (int32_t)(filenameBegin - filename + 1); 741 int32_t filenameSize = (int32_t)(filenameBegin - filename + 1);
704 inputDirBuf = uprv_strncpy((char *)uprv_malloc(filenameSize), filena me, filenameSize); 742 inputDirBuf = (char *)uprv_malloc(filenameSize);
705 743
706 /* test for NULL */ 744 /* test for NULL */
707 if(inputDirBuf == NULL) { 745 if(inputDirBuf == NULL) {
708 *status = U_MEMORY_ALLOCATION_ERROR; 746 errorCode = U_MEMORY_ALLOCATION_ERROR;
709 goto finish; 747 goto finish;
710 } 748 }
711 749
750 uprv_strncpy(inputDirBuf, filename, filenameSize);
712 inputDirBuf[filenameSize - 1] = 0; 751 inputDirBuf[filenameSize - 1] = 0;
713 genrbdata->inputDir = inputDirBuf; 752 inputDir = inputDirBuf;
714 dirlen = (int32_t)uprv_strlen(genrbdata->inputDir); 753 dirlen = (int32_t)uprv_strlen(inputDir);
715 } 754 }
716 }else{ 755 }else{
717 dirlen = (int32_t)uprv_strlen(genrbdata->inputDir); 756 dirlen = (int32_t)uprv_strlen(inputDir);
718 757
719 if(genrbdata->inputDir[dirlen-1] != U_FILE_SEP_CHAR) { 758 if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) {
720 openFileName = (char *) uprv_malloc(dirlen + filelen + 2); 759 openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
721 760
722 /* test for NULL */ 761 /* test for NULL */
723 if(openFileName == NULL) { 762 if(openFileName == NULL) {
724 *status = U_MEMORY_ALLOCATION_ERROR; 763 errorCode = U_MEMORY_ALLOCATION_ERROR;
725 goto finish; 764 goto finish;
726 } 765 }
727 766
728 openFileName[0] = '\0'; 767 openFileName[0] = '\0';
729 /* 768 /*
730 * append the input dir to openFileName if the first char in 769 * append the input dir to openFileName if the first char in
731 * filename is not file seperation char and the last char input directory is not '.'. 770 * filename is not file seperation char and the last char input directory is not '.'.
732 * This is to support : 771 * This is to support :
733 * genrb -s. /home/icu/data 772 * genrb -s. /home/icu/data
734 * genrb -s. icu/data 773 * genrb -s. icu/data
735 * The user cannot mix notations like 774 * The user cannot mix notations like
736 * genrb -s. /icu/data --- the absolute path specified. -s redundant 775 * genrb -s. /icu/data --- the absolute path specified. -s redundant
737 * user should use 776 * user should use
738 * genrb -s. icu/data --- start from CWD and look in icu/data dir 777 * genrb -s. icu/data --- start from CWD and look in icu/data dir
739 */ 778 */
740 if( (filename[0] != U_FILE_SEP_CHAR) && (genrbdata->inputDir[dirlen- 1] !='.')){ 779 if( (filename[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')){
741 uprv_strcpy(openFileName, genrbdata->inputDir); 780 uprv_strcpy(openFileName, inputDir);
742 openFileName[dirlen] = U_FILE_SEP_CHAR; 781 openFileName[dirlen] = U_FILE_SEP_CHAR;
743 } 782 }
744 openFileName[dirlen + 1] = '\0'; 783 openFileName[dirlen + 1] = '\0';
745 } else { 784 } else {
746 openFileName = (char *) uprv_malloc(dirlen + filelen + 1); 785 openFileName = (char *) uprv_malloc(dirlen + filelen + 1);
747 786
748 /* test for NULL */ 787 /* test for NULL */
749 if(openFileName == NULL) { 788 if(openFileName == NULL) {
750 *status = U_MEMORY_ALLOCATION_ERROR; 789 errorCode = U_MEMORY_ALLOCATION_ERROR;
751 goto finish; 790 goto finish;
752 } 791 }
753 792
754 uprv_strcpy(openFileName, genrbdata->inputDir); 793 uprv_strcpy(openFileName, inputDir);
755 794
756 } 795 }
757 } 796 }
758 uprv_strcat(openFileName, filename); 797 uprv_strcat(openFileName, filename);
759 /* printf("%s\n", openFileName); */ 798 /* printf("%s\n", openFileName); */
760 *status = U_ZERO_ERROR; 799 errorCode = U_ZERO_ERROR;
761 ucbuf = ucbuf_open(openFileName, &cp,getShowWarning(),TRUE, status); 800 ucbuf = ucbuf_open(openFileName, &cp,getShowWarning(),TRUE, &errorCode);
762 801
763 if(*status == U_FILE_ACCESS_ERROR) { 802 if(errorCode == U_FILE_ACCESS_ERROR) {
764 803
765 fprintf(stderr, "couldn't open file %s\n", openFileName == NULL ? filena me : openFileName); 804 fprintf(stderr, "couldn't open file %s\n", openFileName == NULL ? filena me : openFileName);
766 goto finish; 805 goto finish;
767 } 806 }
768 if (ucbuf == NULL || U_FAILURE(*status)) { 807 if (ucbuf == NULL || U_FAILURE(errorCode)) {
769 fprintf(stderr, "An error occured processing file %s. Error: %s\n", open FileName == NULL ? filename : openFileName,u_errorName(*status)); 808 fprintf(stderr, "An error occured processing file %s. Error: %s\n", open FileName == NULL ? filename : openFileName,u_errorName(errorCode));
770 goto finish; 809 goto finish;
771 } 810 }
772 811
773 /* Parse the data into an SRBRoot */ 812 /* Parse the data into an SRBRoot */
774 data = parse(ucbuf, genrbdata->inputDir, genrbdata->outputDir, FALSE, status ); 813 data = parse(ucbuf, inputDir, outputDir, filename, FALSE, FALSE, &errorCode) ;
814 if (U_FAILURE(errorCode)) {
815 goto finish;
816 }
775 817
776 root = data->fRoot; 818 root = data->fRoot;
777 collations = resLookup(root, "collations"); 819 collations = resLookup(root, "collations");
778 if (collations != NULL) { 820 if (collations != NULL) {
779 collation = resLookup(collations, type); 821 collation = resLookup(collations, collationType);
780 if (collation != NULL) { 822 if (collation != NULL) {
781 sequence = resLookup(collation, "Sequence"); 823 sequence = resLookup(collation, "Sequence");
782 if (sequence != NULL) { 824 if (sequence != NULL) {
783 urules = sequence->u.fString.fChars; 825 // No string pointer aliasing so that we need not hold onto the resource bundle.
784 urulesLength = sequence->u.fString.fLength; 826 rules.setTo(sequence->u.fString.fChars, sequence->u.fString.fLength);
785 *pLength = urulesLength;
786 } 827 }
787 } 828 }
788 } 829 }
789 830
790 finish: 831 finish:
791 if (inputDirBuf != NULL) { 832 if (inputDirBuf != NULL) {
792 uprv_free(inputDirBuf); 833 uprv_free(inputDirBuf);
793 } 834 }
794 835
795 if (openFileName != NULL) { 836 if (openFileName != NULL) {
796 uprv_free(openFileName); 837 uprv_free(openFileName);
797 } 838 }
798 839
799 if(ucbuf) { 840 if(ucbuf) {
800 ucbuf_close(ucbuf); 841 ucbuf_close(ucbuf);
801 } 842 }
802
803 return urules;
804 } 843 }
805 844
806 // Quick-and-dirty escaping function. 845 // Quick-and-dirty escaping function.
807 // Assumes that we are on an ASCII-based platform. 846 // Assumes that we are on an ASCII-based platform.
808 static void 847 static void
809 escape(const UChar *s, char *buffer) { 848 escape(const UChar *s, char *buffer) {
810 int32_t length = u_strlen(s); 849 int32_t length = u_strlen(s);
811 int32_t i = 0; 850 int32_t i = 0;
812 for (;;) { 851 for (;;) {
813 UChar32 c; 852 UChar32 c;
814 U16_NEXT(s, i, length, c); 853 U16_NEXT(s, i, length, c);
815 if (c == 0) { 854 if (c == 0) {
816 *buffer = 0; 855 *buffer = 0;
817 return; 856 return;
818 } else if (0x20 <= c && c <= 0x7e) { 857 } else if (0x20 <= c && c <= 0x7e) {
819 // printable ASCII 858 // printable ASCII
820 *buffer++ = (char)c; // assumes ASCII-based platform 859 *buffer++ = (char)c; // assumes ASCII-based platform
821 } else { 860 } else {
822 buffer += sprintf(buffer, "\\u%04X", (int)c); 861 buffer += sprintf(buffer, "\\u%04X", (int)c);
823 } 862 }
824 } 863 }
825 } 864 }
826 865
866 } // namespace
867
868 #endif // !UCONFIG_NO_COLLATION
869
827 static struct SResource * 870 static struct SResource *
828 addCollation(ParseState* state, struct SResource *result, uint32_t startline, U ErrorCode *status) 871 addCollation(ParseState* state, struct SResource *result, const char *collation Type,
872 uint32_t startline, UErrorCode *status)
829 { 873 {
874 // TODO: Use LocalPointer for result, or make caller close it when there is a failure.
830 struct SResource *member = NULL; 875 struct SResource *member = NULL;
831 struct UString *tokenValue; 876 struct UString *tokenValue;
832 struct UString comment; 877 struct UString comment;
833 enum ETokenType token; 878 enum ETokenType token;
834 char subtag[1024]; 879 char subtag[1024];
880 UnicodeString rules;
881 UBool haveRules = FALSE;
835 UVersionInfo version; 882 UVersionInfo version;
836 uint32_t line; 883 uint32_t line;
837 GenrbData genrbdata; 884
838 /* '{' . (name resource)* '}' */ 885 /* '{' . (name resource)* '}' */
839 version[0]=0; version[1]=0; version[2]=0; version[3]=0; 886 version[0]=0; version[1]=0; version[2]=0; version[3]=0;
840 887
841 for (;;) 888 for (;;)
842 { 889 {
843 ustr_init(&comment); 890 ustr_init(&comment);
844 token = getToken(state, &tokenValue, &comment, &line, status); 891 token = getToken(state, &tokenValue, &comment, &line, status);
845 892
846 if (token == TOK_CLOSE_BRACE) 893 if (token == TOK_CLOSE_BRACE)
847 { 894 {
848 return result; 895 break;
849 } 896 }
850 897
851 if (token != TOK_STRING) 898 if (token != TOK_STRING)
852 { 899 {
853 res_close(result); 900 res_close(result);
854 *status = U_INVALID_FORMAT_ERROR; 901 *status = U_INVALID_FORMAT_ERROR;
855 902
856 if (token == TOK_EOF) 903 if (token == TOK_EOF)
857 { 904 {
858 error(startline, "unterminated table"); 905 error(startline, "unterminated table");
(...skipping 14 matching lines...) Expand all
873 return NULL; 920 return NULL;
874 } 921 }
875 922
876 member = parseResource(state, subtag, NULL, status); 923 member = parseResource(state, subtag, NULL, status);
877 924
878 if (U_FAILURE(*status)) 925 if (U_FAILURE(*status))
879 { 926 {
880 res_close(result); 927 res_close(result);
881 return NULL; 928 return NULL;
882 } 929 }
883 930 if (result == NULL)
884 if (uprv_strcmp(subtag, "Version") == 0) 931 {
932 // Ignore the parsed resources, continue parsing.
933 }
934 else if (uprv_strcmp(subtag, "Version") == 0)
885 { 935 {
886 char ver[40]; 936 char ver[40];
887 int32_t length = member->u.fString.fLength; 937 int32_t length = member->u.fString.fLength;
888 938
889 if (length >= (int32_t) sizeof(ver)) 939 if (length >= (int32_t) sizeof(ver))
890 { 940 {
891 length = (int32_t) sizeof(ver) - 1; 941 length = (int32_t) sizeof(ver) - 1;
892 } 942 }
893 943
894 u_UCharsToChars(member->u.fString.fChars, ver, length + 1); /* +1 for copying NULL */ 944 u_UCharsToChars(member->u.fString.fChars, ver, length + 1); /* +1 for copying NULL */
895 u_versionFromString(version, ver); 945 u_versionFromString(version, ver);
896 946
897 table_add(result, member, line, status); 947 table_add(result, member, line, status);
898 948 member = NULL;
899 }
900 else if (uprv_strcmp(subtag, "Override") == 0)
901 {
902 // UBool override = (u_strncmp(member->u.fString.fChars, trueValue, u_strlen(trueValue)) == 0);
903 table_add(result, member, line, status);
904
905 } 949 }
906 else if(uprv_strcmp(subtag, "%%CollationBin")==0) 950 else if(uprv_strcmp(subtag, "%%CollationBin")==0)
907 { 951 {
908 /* discard duplicate %%CollationBin if any*/ 952 /* discard duplicate %%CollationBin if any*/
909 } 953 }
910 else if (uprv_strcmp(subtag, "Sequence") == 0) 954 else if (uprv_strcmp(subtag, "Sequence") == 0)
911 { 955 {
912 #if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO 956 rules.setTo(member->u.fString.fChars, member->u.fString.fLength);
913 warning(line, "Not building collation elements because of UCONFIG_NO _COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h"); 957 haveRules = TRUE;
914 #else 958 // Defer building the collator until we have seen
915 if(state->makeBinaryCollation) { 959 // all sub-elements of the collation table, including the Version.
916
917 /* do the collation elements */
918 int32_t len = 0;
919 uint8_t *data = NULL;
920 UCollator *coll = NULL;
921 int32_t reorderCodes[USCRIPT_CODE_LIMIT + (UCOL_REORDER_CODE_LIM IT - UCOL_REORDER_CODE_FIRST)];
922 int32_t reorderCodeCount;
923 int32_t reorderCodeIndex;
924 UParseError parseError;
925
926 genrbdata.inputDir = state->inputdir;
927 genrbdata.outputDir = state->outputdir;
928
929 UErrorCode intStatus = U_ZERO_ERROR;
930 uprv_memset(&parseError, 0, sizeof(parseError));
931 coll = ucol_openRulesForImport(member->u.fString.fChars, member- >u.fString.fLength,
932 UCOL_OFF, UCOL_DEFAULT_STRENGTH,& parseError, importFromDataFile, &genrbdata, &intStatus);
933
934 if (U_SUCCESS(intStatus) && coll != NULL)
935 {
936 len = ucol_cloneBinary(coll, NULL, 0, &intStatus);
937 data = (uint8_t *)uprv_malloc(len);
938 intStatus = U_ZERO_ERROR;
939 len = ucol_cloneBinary(coll, data, len, &intStatus);
940
941 /* tailoring rules version */
942 /* This is wrong! */
943 /*coll->dataInfo.dataVersion[1] = version[0];*/
944 /* Copy tailoring version. Builder version already */
945 /* set in ucol_openRules */
946 ((UCATableHeader *)data)->version[1] = version[0];
947 ((UCATableHeader *)data)->version[2] = version[1];
948 ((UCATableHeader *)data)->version[3] = version[2];
949
950 if (U_SUCCESS(intStatus) && data != NULL)
951 {
952 struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", len, data, NULL, NULL, status);
953 table_add(result, collationBin, line, status);
954 uprv_free(data);
955
956 reorderCodeCount = ucol_getReorderCodes(
957 coll, reorderCodes, USCRIPT_CODE_LIMIT + (UCOL_REORD ER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST), &intStatus);
958 if (U_SUCCESS(intStatus) && reorderCodeCount > 0) {
959 struct SResource *reorderCodeRes = intvector_open(st ate->bundle, "%%ReorderCodes", NULL, status);
960 for (reorderCodeIndex = 0; reorderCodeIndex < reorde rCodeCount; reorderCodeIndex++) {
961 intvector_add(reorderCodeRes, reorderCodes[reord erCodeIndex], status);
962 }
963 table_add(result, reorderCodeRes, line, status);
964 }
965 }
966 else
967 {
968 warning(line, "could not obtain rules from collator");
969 if(isStrict()){
970 *status = U_INVALID_FORMAT_ERROR;
971 return NULL;
972 }
973 }
974
975 ucol_close(coll);
976 }
977 else
978 {
979 if(intStatus == U_FILE_ACCESS_ERROR) {
980 error(startline, "Collation could not be built- U_FILE_A CCESS_ERROR. Make sure ICU's data has been built and is loading properly.");
981 *status = intStatus;
982 return NULL;
983 }
984 char preBuffer[100], postBuffer[100];
985 escape(parseError.preContext, preBuffer);
986 escape(parseError.postContext, postBuffer);
987 warning(line,
988 "%%%%CollationBin could not be constructed from Coll ationElements\n"
989 " check context, check that the FractionalUCA.txt U CA version "
990 "matches the current UCD version\n"
991 " UErrorCode=%s UParseError={ line=%d offset=%d pr e=<> post=<> }",
992 u_errorName(intStatus),
993 parseError.line,
994 parseError.offset,
995 preBuffer,
996 postBuffer);
997 if(isStrict()){
998 *status = intStatus;
999 return NULL;
1000 }
1001 }
1002 } else {
1003 if(isVerbose()) {
1004 printf("Not building Collation binary\n");
1005 }
1006 }
1007 #endif
1008 /* in order to achieve smaller data files, we can direct genrb */ 960 /* in order to achieve smaller data files, we can direct genrb */
1009 /* to omit collation rules */ 961 /* to omit collation rules */
1010 if(gOmitCollationRules) { 962 if(!state->omitCollationRules) {
1011 bundle_closeString(state->bundle, member);
1012 } else {
1013 table_add(result, member, line, status); 963 table_add(result, member, line, status);
964 member = NULL;
1014 } 965 }
1015 } 966 }
967 else // Just copy non-special items.
968 {
969 table_add(result, member, line, status);
970 member = NULL;
971 }
972 res_close(member); // TODO: use LocalPointer
1016 if (U_FAILURE(*status)) 973 if (U_FAILURE(*status))
1017 { 974 {
1018 res_close(result); 975 res_close(result);
1019 return NULL; 976 return NULL;
1020 } 977 }
1021 } 978 }
1022 979
1023 // Reached the end without a TOK_CLOSE_BRACE. Should be an error. 980 if (!haveRules) { return result; }
1024 *status = U_INTERNAL_PROGRAM_ERROR; 981
1025 return NULL; 982 #if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO
983 warning(line, "Not building collation elements because of UCONFIG_NO_COLLATI ON and/or UCONFIG_NO_FILE_IO, see uconfig.h");
984 (void)collationType;
985 #else
986 // CLDR ticket #3949, ICU ticket #8082:
987 // Do not build collation binary data for for-import-only "private" collation rule strings.
988 if (uprv_strncmp(collationType, "private-", 8) == 0) {
989 if(isVerbose()) {
990 printf("Not building %s~%s collation binary\n", state->filename, col lationType);
991 }
992 return result;
993 }
994
995 if(!state->makeBinaryCollation) {
996 if(isVerbose()) {
997 printf("Not building %s~%s collation binary\n", state->filename, col lationType);
998 }
999 return result;
1000 }
1001 UErrorCode intStatus = U_ZERO_ERROR;
1002 UParseError parseError;
1003 uprv_memset(&parseError, 0, sizeof(parseError));
1004 GenrbImporter importer(state->inputdir, state->outputdir);
1005 const icu::CollationTailoring *base = icu::CollationRoot::getRoot(intStatus) ;
1006 if(U_FAILURE(intStatus)) {
1007 error(line, "failed to load root collator (ucadata.icu) - %s", u_errorNa me(intStatus));
1008 res_close(result);
1009 return NULL; // TODO: use LocalUResourceBundlePointer for result
1010 }
1011 icu::CollationBuilder builder(base, intStatus);
1012 if(uprv_strncmp(collationType, "search", 6) == 0) {
1013 builder.disableFastLatin(); // build fast-Latin table unless search col lator
1014 }
1015 LocalPointer<icu::CollationTailoring> t(
1016 builder.parseAndBuild(rules, version, &importer, &parseError, intSta tus));
1017 if(U_FAILURE(intStatus)) {
1018 const char *reason = builder.getErrorReason();
1019 if(reason == NULL) { reason = ""; }
1020 error(line, "CollationBuilder failed at %s~%s/Sequence rule offset %ld: %s %s",
1021 state->filename, collationType,
1022 (long)parseError.offset, u_errorName(intStatus), reason);
1023 if(parseError.preContext[0] != 0 || parseError.postContext[0] != 0) {
1024 // Print pre- and post-context.
1025 char preBuffer[100], postBuffer[100];
1026 escape(parseError.preContext, preBuffer);
1027 escape(parseError.postContext, postBuffer);
1028 error(line, " error context: \"...%s\" ! \"%s...\"", preBuffer, pos tBuffer);
1029 }
1030 if(isStrict()) {
1031 *status = intStatus;
1032 res_close(result);
1033 return NULL;
1034 }
1035 }
1036 icu::LocalMemory<uint8_t> buffer;
1037 int32_t capacity = 100000;
1038 uint8_t *dest = buffer.allocateInsteadAndCopy(capacity);
1039 if(dest == NULL) {
1040 fprintf(stderr, "memory allocation (%ld bytes) for file contents failed\ n",
1041 (long)capacity);
1042 *status = U_MEMORY_ALLOCATION_ERROR;
1043 res_close(result);
1044 return NULL;
1045 }
1046 int32_t indexes[icu::CollationDataReader::IX_TOTAL_SIZE + 1];
1047 int32_t totalSize = icu::CollationDataWriter::writeTailoring(
1048 *t, *t->settings, indexes, dest, capacity, intStatus);
1049 if(intStatus == U_BUFFER_OVERFLOW_ERROR) {
1050 intStatus = U_ZERO_ERROR;
1051 capacity = totalSize;
1052 dest = buffer.allocateInsteadAndCopy(capacity);
1053 if(dest == NULL) {
1054 fprintf(stderr, "memory allocation (%ld bytes) for file contents fai led\n",
1055 (long)capacity);
1056 *status = U_MEMORY_ALLOCATION_ERROR;
1057 res_close(result);
1058 return NULL;
1059 }
1060 totalSize = icu::CollationDataWriter::writeTailoring(
1061 *t, *t->settings, indexes, dest, capacity, intStatus);
1062 }
1063 if(U_FAILURE(intStatus)) {
1064 fprintf(stderr, "CollationDataWriter::writeTailoring() failed: %s\n",
1065 u_errorName(intStatus));
1066 res_close(result);
1067 return NULL;
1068 }
1069 if(isVerbose()) {
1070 printf("%s~%s collation tailoring part sizes:\n", state->filename, colla tionType);
1071 icu::CollationInfo::printSizes(totalSize, indexes);
1072 }
1073 struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", t otalSize, dest, NULL, NULL, status);
1074 table_add(result, collationBin, line, status);
1075 if (U_FAILURE(*status)) {
1076 res_close(result);
1077 return NULL;
1078 }
1079 #endif
1080 return result;
1081 }
1082
1083 static UBool
1084 keepCollationType(const char * /*type*/) {
1085 return TRUE;
1026 } 1086 }
1027 1087
1028 static struct SResource * 1088 static struct SResource *
1029 parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool n ewCollation, UErrorCode *status) 1089 parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool n ewCollation, UErrorCode *status)
1030 { 1090 {
1031 struct SResource *result = NULL; 1091 struct SResource *result = NULL;
1032 struct SResource *member = NULL; 1092 struct SResource *member = NULL;
1033 struct SResource *collationRes = NULL; 1093 struct SResource *collationRes = NULL;
1034 struct UString *tokenValue; 1094 struct UString *tokenValue;
1035 struct UString comment; 1095 struct UString comment;
1036 enum ETokenType token; 1096 enum ETokenType token;
1037 char subtag[1024], typeKeyword[1024]; 1097 char subtag[1024], typeKeyword[1024];
1038 uint32_t line; 1098 uint32_t line;
1039 1099
1040 result = table_open(state->bundle, tag, NULL, status); 1100 result = table_open(state->bundle, tag, NULL, status);
1041 1101
1042 if (result == NULL || U_FAILURE(*status)) 1102 if (result == NULL || U_FAILURE(*status))
1043 { 1103 {
1044 return NULL; 1104 return NULL;
1045 } 1105 }
1046 if(isVerbose()){ 1106 if(isVerbose()){
1047 printf(" collation elements %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1107 printf(" collation elements %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1048 } 1108 }
1049 if(!newCollation) { 1109 if(!newCollation) {
1050 return addCollation(state, result, startline, status); 1110 return addCollation(state, result, "(no type)", startline, status);
1051 } 1111 }
1052 else { 1112 else {
1053 for(;;) { 1113 for(;;) {
1054 ustr_init(&comment); 1114 ustr_init(&comment);
1055 token = getToken(state, &tokenValue, &comment, &line, status); 1115 token = getToken(state, &tokenValue, &comment, &line, status);
1056 1116
1057 if (token == TOK_CLOSE_BRACE) 1117 if (token == TOK_CLOSE_BRACE)
1058 { 1118 {
1059 return result; 1119 return result;
1060 } 1120 }
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
1097 table_add(result, member, line, status); 1157 table_add(result, member, line, status);
1098 } 1158 }
1099 else 1159 else
1100 { 1160 {
1101 token = peekToken(state, 0, &tokenValue, &line, &comment, status ); 1161 token = peekToken(state, 0, &tokenValue, &line, &comment, status );
1102 /* this probably needs to be refactored or recursively use the parser */ 1162 /* this probably needs to be refactored or recursively use the parser */
1103 /* first we assume that our collation table won't have the explicit type */ 1163 /* first we assume that our collation table won't have the explicit type */
1104 /* then, we cannot handle aliases */ 1164 /* then, we cannot handle aliases */
1105 if(token == TOK_OPEN_BRACE) { 1165 if(token == TOK_OPEN_BRACE) {
1106 token = getToken(state, &tokenValue, &comment, &line, status ); 1166 token = getToken(state, &tokenValue, &comment, &line, status );
1107 collationRes = table_open(state->bundle, subtag, NULL, statu s); 1167 if (keepCollationType(subtag)) {
1108 collationRes = addCollation(state, collationRes, startline, status); /* need to parse the collation data regardless */ 1168 collationRes = table_open(state->bundle, subtag, NULL, s tatus);
1109 if (gIncludeUnihanColl || uprv_strcmp(subtag, "unihan") != 0 ) { 1169 } else {
1170 collationRes = NULL;
1171 }
1172 // need to parse the collation data regardless
1173 collationRes = addCollation(state, collationRes, subtag, sta rtline, status);
1174 if (collationRes != NULL) {
1110 table_add(result, collationRes, startline, status); 1175 table_add(result, collationRes, startline, status);
1111 } 1176 }
1112 } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */ 1177 } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */
1113 /* we could have a table too */ 1178 /* we could have a table too */
1114 token = peekToken(state, 1, &tokenValue, &line, &comment, st atus); 1179 token = peekToken(state, 1, &tokenValue, &line, &comment, st atus);
1115 u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(to kenValue->fChars) + 1); 1180 u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(to kenValue->fChars) + 1);
1116 if(uprv_strcmp(typeKeyword, "alias") == 0) { 1181 if(uprv_strcmp(typeKeyword, "alias") == 0) {
1117 member = parseResource(state, subtag, NULL, status); 1182 member = parseResource(state, subtag, NULL, status);
1118 if (U_FAILURE(*status)) 1183 if (U_FAILURE(*status))
1119 { 1184 {
(...skipping 671 matching lines...) Expand 10 before | Expand all | Expand 10 after
1791 U_STRING_DECL(k_type_include, "include", 7); 1856 U_STRING_DECL(k_type_include, "include", 7);
1792 1857
1793 /* Various non-standard processing plugins that create one or more special resources. */ 1858 /* Various non-standard processing plugins that create one or more special resources. */
1794 U_STRING_DECL(k_type_plugin_uca_rules, "process(uca_rules)", 18); 1859 U_STRING_DECL(k_type_plugin_uca_rules, "process(uca_rules)", 18);
1795 U_STRING_DECL(k_type_plugin_collation, "process(collation)", 18); 1860 U_STRING_DECL(k_type_plugin_collation, "process(collation)", 18);
1796 U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)", 23); 1861 U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)", 23);
1797 U_STRING_DECL(k_type_plugin_dependency, "process(dependency)", 19); 1862 U_STRING_DECL(k_type_plugin_dependency, "process(dependency)", 19);
1798 1863
1799 typedef enum EResourceType 1864 typedef enum EResourceType
1800 { 1865 {
1801 RT_UNKNOWN, 1866 RESTYPE_UNKNOWN,
1802 RT_STRING, 1867 RESTYPE_STRING,
1803 RT_BINARY, 1868 RESTYPE_BINARY,
1804 RT_TABLE, 1869 RESTYPE_TABLE,
1805 RT_TABLE_NO_FALLBACK, 1870 RESTYPE_TABLE_NO_FALLBACK,
1806 RT_INTEGER, 1871 RESTYPE_INTEGER,
1807 RT_ARRAY, 1872 RESTYPE_ARRAY,
1808 RT_ALIAS, 1873 RESTYPE_ALIAS,
1809 RT_INTVECTOR, 1874 RESTYPE_INTVECTOR,
1810 RT_IMPORT, 1875 RESTYPE_IMPORT,
1811 RT_INCLUDE, 1876 RESTYPE_INCLUDE,
1812 RT_PROCESS_UCA_RULES, 1877 RESTYPE_PROCESS_UCA_RULES,
1813 RT_PROCESS_COLLATION, 1878 RESTYPE_PROCESS_COLLATION,
1814 RT_PROCESS_TRANSLITERATOR, 1879 RESTYPE_PROCESS_TRANSLITERATOR,
1815 RT_PROCESS_DEPENDENCY, 1880 RESTYPE_PROCESS_DEPENDENCY,
1816 RT_RESERVED 1881 RESTYPE_RESERVED
1817 } EResourceType; 1882 } EResourceType;
1818 1883
1819 static struct { 1884 static struct {
1820 const char *nameChars; /* only used for debugging */ 1885 const char *nameChars; /* only used for debugging */
1821 const UChar *nameUChars; 1886 const UChar *nameUChars;
1822 ParseResourceFunction *parseFunction; 1887 ParseResourceFunction *parseFunction;
1823 } gResourceTypes[] = { 1888 } gResourceTypes[] = {
1824 {"Unknown", NULL, NULL}, 1889 {"Unknown", NULL, NULL},
1825 {"string", k_type_string, parseString}, 1890 {"string", k_type_string, parseString},
1826 {"binary", k_type_binary, parseBinary}, 1891 {"binary", k_type_binary, parseBinary},
1827 {"table", k_type_table, parseTable}, 1892 {"table", k_type_table, parseTable},
1828 {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will never be called */ 1893 {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will never be called */
1829 {"integer", k_type_integer, parseInteger}, 1894 {"integer", k_type_integer, parseInteger},
1830 {"array", k_type_array, parseArray}, 1895 {"array", k_type_array, parseArray},
1831 {"alias", k_type_alias, parseAlias}, 1896 {"alias", k_type_alias, parseAlias},
1832 {"intvector", k_type_intvector, parseIntVector}, 1897 {"intvector", k_type_intvector, parseIntVector},
1833 {"import", k_type_import, parseImport}, 1898 {"import", k_type_import, parseImport},
1834 {"include", k_type_include, parseInclude}, 1899 {"include", k_type_include, parseInclude},
1835 {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules}, 1900 {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules},
1836 {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet */}, 1901 {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet */},
1837 {"process(transliterator)", k_type_plugin_transliterator, parseTransliterato r}, 1902 {"process(transliterator)", k_type_plugin_transliterator, parseTransliterato r},
1838 {"process(dependency)", k_type_plugin_dependency, parseDependency}, 1903 {"process(dependency)", k_type_plugin_dependency, parseDependency},
1839 {"reserved", NULL, NULL} 1904 {"reserved", NULL, NULL}
1840 }; 1905 };
1841 1906
1842 void initParser(UBool omitCollationRules) 1907 void initParser()
1843 { 1908 {
1844 U_STRING_INIT(k_type_string, "string", 6); 1909 U_STRING_INIT(k_type_string, "string", 6);
1845 U_STRING_INIT(k_type_binary, "binary", 6); 1910 U_STRING_INIT(k_type_binary, "binary", 6);
1846 U_STRING_INIT(k_type_bin, "bin", 3); 1911 U_STRING_INIT(k_type_bin, "bin", 3);
1847 U_STRING_INIT(k_type_table, "table", 5); 1912 U_STRING_INIT(k_type_table, "table", 5);
1848 U_STRING_INIT(k_type_table_no_fallback, "table(nofallback)", 17) ; 1913 U_STRING_INIT(k_type_table_no_fallback, "table(nofallback)", 17) ;
1849 U_STRING_INIT(k_type_int, "int", 3); 1914 U_STRING_INIT(k_type_int, "int", 3);
1850 U_STRING_INIT(k_type_integer, "integer", 7); 1915 U_STRING_INIT(k_type_integer, "integer", 7);
1851 U_STRING_INIT(k_type_array, "array", 5); 1916 U_STRING_INIT(k_type_array, "array", 5);
1852 U_STRING_INIT(k_type_alias, "alias", 5); 1917 U_STRING_INIT(k_type_alias, "alias", 5);
1853 U_STRING_INIT(k_type_intvector, "intvector", 9); 1918 U_STRING_INIT(k_type_intvector, "intvector", 9);
1854 U_STRING_INIT(k_type_import, "import", 6); 1919 U_STRING_INIT(k_type_import, "import", 6);
1855 U_STRING_INIT(k_type_include, "include", 7); 1920 U_STRING_INIT(k_type_include, "include", 7);
1856 1921
1857 U_STRING_INIT(k_type_plugin_uca_rules, "process(uca_rules)", 18) ; 1922 U_STRING_INIT(k_type_plugin_uca_rules, "process(uca_rules)", 18) ;
1858 U_STRING_INIT(k_type_plugin_collation, "process(collation)", 18) ; 1923 U_STRING_INIT(k_type_plugin_collation, "process(collation)", 18) ;
1859 U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)", 23) ; 1924 U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)", 23) ;
1860 U_STRING_INIT(k_type_plugin_dependency, "process(dependency)", 19) ; 1925 U_STRING_INIT(k_type_plugin_dependency, "process(dependency)", 19) ;
1861
1862 gOmitCollationRules = omitCollationRules;
1863 } 1926 }
1864 1927
1865 static inline UBool isTable(enum EResourceType type) { 1928 static inline UBool isTable(enum EResourceType type) {
1866 return (UBool)(type==RT_TABLE || type==RT_TABLE_NO_FALLBACK); 1929 return (UBool)(type==RESTYPE_TABLE || type==RESTYPE_TABLE_NO_FALLBACK);
1867 } 1930 }
1868 1931
1869 static enum EResourceType 1932 static enum EResourceType
1870 parseResourceType(ParseState* state, UErrorCode *status) 1933 parseResourceType(ParseState* state, UErrorCode *status)
1871 { 1934 {
1872 struct UString *tokenValue; 1935 struct UString *tokenValue;
1873 struct UString comment; 1936 struct UString comment;
1874 enum EResourceType result = RT_UNKNOWN; 1937 enum EResourceType result = RESTYPE_UNKNOWN;
1875 uint32_t line=0; 1938 uint32_t line=0;
1876 ustr_init(&comment); 1939 ustr_init(&comment);
1877 expect(state, TOK_STRING, &tokenValue, &comment, &line, status); 1940 expect(state, TOK_STRING, &tokenValue, &comment, &line, status);
1878 1941
1879 if (U_FAILURE(*status)) 1942 if (U_FAILURE(*status))
1880 { 1943 {
1881 return RT_UNKNOWN; 1944 return RESTYPE_UNKNOWN;
1882 } 1945 }
1883 1946
1884 *status = U_ZERO_ERROR; 1947 *status = U_ZERO_ERROR;
1885 1948
1886 /* Search for normal types */ 1949 /* Search for normal types */
1887 result=RT_UNKNOWN; 1950 result=RESTYPE_UNKNOWN;
1888 while ((result=(EResourceType)(result+1)) < RT_RESERVED) { 1951 while ((result=(EResourceType)(result+1)) < RESTYPE_RESERVED) {
1889 if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0 ) { 1952 if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0 ) {
1890 break; 1953 break;
1891 } 1954 }
1892 } 1955 }
1893 /* Now search for the aliases */ 1956 /* Now search for the aliases */
1894 if (u_strcmp(tokenValue->fChars, k_type_int) == 0) { 1957 if (u_strcmp(tokenValue->fChars, k_type_int) == 0) {
1895 result = RT_INTEGER; 1958 result = RESTYPE_INTEGER;
1896 } 1959 }
1897 else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) { 1960 else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) {
1898 result = RT_BINARY; 1961 result = RESTYPE_BINARY;
1899 } 1962 }
1900 else if (result == RT_RESERVED) { 1963 else if (result == RESTYPE_RESERVED) {
1901 char tokenBuffer[1024]; 1964 char tokenBuffer[1024];
1902 u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer)); 1965 u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer));
1903 tokenBuffer[sizeof(tokenBuffer) - 1] = 0; 1966 tokenBuffer[sizeof(tokenBuffer) - 1] = 0;
1904 *status = U_INVALID_FORMAT_ERROR; 1967 *status = U_INVALID_FORMAT_ERROR;
1905 error(line, "unknown resource type '%s'", tokenBuffer); 1968 error(line, "unknown resource type '%s'", tokenBuffer);
1906 } 1969 }
1907 1970
1908 return result; 1971 return result;
1909 } 1972 }
1910 1973
1911 /* parse a non-top-level resource */ 1974 /* parse a non-top-level resource */
1912 static struct SResource * 1975 static struct SResource *
1913 parseResource(ParseState* state, char *tag, const struct UString *comment, UErro rCode *status) 1976 parseResource(ParseState* state, char *tag, const struct UString *comment, UErro rCode *status)
1914 { 1977 {
1915 enum ETokenType token; 1978 enum ETokenType token;
1916 enum EResourceType resType = RT_UNKNOWN; 1979 enum EResourceType resType = RESTYPE_UNKNOWN;
1917 ParseResourceFunction *parseFunction = NULL; 1980 ParseResourceFunction *parseFunction = NULL;
1918 struct UString *tokenValue; 1981 struct UString *tokenValue;
1919 uint32_t startline; 1982 uint32_t startline;
1920 uint32_t line; 1983 uint32_t line;
1921 1984
1922 1985
1923 token = getToken(state, &tokenValue, NULL, &startline, status); 1986 token = getToken(state, &tokenValue, NULL, &startline, status);
1924 1987
1925 if(isVerbose()){ 1988 if(isVerbose()){
1926 printf(" resource %s at line %i \n", (tag == NULL) ? "(null)" : tag, (i nt)startline); 1989 printf(" resource %s at line %i \n", (tag == NULL) ? "(null)" : tag, (i nt)startline);
(...skipping 28 matching lines...) Expand all
1955 case TOK_OPEN_BRACE: 2018 case TOK_OPEN_BRACE:
1956 break; 2019 break;
1957 2020
1958 default: 2021 default:
1959 *status = U_INVALID_FORMAT_ERROR; 2022 *status = U_INVALID_FORMAT_ERROR;
1960 error(startline, "syntax error while reading a resource, expected '{' or ':'"); 2023 error(startline, "syntax error while reading a resource, expected '{' or ':'");
1961 return NULL; 2024 return NULL;
1962 } 2025 }
1963 2026
1964 2027
1965 if (resType == RT_UNKNOWN) 2028 if (resType == RESTYPE_UNKNOWN)
1966 { 2029 {
1967 /* No explicit type, so try to work it out. At this point, we've read the first '{'. 2030 /* No explicit type, so try to work it out. At this point, we've read the first '{'.
1968 We could have any of the following: 2031 We could have any of the following:
1969 { { => array (nested) 2032 { { => array (nested)
1970 { :/} => array 2033 { :/} => array
1971 { string , => string array 2034 { string , => string array
1972 2035
1973 { string { => table 2036 { string { => table
1974 2037
1975 { string :/{ => table 2038 { string :/{ => table
1976 { string } => string 2039 { string } => string
1977 */ 2040 */
1978 2041
1979 token = peekToken(state, 0, NULL, &line, NULL,status); 2042 token = peekToken(state, 0, NULL, &line, NULL,status);
1980 2043
1981 if (U_FAILURE(*status)) 2044 if (U_FAILURE(*status))
1982 { 2045 {
1983 return NULL; 2046 return NULL;
1984 } 2047 }
1985 2048
1986 if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BR ACE ) 2049 if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BR ACE )
1987 { 2050 {
1988 resType = RT_ARRAY; 2051 resType = RESTYPE_ARRAY;
1989 } 2052 }
1990 else if (token == TOK_STRING) 2053 else if (token == TOK_STRING)
1991 { 2054 {
1992 token = peekToken(state, 1, NULL, &line, NULL, status); 2055 token = peekToken(state, 1, NULL, &line, NULL, status);
1993 2056
1994 if (U_FAILURE(*status)) 2057 if (U_FAILURE(*status))
1995 { 2058 {
1996 return NULL; 2059 return NULL;
1997 } 2060 }
1998 2061
1999 switch (token) 2062 switch (token)
2000 { 2063 {
2001 case TOK_COMMA: resType = RT_ARRAY; break; 2064 case TOK_COMMA: resType = RESTYPE_ARRAY; break;
2002 case TOK_OPEN_BRACE: resType = RT_TABLE; break; 2065 case TOK_OPEN_BRACE: resType = RESTYPE_TABLE; break;
2003 case TOK_CLOSE_BRACE: resType = RT_STRING; break; 2066 case TOK_CLOSE_BRACE: resType = RESTYPE_STRING; break;
2004 case TOK_COLON: resType = RT_TABLE; break; 2067 case TOK_COLON: resType = RESTYPE_TABLE; break;
2005 default: 2068 default:
2006 *status = U_INVALID_FORMAT_ERROR; 2069 *status = U_INVALID_FORMAT_ERROR;
2007 error(line, "Unexpected token after string, expected ',', '{' or '}'"); 2070 error(line, "Unexpected token after string, expected ',', '{' or '}'");
2008 return NULL; 2071 return NULL;
2009 } 2072 }
2010 } 2073 }
2011 else 2074 else
2012 { 2075 {
2013 *status = U_INVALID_FORMAT_ERROR; 2076 *status = U_INVALID_FORMAT_ERROR;
2014 error(line, "Unexpected token after '{'"); 2077 error(line, "Unexpected token after '{'");
2015 return NULL; 2078 return NULL;
2016 } 2079 }
2017 2080
2018 /* printf("Type guessed as %s\n", resourceNames[resType]); */ 2081 /* printf("Type guessed as %s\n", resourceNames[resType]); */
2019 } else if(resType == RT_TABLE_NO_FALLBACK) { 2082 } else if(resType == RESTYPE_TABLE_NO_FALLBACK) {
2020 *status = U_INVALID_FORMAT_ERROR; 2083 *status = U_INVALID_FORMAT_ERROR;
2021 error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars); 2084 error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars);
2022 return NULL; 2085 return NULL;
2023 } 2086 }
2024 2087
2025 2088
2026 /* We should now know what we need to parse next, so call the appropriate parser 2089 /* We should now know what we need to parse next, so call the appropriate parser
2027 function and return. */ 2090 function and return. */
2028 parseFunction = gResourceTypes[resType].parseFunction; 2091 parseFunction = gResourceTypes[resType].parseFunction;
2029 if (parseFunction != NULL) { 2092 if (parseFunction != NULL) {
2030 return parseFunction(state, tag, startline, comment, status); 2093 return parseFunction(state, tag, startline, comment, status);
2031 } 2094 }
2032 else { 2095 else {
2033 *status = U_INTERNAL_PROGRAM_ERROR; 2096 *status = U_INTERNAL_PROGRAM_ERROR;
2034 error(startline, "internal error: %s resource type found and not handled ", gResourceTypes[resType].nameChars); 2097 error(startline, "internal error: %s resource type found and not handled ", gResourceTypes[resType].nameChars);
2035 } 2098 }
2036 2099
2037 return NULL; 2100 return NULL;
2038 } 2101 }
2039 2102
2040 /* parse the top-level resource */ 2103 /* parse the top-level resource */
2041 struct SRBRoot * 2104 struct SRBRoot *
2042 parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, UBool makeBina ryCollation, 2105 parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, const char *fi lename,
2043 UErrorCode *status) 2106 UBool makeBinaryCollation, UBool omitCollationRules, UErrorCode *status)
2044 { 2107 {
2045 struct UString *tokenValue; 2108 struct UString *tokenValue;
2046 struct UString comment; 2109 struct UString comment;
2047 uint32_t line; 2110 uint32_t line;
2048 enum EResourceType bundleType; 2111 enum EResourceType bundleType;
2049 enum ETokenType token; 2112 enum ETokenType token;
2050 ParseState state; 2113 ParseState state;
2051 uint32_t i; 2114 uint32_t i;
2052 2115
2053 2116
2054 for (i = 0; i < MAX_LOOKAHEAD + 1; i++) 2117 for (i = 0; i < MAX_LOOKAHEAD + 1; i++)
2055 { 2118 {
2056 ustr_init(&state.lookahead[i].value); 2119 ustr_init(&state.lookahead[i].value);
2057 ustr_init(&state.lookahead[i].comment); 2120 ustr_init(&state.lookahead[i].comment);
2058 } 2121 }
2059 2122
2060 initLookahead(&state, buf, status); 2123 initLookahead(&state, buf, status);
2061 2124
2062 state.inputdir = inputDir; 2125 state.inputdir = inputDir;
2063 state.inputdirLength = (state.inputdir != NULL) ? (uint32_t)uprv_strlen(stat e.inputdir) : 0; 2126 state.inputdirLength = (state.inputdir != NULL) ? (uint32_t)uprv_strlen(stat e.inputdir) : 0;
2064 state.outputdir = outputDir; 2127 state.outputdir = outputDir;
2065 state.outputdirLength = (state.outputdir != NULL) ? (uint32_t)uprv_strlen(st ate.outputdir) : 0; 2128 state.outputdirLength = (state.outputdir != NULL) ? (uint32_t)uprv_strlen(st ate.outputdir) : 0;
2129 state.filename = filename;
2066 state.makeBinaryCollation = makeBinaryCollation; 2130 state.makeBinaryCollation = makeBinaryCollation;
2131 state.omitCollationRules = omitCollationRules;
2067 2132
2068 ustr_init(&comment); 2133 ustr_init(&comment);
2069 expect(&state, TOK_STRING, &tokenValue, &comment, NULL, status); 2134 expect(&state, TOK_STRING, &tokenValue, &comment, NULL, status);
2070 2135
2071 state.bundle = bundle_open(&comment, FALSE, status); 2136 state.bundle = bundle_open(&comment, FALSE, status);
2072 2137
2073 if (state.bundle == NULL || U_FAILURE(*status)) 2138 if (state.bundle == NULL || U_FAILURE(*status))
2074 { 2139 {
2075 return NULL; 2140 return NULL;
2076 } 2141 }
(...skipping 16 matching lines...) Expand all
2093 *status=U_PARSE_ERROR; 2158 *status=U_PARSE_ERROR;
2094 error(line, "parse error. Stopped parsing with %s", u_errorName(*st atus)); 2159 error(line, "parse error. Stopped parsing with %s", u_errorName(*st atus));
2095 } 2160 }
2096 } 2161 }
2097 else 2162 else
2098 { 2163 {
2099 /* not a colon */ 2164 /* not a colon */
2100 if(token==TOK_OPEN_BRACE) 2165 if(token==TOK_OPEN_BRACE)
2101 { 2166 {
2102 *status=U_ZERO_ERROR; 2167 *status=U_ZERO_ERROR;
2103 bundleType=RT_TABLE; 2168 bundleType=RESTYPE_TABLE;
2104 } 2169 }
2105 else 2170 else
2106 { 2171 {
2107 /* neither colon nor open brace */ 2172 /* neither colon nor open brace */
2108 *status=U_PARSE_ERROR; 2173 *status=U_PARSE_ERROR;
2109 bundleType=RT_UNKNOWN; 2174 bundleType=RESTYPE_UNKNOWN;
2110 error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status)); 2175 error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status));
2111 } 2176 }
2112 } 2177 }
2113 2178
2114 if (U_FAILURE(*status)) 2179 if (U_FAILURE(*status))
2115 { 2180 {
2116 bundle_close(state.bundle, status); 2181 bundle_close(state.bundle, status);
2117 return NULL; 2182 return NULL;
2118 } 2183 }
2119 2184
2120 if(bundleType==RT_TABLE_NO_FALLBACK) { 2185 if(bundleType==RESTYPE_TABLE_NO_FALLBACK) {
2121 /* 2186 /*
2122 * Parse a top-level table with the table(nofallback) declaration. 2187 * Parse a top-level table with the table(nofallback) declaration.
2123 * This is the same as a regular table, but also sets the 2188 * This is the same as a regular table, but also sets the
2124 * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES]. 2189 * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES].
2125 */ 2190 */
2126 state.bundle->noFallback=TRUE; 2191 state.bundle->noFallback=TRUE;
2127 } 2192 }
2128 /* top-level tables need not handle special table names like "collations" */ 2193 /* top-level tables need not handle special table names like "collations" */
2129 realParseTable(&state, state.bundle->fRoot, NULL, line, status); 2194 realParseTable(&state, state.bundle->fRoot, NULL, line, status);
2130 if(dependencyArray!=NULL){ 2195 if(dependencyArray!=NULL){
(...skipping 13 matching lines...) Expand all
2144 if(isStrict()){ 2209 if(isStrict()){
2145 *status = U_INVALID_FORMAT_ERROR; 2210 *status = U_INVALID_FORMAT_ERROR;
2146 return NULL; 2211 return NULL;
2147 } 2212 }
2148 } 2213 }
2149 2214
2150 cleanupLookahead(&state); 2215 cleanupLookahead(&state);
2151 ustr_deinit(&comment); 2216 ustr_deinit(&comment);
2152 return state.bundle; 2217 return state.bundle;
2153 } 2218 }
OLDNEW
« no previous file with comments | « source/tools/genrb/parse.h ('k') | source/tools/genrb/prscmnts.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698