Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(125)

Side by Side Diff: icu46/source/tools/gennorm2/gennorm2.cpp

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/
Patch Set: Created 10 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « icu46/source/tools/gennorm2/Makefile.in ('k') | icu46/source/tools/gennorm2/gennorm2.vcxproj » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Property Changes:
Added: svn:eol-style
+ LF
OLDNEW
(Empty)
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2009-2010, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: gennorm2.cpp
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2009nov25
14 * created by: Markus W. Scherer
15 *
16 * This program reads text files that define Unicode normalization,
17 * parses them, and builds a binary data file.
18 */
19
20 #include "unicode/utypes.h"
21 #include "n2builder.h"
22
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include "unicode/errorcode.h"
27 #include "unicode/localpointer.h"
28 #include "unicode/putil.h"
29 #include "unicode/uchar.h"
30 #include "unicode/unistr.h"
31 #include "charstr.h"
32 #include "normalizer2impl.h"
33 #include "toolutil.h"
34 #include "uoptions.h"
35 #include "uparse.h"
36
37 #if UCONFIG_NO_NORMALIZATION
38 #include "unewdata.h"
39 #endif
40
/*
 * Number of elements of a true array (not a pointer), as an int32_t.
 * The whole expansion is parenthesized so that the cast cannot combine
 * unexpectedly with operators adjacent to the macro invocation.
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
42
43 U_NAMESPACE_BEGIN
44
45 UBool beVerbose=FALSE, haveCopyright=TRUE;
46
47 U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose);
48
49 #if !UCONFIG_NO_NORMALIZATION
50 void parseFile(FILE *f, Normalizer2DataBuilder &builder);
51 #endif
52
53 /* -------------------------------------------------------------------------- */
54
55 enum {
56 HELP_H,
57 HELP_QUESTION_MARK,
58 VERBOSE,
59 COPYRIGHT,
60 SOURCEDIR,
61 OUTPUT_FILENAME,
62 UNICODE_VERSION,
63 OPT_FAST
64 };
65
66 static UOption options[]={
67 UOPTION_HELP_H,
68 UOPTION_HELP_QUESTION_MARK,
69 UOPTION_VERBOSE,
70 UOPTION_COPYRIGHT,
71 UOPTION_SOURCEDIR,
72 UOPTION_DEF("output", 'o', UOPT_REQUIRES_ARG),
73 UOPTION_DEF("unicode", 'u', UOPT_REQUIRES_ARG),
74 UOPTION_DEF("fast", '\1', UOPT_NO_ARG)
75 };
76
77 extern "C" int
78 main(int argc, char* argv[]) {
79 U_MAIN_INIT_ARGS(argc, argv);
80
81 /* preset then read command line options */
82 options[SOURCEDIR].value="";
83 options[UNICODE_VERSION].value=U_UNICODE_VERSION;
84 argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[HELP_H]), option s);
85
86 /* error handling, printing usage message */
87 if(argc<0) {
88 fprintf(stderr,
89 "error in command line argument \"%s\"\n",
90 argv[-argc]);
91 }
92 if(!options[OUTPUT_FILENAME].doesOccur) {
93 argc=-1;
94 }
95 if( argc<2 ||
96 options[HELP_H].doesOccur || options[HELP_QUESTION_MARK].doesOccur
97 ) {
98 /*
99 * Broken into chunks because the C89 standard says the minimum
100 * required supported string length is 509 bytes.
101 */
102 fprintf(stderr,
103 "Usage: %s [-options] infiles+ -o outputfilename\n"
104 "\n"
105 "Reads the infiles with normalization data and\n"
106 "creates a binary file (outputfilename) with the data.\n"
107 "\n",
108 argv[0]);
109 fprintf(stderr,
110 "Options:\n"
111 "\t-h or -? or --help this usage text\n"
112 "\t-v or --verbose verbose output\n"
113 "\t-c or --copyright include a copyright notice\n"
114 "\t-u or --unicode Unicode version, followed by the version like 5.2.0\n");
115 fprintf(stderr,
116 "\t-s or --sourcedir source directory, followed by the path\n"
117 "\t-o or --output output filename\n");
118 fprintf(stderr,
119 "\t --fast optimize the .nrm file for fast normalization ,\n"
120 "\t which might increase its size (Writes fully decomposed\n"
121 "\t regular mappings instead of delta mappings.\n "
122 "\t You should measure the runtime speed to make sure that\n"
123 "\t this is a good trade-off.)\n");
124 return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
125 }
126
127 beVerbose=options[VERBOSE].doesOccur;
128 haveCopyright=options[COPYRIGHT].doesOccur;
129
130 IcuToolErrorCode errorCode("gennorm2/main()");
131
132 #if UCONFIG_NO_NORMALIZATION
133
134 fprintf(stderr,
135 "gennorm2 writes a dummy binary data file "
136 "because UCONFIG_NO_NORMALIZATION is set, \n"
137 "see icu/source/common/unicode/uconfig.h\n");
138 udata_createDummy(NULL, NULL, options[OUTPUT_FILENAME].value, errorCode);
139 // Should not return an error since this is the expected behaviour if UCONFI G_NO_NORMALIZATION is on.
140 // return U_UNSUPPORTED_ERROR;
141 return 0;
142
143 #else
144
145 LocalPointer<Normalizer2DataBuilder> builder(new Normalizer2DataBuilder(erro rCode));
146 errorCode.assertSuccess();
147
148 builder->setUnicodeVersion(options[UNICODE_VERSION].value);
149
150 if(options[OPT_FAST].doesOccur) {
151 builder->setOptimization(Normalizer2DataBuilder::OPTIMIZE_FAST);
152 }
153
154 // prepare the filename beginning with the source dir
155 CharString filename(options[SOURCEDIR].value, errorCode);
156 int32_t pathLength=filename.length();
157 if( pathLength>0 &&
158 filename[pathLength-1]!=U_FILE_SEP_CHAR &&
159 filename[pathLength-1]!=U_FILE_ALT_SEP_CHAR
160 ) {
161 filename.append(U_FILE_SEP_CHAR, errorCode);
162 pathLength=filename.length();
163 }
164
165 for(int i=1; i<argc; ++i) {
166 printf("gennorm2: processing %s\n", argv[i]);
167 filename.append(argv[i], errorCode);
168 LocalStdioFilePointer f(fopen(filename.data(), "r"));
169 if(f==NULL) {
170 fprintf(stderr, "gennorm2 error: unable to open %s\n", filename.data ());
171 exit(U_FILE_ACCESS_ERROR);
172 }
173 builder->setOverrideHandling(Normalizer2DataBuilder::OVERRIDE_PREVIOUS);
174 parseFile(f.getAlias(), *builder);
175 filename.truncate(pathLength);
176 }
177
178 builder->writeBinaryFile(options[OUTPUT_FILENAME].value);
179
180 return errorCode.get();
181
182 #endif
183 }
184
185 #if !UCONFIG_NO_NORMALIZATION
186
187 void parseFile(FILE *f, Normalizer2DataBuilder &builder) {
188 IcuToolErrorCode errorCode("gennorm2/parseFile()");
189 char line[300];
190 uint32_t startCP, endCP;
191 while(NULL!=fgets(line, (int)sizeof(line), f)) {
192 char *comment=(char *)strchr(line, '#');
193 if(comment!=NULL) {
194 *comment=0;
195 }
196 u_rtrim(line);
197 if(line[0]==0) {
198 continue; // skip empty and comment-only lines
199 }
200 if(line[0]=='*') {
201 continue; // reserved syntax
202 }
203 const char *delimiter;
204 int32_t rangeLength=
205 u_parseCodePointRangeAnyTerminator(line, &startCP, &endCP, &delimite r, errorCode);
206 if(errorCode.isFailure()) {
207 fprintf(stderr, "gennorm2 error: parsing code point range from %s\n" , line);
208 exit(errorCode.reset());
209 }
210 delimiter=u_skipWhitespace(delimiter);
211 if(*delimiter==':') {
212 const char *s=u_skipWhitespace(delimiter+1);
213 char *end;
214 unsigned long value=strtoul(s, &end, 10);
215 if(end<=s || *u_skipWhitespace(end)!=0 || value>=0xff) {
216 fprintf(stderr, "gennorm2 error: parsing ccc from %s\n", line);
217 exit(U_PARSE_ERROR);
218 }
219 for(UChar32 c=(UChar32)startCP; c<=(UChar32)endCP; ++c) {
220 builder.setCC(c, (uint8_t)value);
221 }
222 continue;
223 }
224 if(*delimiter=='-') {
225 if(*u_skipWhitespace(delimiter+1)!=0) {
226 fprintf(stderr, "gennorm2 error: parsing remove-mapping %s\n", l ine);
227 exit(U_PARSE_ERROR);
228 }
229 for(UChar32 c=(UChar32)startCP; c<=(UChar32)endCP; ++c) {
230 builder.removeMapping(c);
231 }
232 continue;
233 }
234 if(*delimiter=='=' || *delimiter=='>') {
235 UChar uchars[Normalizer2Impl::MAPPING_LENGTH_MASK];
236 int32_t length=u_parseString(delimiter+1, uchars, LENGTHOF(uchars), NULL, errorCode);
237 if(errorCode.isFailure()) {
238 fprintf(stderr, "gennorm2 error: parsing mapping string from %s\ n", line);
239 exit(errorCode.reset());
240 }
241 UnicodeString mapping(FALSE, uchars, length);
242 if(*delimiter=='=') {
243 if(rangeLength!=1) {
244 fprintf(stderr,
245 "gennorm2 error: round-trip mapping for more than 1 code point on %s\n",
246 line);
247 exit(U_PARSE_ERROR);
248 }
249 builder.setRoundTripMapping((UChar32)startCP, mapping);
250 } else {
251 for(UChar32 c=(UChar32)startCP; c<=(UChar32)endCP; ++c) {
252 builder.setOneWayMapping(c, mapping);
253 }
254 }
255 continue;
256 }
257 fprintf(stderr, "gennorm2 error: unrecognized data line %s\n", line);
258 exit(U_PARSE_ERROR);
259 }
260 }
261
262 #endif // !UCONFIG_NO_NORMALIZATION
263
264 U_NAMESPACE_END
265
266 /*
267 * Hey, Emacs, please set the following:
268 *
269 * Local Variables:
270 * indent-tabs-mode: nil
271 * End:
272 *
273 */
OLDNEW
« no previous file with comments | « icu46/source/tools/gennorm2/Makefile.in ('k') | icu46/source/tools/gennorm2/gennorm2.vcxproj » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698