OLD | NEW |
---|---|
(Empty) | |
1 Add code support for ICU. | |
2 | |
3 diff --git a/third_party/libxml/encoding.c b/third_party/libxml/encoding.c | |
4 index b86a547..0f41df9 100644 | |
5 --- a/third_party/libxml/encoding.c | |
6 +++ b/third_party/libxml/encoding.c | |
7 @@ -58,7 +58,7 @@ static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL; | |
8 static int xmlCharEncodingAliasesNb = 0; | |
9 static int xmlCharEncodingAliasesMax = 0; | |
10 | |
11 -#ifdef LIBXML_ICONV_ENABLED | |
12 +#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED) | |
13 #if 0 | |
14 #define DEBUG_ENCODING /* Define this to get encoding traces */ | |
15 #endif | |
16 @@ -97,6 +97,54 @@ xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val) | |
17 NULL, 0, val, NULL, NULL, 0, 0, msg, val); | |
18 } | |
19 | |
20 +#ifdef LIBXML_ICU_ENABLED | |
21 +static uconv_t* | |
22 +openIcuConverter(const char* name, int toUnicode) | |
23 +{ /* Builds the converter pair for @name; returns NULL on any failure. */ | |
24 + UErrorCode status = U_ZERO_ERROR; | |
25 + uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t)); | |
26 + if (conv == NULL) | |
27 + return NULL; | |
28 + /* conv->uconv: ICU converter for the requested encoding. */ | |
29 + conv->uconv = ucnv_open(name, &status); | |
30 + if (U_FAILURE(status)) | |
31 + goto error; | |
32 + /* Install the STOP error callback for the direction given by @toUnicode. */ | |
33 + status = U_ZERO_ERROR; | |
34 + if (toUnicode) { | |
35 + ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP, | |
36 + NULL, NULL, NULL, &status); | |
37 + } | |
38 + else { | |
39 + ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP, | |
40 + NULL, NULL, NULL, &status); | |
41 + } | |
42 + if (U_FAILURE(status)) | |
43 + goto error; | |
44 + /* conv->utf8: companion UTF-8 converter; success here returns the pair. */ | |
45 + status = U_ZERO_ERROR; | |
46 + conv->utf8 = ucnv_open("UTF-8", &status); | |
47 + if (U_SUCCESS(status)) | |
48 + return conv; | |
49 + /* Failure path: close the encoding converter if it was opened, free conv. */ | |
50 +error: | |
51 + if (conv->uconv) | |
52 + ucnv_close(conv->uconv); | |
53 + xmlFree(conv); | |
54 + return NULL; | |
55 +} | |
56 + | |
57 +static void | |
58 +closeIcuConverter(uconv_t *conv) | |
59 +{ /* Closes both ICU converters held by @conv, then frees @conv itself. */ | |
60 + if (conv == NULL) /* nothing to release */ | |
61 + return; | |
62 + ucnv_close(conv->uconv); | |
63 + ucnv_close(conv->utf8); | |
64 + xmlFree(conv); | |
65 +} | |
66 +#endif /* LIBXML_ICU_ENABLED */ | |
67 + | |
68 /************************************************************************ | |
69 * * | |
70 * Conversions To/From UTF8 encoding * | |
71 @@ -1306,7 +1354,11 @@ xmlNewCharEncodingHandler(const char *name, | |
72 #ifdef LIBXML_ICONV_ENABLED | |
73 handler->iconv_in = NULL; | |
74 handler->iconv_out = NULL; | |
75 -#endif /* LIBXML_ICONV_ENABLED */ | |
76 +#endif | |
77 +#ifdef LIBXML_ICU_ENABLED | |
78 + handler->uconv_in = NULL; | |
79 + handler->uconv_out = NULL; | |
80 +#endif | |
81 | |
82 /* | |
83 * registers and returns the handler. | |
84 @@ -1371,7 +1423,7 @@ xmlInitCharEncodingHandlers(void) { | |
85 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL); | |
86 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL); | |
87 #endif /* LIBXML_OUTPUT_ENABLED */ | |
88 -#ifndef LIBXML_ICONV_ENABLED | |
89 +#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) | |
90 #ifdef LIBXML_ISO8859X_ENABLED | |
91 xmlRegisterCharEncodingHandlersISO8859x (); | |
92 #endif | |
93 @@ -1578,6 +1630,10 @@ xmlFindCharEncodingHandler(const char *name) { | |
94 xmlCharEncodingHandlerPtr enc; | |
95 iconv_t icv_in, icv_out; | |
96 #endif /* LIBXML_ICONV_ENABLED */ | |
97 +#ifdef LIBXML_ICU_ENABLED | |
98 + xmlCharEncodingHandlerPtr enc; | |
99 + uconv_t *ucv_in, *ucv_out; | |
100 +#endif /* LIBXML_ICU_ENABLED */ | |
101 char upper[100]; | |
102 int i; | |
103 | |
104 @@ -1647,6 +1703,35 @@ xmlFindCharEncodingHandler(const char *name) { | |
105 "iconv : problems with filters for '%s'\n", name); | |
106 } | |
107 #endif /* LIBXML_ICONV_ENABLED */ | |
108 +#ifdef LIBXML_ICU_ENABLED | |
109 + /* check whether icu can handle this */ | |
110 + ucv_in = openIcuConverter(name, 1); | |
111 + ucv_out = openIcuConverter(name, 0); | |
112 + if (ucv_in != NULL && ucv_out != NULL) { | |
113 + enc = (xmlCharEncodingHandlerPtr) | |
114 + xmlMalloc(sizeof(xmlCharEncodingHandler)); | |
115 + if (enc == NULL) { | |
116 + closeIcuConverter(ucv_in); | |
117 + closeIcuConverter(ucv_out); | |
118 + return(NULL); | |
119 + } | |
120 + enc->name = xmlMemStrdup(name); | |
121 + enc->input = NULL; | |
122 + enc->output = NULL; | |
123 + enc->uconv_in = ucv_in; | |
124 + enc->uconv_out = ucv_out; | |
125 +#ifdef DEBUG_ENCODING | |
126 + xmlGenericError(xmlGenericErrorContext, | |
127 + "Found ICU converter handler for encoding %s\n", name); | |
128 +#endif | |
129 + return enc; | |
130 + } else if (ucv_in != NULL || ucv_out != NULL) { | |
131 + closeIcuConverter(ucv_in); | |
132 + closeIcuConverter(ucv_out); | |
133 + xmlEncodingErr(XML_ERR_INTERNAL_ERROR, | |
134 + "ICU converter : problems with filters for '%s'\n", name); | |
135 + } | |
136 +#endif /* LIBXML_ICU_ENABLED */ | |
137 | |
138 #ifdef DEBUG_ENCODING | |
139 xmlGenericError(xmlGenericErrorContext, | |
140 @@ -1737,6 +1822,75 @@ xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen, | |
141 | |
142 /************************************************************************ | |
143 * * | |
144 + * ICU based generic conversion functions * | |
145 + * * | |
146 + ************************************************************************/ | |
147 + | |
148 +#ifdef LIBXML_ICU_ENABLED | |
149 +/** | |
150 + * xmlUconvWrapper: | |
151 + * @cd: pair of ICU converters (cd->uconv for the encoding, cd->utf8) | |
152 + * @toUnicode: non-zero for encoding => UTF-8, 0 for UTF-8 => encoding | |
153 + * @out: a pointer to an array of bytes to store the result | |
154 + * @outlen: the length of @out | |
155 + * @in: a pointer to an array of input bytes | |
156 + * @inlen: the length of @in | |
157 + * | |
158 + * Returns 0 if success, or | |
159 + * -1 by lack of space (also used when an argument is NULL), or | |
160 + * -2 if ICU reports an invalid or illegal character (the input is | |
161 + * malformed or cannot be represented in the target encoding), or | |
162 + * -3 for any other ICU error, e.g. a truncated final character. | |
163 + * | |
164 + * The value of @inlen after return is the number of octets consumed | |
165 + * and the value of @outlen is the number of octets written; both are | |
166 + * updated whatever the return value once the conversion has been run. | |
167 + */ | |
168 +static int | |
169 +xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen, | |
170 + const unsigned char *in, int *inlen) { | |
171 + const char *ucv_in = (const char *) in; | |
172 + char *ucv_out = (char *) out; | |
173 + UErrorCode err = U_ZERO_ERROR; | |
174 + /* Reject NULL buffers or lengths before calling into ICU. */ | |
175 + if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) { | |
176 + if (outlen != NULL) *outlen = 0; | |
177 + return(-1); | |
178 + } | |
179 + | |
180 + /* | |
181 + * TODO(jungshik) | |
182 + * 1. is ucnv_convert(To|From)Algorithmic better? | |
183 + * 2. had we better use an explicit pivot buffer? | |
184 + * 3. error returned comes from 'fromUnicode' only even | |
185 + * when toUnicode is true ! | |
186 + */ | |
187 + if (toUnicode) { | |
188 + /* encoding => UTF-16 => UTF-8 */ | |
189 + ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen, | |
190 + &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL, | |
191 + 0, TRUE, &err); /* no pivot buffer; reset = 0, flush = TRUE */ | |
192 + } else { | |
193 + /* UTF-8 => UTF-16 => encoding */ | |
194 + ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen, | |
195 + &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL, | |
196 + 0, TRUE, &err); /* no pivot buffer; reset = 0, flush = TRUE */ | |
197 + } | |
198 + *inlen = ucv_in - (const char*) in; /* octets consumed from @in */ | |
199 + *outlen = ucv_out - (char *) out; /* octets written to @out */ | |
200 + if (U_SUCCESS(err)) | |
201 + return 0; | |
202 + if (err == U_BUFFER_OVERFLOW_ERROR) | |
203 + return -1; | |
204 + if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND) | |
205 + return -2; | |
206 + /* if (err == U_TRUNCATED_CHAR_FOUND) */ | |
207 + return -3; | |
208 +} | |
209 +#endif /* LIBXML_ICU_ENABLED */ | |
210 + | |
211 +/************************************************************************ | |
212 + * * | |
213 * The real API used by libxml for on-the-fly conversion * | |
214 * * | |
215 ************************************************************************/ | |
216 @@ -1810,6 +1964,16 @@ xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out, | |
217 if (ret == -1) ret = -3; | |
218 } | |
219 #endif /* LIBXML_ICONV_ENABLED */ | |
220 +#ifdef LIBXML_ICU_ENABLED | |
221 + else if (handler->uconv_in != NULL) { | |
222 + ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use], | |
223 + &written, in->content, &toconv); | |
224 + xmlBufferShrink(in, toconv); | |
225 + out->use += written; | |
226 + out->content[out->use] = 0; | |
227 + if (ret == -1) ret = -3; | |
228 + } | |
229 +#endif /* LIBXML_ICU_ENABLED */ | |
230 #ifdef DEBUG_ENCODING | |
231 switch (ret) { | |
232 case 0: | |
233 @@ -1915,6 +2079,17 @@ xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out, | |
234 ret = -3; | |
235 } | |
236 #endif /* LIBXML_ICONV_ENABLED */ | |
237 +#ifdef LIBXML_ICU_ENABLED | |
238 + else if (handler->uconv_in != NULL) { | |
239 + ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use], | |
240 + &written, in->content, &toconv); | |
241 + xmlBufferShrink(in, toconv); | |
242 + out->use += written; | |
243 + out->content[out->use] = 0; | |
244 + if (ret == -1) | |
245 + ret = -3; | |
246 + } | |
247 +#endif /* LIBXML_ICU_ENABLED */ | |
248 switch (ret) { | |
249 case 0: | |
250 #ifdef DEBUG_ENCODING | |
251 @@ -2015,6 +2190,15 @@ retry: | |
252 out->content[out->use] = 0; | |
253 } | |
254 #endif /* LIBXML_ICONV_ENABLED */ | |
255 +#ifdef LIBXML_ICU_ENABLED | |
256 + else if (handler->uconv_out != NULL) { | |
257 + ret = xmlUconvWrapper(handler->uconv_out, 0, | |
258 + &out->content[out->use], | |
259 + &written, NULL, &toconv); | |
260 + out->use += written; | |
261 + out->content[out->use] = 0; | |
262 + } | |
263 +#endif /* LIBXML_ICU_ENABLED */ | |
264 #ifdef DEBUG_ENCODING | |
265 xmlGenericError(xmlGenericErrorContext, | |
266 "initialized encoder\n"); | |
267 @@ -2061,6 +2245,26 @@ retry: | |
268 } | |
269 } | |
270 #endif /* LIBXML_ICONV_ENABLED */ | |
271 +#ifdef LIBXML_ICU_ENABLED | |
272 + else if (handler->uconv_out != NULL) { | |
273 + ret = xmlUconvWrapper(handler->uconv_out, 0, | |
274 + &out->content[out->use], | |
275 + &written, in->content, &toconv); | |
276 + xmlBufferShrink(in, toconv); | |
277 + out->use += written; | |
278 + writtentot += written; | |
279 + out->content[out->use] = 0; | |
280 + if (ret == -1) { | |
281 + if (written > 0) { | |
282 + /* | |
283 + * Output space ran out after partial progress (ICU), so retry | |
284 + */ | |
285 + goto retry; | |
286 + } | |
287 + ret = -3; | |
288 + } | |
289 + } | |
290 +#endif /* LIBXML_ICU_ENABLED */ | |
291 else { | |
292 xmlEncodingErr(XML_I18N_NO_OUTPUT, | |
293 "xmlCharEncOutFunc: no output function !\n", NULL); | |
294 @@ -2173,6 +2377,22 @@ xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) { | |
295 xmlFree(handler); | |
296 } | |
297 #endif /* LIBXML_ICONV_ENABLED */ | |
298 +#ifdef LIBXML_ICU_ENABLED | |
299 + if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) { | |
300 + if (handler->name != NULL) | |
301 + xmlFree(handler->name); | |
302 + handler->name = NULL; | |
303 + if (handler->uconv_out != NULL) { | |
304 + closeIcuConverter(handler->uconv_out); | |
305 + handler->uconv_out = NULL; | |
306 + } | |
307 + if (handler->uconv_in != NULL) { | |
308 + closeIcuConverter(handler->uconv_in); | |
309 + handler->uconv_in = NULL; | |
310 + } | |
311 + xmlFree(handler); | |
312 + } | |
313 +#endif | |
314 #ifdef DEBUG_ENCODING | |
315 if (ret) | |
316 xmlGenericError(xmlGenericErrorContext, | |
317 @@ -2248,6 +2468,23 @@ xmlByteConsumed(xmlParserCtxtPtr ctxt) { | |
318 cur += toconv; | |
319 } while (ret == -2); | |
320 #endif | |
321 +#ifdef LIBXML_ICU_ENABLED | |
322 + } else if (handler->uconv_out != NULL) { | |
323 + do { | |
324 + toconv = in->end - cur; | |
325 + written = 32000; | |
326 + ret = xmlUconvWrapper(handler->uconv_out, 0, &convbuf[0], | |
327 + &written, cur, &toconv); | |
328 + if (ret < 0) { | |
329 + if (written > 0) /* partial progress: loop again */ | |
330 + ret = -2; | |
331 + else /* nothing converted: give up */ | |
332 + return(-1); | |
333 + } | |
334 + unused += written; | |
335 + cur += toconv; | |
336 + } while (ret == -2); | |
337 +#endif /* LIBXML_ICU_ENABLED: closed here so non-ICU builds keep the function tail */ | |
338 } else { | |
339 /* could not find a converter */ | |
340 return(-1); | |
341 @@ -2259,8 +2496,8 @@ xmlByteConsumed(xmlParserCtxtPtr ctxt) { | |
342 } | |
343 return(in->consumed + (in->cur - in->base)); | |
344 } | |
345 | |
346 -#ifndef LIBXML_ICONV_ENABLED | |
347 +#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) | |
348 #ifdef LIBXML_ISO8859X_ENABLED | |
349 | |
350 /** | |
351 diff --git a/third_party/libxml/include/libxml/encoding.h b/third_party/libxml/include/libxml/encoding.h | |
352 index c74b25f..b5f8b48 100644 | |
353 --- a/third_party/libxml/include/libxml/encoding.h | |
354 +++ b/third_party/libxml/include/libxml/encoding.h | |
355 @@ -26,6 +26,24 @@ | |
356 | |
357 #ifdef LIBXML_ICONV_ENABLED | |
358 #include <iconv.h> | |
359 +#endif | |
360 +#ifdef LIBXML_ICU_ENABLED | |
361 +/* Own guard, not the #else of iconv: uconv_t needs UConverter whenever ICU is on. */ | |
362 +#include <unicode/ucnv.h> | |
363 +#if 0 | |
364 +/* Forward-declare UConverter here rather than pulling in <unicode/ucnv.h> | |
365 + * to prevent unwanted ICU symbols being exposed to users of libxml2. | |
366 + * One particular case is Qt4 conflicting on UChar32. | |
367 + */ | |
368 +#include <stdint.h> | |
369 +struct UConverter; | |
370 +typedef struct UConverter UConverter; | |
371 +#ifdef _MSC_VER | |
372 +typedef wchar_t UChar; | |
373 +#else | |
374 +typedef uint16_t UChar; | |
375 +#endif | |
376 +#endif | |
377 #endif | |
378 #ifdef __cplusplus | |
379 extern "C" { | |
380 @@ -125,6 +143,13 @@ typedef int (* xmlCharEncodingOutputFunc)(unsigned char *out, int *outlen, | |
381 * Block defining the handlers for non UTF-8 encodings. | |
382 * If iconv is supported, there are two extra fields. | |
383 */ | |
384 +#ifdef LIBXML_ICU_ENABLED | |
385 +struct _uconv_t { | |
386 + UConverter *uconv; /* for conversion between an encoding and UTF-16 */ | |
387 + UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */ | |
388 +}; | |
389 +typedef struct _uconv_t uconv_t; | |
390 +#endif | |
391 | |
392 typedef struct _xmlCharEncodingHandler xmlCharEncodingHandler; | |
393 typedef xmlCharEncodingHandler *xmlCharEncodingHandlerPtr; | |
394 @@ -136,6 +161,10 @@ struct _xmlCharEncodingHandler { | |
395 iconv_t iconv_in; | |
396 iconv_t iconv_out; | |
397 #endif /* LIBXML_ICONV_ENABLED */ | |
398 +#ifdef LIBXML_ICU_ENABLED | |
399 + uconv_t *uconv_in; | |
400 + uconv_t *uconv_out; | |
401 +#endif /* LIBXML_ICU_ENABLED */ | |
402 }; | |
403 | |
404 #ifdef __cplusplus | |
405 diff --git a/third_party/libxml/include/libxml/parser.h b/third_party/libxml/include/libxml/parser.h | |
406 index dd79c42..3580b63 100644 | |
407 --- a/third_party/libxml/include/libxml/parser.h | |
408 +++ b/third_party/libxml/include/libxml/parser.h | |
409 @@ -1222,6 +1222,7 @@ typedef enum { | |
410 XML_WITH_DEBUG_MEM = 29, | |
411 XML_WITH_DEBUG_RUN = 30, | |
412 XML_WITH_ZLIB = 31, | |
413 + XML_WITH_ICU = 32, | |
414 XML_WITH_NONE = 99999 /* just to be sure of allocation size */ | |
415 } xmlFeature; | |
416 | |
417 diff --git a/third_party/libxml/include/libxml/xmlversion.h.in b/third_party/libxml/include/libxml/xmlversion.h.in | |
418 index 4739f3a..de310ab 100644 | |
419 --- a/third_party/libxml/include/libxml/xmlversion.h.in | |
420 +++ b/third_party/libxml/include/libxml/xmlversion.h.in | |
421 @@ -269,6 +269,15 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version); | |
422 #endif | |
423 | |
424 /** | |
425 + * LIBXML_ICU_ENABLED: | |
426 + * | |
427 + * Whether icu support is available | |
428 + */ | |
429 +#if @WITH_ICU@ | |
430 +#define LIBXML_ICU_ENABLED | |
431 +#endif | |
432 + | |
433 +/** | |
434 * LIBXML_ISO8859X_ENABLED: | |
435 * | |
436 * Whether ISO-8859-* support is made available in case iconv is not | |
437 diff --git a/third_party/libxml/parser.c b/third_party/libxml/parser.c | |
438 index 85e7599..3ba2a06 100644 | |
439 --- a/third_party/libxml/parser.c | |
440 +++ b/third_party/libxml/parser.c | |
441 @@ -954,6 +954,12 @@ xmlHasFeature(xmlFeature feature) | |
442 #else | |
443 return(0); | |
444 #endif | |
445 + case XML_WITH_ICU: | |
446 +#ifdef LIBXML_ICU_ENABLED | |
447 + return(1); | |
448 +#else | |
449 + return(0); | |
450 +#endif | |
451 default: | |
452 break; | |
453 } | |
454 diff --git a/third_party/libxml/patches/icu b/third_party/libxml/patches/icu | |
jungshik at Google
2010/07/12 18:23:29
I guess you don't want to include this diff in the
| |
455 index 324cea3..6c22c3c 100644 | |
456 --- a/third_party/libxml/patches/icu | |
457 +++ b/third_party/libxml/patches/icu | |
458 @@ -1,434 +0,0 @@ | |
459 -Code support for ICU. Note that this relies on modifications to the | |
460 -build environment (either configure or configure.js on Windows). | |
461 - | |
462 -Index: libxml/encoding.c | |
463 -=================================================================== | |
464 ---- libxml.orig/encoding.c 2010-07-09 14:48:28.881863834 -0700 | |
465 -+++ libxml/encoding.c 2010-07-09 14:49:23.479741318 -0700 | |
466 -@@ -58,7 +58,7 @@ | |
467 - static int xmlCharEncodingAliasesNb = 0; | |
468 - static int xmlCharEncodingAliasesMax = 0; | |
469 - | |
470 --#ifdef LIBXML_ICONV_ENABLED | |
471 -+#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED) | |
472 - #if 0 | |
473 - #define DEBUG_ENCODING /* Define this to get encoding traces */ | |
474 - #endif | |
475 -@@ -97,6 +97,54 @@ | |
476 - NULL, 0, val, NULL, NULL, 0, 0, msg, val); | |
477 - } | |
478 - | |
479 -+#ifdef LIBXML_ICU_ENABLED | |
480 -+static uconv_t* | |
481 -+openIcuConverter(const char* name, int toUnicode) | |
482 -+{ | |
483 -+ UErrorCode status = U_ZERO_ERROR; | |
484 -+ uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t)); | |
485 -+ if (conv == NULL) | |
486 -+ return NULL; | |
487 -+ | |
488 -+ conv->uconv = ucnv_open(name, &status); | |
489 -+ if (U_FAILURE(status)) | |
490 -+ goto error; | |
491 -+ | |
492 -+ status = U_ZERO_ERROR; | |
493 -+ if (toUnicode) { | |
494 -+ ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP, | |
495 -+ NULL, NULL, NULL, &status); | |
496 -+ } | |
497 -+ else { | |
498 -+ ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP, | |
499 -+ NULL, NULL, NULL, &status); | |
500 -+ } | |
501 -+ if (U_FAILURE(status)) | |
502 -+ goto error; | |
503 -+ | |
504 -+ status = U_ZERO_ERROR; | |
505 -+ conv->utf8 = ucnv_open("UTF-8", &status); | |
506 -+ if (U_SUCCESS(status)) | |
507 -+ return conv; | |
508 -+ | |
509 -+error: | |
510 -+ if (conv->uconv) | |
511 -+ ucnv_close(conv->uconv); | |
512 -+ xmlFree(conv); | |
513 -+ return NULL; | |
514 -+} | |
515 -+ | |
516 -+static void | |
517 -+closeIcuConverter(uconv_t *conv) | |
518 -+{ | |
519 -+ if (conv != NULL) { | |
520 -+ ucnv_close(conv->uconv); | |
521 -+ ucnv_close(conv->utf8); | |
522 -+ xmlFree(conv); | |
523 -+ } | |
524 -+} | |
525 -+#endif /* LIBXML_ICU_ENABLED */ | |
526 -+ | |
527 - /************************************************************************ | |
528 - * * | |
529 - * Conversions To/From UTF8 encoding * | |
530 -@@ -1306,7 +1354,11 @@ | |
531 - #ifdef LIBXML_ICONV_ENABLED | |
532 - handler->iconv_in = NULL; | |
533 - handler->iconv_out = NULL; | |
534 --#endif /* LIBXML_ICONV_ENABLED */ | |
535 -+#endif | |
536 -+#ifdef LIBXML_ICU_ENABLED | |
537 -+ handler->uconv_in = NULL; | |
538 -+ handler->uconv_out = NULL; | |
539 -+#endif | |
540 - | |
541 - /* | |
542 - * registers and returns the handler. | |
543 -@@ -1371,7 +1423,7 @@ | |
544 - xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL); | |
545 - xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL); | |
546 - #endif /* LIBXML_OUTPUT_ENABLED */ | |
547 --#ifndef LIBXML_ICONV_ENABLED | |
548 -+#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) | |
549 - #ifdef LIBXML_ISO8859X_ENABLED | |
550 - xmlRegisterCharEncodingHandlersISO8859x (); | |
551 - #endif | |
552 -@@ -1578,6 +1630,10 @@ | |
553 - xmlCharEncodingHandlerPtr enc; | |
554 - iconv_t icv_in, icv_out; | |
555 - #endif /* LIBXML_ICONV_ENABLED */ | |
556 -+#ifdef LIBXML_ICU_ENABLED | |
557 -+ xmlCharEncodingHandlerPtr enc; | |
558 -+ uconv_t *ucv_in, *ucv_out; | |
559 -+#endif /* LIBXML_ICU_ENABLED */ | |
560 - char upper[100]; | |
561 - int i; | |
562 - | |
563 -@@ -1647,6 +1703,35 @@ | |
564 - "iconv : problems with filters for '%s'\n", name); | |
565 - } | |
566 - #endif /* LIBXML_ICONV_ENABLED */ | |
567 -+#ifdef LIBXML_ICU_ENABLED | |
568 -+ /* check whether icu can handle this */ | |
569 -+ ucv_in = openIcuConverter(name, 1); | |
570 -+ ucv_out = openIcuConverter(name, 0); | |
571 -+ if (ucv_in != NULL && ucv_out != NULL) { | |
572 -+ enc = (xmlCharEncodingHandlerPtr) | |
573 -+ xmlMalloc(sizeof(xmlCharEncodingHandler)); | |
574 -+ if (enc == NULL) { | |
575 -+ closeIcuConverter(ucv_in); | |
576 -+ closeIcuConverter(ucv_out); | |
577 -+ return(NULL); | |
578 -+ } | |
579 -+ enc->name = xmlMemStrdup(name); | |
580 -+ enc->input = NULL; | |
581 -+ enc->output = NULL; | |
582 -+ enc->uconv_in = ucv_in; | |
583 -+ enc->uconv_out = ucv_out; | |
584 -+#ifdef DEBUG_ENCODING | |
585 -+ xmlGenericError(xmlGenericErrorContext, | |
586 -+ "Found ICU converter handler for encoding %s\n", name); | |
587 -+#endif | |
588 -+ return enc; | |
589 -+ } else if (ucv_in != NULL || ucv_out != NULL) { | |
590 -+ closeIcuConverter(ucv_in); | |
591 -+ closeIcuConverter(ucv_out); | |
592 -+ xmlEncodingErr(XML_ERR_INTERNAL_ERROR, | |
593 -+ "ICU converter : problems with filters for '%s'\n", name); | |
594 -+ } | |
595 -+#endif /* LIBXML_ICU_ENABLED */ | |
596 - | |
597 - #ifdef DEBUG_ENCODING | |
598 - xmlGenericError(xmlGenericErrorContext, | |
599 -@@ -1737,6 +1822,75 @@ | |
600 - | |
601 - /************************************************************************ | |
602 - * * | |
603 -+ * ICU based generic conversion functions * | |
604 -+ * * | |
605 -+ ************************************************************************/ | |
606 -+ | |
607 -+#ifdef LIBXML_ICU_ENABLED | |
608 -+/** | |
609 -+ * xmlUconvWrapper: | |
610 -+ * @cd: ICU uconverter data structure | |
611 -+ * @toUnicode : non-zero if toUnicode. 0 otherwise. | |
612 -+ * @out: a pointer to an array of bytes to store the result | |
613 -+ * @outlen: the length of @out | |
614 -+ * @in: a pointer to an array of ISO Latin 1 chars | |
615 -+ * @inlen: the length of @in | |
616 -+ * | |
617 -+ * Returns 0 if success, or | |
618 -+ * -1 by lack of space, or | |
619 -+ * -2 if the transcoding fails (for *in is not valid utf8 string or | |
620 -+ * the result of transformation can't fit into the encoding we want), or | |
621 -+ * -3 if there the last byte can't form a single output char. | |
622 -+ * | |
623 -+ * The value of @inlen after return is the number of octets consumed | |
624 -+ * as the return value is positive, else unpredictable. | |
625 -+ * The value of @outlen after return is the number of ocetes consumed. | |
626 -+ */ | |
627 -+static int | |
628 -+xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen, | |
629 -+ const unsigned char *in, int *inlen) { | |
630 -+ const char *ucv_in = (const char *) in; | |
631 -+ char *ucv_out = (char *) out; | |
632 -+ UErrorCode err = U_ZERO_ERROR; | |
633 -+ | |
634 -+ if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) { | |
635 -+ if (outlen != NULL) *outlen = 0; | |
636 -+ return(-1); | |
637 -+ } | |
638 -+ | |
639 -+ /* | |
640 -+ * TODO(jungshik) | |
641 -+ * 1. is ucnv_convert(To|From)Algorithmic better? | |
642 -+ * 2. had we better use an explicit pivot buffer? | |
643 -+ * 3. error returned comes from 'fromUnicode' only even | |
644 -+ * when toUnicode is true ! | |
645 -+ */ | |
646 -+ if (toUnicode) { | |
647 -+ /* encoding => UTF-16 => UTF-8 */ | |
648 -+ ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen, | |
649 -+ &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL, | |
650 -+ 0, TRUE, &err); | |
651 -+ } else { | |
652 -+ /* UTF-8 => UTF-16 => encoding */ | |
653 -+ ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen, | |
654 -+ &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL, | |
655 -+ 0, TRUE, &err); | |
656 -+ } | |
657 -+ *inlen = ucv_in - (const char*) in; | |
658 -+ *outlen = ucv_out - (char *) out; | |
659 -+ if (U_SUCCESS(err)) | |
660 -+ return 0; | |
661 -+ if (err == U_BUFFER_OVERFLOW_ERROR) | |
662 -+ return -1; | |
663 -+ if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND) | |
664 -+ return -2; | |
665 -+ /* if (err == U_TRUNCATED_CHAR_FOUND) */ | |
666 -+ return -3; | |
667 -+} | |
668 -+#endif /* LIBXML_ICU_ENABLED */ | |
669 -+ | |
670 -+/************************************************************************ | |
671 -+ * * | |
672 - * The real API used by libxml for on-the-fly conversion * | |
673 - * * | |
674 - ************************************************************************/ | |
675 -@@ -1810,6 +1964,16 @@ | |
676 - if (ret == -1) ret = -3; | |
677 - } | |
678 - #endif /* LIBXML_ICONV_ENABLED */ | |
679 -+#ifdef LIBXML_ICU_ENABLED | |
680 -+ else if (handler->uconv_in != NULL) { | |
681 -+ ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use], | |
682 -+ &written, in->content, &toconv); | |
683 -+ xmlBufferShrink(in, toconv); | |
684 -+ out->use += written; | |
685 -+ out->content[out->use] = 0; | |
686 -+ if (ret == -1) ret = -3; | |
687 -+ } | |
688 -+#endif /* LIBXML_ICU_ENABLED */ | |
689 - #ifdef DEBUG_ENCODING | |
690 - switch (ret) { | |
691 - case 0: | |
692 -@@ -1915,6 +2079,17 @@ | |
693 - ret = -3; | |
694 - } | |
695 - #endif /* LIBXML_ICONV_ENABLED */ | |
696 -+#ifdef LIBXML_ICU_ENABLED | |
697 -+ else if (handler->uconv_in != NULL) { | |
698 -+ ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use], | |
699 -+ &written, in->content, &toconv); | |
700 -+ xmlBufferShrink(in, toconv); | |
701 -+ out->use += written; | |
702 -+ out->content[out->use] = 0; | |
703 -+ if (ret == -1) | |
704 -+ ret = -3; | |
705 -+ } | |
706 -+#endif /* LIBXML_ICU_ENABLED */ | |
707 - switch (ret) { | |
708 - case 0: | |
709 - #ifdef DEBUG_ENCODING | |
710 -@@ -2015,6 +2190,15 @@ | |
711 - out->content[out->use] = 0; | |
712 - } | |
713 - #endif /* LIBXML_ICONV_ENABLED */ | |
714 -+#ifdef LIBXML_ICU_ENABLED | |
715 -+ else if (handler->uconv_out != NULL) { | |
716 -+ ret = xmlUconvWrapper(handler->uconv_out, 0, | |
717 -+ &out->content[out->use], | |
718 -+ &written, NULL, &toconv); | |
719 -+ out->use += written; | |
720 -+ out->content[out->use] = 0; | |
721 -+ } | |
722 -+#endif /* LIBXML_ICU_ENABLED */ | |
723 - #ifdef DEBUG_ENCODING | |
724 - xmlGenericError(xmlGenericErrorContext, | |
725 - "initialized encoder\n"); | |
726 -@@ -2061,6 +2245,26 @@ | |
727 - } | |
728 - } | |
729 - #endif /* LIBXML_ICONV_ENABLED */ | |
730 -+#ifdef LIBXML_ICU_ENABLED | |
731 -+ else if (handler->uconv_out != NULL) { | |
732 -+ ret = xmlUconvWrapper(handler->uconv_out, 0, | |
733 -+ &out->content[out->use], | |
734 -+ &written, in->content, &toconv); | |
735 -+ xmlBufferShrink(in, toconv); | |
736 -+ out->use += written; | |
737 -+ writtentot += written; | |
738 -+ out->content[out->use] = 0; | |
739 -+ if (ret == -1) { | |
740 -+ if (written > 0) { | |
741 -+ /* | |
742 -+ * Can be a limitation of iconv | |
743 -+ */ | |
744 -+ goto retry; | |
745 -+ } | |
746 -+ ret = -3; | |
747 -+ } | |
748 -+ } | |
749 -+#endif /* LIBXML_ICU_ENABLED */ | |
750 - else { | |
751 - xmlEncodingErr(XML_I18N_NO_OUTPUT, | |
752 - "xmlCharEncOutFunc: no output function !\n", NULL); | |
753 -@@ -2173,6 +2377,22 @@ | |
754 - xmlFree(handler); | |
755 - } | |
756 - #endif /* LIBXML_ICONV_ENABLED */ | |
757 -+#ifdef LIBXML_ICU_ENABLED | |
758 -+ if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) { | |
759 -+ if (handler->name != NULL) | |
760 -+ xmlFree(handler->name); | |
761 -+ handler->name = NULL; | |
762 -+ if (handler->uconv_out != NULL) { | |
763 -+ closeIcuConverter(handler->uconv_out); | |
764 -+ handler->uconv_out = NULL; | |
765 -+ } | |
766 -+ if (handler->uconv_in != NULL) { | |
767 -+ closeIcuConverter(handler->uconv_in); | |
768 -+ handler->uconv_in = NULL; | |
769 -+ } | |
770 -+ xmlFree(handler); | |
771 -+ } | |
772 -+#endif | |
773 - #ifdef DEBUG_ENCODING | |
774 - if (ret) | |
775 - xmlGenericError(xmlGenericErrorContext, | |
776 -@@ -2248,6 +2468,22 @@ | |
777 - cur += toconv; | |
778 - } while (ret == -2); | |
779 - #endif | |
780 -+#ifdef LIBXML_ICU_ENABLED | |
781 -+ } else if (handler->uconv_out != NULL) { | |
782 -+ do { | |
783 -+ toconv = in->end - cur; | |
784 -+ written = 32000; | |
785 -+ ret = xmlUconvWrapper(handler->uconv_out, 0, &convbuf[0], | |
786 -+ &written, cur, &toconv); | |
787 -+ if (ret < 0) { | |
788 -+ if (written > 0) | |
789 -+ ret = -2; | |
790 -+ else | |
791 -+ return(-1); | |
792 -+ } | |
793 -+ unused += written; | |
794 -+ cur += toconv; | |
795 -+ } while (ret == -2); | |
796 - } else { | |
797 - /* could not find a converter */ | |
798 - return(-1); | |
799 -@@ -2259,8 +2495,9 @@ | |
800 - } | |
801 - return(in->consumed + (in->cur - in->base)); | |
802 - } | |
803 -+#endif | |
804 - | |
805 --#ifndef LIBXML_ICONV_ENABLED | |
806 -+#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) | |
807 - #ifdef LIBXML_ISO8859X_ENABLED | |
808 - | |
809 - /** | |
810 -Index: libxml/include/libxml/encoding.h | |
811 -=================================================================== | |
812 ---- libxml.orig/include/libxml/encoding.h 2010-07-09 14:50:27.503114118 -0700 | |
813 -+++ libxml/include/libxml/encoding.h 2010-07-09 14:53:27.251611643 -0700 | |
814 -@@ -26,6 +26,24 @@ | |
815 - | |
816 - #ifdef LIBXML_ICONV_ENABLED | |
817 - #include <iconv.h> | |
818 -+#else | |
819 -+#ifdef LIBXML_ICU_ENABLED | |
820 -+#include <unicode/ucnv.h> | |
821 -+#if 0 | |
822 -+/* Forward-declare UConverter here rather than pulling in <unicode/ucnv.h> | |
823 -+ * to prevent unwanted ICU symbols being exposed to users of libxml2. | |
824 -+ * One particular case is Qt4 conflicting on UChar32. | |
825 -+ */ | |
826 -+#include <stdint.h> | |
827 -+struct UConverter; | |
828 -+typedef struct UConverter UConverter; | |
829 -+#ifdef _MSC_VER | |
830 -+typedef wchar_t UChar; | |
831 -+#else | |
832 -+typedef uint16_t UChar; | |
833 -+#endif | |
834 -+#endif | |
835 -+#endif | |
836 - #endif | |
837 - #ifdef __cplusplus | |
838 - extern "C" { | |
839 -@@ -125,6 +143,13 @@ | |
840 - * Block defining the handlers for non UTF-8 encodings. | |
841 - * If iconv is supported, there are two extra fields. | |
842 - */ | |
843 -+#ifdef LIBXML_ICU_ENABLED | |
844 -+struct _uconv_t { | |
845 -+ UConverter *uconv; /* for conversion between an encoding and UTF-16 */ | |
846 -+ UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */ | |
847 -+}; | |
848 -+typedef struct _uconv_t uconv_t; | |
849 -+#endif | |
850 - | |
851 - typedef struct _xmlCharEncodingHandler xmlCharEncodingHandler; | |
852 - typedef xmlCharEncodingHandler *xmlCharEncodingHandlerPtr; | |
853 -@@ -136,6 +161,10 @@ | |
854 - iconv_t iconv_in; | |
855 - iconv_t iconv_out; | |
856 - #endif /* LIBXML_ICONV_ENABLED */ | |
857 -+#ifdef LIBXML_ICU_ENABLED | |
858 -+ uconv_t *uconv_in; | |
859 -+ uconv_t *uconv_out; | |
860 -+#endif /* LIBXML_ICU_ENABLED */ | |
861 - }; | |
862 - | |
863 - #ifdef __cplusplus | |
864 -Index: libxml/include/libxml/parser.h | |
865 -=================================================================== | |
866 ---- libxml.orig/include/libxml/parser.h 2010-07-09 14:51:21.190673740 -0700 | |
867 -+++ libxml/include/libxml/parser.h 2010-07-09 14:53:19.571862214 -0700 | |
868 -@@ -1222,6 +1222,7 @@ | |
869 - XML_WITH_DEBUG_MEM = 29, | |
870 - XML_WITH_DEBUG_RUN = 30, | |
871 - XML_WITH_ZLIB = 31, | |
872 -+ XML_WITH_ICU = 32, | |
873 - XML_WITH_NONE = 99999 /* just to be sure of allocation size */ | |
874 - } xmlFeature; | |
875 - | |
876 -Index: libxml/parser.c | |
877 -=================================================================== | |
878 ---- libxml.orig/parser.c 2010-07-09 14:52:15.150057108 -0700 | |
879 -+++ libxml/parser.c 2010-07-09 14:53:06.190137405 -0700 | |
880 -@@ -954,6 +954,12 @@ | |
881 - #else | |
882 - return(0); | |
883 - #endif | |
884 -+ case XML_WITH_ICU: | |
885 -+#ifdef LIBXML_ICU_ENABLED | |
886 -+ return(1); | |
887 -+#else | |
888 -+ return(0); | |
889 -+#endif | |
890 - default: | |
891 - break; | |
892 - } | |
OLD | NEW |