Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(72)

Side by Side Diff: third_party/libxml/patches/icu

Issue 2951008: Update libxml to 2.7.7. (Closed) Base URL: http://src.chromium.org/git/chromium.git
Patch Set: Created 10 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/libxml/patches/LoadLibraryA ('k') | third_party/libxml/patches/icu-configure » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 Add code support for ICU.
2
3 diff --git a/third_party/libxml/encoding.c b/third_party/libxml/encoding.c
4 index b86a547..0f41df9 100644
5 --- a/third_party/libxml/encoding.c
6 +++ b/third_party/libxml/encoding.c
7 @@ -58,7 +58,7 @@ static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
8 static int xmlCharEncodingAliasesNb = 0;
9 static int xmlCharEncodingAliasesMax = 0;
10
11 -#ifdef LIBXML_ICONV_ENABLED
12 +#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
13 #if 0
14 #define DEBUG_ENCODING /* Define this to get encoding traces */
15 #endif
16 @@ -97,6 +97,54 @@ xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
17 NULL, 0, val, NULL, NULL, 0, 0, msg, val);
18 }
19
20 +#ifdef LIBXML_ICU_ENABLED
21 +static uconv_t*
22 +openIcuConverter(const char* name, int toUnicode)
23 +{
24 + UErrorCode status = U_ZERO_ERROR;
25 + uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
26 + if (conv == NULL)
27 + return NULL;
28 +
29 + conv->uconv = ucnv_open(name, &status);
30 + if (U_FAILURE(status))
31 + goto error;
32 +
33 + status = U_ZERO_ERROR;
34 + if (toUnicode) {
35 + ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
36 + NULL, NULL, NULL, &status);
37 + }
38 + else {
39 + ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
40 + NULL, NULL, NULL, &status);
41 + }
42 + if (U_FAILURE(status))
43 + goto error;
44 +
45 + status = U_ZERO_ERROR;
46 + conv->utf8 = ucnv_open("UTF-8", &status);
47 + if (U_SUCCESS(status))
48 + return conv;
49 +
50 +error:
51 + if (conv->uconv)
52 + ucnv_close(conv->uconv);
53 + xmlFree(conv);
54 + return NULL;
55 +}
56 +
57 +static void
58 +closeIcuConverter(uconv_t *conv)
59 +{
60 + if (conv != NULL) {
61 + ucnv_close(conv->uconv);
62 + ucnv_close(conv->utf8);
63 + xmlFree(conv);
64 + }
65 +}
66 +#endif /* LIBXML_ICU_ENABLED */
67 +
68 /************************************************************************
69 * *
70 * Conversions To/From UTF8 encoding *
71 @@ -1306,7 +1354,11 @@ xmlNewCharEncodingHandler(const char *name,
72 #ifdef LIBXML_ICONV_ENABLED
73 handler->iconv_in = NULL;
74 handler->iconv_out = NULL;
75 -#endif /* LIBXML_ICONV_ENABLED */
76 +#endif
77 +#ifdef LIBXML_ICU_ENABLED
78 + handler->uconv_in = NULL;
79 + handler->uconv_out = NULL;
80 +#endif
81
82 /*
83 * registers and returns the handler.
84 @@ -1371,7 +1423,7 @@ xmlInitCharEncodingHandlers(void) {
85 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
86 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
87 #endif /* LIBXML_OUTPUT_ENABLED */
88 -#ifndef LIBXML_ICONV_ENABLED
89 +#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
90 #ifdef LIBXML_ISO8859X_ENABLED
91 xmlRegisterCharEncodingHandlersISO8859x ();
92 #endif
93 @@ -1578,6 +1630,10 @@ xmlFindCharEncodingHandler(const char *name) {
94 xmlCharEncodingHandlerPtr enc;
95 iconv_t icv_in, icv_out;
96 #endif /* LIBXML_ICONV_ENABLED */
97 +#ifdef LIBXML_ICU_ENABLED
98 + xmlCharEncodingHandlerPtr enc;
99 + uconv_t *ucv_in, *ucv_out;
100 +#endif /* LIBXML_ICU_ENABLED */
101 char upper[100];
102 int i;
103
104 @@ -1647,6 +1703,35 @@ xmlFindCharEncodingHandler(const char *name) {
105 "iconv : problems with filters for '%s'\n", name);
106 }
107 #endif /* LIBXML_ICONV_ENABLED */
108 +#ifdef LIBXML_ICU_ENABLED
109 + /* check whether icu can handle this */
110 + ucv_in = openIcuConverter(name, 1);
111 + ucv_out = openIcuConverter(name, 0);
112 + if (ucv_in != NULL && ucv_out != NULL) {
113 + enc = (xmlCharEncodingHandlerPtr)
114 + xmlMalloc(sizeof(xmlCharEncodingHandler));
115 + if (enc == NULL) {
116 + closeIcuConverter(ucv_in);
117 + closeIcuConverter(ucv_out);
118 + return(NULL);
119 + }
120 + enc->name = xmlMemStrdup(name);
121 + enc->input = NULL;
122 + enc->output = NULL;
123 + enc->uconv_in = ucv_in;
124 + enc->uconv_out = ucv_out;
125 +#ifdef DEBUG_ENCODING
126 + xmlGenericError(xmlGenericErrorContext,
127 + "Found ICU converter handler for encoding %s\n", name);
128 +#endif
129 + return enc;
130 + } else if (ucv_in != NULL || ucv_out != NULL) {
131 + closeIcuConverter(ucv_in);
132 + closeIcuConverter(ucv_out);
133 + xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
134 + "ICU converter : problems with filters for '%s'\n", name);
135 + }
136 +#endif /* LIBXML_ICU_ENABLED */
137
138 #ifdef DEBUG_ENCODING
139 xmlGenericError(xmlGenericErrorContext,
140 @@ -1737,6 +1822,75 @@ xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
141
142 /************************************************************************
143 * *
144 + * ICU based generic conversion functions *
145 + * *
146 + ************************************************************************/
147 +
148 +#ifdef LIBXML_ICU_ENABLED
149 +/**
150 + * xmlUconvWrapper:
151 + * @cd: ICU uconverter data structure
152 + * @toUnicode : non-zero if toUnicode. 0 otherwise.
153 + * @out: a pointer to an array of bytes to store the result
154 + * @outlen: the length of @out
155 + * @in: a pointer to an array of ISO Latin 1 chars
156 + * @inlen: the length of @in
157 + *
158 + * Returns 0 if success, or
159 + * -1 by lack of space, or
160 + * -2 if the transcoding fails (for *in is not valid utf8 string or
161 + * the result of transformation can't fit into the encoding we want), or
162 + * -3 if there the last byte can't form a single output char.
163 + *
164 + * The value of @inlen after return is the number of octets consumed
165 + * as the return value is positive, else unpredictable.
166 + * The value of @outlen after return is the number of ocetes consumed.
167 + */
168 +static int
169 +xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
170 + const unsigned char *in, int *inlen) {
171 + const char *ucv_in = (const char *) in;
172 + char *ucv_out = (char *) out;
173 + UErrorCode err = U_ZERO_ERROR;
174 +
175 + if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
176 + if (outlen != NULL) *outlen = 0;
177 + return(-1);
178 + }
179 +
180 + /*
181 + * TODO(jungshik)
182 + * 1. is ucnv_convert(To|From)Algorithmic better?
183 + * 2. had we better use an explicit pivot buffer?
184 + * 3. error returned comes from 'fromUnicode' only even
185 + * when toUnicode is true !
186 + */
187 + if (toUnicode) {
188 + /* encoding => UTF-16 => UTF-8 */
189 + ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
190 + &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL,
191 + 0, TRUE, &err);
192 + } else {
193 + /* UTF-8 => UTF-16 => encoding */
194 + ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
195 + &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL,
196 + 0, TRUE, &err);
197 + }
198 + *inlen = ucv_in - (const char*) in;
199 + *outlen = ucv_out - (char *) out;
200 + if (U_SUCCESS(err))
201 + return 0;
202 + if (err == U_BUFFER_OVERFLOW_ERROR)
203 + return -1;
204 + if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
205 + return -2;
206 + /* if (err == U_TRUNCATED_CHAR_FOUND) */
207 + return -3;
208 +}
209 +#endif /* LIBXML_ICU_ENABLED */
210 +
211 +/************************************************************************
212 + * *
213 * The real API used by libxml for on-the-fly conversion *
214 * *
215 ************************************************************************/
216 @@ -1810,6 +1964,16 @@ xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
217 if (ret == -1) ret = -3;
218 }
219 #endif /* LIBXML_ICONV_ENABLED */
220 +#ifdef LIBXML_ICU_ENABLED
221 + else if (handler->uconv_in != NULL) {
222 + ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use],
223 + &written, in->content, &toconv);
224 + xmlBufferShrink(in, toconv);
225 + out->use += written;
226 + out->content[out->use] = 0;
227 + if (ret == -1) ret = -3;
228 + }
229 +#endif /* LIBXML_ICU_ENABLED */
230 #ifdef DEBUG_ENCODING
231 switch (ret) {
232 case 0:
233 @@ -1915,6 +2079,17 @@ xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
234 ret = -3;
235 }
236 #endif /* LIBXML_ICONV_ENABLED */
237 +#ifdef LIBXML_ICU_ENABLED
238 + else if (handler->uconv_in != NULL) {
239 + ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use],
240 + &written, in->content, &toconv);
241 + xmlBufferShrink(in, toconv);
242 + out->use += written;
243 + out->content[out->use] = 0;
244 + if (ret == -1)
245 + ret = -3;
246 + }
247 +#endif /* LIBXML_ICU_ENABLED */
248 switch (ret) {
249 case 0:
250 #ifdef DEBUG_ENCODING
251 @@ -2015,6 +2190,15 @@ retry:
252 out->content[out->use] = 0;
253 }
254 #endif /* LIBXML_ICONV_ENABLED */
255 +#ifdef LIBXML_ICU_ENABLED
256 + else if (handler->uconv_out != NULL) {
257 + ret = xmlUconvWrapper(handler->uconv_out, 0,
258 + &out->content[out->use],
259 + &written, NULL, &toconv);
260 + out->use += written;
261 + out->content[out->use] = 0;
262 + }
263 +#endif /* LIBXML_ICU_ENABLED */
264 #ifdef DEBUG_ENCODING
265 xmlGenericError(xmlGenericErrorContext,
266 "initialized encoder\n");
267 @@ -2061,6 +2245,26 @@ retry:
268 }
269 }
270 #endif /* LIBXML_ICONV_ENABLED */
271 +#ifdef LIBXML_ICU_ENABLED
272 + else if (handler->uconv_out != NULL) {
273 + ret = xmlUconvWrapper(handler->uconv_out, 0,
274 + &out->content[out->use],
275 + &written, in->content, &toconv);
276 + xmlBufferShrink(in, toconv);
277 + out->use += written;
278 + writtentot += written;
279 + out->content[out->use] = 0;
280 + if (ret == -1) {
281 + if (written > 0) {
282 + /*
283 + * Can be a limitation of iconv
284 + */
285 + goto retry;
286 + }
287 + ret = -3;
288 + }
289 + }
290 +#endif /* LIBXML_ICU_ENABLED */
291 else {
292 xmlEncodingErr(XML_I18N_NO_OUTPUT,
293 "xmlCharEncOutFunc: no output function !\n", NULL);
294 @@ -2173,6 +2377,22 @@ xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
295 xmlFree(handler);
296 }
297 #endif /* LIBXML_ICONV_ENABLED */
298 +#ifdef LIBXML_ICU_ENABLED
299 + if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) {
300 + if (handler->name != NULL)
301 + xmlFree(handler->name);
302 + handler->name = NULL;
303 + if (handler->uconv_out != NULL) {
304 + closeIcuConverter(handler->uconv_out);
305 + handler->uconv_out = NULL;
306 + }
307 + if (handler->uconv_in != NULL) {
308 + closeIcuConverter(handler->uconv_in);
309 + handler->uconv_in = NULL;
310 + }
311 + xmlFree(handler);
312 + }
313 +#endif
314 #ifdef DEBUG_ENCODING
315 if (ret)
316 xmlGenericError(xmlGenericErrorContext,
317 @@ -2248,6 +2468,22 @@ xmlByteConsumed(xmlParserCtxtPtr ctxt) {
318 cur += toconv;
319 } while (ret == -2);
320 #endif
321 +#ifdef LIBXML_ICU_ENABLED
322 + } else if (handler->uconv_out != NULL) {
323 + do {
324 + toconv = in->end - cur;
325 + written = 32000;
326 + ret = xmlUconvWrapper(handler->uconv_out, 0, &convbuf[0],
327 + &written, cur, &toconv);
328 + if (ret < 0) {
329 + if (written > 0)
330 + ret = -2;
331 + else
332 + return(-1);
333 + }
334 + unused += written;
335 + cur += toconv;
336 + } while (ret == -2);
337 } else {
338 /* could not find a converter */
339 return(-1);
340 @@ -2259,8 +2495,9 @@ xmlByteConsumed(xmlParserCtxtPtr ctxt) {
341 }
342 return(in->consumed + (in->cur - in->base));
343 }
344 +#endif
345
346 -#ifndef LIBXML_ICONV_ENABLED
347 +#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
348 #ifdef LIBXML_ISO8859X_ENABLED
349
350 /**
351 diff --git a/third_party/libxml/include/libxml/encoding.h b/third_party/libxml/include/libxml/encoding.h
352 index c74b25f..b5f8b48 100644
353 --- a/third_party/libxml/include/libxml/encoding.h
354 +++ b/third_party/libxml/include/libxml/encoding.h
355 @@ -26,6 +26,24 @@
356
357 #ifdef LIBXML_ICONV_ENABLED
358 #include <iconv.h>
359 +#else
360 +#ifdef LIBXML_ICU_ENABLED
361 +#include <unicode/ucnv.h>
362 +#if 0
363 +/* Forward-declare UConverter here rather than pulling in <unicode/ucnv.h>
364 + * to prevent unwanted ICU symbols being exposed to users of libxml2.
365 + * One particular case is Qt4 conflicting on UChar32.
366 + */
367 +#include <stdint.h>
368 +struct UConverter;
369 +typedef struct UConverter UConverter;
370 +#ifdef _MSC_VER
371 +typedef wchar_t UChar;
372 +#else
373 +typedef uint16_t UChar;
374 +#endif
375 +#endif
376 +#endif
377 #endif
378 #ifdef __cplusplus
379 extern "C" {
380 @@ -125,6 +143,13 @@ typedef int (* xmlCharEncodingOutputFunc)(unsigned char *out, int *outlen,
381 * Block defining the handlers for non UTF-8 encodings.
382 * If iconv is supported, there are two extra fields.
383 */
384 +#ifdef LIBXML_ICU_ENABLED
385 +struct _uconv_t {
386 + UConverter *uconv; /* for conversion between an encoding and UTF-16 */
387 + UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */
388 +};
389 +typedef struct _uconv_t uconv_t;
390 +#endif
391
392 typedef struct _xmlCharEncodingHandler xmlCharEncodingHandler;
393 typedef xmlCharEncodingHandler *xmlCharEncodingHandlerPtr;
394 @@ -136,6 +161,10 @@ struct _xmlCharEncodingHandler {
395 iconv_t iconv_in;
396 iconv_t iconv_out;
397 #endif /* LIBXML_ICONV_ENABLED */
398 +#ifdef LIBXML_ICU_ENABLED
399 + uconv_t *uconv_in;
400 + uconv_t *uconv_out;
401 +#endif /* LIBXML_ICU_ENABLED */
402 };
403
404 #ifdef __cplusplus
405 diff --git a/third_party/libxml/include/libxml/parser.h b/third_party/libxml/include/libxml/parser.h
406 index dd79c42..3580b63 100644
407 --- a/third_party/libxml/include/libxml/parser.h
408 +++ b/third_party/libxml/include/libxml/parser.h
409 @@ -1222,6 +1222,7 @@ typedef enum {
410 XML_WITH_DEBUG_MEM = 29,
411 XML_WITH_DEBUG_RUN = 30,
412 XML_WITH_ZLIB = 31,
413 + XML_WITH_ICU = 32,
414 XML_WITH_NONE = 99999 /* just to be sure of allocation size */
415 } xmlFeature;
416
417 diff --git a/third_party/libxml/include/libxml/xmlversion.h.in b/third_party/libxml/include/libxml/xmlversion.h.in
418 index 4739f3a..de310ab 100644
419 --- a/third_party/libxml/include/libxml/xmlversion.h.in
420 +++ b/third_party/libxml/include/libxml/xmlversion.h.in
421 @@ -269,6 +269,15 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version);
422 #endif
423
424 /**
425 + * LIBXML_ICU_ENABLED:
426 + *
427 + * Whether icu support is available
428 + */
429 +#if @WITH_ICU@
430 +#define LIBXML_ICU_ENABLED
431 +#endif
432 +
433 +/**
434 * LIBXML_ISO8859X_ENABLED:
435 *
436 * Whether ISO-8859-* support is made available in case iconv is not
437 diff --git a/third_party/libxml/parser.c b/third_party/libxml/parser.c
438 index 85e7599..3ba2a06 100644
439 --- a/third_party/libxml/parser.c
440 +++ b/third_party/libxml/parser.c
441 @@ -954,6 +954,12 @@ xmlHasFeature(xmlFeature feature)
442 #else
443 return(0);
444 #endif
445 + case XML_WITH_ICU:
446 +#ifdef LIBXML_ICU_ENABLED
447 + return(1);
448 +#else
449 + return(0);
450 +#endif
451 default:
452 break;
453 }
454 diff --git a/third_party/libxml/patches/icu b/third_party/libxml/patches/icu
jungshik at Google 2010/07/12 18:23:29 I guess you don't want to include this diff in the
455 index 324cea3..6c22c3c 100644
456 --- a/third_party/libxml/patches/icu
457 +++ b/third_party/libxml/patches/icu
458 @@ -1,434 +0,0 @@
459 -Code support for ICU. Note that this relies on modifications to the
460 -build environment (either configure or configure.js on Windows).
461 -
462 -Index: libxml/encoding.c
463 -===================================================================
464 ---- libxml.orig/encoding.c 2010-07-09 14:48:28.881863834 -0700
465 -+++ libxml/encoding.c 2010-07-09 14:49:23.479741318 -0700
466 -@@ -58,7 +58,7 @@
467 - static int xmlCharEncodingAliasesNb = 0;
468 - static int xmlCharEncodingAliasesMax = 0;
469 -
470 --#ifdef LIBXML_ICONV_ENABLED
471 -+#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
472 - #if 0
473 - #define DEBUG_ENCODING /* Define this to get encoding traces */
474 - #endif
475 -@@ -97,6 +97,54 @@
476 - NULL, 0, val, NULL, NULL, 0, 0, msg, val);
477 - }
478 -
479 -+#ifdef LIBXML_ICU_ENABLED
480 -+static uconv_t*
481 -+openIcuConverter(const char* name, int toUnicode)
482 -+{
483 -+ UErrorCode status = U_ZERO_ERROR;
484 -+ uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
485 -+ if (conv == NULL)
486 -+ return NULL;
487 -+
488 -+ conv->uconv = ucnv_open(name, &status);
489 -+ if (U_FAILURE(status))
490 -+ goto error;
491 -+
492 -+ status = U_ZERO_ERROR;
493 -+ if (toUnicode) {
494 -+ ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
495 -+ NULL, NULL, NULL, &status);
496 -+ }
497 -+ else {
498 -+ ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
499 -+ NULL, NULL, NULL, &status);
500 -+ }
501 -+ if (U_FAILURE(status))
502 -+ goto error;
503 -+
504 -+ status = U_ZERO_ERROR;
505 -+ conv->utf8 = ucnv_open("UTF-8", &status);
506 -+ if (U_SUCCESS(status))
507 -+ return conv;
508 -+
509 -+error:
510 -+ if (conv->uconv)
511 -+ ucnv_close(conv->uconv);
512 -+ xmlFree(conv);
513 -+ return NULL;
514 -+}
515 -+
516 -+static void
517 -+closeIcuConverter(uconv_t *conv)
518 -+{
519 -+ if (conv != NULL) {
520 -+ ucnv_close(conv->uconv);
521 -+ ucnv_close(conv->utf8);
522 -+ xmlFree(conv);
523 -+ }
524 -+}
525 -+#endif /* LIBXML_ICU_ENABLED */
526 -+
527 - /************************************************************************
528 - * *
529 - * Conversions To/From UTF8 encoding *
530 -@@ -1306,7 +1354,11 @@
531 - #ifdef LIBXML_ICONV_ENABLED
532 - handler->iconv_in = NULL;
533 - handler->iconv_out = NULL;
534 --#endif /* LIBXML_ICONV_ENABLED */
535 -+#endif
536 -+#ifdef LIBXML_ICU_ENABLED
537 -+ handler->uconv_in = NULL;
538 -+ handler->uconv_out = NULL;
539 -+#endif
540 -
541 - /*
542 - * registers and returns the handler.
543 -@@ -1371,7 +1423,7 @@
544 - xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
545 - xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
546 - #endif /* LIBXML_OUTPUT_ENABLED */
547 --#ifndef LIBXML_ICONV_ENABLED
548 -+#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
549 - #ifdef LIBXML_ISO8859X_ENABLED
550 - xmlRegisterCharEncodingHandlersISO8859x ();
551 - #endif
552 -@@ -1578,6 +1630,10 @@
553 - xmlCharEncodingHandlerPtr enc;
554 - iconv_t icv_in, icv_out;
555 - #endif /* LIBXML_ICONV_ENABLED */
556 -+#ifdef LIBXML_ICU_ENABLED
557 -+ xmlCharEncodingHandlerPtr enc;
558 -+ uconv_t *ucv_in, *ucv_out;
559 -+#endif /* LIBXML_ICU_ENABLED */
560 - char upper[100];
561 - int i;
562 -
563 -@@ -1647,6 +1703,35 @@
564 - "iconv : problems with filters for '%s'\n", name);
565 - }
566 - #endif /* LIBXML_ICONV_ENABLED */
567 -+#ifdef LIBXML_ICU_ENABLED
568 -+ /* check whether icu can handle this */
569 -+ ucv_in = openIcuConverter(name, 1);
570 -+ ucv_out = openIcuConverter(name, 0);
571 -+ if (ucv_in != NULL && ucv_out != NULL) {
572 -+ enc = (xmlCharEncodingHandlerPtr)
573 -+ xmlMalloc(sizeof(xmlCharEncodingHandler));
574 -+ if (enc == NULL) {
575 -+ closeIcuConverter(ucv_in);
576 -+ closeIcuConverter(ucv_out);
577 -+ return(NULL);
578 -+ }
579 -+ enc->name = xmlMemStrdup(name);
580 -+ enc->input = NULL;
581 -+ enc->output = NULL;
582 -+ enc->uconv_in = ucv_in;
583 -+ enc->uconv_out = ucv_out;
584 -+#ifdef DEBUG_ENCODING
585 -+ xmlGenericError(xmlGenericErrorContext,
586 -+ "Found ICU converter handler for encoding %s\n", name);
587 -+#endif
588 -+ return enc;
589 -+ } else if (ucv_in != NULL || ucv_out != NULL) {
590 -+ closeIcuConverter(ucv_in);
591 -+ closeIcuConverter(ucv_out);
592 -+ xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
593 -+ "ICU converter : problems with filters for '%s'\n", name);
594 -+ }
595 -+#endif /* LIBXML_ICU_ENABLED */
596 -
597 - #ifdef DEBUG_ENCODING
598 - xmlGenericError(xmlGenericErrorContext,
599 -@@ -1737,6 +1822,75 @@
600 -
601 - /************************************************************************
602 - * *
603 -+ * ICU based generic conversion functions *
604 -+ * *
605 -+ ************************************************************************/
606 -+
607 -+#ifdef LIBXML_ICU_ENABLED
608 -+/**
609 -+ * xmlUconvWrapper:
610 -+ * @cd: ICU uconverter data structure
611 -+ * @toUnicode : non-zero if toUnicode. 0 otherwise.
612 -+ * @out: a pointer to an array of bytes to store the result
613 -+ * @outlen: the length of @out
614 -+ * @in: a pointer to an array of ISO Latin 1 chars
615 -+ * @inlen: the length of @in
616 -+ *
617 -+ * Returns 0 if success, or
618 -+ * -1 by lack of space, or
619 -+ * -2 if the transcoding fails (for *in is not valid utf8 string or
620 -+ * the result of transformation can't fit into the encoding we want), or
621 -+ * -3 if there the last byte can't form a single output char.
622 -+ *
623 -+ * The value of @inlen after return is the number of octets consumed
624 -+ * as the return value is positive, else unpredictable.
625 -+ * The value of @outlen after return is the number of ocetes consumed.
626 -+ */
627 -+static int
628 -+xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
629 -+ const unsigned char *in, int *inlen) {
630 -+ const char *ucv_in = (const char *) in;
631 -+ char *ucv_out = (char *) out;
632 -+ UErrorCode err = U_ZERO_ERROR;
633 -+
634 -+ if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
635 -+ if (outlen != NULL) *outlen = 0;
636 -+ return(-1);
637 -+ }
638 -+
639 -+ /*
640 -+ * TODO(jungshik)
641 -+ * 1. is ucnv_convert(To|From)Algorithmic better?
642 -+ * 2. had we better use an explicit pivot buffer?
643 -+ * 3. error returned comes from 'fromUnicode' only even
644 -+ * when toUnicode is true !
645 -+ */
646 -+ if (toUnicode) {
647 -+ /* encoding => UTF-16 => UTF-8 */
648 -+ ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
649 -+ &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL,
650 -+ 0, TRUE, &err);
651 -+ } else {
652 -+ /* UTF-8 => UTF-16 => encoding */
653 -+ ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
654 -+ &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL,
655 -+ 0, TRUE, &err);
656 -+ }
657 -+ *inlen = ucv_in - (const char*) in;
658 -+ *outlen = ucv_out - (char *) out;
659 -+ if (U_SUCCESS(err))
660 -+ return 0;
661 -+ if (err == U_BUFFER_OVERFLOW_ERROR)
662 -+ return -1;
663 -+ if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
664 -+ return -2;
665 -+ /* if (err == U_TRUNCATED_CHAR_FOUND) */
666 -+ return -3;
667 -+}
668 -+#endif /* LIBXML_ICU_ENABLED */
669 -+
670 -+/************************************************************************
671 -+ * *
672 - * The real API used by libxml for on-the-fly conversion *
673 - * *
674 - ************************************************************************/
675 -@@ -1810,6 +1964,16 @@
676 - if (ret == -1) ret = -3;
677 - }
678 - #endif /* LIBXML_ICONV_ENABLED */
679 -+#ifdef LIBXML_ICU_ENABLED
680 -+ else if (handler->uconv_in != NULL) {
681 -+ ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use],
682 -+ &written, in->content, &toconv);
683 -+ xmlBufferShrink(in, toconv);
684 -+ out->use += written;
685 -+ out->content[out->use] = 0;
686 -+ if (ret == -1) ret = -3;
687 -+ }
688 -+#endif /* LIBXML_ICU_ENABLED */
689 - #ifdef DEBUG_ENCODING
690 - switch (ret) {
691 - case 0:
692 -@@ -1915,6 +2079,17 @@
693 - ret = -3;
694 - }
695 - #endif /* LIBXML_ICONV_ENABLED */
696 -+#ifdef LIBXML_ICU_ENABLED
697 -+ else if (handler->uconv_in != NULL) {
698 -+ ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use],
699 -+ &written, in->content, &toconv);
700 -+ xmlBufferShrink(in, toconv);
701 -+ out->use += written;
702 -+ out->content[out->use] = 0;
703 -+ if (ret == -1)
704 -+ ret = -3;
705 -+ }
706 -+#endif /* LIBXML_ICU_ENABLED */
707 - switch (ret) {
708 - case 0:
709 - #ifdef DEBUG_ENCODING
710 -@@ -2015,6 +2190,15 @@
711 - out->content[out->use] = 0;
712 - }
713 - #endif /* LIBXML_ICONV_ENABLED */
714 -+#ifdef LIBXML_ICU_ENABLED
715 -+ else if (handler->uconv_out != NULL) {
716 -+ ret = xmlUconvWrapper(handler->uconv_out, 0,
717 -+ &out->content[out->use],
718 -+ &written, NULL, &toconv);
719 -+ out->use += written;
720 -+ out->content[out->use] = 0;
721 -+ }
722 -+#endif /* LIBXML_ICU_ENABLED */
723 - #ifdef DEBUG_ENCODING
724 - xmlGenericError(xmlGenericErrorContext,
725 - "initialized encoder\n");
726 -@@ -2061,6 +2245,26 @@
727 - }
728 - }
729 - #endif /* LIBXML_ICONV_ENABLED */
730 -+#ifdef LIBXML_ICU_ENABLED
731 -+ else if (handler->uconv_out != NULL) {
732 -+ ret = xmlUconvWrapper(handler->uconv_out, 0,
733 -+ &out->content[out->use],
734 -+ &written, in->content, &toconv);
735 -+ xmlBufferShrink(in, toconv);
736 -+ out->use += written;
737 -+ writtentot += written;
738 -+ out->content[out->use] = 0;
739 -+ if (ret == -1) {
740 -+ if (written > 0) {
741 -+ /*
742 -+ * Can be a limitation of iconv
743 -+ */
744 -+ goto retry;
745 -+ }
746 -+ ret = -3;
747 -+ }
748 -+ }
749 -+#endif /* LIBXML_ICU_ENABLED */
750 - else {
751 - xmlEncodingErr(XML_I18N_NO_OUTPUT,
752 - "xmlCharEncOutFunc: no output function !\n", NULL);
753 -@@ -2173,6 +2377,22 @@
754 - xmlFree(handler);
755 - }
756 - #endif /* LIBXML_ICONV_ENABLED */
757 -+#ifdef LIBXML_ICU_ENABLED
758 -+ if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) {
759 -+ if (handler->name != NULL)
760 -+ xmlFree(handler->name);
761 -+ handler->name = NULL;
762 -+ if (handler->uconv_out != NULL) {
763 -+ closeIcuConverter(handler->uconv_out);
764 -+ handler->uconv_out = NULL;
765 -+ }
766 -+ if (handler->uconv_in != NULL) {
767 -+ closeIcuConverter(handler->uconv_in);
768 -+ handler->uconv_in = NULL;
769 -+ }
770 -+ xmlFree(handler);
771 -+ }
772 -+#endif
773 - #ifdef DEBUG_ENCODING
774 - if (ret)
775 - xmlGenericError(xmlGenericErrorContext,
776 -@@ -2248,6 +2468,22 @@
777 - cur += toconv;
778 - } while (ret == -2);
779 - #endif
780 -+#ifdef LIBXML_ICU_ENABLED
781 -+ } else if (handler->uconv_out != NULL) {
782 -+ do {
783 -+ toconv = in->end - cur;
784 -+ written = 32000;
785 -+ ret = xmlUconvWrapper(handler->uconv_out, 0, &convbuf[0],
786 -+ &written, cur, &toconv);
787 -+ if (ret < 0) {
788 -+ if (written > 0)
789 -+ ret = -2;
790 -+ else
791 -+ return(-1);
792 -+ }
793 -+ unused += written;
794 -+ cur += toconv;
795 -+ } while (ret == -2);
796 - } else {
797 - /* could not find a converter */
798 - return(-1);
799 -@@ -2259,8 +2495,9 @@
800 - }
801 - return(in->consumed + (in->cur - in->base));
802 - }
803 -+#endif
804 -
805 --#ifndef LIBXML_ICONV_ENABLED
806 -+#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
807 - #ifdef LIBXML_ISO8859X_ENABLED
808 -
809 - /**
810 -Index: libxml/include/libxml/encoding.h
811 -===================================================================
812 ---- libxml.orig/include/libxml/encoding.h 2010-07-09 14:50:27.503114118 -0700
813 -+++ libxml/include/libxml/encoding.h 2010-07-09 14:53:27.251611643 -0700
814 -@@ -26,6 +26,24 @@
815 -
816 - #ifdef LIBXML_ICONV_ENABLED
817 - #include <iconv.h>
818 -+#else
819 -+#ifdef LIBXML_ICU_ENABLED
820 -+#include <unicode/ucnv.h>
821 -+#if 0
822 -+/* Forward-declare UConverter here rather than pulling in <unicode/ucnv.h>
823 -+ * to prevent unwanted ICU symbols being exposed to users of libxml2.
824 -+ * One particular case is Qt4 conflicting on UChar32.
825 -+ */
826 -+#include <stdint.h>
827 -+struct UConverter;
828 -+typedef struct UConverter UConverter;
829 -+#ifdef _MSC_VER
830 -+typedef wchar_t UChar;
831 -+#else
832 -+typedef uint16_t UChar;
833 -+#endif
834 -+#endif
835 -+#endif
836 - #endif
837 - #ifdef __cplusplus
838 - extern "C" {
839 -@@ -125,6 +143,13 @@
840 - * Block defining the handlers for non UTF-8 encodings.
841 - * If iconv is supported, there are two extra fields.
842 - */
843 -+#ifdef LIBXML_ICU_ENABLED
844 -+struct _uconv_t {
845 -+ UConverter *uconv; /* for conversion between an encoding and UTF-16 */
846 -+ UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */
847 -+};
848 -+typedef struct _uconv_t uconv_t;
849 -+#endif
850 -
851 - typedef struct _xmlCharEncodingHandler xmlCharEncodingHandler;
852 - typedef xmlCharEncodingHandler *xmlCharEncodingHandlerPtr;
853 -@@ -136,6 +161,10 @@
854 - iconv_t iconv_in;
855 - iconv_t iconv_out;
856 - #endif /* LIBXML_ICONV_ENABLED */
857 -+#ifdef LIBXML_ICU_ENABLED
858 -+ uconv_t *uconv_in;
859 -+ uconv_t *uconv_out;
860 -+#endif /* LIBXML_ICU_ENABLED */
861 - };
862 -
863 - #ifdef __cplusplus
864 -Index: libxml/include/libxml/parser.h
865 -===================================================================
866 ---- libxml.orig/include/libxml/parser.h 2010-07-09 14:51:21.190673740 -0700
867 -+++ libxml/include/libxml/parser.h 2010-07-09 14:53:19.571862214 -0700
868 -@@ -1222,6 +1222,7 @@
869 - XML_WITH_DEBUG_MEM = 29,
870 - XML_WITH_DEBUG_RUN = 30,
871 - XML_WITH_ZLIB = 31,
872 -+ XML_WITH_ICU = 32,
873 - XML_WITH_NONE = 99999 /* just to be sure of allocation size */
874 - } xmlFeature;
875 -
876 -Index: libxml/parser.c
877 -===================================================================
878 ---- libxml.orig/parser.c 2010-07-09 14:52:15.150057108 -0700
879 -+++ libxml/parser.c 2010-07-09 14:53:06.190137405 -0700
880 -@@ -954,6 +954,12 @@
881 - #else
882 - return(0);
883 - #endif
884 -+ case XML_WITH_ICU:
885 -+#ifdef LIBXML_ICU_ENABLED
886 -+ return(1);
887 -+#else
888 -+ return(0);
889 -+#endif
890 - default:
891 - break;
892 - }
OLDNEW
« no previous file with comments | « third_party/libxml/patches/LoadLibraryA ('k') | third_party/libxml/patches/icu-configure » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698