Chromium Code Reviews| Index: third_party/libxml/patches/icu |
| diff --git a/third_party/libxml/patches/icu b/third_party/libxml/patches/icu |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..814d23c9832a9ad6db9eafad58449307814a144f |
| --- /dev/null |
| +++ b/third_party/libxml/patches/icu |
| @@ -0,0 +1,892 @@ |
| +Add code support for ICU. |
| + |
| +diff --git a/third_party/libxml/encoding.c b/third_party/libxml/encoding.c |
| +index b86a547..0f41df9 100644 |
| +--- a/third_party/libxml/encoding.c |
| ++++ b/third_party/libxml/encoding.c |
| +@@ -58,7 +58,7 @@ static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL; |
| + static int xmlCharEncodingAliasesNb = 0; |
| + static int xmlCharEncodingAliasesMax = 0; |
| + |
| +-#ifdef LIBXML_ICONV_ENABLED |
| ++#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED) |
| + #if 0 |
| + #define DEBUG_ENCODING /* Define this to get encoding traces */ |
| + #endif |
| +@@ -97,6 +97,54 @@ xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val) |
| + NULL, 0, val, NULL, NULL, 0, 0, msg, val); |
| + } |
| + |
| ++#ifdef LIBXML_ICU_ENABLED |
| ++static uconv_t* |
| ++openIcuConverter(const char* name, int toUnicode) |
| ++{ |
| ++ UErrorCode status = U_ZERO_ERROR; |
| ++ uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t)); |
| ++ if (conv == NULL) |
| ++ return NULL; /* out of memory */ |
| ++ |
| ++ conv->uconv = ucnv_open(name, &status); /* converter for @name */ |
| ++ if (U_FAILURE(status)) |
| ++ goto error; |
| ++ |
| ++ status = U_ZERO_ERROR; |
| ++ if (toUnicode) { /* decoding side: install the STOP callback */ |
| ++ ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP, |
| ++ NULL, NULL, NULL, &status); |
| ++ } |
| ++ else { /* encoding side: install the STOP callback */ |
| ++ ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP, |
| ++ NULL, NULL, NULL, &status); |
| ++ } |
| ++ if (U_FAILURE(status)) |
| ++ goto error; |
| ++ |
| ++ status = U_ZERO_ERROR; |
| ++ conv->utf8 = ucnv_open("UTF-8", &status); /* UTF-8 end of the pair */ |
| ++ if (U_SUCCESS(status)) |
| ++ return conv; |
| ++ |
| ++error: /* release whatever was acquired before the failure */ |
| ++ if (conv->uconv) |
| ++ ucnv_close(conv->uconv); |
| ++ xmlFree(conv); |
| ++ return NULL; |
| ++} |
| ++ |
| ++static void |
| ++closeIcuConverter(uconv_t *conv) |
| ++{ |
| ++ if (conv != NULL) { /* a NULL pair is silently ignored */ |
| ++ ucnv_close(conv->uconv); |
| ++ ucnv_close(conv->utf8); |
| ++ xmlFree(conv); /* frees the pair allocated by openIcuConverter */ |
| ++ } |
| ++} |
| ++#endif /* LIBXML_ICU_ENABLED */ |
| ++ |
| + /************************************************************************ |
| + * * |
| + * Conversions To/From UTF8 encoding * |
| +@@ -1306,7 +1354,11 @@ xmlNewCharEncodingHandler(const char *name, |
| + #ifdef LIBXML_ICONV_ENABLED |
| + handler->iconv_in = NULL; |
| + handler->iconv_out = NULL; |
| +-#endif /* LIBXML_ICONV_ENABLED */ |
| ++#endif |
| ++#ifdef LIBXML_ICU_ENABLED |
| ++ handler->uconv_in = NULL; |
| ++ handler->uconv_out = NULL; |
| ++#endif |
| + |
| + /* |
| + * registers and returns the handler. |
| +@@ -1371,7 +1423,7 @@ xmlInitCharEncodingHandlers(void) { |
| + xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL); |
| + xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL); |
| + #endif /* LIBXML_OUTPUT_ENABLED */ |
| +-#ifndef LIBXML_ICONV_ENABLED |
| ++#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) |
| + #ifdef LIBXML_ISO8859X_ENABLED |
| + xmlRegisterCharEncodingHandlersISO8859x (); |
| + #endif |
| +@@ -1578,6 +1630,10 @@ xmlFindCharEncodingHandler(const char *name) { |
| + xmlCharEncodingHandlerPtr enc; |
| + iconv_t icv_in, icv_out; |
| + #endif /* LIBXML_ICONV_ENABLED */ |
| ++#ifdef LIBXML_ICU_ENABLED |
| ++ xmlCharEncodingHandlerPtr enc; |
| ++ uconv_t *ucv_in, *ucv_out; |
| ++#endif /* LIBXML_ICU_ENABLED */ |
| + char upper[100]; |
| + int i; |
| + |
| +@@ -1647,6 +1703,35 @@ xmlFindCharEncodingHandler(const char *name) { |
| + "iconv : problems with filters for '%s'\n", name); |
| + } |
| + #endif /* LIBXML_ICONV_ENABLED */ |
| ++#ifdef LIBXML_ICU_ENABLED |
| ++ /* check whether icu can handle this */ |
| ++ ucv_in = openIcuConverter(name, 1); |
| ++ ucv_out = openIcuConverter(name, 0); |
| ++ if (ucv_in != NULL && ucv_out != NULL) { |
| ++ enc = (xmlCharEncodingHandlerPtr) |
| ++ xmlMalloc(sizeof(xmlCharEncodingHandler)); |
| ++ if (enc == NULL) { |
| ++ closeIcuConverter(ucv_in); |
| ++ closeIcuConverter(ucv_out); |
| ++ return(NULL); |
| ++ } |
| ++ enc->name = xmlMemStrdup(name); |
| ++ enc->input = NULL; |
| ++ enc->output = NULL; |
| ++ enc->uconv_in = ucv_in; |
| ++ enc->uconv_out = ucv_out; |
| ++#ifdef DEBUG_ENCODING |
| ++ xmlGenericError(xmlGenericErrorContext, |
| ++ "Found ICU converter handler for encoding %s\n", name); |
| ++#endif |
| ++ return enc; |
| ++ } else if (ucv_in != NULL || ucv_out != NULL) { |
| ++ closeIcuConverter(ucv_in); |
| ++ closeIcuConverter(ucv_out); |
| ++ xmlEncodingErr(XML_ERR_INTERNAL_ERROR, |
| ++ "ICU converter : problems with filters for '%s'\n", name); |
| ++ } |
| ++#endif /* LIBXML_ICU_ENABLED */ |
| + |
| + #ifdef DEBUG_ENCODING |
| + xmlGenericError(xmlGenericErrorContext, |
| +@@ -1737,6 +1822,75 @@ xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen, |
| + |
| + /************************************************************************ |
| + * * |
| ++ * ICU based generic conversion functions * |
| ++ * * |
| ++ ************************************************************************/ |
| ++ |
| ++#ifdef LIBXML_ICU_ENABLED |
| ++/** |
| ++ * xmlUconvWrapper: |
| ++ * @cd: ICU uconverter data structure |
| ++ * @toUnicode: non-zero for encoding => UTF-8, 0 for UTF-8 => encoding |
| ++ * @out: a pointer to an array of bytes to store the result |
| ++ * @outlen: the length of @out |
| ++ * @in: a pointer to an array of input bytes to convert |
| ++ * @inlen: the length of @in |
| ++ * |
| ++ * Returns 0 if success, or |
| ++ * -1 by lack of space or if @out, @outlen, @in or @inlen is NULL, or |
| ++ * -2 if the transcoding fails (the input is not valid in its encoding or |
| ++ * the result of transformation can't fit into the encoding we want), or |
| ++ * -3 for any other ICU error, e.g. a truncated character ending @in. |
| ++ * |
| ++ * The value of @inlen after return is the number of octets consumed; |
| ++ * it is left unchanged when the NULL-argument check fails. |
| ++ * The value of @outlen after return is the number of octets written. |
| ++ */ |
| ++static int |
| ++xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen, |
| ++ const unsigned char *in, int *inlen) { |
| ++ const char *ucv_in = (const char *) in; |
| ++ char *ucv_out = (char *) out; |
| ++ UErrorCode err = U_ZERO_ERROR; |
| ++ |
| ++ if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) { |
| ++ if (outlen != NULL) *outlen = 0; |
| ++ return(-1); |
| ++ } |
| ++ |
| ++ /* |
| ++ * TODO(jungshik) |
| ++ * 1. is ucnv_convert(To|From)Algorithmic better? |
| ++ * 2. had we better use an explicit pivot buffer? |
| ++ * 3. error returned comes from 'fromUnicode' only even |
| ++ * when toUnicode is true ! |
| ++ */ |
| ++ if (toUnicode) { |
| ++ /* encoding => UTF-16 => UTF-8 */ |
| ++ ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen, |
| ++ &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL, |
| ++ 0, TRUE, &err); |
| ++ } else { |
| ++ /* UTF-8 => UTF-16 => encoding */ |
| ++ ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen, |
| ++ &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL, |
| ++ 0, TRUE, &err); |
| ++ } |
| ++ *inlen = ucv_in - (const char*) in; /* octets consumed from @in */ |
| ++ *outlen = ucv_out - (char *) out; /* octets written to @out */ |
| ++ if (U_SUCCESS(err)) |
| ++ return 0; |
| ++ if (err == U_BUFFER_OVERFLOW_ERROR) |
| ++ return -1; |
| ++ if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND) |
| ++ return -2; |
| ++ /* U_TRUNCATED_CHAR_FOUND and every other ICU error */ |
| ++ return -3; |
| ++} |
| ++#endif /* LIBXML_ICU_ENABLED */ |
| ++ |
| ++/************************************************************************ |
| ++ * * |
| + * The real API used by libxml for on-the-fly conversion * |
| + * * |
| + ************************************************************************/ |
| +@@ -1810,6 +1964,16 @@ xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out, |
| + if (ret == -1) ret = -3; |
| + } |
| + #endif /* LIBXML_ICONV_ENABLED */ |
| ++#ifdef LIBXML_ICU_ENABLED |
| ++ else if (handler->uconv_in != NULL) { |
| ++ ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use], |
| ++ &written, in->content, &toconv); |
| ++ xmlBufferShrink(in, toconv); |
| ++ out->use += written; |
| ++ out->content[out->use] = 0; |
| ++ if (ret == -1) ret = -3; |
| ++ } |
| ++#endif /* LIBXML_ICU_ENABLED */ |
| + #ifdef DEBUG_ENCODING |
| + switch (ret) { |
| + case 0: |
| +@@ -1915,6 +2079,17 @@ xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out, |
| + ret = -3; |
| + } |
| + #endif /* LIBXML_ICONV_ENABLED */ |
| ++#ifdef LIBXML_ICU_ENABLED |
| ++ else if (handler->uconv_in != NULL) { |
| ++ ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use], |
| ++ &written, in->content, &toconv); |
| ++ xmlBufferShrink(in, toconv); |
| ++ out->use += written; |
| ++ out->content[out->use] = 0; |
| ++ if (ret == -1) |
| ++ ret = -3; |
| ++ } |
| ++#endif /* LIBXML_ICU_ENABLED */ |
| + switch (ret) { |
| + case 0: |
| + #ifdef DEBUG_ENCODING |
| +@@ -2015,6 +2190,15 @@ retry: |
| + out->content[out->use] = 0; |
| + } |
| + #endif /* LIBXML_ICONV_ENABLED */ |
| ++#ifdef LIBXML_ICU_ENABLED |
| ++ else if (handler->uconv_out != NULL) { |
| ++ ret = xmlUconvWrapper(handler->uconv_out, 0, |
| ++ &out->content[out->use], |
| ++ &written, NULL, &toconv); |
| ++ out->use += written; |
| ++ out->content[out->use] = 0; |
| ++ } |
| ++#endif /* LIBXML_ICU_ENABLED */ |
| + #ifdef DEBUG_ENCODING |
| + xmlGenericError(xmlGenericErrorContext, |
| + "initialized encoder\n"); |
| +@@ -2061,6 +2245,26 @@ retry: |
| + } |
| + } |
| + #endif /* LIBXML_ICONV_ENABLED */ |
| ++#ifdef LIBXML_ICU_ENABLED |
| ++ else if (handler->uconv_out != NULL) { |
| ++ ret = xmlUconvWrapper(handler->uconv_out, 0, |
| ++ &out->content[out->use], |
| ++ &written, in->content, &toconv); |
| ++ xmlBufferShrink(in, toconv); |
| ++ out->use += written; |
| ++ writtentot += written; |
| ++ out->content[out->use] = 0; |
| ++ if (ret == -1) { |
| ++ if (written > 0) { |
| ++ /* |
| ++ * Can be a limitation of the ICU converter |
| ++ */ |
| ++ goto retry; |
| ++ } |
| ++ ret = -3; |
| ++ } |
| ++ } |
| ++#endif /* LIBXML_ICU_ENABLED */ |
| + else { |
| + xmlEncodingErr(XML_I18N_NO_OUTPUT, |
| + "xmlCharEncOutFunc: no output function !\n", NULL); |
| +@@ -2173,6 +2377,22 @@ xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) { |
| + xmlFree(handler); |
| + } |
| + #endif /* LIBXML_ICONV_ENABLED */ |
| ++#ifdef LIBXML_ICU_ENABLED |
| ++ if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) { |
| ++ if (handler->name != NULL) |
| ++ xmlFree(handler->name); |
| ++ handler->name = NULL; |
| ++ if (handler->uconv_out != NULL) { |
| ++ closeIcuConverter(handler->uconv_out); |
| ++ handler->uconv_out = NULL; |
| ++ } |
| ++ if (handler->uconv_in != NULL) { |
| ++ closeIcuConverter(handler->uconv_in); |
| ++ handler->uconv_in = NULL; |
| ++ } |
| ++ xmlFree(handler); |
| ++ } |
| ++#endif |
| + #ifdef DEBUG_ENCODING |
| + if (ret) |
| + xmlGenericError(xmlGenericErrorContext, |
| +@@ -2248,6 +2468,22 @@ xmlByteConsumed(xmlParserCtxtPtr ctxt) { |
| + cur += toconv; |
| + } while (ret == -2); |
| + #endif |
| ++#ifdef LIBXML_ICU_ENABLED |
| ++ } else if (handler->uconv_out != NULL) { |
| ++ do { |
| ++ toconv = in->end - cur; |
| ++ written = 32000; |
| ++ ret = xmlUconvWrapper(handler->uconv_out, 0, &convbuf[0], |
| ++ &written, cur, &toconv); |
| ++ if (ret < 0) { |
| ++ if (written > 0) |
| ++ ret = -2; |
| ++ else |
| ++ return(-1); |
| ++ } |
| ++ unused += written; |
| ++ cur += toconv; |
| ++ } while (ret == -2); |
| + } else { |
| + /* could not find a converter */ |
| + return(-1); |
| +@@ -2259,8 +2495,9 @@ xmlByteConsumed(xmlParserCtxtPtr ctxt) { |
| + } |
| + return(in->consumed + (in->cur - in->base)); |
| + } |
| ++#endif |
| + |
| +-#ifndef LIBXML_ICONV_ENABLED |
| ++#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) |
| + #ifdef LIBXML_ISO8859X_ENABLED |
| + |
| + /** |
| +diff --git a/third_party/libxml/include/libxml/encoding.h b/third_party/libxml/include/libxml/encoding.h |
| +index c74b25f..b5f8b48 100644 |
| +--- a/third_party/libxml/include/libxml/encoding.h |
| ++++ b/third_party/libxml/include/libxml/encoding.h |
| +@@ -26,6 +26,24 @@ |
| + |
| + #ifdef LIBXML_ICONV_ENABLED |
| + #include <iconv.h> |
| ++#else |
| ++#ifdef LIBXML_ICU_ENABLED |
| ++#include <unicode/ucnv.h> |
| ++#if 0 |
| ++/* Forward-declare UConverter here rather than pulling in <unicode/ucnv.h> |
| ++ * to prevent unwanted ICU symbols being exposed to users of libxml2. |
| ++ * One particular case is Qt4 conflicting on UChar32. |
| ++ */ |
| ++#include <stdint.h> |
| ++struct UConverter; |
| ++typedef struct UConverter UConverter; |
| ++#ifdef _MSC_VER |
| ++typedef wchar_t UChar; |
| ++#else |
| ++typedef uint16_t UChar; |
| ++#endif |
| ++#endif |
| ++#endif |
| + #endif |
| + #ifdef __cplusplus |
| + extern "C" { |
| +@@ -125,6 +143,13 @@ typedef int (* xmlCharEncodingOutputFunc)(unsigned char *out, int *outlen, |
| + * Block defining the handlers for non UTF-8 encodings. |
| + * If iconv is supported, there are two extra fields. |
| + */ |
| ++#ifdef LIBXML_ICU_ENABLED |
| ++struct _uconv_t { /* ICU converter pair backing uconv_in or uconv_out */ |
| ++ UConverter *uconv; /* for conversion between an encoding and UTF-16 */ |
| ++ UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */ |
| ++}; |
| ++typedef struct _uconv_t uconv_t; |
| ++#endif |
| + |
| + typedef struct _xmlCharEncodingHandler xmlCharEncodingHandler; |
| + typedef xmlCharEncodingHandler *xmlCharEncodingHandlerPtr; |
| +@@ -136,6 +161,10 @@ struct _xmlCharEncodingHandler { |
| + iconv_t iconv_in; |
| + iconv_t iconv_out; |
| + #endif /* LIBXML_ICONV_ENABLED */ |
| ++#ifdef LIBXML_ICU_ENABLED |
| ++ uconv_t *uconv_in; |
| ++ uconv_t *uconv_out; |
| ++#endif /* LIBXML_ICU_ENABLED */ |
| + }; |
| + |
| + #ifdef __cplusplus |
| +diff --git a/third_party/libxml/include/libxml/parser.h b/third_party/libxml/include/libxml/parser.h |
| +index dd79c42..3580b63 100644 |
| +--- a/third_party/libxml/include/libxml/parser.h |
| ++++ b/third_party/libxml/include/libxml/parser.h |
| +@@ -1222,6 +1222,7 @@ typedef enum { |
| + XML_WITH_DEBUG_MEM = 29, |
| + XML_WITH_DEBUG_RUN = 30, |
| + XML_WITH_ZLIB = 31, |
| ++ XML_WITH_ICU = 32, |
| + XML_WITH_NONE = 99999 /* just to be sure of allocation size */ |
| + } xmlFeature; |
| + |
| +diff --git a/third_party/libxml/include/libxml/xmlversion.h.in b/third_party/libxml/include/libxml/xmlversion.h.in |
| +index 4739f3a..de310ab 100644 |
| +--- a/third_party/libxml/include/libxml/xmlversion.h.in |
| ++++ b/third_party/libxml/include/libxml/xmlversion.h.in |
| +@@ -269,6 +269,15 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version); |
| + #endif |
| + |
| + /** |
| ++ * LIBXML_ICU_ENABLED: |
| ++ * |
| ++ * Whether ICU support is available |
| ++ */ |
| ++#if @WITH_ICU@ |
| ++#define LIBXML_ICU_ENABLED |
| ++#endif |
| ++ |
| ++/** |
| + * LIBXML_ISO8859X_ENABLED: |
| + * |
| + * Whether ISO-8859-* support is made available in case iconv is not |
| +diff --git a/third_party/libxml/parser.c b/third_party/libxml/parser.c |
| +index 85e7599..3ba2a06 100644 |
| +--- a/third_party/libxml/parser.c |
| ++++ b/third_party/libxml/parser.c |
| +@@ -954,6 +954,12 @@ xmlHasFeature(xmlFeature feature) |
| + #else |
| + return(0); |
| + #endif |
| ++ case XML_WITH_ICU: |
| ++#ifdef LIBXML_ICU_ENABLED |
| ++ return(1); |
| ++#else |
| ++ return(0); |
| ++#endif |
| + default: |
| + break; |
| + } |
| +diff --git a/third_party/libxml/patches/icu b/third_party/libxml/patches/icu |
|
jungshik at Google
2010/07/12 18:23:29
I guess you don't want to include this diff in the
|
| +index 324cea3..6c22c3c 100644 |
| +--- a/third_party/libxml/patches/icu |
| ++++ b/third_party/libxml/patches/icu |
| +@@ -1,434 +0,0 @@ |
| +-Code support for ICU. Note that this relies on modifications to the |
| +-build environment (either configure or configure.js on Windows). |
| +- |
| +-Index: libxml/encoding.c |
| +-=================================================================== |
| +---- libxml.orig/encoding.c 2010-07-09 14:48:28.881863834 -0700 |
| +-+++ libxml/encoding.c 2010-07-09 14:49:23.479741318 -0700 |
| +-@@ -58,7 +58,7 @@ |
| +- static int xmlCharEncodingAliasesNb = 0; |
| +- static int xmlCharEncodingAliasesMax = 0; |
| +- |
| +--#ifdef LIBXML_ICONV_ENABLED |
| +-+#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED) |
| +- #if 0 |
| +- #define DEBUG_ENCODING /* Define this to get encoding traces */ |
| +- #endif |
| +-@@ -97,6 +97,54 @@ |
| +- NULL, 0, val, NULL, NULL, 0, 0, msg, val); |
| +- } |
| +- |
| +-+#ifdef LIBXML_ICU_ENABLED |
| +-+static uconv_t* |
| +-+openIcuConverter(const char* name, int toUnicode) |
| +-+{ |
| +-+ UErrorCode status = U_ZERO_ERROR; |
| +-+ uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t)); |
| +-+ if (conv == NULL) |
| +-+ return NULL; |
| +-+ |
| +-+ conv->uconv = ucnv_open(name, &status); |
| +-+ if (U_FAILURE(status)) |
| +-+ goto error; |
| +-+ |
| +-+ status = U_ZERO_ERROR; |
| +-+ if (toUnicode) { |
| +-+ ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP, |
| +-+ NULL, NULL, NULL, &status); |
| +-+ } |
| +-+ else { |
| +-+ ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP, |
| +-+ NULL, NULL, NULL, &status); |
| +-+ } |
| +-+ if (U_FAILURE(status)) |
| +-+ goto error; |
| +-+ |
| +-+ status = U_ZERO_ERROR; |
| +-+ conv->utf8 = ucnv_open("UTF-8", &status); |
| +-+ if (U_SUCCESS(status)) |
| +-+ return conv; |
| +-+ |
| +-+error: |
| +-+ if (conv->uconv) |
| +-+ ucnv_close(conv->uconv); |
| +-+ xmlFree(conv); |
| +-+ return NULL; |
| +-+} |
| +-+ |
| +-+static void |
| +-+closeIcuConverter(uconv_t *conv) |
| +-+{ |
| +-+ if (conv != NULL) { |
| +-+ ucnv_close(conv->uconv); |
| +-+ ucnv_close(conv->utf8); |
| +-+ xmlFree(conv); |
| +-+ } |
| +-+} |
| +-+#endif /* LIBXML_ICU_ENABLED */ |
| +-+ |
| +- /************************************************************************ |
| +- * * |
| +- * Conversions To/From UTF8 encoding * |
| +-@@ -1306,7 +1354,11 @@ |
| +- #ifdef LIBXML_ICONV_ENABLED |
| +- handler->iconv_in = NULL; |
| +- handler->iconv_out = NULL; |
| +--#endif /* LIBXML_ICONV_ENABLED */ |
| +-+#endif |
| +-+#ifdef LIBXML_ICU_ENABLED |
| +-+ handler->uconv_in = NULL; |
| +-+ handler->uconv_out = NULL; |
| +-+#endif |
| +- |
| +- /* |
| +- * registers and returns the handler. |
| +-@@ -1371,7 +1423,7 @@ |
| +- xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL); |
| +- xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL); |
| +- #endif /* LIBXML_OUTPUT_ENABLED */ |
| +--#ifndef LIBXML_ICONV_ENABLED |
| +-+#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) |
| +- #ifdef LIBXML_ISO8859X_ENABLED |
| +- xmlRegisterCharEncodingHandlersISO8859x (); |
| +- #endif |
| +-@@ -1578,6 +1630,10 @@ |
| +- xmlCharEncodingHandlerPtr enc; |
| +- iconv_t icv_in, icv_out; |
| +- #endif /* LIBXML_ICONV_ENABLED */ |
| +-+#ifdef LIBXML_ICU_ENABLED |
| +-+ xmlCharEncodingHandlerPtr enc; |
| +-+ uconv_t *ucv_in, *ucv_out; |
| +-+#endif /* LIBXML_ICU_ENABLED */ |
| +- char upper[100]; |
| +- int i; |
| +- |
| +-@@ -1647,6 +1703,35 @@ |
| +- "iconv : problems with filters for '%s'\n", name); |
| +- } |
| +- #endif /* LIBXML_ICONV_ENABLED */ |
| +-+#ifdef LIBXML_ICU_ENABLED |
| +-+ /* check whether icu can handle this */ |
| +-+ ucv_in = openIcuConverter(name, 1); |
| +-+ ucv_out = openIcuConverter(name, 0); |
| +-+ if (ucv_in != NULL && ucv_out != NULL) { |
| +-+ enc = (xmlCharEncodingHandlerPtr) |
| +-+ xmlMalloc(sizeof(xmlCharEncodingHandler)); |
| +-+ if (enc == NULL) { |
| +-+ closeIcuConverter(ucv_in); |
| +-+ closeIcuConverter(ucv_out); |
| +-+ return(NULL); |
| +-+ } |
| +-+ enc->name = xmlMemStrdup(name); |
| +-+ enc->input = NULL; |
| +-+ enc->output = NULL; |
| +-+ enc->uconv_in = ucv_in; |
| +-+ enc->uconv_out = ucv_out; |
| +-+#ifdef DEBUG_ENCODING |
| +-+ xmlGenericError(xmlGenericErrorContext, |
| +-+ "Found ICU converter handler for encoding %s\n", name); |
| +-+#endif |
| +-+ return enc; |
| +-+ } else if (ucv_in != NULL || ucv_out != NULL) { |
| +-+ closeIcuConverter(ucv_in); |
| +-+ closeIcuConverter(ucv_out); |
| +-+ xmlEncodingErr(XML_ERR_INTERNAL_ERROR, |
| +-+ "ICU converter : problems with filters for '%s'\n", name); |
| +-+ } |
| +-+#endif /* LIBXML_ICU_ENABLED */ |
| +- |
| +- #ifdef DEBUG_ENCODING |
| +- xmlGenericError(xmlGenericErrorContext, |
| +-@@ -1737,6 +1822,75 @@ |
| +- |
| +- /************************************************************************ |
| +- * * |
| +-+ * ICU based generic conversion functions * |
| +-+ * * |
| +-+ ************************************************************************/ |
| +-+ |
| +-+#ifdef LIBXML_ICU_ENABLED |
| +-+/** |
| +-+ * xmlUconvWrapper: |
| +-+ * @cd: ICU uconverter data structure |
| +-+ * @toUnicode : non-zero if toUnicode. 0 otherwise. |
| +-+ * @out: a pointer to an array of bytes to store the result |
| +-+ * @outlen: the length of @out |
| +-+ * @in: a pointer to an array of ISO Latin 1 chars |
| +-+ * @inlen: the length of @in |
| +-+ * |
| +-+ * Returns 0 if success, or |
| +-+ * -1 by lack of space, or |
| +-+ * -2 if the transcoding fails (for *in is not valid utf8 string or |
| +-+ * the result of transformation can't fit into the encoding we want), or |
| +-+ * -3 if there the last byte can't form a single output char. |
| +-+ * |
| +-+ * The value of @inlen after return is the number of octets consumed |
| +-+ * as the return value is positive, else unpredictable. |
| +-+ * The value of @outlen after return is the number of ocetes consumed. |
| +-+ */ |
| +-+static int |
| +-+xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen, |
| +-+ const unsigned char *in, int *inlen) { |
| +-+ const char *ucv_in = (const char *) in; |
| +-+ char *ucv_out = (char *) out; |
| +-+ UErrorCode err = U_ZERO_ERROR; |
| +-+ |
| +-+ if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) { |
| +-+ if (outlen != NULL) *outlen = 0; |
| +-+ return(-1); |
| +-+ } |
| +-+ |
| +-+ /* |
| +-+ * TODO(jungshik) |
| +-+ * 1. is ucnv_convert(To|From)Algorithmic better? |
| +-+ * 2. had we better use an explicit pivot buffer? |
| +-+ * 3. error returned comes from 'fromUnicode' only even |
| +-+ * when toUnicode is true ! |
| +-+ */ |
| +-+ if (toUnicode) { |
| +-+ /* encoding => UTF-16 => UTF-8 */ |
| +-+ ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen, |
| +-+ &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL, |
| +-+ 0, TRUE, &err); |
| +-+ } else { |
| +-+ /* UTF-8 => UTF-16 => encoding */ |
| +-+ ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen, |
| +-+ &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL, |
| +-+ 0, TRUE, &err); |
| +-+ } |
| +-+ *inlen = ucv_in - (const char*) in; |
| +-+ *outlen = ucv_out - (char *) out; |
| +-+ if (U_SUCCESS(err)) |
| +-+ return 0; |
| +-+ if (err == U_BUFFER_OVERFLOW_ERROR) |
| +-+ return -1; |
| +-+ if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND) |
| +-+ return -2; |
| +-+ /* if (err == U_TRUNCATED_CHAR_FOUND) */ |
| +-+ return -3; |
| +-+} |
| +-+#endif /* LIBXML_ICU_ENABLED */ |
| +-+ |
| +-+/************************************************************************ |
| +-+ * * |
| +- * The real API used by libxml for on-the-fly conversion * |
| +- * * |
| +- ************************************************************************/ |
| +-@@ -1810,6 +1964,16 @@ |
| +- if (ret == -1) ret = -3; |
| +- } |
| +- #endif /* LIBXML_ICONV_ENABLED */ |
| +-+#ifdef LIBXML_ICU_ENABLED |
| +-+ else if (handler->uconv_in != NULL) { |
| +-+ ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use], |
| +-+ &written, in->content, &toconv); |
| +-+ xmlBufferShrink(in, toconv); |
| +-+ out->use += written; |
| +-+ out->content[out->use] = 0; |
| +-+ if (ret == -1) ret = -3; |
| +-+ } |
| +-+#endif /* LIBXML_ICU_ENABLED */ |
| +- #ifdef DEBUG_ENCODING |
| +- switch (ret) { |
| +- case 0: |
| +-@@ -1915,6 +2079,17 @@ |
| +- ret = -3; |
| +- } |
| +- #endif /* LIBXML_ICONV_ENABLED */ |
| +-+#ifdef LIBXML_ICU_ENABLED |
| +-+ else if (handler->uconv_in != NULL) { |
| +-+ ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use], |
| +-+ &written, in->content, &toconv); |
| +-+ xmlBufferShrink(in, toconv); |
| +-+ out->use += written; |
| +-+ out->content[out->use] = 0; |
| +-+ if (ret == -1) |
| +-+ ret = -3; |
| +-+ } |
| +-+#endif /* LIBXML_ICU_ENABLED */ |
| +- switch (ret) { |
| +- case 0: |
| +- #ifdef DEBUG_ENCODING |
| +-@@ -2015,6 +2190,15 @@ |
| +- out->content[out->use] = 0; |
| +- } |
| +- #endif /* LIBXML_ICONV_ENABLED */ |
| +-+#ifdef LIBXML_ICU_ENABLED |
| +-+ else if (handler->uconv_out != NULL) { |
| +-+ ret = xmlUconvWrapper(handler->uconv_out, 0, |
| +-+ &out->content[out->use], |
| +-+ &written, NULL, &toconv); |
| +-+ out->use += written; |
| +-+ out->content[out->use] = 0; |
| +-+ } |
| +-+#endif /* LIBXML_ICU_ENABLED */ |
| +- #ifdef DEBUG_ENCODING |
| +- xmlGenericError(xmlGenericErrorContext, |
| +- "initialized encoder\n"); |
| +-@@ -2061,6 +2245,26 @@ |
| +- } |
| +- } |
| +- #endif /* LIBXML_ICONV_ENABLED */ |
| +-+#ifdef LIBXML_ICU_ENABLED |
| +-+ else if (handler->uconv_out != NULL) { |
| +-+ ret = xmlUconvWrapper(handler->uconv_out, 0, |
| +-+ &out->content[out->use], |
| +-+ &written, in->content, &toconv); |
| +-+ xmlBufferShrink(in, toconv); |
| +-+ out->use += written; |
| +-+ writtentot += written; |
| +-+ out->content[out->use] = 0; |
| +-+ if (ret == -1) { |
| +-+ if (written > 0) { |
| +-+ /* |
| +-+ * Can be a limitation of iconv |
| +-+ */ |
| +-+ goto retry; |
| +-+ } |
| +-+ ret = -3; |
| +-+ } |
| +-+ } |
| +-+#endif /* LIBXML_ICU_ENABLED */ |
| +- else { |
| +- xmlEncodingErr(XML_I18N_NO_OUTPUT, |
| +- "xmlCharEncOutFunc: no output function !\n", NULL); |
| +-@@ -2173,6 +2377,22 @@ |
| +- xmlFree(handler); |
| +- } |
| +- #endif /* LIBXML_ICONV_ENABLED */ |
| +-+#ifdef LIBXML_ICU_ENABLED |
| +-+ if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) { |
| +-+ if (handler->name != NULL) |
| +-+ xmlFree(handler->name); |
| +-+ handler->name = NULL; |
| +-+ if (handler->uconv_out != NULL) { |
| +-+ closeIcuConverter(handler->uconv_out); |
| +-+ handler->uconv_out = NULL; |
| +-+ } |
| +-+ if (handler->uconv_in != NULL) { |
| +-+ closeIcuConverter(handler->uconv_in); |
| +-+ handler->uconv_in = NULL; |
| +-+ } |
| +-+ xmlFree(handler); |
| +-+ } |
| +-+#endif |
| +- #ifdef DEBUG_ENCODING |
| +- if (ret) |
| +- xmlGenericError(xmlGenericErrorContext, |
| +-@@ -2248,6 +2468,22 @@ |
| +- cur += toconv; |
| +- } while (ret == -2); |
| +- #endif |
| +-+#ifdef LIBXML_ICU_ENABLED |
| +-+ } else if (handler->uconv_out != NULL) { |
| +-+ do { |
| +-+ toconv = in->end - cur; |
| +-+ written = 32000; |
| +-+ ret = xmlUconvWrapper(handler->uconv_out, 0, &convbuf[0], |
| +-+ &written, cur, &toconv); |
| +-+ if (ret < 0) { |
| +-+ if (written > 0) |
| +-+ ret = -2; |
| +-+ else |
| +-+ return(-1); |
| +-+ } |
| +-+ unused += written; |
| +-+ cur += toconv; |
| +-+ } while (ret == -2); |
| +- } else { |
| +- /* could not find a converter */ |
| +- return(-1); |
| +-@@ -2259,8 +2495,9 @@ |
| +- } |
| +- return(in->consumed + (in->cur - in->base)); |
| +- } |
| +-+#endif |
| +- |
| +--#ifndef LIBXML_ICONV_ENABLED |
| +-+#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) |
| +- #ifdef LIBXML_ISO8859X_ENABLED |
| +- |
| +- /** |
| +-Index: libxml/include/libxml/encoding.h |
| +-=================================================================== |
| +---- libxml.orig/include/libxml/encoding.h 2010-07-09 14:50:27.503114118 -0700 |
| +-+++ libxml/include/libxml/encoding.h 2010-07-09 14:53:27.251611643 -0700 |
| +-@@ -26,6 +26,24 @@ |
| +- |
| +- #ifdef LIBXML_ICONV_ENABLED |
| +- #include <iconv.h> |
| +-+#else |
| +-+#ifdef LIBXML_ICU_ENABLED |
| +-+#include <unicode/ucnv.h> |
| +-+#if 0 |
| +-+/* Forward-declare UConverter here rather than pulling in <unicode/ucnv.h> |
| +-+ * to prevent unwanted ICU symbols being exposed to users of libxml2. |
| +-+ * One particular case is Qt4 conflicting on UChar32. |
| +-+ */ |
| +-+#include <stdint.h> |
| +-+struct UConverter; |
| +-+typedef struct UConverter UConverter; |
| +-+#ifdef _MSC_VER |
| +-+typedef wchar_t UChar; |
| +-+#else |
| +-+typedef uint16_t UChar; |
| +-+#endif |
| +-+#endif |
| +-+#endif |
| +- #endif |
| +- #ifdef __cplusplus |
| +- extern "C" { |
| +-@@ -125,6 +143,13 @@ |
| +- * Block defining the handlers for non UTF-8 encodings. |
| +- * If iconv is supported, there are two extra fields. |
| +- */ |
| +-+#ifdef LIBXML_ICU_ENABLED |
| +-+struct _uconv_t { |
| +-+ UConverter *uconv; /* for conversion between an encoding and UTF-16 */ |
| +-+ UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */ |
| +-+}; |
| +-+typedef struct _uconv_t uconv_t; |
| +-+#endif |
| +- |
| +- typedef struct _xmlCharEncodingHandler xmlCharEncodingHandler; |
| +- typedef xmlCharEncodingHandler *xmlCharEncodingHandlerPtr; |
| +-@@ -136,6 +161,10 @@ |
| +- iconv_t iconv_in; |
| +- iconv_t iconv_out; |
| +- #endif /* LIBXML_ICONV_ENABLED */ |
| +-+#ifdef LIBXML_ICU_ENABLED |
| +-+ uconv_t *uconv_in; |
| +-+ uconv_t *uconv_out; |
| +-+#endif /* LIBXML_ICU_ENABLED */ |
| +- }; |
| +- |
| +- #ifdef __cplusplus |
| +-Index: libxml/include/libxml/parser.h |
| +-=================================================================== |
| +---- libxml.orig/include/libxml/parser.h 2010-07-09 14:51:21.190673740 -0700 |
| +-+++ libxml/include/libxml/parser.h 2010-07-09 14:53:19.571862214 -0700 |
| +-@@ -1222,6 +1222,7 @@ |
| +- XML_WITH_DEBUG_MEM = 29, |
| +- XML_WITH_DEBUG_RUN = 30, |
| +- XML_WITH_ZLIB = 31, |
| +-+ XML_WITH_ICU = 32, |
| +- XML_WITH_NONE = 99999 /* just to be sure of allocation size */ |
| +- } xmlFeature; |
| +- |
| +-Index: libxml/parser.c |
| +-=================================================================== |
| +---- libxml.orig/parser.c 2010-07-09 14:52:15.150057108 -0700 |
| +-+++ libxml/parser.c 2010-07-09 14:53:06.190137405 -0700 |
| +-@@ -954,6 +954,12 @@ |
| +- #else |
| +- return(0); |
| +- #endif |
| +-+ case XML_WITH_ICU: |
| +-+#ifdef LIBXML_ICU_ENABLED |
| +-+ return(1); |
| +-+#else |
| +-+ return(0); |
| +-+#endif |
| +- default: |
| +- break; |
| +- } |