Index: third_party/libxml/patches/icu |
diff --git a/third_party/libxml/patches/icu b/third_party/libxml/patches/icu |
new file mode 100644 |
index 0000000000000000000000000000000000000000..814d23c9832a9ad6db9eafad58449307814a144f |
--- /dev/null |
+++ b/third_party/libxml/patches/icu |
@@ -0,0 +1,892 @@ |
+Add code support for ICU. |
+ |
+diff --git a/third_party/libxml/encoding.c b/third_party/libxml/encoding.c |
+index b86a547..0f41df9 100644 |
+--- a/third_party/libxml/encoding.c |
++++ b/third_party/libxml/encoding.c |
+@@ -58,7 +58,7 @@ static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL; |
+ static int xmlCharEncodingAliasesNb = 0; |
+ static int xmlCharEncodingAliasesMax = 0; |
+ |
+-#ifdef LIBXML_ICONV_ENABLED |
++#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED) |
+ #if 0 |
+ #define DEBUG_ENCODING /* Define this to get encoding traces */ |
+ #endif |
+@@ -97,6 +97,54 @@ xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val) |
+ NULL, 0, val, NULL, NULL, 0, 0, msg, val); |
+ } |
+ |
++#ifdef LIBXML_ICU_ENABLED |
++static uconv_t* |
++openIcuConverter(const char* name, int toUnicode) |
++{ /* Allocates a uconv_t holding an ICU converter for @name plus a UTF-8 one; NULL on any failure. */ |
++ UErrorCode status = U_ZERO_ERROR; |
++ uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t)); |
++ if (conv == NULL) |
++ return NULL; |
++ /* Converter for the charset called @name; a failed open goes to the cleanup label. */ |
++ conv->uconv = ucnv_open(name, &status); |
++ if (U_FAILURE(status)) |
++ goto error; |
++ /* Install the STOP callback only for the direction selected by @toUnicode. */ |
++ status = U_ZERO_ERROR; |
++ if (toUnicode) { |
++ ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP, |
++ NULL, NULL, NULL, &status); |
++ } |
++ else { |
++ ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP, |
++ NULL, NULL, NULL, &status); |
++ } |
++ if (U_FAILURE(status)) |
++ goto error; |
++ /* Second converter, for UTF-8; success here is the only non-NULL return. */ |
++ status = U_ZERO_ERROR; |
++ conv->utf8 = ucnv_open("UTF-8", &status); |
++ if (U_SUCCESS(status)) |
++ return conv; |
++ /* Failure path (also reached by falling through): conv->utf8 is never read here. */ |
++error: |
++ if (conv->uconv) |
++ ucnv_close(conv->uconv); |
++ xmlFree(conv); |
++ return NULL; |
++} |
++ |
++static void |
++closeIcuConverter(uconv_t *conv) |
++{ /* Closes both ICU converters held by @conv, then frees @conv; a NULL @conv is ignored. */ |
++ if (conv != NULL) { |
++ ucnv_close(conv->uconv); |
++ ucnv_close(conv->utf8); |
++ xmlFree(conv); |
++ } |
++} |
++#endif /* LIBXML_ICU_ENABLED */ |
++ |
+ /************************************************************************ |
+ * * |
+ * Conversions To/From UTF8 encoding * |
+@@ -1306,7 +1354,11 @@ xmlNewCharEncodingHandler(const char *name, |
+ #ifdef LIBXML_ICONV_ENABLED |
+ handler->iconv_in = NULL; |
+ handler->iconv_out = NULL; |
+-#endif /* LIBXML_ICONV_ENABLED */ |
++#endif |
++#ifdef LIBXML_ICU_ENABLED |
++ handler->uconv_in = NULL; |
++ handler->uconv_out = NULL; |
++#endif |
+ |
+ /* |
+ * registers and returns the handler. |
+@@ -1371,7 +1423,7 @@ xmlInitCharEncodingHandlers(void) { |
+ xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL); |
+ xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL); |
+ #endif /* LIBXML_OUTPUT_ENABLED */ |
+-#ifndef LIBXML_ICONV_ENABLED |
++#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) |
+ #ifdef LIBXML_ISO8859X_ENABLED |
+ xmlRegisterCharEncodingHandlersISO8859x (); |
+ #endif |
+@@ -1578,6 +1630,10 @@ xmlFindCharEncodingHandler(const char *name) { |
+ xmlCharEncodingHandlerPtr enc; |
+ iconv_t icv_in, icv_out; |
+ #endif /* LIBXML_ICONV_ENABLED */ |
++#ifdef LIBXML_ICU_ENABLED |
++ xmlCharEncodingHandlerPtr enc; |
++ uconv_t *ucv_in, *ucv_out; |
++#endif /* LIBXML_ICU_ENABLED */ |
+ char upper[100]; |
+ int i; |
+ |
+@@ -1647,6 +1703,35 @@ xmlFindCharEncodingHandler(const char *name) { |
+ "iconv : problems with filters for '%s'\n", name); |
+ } |
+ #endif /* LIBXML_ICONV_ENABLED */ |
++#ifdef LIBXML_ICU_ENABLED |
++ /* check whether icu can handle this */ |
++ ucv_in = openIcuConverter(name, 1); |
++ ucv_out = openIcuConverter(name, 0); |
++ if (ucv_in != NULL && ucv_out != NULL) { |
++ enc = (xmlCharEncodingHandlerPtr) |
++ xmlMalloc(sizeof(xmlCharEncodingHandler)); |
++ if (enc == NULL) { |
++ closeIcuConverter(ucv_in); |
++ closeIcuConverter(ucv_out); |
++ return(NULL); |
++ } |
++ enc->name = xmlMemStrdup(name); |
++ enc->input = NULL; |
++ enc->output = NULL; |
++ enc->uconv_in = ucv_in; |
++ enc->uconv_out = ucv_out; |
++#ifdef DEBUG_ENCODING |
++ xmlGenericError(xmlGenericErrorContext, |
++ "Found ICU converter handler for encoding %s\n", name); |
++#endif |
++ return enc; |
++ } else if (ucv_in != NULL || ucv_out != NULL) { |
++ closeIcuConverter(ucv_in); |
++ closeIcuConverter(ucv_out); |
++ xmlEncodingErr(XML_ERR_INTERNAL_ERROR, |
++ "ICU converter : problems with filters for '%s'\n", name); |
++ } |
++#endif /* LIBXML_ICU_ENABLED */ |
+ |
+ #ifdef DEBUG_ENCODING |
+ xmlGenericError(xmlGenericErrorContext, |
+@@ -1737,6 +1822,75 @@ xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen, |
+ |
+ /************************************************************************ |
+ * * |
++ * ICU based generic conversion functions * |
++ * * |
++ ************************************************************************/ |
++ |
++#ifdef LIBXML_ICU_ENABLED |
++/** |
++ * xmlUconvWrapper: |
++ * @cd: pair of ICU converters: the charset one (uconv) and UTF-8 (utf8) |
++ * @toUnicode: non-zero for charset => UTF-8, 0 for UTF-8 => charset |
++ * @out: a pointer to an array of bytes to store the result |
++ * @outlen: in: the capacity of @out; out: the number of bytes written |
++ * @in: a pointer to the bytes to convert (UTF-8 when @toUnicode is 0) |
++ * @inlen: in: the length of @in; out: the number of bytes consumed |
++ * |
++ * Returns 0 if success, or |
++ * -1 by lack of space, or when @out, @outlen, @in or @inlen is NULL, or |
++ * -2 if the transcoding fails (ICU reports an invalid or illegal |
++ * character), or |
++ * -3 for any other ICU error (U_TRUNCATED_CHAR_FOUND is the expected one). |
++ * |
++ * @inlen and @outlen are recomputed from the positions ICU stopped at, |
++ * whatever the return value. The NULL-argument path is the exception: |
++ * it only zeroes @outlen (when non-NULL) and leaves @inlen untouched. |
++ */ |
++static int |
++xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen, |
++ const unsigned char *in, int *inlen) { |
++ const char *ucv_in = (const char *) in; |
++ char *ucv_out = (char *) out; |
++ UErrorCode err = U_ZERO_ERROR; |
++ |
++ if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) { |
++ if (outlen != NULL) *outlen = 0; |
++ return(-1); |
++ } |
++ |
++ /* |
++ * TODO(jungshik) |
++ * 1. is ucnv_convert(To|From)Algorithmic better? |
++ * 2. had we better use an explicit pivot buffer? |
++ * 3. error returned comes from 'fromUnicode' only even |
++ * when toUnicode is true ! |
++ */ |
++ if (toUnicode) { |
++ /* encoding => UTF-16 => UTF-8 */ |
++ ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen, |
++ &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL, |
++ 0, TRUE, &err); |
++ } else { |
++ /* UTF-8 => UTF-16 => encoding */ |
++ ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen, |
++ &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL, |
++ 0, TRUE, &err); |
++ } |
++ *inlen = ucv_in - (const char*) in; /* bytes consumed: ICU advanced ucv_in */ |
++ *outlen = ucv_out - (char *) out; /* bytes produced: ICU advanced ucv_out */ |
++ if (U_SUCCESS(err)) |
++ return 0; |
++ if (err == U_BUFFER_OVERFLOW_ERROR) |
++ return -1; |
++ if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND) |
++ return -2; |
++ /* anything else; U_TRUNCATED_CHAR_FOUND is the expected case */ |
++ return -3; |
++} |
++#endif /* LIBXML_ICU_ENABLED */ |
++ |
++/************************************************************************ |
++ * * |
+ * The real API used by libxml for on-the-fly conversion * |
+ * * |
+ ************************************************************************/ |
+@@ -1810,6 +1964,16 @@ xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out, |
+ if (ret == -1) ret = -3; |
+ } |
+ #endif /* LIBXML_ICONV_ENABLED */ |
++#ifdef LIBXML_ICU_ENABLED |
++ else if (handler->uconv_in != NULL) { |
++ ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use], |
++ &written, in->content, &toconv); |
++ xmlBufferShrink(in, toconv); |
++ out->use += written; |
++ out->content[out->use] = 0; |
++ if (ret == -1) ret = -3; |
++ } |
++#endif /* LIBXML_ICU_ENABLED */ |
+ #ifdef DEBUG_ENCODING |
+ switch (ret) { |
+ case 0: |
+@@ -1915,6 +2079,17 @@ xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out, |
+ ret = -3; |
+ } |
+ #endif /* LIBXML_ICONV_ENABLED */ |
++#ifdef LIBXML_ICU_ENABLED |
++ else if (handler->uconv_in != NULL) { |
++ ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use], |
++ &written, in->content, &toconv); |
++ xmlBufferShrink(in, toconv); |
++ out->use += written; |
++ out->content[out->use] = 0; |
++ if (ret == -1) |
++ ret = -3; |
++ } |
++#endif /* LIBXML_ICU_ENABLED */ |
+ switch (ret) { |
+ case 0: |
+ #ifdef DEBUG_ENCODING |
+@@ -2015,6 +2190,15 @@ retry: |
+ out->content[out->use] = 0; |
+ } |
+ #endif /* LIBXML_ICONV_ENABLED */ |
++#ifdef LIBXML_ICU_ENABLED |
++ else if (handler->uconv_out != NULL) { |
++ ret = xmlUconvWrapper(handler->uconv_out, 0, |
++ &out->content[out->use], |
++ &written, NULL, &toconv); |
++ out->use += written; |
++ out->content[out->use] = 0; |
++ } |
++#endif /* LIBXML_ICU_ENABLED */ |
+ #ifdef DEBUG_ENCODING |
+ xmlGenericError(xmlGenericErrorContext, |
+ "initialized encoder\n"); |
+@@ -2061,6 +2245,26 @@ retry: |
+ } |
+ } |
+ #endif /* LIBXML_ICONV_ENABLED */ |
++#ifdef LIBXML_ICU_ENABLED |
++ else if (handler->uconv_out != NULL) { |
++ ret = xmlUconvWrapper(handler->uconv_out, 0, |
++ &out->content[out->use], |
++ &written, in->content, &toconv); |
++ xmlBufferShrink(in, toconv); |
++ out->use += written; |
++ writtentot += written; |
++ out->content[out->use] = 0; |
++ if (ret == -1) { |
++ if (written > 0) { |
++ /* |
++ * Lack of space after partial output (-1 with written > 0) |
++ */ |
++ goto retry; |
++ } |
++ ret = -3; |
++ } |
++ } |
++#endif /* LIBXML_ICU_ENABLED */ |
+ else { |
+ xmlEncodingErr(XML_I18N_NO_OUTPUT, |
+ "xmlCharEncOutFunc: no output function !\n", NULL); |
+@@ -2173,6 +2377,22 @@ xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) { |
+ xmlFree(handler); |
+ } |
+ #endif /* LIBXML_ICONV_ENABLED */ |
++#ifdef LIBXML_ICU_ENABLED |
++ if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) { |
++ if (handler->name != NULL) |
++ xmlFree(handler->name); |
++ handler->name = NULL; |
++ if (handler->uconv_out != NULL) { |
++ closeIcuConverter(handler->uconv_out); |
++ handler->uconv_out = NULL; |
++ } |
++ if (handler->uconv_in != NULL) { |
++ closeIcuConverter(handler->uconv_in); |
++ handler->uconv_in = NULL; |
++ } |
++ xmlFree(handler); |
++ } |
++#endif |
+ #ifdef DEBUG_ENCODING |
+ if (ret) |
+ xmlGenericError(xmlGenericErrorContext, |
+@@ -2248,6 +2468,23 @@ xmlByteConsumed(xmlParserCtxtPtr ctxt) { |
+ cur += toconv; |
+ } while (ret == -2); |
+ #endif |
++#ifdef LIBXML_ICU_ENABLED |
++ } else if (handler->uconv_out != NULL) { |
++ do { |
++ toconv = in->end - cur; |
++ written = 32000; /* presumably the capacity of convbuf -- confirm */ |
++ ret = xmlUconvWrapper(handler->uconv_out, 0, &convbuf[0], |
++ &written, cur, &toconv); |
++ if (ret < 0) { |
++ if (written > 0) |
++ ret = -2; /* partial progress: go round the loop again */ |
++ else |
++ return(-1); /* error with nothing converted: give up */ |
++ } |
++ unused += written; |
++ cur += toconv; |
++ } while (ret == -2); |
++#endif /* LIBXML_ICU_ENABLED: closed here so the shared else branch and the function tail always compile */ |
+ } else { |
+ /* could not find a converter */ |
+ return(-1); |
+@@ -2259,8 +2496,8 @@ xmlByteConsumed(xmlParserCtxtPtr ctxt) { |
+ } |
+ return(in->consumed + (in->cur - in->base)); |
+ } |
+ |
+-#ifndef LIBXML_ICONV_ENABLED |
++#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) |
+ #ifdef LIBXML_ISO8859X_ENABLED |
+ |
+ /** |
+diff --git a/third_party/libxml/include/libxml/encoding.h b/third_party/libxml/include/libxml/encoding.h |
+index c74b25f..b5f8b48 100644 |
+--- a/third_party/libxml/include/libxml/encoding.h |
++++ b/third_party/libxml/include/libxml/encoding.h |
+@@ -26,6 +26,24 @@ |
+ |
+ #ifdef LIBXML_ICONV_ENABLED |
+ #include <iconv.h> |
++#else |
++#ifdef LIBXML_ICU_ENABLED |
++#include <unicode/ucnv.h> |
++#if 0 |
++/* Forward-declare UConverter here rather than pulling in <unicode/ucnv.h> |
++ * to prevent unwanted ICU symbols being exposed to users of libxml2. |
++ * One particular case is Qt4 conflicting on UChar32. |
++ */ |
++#include <stdint.h> |
++struct UConverter; |
++typedef struct UConverter UConverter; |
++#ifdef _MSC_VER |
++typedef wchar_t UChar; |
++#else |
++typedef uint16_t UChar; |
++#endif |
++#endif |
++#endif |
+ #endif |
+ #ifdef __cplusplus |
+ extern "C" { |
+@@ -125,6 +143,13 @@ typedef int (* xmlCharEncodingOutputFunc)(unsigned char *out, int *outlen, |
+ * Block defining the handlers for non UTF-8 encodings. |
+ * If iconv is supported, there are two extra fields. |
+ */ |
++#ifdef LIBXML_ICU_ENABLED |
++struct _uconv_t { /* ICU converter pair for one direction: charset <=> UTF-16 <=> UTF-8 */ |
++ UConverter *uconv; /* for conversion between an encoding and UTF-16 */ |
++ UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */ |
++}; |
++typedef struct _uconv_t uconv_t; /* held as uconv_in / uconv_out in xmlCharEncodingHandler */ |
++#endif |
+ |
+ typedef struct _xmlCharEncodingHandler xmlCharEncodingHandler; |
+ typedef xmlCharEncodingHandler *xmlCharEncodingHandlerPtr; |
+@@ -136,6 +161,10 @@ struct _xmlCharEncodingHandler { |
+ iconv_t iconv_in; |
+ iconv_t iconv_out; |
+ #endif /* LIBXML_ICONV_ENABLED */ |
++#ifdef LIBXML_ICU_ENABLED |
++ uconv_t *uconv_in; |
++ uconv_t *uconv_out; |
++#endif /* LIBXML_ICU_ENABLED */ |
+ }; |
+ |
+ #ifdef __cplusplus |
+diff --git a/third_party/libxml/include/libxml/parser.h b/third_party/libxml/include/libxml/parser.h |
+index dd79c42..3580b63 100644 |
+--- a/third_party/libxml/include/libxml/parser.h |
++++ b/third_party/libxml/include/libxml/parser.h |
+@@ -1222,6 +1222,7 @@ typedef enum { |
+ XML_WITH_DEBUG_MEM = 29, |
+ XML_WITH_DEBUG_RUN = 30, |
+ XML_WITH_ZLIB = 31, |
++ XML_WITH_ICU = 32, |
+ XML_WITH_NONE = 99999 /* just to be sure of allocation size */ |
+ } xmlFeature; |
+ |
+diff --git a/third_party/libxml/include/libxml/xmlversion.h.in b/third_party/libxml/include/libxml/xmlversion.h.in |
+index 4739f3a..de310ab 100644 |
+--- a/third_party/libxml/include/libxml/xmlversion.h.in |
++++ b/third_party/libxml/include/libxml/xmlversion.h.in |
+@@ -269,6 +269,15 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version); |
+ #endif |
+ |
+ /** |
++ * LIBXML_ICU_ENABLED: |
++ * |
++ * Whether ICU support is available |
++ */ |
++#if @WITH_ICU@ |
++#define LIBXML_ICU_ENABLED |
++#endif |
++ |
++/** |
+ * LIBXML_ISO8859X_ENABLED: |
+ * |
+ * Whether ISO-8859-* support is made available in case iconv is not |
+diff --git a/third_party/libxml/parser.c b/third_party/libxml/parser.c |
+index 85e7599..3ba2a06 100644 |
+--- a/third_party/libxml/parser.c |
++++ b/third_party/libxml/parser.c |
+@@ -954,6 +954,12 @@ xmlHasFeature(xmlFeature feature) |
+ #else |
+ return(0); |
+ #endif |
++ case XML_WITH_ICU: |
++#ifdef LIBXML_ICU_ENABLED |
++ return(1); |
++#else |
++ return(0); |
++#endif |
+ default: |
+ break; |
+ } |
+diff --git a/third_party/libxml/patches/icu b/third_party/libxml/patches/icu |
jungshik at Google
2010/07/12 18:23:29
I guess you don't want to include this diff in the
|
+index 324cea3..6c22c3c 100644 |
+--- a/third_party/libxml/patches/icu |
++++ b/third_party/libxml/patches/icu |
+@@ -1,434 +0,0 @@ |
+-Code support for ICU. Note that this relies on modifications to the |
+-build environment (either configure or configure.js on Windows). |
+- |
+-Index: libxml/encoding.c |
+-=================================================================== |
+---- libxml.orig/encoding.c 2010-07-09 14:48:28.881863834 -0700 |
+-+++ libxml/encoding.c 2010-07-09 14:49:23.479741318 -0700 |
+-@@ -58,7 +58,7 @@ |
+- static int xmlCharEncodingAliasesNb = 0; |
+- static int xmlCharEncodingAliasesMax = 0; |
+- |
+--#ifdef LIBXML_ICONV_ENABLED |
+-+#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED) |
+- #if 0 |
+- #define DEBUG_ENCODING /* Define this to get encoding traces */ |
+- #endif |
+-@@ -97,6 +97,54 @@ |
+- NULL, 0, val, NULL, NULL, 0, 0, msg, val); |
+- } |
+- |
+-+#ifdef LIBXML_ICU_ENABLED |
+-+static uconv_t* |
+-+openIcuConverter(const char* name, int toUnicode) |
+-+{ |
+-+ UErrorCode status = U_ZERO_ERROR; |
+-+ uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t)); |
+-+ if (conv == NULL) |
+-+ return NULL; |
+-+ |
+-+ conv->uconv = ucnv_open(name, &status); |
+-+ if (U_FAILURE(status)) |
+-+ goto error; |
+-+ |
+-+ status = U_ZERO_ERROR; |
+-+ if (toUnicode) { |
+-+ ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP, |
+-+ NULL, NULL, NULL, &status); |
+-+ } |
+-+ else { |
+-+ ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP, |
+-+ NULL, NULL, NULL, &status); |
+-+ } |
+-+ if (U_FAILURE(status)) |
+-+ goto error; |
+-+ |
+-+ status = U_ZERO_ERROR; |
+-+ conv->utf8 = ucnv_open("UTF-8", &status); |
+-+ if (U_SUCCESS(status)) |
+-+ return conv; |
+-+ |
+-+error: |
+-+ if (conv->uconv) |
+-+ ucnv_close(conv->uconv); |
+-+ xmlFree(conv); |
+-+ return NULL; |
+-+} |
+-+ |
+-+static void |
+-+closeIcuConverter(uconv_t *conv) |
+-+{ |
+-+ if (conv != NULL) { |
+-+ ucnv_close(conv->uconv); |
+-+ ucnv_close(conv->utf8); |
+-+ xmlFree(conv); |
+-+ } |
+-+} |
+-+#endif /* LIBXML_ICU_ENABLED */ |
+-+ |
+- /************************************************************************ |
+- * * |
+- * Conversions To/From UTF8 encoding * |
+-@@ -1306,7 +1354,11 @@ |
+- #ifdef LIBXML_ICONV_ENABLED |
+- handler->iconv_in = NULL; |
+- handler->iconv_out = NULL; |
+--#endif /* LIBXML_ICONV_ENABLED */ |
+-+#endif |
+-+#ifdef LIBXML_ICU_ENABLED |
+-+ handler->uconv_in = NULL; |
+-+ handler->uconv_out = NULL; |
+-+#endif |
+- |
+- /* |
+- * registers and returns the handler. |
+-@@ -1371,7 +1423,7 @@ |
+- xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL); |
+- xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL); |
+- #endif /* LIBXML_OUTPUT_ENABLED */ |
+--#ifndef LIBXML_ICONV_ENABLED |
+-+#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) |
+- #ifdef LIBXML_ISO8859X_ENABLED |
+- xmlRegisterCharEncodingHandlersISO8859x (); |
+- #endif |
+-@@ -1578,6 +1630,10 @@ |
+- xmlCharEncodingHandlerPtr enc; |
+- iconv_t icv_in, icv_out; |
+- #endif /* LIBXML_ICONV_ENABLED */ |
+-+#ifdef LIBXML_ICU_ENABLED |
+-+ xmlCharEncodingHandlerPtr enc; |
+-+ uconv_t *ucv_in, *ucv_out; |
+-+#endif /* LIBXML_ICU_ENABLED */ |
+- char upper[100]; |
+- int i; |
+- |
+-@@ -1647,6 +1703,35 @@ |
+- "iconv : problems with filters for '%s'\n", name); |
+- } |
+- #endif /* LIBXML_ICONV_ENABLED */ |
+-+#ifdef LIBXML_ICU_ENABLED |
+-+ /* check whether icu can handle this */ |
+-+ ucv_in = openIcuConverter(name, 1); |
+-+ ucv_out = openIcuConverter(name, 0); |
+-+ if (ucv_in != NULL && ucv_out != NULL) { |
+-+ enc = (xmlCharEncodingHandlerPtr) |
+-+ xmlMalloc(sizeof(xmlCharEncodingHandler)); |
+-+ if (enc == NULL) { |
+-+ closeIcuConverter(ucv_in); |
+-+ closeIcuConverter(ucv_out); |
+-+ return(NULL); |
+-+ } |
+-+ enc->name = xmlMemStrdup(name); |
+-+ enc->input = NULL; |
+-+ enc->output = NULL; |
+-+ enc->uconv_in = ucv_in; |
+-+ enc->uconv_out = ucv_out; |
+-+#ifdef DEBUG_ENCODING |
+-+ xmlGenericError(xmlGenericErrorContext, |
+-+ "Found ICU converter handler for encoding %s\n", name); |
+-+#endif |
+-+ return enc; |
+-+ } else if (ucv_in != NULL || ucv_out != NULL) { |
+-+ closeIcuConverter(ucv_in); |
+-+ closeIcuConverter(ucv_out); |
+-+ xmlEncodingErr(XML_ERR_INTERNAL_ERROR, |
+-+ "ICU converter : problems with filters for '%s'\n", name); |
+-+ } |
+-+#endif /* LIBXML_ICU_ENABLED */ |
+- |
+- #ifdef DEBUG_ENCODING |
+- xmlGenericError(xmlGenericErrorContext, |
+-@@ -1737,6 +1822,75 @@ |
+- |
+- /************************************************************************ |
+- * * |
+-+ * ICU based generic conversion functions * |
+-+ * * |
+-+ ************************************************************************/ |
+-+ |
+-+#ifdef LIBXML_ICU_ENABLED |
+-+/** |
+-+ * xmlUconvWrapper: |
+-+ * @cd: ICU uconverter data structure |
+-+ * @toUnicode : non-zero if toUnicode. 0 otherwise. |
+-+ * @out: a pointer to an array of bytes to store the result |
+-+ * @outlen: the length of @out |
+-+ * @in: a pointer to an array of ISO Latin 1 chars |
+-+ * @inlen: the length of @in |
+-+ * |
+-+ * Returns 0 if success, or |
+-+ * -1 by lack of space, or |
+-+ * -2 if the transcoding fails (for *in is not valid utf8 string or |
+-+ * the result of transformation can't fit into the encoding we want), or |
+-+ * -3 if there the last byte can't form a single output char. |
+-+ * |
+-+ * The value of @inlen after return is the number of octets consumed |
+-+ * as the return value is positive, else unpredictable. |
+-+ * The value of @outlen after return is the number of ocetes consumed. |
+-+ */ |
+-+static int |
+-+xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen, |
+-+ const unsigned char *in, int *inlen) { |
+-+ const char *ucv_in = (const char *) in; |
+-+ char *ucv_out = (char *) out; |
+-+ UErrorCode err = U_ZERO_ERROR; |
+-+ |
+-+ if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) { |
+-+ if (outlen != NULL) *outlen = 0; |
+-+ return(-1); |
+-+ } |
+-+ |
+-+ /* |
+-+ * TODO(jungshik) |
+-+ * 1. is ucnv_convert(To|From)Algorithmic better? |
+-+ * 2. had we better use an explicit pivot buffer? |
+-+ * 3. error returned comes from 'fromUnicode' only even |
+-+ * when toUnicode is true ! |
+-+ */ |
+-+ if (toUnicode) { |
+-+ /* encoding => UTF-16 => UTF-8 */ |
+-+ ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen, |
+-+ &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL, |
+-+ 0, TRUE, &err); |
+-+ } else { |
+-+ /* UTF-8 => UTF-16 => encoding */ |
+-+ ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen, |
+-+ &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL, |
+-+ 0, TRUE, &err); |
+-+ } |
+-+ *inlen = ucv_in - (const char*) in; |
+-+ *outlen = ucv_out - (char *) out; |
+-+ if (U_SUCCESS(err)) |
+-+ return 0; |
+-+ if (err == U_BUFFER_OVERFLOW_ERROR) |
+-+ return -1; |
+-+ if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND) |
+-+ return -2; |
+-+ /* if (err == U_TRUNCATED_CHAR_FOUND) */ |
+-+ return -3; |
+-+} |
+-+#endif /* LIBXML_ICU_ENABLED */ |
+-+ |
+-+/************************************************************************ |
+-+ * * |
+- * The real API used by libxml for on-the-fly conversion * |
+- * * |
+- ************************************************************************/ |
+-@@ -1810,6 +1964,16 @@ |
+- if (ret == -1) ret = -3; |
+- } |
+- #endif /* LIBXML_ICONV_ENABLED */ |
+-+#ifdef LIBXML_ICU_ENABLED |
+-+ else if (handler->uconv_in != NULL) { |
+-+ ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use], |
+-+ &written, in->content, &toconv); |
+-+ xmlBufferShrink(in, toconv); |
+-+ out->use += written; |
+-+ out->content[out->use] = 0; |
+-+ if (ret == -1) ret = -3; |
+-+ } |
+-+#endif /* LIBXML_ICU_ENABLED */ |
+- #ifdef DEBUG_ENCODING |
+- switch (ret) { |
+- case 0: |
+-@@ -1915,6 +2079,17 @@ |
+- ret = -3; |
+- } |
+- #endif /* LIBXML_ICONV_ENABLED */ |
+-+#ifdef LIBXML_ICU_ENABLED |
+-+ else if (handler->uconv_in != NULL) { |
+-+ ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use], |
+-+ &written, in->content, &toconv); |
+-+ xmlBufferShrink(in, toconv); |
+-+ out->use += written; |
+-+ out->content[out->use] = 0; |
+-+ if (ret == -1) |
+-+ ret = -3; |
+-+ } |
+-+#endif /* LIBXML_ICU_ENABLED */ |
+- switch (ret) { |
+- case 0: |
+- #ifdef DEBUG_ENCODING |
+-@@ -2015,6 +2190,15 @@ |
+- out->content[out->use] = 0; |
+- } |
+- #endif /* LIBXML_ICONV_ENABLED */ |
+-+#ifdef LIBXML_ICU_ENABLED |
+-+ else if (handler->uconv_out != NULL) { |
+-+ ret = xmlUconvWrapper(handler->uconv_out, 0, |
+-+ &out->content[out->use], |
+-+ &written, NULL, &toconv); |
+-+ out->use += written; |
+-+ out->content[out->use] = 0; |
+-+ } |
+-+#endif /* LIBXML_ICU_ENABLED */ |
+- #ifdef DEBUG_ENCODING |
+- xmlGenericError(xmlGenericErrorContext, |
+- "initialized encoder\n"); |
+-@@ -2061,6 +2245,26 @@ |
+- } |
+- } |
+- #endif /* LIBXML_ICONV_ENABLED */ |
+-+#ifdef LIBXML_ICU_ENABLED |
+-+ else if (handler->uconv_out != NULL) { |
+-+ ret = xmlUconvWrapper(handler->uconv_out, 0, |
+-+ &out->content[out->use], |
+-+ &written, in->content, &toconv); |
+-+ xmlBufferShrink(in, toconv); |
+-+ out->use += written; |
+-+ writtentot += written; |
+-+ out->content[out->use] = 0; |
+-+ if (ret == -1) { |
+-+ if (written > 0) { |
+-+ /* |
+-+ * Can be a limitation of iconv |
+-+ */ |
+-+ goto retry; |
+-+ } |
+-+ ret = -3; |
+-+ } |
+-+ } |
+-+#endif /* LIBXML_ICU_ENABLED */ |
+- else { |
+- xmlEncodingErr(XML_I18N_NO_OUTPUT, |
+- "xmlCharEncOutFunc: no output function !\n", NULL); |
+-@@ -2173,6 +2377,22 @@ |
+- xmlFree(handler); |
+- } |
+- #endif /* LIBXML_ICONV_ENABLED */ |
+-+#ifdef LIBXML_ICU_ENABLED |
+-+ if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) { |
+-+ if (handler->name != NULL) |
+-+ xmlFree(handler->name); |
+-+ handler->name = NULL; |
+-+ if (handler->uconv_out != NULL) { |
+-+ closeIcuConverter(handler->uconv_out); |
+-+ handler->uconv_out = NULL; |
+-+ } |
+-+ if (handler->uconv_in != NULL) { |
+-+ closeIcuConverter(handler->uconv_in); |
+-+ handler->uconv_in = NULL; |
+-+ } |
+-+ xmlFree(handler); |
+-+ } |
+-+#endif |
+- #ifdef DEBUG_ENCODING |
+- if (ret) |
+- xmlGenericError(xmlGenericErrorContext, |
+-@@ -2248,6 +2468,22 @@ |
+- cur += toconv; |
+- } while (ret == -2); |
+- #endif |
+-+#ifdef LIBXML_ICU_ENABLED |
+-+ } else if (handler->uconv_out != NULL) { |
+-+ do { |
+-+ toconv = in->end - cur; |
+-+ written = 32000; |
+-+ ret = xmlUconvWrapper(handler->uconv_out, 0, &convbuf[0], |
+-+ &written, cur, &toconv); |
+-+ if (ret < 0) { |
+-+ if (written > 0) |
+-+ ret = -2; |
+-+ else |
+-+ return(-1); |
+-+ } |
+-+ unused += written; |
+-+ cur += toconv; |
+-+ } while (ret == -2); |
+- } else { |
+- /* could not find a converter */ |
+- return(-1); |
+-@@ -2259,8 +2495,9 @@ |
+- } |
+- return(in->consumed + (in->cur - in->base)); |
+- } |
+-+#endif |
+- |
+--#ifndef LIBXML_ICONV_ENABLED |
+-+#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) |
+- #ifdef LIBXML_ISO8859X_ENABLED |
+- |
+- /** |
+-Index: libxml/include/libxml/encoding.h |
+-=================================================================== |
+---- libxml.orig/include/libxml/encoding.h 2010-07-09 14:50:27.503114118 -0700 |
+-+++ libxml/include/libxml/encoding.h 2010-07-09 14:53:27.251611643 -0700 |
+-@@ -26,6 +26,24 @@ |
+- |
+- #ifdef LIBXML_ICONV_ENABLED |
+- #include <iconv.h> |
+-+#else |
+-+#ifdef LIBXML_ICU_ENABLED |
+-+#include <unicode/ucnv.h> |
+-+#if 0 |
+-+/* Forward-declare UConverter here rather than pulling in <unicode/ucnv.h> |
+-+ * to prevent unwanted ICU symbols being exposed to users of libxml2. |
+-+ * One particular case is Qt4 conflicting on UChar32. |
+-+ */ |
+-+#include <stdint.h> |
+-+struct UConverter; |
+-+typedef struct UConverter UConverter; |
+-+#ifdef _MSC_VER |
+-+typedef wchar_t UChar; |
+-+#else |
+-+typedef uint16_t UChar; |
+-+#endif |
+-+#endif |
+-+#endif |
+- #endif |
+- #ifdef __cplusplus |
+- extern "C" { |
+-@@ -125,6 +143,13 @@ |
+- * Block defining the handlers for non UTF-8 encodings. |
+- * If iconv is supported, there are two extra fields. |
+- */ |
+-+#ifdef LIBXML_ICU_ENABLED |
+-+struct _uconv_t { |
+-+ UConverter *uconv; /* for conversion between an encoding and UTF-16 */ |
+-+ UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */ |
+-+}; |
+-+typedef struct _uconv_t uconv_t; |
+-+#endif |
+- |
+- typedef struct _xmlCharEncodingHandler xmlCharEncodingHandler; |
+- typedef xmlCharEncodingHandler *xmlCharEncodingHandlerPtr; |
+-@@ -136,6 +161,10 @@ |
+- iconv_t iconv_in; |
+- iconv_t iconv_out; |
+- #endif /* LIBXML_ICONV_ENABLED */ |
+-+#ifdef LIBXML_ICU_ENABLED |
+-+ uconv_t *uconv_in; |
+-+ uconv_t *uconv_out; |
+-+#endif /* LIBXML_ICU_ENABLED */ |
+- }; |
+- |
+- #ifdef __cplusplus |
+-Index: libxml/include/libxml/parser.h |
+-=================================================================== |
+---- libxml.orig/include/libxml/parser.h 2010-07-09 14:51:21.190673740 -0700 |
+-+++ libxml/include/libxml/parser.h 2010-07-09 14:53:19.571862214 -0700 |
+-@@ -1222,6 +1222,7 @@ |
+- XML_WITH_DEBUG_MEM = 29, |
+- XML_WITH_DEBUG_RUN = 30, |
+- XML_WITH_ZLIB = 31, |
+-+ XML_WITH_ICU = 32, |
+- XML_WITH_NONE = 99999 /* just to be sure of allocation size */ |
+- } xmlFeature; |
+- |
+-Index: libxml/parser.c |
+-=================================================================== |
+---- libxml.orig/parser.c 2010-07-09 14:52:15.150057108 -0700 |
+-+++ libxml/parser.c 2010-07-09 14:53:06.190137405 -0700 |
+-@@ -954,6 +954,12 @@ |
+- #else |
+- return(0); |
+- #endif |
+-+ case XML_WITH_ICU: |
+-+#ifdef LIBXML_ICU_ENABLED |
+-+ return(1); |
+-+#else |
+-+ return(0); |
+-+#endif |
+- default: |
+- break; |
+- } |