| OLD | NEW |
| (Empty) |
| 1 Add code support for ICU. | |
| 2 | |
| 3 diff --git a/third_party/libxml/encoding.c b/third_party/libxml/encoding.c | |
| 4 index b86a547..0f41df9 100644 | |
| 5 --- a/third_party/libxml/encoding.c | |
| 6 +++ b/third_party/libxml/encoding.c | |
| 7 @@ -58,7 +58,7 @@ static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL; | |
| 8 static int xmlCharEncodingAliasesNb = 0; | |
| 9 static int xmlCharEncodingAliasesMax = 0; | |
| 10 | |
| 11 -#ifdef LIBXML_ICONV_ENABLED | |
| 12 +#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED) | |
| 13 #if 0 | |
| 14 #define DEBUG_ENCODING /* Define this to get encoding traces */ | |
| 15 #endif | |
| 16 @@ -97,6 +97,54 @@ xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val) | |
| 17 NULL, 0, val, NULL, NULL, 0, 0, msg, val); | |
| 18 } | |
| 19 | |
| 20 +#ifdef LIBXML_ICU_ENABLED | |
| 21 +static uconv_t* | |
| 22 +openIcuConverter(const char* name, int toUnicode) | |
| 23 +{ | |
| 24 + UErrorCode status = U_ZERO_ERROR; | |
| 25 + uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t)); | |
| 26 + if (conv == NULL) | |
| 27 + return NULL; | |
| 28 + | |
| 29 + conv->uconv = ucnv_open(name, &status); | |
| 30 + if (U_FAILURE(status)) | |
| 31 + goto error; | |
| 32 + | |
| 33 + status = U_ZERO_ERROR; | |
| 34 + if (toUnicode) { | |
| 35 + ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP, | |
| 36 + NULL, NULL, NULL, &status); | |
| 37 + } | |
| 38 + else { | |
| 39 + ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP, | |
| 40 + NULL, NULL, NULL, &status); | |
| 41 + } | |
| 42 + if (U_FAILURE(status)) | |
| 43 + goto error; | |
| 44 + | |
| 45 + status = U_ZERO_ERROR; | |
| 46 + conv->utf8 = ucnv_open("UTF-8", &status); | |
| 47 + if (U_SUCCESS(status)) | |
| 48 + return conv; | |
| 49 + | |
| 50 +error: | |
| 51 + if (conv->uconv) | |
| 52 + ucnv_close(conv->uconv); | |
| 53 + xmlFree(conv); | |
| 54 + return NULL; | |
| 55 +} | |
| 56 + | |
| 57 +static void | |
| 58 +closeIcuConverter(uconv_t *conv) | |
| 59 +{ | |
| 60 + if (conv != NULL) { | |
| 61 + ucnv_close(conv->uconv); | |
| 62 + ucnv_close(conv->utf8); | |
| 63 + xmlFree(conv); | |
| 64 + } | |
| 65 +} | |
| 66 +#endif /* LIBXML_ICU_ENABLED */ | |
| 67 + | |
| 68 /************************************************************************ | |
| 69 * * | |
| 70 * Conversions To/From UTF8 encoding * | |
| 71 @@ -1306,7 +1354,11 @@ xmlNewCharEncodingHandler(const char *name, | |
| 72 #ifdef LIBXML_ICONV_ENABLED | |
| 73 handler->iconv_in = NULL; | |
| 74 handler->iconv_out = NULL; | |
| 75 -#endif /* LIBXML_ICONV_ENABLED */ | |
| 76 +#endif | |
| 77 +#ifdef LIBXML_ICU_ENABLED | |
| 78 + handler->uconv_in = NULL; | |
| 79 + handler->uconv_out = NULL; | |
| 80 +#endif | |
| 81 | |
| 82 /* | |
| 83 * registers and returns the handler. | |
| 84 @@ -1371,7 +1423,7 @@ xmlInitCharEncodingHandlers(void) { | |
| 85 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL); | |
| 86 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL); | |
| 87 #endif /* LIBXML_OUTPUT_ENABLED */ | |
| 88 -#ifndef LIBXML_ICONV_ENABLED | |
| 89 +#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) | |
| 90 #ifdef LIBXML_ISO8859X_ENABLED | |
| 91 xmlRegisterCharEncodingHandlersISO8859x (); | |
| 92 #endif | |
| 93 @@ -1578,6 +1630,10 @@ xmlFindCharEncodingHandler(const char *name) { | |
| 94 xmlCharEncodingHandlerPtr enc; | |
| 95 iconv_t icv_in, icv_out; | |
| 96 #endif /* LIBXML_ICONV_ENABLED */ | |
| 97 +#ifdef LIBXML_ICU_ENABLED | |
| 98 + xmlCharEncodingHandlerPtr enc; | |
| 99 + uconv_t *ucv_in, *ucv_out; | |
| 100 +#endif /* LIBXML_ICU_ENABLED */ | |
| 101 char upper[100]; | |
| 102 int i; | |
| 103 | |
| 104 @@ -1647,6 +1703,35 @@ xmlFindCharEncodingHandler(const char *name) { | |
| 105 "iconv : problems with filters for '%s'\n", name); | |
| 106 } | |
| 107 #endif /* LIBXML_ICONV_ENABLED */ | |
| 108 +#ifdef LIBXML_ICU_ENABLED | |
| 109 + /* check whether ICU can handle this */ | |
| 110 + ucv_in = openIcuConverter(name, 1); | |
| 111 + ucv_out = openIcuConverter(name, 0); | |
| 112 + if (ucv_in != NULL && ucv_out != NULL) { | |
| 113 + enc = (xmlCharEncodingHandlerPtr) | |
| 114 + xmlMalloc(sizeof(xmlCharEncodingHandler)); | |
| 115 + if (enc == NULL) { | |
| 116 + closeIcuConverter(ucv_in); | |
| 117 + closeIcuConverter(ucv_out); | |
| 118 + return(NULL); | |
| 119 + } | |
| 120 + enc->name = xmlMemStrdup(name); | |
| 121 + enc->input = NULL; | |
| 122 + enc->output = NULL; | |
| 123 + enc->uconv_in = ucv_in; | |
| 124 + enc->uconv_out = ucv_out; | |
| 125 +#ifdef DEBUG_ENCODING | |
| 126 + xmlGenericError(xmlGenericErrorContext, | |
| 127 + "Found ICU converter handler for encoding %s\n", name); | |
| 128 +#endif | |
| 129 + return enc; | |
| 130 + } else if (ucv_in != NULL || ucv_out != NULL) { | |
| 131 + closeIcuConverter(ucv_in); | |
| 132 + closeIcuConverter(ucv_out); | |
| 133 + xmlEncodingErr(XML_ERR_INTERNAL_ERROR, | |
| 134 + "ICU converter : problems with filters for '%s'\n", name); | |
| 135 + } | |
| 136 +#endif /* LIBXML_ICU_ENABLED */ | |
| 137 | |
| 138 #ifdef DEBUG_ENCODING | |
| 139 xmlGenericError(xmlGenericErrorContext, | |
| 140 @@ -1737,6 +1822,75 @@ xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen, | |
| 141 | |
| 142 /************************************************************************ | |
| 143 * * | |
| 144 + * ICU based generic conversion functions * | |
| 145 + * * | |
| 146 + ************************************************************************/ | |
| 147 + | |
| 148 +#ifdef LIBXML_ICU_ENABLED | |
| 149 +/** | |
| 150 + * xmlUconvWrapper: | |
| 151 + * @cd: ICU uconverter data structure | |
| 152 + * @toUnicode: non-zero to convert from the encoding to UTF-8, 0 for UTF-8 to the encoding | |
| 153 + * @out: a pointer to an array of bytes to store the result | |
| 154 + * @outlen: the length of @out | |
| 155 + * @in: a pointer to an array of input bytes | |
| 156 + * @inlen: the length of @in | |
| 157 + * | |
| 158 + * Returns 0 on success, or | |
| 159 + * -1 for lack of space, or | |
| 160 + * -2 if the transcoding fails (*in is not a valid UTF-8 string, or | |
| 161 + * the result of the transformation can't fit into the encoding we want), or | |
| 162 + * -3 if the last byte can't form a single output char. | |
| 163 + * | |
| 164 + * The value of @inlen after return is the number of octets consumed | |
| 165 + * if the return value is non-negative, else unpredictable. | |
| 166 + * The value of @outlen after return is the number of octets produced. | |
| 167 + */ | |
| 168 +static int | |
| 169 +xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen, | |
| 170 + const unsigned char *in, int *inlen) { | |
| 171 + const char *ucv_in = (const char *) in; | |
| 172 + char *ucv_out = (char *) out; | |
| 173 + UErrorCode err = U_ZERO_ERROR; | |
| 174 + | |
| 175 + if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) { | |
| 176 + if (outlen != NULL) *outlen = 0; | |
| 177 + return(-1); | |
| 178 + } | |
| 179 + | |
| 180 + /* | |
| 181 + * TODO(jungshik) | |
| 182 + * 1. is ucnv_convert(To|From)Algorithmic better? | |
| 183 + * 2. had we better use an explicit pivot buffer? | |
| 184 + * 3. error returned comes from 'fromUnicode' only even | |
| 185 + * when toUnicode is true ! | |
| 186 + */ | |
| 187 + if (toUnicode) { | |
| 188 + /* encoding => UTF-16 => UTF-8 */ | |
| 189 + ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen, | |
| 190 + &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL, | |
| 191 + 0, TRUE, &err); | |
| 192 + } else { | |
| 193 + /* UTF-8 => UTF-16 => encoding */ | |
| 194 + ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen, | |
| 195 + &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL, | |
| 196 + 0, TRUE, &err); | |
| 197 + } | |
| 198 + *inlen = ucv_in - (const char*) in; | |
| 199 + *outlen = ucv_out - (char *) out; | |
| 200 + if (U_SUCCESS(err)) | |
| 201 + return 0; | |
| 202 + if (err == U_BUFFER_OVERFLOW_ERROR) | |
| 203 + return -1; | |
| 204 + if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND) | |
| 205 + return -2; | |
| 206 + /* if (err == U_TRUNCATED_CHAR_FOUND) */ | |
| 207 + return -3; | |
| 208 +} | |
| 209 +#endif /* LIBXML_ICU_ENABLED */ | |
| 210 + | |
| 211 +/************************************************************************ | |
| 212 + * * | |
| 213 * The real API used by libxml for on-the-fly conversion * | |
| 214 * * | |
| 215 ************************************************************************/ | |
| 216 @@ -1810,6 +1964,16 @@ xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out, | |
| 217 if (ret == -1) ret = -3; | |
| 218 } | |
| 219 #endif /* LIBXML_ICONV_ENABLED */ | |
| 220 +#ifdef LIBXML_ICU_ENABLED | |
| 221 + else if (handler->uconv_in != NULL) { | |
| 222 + ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use], | |
| 223 + &written, in->content, &toconv); | |
| 224 + xmlBufferShrink(in, toconv); | |
| 225 + out->use += written; | |
| 226 + out->content[out->use] = 0; | |
| 227 + if (ret == -1) ret = -3; | |
| 228 + } | |
| 229 +#endif /* LIBXML_ICU_ENABLED */ | |
| 230 #ifdef DEBUG_ENCODING | |
| 231 switch (ret) { | |
| 232 case 0: | |
| 233 @@ -1915,6 +2079,17 @@ xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out, | |
| 234 ret = -3; | |
| 235 } | |
| 236 #endif /* LIBXML_ICONV_ENABLED */ | |
| 237 +#ifdef LIBXML_ICU_ENABLED | |
| 238 + else if (handler->uconv_in != NULL) { | |
| 239 + ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use], | |
| 240 + &written, in->content, &toconv); | |
| 241 + xmlBufferShrink(in, toconv); | |
| 242 + out->use += written; | |
| 243 + out->content[out->use] = 0; | |
| 244 + if (ret == -1) | |
| 245 + ret = -3; | |
| 246 + } | |
| 247 +#endif /* LIBXML_ICU_ENABLED */ | |
| 248 switch (ret) { | |
| 249 case 0: | |
| 250 #ifdef DEBUG_ENCODING | |
| 251 @@ -2015,6 +2190,15 @@ retry: | |
| 252 out->content[out->use] = 0; | |
| 253 } | |
| 254 #endif /* LIBXML_ICONV_ENABLED */ | |
| 255 +#ifdef LIBXML_ICU_ENABLED | |
| 256 + else if (handler->uconv_out != NULL) { | |
| 257 + ret = xmlUconvWrapper(handler->uconv_out, 0, | |
| 258 + &out->content[out->use], | |
| 259 + &written, NULL, &toconv); | |
| 260 + out->use += written; | |
| 261 + out->content[out->use] = 0; | |
| 262 + } | |
| 263 +#endif /* LIBXML_ICU_ENABLED */ | |
| 264 #ifdef DEBUG_ENCODING | |
| 265 xmlGenericError(xmlGenericErrorContext, | |
| 266 "initialized encoder\n"); | |
| 267 @@ -2061,6 +2245,26 @@ retry: | |
| 268 } | |
| 269 } | |
| 270 #endif /* LIBXML_ICONV_ENABLED */ | |
| 271 +#ifdef LIBXML_ICU_ENABLED | |
| 272 + else if (handler->uconv_out != NULL) { | |
| 273 + ret = xmlUconvWrapper(handler->uconv_out, 0, | |
| 274 + &out->content[out->use], | |
| 275 + &written, in->content, &toconv); | |
| 276 + xmlBufferShrink(in, toconv); | |
| 277 + out->use += written; | |
| 278 + writtentot += written; | |
| 279 + out->content[out->use] = 0; | |
| 280 + if (ret == -1) { | |
| 281 + if (written > 0) { | |
| 282 + /* | |
| 283 + * Can be a limitation of the ICU converter | |
| 284 + */ | |
| 285 + goto retry; | |
| 286 + } | |
| 287 + ret = -3; | |
| 288 + } | |
| 289 + } | |
| 290 +#endif /* LIBXML_ICU_ENABLED */ | |
| 291 else { | |
| 292 xmlEncodingErr(XML_I18N_NO_OUTPUT, | |
| 293 "xmlCharEncOutFunc: no output function !\n", NULL); | |
| 294 @@ -2173,6 +2377,22 @@ xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) { | |
| 295 xmlFree(handler); | |
| 296 } | |
| 297 #endif /* LIBXML_ICONV_ENABLED */ | |
| 298 +#ifdef LIBXML_ICU_ENABLED | |
| 299 + if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) { | |
| 300 + if (handler->name != NULL) | |
| 301 + xmlFree(handler->name); | |
| 302 + handler->name = NULL; | |
| 303 + if (handler->uconv_out != NULL) { | |
| 304 + closeIcuConverter(handler->uconv_out); | |
| 305 + handler->uconv_out = NULL; | |
| 306 + } | |
| 307 + if (handler->uconv_in != NULL) { | |
| 308 + closeIcuConverter(handler->uconv_in); | |
| 309 + handler->uconv_in = NULL; | |
| 310 + } | |
| 311 + xmlFree(handler); | |
| 312 + } | |
| 313 +#endif | |
| 314 #ifdef DEBUG_ENCODING | |
| 315 if (ret) | |
| 316 xmlGenericError(xmlGenericErrorContext, | |
| 317 @@ -2248,6 +2468,22 @@ xmlByteConsumed(xmlParserCtxtPtr ctxt) { | |
| 318 cur += toconv; | |
| 319 } while (ret == -2); | |
| 320 #endif | |
| 321 +#ifdef LIBXML_ICU_ENABLED | |
| 322 + } else if (handler->uconv_out != NULL) { | |
| 323 + do { | |
| 324 + toconv = in->end - cur; | |
| 325 + written = 32000; | |
| 326 + ret = xmlUconvWrapper(handler->uconv_out, 0, &convbuf[0], | |
| 327 + &written, cur, &toconv); | |
| 328 + if (ret < 0) { | |
| 329 + if (written > 0) | |
| 330 + ret = -2; | |
| 331 + else | |
| 332 + return(-1); | |
| 333 + } | |
| 334 + unused += written; | |
| 335 + cur += toconv; | |
| 336 + } while (ret == -2); | |
| 337 } else { | |
| 338 /* could not find a converter */ | |
| 339 return(-1); | |
| 340 @@ -2259,8 +2495,9 @@ xmlByteConsumed(xmlParserCtxtPtr ctxt) { | |
| 341 } | |
| 342 return(in->consumed + (in->cur - in->base)); | |
| 343 } | |
| 344 +#endif | |
| 345 | |
| 346 -#ifndef LIBXML_ICONV_ENABLED | |
| 347 +#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) | |
| 348 #ifdef LIBXML_ISO8859X_ENABLED | |
| 349 | |
| 350 /** | |
| 351 diff --git a/third_party/libxml/include/libxml/encoding.h b/third_party/libxml/include/libxml/encoding.h | |
| 352 index c74b25f..b5f8b48 100644 | |
| 353 --- a/third_party/libxml/include/libxml/encoding.h | |
| 354 +++ b/third_party/libxml/include/libxml/encoding.h | |
| 355 @@ -26,6 +26,24 @@ | |
| 356 | |
| 357 #ifdef LIBXML_ICONV_ENABLED | |
| 358 #include <iconv.h> | |
| 359 +#else | |
| 360 +#ifdef LIBXML_ICU_ENABLED | |
| 361 +#include <unicode/ucnv.h> | |
| 362 +#if 0 | |
| 363 +/* Forward-declare UConverter here rather than pulling in <unicode/ucnv.h> | |
| 364 + * to prevent unwanted ICU symbols being exposed to users of libxml2. | |
| 365 + * One particular case is Qt4 conflicting on UChar32. | |
| 366 + */ | |
| 367 +#include <stdint.h> | |
| 368 +struct UConverter; | |
| 369 +typedef struct UConverter UConverter; | |
| 370 +#ifdef _MSC_VER | |
| 371 +typedef wchar_t UChar; | |
| 372 +#else | |
| 373 +typedef uint16_t UChar; | |
| 374 +#endif | |
| 375 +#endif | |
| 376 +#endif | |
| 377 #endif | |
| 378 #ifdef __cplusplus | |
| 379 extern "C" { | |
| 380 @@ -125,6 +143,13 @@ typedef int (* xmlCharEncodingOutputFunc)(unsigned char *out, int *outlen, | |
| 381 * Block defining the handlers for non UTF-8 encodings. | |
| 382 * If iconv is supported, there are two extra fields. | |
| 383 */ | |
| 384 +#ifdef LIBXML_ICU_ENABLED | |
| 385 +struct _uconv_t { | |
| 386 + UConverter *uconv; /* for conversion between an encoding and UTF-16 */ | |
| 387 + UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */ | |
| 388 +}; | |
| 389 +typedef struct _uconv_t uconv_t; | |
| 390 +#endif | |
| 391 | |
| 392 typedef struct _xmlCharEncodingHandler xmlCharEncodingHandler; | |
| 393 typedef xmlCharEncodingHandler *xmlCharEncodingHandlerPtr; | |
| 394 @@ -136,6 +161,10 @@ struct _xmlCharEncodingHandler { | |
| 395 iconv_t iconv_in; | |
| 396 iconv_t iconv_out; | |
| 397 #endif /* LIBXML_ICONV_ENABLED */ | |
| 398 +#ifdef LIBXML_ICU_ENABLED | |
| 399 + uconv_t *uconv_in; | |
| 400 + uconv_t *uconv_out; | |
| 401 +#endif /* LIBXML_ICU_ENABLED */ | |
| 402 }; | |
| 403 | |
| 404 #ifdef __cplusplus | |
| 405 diff --git a/third_party/libxml/include/libxml/parser.h b/third_party/libxml/include/libxml/parser.h | |
| 406 index dd79c42..3580b63 100644 | |
| 407 --- a/third_party/libxml/include/libxml/parser.h | |
| 408 +++ b/third_party/libxml/include/libxml/parser.h | |
| 409 @@ -1222,6 +1222,7 @@ typedef enum { | |
| 410 XML_WITH_DEBUG_MEM = 29, | |
| 411 XML_WITH_DEBUG_RUN = 30, | |
| 412 XML_WITH_ZLIB = 31, | |
| 413 + XML_WITH_ICU = 32, | |
| 414 XML_WITH_NONE = 99999 /* just to be sure of allocation size */ | |
| 415 } xmlFeature; | |
| 416 | |
| 417 diff --git a/third_party/libxml/include/libxml/xmlversion.h.in b/third_party/libxml/include/libxml/xmlversion.h.in | |
| 418 index 4739f3a..de310ab 100644 | |
| 419 --- a/third_party/libxml/include/libxml/xmlversion.h.in | |
| 420 +++ b/third_party/libxml/include/libxml/xmlversion.h.in | |
| 421 @@ -269,6 +269,15 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version); | |
| 422 #endif | |
| 423 | |
| 424 /** | |
| 425 + * LIBXML_ICU_ENABLED: | |
| 426 + * | |
| 427 + * Whether ICU support is available | |
| 428 + */ | |
| 429 +#if @WITH_ICU@ | |
| 430 +#define LIBXML_ICU_ENABLED | |
| 431 +#endif | |
| 432 + | |
| 433 +/** | |
| 434 * LIBXML_ISO8859X_ENABLED: | |
| 435 * | |
| 436 * Whether ISO-8859-* support is made available in case iconv is not | |
| 437 diff --git a/third_party/libxml/parser.c b/third_party/libxml/parser.c | |
| 438 index 85e7599..3ba2a06 100644 | |
| 439 --- a/third_party/libxml/parser.c | |
| 440 +++ b/third_party/libxml/parser.c | |
| 441 @@ -954,6 +954,12 @@ xmlHasFeature(xmlFeature feature) | |
| 442 #else | |
| 443 return(0); | |
| 444 #endif | |
| 445 + case XML_WITH_ICU: | |
| 446 +#ifdef LIBXML_ICU_ENABLED | |
| 447 + return(1); | |
| 448 +#else | |
| 449 + return(0); | |
| 450 +#endif | |
| 451 default: | |
| 452 break; | |
| 453 } | |
| OLD | NEW |