Chromium Code Reviews

| OLD | NEW |
|---|---|
| (Empty) | |
| 1 Add code support for ICU. | |
| 2 | |
| 3 diff --git a/third_party/libxml/encoding.c b/third_party/libxml/encoding.c | |
| 4 index b86a547..0f41df9 100644 | |
| 5 --- a/third_party/libxml/encoding.c | |
| 6 +++ b/third_party/libxml/encoding.c | |
| 7 @@ -58,7 +58,7 @@ static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL; | |
| 8 static int xmlCharEncodingAliasesNb = 0; | |
| 9 static int xmlCharEncodingAliasesMax = 0; | |
| 10 | |
| 11 -#ifdef LIBXML_ICONV_ENABLED | |
| 12 +#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED) | |
| 13 #if 0 | |
| 14 #define DEBUG_ENCODING /* Define this to get encoding traces */ | |
| 15 #endif | |
| 16 @@ -97,6 +97,54 @@ xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val) | |
| 17 NULL, 0, val, NULL, NULL, 0, 0, msg, val); | |
| 18 } | |
| 19 | |
| 20 +#ifdef LIBXML_ICU_ENABLED | |
| 21 +static uconv_t* | |
| 22 +openIcuConverter(const char* name, int toUnicode) | |
| 23 +{ | |
| 24 + UErrorCode status = U_ZERO_ERROR; | |
| 25 + uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t)); | |
| 26 + if (conv == NULL) | |
| 27 + return NULL; | |
| 28 + | |
| 29 + conv->uconv = ucnv_open(name, &status); | |
| 30 + if (U_FAILURE(status)) | |
| 31 + goto error; | |
| 32 + | |
| 33 + status = U_ZERO_ERROR; | |
| 34 + if (toUnicode) { | |
| 35 + ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP, | |
| 36 + NULL, NULL, NULL, &status); | |
| 37 + } | |
| 38 + else { | |
| 39 + ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP, | |
| 40 + NULL, NULL, NULL, &status); | |
| 41 + } | |
| 42 + if (U_FAILURE(status)) | |
| 43 + goto error; | |
| 44 + | |
| 45 + status = U_ZERO_ERROR; | |
| 46 + conv->utf8 = ucnv_open("UTF-8", &status); | |
| 47 + if (U_SUCCESS(status)) | |
| 48 + return conv; | |
| 49 + | |
| 50 +error: | |
| 51 + if (conv->uconv) | |
| 52 + ucnv_close(conv->uconv); | |
| 53 + xmlFree(conv); | |
| 54 + return NULL; | |
| 55 +} | |
| 56 + | |
| 57 +static void | |
| 58 +closeIcuConverter(uconv_t *conv) | |
| 59 +{ | |
| 60 + if (conv != NULL) { | |
| 61 + ucnv_close(conv->uconv); | |
| 62 + ucnv_close(conv->utf8); | |
| 63 + xmlFree(conv); | |
| 64 + } | |
| 65 +} | |
| 66 +#endif /* LIBXML_ICU_ENABLED */ | |
| 67 + | |
| 68 /************************************************************************ | |
| 69 * * | |
| 70 * Conversions To/From UTF8 encoding * | |
| 71 @@ -1306,7 +1354,11 @@ xmlNewCharEncodingHandler(const char *name, | |
| 72 #ifdef LIBXML_ICONV_ENABLED | |
| 73 handler->iconv_in = NULL; | |
| 74 handler->iconv_out = NULL; | |
| 75 -#endif /* LIBXML_ICONV_ENABLED */ | |
| 76 +#endif | |
| 77 +#ifdef LIBXML_ICU_ENABLED | |
| 78 + handler->uconv_in = NULL; | |
| 79 + handler->uconv_out = NULL; | |
| 80 +#endif | |
| 81 | |
| 82 /* | |
| 83 * registers and returns the handler. | |
| 84 @@ -1371,7 +1423,7 @@ xmlInitCharEncodingHandlers(void) { | |
| 85 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL); | |
| 86 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL); | |
| 87 #endif /* LIBXML_OUTPUT_ENABLED */ | |
| 88 -#ifndef LIBXML_ICONV_ENABLED | |
| 89 +#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) | |
| 90 #ifdef LIBXML_ISO8859X_ENABLED | |
| 91 xmlRegisterCharEncodingHandlersISO8859x (); | |
| 92 #endif | |
| 93 @@ -1578,6 +1630,10 @@ xmlFindCharEncodingHandler(const char *name) { | |
| 94 xmlCharEncodingHandlerPtr enc; | |
| 95 iconv_t icv_in, icv_out; | |
| 96 #endif /* LIBXML_ICONV_ENABLED */ | |
| 97 +#ifdef LIBXML_ICU_ENABLED | |
| 98 + xmlCharEncodingHandlerPtr enc; | |
| 99 + uconv_t *ucv_in, *ucv_out; | |
| 100 +#endif /* LIBXML_ICU_ENABLED */ | |
| 101 char upper[100]; | |
| 102 int i; | |
| 103 | |
| 104 @@ -1647,6 +1703,35 @@ xmlFindCharEncodingHandler(const char *name) { | |
| 105 "iconv : problems with filters for '%s'\n", name); | |
| 106 } | |
| 107 #endif /* LIBXML_ICONV_ENABLED */ | |
| 108 +#ifdef LIBXML_ICU_ENABLED | |
| 109 + /* check whether icu can handle this */ | |
| 110 + ucv_in = openIcuConverter(name, 1); | |
| 111 + ucv_out = openIcuConverter(name, 0); | |
| 112 + if (ucv_in != NULL && ucv_out != NULL) { | |
| 113 + enc = (xmlCharEncodingHandlerPtr) | |
| 114 + xmlMalloc(sizeof(xmlCharEncodingHandler)); | |
| 115 + if (enc == NULL) { | |
| 116 + closeIcuConverter(ucv_in); | |
| 117 + closeIcuConverter(ucv_out); | |
| 118 + return(NULL); | |
| 119 + } | |
| 120 + enc->name = xmlMemStrdup(name); | |
| 121 + enc->input = NULL; | |
| 122 + enc->output = NULL; | |
| 123 + enc->uconv_in = ucv_in; | |
| 124 + enc->uconv_out = ucv_out; | |
| 125 +#ifdef DEBUG_ENCODING | |
| 126 + xmlGenericError(xmlGenericErrorContext, | |
| 127 + "Found ICU converter handler for encoding %s\n", name); | |
| 128 +#endif | |
| 129 + return enc; | |
| 130 + } else if (ucv_in != NULL || ucv_out != NULL) { | |
| 131 + closeIcuConverter(ucv_in); | |
| 132 + closeIcuConverter(ucv_out); | |
| 133 + xmlEncodingErr(XML_ERR_INTERNAL_ERROR, | |
| 134 + "ICU converter : problems with filters for '%s'\n", name); | |
| 135 + } | |
| 136 +#endif /* LIBXML_ICU_ENABLED */ | |
| 137 | |
| 138 #ifdef DEBUG_ENCODING | |
| 139 xmlGenericError(xmlGenericErrorContext, | |
| 140 @@ -1737,6 +1822,75 @@ xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen, | |
| 141 | |
| 142 /************************************************************************ | |
| 143 * * | |
| 144 + * ICU based generic conversion functions * | |
| 145 + * * | |
| 146 + ************************************************************************/ | |
| 147 + | |
| 148 +#ifdef LIBXML_ICU_ENABLED | |
| 149 +/** | |
| 150 + * xmlUconvWrapper: | |
| 151 + * @cd: ICU uconverter data structure | |
| 152 + * @toUnicode : non-zero if toUnicode. 0 otherwise. | |
| 153 + * @out: a pointer to an array of bytes to store the result | |
| 154 + * @outlen: the length of @out | |
| 155 + * @in:  a pointer to an array of bytes to convert | |
| 156 + * @inlen: the length of @in | |
| 157 + * | |
| 158 + * Returns 0 if success, or | |
| 159 + * -1 by lack of space, or | |
| 160 + * -2 if the transcoding fails (for *in is not valid utf8 string or | |
| 161 + * the result of transformation can't fit into the encoding we want), or | |
| 162 + * -3 if the last byte can't form a single output char. | |
| 163 + * | |
| 164 + * The value of @inlen after return is the number of octets consumed | |
| 165 + * if the return value is 0, else unpredictable. | |
| 166 + * The value of @outlen after return is the number of octets written. | |
| 167 + */ | |
| 168 +static int | |
| 169 +xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen, | |
| 170 + const unsigned char *in, int *inlen) { | |
| 171 + const char *ucv_in = (const char *) in; | |
| 172 + char *ucv_out = (char *) out; | |
| 173 + UErrorCode err = U_ZERO_ERROR; | |
| 174 + | |
| 175 + if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) { | |
| 176 + if (outlen != NULL) *outlen = 0; | |
| 177 + return(-1); | |
| 178 + } | |
| 179 + | |
| 180 + /* | |
| 181 + * TODO(jungshik) | |
| 182 + * 1. is ucnv_convert(To|From)Algorithmic better? | |
| 183 + * 2. had we better use an explicit pivot buffer? | |
| 184 + * 3. error returned comes from 'fromUnicode' only even | |
| 185 + * when toUnicode is true ! | |
| 186 + */ | |
| 187 + if (toUnicode) { | |
| 188 + /* encoding => UTF-16 => UTF-8 */ | |
| 189 + ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen, | |
| 190 + &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL, | |
| 191 + 0, TRUE, &err); | |
| 192 + } else { | |
| 193 + /* UTF-8 => UTF-16 => encoding */ | |
| 194 + ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen, | |
| 195 + &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL, | |
| 196 + 0, TRUE, &err); | |
| 197 + } | |
| 198 + *inlen = ucv_in - (const char*) in; | |
| 199 + *outlen = ucv_out - (char *) out; | |
| 200 + if (U_SUCCESS(err)) | |
| 201 + return 0; | |
| 202 + if (err == U_BUFFER_OVERFLOW_ERROR) | |
| 203 + return -1; | |
| 204 + if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND) | |
| 205 + return -2; | |
| 206 + /* if (err == U_TRUNCATED_CHAR_FOUND) */ | |
| 207 + return -3; | |
| 208 +} | |
| 209 +#endif /* LIBXML_ICU_ENABLED */ | |
| 210 + | |
| 211 +/************************************************************************ | |
| 212 + * * | |
| 213 * The real API used by libxml for on-the-fly conversion * | |
| 214 * * | |
| 215 ************************************************************************/ | |
| 216 @@ -1810,6 +1964,16 @@ xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out, | |
| 217 if (ret == -1) ret = -3; | |
| 218 } | |
| 219 #endif /* LIBXML_ICONV_ENABLED */ | |
| 220 +#ifdef LIBXML_ICU_ENABLED | |
| 221 + else if (handler->uconv_in != NULL) { | |
| 222 + ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use], | |
| 223 + &written, in->content, &toconv); | |
| 224 + xmlBufferShrink(in, toconv); | |
| 225 + out->use += written; | |
| 226 + out->content[out->use] = 0; | |
| 227 + if (ret == -1) ret = -3; | |
| 228 + } | |
| 229 +#endif /* LIBXML_ICU_ENABLED */ | |
| 230 #ifdef DEBUG_ENCODING | |
| 231 switch (ret) { | |
| 232 case 0: | |
| 233 @@ -1915,6 +2079,17 @@ xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out, | |
| 234 ret = -3; | |
| 235 } | |
| 236 #endif /* LIBXML_ICONV_ENABLED */ | |
| 237 +#ifdef LIBXML_ICU_ENABLED | |
| 238 + else if (handler->uconv_in != NULL) { | |
| 239 + ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use], | |
| 240 + &written, in->content, &toconv); | |
| 241 + xmlBufferShrink(in, toconv); | |
| 242 + out->use += written; | |
| 243 + out->content[out->use] = 0; | |
| 244 + if (ret == -1) | |
| 245 + ret = -3; | |
| 246 + } | |
| 247 +#endif /* LIBXML_ICU_ENABLED */ | |
| 248 switch (ret) { | |
| 249 case 0: | |
| 250 #ifdef DEBUG_ENCODING | |
| 251 @@ -2015,6 +2190,15 @@ retry: | |
| 252 out->content[out->use] = 0; | |
| 253 } | |
| 254 #endif /* LIBXML_ICONV_ENABLED */ | |
| 255 +#ifdef LIBXML_ICU_ENABLED | |
| 256 + else if (handler->uconv_out != NULL) { | |
| 257 + ret = xmlUconvWrapper(handler->uconv_out, 0, | |
| 258 + &out->content[out->use], | |
| 259 + &written, NULL, &toconv); | |
| 260 + out->use += written; | |
| 261 + out->content[out->use] = 0; | |
| 262 + } | |
| 263 +#endif /* LIBXML_ICU_ENABLED */ | |
| 264 #ifdef DEBUG_ENCODING | |
| 265 xmlGenericError(xmlGenericErrorContext, | |
| 266 "initialized encoder\n"); | |
| 267 @@ -2061,6 +2245,26 @@ retry: | |
| 268 } | |
| 269 } | |
| 270 #endif /* LIBXML_ICONV_ENABLED */ | |
| 271 +#ifdef LIBXML_ICU_ENABLED | |
| 272 + else if (handler->uconv_out != NULL) { | |
| 273 + ret = xmlUconvWrapper(handler->uconv_out, 0, | |
| 274 + &out->content[out->use], | |
| 275 + &written, in->content, &toconv); | |
| 276 + xmlBufferShrink(in, toconv); | |
| 277 + out->use += written; | |
| 278 + writtentot += written; | |
| 279 + out->content[out->use] = 0; | |
| 280 + if (ret == -1) { | |
| 281 + if (written > 0) { | |
| 282 + /* | |
| 283 + * Can be a limitation of iconv | |
| 284 + */ | |
| 285 + goto retry; | |
| 286 + } | |
| 287 + ret = -3; | |
| 288 + } | |
| 289 + } | |
| 290 +#endif /* LIBXML_ICU_ENABLED */ | |
| 291 else { | |
| 292 xmlEncodingErr(XML_I18N_NO_OUTPUT, | |
| 293 "xmlCharEncOutFunc: no output function !\n", NULL); | |
| 294 @@ -2173,6 +2377,22 @@ xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) { | |
| 295 xmlFree(handler); | |
| 296 } | |
| 297 #endif /* LIBXML_ICONV_ENABLED */ | |
| 298 +#ifdef LIBXML_ICU_ENABLED | |
| 299 + if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) { | |
| 300 + if (handler->name != NULL) | |
| 301 + xmlFree(handler->name); | |
| 302 + handler->name = NULL; | |
| 303 + if (handler->uconv_out != NULL) { | |
| 304 + closeIcuConverter(handler->uconv_out); | |
| 305 + handler->uconv_out = NULL; | |
| 306 + } | |
| 307 + if (handler->uconv_in != NULL) { | |
| 308 + closeIcuConverter(handler->uconv_in); | |
| 309 + handler->uconv_in = NULL; | |
| 310 + } | |
| 311 + xmlFree(handler); | |
| 312 + } | |
| 313 +#endif | |
| 314 #ifdef DEBUG_ENCODING | |
| 315 if (ret) | |
| 316 xmlGenericError(xmlGenericErrorContext, | |
| 317 @@ -2248,6 +2468,22 @@ xmlByteConsumed(xmlParserCtxtPtr ctxt) { | |
| 318 cur += toconv; | |
| 319 } while (ret == -2); | |
| 320 #endif | |
| 321 +#ifdef LIBXML_ICU_ENABLED | |
| 322 + } else if (handler->uconv_out != NULL) { | |
| 323 + do { | |
| 324 + toconv = in->end - cur; | |
| 325 + written = 32000; | |
| 326 + ret = xmlUconvWrapper(handler->uconv_out, 0, &convbuf[0], | |
| 327 + &written, cur, &toconv); | |
| 328 + if (ret < 0) { | |
| 329 + if (written > 0) | |
| 330 + ret = -2; | |
| 331 + else | |
| 332 + return(-1); | |
| 333 + } | |
| 334 + unused += written; | |
| 335 + cur += toconv; | |
| 336 + } while (ret == -2); | |
| 337 } else { | |
| 338 /* could not find a converter */ | |
| 339 return(-1); | |
| 340 @@ -2259,8 +2495,9 @@ xmlByteConsumed(xmlParserCtxtPtr ctxt) { | |
| 341 } | |
| 342 return(in->consumed + (in->cur - in->base)); | |
| 343 } | |
| 344 +#endif | |
| 345 | |
| 346 -#ifndef LIBXML_ICONV_ENABLED | |
| 347 +#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) | |
| 348 #ifdef LIBXML_ISO8859X_ENABLED | |
| 349 | |
| 350 /** | |
| 351 diff --git a/third_party/libxml/include/libxml/encoding.h b/third_party/libxml/include/libxml/encoding.h | |
| 352 index c74b25f..b5f8b48 100644 | |
| 353 --- a/third_party/libxml/include/libxml/encoding.h | |
| 354 +++ b/third_party/libxml/include/libxml/encoding.h | |
| 355 @@ -26,6 +26,24 @@ | |
| 356 | |
| 357 #ifdef LIBXML_ICONV_ENABLED | |
| 358 #include <iconv.h> | |
| 359 +#else | |
| 360 +#ifdef LIBXML_ICU_ENABLED | |
| 361 +#include <unicode/ucnv.h> | |
| 362 +#if 0 | |
| 363 +/* Forward-declare UConverter here rather than pulling in <unicode/ucnv.h> | |
| 364 + * to prevent unwanted ICU symbols being exposed to users of libxml2. | |
| 365 + * One particular case is Qt4 conflicting on UChar32. | |
| 366 + */ | |
| 367 +#include <stdint.h> | |
| 368 +struct UConverter; | |
| 369 +typedef struct UConverter UConverter; | |
| 370 +#ifdef _MSC_VER | |
| 371 +typedef wchar_t UChar; | |
| 372 +#else | |
| 373 +typedef uint16_t UChar; | |
| 374 +#endif | |
| 375 +#endif | |
| 376 +#endif | |
| 377 #endif | |
| 378 #ifdef __cplusplus | |
| 379 extern "C" { | |
| 380 @@ -125,6 +143,13 @@ typedef int (* xmlCharEncodingOutputFunc)(unsigned char *out, int *outlen, | |
| 381 * Block defining the handlers for non UTF-8 encodings. | |
| 382 * If iconv is supported, there are two extra fields. | |
| 383 */ | |
| 384 +#ifdef LIBXML_ICU_ENABLED | |
| 385 +struct _uconv_t { | |
| 386 + UConverter *uconv; /* for conversion between an encoding and UTF-16 */ | |
| 387 + UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */ | |
| 388 +}; | |
| 389 +typedef struct _uconv_t uconv_t; | |
| 390 +#endif | |
| 391 | |
| 392 typedef struct _xmlCharEncodingHandler xmlCharEncodingHandler; | |
| 393 typedef xmlCharEncodingHandler *xmlCharEncodingHandlerPtr; | |
| 394 @@ -136,6 +161,10 @@ struct _xmlCharEncodingHandler { | |
| 395 iconv_t iconv_in; | |
| 396 iconv_t iconv_out; | |
| 397 #endif /* LIBXML_ICONV_ENABLED */ | |
| 398 +#ifdef LIBXML_ICU_ENABLED | |
| 399 + uconv_t *uconv_in; | |
| 400 + uconv_t *uconv_out; | |
| 401 +#endif /* LIBXML_ICU_ENABLED */ | |
| 402 }; | |
| 403 | |
| 404 #ifdef __cplusplus | |
| 405 diff --git a/third_party/libxml/include/libxml/parser.h b/third_party/libxml/include/libxml/parser.h | |
| 406 index dd79c42..3580b63 100644 | |
| 407 --- a/third_party/libxml/include/libxml/parser.h | |
| 408 +++ b/third_party/libxml/include/libxml/parser.h | |
| 409 @@ -1222,6 +1222,7 @@ typedef enum { | |
| 410 XML_WITH_DEBUG_MEM = 29, | |
| 411 XML_WITH_DEBUG_RUN = 30, | |
| 412 XML_WITH_ZLIB = 31, | |
| 413 + XML_WITH_ICU = 32, | |
| 414 XML_WITH_NONE = 99999 /* just to be sure of allocation size */ | |
| 415 } xmlFeature; | |
| 416 | |
| 417 diff --git a/third_party/libxml/include/libxml/xmlversion.h.in b/third_party/libxml/include/libxml/xmlversion.h.in | |
| 418 index 4739f3a..de310ab 100644 | |
| 419 --- a/third_party/libxml/include/libxml/xmlversion.h.in | |
| 420 +++ b/third_party/libxml/include/libxml/xmlversion.h.in | |
| 421 @@ -269,6 +269,15 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version); | |
| 422 #endif | |
| 423 | |
| 424 /** | |
| 425 + * LIBXML_ICU_ENABLED: | |
| 426 + * | |
| 427 + * Whether icu support is available | |
| 428 + */ | |
| 429 +#if @WITH_ICU@ | |
| 430 +#define LIBXML_ICU_ENABLED | |
| 431 +#endif | |
| 432 + | |
| 433 +/** | |
| 434 * LIBXML_ISO8859X_ENABLED: | |
| 435 * | |
| 436 * Whether ISO-8859-* support is made available in case iconv is not | |
| 437 diff --git a/third_party/libxml/parser.c b/third_party/libxml/parser.c | |
| 438 index 85e7599..3ba2a06 100644 | |
| 439 --- a/third_party/libxml/parser.c | |
| 440 +++ b/third_party/libxml/parser.c | |
| 441 @@ -954,6 +954,12 @@ xmlHasFeature(xmlFeature feature) | |
| 442 #else | |
| 443 return(0); | |
| 444 #endif | |
| 445 + case XML_WITH_ICU: | |
| 446 +#ifdef LIBXML_ICU_ENABLED | |
| 447 + return(1); | |
| 448 +#else | |
| 449 + return(0); | |
| 450 +#endif | |
| 451 default: | |
| 452 break; | |
| 453 } | |
| 454 diff --git a/third_party/libxml/patches/icu b/third_party/libxml/patches/icu | |
|
jungshik at Google
2010/07/12 18:23:29
I guess you don't want to include this diff in the
| |
| 455 index 324cea3..6c22c3c 100644 | |
| 456 --- a/third_party/libxml/patches/icu | |
| 457 +++ b/third_party/libxml/patches/icu | |
| 458 @@ -1,434 +0,0 @@ | |
| 459 -Code support for ICU. Note that this relies on modifications to the | |
| 460 -build environment (either configure or configure.js on Windows). | |
| 461 - | |
| 462 -Index: libxml/encoding.c | |
| 463 -=================================================================== | |
| 464 ---- libxml.orig/encoding.c 2010-07-09 14:48:28.881863834 -0700 | |
| 465 -+++ libxml/encoding.c 2010-07-09 14:49:23.479741318 -0700 | |
| 466 -@@ -58,7 +58,7 @@ | |
| 467 - static int xmlCharEncodingAliasesNb = 0; | |
| 468 - static int xmlCharEncodingAliasesMax = 0; | |
| 469 - | |
| 470 --#ifdef LIBXML_ICONV_ENABLED | |
| 471 -+#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED) | |
| 472 - #if 0 | |
| 473 - #define DEBUG_ENCODING /* Define this to get encoding traces */ | |
| 474 - #endif | |
| 475 -@@ -97,6 +97,54 @@ | |
| 476 - NULL, 0, val, NULL, NULL, 0, 0, msg, val); | |
| 477 - } | |
| 478 - | |
| 479 -+#ifdef LIBXML_ICU_ENABLED | |
| 480 -+static uconv_t* | |
| 481 -+openIcuConverter(const char* name, int toUnicode) | |
| 482 -+{ | |
| 483 -+ UErrorCode status = U_ZERO_ERROR; | |
| 484 -+ uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t)); | |
| 485 -+ if (conv == NULL) | |
| 486 -+ return NULL; | |
| 487 -+ | |
| 488 -+ conv->uconv = ucnv_open(name, &status); | |
| 489 -+ if (U_FAILURE(status)) | |
| 490 -+ goto error; | |
| 491 -+ | |
| 492 -+ status = U_ZERO_ERROR; | |
| 493 -+ if (toUnicode) { | |
| 494 -+ ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP, | |
| 495 -+ NULL, NULL, NULL, &status); | |
| 496 -+ } | |
| 497 -+ else { | |
| 498 -+ ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP, | |
| 499 -+ NULL, NULL, NULL, &status); | |
| 500 -+ } | |
| 501 -+ if (U_FAILURE(status)) | |
| 502 -+ goto error; | |
| 503 -+ | |
| 504 -+ status = U_ZERO_ERROR; | |
| 505 -+ conv->utf8 = ucnv_open("UTF-8", &status); | |
| 506 -+ if (U_SUCCESS(status)) | |
| 507 -+ return conv; | |
| 508 -+ | |
| 509 -+error: | |
| 510 -+ if (conv->uconv) | |
| 511 -+ ucnv_close(conv->uconv); | |
| 512 -+ xmlFree(conv); | |
| 513 -+ return NULL; | |
| 514 -+} | |
| 515 -+ | |
| 516 -+static void | |
| 517 -+closeIcuConverter(uconv_t *conv) | |
| 518 -+{ | |
| 519 -+ if (conv != NULL) { | |
| 520 -+ ucnv_close(conv->uconv); | |
| 521 -+ ucnv_close(conv->utf8); | |
| 522 -+ xmlFree(conv); | |
| 523 -+ } | |
| 524 -+} | |
| 525 -+#endif /* LIBXML_ICU_ENABLED */ | |
| 526 -+ | |
| 527 - /************************************************************************ | |
| 528 - * * | |
| 529 - * Conversions To/From UTF8 encoding * | |
| 530 -@@ -1306,7 +1354,11 @@ | |
| 531 - #ifdef LIBXML_ICONV_ENABLED | |
| 532 - handler->iconv_in = NULL; | |
| 533 - handler->iconv_out = NULL; | |
| 534 --#endif /* LIBXML_ICONV_ENABLED */ | |
| 535 -+#endif | |
| 536 -+#ifdef LIBXML_ICU_ENABLED | |
| 537 -+ handler->uconv_in = NULL; | |
| 538 -+ handler->uconv_out = NULL; | |
| 539 -+#endif | |
| 540 - | |
| 541 - /* | |
| 542 - * registers and returns the handler. | |
| 543 -@@ -1371,7 +1423,7 @@ | |
| 544 - xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL); | |
| 545 - xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL); | |
| 546 - #endif /* LIBXML_OUTPUT_ENABLED */ | |
| 547 --#ifndef LIBXML_ICONV_ENABLED | |
| 548 -+#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) | |
| 549 - #ifdef LIBXML_ISO8859X_ENABLED | |
| 550 - xmlRegisterCharEncodingHandlersISO8859x (); | |
| 551 - #endif | |
| 552 -@@ -1578,6 +1630,10 @@ | |
| 553 - xmlCharEncodingHandlerPtr enc; | |
| 554 - iconv_t icv_in, icv_out; | |
| 555 - #endif /* LIBXML_ICONV_ENABLED */ | |
| 556 -+#ifdef LIBXML_ICU_ENABLED | |
| 557 -+ xmlCharEncodingHandlerPtr enc; | |
| 558 -+ uconv_t *ucv_in, *ucv_out; | |
| 559 -+#endif /* LIBXML_ICU_ENABLED */ | |
| 560 - char upper[100]; | |
| 561 - int i; | |
| 562 - | |
| 563 -@@ -1647,6 +1703,35 @@ | |
| 564 - "iconv : problems with filters for '%s'\n", name); | |
| 565 - } | |
| 566 - #endif /* LIBXML_ICONV_ENABLED */ | |
| 567 -+#ifdef LIBXML_ICU_ENABLED | |
| 568 -+ /* check whether icu can handle this */ | |
| 569 -+ ucv_in = openIcuConverter(name, 1); | |
| 570 -+ ucv_out = openIcuConverter(name, 0); | |
| 571 -+ if (ucv_in != NULL && ucv_out != NULL) { | |
| 572 -+ enc = (xmlCharEncodingHandlerPtr) | |
| 573 -+ xmlMalloc(sizeof(xmlCharEncodingHandler)); | |
| 574 -+ if (enc == NULL) { | |
| 575 -+ closeIcuConverter(ucv_in); | |
| 576 -+ closeIcuConverter(ucv_out); | |
| 577 -+ return(NULL); | |
| 578 -+ } | |
| 579 -+ enc->name = xmlMemStrdup(name); | |
| 580 -+ enc->input = NULL; | |
| 581 -+ enc->output = NULL; | |
| 582 -+ enc->uconv_in = ucv_in; | |
| 583 -+ enc->uconv_out = ucv_out; | |
| 584 -+#ifdef DEBUG_ENCODING | |
| 585 -+ xmlGenericError(xmlGenericErrorContext, | |
| 586 -+ "Found ICU converter handler for encoding %s\n", name); | |
| 587 -+#endif | |
| 588 -+ return enc; | |
| 589 -+ } else if (ucv_in != NULL || ucv_out != NULL) { | |
| 590 -+ closeIcuConverter(ucv_in); | |
| 591 -+ closeIcuConverter(ucv_out); | |
| 592 -+ xmlEncodingErr(XML_ERR_INTERNAL_ERROR, | |
| 593 -+ "ICU converter : problems with filters for '%s'\n", name); | |
| 594 -+ } | |
| 595 -+#endif /* LIBXML_ICU_ENABLED */ | |
| 596 - | |
| 597 - #ifdef DEBUG_ENCODING | |
| 598 - xmlGenericError(xmlGenericErrorContext, | |
| 599 -@@ -1737,6 +1822,75 @@ | |
| 600 - | |
| 601 - /************************************************************************ | |
| 602 - * * | |
| 603 -+ * ICU based generic conversion functions * | |
| 604 -+ * * | |
| 605 -+ ************************************************************************/ | |
| 606 -+ | |
| 607 -+#ifdef LIBXML_ICU_ENABLED | |
| 608 -+/** | |
| 609 -+ * xmlUconvWrapper: | |
| 610 -+ * @cd: ICU uconverter data structure | |
| 611 -+ * @toUnicode : non-zero if toUnicode. 0 otherwise. | |
| 612 -+ * @out: a pointer to an array of bytes to store the result | |
| 613 -+ * @outlen: the length of @out | |
| 614 -+ * @in: a pointer to an array of ISO Latin 1 chars | |
| 615 -+ * @inlen: the length of @in | |
| 616 -+ * | |
| 617 -+ * Returns 0 if success, or | |
| 618 -+ * -1 by lack of space, or | |
| 619 -+ * -2 if the transcoding fails (for *in is not valid utf8 string or | |
| 620 -+ * the result of transformation can't fit into the encoding we want), or | |
| 621 -+ * -3 if there the last byte can't form a single output char. | |
| 622 -+ * | |
| 623 -+ * The value of @inlen after return is the number of octets consumed | |
| 624 -+ * as the return value is positive, else unpredictable. | |
| 625 -+ * The value of @outlen after return is the number of ocetes consumed. | |
| 626 -+ */ | |
| 627 -+static int | |
| 628 -+xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen, | |
| 629 -+ const unsigned char *in, int *inlen) { | |
| 630 -+ const char *ucv_in = (const char *) in; | |
| 631 -+ char *ucv_out = (char *) out; | |
| 632 -+ UErrorCode err = U_ZERO_ERROR; | |
| 633 -+ | |
| 634 -+ if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) { | |
| 635 -+ if (outlen != NULL) *outlen = 0; | |
| 636 -+ return(-1); | |
| 637 -+ } | |
| 638 -+ | |
| 639 -+ /* | |
| 640 -+ * TODO(jungshik) | |
| 641 -+ * 1. is ucnv_convert(To|From)Algorithmic better? | |
| 642 -+ * 2. had we better use an explicit pivot buffer? | |
| 643 -+ * 3. error returned comes from 'fromUnicode' only even | |
| 644 -+ * when toUnicode is true ! | |
| 645 -+ */ | |
| 646 -+ if (toUnicode) { | |
| 647 -+ /* encoding => UTF-16 => UTF-8 */ | |
| 648 -+ ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen, | |
| 649 -+ &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL, | |
| 650 -+ 0, TRUE, &err); | |
| 651 -+ } else { | |
| 652 -+ /* UTF-8 => UTF-16 => encoding */ | |
| 653 -+ ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen, | |
| 654 -+ &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL, | |
| 655 -+ 0, TRUE, &err); | |
| 656 -+ } | |
| 657 -+ *inlen = ucv_in - (const char*) in; | |
| 658 -+ *outlen = ucv_out - (char *) out; | |
| 659 -+ if (U_SUCCESS(err)) | |
| 660 -+ return 0; | |
| 661 -+ if (err == U_BUFFER_OVERFLOW_ERROR) | |
| 662 -+ return -1; | |
| 663 -+ if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND) | |
| 664 -+ return -2; | |
| 665 -+ /* if (err == U_TRUNCATED_CHAR_FOUND) */ | |
| 666 -+ return -3; | |
| 667 -+} | |
| 668 -+#endif /* LIBXML_ICU_ENABLED */ | |
| 669 -+ | |
| 670 -+/************************************************************************ | |
| 671 -+ * * | |
| 672 - * The real API used by libxml for on-the-fly conversion * | |
| 673 - * * | |
| 674 - ************************************************************************/ | |
| 675 -@@ -1810,6 +1964,16 @@ | |
| 676 - if (ret == -1) ret = -3; | |
| 677 - } | |
| 678 - #endif /* LIBXML_ICONV_ENABLED */ | |
| 679 -+#ifdef LIBXML_ICU_ENABLED | |
| 680 -+ else if (handler->uconv_in != NULL) { | |
| 681 -+ ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use], | |
| 682 -+ &written, in->content, &toconv); | |
| 683 -+ xmlBufferShrink(in, toconv); | |
| 684 -+ out->use += written; | |
| 685 -+ out->content[out->use] = 0; | |
| 686 -+ if (ret == -1) ret = -3; | |
| 687 -+ } | |
| 688 -+#endif /* LIBXML_ICU_ENABLED */ | |
| 689 - #ifdef DEBUG_ENCODING | |
| 690 - switch (ret) { | |
| 691 - case 0: | |
| 692 -@@ -1915,6 +2079,17 @@ | |
| 693 - ret = -3; | |
| 694 - } | |
| 695 - #endif /* LIBXML_ICONV_ENABLED */ | |
| 696 -+#ifdef LIBXML_ICU_ENABLED | |
| 697 -+ else if (handler->uconv_in != NULL) { | |
| 698 -+ ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use], | |
| 699 -+ &written, in->content, &toconv); | |
| 700 -+ xmlBufferShrink(in, toconv); | |
| 701 -+ out->use += written; | |
| 702 -+ out->content[out->use] = 0; | |
| 703 -+ if (ret == -1) | |
| 704 -+ ret = -3; | |
| 705 -+ } | |
| 706 -+#endif /* LIBXML_ICU_ENABLED */ | |
| 707 - switch (ret) { | |
| 708 - case 0: | |
| 709 - #ifdef DEBUG_ENCODING | |
| 710 -@@ -2015,6 +2190,15 @@ | |
| 711 - out->content[out->use] = 0; | |
| 712 - } | |
| 713 - #endif /* LIBXML_ICONV_ENABLED */ | |
| 714 -+#ifdef LIBXML_ICU_ENABLED | |
| 715 -+ else if (handler->uconv_out != NULL) { | |
| 716 -+ ret = xmlUconvWrapper(handler->uconv_out, 0, | |
| 717 -+ &out->content[out->use], | |
| 718 -+ &written, NULL, &toconv); | |
| 719 -+ out->use += written; | |
| 720 -+ out->content[out->use] = 0; | |
| 721 -+ } | |
| 722 -+#endif /* LIBXML_ICU_ENABLED */ | |
| 723 - #ifdef DEBUG_ENCODING | |
| 724 - xmlGenericError(xmlGenericErrorContext, | |
| 725 - "initialized encoder\n"); | |
| 726 -@@ -2061,6 +2245,26 @@ | |
| 727 - } | |
| 728 - } | |
| 729 - #endif /* LIBXML_ICONV_ENABLED */ | |
| 730 -+#ifdef LIBXML_ICU_ENABLED | |
| 731 -+ else if (handler->uconv_out != NULL) { | |
| 732 -+ ret = xmlUconvWrapper(handler->uconv_out, 0, | |
| 733 -+ &out->content[out->use], | |
| 734 -+ &written, in->content, &toconv); | |
| 735 -+ xmlBufferShrink(in, toconv); | |
| 736 -+ out->use += written; | |
| 737 -+ writtentot += written; | |
| 738 -+ out->content[out->use] = 0; | |
| 739 -+ if (ret == -1) { | |
| 740 -+ if (written > 0) { | |
| 741 -+ /* | |
| 742 -+ * Can be a limitation of iconv | |
| 743 -+ */ | |
| 744 -+ goto retry; | |
| 745 -+ } | |
| 746 -+ ret = -3; | |
| 747 -+ } | |
| 748 -+ } | |
| 749 -+#endif /* LIBXML_ICU_ENABLED */ | |
| 750 - else { | |
| 751 - xmlEncodingErr(XML_I18N_NO_OUTPUT, | |
| 752 - "xmlCharEncOutFunc: no output function !\n", NULL); | |
| 753 -@@ -2173,6 +2377,22 @@ | |
| 754 - xmlFree(handler); | |
| 755 - } | |
| 756 - #endif /* LIBXML_ICONV_ENABLED */ | |
| 757 -+#ifdef LIBXML_ICU_ENABLED | |
| 758 -+ if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) { | |
| 759 -+ if (handler->name != NULL) | |
| 760 -+ xmlFree(handler->name); | |
| 761 -+ handler->name = NULL; | |
| 762 -+ if (handler->uconv_out != NULL) { | |
| 763 -+ closeIcuConverter(handler->uconv_out); | |
| 764 -+ handler->uconv_out = NULL; | |
| 765 -+ } | |
| 766 -+ if (handler->uconv_in != NULL) { | |
| 767 -+ closeIcuConverter(handler->uconv_in); | |
| 768 -+ handler->uconv_in = NULL; | |
| 769 -+ } | |
| 770 -+ xmlFree(handler); | |
| 771 -+ } | |
| 772 -+#endif | |
| 773 - #ifdef DEBUG_ENCODING | |
| 774 - if (ret) | |
| 775 - xmlGenericError(xmlGenericErrorContext, | |
| 776 -@@ -2248,6 +2468,22 @@ | |
| 777 - cur += toconv; | |
| 778 - } while (ret == -2); | |
| 779 - #endif | |
| 780 -+#ifdef LIBXML_ICU_ENABLED | |
| 781 -+ } else if (handler->uconv_out != NULL) { | |
| 782 -+ do { | |
| 783 -+ toconv = in->end - cur; | |
| 784 -+ written = 32000; | |
| 785 -+ ret = xmlUconvWrapper(handler->uconv_out, 0, &convbuf[0], | |
| 786 -+ &written, cur, &toconv); | |
| 787 -+ if (ret < 0) { | |
| 788 -+ if (written > 0) | |
| 789 -+ ret = -2; | |
| 790 -+ else | |
| 791 -+ return(-1); | |
| 792 -+ } | |
| 793 -+ unused += written; | |
| 794 -+ cur += toconv; | |
| 795 -+ } while (ret == -2); | |
| 796 - } else { | |
| 797 - /* could not find a converter */ | |
| 798 - return(-1); | |
| 799 -@@ -2259,8 +2495,9 @@ | |
| 800 - } | |
| 801 - return(in->consumed + (in->cur - in->base)); | |
| 802 - } | |
| 803 -+#endif | |
| 804 - | |
| 805 --#ifndef LIBXML_ICONV_ENABLED | |
| 806 -+#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) | |
| 807 - #ifdef LIBXML_ISO8859X_ENABLED | |
| 808 - | |
| 809 - /** | |
| 810 -Index: libxml/include/libxml/encoding.h | |
| 811 -=================================================================== | |
| 812 ---- libxml.orig/include/libxml/encoding.h 2010-07-09 14:50:27.503114118 -0700 | |
| 813 -+++ libxml/include/libxml/encoding.h 2010-07-09 14:53:27.251611643 -0700 | |
| 814 -@@ -26,6 +26,24 @@ | |
| 815 - | |
| 816 - #ifdef LIBXML_ICONV_ENABLED | |
| 817 - #include <iconv.h> | |
| 818 -+#else | |
| 819 -+#ifdef LIBXML_ICU_ENABLED | |
| 820 -+#include <unicode/ucnv.h> | |
| 821 -+#if 0 | |
| 822 -+/* Forward-declare UConverter here rather than pulling in <unicode/ucnv.h> | |
| 823 -+ * to prevent unwanted ICU symbols being exposed to users of libxml2. | |
| 824 -+ * One particular case is Qt4 conflicting on UChar32. | |
| 825 -+ */ | |
| 826 -+#include <stdint.h> | |
| 827 -+struct UConverter; | |
| 828 -+typedef struct UConverter UConverter; | |
| 829 -+#ifdef _MSC_VER | |
| 830 -+typedef wchar_t UChar; | |
| 831 -+#else | |
| 832 -+typedef uint16_t UChar; | |
| 833 -+#endif | |
| 834 -+#endif | |
| 835 -+#endif | |
| 836 - #endif | |
| 837 - #ifdef __cplusplus | |
| 838 - extern "C" { | |
| 839 -@@ -125,6 +143,13 @@ | |
| 840 - * Block defining the handlers for non UTF-8 encodings. | |
| 841 - * If iconv is supported, there are two extra fields. | |
| 842 - */ | |
| 843 -+#ifdef LIBXML_ICU_ENABLED | |
| 844 -+struct _uconv_t { | |
| 845 -+ UConverter *uconv; /* for conversion between an encoding and UTF-16 */ | |
| 846 -+ UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */ | |
| 847 -+}; | |
| 848 -+typedef struct _uconv_t uconv_t; | |
| 849 -+#endif | |
| 850 - | |
| 851 - typedef struct _xmlCharEncodingHandler xmlCharEncodingHandler; | |
| 852 - typedef xmlCharEncodingHandler *xmlCharEncodingHandlerPtr; | |
| 853 -@@ -136,6 +161,10 @@ | |
| 854 - iconv_t iconv_in; | |
| 855 - iconv_t iconv_out; | |
| 856 - #endif /* LIBXML_ICONV_ENABLED */ | |
| 857 -+#ifdef LIBXML_ICU_ENABLED | |
| 858 -+ uconv_t *uconv_in; | |
| 859 -+ uconv_t *uconv_out; | |
| 860 -+#endif /* LIBXML_ICU_ENABLED */ | |
| 861 - }; | |
| 862 - | |
| 863 - #ifdef __cplusplus | |
| 864 -Index: libxml/include/libxml/parser.h | |
| 865 -=================================================================== | |
| 866 ---- libxml.orig/include/libxml/parser.h 2010-07-09 14:51:21.190673740 -0700 | |
| 867 -+++ libxml/include/libxml/parser.h 2010-07-09 14:53:19.571862214 -0700 | |
| 868 -@@ -1222,6 +1222,7 @@ | |
| 869 - XML_WITH_DEBUG_MEM = 29, | |
| 870 - XML_WITH_DEBUG_RUN = 30, | |
| 871 - XML_WITH_ZLIB = 31, | |
| 872 -+ XML_WITH_ICU = 32, | |
| 873 - XML_WITH_NONE = 99999 /* just to be sure of allocation size */ | |
| 874 - } xmlFeature; | |
| 875 - | |
| 876 -Index: libxml/parser.c | |
| 877 -=================================================================== | |
| 878 ---- libxml.orig/parser.c 2010-07-09 14:52:15.150057108 -0700 | |
| 879 -+++ libxml/parser.c 2010-07-09 14:53:06.190137405 -0700 | |
| 880 -@@ -954,6 +954,12 @@ | |
| 881 - #else | |
| 882 - return(0); | |
| 883 - #endif | |
| 884 -+ case XML_WITH_ICU: | |
| 885 -+#ifdef LIBXML_ICU_ENABLED | |
| 886 -+ return(1); | |
| 887 -+#else | |
| 888 -+ return(0); | |
| 889 -+#endif | |
| 890 - default: | |
| 891 - break; | |
| 892 - } | |
| OLD | NEW |