Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(183)

Side by Side Diff: third_party/libxml/encoding.c

Issue 2951008: Update libxml to 2.7.7. (Closed) Base URL: http://src.chromium.org/git/chromium.git
Patch Set: Created 10 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * encoding.c : implements the encoding conversion functions needed for XML 2 * encoding.c : implements the encoding conversion functions needed for XML
3 * 3 *
4 * Related specs: 4 * Related specs:
5 * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies 5 * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6 * rfc2781 UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau 6 * rfc2781 UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7 * [ISO-10646] UTF-8 and UTF-16 in Annexes 7 * [ISO-10646] UTF-8 and UTF-16 in Annexes
8 * [ISO-8859-1] ISO Latin-1 characters codes. 8 * [ISO-8859-1] ISO Latin-1 characters codes.
9 * [UNICODE] The Unicode Consortium, "The Unicode Standard -- 9 * [UNICODE] The Unicode Consortium, "The Unicode Standard --
10 * Worldwide Character Encoding -- Version 1.0", Addison- 10 * Worldwide Character Encoding -- Version 1.0", Addison-
(...skipping 1448 matching lines...) Expand 10 before | Expand all | Expand 10 after
1459 1459
1460 /** 1460 /**
1461 * xmlRegisterCharEncodingHandler: 1461 * xmlRegisterCharEncodingHandler:
1462 * @handler: the xmlCharEncodingHandlerPtr handler block 1462 * @handler: the xmlCharEncodingHandlerPtr handler block
1463 * 1463 *
1464 * Register the char encoding handler, surprising, isn't it ? 1464 * Register the char encoding handler, surprising, isn't it ?
1465 */ 1465 */
1466 void 1466 void
1467 xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) { 1467 xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1468 if (handlers == NULL) xmlInitCharEncodingHandlers(); 1468 if (handlers == NULL) xmlInitCharEncodingHandlers();
1469 if (handler == NULL) { 1469 if ((handler == NULL) || (handlers == NULL)) {
1470 xmlEncodingErr(XML_I18N_NO_HANDLER, 1470 xmlEncodingErr(XML_I18N_NO_HANDLER,
1471 "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL); 1471 "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
1472 return; 1472 return;
1473 } 1473 }
1474 1474
1475 if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) { 1475 if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1476 xmlEncodingErr(XML_I18N_EXCESS_HANDLER, 1476 xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1477 "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n", 1477 "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1478 "MAX_ENCODING_HANDLERS"); 1478 "MAX_ENCODING_HANDLERS");
1479 return; 1479 return;
(...skipping 23 matching lines...) Expand all
1503 return(NULL); 1503 return(NULL);
1504 case XML_CHAR_ENCODING_UTF16LE: 1504 case XML_CHAR_ENCODING_UTF16LE:
1505 return(xmlUTF16LEHandler); 1505 return(xmlUTF16LEHandler);
1506 case XML_CHAR_ENCODING_UTF16BE: 1506 case XML_CHAR_ENCODING_UTF16BE:
1507 return(xmlUTF16BEHandler); 1507 return(xmlUTF16BEHandler);
1508 case XML_CHAR_ENCODING_EBCDIC: 1508 case XML_CHAR_ENCODING_EBCDIC:
1509 handler = xmlFindCharEncodingHandler("EBCDIC"); 1509 handler = xmlFindCharEncodingHandler("EBCDIC");
1510 if (handler != NULL) return(handler); 1510 if (handler != NULL) return(handler);
1511 handler = xmlFindCharEncodingHandler("ebcdic"); 1511 handler = xmlFindCharEncodingHandler("ebcdic");
1512 if (handler != NULL) return(handler); 1512 if (handler != NULL) return(handler);
1513 handler = xmlFindCharEncodingHandler("EBCDIC-US");
1514 if (handler != NULL) return(handler);
1513 break; 1515 break;
1514 case XML_CHAR_ENCODING_UCS4BE: 1516 case XML_CHAR_ENCODING_UCS4BE:
1515 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4"); 1517 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1516 if (handler != NULL) return(handler); 1518 if (handler != NULL) return(handler);
1517 handler = xmlFindCharEncodingHandler("UCS-4"); 1519 handler = xmlFindCharEncodingHandler("UCS-4");
1518 if (handler != NULL) return(handler); 1520 if (handler != NULL) return(handler);
1519 handler = xmlFindCharEncodingHandler("UCS4"); 1521 handler = xmlFindCharEncodingHandler("UCS4");
1520 if (handler != NULL) return(handler); 1522 if (handler != NULL) return(handler);
1521 break; 1523 break;
1522 case XML_CHAR_ENCODING_UCS4LE: 1524 case XML_CHAR_ENCODING_UCS4LE:
(...skipping 126 matching lines...) Expand 10 before | Expand all | Expand 10 after
1649 1651
1650 /* 1652 /*
1651 * Check first for directly registered encoding names 1653 * Check first for directly registered encoding names
1652 */ 1654 */
1653 for (i = 0;i < 99;i++) { 1655 for (i = 0;i < 99;i++) {
1654 upper[i] = toupper(name[i]); 1656 upper[i] = toupper(name[i]);
1655 if (upper[i] == 0) break; 1657 if (upper[i] == 0) break;
1656 } 1658 }
1657 upper[i] = 0; 1659 upper[i] = 0;
1658 1660
1659 for (i = 0;i < nbCharEncodingHandler; i++) 1661 if (handlers != NULL) {
1660 if (!strcmp(upper, handlers[i]->name)) { 1662 for (i = 0;i < nbCharEncodingHandler; i++) {
1663 if (!strcmp(upper, handlers[i]->name)) {
1661 #ifdef DEBUG_ENCODING 1664 #ifdef DEBUG_ENCODING
1662 xmlGenericError(xmlGenericErrorContext, 1665 xmlGenericError(xmlGenericErrorContext,
1663 » » "Found registered handler for encoding %s\n", name); 1666 "Found registered handler for encoding %s\n", name);
1664 #endif 1667 #endif
1665 » return(handlers[i]); 1668 return(handlers[i]);
1666 » } 1669 }
1670 }
1671 }
1667 1672
1668 #ifdef LIBXML_ICONV_ENABLED 1673 #ifdef LIBXML_ICONV_ENABLED
1669 /* check whether iconv can handle this */ 1674 /* check whether iconv can handle this */
1670 icv_in = iconv_open("UTF-8", name); 1675 icv_in = iconv_open("UTF-8", name);
1671 icv_out = iconv_open(name, "UTF-8"); 1676 icv_out = iconv_open(name, "UTF-8");
1672 if (icv_in == (iconv_t) -1) { 1677 if (icv_in == (iconv_t) -1) {
1673 icv_in = iconv_open("UTF-8", upper); 1678 icv_in = iconv_open("UTF-8", upper);
1674 } 1679 }
1675 if (icv_out == (iconv_t) -1) { 1680 if (icv_out == (iconv_t) -1) {
1676 icv_out = iconv_open(upper, "UTF-8"); 1681 icv_out = iconv_open(upper, "UTF-8");
(...skipping 205 matching lines...) Expand 10 before | Expand all | Expand 10 after
1882 /* if (err == U_TRUNCATED_CHAR_FOUND) */ 1887 /* if (err == U_TRUNCATED_CHAR_FOUND) */
1883 return -3; 1888 return -3;
1884 } 1889 }
1885 #endif /* LIBXML_ICU_ENABLED */ 1890 #endif /* LIBXML_ICU_ENABLED */
1886 1891
1887 /************************************************************************ 1892 /************************************************************************
1888 * * 1893 * *
1889 * The real API used by libxml for on-the-fly conversion * 1894 * The real API used by libxml for on-the-fly conversion *
1890 * * 1895 * *
1891 ************************************************************************/ 1896 ************************************************************************/
1897 int
1898 xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1899 xmlBufferPtr in, int len);
1892 1900
1893 /** 1901 /**
1894 * xmlCharEncFirstLine: 1902 * xmlCharEncFirstLineInt:
1895 * @handler: char encoding transformation data structure 1903 * @handler: char encoding transformation data structure
1896 * @out: an xmlBuffer for the output. 1904 * @out: an xmlBuffer for the output.
1897 * @in: an xmlBuffer for the input 1905 * @in: an xmlBuffer for the input
1898 * 1906 * @len: number of bytes to convert for the first line, or -1
1907 *
1899 * Front-end for the encoding handler input function, but handle only 1908 * Front-end for the encoding handler input function, but handle only
1900 * the very first line, i.e. limit itself to 45 chars. 1909 * the very first line, i.e. limit itself to 45 chars.
1901 * 1910 *
1902 * Returns the number of byte written if success, or 1911 * Returns the number of byte written if success, or
1903 * -1 general error 1912 * -1 general error
1904 * -2 if the transcoding fails (for *in is not valid utf8 string or 1913 * -2 if the transcoding fails (for *in is not valid utf8 string or
1905 * the result of transformation can't fit into the encoding we want), or 1914 * the result of transformation can't fit into the encoding we want), or
1906 */ 1915 */
1907 int 1916 int
1908 xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out, 1917 xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1909 xmlBufferPtr in) { 1918 xmlBufferPtr in, int len) {
1910 int ret = -2; 1919 int ret = -2;
1911 int written; 1920 int written;
1912 int toconv; 1921 int toconv;
1913 1922
1914 if (handler == NULL) return(-1); 1923 if (handler == NULL) return(-1);
1915 if (out == NULL) return(-1); 1924 if (out == NULL) return(-1);
1916 if (in == NULL) return(-1); 1925 if (in == NULL) return(-1);
1917 1926
1918 /* calculate space available */ 1927 /* calculate space available */
1919 written = out->size - out->use; 1928 written = out->size - out->use;
1920 toconv = in->use; 1929 toconv = in->use;
1921 /* 1930 /*
1922 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38 1931 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
1923 * 45 chars should be sufficient to reach the end of the encoding 1932 * 45 chars should be sufficient to reach the end of the encoding
1924 * declaration without going too far inside the document content. 1933 * declaration without going too far inside the document content.
1925 * on UTF-16 this means 90bytes, on UCS4 this means 180 1934 * on UTF-16 this means 90bytes, on UCS4 this means 180
1935 * The actual value depending on guessed encoding is passed as @len
1936 * if provided
1926 */ 1937 */
1927 if (toconv > 180) 1938 if (len >= 0) {
1928 » toconv = 180; 1939 if (toconv > len)
1940 toconv = len;
1941 } else {
1942 if (toconv > 180)
1943 toconv = 180;
1944 }
1929 if (toconv * 2 >= written) { 1945 if (toconv * 2 >= written) {
1930 xmlBufferGrow(out, toconv); 1946 xmlBufferGrow(out, toconv);
1931 written = out->size - out->use - 1; 1947 written = out->size - out->use - 1;
1932 } 1948 }
1933 1949
1934 if (handler->input != NULL) { 1950 if (handler->input != NULL) {
1935 ret = handler->input(&out->content[out->use], &written, 1951 ret = handler->input(&out->content[out->use], &written,
1936 in->content, &toconv); 1952 in->content, &toconv);
1937 xmlBufferShrink(in, toconv); 1953 xmlBufferShrink(in, toconv);
1938 out->use += written; 1954 out->use += written;
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
1983 #endif /* DEBUG_ENCODING */ 1999 #endif /* DEBUG_ENCODING */
1984 /* 2000 /*
1985 * Ignore when input buffer is not on a boundary 2001 * Ignore when input buffer is not on a boundary
1986 */ 2002 */
1987 if (ret == -3) ret = 0; 2003 if (ret == -3) ret = 0;
1988 if (ret == -1) ret = 0; 2004 if (ret == -1) ret = 0;
1989 return(ret); 2005 return(ret);
1990 } 2006 }
1991 2007
1992 /** 2008 /**
2009 * xmlCharEncFirstLine:
2010 * @handler: char encoding transformation data structure
2011 * @out: an xmlBuffer for the output.
2012 * @in: an xmlBuffer for the input
2013 *
2014 * Front-end for the encoding handler input function, but handle only
2015 * the very first line, i.e. limit itself to 45 chars.
2016 *
2017 * Returns the number of byte written if success, or
2018 * -1 general error
2019 * -2 if the transcoding fails (for *in is not valid utf8 string or
2020 * the result of transformation can't fit into the encoding we want), or
2021 */
2022 int
2023 xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2024 xmlBufferPtr in) {
2025 return(xmlCharEncFirstLineInt(handler, out, in, -1));
2026 }
2027
2028 /**
1993 * xmlCharEncInFunc: 2029 * xmlCharEncInFunc:
1994 * @handler: char encoding transformation data structure 2030 * @handler: char encoding transformation data structure
1995 * @out: an xmlBuffer for the output. 2031 * @out: an xmlBuffer for the output.
1996 * @in: an xmlBuffer for the input 2032 * @in: an xmlBuffer for the input
1997 * 2033 *
1998 * Generic front-end for the encoding handler input function 2034 * Generic front-end for the encoding handler input function
1999 * 2035 *
2000 * Returns the number of byte written if success, or 2036 * Returns the number of byte written if success, or
2001 * -1 general error 2037 * -1 general error
2002 * -2 if the transcoding fails (for *in is not valid utf8 string or 2038 * -2 if the transcoding fails (for *in is not valid utf8 string or
2003 * the result of transformation can't fit into the encoding we want), or 2039 * the result of transformation can't fit into the encoding we want), or
2004 */ 2040 */
2005 int 2041 int
2006 xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out, 2042 xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2007 xmlBufferPtr in) 2043 xmlBufferPtr in)
2008 { 2044 {
2009 int ret = -2; 2045 int ret = -2;
2010 int written; 2046 int written;
(...skipping 165 matching lines...) Expand 10 before | Expand all | Expand 10 after
2176 toconv = in->use; 2212 toconv = in->use;
2177 if (toconv == 0) 2213 if (toconv == 0)
2178 return(0); 2214 return(0);
2179 if (toconv * 4 >= written) { 2215 if (toconv * 4 >= written) {
2180 xmlBufferGrow(out, toconv * 4); 2216 xmlBufferGrow(out, toconv * 4);
2181 written = out->size - out->use - 1; 2217 written = out->size - out->use - 1;
2182 } 2218 }
2183 if (handler->output != NULL) { 2219 if (handler->output != NULL) {
2184 ret = handler->output(&out->content[out->use], &written, 2220 ret = handler->output(&out->content[out->use], &written,
2185 in->content, &toconv); 2221 in->content, &toconv);
2186 » xmlBufferShrink(in, toconv); 2222 » if (written > 0) {
2187 » out->use += written; 2223 » xmlBufferShrink(in, toconv);
2188 » writtentot += written; 2224 » out->use += written;
2225 » writtentot += written;
2226 » }
2189 out->content[out->use] = 0; 2227 out->content[out->use] = 0;
2190 } 2228 }
2191 #ifdef LIBXML_ICONV_ENABLED 2229 #ifdef LIBXML_ICONV_ENABLED
2192 else if (handler->iconv_out != NULL) { 2230 else if (handler->iconv_out != NULL) {
2193 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use], 2231 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
2194 &written, in->content, &toconv); 2232 &written, in->content, &toconv);
2195 xmlBufferShrink(in, toconv); 2233 xmlBufferShrink(in, toconv);
2196 out->use += written; 2234 out->use += written;
2197 writtentot += written; 2235 writtentot += written;
2198 out->content[out->use] = 0; 2236 out->content[out->use] = 0;
(...skipping 1325 matching lines...) Expand 10 before | Expand all | Expand 10 after
3524 xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14 ); 3562 xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14 );
3525 xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15 ); 3563 xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15 );
3526 xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16 ); 3564 xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16 );
3527 } 3565 }
3528 3566
3529 #endif 3567 #endif
3530 #endif 3568 #endif
3531 3569
3532 #define bottom_encoding 3570 #define bottom_encoding
3533 #include "elfgcchack.h" 3571 #include "elfgcchack.h"
3572
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698