third_party/libxml/src/HTMLparser.c - Issue 2010803004: Roll libxml to bdec2183f34b37ee89ae1d330c6ad2bb4d76605f

Side by Side Diff: third_party/libxml/src/HTMLparser.c

Issue 2010803004: Roll libxml to bdec2183f34b37ee89ae1d330c6ad2bb4d76605f (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Update README.chromium Created 4 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * HTMLparser.c : an HTML 4.0 non-verifying parser	2 * HTMLparser.c : an HTML 4.0 non-verifying parser

3 *	3 *

4 * See Copyright for the status of this software.	4 * See Copyright for the status of this software.

5 *	5 *

6 * daniel@veillard.com	6 * daniel@veillard.com

7 */	7 */

8	8

9 #define IN_LIBXML	9 #define IN_LIBXML

10 #include "libxml.h"	10 #include "libxml.h"

(...skipping 87 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
98 /**	98 /**

99 * htmlParseErr:	99 * htmlParseErr:

100 * @ctxt: an HTML parser context	100 * @ctxt: an HTML parser context

101 * @error: the error number	101 * @error: the error number

102 * @msg: the error message	102 * @msg: the error message

103 * @str1: string infor	103 * @str1: string infor

104 * @str2: string infor	104 * @str2: string infor

105 *	105 *

106 * Handle a fatal parser error, i.e. violating Well-Formedness constraints	106 * Handle a fatal parser error, i.e. violating Well-Formedness constraints

107 */	107 */

108 static void	108 static void LIBXML_ATTR_FORMAT(3,0)

109 htmlParseErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,	109 htmlParseErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,

110 const char *msg, const xmlChar *str1, const xmlChar *str2)	110 const char *msg, const xmlChar *str1, const xmlChar *str2)

111 {	111 {

112 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&	112 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&

113 (ctxt->instate == XML_PARSER_EOF))	113 (ctxt->instate == XML_PARSER_EOF))

114 return;	114 return;

115 if (ctxt != NULL)	115 if (ctxt != NULL)

116 ctxt->errNo = error;	116 ctxt->errNo = error;

117 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_HTML, error,	117 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_HTML, error,

118 XML_ERR_ERROR, NULL, 0,	118 XML_ERR_ERROR, NULL, 0,

119 (const char *) str1, (const char *) str2,	119 (const char *) str1, (const char *) str2,

120 NULL, 0, 0,	120 NULL, 0, 0,

121 msg, str1, str2);	121 msg, str1, str2);

122 if (ctxt != NULL)	122 if (ctxt != NULL)

123 ctxt->wellFormed = 0;	123 ctxt->wellFormed = 0;

124 }	124 }

125	125

126 /**	126 /**

127 * htmlParseErrInt:	127 * htmlParseErrInt:

128 * @ctxt: an HTML parser context	128 * @ctxt: an HTML parser context

129 * @error: the error number	129 * @error: the error number

130 * @msg: the error message	130 * @msg: the error message

131 * @val: integer info	131 * @val: integer info

132 *	132 *

133 * Handle a fatal parser error, i.e. violating Well-Formedness constraints	133 * Handle a fatal parser error, i.e. violating Well-Formedness constraints

134 */	134 */

135 static void	135 static void LIBXML_ATTR_FORMAT(3,0)

136 htmlParseErrInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,	136 htmlParseErrInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,

137 const char *msg, int val)	137 const char *msg, int val)

138 {	138 {

139 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&	139 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&

140 (ctxt->instate == XML_PARSER_EOF))	140 (ctxt->instate == XML_PARSER_EOF))

141 return;	141 return;

142 if (ctxt != NULL)	142 if (ctxt != NULL)

143 ctxt->errNo = error;	143 ctxt->errNo = error;

144 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_HTML, error,	144 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_HTML, error,

145 XML_ERR_ERROR, NULL, 0, NULL, NULL,	145 XML_ERR_ERROR, NULL, 0, NULL, NULL,

(...skipping 150 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
296	296

297 #define UPPER (toupper(*ctxt->input->cur))	297 #define UPPER (toupper(*ctxt->input->cur))

298	298

299 #define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input-> col+=(val)	299 #define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input-> col+=(val)

300	300

301 #define NXT(val) ctxt->input->cur[(val)]	301 #define NXT(val) ctxt->input->cur[(val)]

302	302

303 #define UPP(val) (toupper(ctxt->input->cur[(val)]))	303 #define UPP(val) (toupper(ctxt->input->cur[(val)]))

304	304

305 #define CUR_PTR ctxt->input->cur	305 #define CUR_PTR ctxt->input->cur

	306 #define BASE_PTR ctxt->input->base

306	307

307 #define SHRINK if ((ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \	308 #define SHRINK if ((ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \

308 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \	309 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \

309 xmlParserInputShrink(ctxt->input)	310 xmlParserInputShrink(ctxt->input)

310	311

311 #define GROW if ((ctxt->progressive == 0) && \	312 #define GROW if ((ctxt->progressive == 0) && \

312 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \	313 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \

313 xmlParserInputGrow(ctxt->input, INPUT_CHUNK)	314 xmlParserInputGrow(ctxt->input, INPUT_CHUNK)

314	315

315 #define CURRENT ((int) (*ctxt->input->cur))	316 #define CURRENT ((int) (*ctxt->input->cur))

(...skipping 2148 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2464 if (((*in >= 0x61) && (*in <= 0x7A)) ||	2465 if (((*in >= 0x61) && (*in <= 0x7A)) ||

2465 ((*in >= 0x41) && (*in <= 0x5A)) ||	2466 ((*in >= 0x41) && (*in <= 0x5A)) ||

2466 (*in == '_') || (*in == ':')) {	2467 (*in == '_') || (*in == ':')) {

2467 in++;	2468 in++;

2468 while (((*in >= 0x61) && (*in <= 0x7A)) ||	2469 while (((*in >= 0x61) && (*in <= 0x7A)) ||

2469 ((*in >= 0x41) && (*in <= 0x5A)) ||	2470 ((*in >= 0x41) && (*in <= 0x5A)) ||

2470 ((*in >= 0x30) && (*in <= 0x39)) ||	2471 ((*in >= 0x30) && (*in <= 0x39)) ||

2471 (*in == '_') || (*in == '-') ||	2472 (*in == '_') || (*in == '-') ||

2472 (*in == ':') || (*in == '.'))	2473 (*in == ':') || (*in == '.'))

2473 in++;	2474 in++;

	2475

	2476 if (in == ctxt->input->end)

	2477 return(NULL);

	2478

2474 if ((*in > 0) && (*in < 0x80)) {	2479 if ((*in > 0) && (*in < 0x80)) {

2475 count = in - ctxt->input->cur;	2480 count = in - ctxt->input->cur;

2476 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);	2481 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);

2477 ctxt->input->cur = in;	2482 ctxt->input->cur = in;

2478 ctxt->nbChars += count;	2483 ctxt->nbChars += count;

2479 ctxt->input->col += count;	2484 ctxt->input->col += count;

2480 return(ret);	2485 return(ret);

2481 }	2486 }

2482 }	2487 }

2483 return(htmlParseNameComplex(ctxt));	2488 return(htmlParseNameComplex(ctxt));

2484 }	2489 }

2485	2490

2486 static const xmlChar *	2491 static const xmlChar *

2487 htmlParseNameComplex(xmlParserCtxtPtr ctxt) {	2492 htmlParseNameComplex(xmlParserCtxtPtr ctxt) {

2488 int len = 0, l;	2493 int len = 0, l;

2489 int c;	2494 int c;

2490 int count = 0;	2495 int count = 0;

	2496 const xmlChar *base = ctxt->input->base;

2491	2497

2492 /*	2498 /*

2493 * Handler for more complex cases	2499 * Handler for more complex cases

2494 */	2500 */

2495 GROW;	2501 GROW;

2496 c = CUR_CHAR(l);	2502 c = CUR_CHAR(l);

2497 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */	2503 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */

2498 (!IS_LETTER(c) && (c != '_') &&	2504 (!IS_LETTER(c) && (c != '_') &&

2499 (c != ':'))) {	2505 (c != ':'))) {

2500 return(NULL);	2506 return(NULL);

2501 }	2507 }

2502	2508

2503 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */	2509 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */

2504 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||	2510 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||

2505 (c == '.') || (c == '-') ||	2511 (c == '.') || (c == '-') ||

2506 (c == '_') || (c == ':') ||	2512 (c == '_') || (c == ':') ||

2507 (IS_COMBINING(c)) ||	2513 (IS_COMBINING(c)) ||

2508 (IS_EXTENDER(c)))) {	2514 (IS_EXTENDER(c)))) {

2509 if (count++ > 100) {	2515 if (count++ > 100) {

2510 count = 0;	2516 count = 0;

2511 GROW;	2517 GROW;

2512 }	2518 }

2513 len += l;	2519 len += l;

2514 NEXTL(l);	2520 NEXTL(l);

2515 c = CUR_CHAR(l);	2521 c = CUR_CHAR(l);

	2522 if (ctxt->input->base != base) {

	2523 /*

	2524 * We changed encoding from an unknown encoding

	2525 * Input buffer changed location, so we better start again

	2526 */

	2527 return(htmlParseNameComplex(ctxt));

	2528 }

2516 }	2529 }

	2530

	2531 if (ctxt->input->base > ctxt->input->cur - len)

	2532 return(NULL);

	2533

2517 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));	2534 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));

2518 }	2535 }

2519	2536

2520	2537

2521 /**	2538 /**

2522 * htmlParseHTMLAttribute:	2539 * htmlParseHTMLAttribute:

2523 * @ctxt: an HTML parser context	2540 * @ctxt: an HTML parser context

2524 * @stop: a char stop value	2541 * @stop: a char stop value

2525 *	2542 *

2526 * parse an HTML attribute value till the stop (quote), if	2543 * parse an HTML attribute value till the stop (quote), if

(...skipping 231 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2758 *	2775 *

2759 * parse an HTML Literal	2776 * parse an HTML Literal

2760 *	2777 *

2761 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")	2778 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")

2762 *	2779 *

2763 * Returns the SystemLiteral parsed or NULL	2780 * Returns the SystemLiteral parsed or NULL

2764 */	2781 */

2765	2782

2766 static xmlChar *	2783 static xmlChar *

2767 htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) {	2784 htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) {

2768 const xmlChar *q;	2785 size_t len = 0, startPosition = 0;

2769 xmlChar *ret = NULL;	2786 xmlChar *ret = NULL;

2770	2787

2771 if (CUR == '"') {	2788 if (CUR == '"') {

2772 NEXT;	2789 NEXT;

2773 » q = CUR_PTR;	2790

2774 » while ((IS_CHAR_CH(CUR)) && (CUR != '"'))	2791 if (CUR_PTR < BASE_PTR)

	2792 return(ret);

	2793 startPosition = CUR_PTR - BASE_PTR;

	2794

	2795 » while ((IS_CHAR_CH(CUR)) && (CUR != '"')) {

2775 NEXT;	2796 NEXT;

	2797 len++;

	2798 }

2776 if (!IS_CHAR_CH(CUR)) {	2799 if (!IS_CHAR_CH(CUR)) {

2777 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,	2800 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,

2778 "Unfinished SystemLiteral\n", NULL, NULL);	2801 "Unfinished SystemLiteral\n", NULL, NULL);

2779 } else {	2802 } else {

2780 » ret = xmlStrndup(q, CUR_PTR - q);	2803 » ret = xmlStrndup((BASE_PTR+startPosition), len);

2781 NEXT;	2804 NEXT;

2782 }	2805 }

2783 } else if (CUR == '\'') {	2806 } else if (CUR == '\'') {

2784 NEXT;	2807 NEXT;

2785 » q = CUR_PTR;	2808

2786 » while ((IS_CHAR_CH(CUR)) && (CUR != '\''))	2809 if (CUR_PTR < BASE_PTR)

	2810 return(ret);

	2811 startPosition = CUR_PTR - BASE_PTR;

	2812

	2813 » while ((IS_CHAR_CH(CUR)) && (CUR != '\'')) {

2787 NEXT;	2814 NEXT;

	2815 len++;

	2816 }

2788 if (!IS_CHAR_CH(CUR)) {	2817 if (!IS_CHAR_CH(CUR)) {

2789 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,	2818 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,

2790 "Unfinished SystemLiteral\n", NULL, NULL);	2819 "Unfinished SystemLiteral\n", NULL, NULL);

2791 } else {	2820 } else {

2792 » ret = xmlStrndup(q, CUR_PTR - q);	2821 » ret = xmlStrndup((BASE_PTR+startPosition), len);

2793 NEXT;	2822 NEXT;

2794 }	2823 }

2795 } else {	2824 } else {

2796 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED,	2825 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED,

2797 " or ' expected\n", NULL, NULL);	2826 " or ' expected\n", NULL, NULL);

2798 }	2827 }

2799	2828

2800 return(ret);	2829 return(ret);

2801 }	2830 }

2802	2831

2803 /**	2832 /**

2804 * htmlParsePubidLiteral:	2833 * htmlParsePubidLiteral:

2805 * @ctxt: an HTML parser context	2834 * @ctxt: an HTML parser context

2806 *	2835 *

2807 * parse an HTML public literal	2836 * parse an HTML public literal

2808 *	2837 *

2809 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"	2838 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"

2810 *	2839 *

2811 * Returns the PubidLiteral parsed or NULL.	2840 * Returns the PubidLiteral parsed or NULL.

2812 */	2841 */

2813	2842

2814 static xmlChar *	2843 static xmlChar *

2815 htmlParsePubidLiteral(htmlParserCtxtPtr ctxt) {	2844 htmlParsePubidLiteral(htmlParserCtxtPtr ctxt) {

2816 const xmlChar *q;	2845 size_t len = 0, startPosition = 0;

2817 xmlChar *ret = NULL;	2846 xmlChar *ret = NULL;

2818 /*	2847 /*

2819 * Name ::= (Letter | '_') (NameChar)*	2848 * Name ::= (Letter | '_') (NameChar)*

2820 */	2849 */

2821 if (CUR == '"') {	2850 if (CUR == '"') {

2822 NEXT;	2851 NEXT;

2823 » q = CUR_PTR;	2852

2824 » while (IS_PUBIDCHAR_CH(CUR)) NEXT;	2853 if (CUR_PTR < BASE_PTR)

	2854 return(ret);

	2855 startPosition = CUR_PTR - BASE_PTR;

	2856

	2857 while (IS_PUBIDCHAR_CH(CUR)) {

	2858 len++;

	2859 NEXT;

	2860 }

	2861

2825 if (CUR != '"') {	2862 if (CUR != '"') {

2826 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,	2863 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,

2827 "Unfinished PubidLiteral\n", NULL, NULL);	2864 "Unfinished PubidLiteral\n", NULL, NULL);

2828 } else {	2865 } else {

2829 » ret = xmlStrndup(q, CUR_PTR - q);	2866 » ret = xmlStrndup((BASE_PTR + startPosition), len);

2830 NEXT;	2867 NEXT;

2831 }	2868 }

2832 } else if (CUR == '\'') {	2869 } else if (CUR == '\'') {

2833 NEXT;	2870 NEXT;

2834 » q = CUR_PTR;	2871

2835 » while ((IS_PUBIDCHAR_CH(CUR)) && (CUR != '\''))	2872 if (CUR_PTR < BASE_PTR)

2836 » NEXT;	2873 return(ret);

	2874 startPosition = CUR_PTR - BASE_PTR;

	2875

	2876 while ((IS_PUBIDCHAR_CH(CUR)) && (CUR != '\'')){

	2877 len++;

	2878 NEXT;

	2879 }

	2880

2837 if (CUR != '\'') {	2881 if (CUR != '\'') {

2838 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,	2882 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,

2839 "Unfinished PubidLiteral\n", NULL, NULL);	2883 "Unfinished PubidLiteral\n", NULL, NULL);

2840 } else {	2884 } else {

2841 » ret = xmlStrndup(q, CUR_PTR - q);	2885 » ret = xmlStrndup((BASE_PTR + startPosition), len);

2842 NEXT;	2886 NEXT;

2843 }	2887 }

2844 } else {	2888 } else {

2845 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED,	2889 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED,

2846 "PubidLiteral \" or ' expected\n", NULL, NULL);	2890 "PubidLiteral \" or ' expected\n", NULL, NULL);

2847 }	2891 }

2848	2892

2849 return(ret);	2893 return(ret);

2850 }	2894 }

2851	2895

(...skipping 4260 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
7112 xmlFreeParserInputBuffer(input);	7156 xmlFreeParserInputBuffer(input);

7113 return (NULL);	7157 return (NULL);

7114 }	7158 }

7115 inputPush(ctxt, stream);	7159 inputPush(ctxt, stream);

7116 return (htmlDoRead(ctxt, URL, encoding, options, 1));	7160 return (htmlDoRead(ctxt, URL, encoding, options, 1));

7117 }	7161 }

7118	7162

7119 #define bottom_HTMLparser	7163 #define bottom_HTMLparser

7120 #include "elfgcchack.h"	7164 #include "elfgcchack.h"

7121 #endif /* LIBXML_HTML_ENABLED */	7165 #endif /* LIBXML_HTML_ENABLED */

OLD	NEW

« no previous file with comments | « third_party/libxml/README.chromium ('k') | third_party/libxml/src/SAX2.c » ('j') | no next file with comments »