Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(381)

Side by Side Diff: third_party/libxml/src/HTMLparser.c

Issue 2010803004: Roll libxml to bdec2183f34b37ee89ae1d330c6ad2bb4d76605f (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Update README.chromium Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/libxml/README.chromium ('k') | third_party/libxml/src/SAX2.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * HTMLparser.c : an HTML 4.0 non-verifying parser 2 * HTMLparser.c : an HTML 4.0 non-verifying parser
3 * 3 *
4 * See Copyright for the status of this software. 4 * See Copyright for the status of this software.
5 * 5 *
6 * daniel@veillard.com 6 * daniel@veillard.com
7 */ 7 */
8 8
9 #define IN_LIBXML 9 #define IN_LIBXML
10 #include "libxml.h" 10 #include "libxml.h"
(...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after
98 /** 98 /**
99 * htmlParseErr: 99 * htmlParseErr:
100 * @ctxt: an HTML parser context 100 * @ctxt: an HTML parser context
101 * @error: the error number 101 * @error: the error number
102 * @msg: the error message 102 * @msg: the error message
103 * @str1: string infor 103 * @str1: string infor
104 * @str2: string infor 104 * @str2: string infor
105 * 105 *
106 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 106 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
107 */ 107 */
108 static void 108 static void LIBXML_ATTR_FORMAT(3,0)
109 htmlParseErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, 109 htmlParseErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
110 const char *msg, const xmlChar *str1, const xmlChar *str2) 110 const char *msg, const xmlChar *str1, const xmlChar *str2)
111 { 111 {
112 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 112 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
113 (ctxt->instate == XML_PARSER_EOF)) 113 (ctxt->instate == XML_PARSER_EOF))
114 return; 114 return;
115 if (ctxt != NULL) 115 if (ctxt != NULL)
116 ctxt->errNo = error; 116 ctxt->errNo = error;
117 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_HTML, error, 117 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_HTML, error,
118 XML_ERR_ERROR, NULL, 0, 118 XML_ERR_ERROR, NULL, 0,
119 (const char *) str1, (const char *) str2, 119 (const char *) str1, (const char *) str2,
120 NULL, 0, 0, 120 NULL, 0, 0,
121 msg, str1, str2); 121 msg, str1, str2);
122 if (ctxt != NULL) 122 if (ctxt != NULL)
123 ctxt->wellFormed = 0; 123 ctxt->wellFormed = 0;
124 } 124 }
125 125
126 /** 126 /**
127 * htmlParseErrInt: 127 * htmlParseErrInt:
128 * @ctxt: an HTML parser context 128 * @ctxt: an HTML parser context
129 * @error: the error number 129 * @error: the error number
130 * @msg: the error message 130 * @msg: the error message
131 * @val: integer info 131 * @val: integer info
132 * 132 *
133 * Handle a fatal parser error, i.e. violating Well-Formedness constraints 133 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
134 */ 134 */
135 static void 135 static void LIBXML_ATTR_FORMAT(3,0)
136 htmlParseErrInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 136 htmlParseErrInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
137 const char *msg, int val) 137 const char *msg, int val)
138 { 138 {
139 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 139 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
140 (ctxt->instate == XML_PARSER_EOF)) 140 (ctxt->instate == XML_PARSER_EOF))
141 return; 141 return;
142 if (ctxt != NULL) 142 if (ctxt != NULL)
143 ctxt->errNo = error; 143 ctxt->errNo = error;
144 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_HTML, error, 144 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_HTML, error,
145 XML_ERR_ERROR, NULL, 0, NULL, NULL, 145 XML_ERR_ERROR, NULL, 0, NULL, NULL,
(...skipping 150 matching lines...) Expand 10 before | Expand all | Expand 10 after
296 296
297 #define UPPER (toupper(*ctxt->input->cur)) 297 #define UPPER (toupper(*ctxt->input->cur))
298 298
299 #define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input-> col+=(val) 299 #define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input-> col+=(val)
300 300
301 #define NXT(val) ctxt->input->cur[(val)] 301 #define NXT(val) ctxt->input->cur[(val)]
302 302
303 #define UPP(val) (toupper(ctxt->input->cur[(val)])) 303 #define UPP(val) (toupper(ctxt->input->cur[(val)]))
304 304
305 #define CUR_PTR ctxt->input->cur 305 #define CUR_PTR ctxt->input->cur
306 #define BASE_PTR ctxt->input->base
306 307
307 #define SHRINK if ((ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \ 308 #define SHRINK if ((ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
308 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \ 309 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
309 xmlParserInputShrink(ctxt->input) 310 xmlParserInputShrink(ctxt->input)
310 311
311 #define GROW if ((ctxt->progressive == 0) && \ 312 #define GROW if ((ctxt->progressive == 0) && \
312 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \ 313 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
313 xmlParserInputGrow(ctxt->input, INPUT_CHUNK) 314 xmlParserInputGrow(ctxt->input, INPUT_CHUNK)
314 315
315 #define CURRENT ((int) (*ctxt->input->cur)) 316 #define CURRENT ((int) (*ctxt->input->cur))
(...skipping 2148 matching lines...) Expand 10 before | Expand all | Expand 10 after
2464 if (((*in >= 0x61) && (*in <= 0x7A)) || 2465 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2465 ((*in >= 0x41) && (*in <= 0x5A)) || 2466 ((*in >= 0x41) && (*in <= 0x5A)) ||
2466 (*in == '_') || (*in == ':')) { 2467 (*in == '_') || (*in == ':')) {
2467 in++; 2468 in++;
2468 while (((*in >= 0x61) && (*in <= 0x7A)) || 2469 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2469 ((*in >= 0x41) && (*in <= 0x5A)) || 2470 ((*in >= 0x41) && (*in <= 0x5A)) ||
2470 ((*in >= 0x30) && (*in <= 0x39)) || 2471 ((*in >= 0x30) && (*in <= 0x39)) ||
2471 (*in == '_') || (*in == '-') || 2472 (*in == '_') || (*in == '-') ||
2472 (*in == ':') || (*in == '.')) 2473 (*in == ':') || (*in == '.'))
2473 in++; 2474 in++;
2475
2476 if (in == ctxt->input->end)
2477 return(NULL);
2478
2474 if ((*in > 0) && (*in < 0x80)) { 2479 if ((*in > 0) && (*in < 0x80)) {
2475 count = in - ctxt->input->cur; 2480 count = in - ctxt->input->cur;
2476 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); 2481 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
2477 ctxt->input->cur = in; 2482 ctxt->input->cur = in;
2478 ctxt->nbChars += count; 2483 ctxt->nbChars += count;
2479 ctxt->input->col += count; 2484 ctxt->input->col += count;
2480 return(ret); 2485 return(ret);
2481 } 2486 }
2482 } 2487 }
2483 return(htmlParseNameComplex(ctxt)); 2488 return(htmlParseNameComplex(ctxt));
2484 } 2489 }
2485 2490
2486 static const xmlChar * 2491 static const xmlChar *
2487 htmlParseNameComplex(xmlParserCtxtPtr ctxt) { 2492 htmlParseNameComplex(xmlParserCtxtPtr ctxt) {
2488 int len = 0, l; 2493 int len = 0, l;
2489 int c; 2494 int c;
2490 int count = 0; 2495 int count = 0;
2496 const xmlChar *base = ctxt->input->base;
2491 2497
2492 /* 2498 /*
2493 * Handler for more complex cases 2499 * Handler for more complex cases
2494 */ 2500 */
2495 GROW; 2501 GROW;
2496 c = CUR_CHAR(l); 2502 c = CUR_CHAR(l);
2497 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ 2503 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2498 (!IS_LETTER(c) && (c != '_') && 2504 (!IS_LETTER(c) && (c != '_') &&
2499 (c != ':'))) { 2505 (c != ':'))) {
2500 return(NULL); 2506 return(NULL);
2501 } 2507 }
2502 2508
2503 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ 2509 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
2504 ((IS_LETTER(c)) || (IS_DIGIT(c)) || 2510 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2505 (c == '.') || (c == '-') || 2511 (c == '.') || (c == '-') ||
2506 (c == '_') || (c == ':') || 2512 (c == '_') || (c == ':') ||
2507 (IS_COMBINING(c)) || 2513 (IS_COMBINING(c)) ||
2508 (IS_EXTENDER(c)))) { 2514 (IS_EXTENDER(c)))) {
2509 if (count++ > 100) { 2515 if (count++ > 100) {
2510 count = 0; 2516 count = 0;
2511 GROW; 2517 GROW;
2512 } 2518 }
2513 len += l; 2519 len += l;
2514 NEXTL(l); 2520 NEXTL(l);
2515 c = CUR_CHAR(l); 2521 c = CUR_CHAR(l);
2522 if (ctxt->input->base != base) {
2523 /*
2524 * We changed encoding from an unknown encoding
2525 * Input buffer changed location, so we better start again
2526 */
2527 return(htmlParseNameComplex(ctxt));
2528 }
2516 } 2529 }
2530
2531 if (ctxt->input->base > ctxt->input->cur - len)
2532 return(NULL);
2533
2517 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); 2534 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
2518 } 2535 }
2519 2536
2520 2537
2521 /** 2538 /**
2522 * htmlParseHTMLAttribute: 2539 * htmlParseHTMLAttribute:
2523 * @ctxt: an HTML parser context 2540 * @ctxt: an HTML parser context
2524 * @stop: a char stop value 2541 * @stop: a char stop value
2525 * 2542 *
2526 * parse an HTML attribute value till the stop (quote), if 2543 * parse an HTML attribute value till the stop (quote), if
(...skipping 231 matching lines...) Expand 10 before | Expand all | Expand 10 after
2758 * 2775 *
2759 * parse an HTML Literal 2776 * parse an HTML Literal
2760 * 2777 *
2761 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") 2778 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2762 * 2779 *
2763 * Returns the SystemLiteral parsed or NULL 2780 * Returns the SystemLiteral parsed or NULL
2764 */ 2781 */
2765 2782
2766 static xmlChar * 2783 static xmlChar *
2767 htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) { 2784 htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) {
2768 const xmlChar *q; 2785 size_t len = 0, startPosition = 0;
2769 xmlChar *ret = NULL; 2786 xmlChar *ret = NULL;
2770 2787
2771 if (CUR == '"') { 2788 if (CUR == '"') {
2772 NEXT; 2789 NEXT;
2773 » q = CUR_PTR; 2790
2774 » while ((IS_CHAR_CH(CUR)) && (CUR != '"')) 2791 if (CUR_PTR < BASE_PTR)
2792 return(ret);
2793 startPosition = CUR_PTR - BASE_PTR;
2794
2795 » while ((IS_CHAR_CH(CUR)) && (CUR != '"')) {
2775 NEXT; 2796 NEXT;
2797 len++;
2798 }
2776 if (!IS_CHAR_CH(CUR)) { 2799 if (!IS_CHAR_CH(CUR)) {
2777 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, 2800 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
2778 "Unfinished SystemLiteral\n", NULL, NULL); 2801 "Unfinished SystemLiteral\n", NULL, NULL);
2779 } else { 2802 } else {
2780 » ret = xmlStrndup(q, CUR_PTR - q); 2803 » ret = xmlStrndup((BASE_PTR+startPosition), len);
2781 NEXT; 2804 NEXT;
2782 } 2805 }
2783 } else if (CUR == '\'') { 2806 } else if (CUR == '\'') {
2784 NEXT; 2807 NEXT;
2785 » q = CUR_PTR; 2808
2786 » while ((IS_CHAR_CH(CUR)) && (CUR != '\'')) 2809 if (CUR_PTR < BASE_PTR)
2810 return(ret);
2811 startPosition = CUR_PTR - BASE_PTR;
2812
2813 » while ((IS_CHAR_CH(CUR)) && (CUR != '\'')) {
2787 NEXT; 2814 NEXT;
2815 len++;
2816 }
2788 if (!IS_CHAR_CH(CUR)) { 2817 if (!IS_CHAR_CH(CUR)) {
2789 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, 2818 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
2790 "Unfinished SystemLiteral\n", NULL, NULL); 2819 "Unfinished SystemLiteral\n", NULL, NULL);
2791 } else { 2820 } else {
2792 » ret = xmlStrndup(q, CUR_PTR - q); 2821 » ret = xmlStrndup((BASE_PTR+startPosition), len);
2793 NEXT; 2822 NEXT;
2794 } 2823 }
2795 } else { 2824 } else {
2796 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, 2825 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED,
2797 " or ' expected\n", NULL, NULL); 2826 " or ' expected\n", NULL, NULL);
2798 } 2827 }
2799 2828
2800 return(ret); 2829 return(ret);
2801 } 2830 }
2802 2831
2803 /** 2832 /**
2804 * htmlParsePubidLiteral: 2833 * htmlParsePubidLiteral:
2805 * @ctxt: an HTML parser context 2834 * @ctxt: an HTML parser context
2806 * 2835 *
2807 * parse an HTML public literal 2836 * parse an HTML public literal
2808 * 2837 *
2809 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 2838 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2810 * 2839 *
2811 * Returns the PubidLiteral parsed or NULL. 2840 * Returns the PubidLiteral parsed or NULL.
2812 */ 2841 */
2813 2842
2814 static xmlChar * 2843 static xmlChar *
2815 htmlParsePubidLiteral(htmlParserCtxtPtr ctxt) { 2844 htmlParsePubidLiteral(htmlParserCtxtPtr ctxt) {
2816 const xmlChar *q; 2845 size_t len = 0, startPosition = 0;
2817 xmlChar *ret = NULL; 2846 xmlChar *ret = NULL;
2818 /* 2847 /*
2819 * Name ::= (Letter | '_') (NameChar)* 2848 * Name ::= (Letter | '_') (NameChar)*
2820 */ 2849 */
2821 if (CUR == '"') { 2850 if (CUR == '"') {
2822 NEXT; 2851 NEXT;
2823 » q = CUR_PTR; 2852
2824 » while (IS_PUBIDCHAR_CH(CUR)) NEXT; 2853 if (CUR_PTR < BASE_PTR)
2854 return(ret);
2855 startPosition = CUR_PTR - BASE_PTR;
2856
2857 while (IS_PUBIDCHAR_CH(CUR)) {
2858 len++;
2859 NEXT;
2860 }
2861
2825 if (CUR != '"') { 2862 if (CUR != '"') {
2826 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, 2863 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
2827 "Unfinished PubidLiteral\n", NULL, NULL); 2864 "Unfinished PubidLiteral\n", NULL, NULL);
2828 } else { 2865 } else {
2829 » ret = xmlStrndup(q, CUR_PTR - q); 2866 » ret = xmlStrndup((BASE_PTR + startPosition), len);
2830 NEXT; 2867 NEXT;
2831 } 2868 }
2832 } else if (CUR == '\'') { 2869 } else if (CUR == '\'') {
2833 NEXT; 2870 NEXT;
2834 » q = CUR_PTR; 2871
2835 » while ((IS_PUBIDCHAR_CH(CUR)) && (CUR != '\'')) 2872 if (CUR_PTR < BASE_PTR)
2836 » NEXT; 2873 return(ret);
2874 startPosition = CUR_PTR - BASE_PTR;
2875
2876 while ((IS_PUBIDCHAR_CH(CUR)) && (CUR != '\'')){
2877 len++;
2878 NEXT;
2879 }
2880
2837 if (CUR != '\'') { 2881 if (CUR != '\'') {
2838 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, 2882 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
2839 "Unfinished PubidLiteral\n", NULL, NULL); 2883 "Unfinished PubidLiteral\n", NULL, NULL);
2840 } else { 2884 } else {
2841 » ret = xmlStrndup(q, CUR_PTR - q); 2885 » ret = xmlStrndup((BASE_PTR + startPosition), len);
2842 NEXT; 2886 NEXT;
2843 } 2887 }
2844 } else { 2888 } else {
2845 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, 2889 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED,
2846 "PubidLiteral \" or ' expected\n", NULL, NULL); 2890 "PubidLiteral \" or ' expected\n", NULL, NULL);
2847 } 2891 }
2848 2892
2849 return(ret); 2893 return(ret);
2850 } 2894 }
2851 2895
(...skipping 4260 matching lines...) Expand 10 before | Expand all | Expand 10 after
7112 xmlFreeParserInputBuffer(input); 7156 xmlFreeParserInputBuffer(input);
7113 return (NULL); 7157 return (NULL);
7114 } 7158 }
7115 inputPush(ctxt, stream); 7159 inputPush(ctxt, stream);
7116 return (htmlDoRead(ctxt, URL, encoding, options, 1)); 7160 return (htmlDoRead(ctxt, URL, encoding, options, 1));
7117 } 7161 }
7118 7162
7119 #define bottom_HTMLparser 7163 #define bottom_HTMLparser
7120 #include "elfgcchack.h" 7164 #include "elfgcchack.h"
7121 #endif /* LIBXML_HTML_ENABLED */ 7165 #endif /* LIBXML_HTML_ENABLED */
OLDNEW
« no previous file with comments | « third_party/libxml/README.chromium ('k') | third_party/libxml/src/SAX2.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698