third_party/libxml/src/HTMLparser.c - Issue 1752223002: Roll libxml to 2.9.3

Side by Side Diff: third_party/libxml/src/HTMLparser.c

Issue 1752223002: Roll libxml to 2.9.3 (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Re-cherry-pick fprintf formatting fix. Created 4 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 /*	1 /*

2 * HTMLparser.c : an HTML 4.0 non-verifying parser	2 * HTMLparser.c : an HTML 4.0 non-verifying parser

3 *	3 *

4 * See Copyright for the status of this software.	4 * See Copyright for the status of this software.

5 *	5 *

6 * daniel@veillard.com	6 * daniel@veillard.com

7 */	7 */

8	8

9 #define IN_LIBXML	9 #define IN_LIBXML

10 #include "libxml.h"	10 #include "libxml.h"

(...skipping 2930 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2941 */	2941 */

2942 ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar);	2942 ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar);

2943 } else if (ctxt->sax->characters != NULL) {	2943 } else if (ctxt->sax->characters != NULL) {

2944 ctxt->sax->characters(ctxt->userData, buf, nbchar);	2944 ctxt->sax->characters(ctxt->userData, buf, nbchar);

2945 }	2945 }

2946 }	2946 }

2947 }	2947 }

2948	2948

2949	2949

2950 /**	2950 /**

2951 * htmlParseCharData:	2951 * htmlParseCharDataInternal:

2952 * @ctxt: an HTML parser context	2952 * @ctxt: an HTML parser context

	2953 * @readahead: optional read ahead character in ascii range

2953 *	2954 *

2954 * parse a CharData section.	2955 * parse a CharData section.

2955 * if we are within a CDATA section ']]>' marks an end of section.	2956 * if we are within a CDATA section ']]>' marks an end of section.

2956 *	2957 *

2957 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)	2958 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)

2958 */	2959 */

2959	2960

2960 static void	2961 static void

2961 htmlParseCharData(htmlParserCtxtPtr ctxt) {	2962 htmlParseCharDataInternal(htmlParserCtxtPtr ctxt, int readahead) {

2962 xmlChar buf[HTML_PARSER_BIG_BUFFER_SIZE + 5];	2963 xmlChar buf[HTML_PARSER_BIG_BUFFER_SIZE + 6];

2963 int nbchar = 0;	2964 int nbchar = 0;

2964 int cur, l;	2965 int cur, l;

2965 int chunk = 0;	2966 int chunk = 0;

2966	2967

	2968 if (readahead)

	2969 buf[nbchar++] = readahead;

	2970

2967 SHRINK;	2971 SHRINK;

2968 cur = CUR_CHAR(l);	2972 cur = CUR_CHAR(l);

2969 while (((cur != '<') || (ctxt->token == '<')) &&	2973 while (((cur != '<') || (ctxt->token == '<')) &&

2970 ((cur != '&') || (ctxt->token == '&')) &&	2974 ((cur != '&') || (ctxt->token == '&')) &&

2971 (cur != 0)) {	2975 (cur != 0)) {

2972 if (!(IS_CHAR(cur))) {	2976 if (!(IS_CHAR(cur))) {

2973 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,	2977 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,

2974 "Invalid char in CDATA 0x%X\n", cur);	2978 "Invalid char in CDATA 0x%X\n", cur);

2975 } else {	2979 } else {

2976 COPY_BUF(l,buf,nbchar,cur);	2980 COPY_BUF(l,buf,nbchar,cur);

(...skipping 59 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3036 } else {	3040 } else {

3037 /*	3041 /*

3038 * Loop detection	3042 * Loop detection

3039 */	3043 */

3040 if (cur == 0)	3044 if (cur == 0)

3041 ctxt->instate = XML_PARSER_EOF;	3045 ctxt->instate = XML_PARSER_EOF;

3042 }	3046 }

3043 }	3047 }

3044	3048

3045 /**	3049 /**

	3050 * htmlParseCharData:

	3051 * @ctxt: an HTML parser context

	3052 *

	3053 * parse a CharData section.

	3054 * if we are within a CDATA section ']]>' marks an end of section.

	3055 *

	3056 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)

	3057 */

	3058

	3059 static void

	3060 htmlParseCharData(htmlParserCtxtPtr ctxt) {

	3061 htmlParseCharDataInternal(ctxt, 0);

	3062 }

	3063

	3064 /**

3046 * htmlParseExternalID:	3065 * htmlParseExternalID:

3047 * @ctxt: an HTML parser context	3066 * @ctxt: an HTML parser context

3048 * @publicID: a xmlChar** receiving PubidLiteral	3067 * @publicID: a xmlChar** receiving PubidLiteral

3049 *	3068 *

3050 * Parse an External ID or a Public ID	3069 * Parse an External ID or a Public ID

3051 *	3070 *

3052 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral	3071 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral

3053 * | 'PUBLIC' S PubidLiteral S SystemLiteral	3072 * | 'PUBLIC' S PubidLiteral S SystemLiteral

3054 *	3073 *

3055 * [83] PublicID ::= 'PUBLIC' S PubidLiteral	3074 * [83] PublicID ::= 'PUBLIC' S PubidLiteral

(...skipping 182 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3238 state = ctxt->instate;	3257 state = ctxt->instate;

3239 ctxt->instate = XML_PARSER_COMMENT;	3258 ctxt->instate = XML_PARSER_COMMENT;

3240 SHRINK;	3259 SHRINK;

3241 SKIP(4);	3260 SKIP(4);

3242 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));	3261 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));

3243 if (buf == NULL) {	3262 if (buf == NULL) {

3244 htmlErrMemory(ctxt, "buffer allocation failed\n");	3263 htmlErrMemory(ctxt, "buffer allocation failed\n");

3245 ctxt->instate = state;	3264 ctxt->instate = state;

3246 return;	3265 return;

3247 }	3266 }

	3267 len = 0;

	3268 buf[len] = 0;

3248 q = CUR_CHAR(ql);	3269 q = CUR_CHAR(ql);

	3270 if (!IS_CHAR(q))

	3271 goto unfinished;

3249 NEXTL(ql);	3272 NEXTL(ql);

3250 r = CUR_CHAR(rl);	3273 r = CUR_CHAR(rl);

	3274 if (!IS_CHAR(r))

	3275 goto unfinished;

3251 NEXTL(rl);	3276 NEXTL(rl);

3252 cur = CUR_CHAR(l);	3277 cur = CUR_CHAR(l);

3253 len = 0;

3254 while (IS_CHAR(cur) &&	3278 while (IS_CHAR(cur) &&

3255 ((cur != '>') ||	3279 ((cur != '>') ||

3256 (r != '-') || (q != '-'))) {	3280 (r != '-') || (q != '-'))) {

3257 if (len + 5 >= size) {	3281 if (len + 5 >= size) {

3258 xmlChar *tmp;	3282 xmlChar *tmp;

3259	3283

3260 size *= 2;	3284 size *= 2;

3261 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));	3285 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));

3262 if (tmp == NULL) {	3286 if (tmp == NULL) {

3263 xmlFree(buf);	3287 xmlFree(buf);

(...skipping 10 matching lines...) Expand all Loading...
3274 rl = l;	3298 rl = l;

3275 NEXTL(l);	3299 NEXTL(l);

3276 cur = CUR_CHAR(l);	3300 cur = CUR_CHAR(l);

3277 if (cur == 0) {	3301 if (cur == 0) {

3278 SHRINK;	3302 SHRINK;

3279 GROW;	3303 GROW;

3280 cur = CUR_CHAR(l);	3304 cur = CUR_CHAR(l);

3281 }	3305 }

3282 }	3306 }

3283 buf[len] = 0;	3307 buf[len] = 0;

3284 if (!IS_CHAR(cur)) {	3308 if (IS_CHAR(cur)) {

3285 » htmlParseErr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,

3286 » "Comment not terminated \n<!--%.50s\n", buf, NULL);

3287 » xmlFree(buf);

3288 } else {

3289 NEXT;	3309 NEXT;

3290 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&	3310 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&

3291 (!ctxt->disableSAX))	3311 (!ctxt->disableSAX))

3292 ctxt->sax->comment(ctxt->userData, buf);	3312 ctxt->sax->comment(ctxt->userData, buf);

3293 xmlFree(buf);	3313 xmlFree(buf);

	3314 ctxt->instate = state;

	3315 return;

3294 }	3316 }

3295 ctxt->instate = state;	3317

	3318 unfinished:

	3319 htmlParseErr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,

	3320 » » "Comment not terminated \n<!--%.50s\n", buf, NULL);

	3321 xmlFree(buf);

3296 }	3322 }

3297	3323

3298 /**	3324 /**

3299 * htmlParseCharRef:	3325 * htmlParseCharRef:

3300 * @ctxt: an HTML parser context	3326 * @ctxt: an HTML parser context

3301 *	3327 *

3302 * parse Reference declarations	3328 * parse Reference declarations

3303 *	3329 *

3304 * [66] CharRef ::= '&#' [0-9]+ ';' |	3330 * [66] CharRef ::= '&#' [0-9]+ ';' |

3305 * '&#x' [0-9a-fA-F]+ ';'	3331 * '&#x' [0-9a-fA-F]+ ';'

(...skipping 377 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3683	3709

3684 atts = ctxt->atts;	3710 atts = ctxt->atts;

3685 maxatts = ctxt->maxatts;	3711 maxatts = ctxt->maxatts;

3686	3712

3687 GROW;	3713 GROW;

3688 name = htmlParseHTMLName(ctxt);	3714 name = htmlParseHTMLName(ctxt);

3689 if (name == NULL) {	3715 if (name == NULL) {

3690 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,	3716 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,

3691 "htmlParseStartTag: invalid element name\n",	3717 "htmlParseStartTag: invalid element name\n",

3692 NULL, NULL);	3718 NULL, NULL);

	3719 /* if recover preserve text on classic misconstructs */

	3720 if ((ctxt->recovery) && ((IS_BLANK_CH(CUR)) || (CUR == '<') ||

	3721 (CUR == '=') || (CUR == '>') || (((CUR >= '0') && (CUR <= '9'))))) {

	3722 htmlParseCharDataInternal(ctxt, '<');

	3723 return(-1);

	3724 }

	3725

	3726

3693 /* Dump the bogus tag like browsers do */	3727 /* Dump the bogus tag like browsers do */

3694 while ((IS_CHAR_CH(CUR)) && (CUR != '>') &&	3728 while ((IS_CHAR_CH(CUR)) && (CUR != '>') &&

3695 (ctxt->instate != XML_PARSER_EOF))	3729 (ctxt->instate != XML_PARSER_EOF))

3696 NEXT;	3730 NEXT;

3697 return -1;	3731 return -1;

3698 }	3732 }

3699 if (xmlStrEqual(name, BAD_CAST"meta"))	3733 if (xmlStrEqual(name, BAD_CAST"meta"))

3700 meta = 1;	3734 meta = 1;

3701	3735

3702 /*	3736 /*

(...skipping 1991 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5694 ctxt->checkIndex = 0;	5728 ctxt->checkIndex = 0;

5695 }	5729 }

5696 if ((avail == 1) && (terminate)) {	5730 if ((avail == 1) && (terminate)) {

5697 cur = in->cur[0];	5731 cur = in->cur[0];

5698 if ((cur != '<') && (cur != '&')) {	5732 if ((cur != '<') && (cur != '&')) {

5699 if (ctxt->sax != NULL) {	5733 if (ctxt->sax != NULL) {

5700 if (IS_BLANK_CH(cur)) {	5734 if (IS_BLANK_CH(cur)) {

5701 if (ctxt->keepBlanks) {	5735 if (ctxt->keepBlanks) {

5702 if (ctxt->sax->characters != NULL)	5736 if (ctxt->sax->characters != NULL)

5703 ctxt->sax->characters(	5737 ctxt->sax->characters(

5704 » » » » » » ctxt->userData, &cur, 1);	5738 » » » » » » ctxt->userData, &in->cur[0], 1);

5705 } else {	5739 } else {

5706 if (ctxt->sax->ignorableWhitespace != NULL)	5740 if (ctxt->sax->ignorableWhitespace != NULL)

5707 ctxt->sax->ignorableWhitespace(	5741 ctxt->sax->ignorableWhitespace(

5708 » » » » » » ctxt->userData, &cur, 1);	5742 » » » » » » ctxt->userData, &in->cur[0], 1);

5709 }	5743 }

5710 } else {	5744 } else {

5711 htmlCheckParagraph(ctxt);	5745 htmlCheckParagraph(ctxt);

5712 if (ctxt->sax->characters != NULL)	5746 if (ctxt->sax->characters != NULL)

5713 ctxt->sax->characters(	5747 ctxt->sax->characters(

5714 » » » » » ctxt->userData, &cur, 1);	5748 » » » » » ctxt->userData, &in->cur[0], 1);

5715 }	5749 }

5716 }	5750 }

5717 ctxt->token = 0;	5751 ctxt->token = 0;

5718 ctxt->checkIndex = 0;	5752 ctxt->checkIndex = 0;

5719 in->cur++;	5753 in->cur++;

5720 break;	5754 break;

5721 }	5755 }

5722 }	5756 }

5723 if (avail < 2)	5757 if (avail < 2)

5724 goto done;	5758 goto done;

(...skipping 1353 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
7078 xmlFreeParserInputBuffer(input);	7112 xmlFreeParserInputBuffer(input);

7079 return (NULL);	7113 return (NULL);

7080 }	7114 }

7081 inputPush(ctxt, stream);	7115 inputPush(ctxt, stream);

7082 return (htmlDoRead(ctxt, URL, encoding, options, 1));	7116 return (htmlDoRead(ctxt, URL, encoding, options, 1));

7083 }	7117 }

7084	7118

7085 #define bottom_HTMLparser	7119 #define bottom_HTMLparser

7086 #include "elfgcchack.h"	7120 #include "elfgcchack.h"

7087 #endif /* LIBXML_HTML_ENABLED */	7121 #endif /* LIBXML_HTML_ENABLED */

OLD	NEW

« no previous file with comments | « third_party/libxml/src/COPYING ('k') | third_party/libxml/src/HTMLtree.c » ('j') | third_party/libxml/win32/config.h » ('J')