Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(56)

Side by Side Diff: third_party/libxml/src/HTMLparser.c

Issue 1752223002: Roll libxml to 2.9.3 (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Re-cherry-pick fprintf formatting fix. Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * HTMLparser.c : an HTML 4.0 non-verifying parser 2 * HTMLparser.c : an HTML 4.0 non-verifying parser
3 * 3 *
4 * See Copyright for the status of this software. 4 * See Copyright for the status of this software.
5 * 5 *
6 * daniel@veillard.com 6 * daniel@veillard.com
7 */ 7 */
8 8
9 #define IN_LIBXML 9 #define IN_LIBXML
10 #include "libxml.h" 10 #include "libxml.h"
(...skipping 2930 matching lines...) Expand 10 before | Expand all | Expand 10 after
2941 */ 2941 */
2942 ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar); 2942 ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar);
2943 } else if (ctxt->sax->characters != NULL) { 2943 } else if (ctxt->sax->characters != NULL) {
2944 ctxt->sax->characters(ctxt->userData, buf, nbchar); 2944 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2945 } 2945 }
2946 } 2946 }
2947 } 2947 }
2948 2948
2949 2949
2950 /** 2950 /**
2951 * htmlParseCharData: 2951 * htmlParseCharDataInternal:
2952 * @ctxt: an HTML parser context 2952 * @ctxt: an HTML parser context
2953 * @readahead: optional read ahead character in ascii range
2953 * 2954 *
2954 * parse a CharData section. 2955 * parse a CharData section.
2955 * if we are within a CDATA section ']]>' marks an end of section. 2956 * if we are within a CDATA section ']]>' marks an end of section.
2956 * 2957 *
2957 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) 2958 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2958 */ 2959 */
2959 2960
2960 static void 2961 static void
2961 htmlParseCharData(htmlParserCtxtPtr ctxt) { 2962 htmlParseCharDataInternal(htmlParserCtxtPtr ctxt, int readahead) {
2962 xmlChar buf[HTML_PARSER_BIG_BUFFER_SIZE + 5]; 2963 xmlChar buf[HTML_PARSER_BIG_BUFFER_SIZE + 6];
2963 int nbchar = 0; 2964 int nbchar = 0;
2964 int cur, l; 2965 int cur, l;
2965 int chunk = 0; 2966 int chunk = 0;
2966 2967
2968 if (readahead)
2969 buf[nbchar++] = readahead;
2970
2967 SHRINK; 2971 SHRINK;
2968 cur = CUR_CHAR(l); 2972 cur = CUR_CHAR(l);
2969 while (((cur != '<') || (ctxt->token == '<')) && 2973 while (((cur != '<') || (ctxt->token == '<')) &&
2970 ((cur != '&') || (ctxt->token == '&')) && 2974 ((cur != '&') || (ctxt->token == '&')) &&
2971 (cur != 0)) { 2975 (cur != 0)) {
2972 if (!(IS_CHAR(cur))) { 2976 if (!(IS_CHAR(cur))) {
2973 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR, 2977 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
2974 "Invalid char in CDATA 0x%X\n", cur); 2978 "Invalid char in CDATA 0x%X\n", cur);
2975 } else { 2979 } else {
2976 COPY_BUF(l,buf,nbchar,cur); 2980 COPY_BUF(l,buf,nbchar,cur);
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after
3036 } else { 3040 } else {
3037 /* 3041 /*
3038 * Loop detection 3042 * Loop detection
3039 */ 3043 */
3040 if (cur == 0) 3044 if (cur == 0)
3041 ctxt->instate = XML_PARSER_EOF; 3045 ctxt->instate = XML_PARSER_EOF;
3042 } 3046 }
3043 } 3047 }
3044 3048
3045 /** 3049 /**
3050 * htmlParseCharData:
3051 * @ctxt: an HTML parser context
3052 *
3053 * parse a CharData section.
3054 * if we are within a CDATA section ']]>' marks an end of section.
3055 *
3056 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3057 */
3058
3059 static void
3060 htmlParseCharData(htmlParserCtxtPtr ctxt) {
3061 htmlParseCharDataInternal(ctxt, 0);
3062 }
3063
3064 /**
3046 * htmlParseExternalID: 3065 * htmlParseExternalID:
3047 * @ctxt: an HTML parser context 3066 * @ctxt: an HTML parser context
3048 * @publicID: a xmlChar** receiving PubidLiteral 3067 * @publicID: a xmlChar** receiving PubidLiteral
3049 * 3068 *
3050 * Parse an External ID or a Public ID 3069 * Parse an External ID or a Public ID
3051 * 3070 *
3052 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral 3071 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3053 * | 'PUBLIC' S PubidLiteral S SystemLiteral 3072 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3054 * 3073 *
3055 * [83] PublicID ::= 'PUBLIC' S PubidLiteral 3074 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
(...skipping 182 matching lines...) Expand 10 before | Expand all | Expand 10 after
3238 state = ctxt->instate; 3257 state = ctxt->instate;
3239 ctxt->instate = XML_PARSER_COMMENT; 3258 ctxt->instate = XML_PARSER_COMMENT;
3240 SHRINK; 3259 SHRINK;
3241 SKIP(4); 3260 SKIP(4);
3242 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); 3261 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3243 if (buf == NULL) { 3262 if (buf == NULL) {
3244 htmlErrMemory(ctxt, "buffer allocation failed\n"); 3263 htmlErrMemory(ctxt, "buffer allocation failed\n");
3245 ctxt->instate = state; 3264 ctxt->instate = state;
3246 return; 3265 return;
3247 } 3266 }
3267 len = 0;
3268 buf[len] = 0;
3248 q = CUR_CHAR(ql); 3269 q = CUR_CHAR(ql);
3270 if (!IS_CHAR(q))
3271 goto unfinished;
3249 NEXTL(ql); 3272 NEXTL(ql);
3250 r = CUR_CHAR(rl); 3273 r = CUR_CHAR(rl);
3274 if (!IS_CHAR(r))
3275 goto unfinished;
3251 NEXTL(rl); 3276 NEXTL(rl);
3252 cur = CUR_CHAR(l); 3277 cur = CUR_CHAR(l);
3253 len = 0;
3254 while (IS_CHAR(cur) && 3278 while (IS_CHAR(cur) &&
3255 ((cur != '>') || 3279 ((cur != '>') ||
3256 (r != '-') || (q != '-'))) { 3280 (r != '-') || (q != '-'))) {
3257 if (len + 5 >= size) { 3281 if (len + 5 >= size) {
3258 xmlChar *tmp; 3282 xmlChar *tmp;
3259 3283
3260 size *= 2; 3284 size *= 2;
3261 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); 3285 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3262 if (tmp == NULL) { 3286 if (tmp == NULL) {
3263 xmlFree(buf); 3287 xmlFree(buf);
(...skipping 10 matching lines...) Expand all
3274 rl = l; 3298 rl = l;
3275 NEXTL(l); 3299 NEXTL(l);
3276 cur = CUR_CHAR(l); 3300 cur = CUR_CHAR(l);
3277 if (cur == 0) { 3301 if (cur == 0) {
3278 SHRINK; 3302 SHRINK;
3279 GROW; 3303 GROW;
3280 cur = CUR_CHAR(l); 3304 cur = CUR_CHAR(l);
3281 } 3305 }
3282 } 3306 }
3283 buf[len] = 0; 3307 buf[len] = 0;
3284 if (!IS_CHAR(cur)) { 3308 if (IS_CHAR(cur)) {
3285 » htmlParseErr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3286 » "Comment not terminated \n<!--%.50s\n", buf, NULL);
3287 » xmlFree(buf);
3288 } else {
3289 NEXT; 3309 NEXT;
3290 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && 3310 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3291 (!ctxt->disableSAX)) 3311 (!ctxt->disableSAX))
3292 ctxt->sax->comment(ctxt->userData, buf); 3312 ctxt->sax->comment(ctxt->userData, buf);
3293 xmlFree(buf); 3313 xmlFree(buf);
3314 ctxt->instate = state;
3315 return;
3294 } 3316 }
3295 ctxt->instate = state; 3317
3318 unfinished:
3319 htmlParseErr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3320 » » "Comment not terminated \n<!--%.50s\n", buf, NULL);
3321 xmlFree(buf);
3296 } 3322 }
3297 3323
3298 /** 3324 /**
3299 * htmlParseCharRef: 3325 * htmlParseCharRef:
3300 * @ctxt: an HTML parser context 3326 * @ctxt: an HTML parser context
3301 * 3327 *
3302 * parse Reference declarations 3328 * parse Reference declarations
3303 * 3329 *
3304 * [66] CharRef ::= '&#' [0-9]+ ';' | 3330 * [66] CharRef ::= '&#' [0-9]+ ';' |
3305 * '&#x' [0-9a-fA-F]+ ';' 3331 * '&#x' [0-9a-fA-F]+ ';'
(...skipping 377 matching lines...) Expand 10 before | Expand all | Expand 10 after
3683 3709
3684 atts = ctxt->atts; 3710 atts = ctxt->atts;
3685 maxatts = ctxt->maxatts; 3711 maxatts = ctxt->maxatts;
3686 3712
3687 GROW; 3713 GROW;
3688 name = htmlParseHTMLName(ctxt); 3714 name = htmlParseHTMLName(ctxt);
3689 if (name == NULL) { 3715 if (name == NULL) {
3690 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED, 3716 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
3691 "htmlParseStartTag: invalid element name\n", 3717 "htmlParseStartTag: invalid element name\n",
3692 NULL, NULL); 3718 NULL, NULL);
3719 /* if recover preserve text on classic misconstructs */
3720 if ((ctxt->recovery) && ((IS_BLANK_CH(CUR)) || (CUR == '<') ||
3721 (CUR == '=') || (CUR == '>') || (((CUR >= '0') && (CUR <= '9'))))) {
3722 htmlParseCharDataInternal(ctxt, '<');
3723 return(-1);
3724 }
3725
3726
3693 /* Dump the bogus tag like browsers do */ 3727 /* Dump the bogus tag like browsers do */
3694 while ((IS_CHAR_CH(CUR)) && (CUR != '>') && 3728 while ((IS_CHAR_CH(CUR)) && (CUR != '>') &&
3695 (ctxt->instate != XML_PARSER_EOF)) 3729 (ctxt->instate != XML_PARSER_EOF))
3696 NEXT; 3730 NEXT;
3697 return -1; 3731 return -1;
3698 } 3732 }
3699 if (xmlStrEqual(name, BAD_CAST"meta")) 3733 if (xmlStrEqual(name, BAD_CAST"meta"))
3700 meta = 1; 3734 meta = 1;
3701 3735
3702 /* 3736 /*
(...skipping 1991 matching lines...) Expand 10 before | Expand all | Expand 10 after
5694 ctxt->checkIndex = 0; 5728 ctxt->checkIndex = 0;
5695 } 5729 }
5696 if ((avail == 1) && (terminate)) { 5730 if ((avail == 1) && (terminate)) {
5697 cur = in->cur[0]; 5731 cur = in->cur[0];
5698 if ((cur != '<') && (cur != '&')) { 5732 if ((cur != '<') && (cur != '&')) {
5699 if (ctxt->sax != NULL) { 5733 if (ctxt->sax != NULL) {
5700 if (IS_BLANK_CH(cur)) { 5734 if (IS_BLANK_CH(cur)) {
5701 if (ctxt->keepBlanks) { 5735 if (ctxt->keepBlanks) {
5702 if (ctxt->sax->characters != NULL) 5736 if (ctxt->sax->characters != NULL)
5703 ctxt->sax->characters( 5737 ctxt->sax->characters(
5704 » » » » » » ctxt->userData, &cur, 1); 5738 » » » » » » ctxt->userData, &in->cur[0], 1);
5705 } else { 5739 } else {
5706 if (ctxt->sax->ignorableWhitespace != NULL) 5740 if (ctxt->sax->ignorableWhitespace != NULL)
5707 ctxt->sax->ignorableWhitespace( 5741 ctxt->sax->ignorableWhitespace(
5708 » » » » » » ctxt->userData, &cur, 1); 5742 » » » » » » ctxt->userData, &in->cur[0], 1);
5709 } 5743 }
5710 } else { 5744 } else {
5711 htmlCheckParagraph(ctxt); 5745 htmlCheckParagraph(ctxt);
5712 if (ctxt->sax->characters != NULL) 5746 if (ctxt->sax->characters != NULL)
5713 ctxt->sax->characters( 5747 ctxt->sax->characters(
5714 » » » » » ctxt->userData, &cur, 1); 5748 » » » » » ctxt->userData, &in->cur[0], 1);
5715 } 5749 }
5716 } 5750 }
5717 ctxt->token = 0; 5751 ctxt->token = 0;
5718 ctxt->checkIndex = 0; 5752 ctxt->checkIndex = 0;
5719 in->cur++; 5753 in->cur++;
5720 break; 5754 break;
5721 } 5755 }
5722 } 5756 }
5723 if (avail < 2) 5757 if (avail < 2)
5724 goto done; 5758 goto done;
(...skipping 1353 matching lines...) Expand 10 before | Expand all | Expand 10 after
7078 xmlFreeParserInputBuffer(input); 7112 xmlFreeParserInputBuffer(input);
7079 return (NULL); 7113 return (NULL);
7080 } 7114 }
7081 inputPush(ctxt, stream); 7115 inputPush(ctxt, stream);
7082 return (htmlDoRead(ctxt, URL, encoding, options, 1)); 7116 return (htmlDoRead(ctxt, URL, encoding, options, 1));
7083 } 7117 }
7084 7118
7085 #define bottom_HTMLparser 7119 #define bottom_HTMLparser
7086 #include "elfgcchack.h" 7120 #include "elfgcchack.h"
7087 #endif /* LIBXML_HTML_ENABLED */ 7121 #endif /* LIBXML_HTML_ENABLED */
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698