Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(300)

Side by Side Diff: third_party/libxml/src/HTMLparser.c

Issue 1193533007: Upgrade to libxml 2.9.2 and libxslt 1.1.28 (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: no iconv Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/libxml/src/DOCBparser.c ('k') | third_party/libxml/src/HTMLtree.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * HTMLparser.c : an HTML 4.0 non-verifying parser 2 * HTMLparser.c : an HTML 4.0 non-verifying parser
3 * 3 *
4 * See Copyright for the status of this software. 4 * See Copyright for the status of this software.
5 * 5 *
6 * daniel@veillard.com 6 * daniel@veillard.com
7 */ 7 */
8 8
9 #define IN_LIBXML 9 #define IN_LIBXML
10 #include "libxml.h" 10 #include "libxml.h"
(...skipping 26 matching lines...) Expand all
37 #include <libxml/xmlerror.h> 37 #include <libxml/xmlerror.h>
38 #include <libxml/HTMLparser.h> 38 #include <libxml/HTMLparser.h>
39 #include <libxml/HTMLtree.h> 39 #include <libxml/HTMLtree.h>
40 #include <libxml/entities.h> 40 #include <libxml/entities.h>
41 #include <libxml/encoding.h> 41 #include <libxml/encoding.h>
42 #include <libxml/valid.h> 42 #include <libxml/valid.h>
43 #include <libxml/xmlIO.h> 43 #include <libxml/xmlIO.h>
44 #include <libxml/globals.h> 44 #include <libxml/globals.h>
45 #include <libxml/uri.h> 45 #include <libxml/uri.h>
46 46
47 #include "buf.h"
48 #include "enc.h"
49
47 #define HTML_MAX_NAMELEN 1000 50 #define HTML_MAX_NAMELEN 1000
48 #define HTML_PARSER_BIG_BUFFER_SIZE 1000 51 #define HTML_PARSER_BIG_BUFFER_SIZE 1000
49 #define HTML_PARSER_BUFFER_SIZE 100 52 #define HTML_PARSER_BUFFER_SIZE 100
50 53
51 /* #define DEBUG */ 54 /* #define DEBUG */
52 /* #define DEBUG_PUSH */ 55 /* #define DEBUG_PUSH */
53 56
54 static int htmlOmittedDefaultValue = 1; 57 static int htmlOmittedDefaultValue = 1;
55 58
56 xmlChar * htmlDecodeEntities(htmlParserCtxtPtr ctxt, int len, 59 xmlChar * htmlDecodeEntities(htmlParserCtxtPtr ctxt, int len,
(...skipping 663 matching lines...) Expand 10 before | Expand all | Expand 10 after
720 static const char* const input_attrs[] = { ATTRS, "type", "name", "value", "checked", "disabled", "readonly", "size", "maxlength", "src", "alt", "usemap", "ismap", "tabindex", "accesskey", "onfocus", "onblur", "onselect", "onchange", "accept", NULL } ; 723 static const char* const input_attrs[] = { ATTRS, "type", "name", "value", "checked", "disabled", "readonly", "size", "maxlength", "src", "alt", "usemap", "ismap", "tabindex", "accesskey", "onfocus", "onblur", "onselect", "onchange", "accept", NULL } ;
721 static const char* const prompt_attrs[] = { COREATTRS, I18N, "prompt", NULL } ; 724 static const char* const prompt_attrs[] = { COREATTRS, I18N, "prompt", NULL } ;
722 static const char* const label_attrs[] = { ATTRS, "for", "accesskey", "onfocus", "onblur", NULL } ; 725 static const char* const label_attrs[] = { ATTRS, "for", "accesskey", "onfocus", "onblur", NULL } ;
723 static const char* const legend_attrs[] = { ATTRS, "accesskey", NULL } ; 726 static const char* const legend_attrs[] = { ATTRS, "accesskey", NULL } ;
724 static const char* const align_attr[] = { "align", NULL } ; 727 static const char* const align_attr[] = { "align", NULL } ;
725 static const char* const link_attrs[] = { ATTRS, "charset", "href", "hreflang", "type", "rel", "rev", "media", NULL } ; 728 static const char* const link_attrs[] = { ATTRS, "charset", "href", "hreflang", "type", "rel", "rev", "media", NULL } ;
726 static const char* const map_contents[] = { BLOCK, "area", NULL } ; 729 static const char* const map_contents[] = { BLOCK, "area", NULL } ;
727 static const char* const name_attr[] = { "name", NULL } ; 730 static const char* const name_attr[] = { "name", NULL } ;
728 static const char* const action_attr[] = { "action", NULL } ; 731 static const char* const action_attr[] = { "action", NULL } ;
729 static const char* const blockli_elt[] = { BLOCK, "li", NULL } ; 732 static const char* const blockli_elt[] = { BLOCK, "li", NULL } ;
730 static const char* const meta_attrs[] = { I18N, "http-equiv", "name", "scheme", NULL } ; 733 static const char* const meta_attrs[] = { I18N, "http-equiv", "name", "scheme", "charset", NULL } ;
731 static const char* const content_attr[] = { "content", NULL } ; 734 static const char* const content_attr[] = { "content", NULL } ;
732 static const char* const type_attr[] = { "type", NULL } ; 735 static const char* const type_attr[] = { "type", NULL } ;
733 static const char* const noframes_content[] = { "body", FLOW MODIFIER, NULL } ; 736 static const char* const noframes_content[] = { "body", FLOW MODIFIER, NULL } ;
734 static const char* const object_contents[] = { FLOW, "param", NULL } ; 737 static const char* const object_contents[] = { FLOW, "param", NULL } ;
735 static const char* const object_attrs[] = { ATTRS, "declare", "classid", "codebase", "data", "type", "codetype", "archive", "standby", "height", "width", "usemap", "name", "tabindex", NULL } ; 738 static const char* const object_attrs[] = { ATTRS, "declare", "classid", "codebase", "data", "type", "codetype", "archive", "standby", "height", "width", "usemap", "name", "tabindex", NULL } ;
736 static const char* const object_depr[] = { "align", "border", "hspace", "vspace", NULL } ; 739 static const char* const object_depr[] = { "align", "border", "hspace", "vspace", NULL } ;
737 static const char* const ol_attrs[] = { "type", "compact", "start", NULL} ; 740 static const char* const ol_attrs[] = { "type", "compact", "start", NULL} ;
738 static const char* const option_elt[] = { "option", NULL } ; 741 static const char* const option_elt[] = { "option", NULL } ;
739 static const char* const optgroup_attrs[] = { ATTRS, "disabled", NULL } ; 742 static const char* const optgroup_attrs[] = { ATTRS, "disabled", NULL } ;
740 static const char* const option_attrs[] = { ATTRS, "disabled", "label", "selected", "value", NULL } ; 743 static const char* const option_attrs[] = { ATTRS, "disabled", "label", "selected", "value", NULL } ;
(...skipping 332 matching lines...) Expand 10 before | Expand all | Expand 10 after
1073 "dt", "p", "menu", "dir", "address", "pre", "listing", "xmp", 1076 "dt", "p", "menu", "dir", "address", "pre", "listing", "xmp",
1074 "head", "dd", NULL, 1077 "head", "dd", NULL,
1075 "dd", "p", "menu", "dir", "address", "pre", "listing", "xmp", 1078 "dd", "p", "menu", "dir", "address", "pre", "listing", "xmp",
1076 "head", "dt", NULL, 1079 "head", "dt", NULL,
1077 "ul", "p", "head", "ol", "menu", "dir", "address", "pre", 1080 "ul", "p", "head", "ol", "menu", "dir", "address", "pre",
1078 "listing", "xmp", NULL, 1081 "listing", "xmp", NULL,
1079 "ol", "p", "head", "ul", NULL, 1082 "ol", "p", "head", "ul", NULL,
1080 "menu", "p", "head", "ul", NULL, 1083 "menu", "p", "head", "ul", NULL,
1081 "p", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", FONTSTYLE, NULL , 1084 "p", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", FONTSTYLE, NULL ,
1082 "div", "p", "head", NULL, 1085 "div", "p", "head", NULL,
1083 "noscript",» "p", "head", NULL, 1086 "noscript",» "p", NULL,
1084 "center", "font", "b", "i", "p", "head", NULL, 1087 "center", "font", "b", "i", "p", "head", NULL,
1085 "a",» » "a", NULL, 1088 "a",» » "a", "head", NULL,
1086 "caption", "p", NULL, 1089 "caption", "p", NULL,
1087 "colgroup", "caption", "colgroup", "col", "p", NULL, 1090 "colgroup", "caption", "colgroup", "col", "p", NULL,
1088 "col", "caption", "col", "p", NULL, 1091 "col", "caption", "col", "p", NULL,
1089 "table", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", "pre", 1092 "table", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", "pre",
1090 "listing", "xmp", "a", NULL, 1093 "listing", "xmp", "a", NULL,
1091 "th", "th", "td", "p", "span", "font", "a", "b", "i", "u", NULL, 1094 "th", "th", "td", "p", "span", "font", "a", "b", "i", "u", NULL,
1092 "td", "th", "td", "p", "span", "font", "a", "b", "i", "u", NULL, 1095 "td", "th", "td", "p", "span", "font", "a", "b", "i", "u", NULL,
1093 "tr", "th", "td", "tr", "caption", "col", "colgroup", "p", NULL, 1096 "tr", "th", "td", "tr", "caption", "col", "colgroup", "p", NULL,
1094 "thead", "caption", "col", "colgroup", NULL, 1097 "thead", "caption", "col", "colgroup", NULL,
1095 "tfoot", "th", "td", "tr", "caption", "col", "colgroup", "thead", 1098 "tfoot", "th", "td", "tr", "caption", "col", "colgroup", "thead",
1096 "tbody", "p", NULL, 1099 "tbody", "p", NULL,
1097 "tbody", "th", "td", "tr", "caption", "col", "colgroup", "thead", 1100 "tbody", "th", "td", "tr", "caption", "col", "colgroup", "thead",
1098 "tfoot", "tbody", "p", NULL, 1101 "tfoot", "tbody", "p", NULL,
1099 "optgroup", "option", NULL, 1102 "optgroup", "option", NULL,
1100 "option", "option", NULL, 1103 "option", "option", NULL,
1101 "fieldset", "legend", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", 1104 "fieldset", "legend", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6",
1102 "pre", "listing", "xmp", "a", NULL, 1105 "pre", "listing", "xmp", "a", NULL,
1106 /* most tags in in FONTSTYLE, PHRASE and SPECIAL should close <head> */
1107 "tt", "head", NULL,
1108 "i", "head", NULL,
1109 "b", "head", NULL,
1110 "u", "head", NULL,
1111 "s", "head", NULL,
1112 "strike", "head", NULL,
1113 "big", "head", NULL,
1114 "small", "head", NULL,
1115
1116 "em", "head", NULL,
1117 "strong", "head", NULL,
1118 "dfn", "head", NULL,
1119 "code", "head", NULL,
1120 "samp", "head", NULL,
1121 "kbd", "head", NULL,
1122 "var", "head", NULL,
1123 "cite", "head", NULL,
1124 "abbr", "head", NULL,
1125 "acronym", "head", NULL,
1126
1127 /* "a" */
1128 "img", "head", NULL,
1129 /* "applet" */
1130 /* "embed" */
1131 /* "object" */
1132 "font", "head", NULL,
1133 /* "basefont" */
1134 "br", "head", NULL,
1135 /* "script" */
1136 "map", "head", NULL,
1137 "q", "head", NULL,
1138 "sub", "head", NULL,
1139 "sup", "head", NULL,
1140 "span", "head", NULL,
1141 "bdo", "head", NULL,
1142 "iframe", "head", NULL,
1103 NULL 1143 NULL
1104 }; 1144 };
1105 1145
1106 /* 1146 /*
1107 * The list of HTML elements which are supposed not to have 1147 * The list of HTML elements which are supposed not to have
1108 * CDATA content and where a p element will be implied 1148 * CDATA content and where a p element will be implied
1109 * 1149 *
1110 * TODO: extend that list by reading the HTML SGML DTD on 1150 * TODO: extend that list by reading the HTML SGML DTD on
1111 * implied paragraph 1151 * implied paragraph
1112 */ 1152 */
(...skipping 17 matching lines...) Expand all
1130 "onmousemove", 1170 "onmousemove",
1131 "onmouseout", 1171 "onmouseout",
1132 "onkeypress", 1172 "onkeypress",
1133 "onkeydown", 1173 "onkeydown",
1134 "onkeyup", 1174 "onkeyup",
1135 "onload", 1175 "onload",
1136 "onunload", 1176 "onunload",
1137 "onfocus", 1177 "onfocus",
1138 "onblur", 1178 "onblur",
1139 "onsubmit", 1179 "onsubmit",
1140 "onrest", 1180 "onreset",
1141 "onchange", 1181 "onchange",
1142 "onselect" 1182 "onselect"
1143 }; 1183 };
1144 1184
1145 /* 1185 /*
1146 * This table is used by the htmlparser to know what to do with 1186 * This table is used by the htmlparser to know what to do with
1147 * broken html pages. By assigning different priorities to different 1187 * broken html pages. By assigning different priorities to different
1148 * elements the parser can decide how to handle extra endtags. 1188 * elements the parser can decide how to handle extra endtags.
1149 * Endtags are only allowed to close elements with lower or equal 1189 * Endtags are only allowed to close elements with lower or equal
1150 * priority. 1190 * priority.
(...skipping 1729 matching lines...) Expand 10 before | Expand all | Expand 10 after
2880 ctxt->sax->characters(ctxt->userData, buf, nbchar); 2920 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2881 } 2921 }
2882 nbchar = 0; 2922 nbchar = 0;
2883 } 2923 }
2884 GROW; 2924 GROW;
2885 NEXTL(l); 2925 NEXTL(l);
2886 cur = CUR_CHAR(l); 2926 cur = CUR_CHAR(l);
2887 } 2927 }
2888 2928
2889 if ((!(IS_CHAR_CH(cur))) && (!((cur == 0) && (ctxt->progressive)))) { 2929 if ((!(IS_CHAR_CH(cur))) && (!((cur == 0) && (ctxt->progressive)))) {
2890 » htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR, 2930 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
2891 » "Invalid char in CDATA 0x%X\n", cur); 2931 "Invalid char in CDATA 0x%X\n", cur);
2892 » NEXT; 2932 if (ctxt->input->cur < ctxt->input->end) {
2933 NEXT;
2934 }
2893 } 2935 }
2894 2936
2895 if ((nbchar != 0) && (ctxt->sax != NULL) && (!ctxt->disableSAX)) { 2937 if ((nbchar != 0) && (ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2896 if (ctxt->sax->cdataBlock!= NULL) { 2938 if (ctxt->sax->cdataBlock!= NULL) {
2897 /* 2939 /*
2898 * Insert as CDATA, which is the same as HTML_PRESERVE_NODE 2940 * Insert as CDATA, which is the same as HTML_PRESERVE_NODE
2899 */ 2941 */
2900 ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar); 2942 ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar);
2901 } else if (ctxt->sax->characters != NULL) { 2943 } else if (ctxt->sax->characters != NULL) {
2902 ctxt->sax->characters(ctxt->userData, buf, nbchar); 2944 ctxt->sax->characters(ctxt->userData, buf, nbchar);
(...skipping 29 matching lines...) Expand all
2932 "Invalid char in CDATA 0x%X\n", cur); 2974 "Invalid char in CDATA 0x%X\n", cur);
2933 } else { 2975 } else {
2934 COPY_BUF(l,buf,nbchar,cur); 2976 COPY_BUF(l,buf,nbchar,cur);
2935 } 2977 }
2936 if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) { 2978 if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) {
2937 /* 2979 /*
2938 * Ok the segment is to be consumed as chars. 2980 * Ok the segment is to be consumed as chars.
2939 */ 2981 */
2940 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 2982 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2941 if (areBlanks(ctxt, buf, nbchar)) { 2983 if (areBlanks(ctxt, buf, nbchar)) {
2942 » » if (ctxt->sax->ignorableWhitespace != NULL) 2984 » » if (ctxt->keepBlanks) {
2943 » » » ctxt->sax->ignorableWhitespace(ctxt->userData, 2985 » » » if (ctxt->sax->characters != NULL)
2944 » » » buf, nbchar); 2986 » » » ctxt->sax->characters(ctxt->userData, buf, nbchar);
2987 » » } else {
2988 » » » if (ctxt->sax->ignorableWhitespace != NULL)
2989 » » » ctxt->sax->ignorableWhitespace(ctxt->userData,
2990 » » » buf, nbchar);
2991 » » }
2945 } else { 2992 } else {
2946 htmlCheckParagraph(ctxt); 2993 htmlCheckParagraph(ctxt);
2947 if (ctxt->sax->characters != NULL) 2994 if (ctxt->sax->characters != NULL)
2948 ctxt->sax->characters(ctxt->userData, buf, nbchar); 2995 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2949 } 2996 }
2950 } 2997 }
2951 nbchar = 0; 2998 nbchar = 0;
2952 } 2999 }
2953 NEXTL(l); 3000 NEXTL(l);
2954 chunk++; 3001 chunk++;
(...skipping 10 matching lines...) Expand all
2965 } 3012 }
2966 } 3013 }
2967 if (nbchar != 0) { 3014 if (nbchar != 0) {
2968 buf[nbchar] = 0; 3015 buf[nbchar] = 0;
2969 3016
2970 /* 3017 /*
2971 * Ok the segment is to be consumed as chars. 3018 * Ok the segment is to be consumed as chars.
2972 */ 3019 */
2973 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { 3020 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2974 if (areBlanks(ctxt, buf, nbchar)) { 3021 if (areBlanks(ctxt, buf, nbchar)) {
2975 » » if (ctxt->sax->ignorableWhitespace != NULL) 3022 » » if (ctxt->keepBlanks) {
2976 » » ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); 3023 » » if (ctxt->sax->characters != NULL)
3024 » » » ctxt->sax->characters(ctxt->userData, buf, nbchar);
3025 » » } else {
3026 » » if (ctxt->sax->ignorableWhitespace != NULL)
3027 » » » ctxt->sax->ignorableWhitespace(ctxt->userData,
3028 » » » buf, nbchar);
3029 » » }
2977 } else { 3030 } else {
2978 htmlCheckParagraph(ctxt); 3031 htmlCheckParagraph(ctxt);
2979 if (ctxt->sax->characters != NULL) 3032 if (ctxt->sax->characters != NULL)
2980 ctxt->sax->characters(ctxt->userData, buf, nbchar); 3033 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2981 } 3034 }
2982 } 3035 }
2983 } else { 3036 } else {
2984 /* 3037 /*
2985 * Loop detection 3038 * Loop detection
2986 */ 3039 */
(...skipping 281 matching lines...) Expand 10 before | Expand all | Expand 10 after
3268 SKIP(3); 3321 SKIP(3);
3269 while (CUR != ';') { 3322 while (CUR != ';') {
3270 if ((CUR >= '0') && (CUR <= '9')) 3323 if ((CUR >= '0') && (CUR <= '9'))
3271 val = val * 16 + (CUR - '0'); 3324 val = val * 16 + (CUR - '0');
3272 else if ((CUR >= 'a') && (CUR <= 'f')) 3325 else if ((CUR >= 'a') && (CUR <= 'f'))
3273 val = val * 16 + (CUR - 'a') + 10; 3326 val = val * 16 + (CUR - 'a') + 10;
3274 else if ((CUR >= 'A') && (CUR <= 'F')) 3327 else if ((CUR >= 'A') && (CUR <= 'F'))
3275 val = val * 16 + (CUR - 'A') + 10; 3328 val = val * 16 + (CUR - 'A') + 10;
3276 else { 3329 else {
3277 htmlParseErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, 3330 htmlParseErr(ctxt, XML_ERR_INVALID_HEX_CHARREF,
3278 » » "htmlParseCharRef: missing semicolumn\n", 3331 » » "htmlParseCharRef: missing semicolon\n",
3279 NULL, NULL); 3332 NULL, NULL);
3280 break; 3333 break;
3281 } 3334 }
3282 NEXT; 3335 NEXT;
3283 } 3336 }
3284 if (CUR == ';') 3337 if (CUR == ';')
3285 NEXT; 3338 NEXT;
3286 } else if ((CUR == '&') && (NXT(1) == '#')) { 3339 } else if ((CUR == '&') && (NXT(1) == '#')) {
3287 SKIP(2); 3340 SKIP(2);
3288 while (CUR != ';') { 3341 while (CUR != ';') {
3289 if ((CUR >= '0') && (CUR <= '9')) 3342 if ((CUR >= '0') && (CUR <= '9'))
3290 val = val * 10 + (CUR - '0'); 3343 val = val * 10 + (CUR - '0');
3291 else { 3344 else {
3292 htmlParseErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, 3345 htmlParseErr(ctxt, XML_ERR_INVALID_DEC_CHARREF,
3293 » » "htmlParseCharRef: missing semicolumn\n", 3346 » » "htmlParseCharRef: missing semicolon\n",
3294 NULL, NULL); 3347 NULL, NULL);
3295 break; 3348 break;
3296 } 3349 }
3297 NEXT; 3350 NEXT;
3298 } 3351 }
3299 if (CUR == ';') 3352 if (CUR == ';')
3300 NEXT; 3353 NEXT;
3301 } else { 3354 } else {
3302 htmlParseErr(ctxt, XML_ERR_INVALID_CHARREF, 3355 htmlParseErr(ctxt, XML_ERR_INVALID_CHARREF,
3303 "htmlParseCharRef: invalid value\n", NULL, NULL); 3356 "htmlParseCharRef: invalid value\n", NULL, NULL);
(...skipping 122 matching lines...) Expand 10 before | Expand all | Expand 10 after
3426 NEXT; 3479 NEXT;
3427 SKIP_BLANKS; 3480 SKIP_BLANKS;
3428 val = htmlParseAttValue(ctxt); 3481 val = htmlParseAttValue(ctxt);
3429 } 3482 }
3430 3483
3431 *value = val; 3484 *value = val;
3432 return(name); 3485 return(name);
3433 } 3486 }
3434 3487
3435 /** 3488 /**
3436 * htmlCheckEncoding: 3489 * htmlCheckEncodingDirect:
3437 * @ctxt: an HTML parser context 3490 * @ctxt: an HTML parser context
3438 * @attvalue: the attribute value 3491 * @attvalue: the attribute value
3439 * 3492 *
3440 * Checks an http-equiv attribute from a Meta tag to detect 3493 * Checks an attribute value to detect
3441 * the encoding 3494 * the encoding
3442 * If a new encoding is detected the parser is switched to decode 3495 * If a new encoding is detected the parser is switched to decode
3443 * it and pass UTF8 3496 * it and pass UTF8
3444 */ 3497 */
3445 static void 3498 static void
3446 htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) { 3499 htmlCheckEncodingDirect(htmlParserCtxtPtr ctxt, const xmlChar *encoding) {
3447 const xmlChar *encoding;
3448 3500
3449 if ((ctxt == NULL) || (attvalue == NULL)) 3501 if ((ctxt == NULL) || (encoding == NULL) ||
3502 (ctxt->options & HTML_PARSE_IGNORE_ENC))
3450 return; 3503 return;
3451 3504
3452 /* do not change encoding */ 3505 /* do not change encoding */
3453 if (ctxt->input->encoding != NULL) 3506 if (ctxt->input->encoding != NULL)
3454 return; 3507 return;
3455 3508
3456 encoding = xmlStrcasestr(attvalue, BAD_CAST"charset=");
3457 if (encoding != NULL) {
3458 encoding += 8;
3459 } else {
3460 encoding = xmlStrcasestr(attvalue, BAD_CAST"charset =");
3461 if (encoding != NULL)
3462 encoding += 9;
3463 }
3464 if (encoding != NULL) { 3509 if (encoding != NULL) {
3465 xmlCharEncoding enc; 3510 xmlCharEncoding enc;
3466 xmlCharEncodingHandlerPtr handler; 3511 xmlCharEncodingHandlerPtr handler;
3467 3512
3468 while ((*encoding == ' ') || (*encoding == '\t')) encoding++; 3513 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
3469 3514
3470 if (ctxt->input->encoding != NULL) 3515 if (ctxt->input->encoding != NULL)
3471 xmlFree((xmlChar *) ctxt->input->encoding); 3516 xmlFree((xmlChar *) ctxt->input->encoding);
3472 ctxt->input->encoding = xmlStrdup(encoding); 3517 ctxt->input->encoding = xmlStrdup(encoding);
3473 3518
(...skipping 17 matching lines...) Expand all
3491 ctxt->charset = XML_CHAR_ENCODING_UTF8; 3536 ctxt->charset = XML_CHAR_ENCODING_UTF8;
3492 } else { 3537 } else {
3493 /* 3538 /*
3494 * fallback for unknown encodings 3539 * fallback for unknown encodings
3495 */ 3540 */
3496 handler = xmlFindCharEncodingHandler((const char *) encoding); 3541 handler = xmlFindCharEncodingHandler((const char *) encoding);
3497 if (handler != NULL) { 3542 if (handler != NULL) {
3498 xmlSwitchToEncoding(ctxt, handler); 3543 xmlSwitchToEncoding(ctxt, handler);
3499 ctxt->charset = XML_CHAR_ENCODING_UTF8; 3544 ctxt->charset = XML_CHAR_ENCODING_UTF8;
3500 } else { 3545 } else {
3501 » » ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; 3546 » » htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
3547 » » "htmlCheckEncoding: unknown encoding %s\n",
3548 » » » encoding, NULL);
3502 } 3549 }
3503 } 3550 }
3504 3551
3505 if ((ctxt->input->buf != NULL) && 3552 if ((ctxt->input->buf != NULL) &&
3506 (ctxt->input->buf->encoder != NULL) && 3553 (ctxt->input->buf->encoder != NULL) &&
3507 (ctxt->input->buf->raw != NULL) && 3554 (ctxt->input->buf->raw != NULL) &&
3508 (ctxt->input->buf->buffer != NULL)) { 3555 (ctxt->input->buf->buffer != NULL)) {
3509 int nbchars; 3556 int nbchars;
3510 int processed; 3557 int processed;
3511 3558
3512 /* 3559 /*
3513 * convert as much as possible to the parser reading buffer. 3560 * convert as much as possible to the parser reading buffer.
3514 */ 3561 */
3515 processed = ctxt->input->cur - ctxt->input->base; 3562 processed = ctxt->input->cur - ctxt->input->base;
3516 » xmlBufferShrink(ctxt->input->buf->buffer, processed); 3563 » xmlBufShrink(ctxt->input->buf->buffer, processed);
3517 » nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder, 3564 » nbchars = xmlCharEncInput(ctxt->input->buf, 1);
3518 » » ctxt->input->buf->buffer,
3519 » » » » ctxt->input->buf->raw);
3520 if (nbchars < 0) { 3565 if (nbchars < 0) {
3521 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, 3566 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
3522 "htmlCheckEncoding: encoder error\n", 3567 "htmlCheckEncoding: encoder error\n",
3523 NULL, NULL); 3568 NULL, NULL);
3524 } 3569 }
3525 » ctxt->input->base = 3570 xmlBufResetInput(ctxt->input->buf->buffer, ctxt->input);
3526 » ctxt->input->cur = ctxt->input->buf->buffer->content;
3527 ctxt->input->end =
3528 &ctxt->input->base[ctxt->input->buf->buffer->use];
3529 } 3571 }
3530 } 3572 }
3531 } 3573 }
3532 3574
3533 /** 3575 /**
3576 * htmlCheckEncoding:
3577 * @ctxt: an HTML parser context
3578 * @attvalue: the attribute value
3579 *
3580 * Checks an http-equiv attribute from a Meta tag to detect
3581 * the encoding
3582 * If a new encoding is detected the parser is switched to decode
3583 * it and pass UTF8
3584 */
3585 static void
3586 htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) {
3587 const xmlChar *encoding;
3588
3589 if (!attvalue)
3590 return;
3591
3592 encoding = xmlStrcasestr(attvalue, BAD_CAST"charset");
3593 if (encoding != NULL) {
3594 encoding += 7;
3595 }
3596 /*
3597 * skip blank
3598 */
3599 if (encoding && IS_BLANK_CH(*encoding))
3600 encoding = xmlStrcasestr(attvalue, BAD_CAST"=");
3601 if (encoding && *encoding == '=') {
3602 encoding ++;
3603 htmlCheckEncodingDirect(ctxt, encoding);
3604 }
3605 }
3606
3607 /**
3534 * htmlCheckMeta: 3608 * htmlCheckMeta:
3535 * @ctxt: an HTML parser context 3609 * @ctxt: an HTML parser context
3536 * @atts: the attributes values 3610 * @atts: the attributes values
3537 * 3611 *
3538 * Checks an attributes from a Meta tag 3612 * Checks an attributes from a Meta tag
3539 */ 3613 */
3540 static void 3614 static void
3541 htmlCheckMeta(htmlParserCtxtPtr ctxt, const xmlChar **atts) { 3615 htmlCheckMeta(htmlParserCtxtPtr ctxt, const xmlChar **atts) {
3542 int i; 3616 int i;
3543 const xmlChar *att, *value; 3617 const xmlChar *att, *value;
3544 int http = 0; 3618 int http = 0;
3545 const xmlChar *content = NULL; 3619 const xmlChar *content = NULL;
3546 3620
3547 if ((ctxt == NULL) || (atts == NULL)) 3621 if ((ctxt == NULL) || (atts == NULL))
3548 return; 3622 return;
3549 3623
3550 i = 0; 3624 i = 0;
3551 att = atts[i++]; 3625 att = atts[i++];
3552 while (att != NULL) { 3626 while (att != NULL) {
3553 value = atts[i++]; 3627 value = atts[i++];
3554 if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"http-equiv")) 3628 if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"http-equiv"))
3555 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type"))) 3629 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
3556 http = 1; 3630 http = 1;
3631 else if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"charset")))
3632 htmlCheckEncodingDirect(ctxt, value);
3557 else if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"content"))) 3633 else if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"content")))
3558 content = value; 3634 content = value;
3559 att = atts[i++]; 3635 att = atts[i++];
3560 } 3636 }
3561 if ((http) && (content != NULL)) 3637 if ((http) && (content != NULL))
3562 htmlCheckEncoding(ctxt, content); 3638 htmlCheckEncoding(ctxt, content);
3563 3639
3564 } 3640 }
3565 3641
3566 /** 3642 /**
(...skipping 21 matching lines...) Expand all
3588 const xmlChar *name; 3664 const xmlChar *name;
3589 const xmlChar *attname; 3665 const xmlChar *attname;
3590 xmlChar *attvalue; 3666 xmlChar *attvalue;
3591 const xmlChar **atts; 3667 const xmlChar **atts;
3592 int nbatts = 0; 3668 int nbatts = 0;
3593 int maxatts; 3669 int maxatts;
3594 int meta = 0; 3670 int meta = 0;
3595 int i; 3671 int i;
3596 int discardtag = 0; 3672 int discardtag = 0;
3597 3673
3598 if (ctxt->instate == XML_PARSER_EOF)
3599 return(-1);
3600 if ((ctxt == NULL) || (ctxt->input == NULL)) { 3674 if ((ctxt == NULL) || (ctxt->input == NULL)) {
3601 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, 3675 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
3602 "htmlParseStartTag: context error\n", NULL, NULL); 3676 "htmlParseStartTag: context error\n", NULL, NULL);
3603 return -1; 3677 return -1;
3604 } 3678 }
3679 if (ctxt->instate == XML_PARSER_EOF)
3680 return(-1);
3605 if (CUR != '<') return -1; 3681 if (CUR != '<') return -1;
3606 NEXT; 3682 NEXT;
3607 3683
3608 atts = ctxt->atts; 3684 atts = ctxt->atts;
3609 maxatts = ctxt->maxatts; 3685 maxatts = ctxt->maxatts;
3610 3686
3611 GROW; 3687 GROW;
3612 name = htmlParseHTMLName(ctxt); 3688 name = htmlParseHTMLName(ctxt);
3613 if (name == NULL) { 3689 if (name == NULL) {
3614 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED, 3690 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
(...skipping 261 matching lines...) Expand 10 before | Expand all | Expand 10 after
3876 } 3952 }
3877 } 3953 }
3878 3954
3879 /* 3955 /*
3880 * SAX: End of Tag 3956 * SAX: End of Tag
3881 */ 3957 */
3882 oldname = ctxt->name; 3958 oldname = ctxt->name;
3883 if ((oldname != NULL) && (xmlStrEqual(oldname, name))) { 3959 if ((oldname != NULL) && (xmlStrEqual(oldname, name))) {
3884 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) 3960 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
3885 ctxt->sax->endElement(ctxt->userData, name); 3961 ctxt->sax->endElement(ctxt->userData, name);
3962 htmlNodeInfoPop(ctxt);
3886 htmlnamePop(ctxt); 3963 htmlnamePop(ctxt);
3887 ret = 1; 3964 ret = 1;
3888 } else { 3965 } else {
3889 ret = 0; 3966 ret = 0;
3890 } 3967 }
3891 3968
3892 return (ret); 3969 return (ret);
3893 } 3970 }
3894 3971
3895 3972
(...skipping 386 matching lines...) Expand 10 before | Expand all | Expand 10 after
4282 * 4359 *
4283 * [39] element ::= EmptyElemTag | STag content ETag 4360 * [39] element ::= EmptyElemTag | STag content ETag
4284 * 4361 *
4285 * [41] Attribute ::= Name Eq AttValue 4362 * [41] Attribute ::= Name Eq AttValue
4286 */ 4363 */
4287 4364
4288 static void 4365 static void
4289 htmlParseElementInternal(htmlParserCtxtPtr ctxt) { 4366 htmlParseElementInternal(htmlParserCtxtPtr ctxt) {
4290 const xmlChar *name; 4367 const xmlChar *name;
4291 const htmlElemDesc * info; 4368 const htmlElemDesc * info;
4292 htmlParserNodeInfo node_info; 4369 htmlParserNodeInfo node_info = { 0, };
4293 int failed; 4370 int failed;
4294 4371
4295 if ((ctxt == NULL) || (ctxt->input == NULL)) { 4372 if ((ctxt == NULL) || (ctxt->input == NULL)) {
4296 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, 4373 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
4297 "htmlParseElementInternal: context error\n", NULL, NULL); 4374 "htmlParseElementInternal: context error\n", NULL, NULL);
4298 return; 4375 return;
4299 } 4376 }
4300 4377
4301 if (ctxt->instate == XML_PARSER_EOF) 4378 if (ctxt->instate == XML_PARSER_EOF)
4302 return; 4379 return;
(...skipping 360 matching lines...) Expand 10 before | Expand all | Expand 10 after
4663 if (CUR == 0) 4740 if (CUR == 0)
4664 htmlAutoCloseOnEnd(ctxt); 4741 htmlAutoCloseOnEnd(ctxt);
4665 4742
4666 4743
4667 /* 4744 /*
4668 * SAX: end of the document processing. 4745 * SAX: end of the document processing.
4669 */ 4746 */
4670 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 4747 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
4671 ctxt->sax->endDocument(ctxt->userData); 4748 ctxt->sax->endDocument(ctxt->userData);
4672 4749
4673 if (ctxt->myDoc != NULL) { 4750 if ((!(ctxt->options & HTML_PARSE_NODEFDTD)) && (ctxt->myDoc != NULL)) {
4674 dtd = xmlGetIntSubset(ctxt->myDoc); 4751 dtd = xmlGetIntSubset(ctxt->myDoc);
4675 if (dtd == NULL) 4752 if (dtd == NULL)
4676 ctxt->myDoc->intSubset = 4753 ctxt->myDoc->intSubset =
4677 xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html", 4754 xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html",
4678 BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN", 4755 BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN",
4679 BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd"); 4756 BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd");
4680 } 4757 }
4681 if (! ctxt->wellFormed) return(-1); 4758 if (! ctxt->wellFormed) return(-1);
4682 return(0); 4759 return(0);
4683 } 4760 }
(...skipping 184 matching lines...) Expand 10 before | Expand all | Expand 10 after
4868 if (buf == NULL) return(NULL); 4945 if (buf == NULL) return(NULL);
4869 4946
4870 input = xmlNewInputStream(ctxt); 4947 input = xmlNewInputStream(ctxt);
4871 if (input == NULL) { 4948 if (input == NULL) {
4872 xmlFreeParserCtxt(ctxt); 4949 xmlFreeParserCtxt(ctxt);
4873 return(NULL); 4950 return(NULL);
4874 } 4951 }
4875 4952
4876 input->filename = NULL; 4953 input->filename = NULL;
4877 input->buf = buf; 4954 input->buf = buf;
4878 input->base = input->buf->buffer->content; 4955 xmlBufResetInput(buf->buffer, input);
4879 input->cur = input->buf->buffer->content;
4880 input->end = &input->buf->buffer->content[input->buf->buffer->use];
4881 4956
4882 inputPush(ctxt, input); 4957 inputPush(ctxt, input);
4883 return(ctxt); 4958 return(ctxt);
4884 } 4959 }
4885 4960
4886 /** 4961 /**
4887 * htmlCreateDocParserCtxt: 4962 * htmlCreateDocParserCtxt:
4888 * @cur: a pointer to an array of xmlChar 4963 * @cur: a pointer to an array of xmlChar
4889 * @encoding: a free form C string describing the HTML document encoding, or NULL 4964 * @encoding: a free form C string describing the HTML document encoding, or NULL
4890 * 4965 *
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after
4987 if (base < 0) 5062 if (base < 0)
4988 return (-1); 5063 return (-1);
4989 5064
4990 if (ctxt->checkIndex > base) 5065 if (ctxt->checkIndex > base)
4991 base = ctxt->checkIndex; 5066 base = ctxt->checkIndex;
4992 5067
4993 if (in->buf == NULL) { 5068 if (in->buf == NULL) {
4994 buf = in->base; 5069 buf = in->base;
4995 len = in->length; 5070 len = in->length;
4996 } else { 5071 } else {
4997 buf = in->buf->buffer->content; 5072 buf = xmlBufContent(in->buf->buffer);
4998 len = in->buf->buffer->use; 5073 len = xmlBufUse(in->buf->buffer);
4999 } 5074 }
5000 5075
5001 /* take into account the sequence length */ 5076 /* take into account the sequence length */
5002 if (third) 5077 if (third)
5003 len -= 2; 5078 len -= 2;
5004 else if (next) 5079 else if (next)
5005 len--; 5080 len--;
5006 for (; base < len; base++) { 5081 for (; base < len; base++) {
5007 if ((!incomment) && (base + 4 < len) && (!iscomment)) { 5082 if ((!incomment) && (base + 4 < len) && (!iscomment)) {
5008 if ((buf[base] == '<') && (buf[base + 1] == '!') && 5083 if ((buf[base] == '<') && (buf[base + 1] == '!') &&
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after
5080 #endif 5155 #endif
5081 return (-1); 5156 return (-1);
5082 } 5157 }
5083 5158
5084 /** 5159 /**
5085 * htmlParseLookupChars: 5160 * htmlParseLookupChars:
5086 * @ctxt: an HTML parser context 5161 * @ctxt: an HTML parser context
5087 * @stop: Array of chars, which stop the lookup. 5162 * @stop: Array of chars, which stop the lookup.
5088 * @stopLen: Length of stop-Array 5163 * @stopLen: Length of stop-Array
5089 * 5164 *
5090 * Try to find if any char of the stop-Array is available in the input 5165 * Try to find if any char of the stop-Array is available in the input
5091 * stream. 5166 * stream.
5092 * This function has a side effect of (possibly) incrementing ctxt->checkIndex 5167 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
5093 * to avoid rescanning sequences of bytes, it DOES change the state of the 5168 * to avoid rescanning sequences of bytes, it DOES change the state of the
5094 * parser, do not use liberally. 5169 * parser, do not use liberally.
5095 * 5170 *
5096 * Returns the index to the current parsing point if a stopChar 5171 * Returns the index to the current parsing point if a stopChar
5097 * is available, -1 otherwise. 5172 * is available, -1 otherwise.
5098 */ 5173 */
5099 static int 5174 static int
5100 htmlParseLookupChars(htmlParserCtxtPtr ctxt, const xmlChar * stop, 5175 htmlParseLookupChars(htmlParserCtxtPtr ctxt, const xmlChar * stop,
5101 int stopLen) 5176 int stopLen)
5102 { 5177 {
5103 int base, len; 5178 int base, len;
5104 htmlParserInputPtr in; 5179 htmlParserInputPtr in;
5105 const xmlChar *buf; 5180 const xmlChar *buf;
5106 int incomment = 0; 5181 int incomment = 0;
5107 int i; 5182 int i;
5108 5183
5109 in = ctxt->input; 5184 in = ctxt->input;
5110 if (in == NULL) 5185 if (in == NULL)
5111 return (-1); 5186 return (-1);
5112 5187
5113 base = in->cur - in->base; 5188 base = in->cur - in->base;
5114 if (base < 0) 5189 if (base < 0)
5115 return (-1); 5190 return (-1);
5116 5191
5117 if (ctxt->checkIndex > base) 5192 if (ctxt->checkIndex > base)
5118 base = ctxt->checkIndex; 5193 base = ctxt->checkIndex;
5119 5194
5120 if (in->buf == NULL) { 5195 if (in->buf == NULL) {
5121 buf = in->base; 5196 buf = in->base;
5122 len = in->length; 5197 len = in->length;
5123 } else { 5198 } else {
5124 buf = in->buf->buffer->content; 5199 buf = xmlBufContent(in->buf->buffer);
5125 len = in->buf->buffer->use; 5200 len = xmlBufUse(in->buf->buffer);
5126 } 5201 }
5127 5202
5128 for (; base < len; base++) { 5203 for (; base < len; base++) {
5129 if (!incomment && (base + 4 < len)) { 5204 if (!incomment && (base + 4 < len)) {
5130 if ((buf[base] == '<') && (buf[base + 1] == '!') && 5205 if ((buf[base] == '<') && (buf[base + 1] == '!') &&
5131 (buf[base + 2] == '-') && (buf[base + 3] == '-')) { 5206 (buf[base + 2] == '-') && (buf[base + 3] == '-')) {
5132 incomment = 1; 5207 incomment = 1;
5133 /* do not increment past <! - some people use <!--> */ 5208 /* do not increment past <! - some people use <!--> */
5134 base += 2; 5209 base += 2;
5135 } 5210 }
(...skipping 28 matching lines...) Expand all
5164 * 5239 *
5165 * Returns zero if no parsing was possible 5240 * Returns zero if no parsing was possible
5166 */ 5241 */
5167 static int 5242 static int
5168 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { 5243 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
5169 int ret = 0; 5244 int ret = 0;
5170 htmlParserInputPtr in; 5245 htmlParserInputPtr in;
5171 int avail = 0; 5246 int avail = 0;
5172 xmlChar cur, next; 5247 xmlChar cur, next;
5173 5248
5249 htmlParserNodeInfo node_info;
5250
5174 #ifdef DEBUG_PUSH 5251 #ifdef DEBUG_PUSH
5175 switch (ctxt->instate) { 5252 switch (ctxt->instate) {
5176 case XML_PARSER_EOF: 5253 case XML_PARSER_EOF:
5177 xmlGenericError(xmlGenericErrorContext, 5254 xmlGenericError(xmlGenericErrorContext,
5178 "HPP: try EOF\n"); break; 5255 "HPP: try EOF\n"); break;
5179 case XML_PARSER_START: 5256 case XML_PARSER_START:
5180 xmlGenericError(xmlGenericErrorContext, 5257 xmlGenericError(xmlGenericErrorContext,
5181 "HPP: try START\n"); break; 5258 "HPP: try START\n"); break;
5182 case XML_PARSER_MISC: 5259 case XML_PARSER_MISC:
5183 xmlGenericError(xmlGenericErrorContext, 5260 xmlGenericError(xmlGenericErrorContext,
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
5224 } 5301 }
5225 #endif 5302 #endif
5226 5303
5227 while (1) { 5304 while (1) {
5228 5305
5229 in = ctxt->input; 5306 in = ctxt->input;
5230 if (in == NULL) break; 5307 if (in == NULL) break;
5231 if (in->buf == NULL) 5308 if (in->buf == NULL)
5232 avail = in->length - (in->cur - in->base); 5309 avail = in->length - (in->cur - in->base);
5233 else 5310 else
5234 » avail = in->buf->buffer->use - (in->cur - in->base); 5311 » avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);
5235 if ((avail == 0) && (terminate)) { 5312 if ((avail == 0) && (terminate)) {
5236 htmlAutoCloseOnEnd(ctxt); 5313 htmlAutoCloseOnEnd(ctxt);
5237 if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { 5314 if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {
5238 /* 5315 /*
5239 * SAX: end of the document processing. 5316 * SAX: end of the document processing.
5240 */ 5317 */
5241 ctxt->instate = XML_PARSER_EOF; 5318 ctxt->instate = XML_PARSER_EOF;
5242 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 5319 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
5243 ctxt->sax->endDocument(ctxt->userData); 5320 ctxt->sax->endDocument(ctxt->userData);
5244 } 5321 }
(...skipping 15 matching lines...) Expand all
5260 case XML_PARSER_START: 5337 case XML_PARSER_START:
5261 /* 5338 /*
5262 * Very first chars read from the document flow. 5339 * Very first chars read from the document flow.
5263 */ 5340 */
5264 cur = in->cur[0]; 5341 cur = in->cur[0];
5265 if (IS_BLANK_CH(cur)) { 5342 if (IS_BLANK_CH(cur)) {
5266 SKIP_BLANKS; 5343 SKIP_BLANKS;
5267 if (in->buf == NULL) 5344 if (in->buf == NULL)
5268 avail = in->length - (in->cur - in->base); 5345 avail = in->length - (in->cur - in->base);
5269 else 5346 else
5270 » » » avail = in->buf->buffer->use - (in->cur - in->base); 5347 » » » avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);
5271 } 5348 }
5272 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) 5349 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
5273 ctxt->sax->setDocumentLocator(ctxt->userData, 5350 ctxt->sax->setDocumentLocator(ctxt->userData,
5274 &xmlDefaultSAXLocator); 5351 &xmlDefaultSAXLocator);
5275 if ((ctxt->sax) && (ctxt->sax->startDocument) && 5352 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
5276 (!ctxt->disableSAX)) 5353 (!ctxt->disableSAX))
5277 ctxt->sax->startDocument(ctxt->userData); 5354 ctxt->sax->startDocument(ctxt->userData);
5278 5355
5279 cur = in->cur[0]; 5356 cur = in->cur[0];
5280 next = in->cur[1]; 5357 next = in->cur[1];
(...skipping 21 matching lines...) Expand all
5302 xmlGenericError(xmlGenericErrorContext, 5379 xmlGenericError(xmlGenericErrorContext,
5303 "HPP: entering MISC\n"); 5380 "HPP: entering MISC\n");
5304 #endif 5381 #endif
5305 } 5382 }
5306 break; 5383 break;
5307 case XML_PARSER_MISC: 5384 case XML_PARSER_MISC:
5308 SKIP_BLANKS; 5385 SKIP_BLANKS;
5309 if (in->buf == NULL) 5386 if (in->buf == NULL)
5310 avail = in->length - (in->cur - in->base); 5387 avail = in->length - (in->cur - in->base);
5311 else 5388 else
5312 » » avail = in->buf->buffer->use - (in->cur - in->base); 5389 » » avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);
5313 » » if (avail < 2) 5390 » » /*
5391 » » * no chars in buffer
5392 » » */
5393 » » if (avail < 1)
5314 goto done; 5394 goto done;
5395 /*
5396 * not enouth chars in buffer
5397 */
5398 if (avail < 2) {
5399 if (!terminate)
5400 goto done;
5401 else
5402 next = ' ';
5403 } else {
5404 next = in->cur[1];
5405 }
5315 cur = in->cur[0]; 5406 cur = in->cur[0];
5316 next = in->cur[1];
5317 if ((cur == '<') && (next == '!') && 5407 if ((cur == '<') && (next == '!') &&
5318 (in->cur[2] == '-') && (in->cur[3] == '-')) { 5408 (in->cur[2] == '-') && (in->cur[3] == '-')) {
5319 if ((!terminate) && 5409 if ((!terminate) &&
5320 (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0)) 5410 (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0))
5321 goto done; 5411 goto done;
5322 #ifdef DEBUG_PUSH 5412 #ifdef DEBUG_PUSH
5323 xmlGenericError(xmlGenericErrorContext, 5413 xmlGenericError(xmlGenericErrorContext,
5324 "HPP: Parsing Comment\n"); 5414 "HPP: Parsing Comment\n");
5325 #endif 5415 #endif
5326 htmlParseComment(ctxt); 5416 htmlParseComment(ctxt);
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
5362 xmlGenericError(xmlGenericErrorContext, 5452 xmlGenericError(xmlGenericErrorContext,
5363 "HPP: entering START_TAG\n"); 5453 "HPP: entering START_TAG\n");
5364 #endif 5454 #endif
5365 } 5455 }
5366 break; 5456 break;
5367 case XML_PARSER_PROLOG: 5457 case XML_PARSER_PROLOG:
5368 SKIP_BLANKS; 5458 SKIP_BLANKS;
5369 if (in->buf == NULL) 5459 if (in->buf == NULL)
5370 avail = in->length - (in->cur - in->base); 5460 avail = in->length - (in->cur - in->base);
5371 else 5461 else
5372 » » avail = in->buf->buffer->use - (in->cur - in->base); 5462 » » avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);
5373 if (avail < 2) 5463 if (avail < 2)
5374 goto done; 5464 goto done;
5375 cur = in->cur[0]; 5465 cur = in->cur[0];
5376 next = in->cur[1]; 5466 next = in->cur[1];
5377 if ((cur == '<') && (next == '!') && 5467 if ((cur == '<') && (next == '!') &&
5378 (in->cur[2] == '-') && (in->cur[3] == '-')) { 5468 (in->cur[2] == '-') && (in->cur[3] == '-')) {
5379 if ((!terminate) && 5469 if ((!terminate) &&
5380 (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0)) 5470 (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0))
5381 goto done; 5471 goto done;
5382 #ifdef DEBUG_PUSH 5472 #ifdef DEBUG_PUSH
(...skipping 20 matching lines...) Expand all
5403 #ifdef DEBUG_PUSH 5493 #ifdef DEBUG_PUSH
5404 xmlGenericError(xmlGenericErrorContext, 5494 xmlGenericError(xmlGenericErrorContext,
5405 "HPP: entering START_TAG\n"); 5495 "HPP: entering START_TAG\n");
5406 #endif 5496 #endif
5407 } 5497 }
5408 break; 5498 break;
5409 case XML_PARSER_EPILOG: 5499 case XML_PARSER_EPILOG:
5410 if (in->buf == NULL) 5500 if (in->buf == NULL)
5411 avail = in->length - (in->cur - in->base); 5501 avail = in->length - (in->cur - in->base);
5412 else 5502 else
5413 » » avail = in->buf->buffer->use - (in->cur - in->base); 5503 » » avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base);
5414 if (avail < 1) 5504 if (avail < 1)
5415 goto done; 5505 goto done;
5416 cur = in->cur[0]; 5506 cur = in->cur[0];
5417 if (IS_BLANK_CH(cur)) { 5507 if (IS_BLANK_CH(cur)) {
5418 htmlParseCharData(ctxt); 5508 htmlParseCharData(ctxt);
5419 goto done; 5509 goto done;
5420 } 5510 }
5421 if (avail < 2) 5511 if (avail < 2)
5422 goto done; 5512 goto done;
5423 next = in->cur[1]; 5513 next = in->cur[1];
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
5456 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 5546 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
5457 ctxt->sax->endDocument(ctxt->userData); 5547 ctxt->sax->endDocument(ctxt->userData);
5458 goto done; 5548 goto done;
5459 } 5549 }
5460 break; 5550 break;
5461 case XML_PARSER_START_TAG: { 5551 case XML_PARSER_START_TAG: {
5462 const xmlChar *name; 5552 const xmlChar *name;
5463 int failed; 5553 int failed;
5464 const htmlElemDesc * info; 5554 const htmlElemDesc * info;
5465 5555
5466 » » if (avail < 2) 5556 » » /*
5557 » » * no chars in buffer
5558 » » */
5559 » » if (avail < 1)
5467 goto done; 5560 goto done;
5561 /*
5562 * not enouth chars in buffer
5563 */
5564 if (avail < 2) {
5565 if (!terminate)
5566 goto done;
5567 else
5568 next = ' ';
5569 } else {
5570 next = in->cur[1];
5571 }
5468 cur = in->cur[0]; 5572 cur = in->cur[0];
5469 if (cur != '<') { 5573 if (cur != '<') {
5470 ctxt->instate = XML_PARSER_CONTENT; 5574 ctxt->instate = XML_PARSER_CONTENT;
5471 #ifdef DEBUG_PUSH 5575 #ifdef DEBUG_PUSH
5472 xmlGenericError(xmlGenericErrorContext, 5576 xmlGenericError(xmlGenericErrorContext,
5473 "HPP: entering CONTENT\n"); 5577 "HPP: entering CONTENT\n");
5474 #endif 5578 #endif
5475 break; 5579 break;
5476 } 5580 }
5477 » » if (in->cur[1] == '/') { 5581 » » if (next == '/') {
5478 ctxt->instate = XML_PARSER_END_TAG; 5582 ctxt->instate = XML_PARSER_END_TAG;
5479 ctxt->checkIndex = 0; 5583 ctxt->checkIndex = 0;
5480 #ifdef DEBUG_PUSH 5584 #ifdef DEBUG_PUSH
5481 xmlGenericError(xmlGenericErrorContext, 5585 xmlGenericError(xmlGenericErrorContext,
5482 "HPP: entering END_TAG\n"); 5586 "HPP: entering END_TAG\n");
5483 #endif 5587 #endif
5484 break; 5588 break;
5485 } 5589 }
5486 if ((!terminate) && 5590 if ((!terminate) &&
5487 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) 5591 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0))
5488 goto done; 5592 goto done;
5489 5593
5594 /* Capture start position */
5595 if (ctxt->record_info) {
5596 node_info.begin_pos = ctxt->input->consumed +
5597 (CUR_PTR - ctxt->input->base);
5598 node_info.begin_line = ctxt->input->line;
5599 }
5600
5601
5490 failed = htmlParseStartTag(ctxt); 5602 failed = htmlParseStartTag(ctxt);
5491 name = ctxt->name; 5603 name = ctxt->name;
5492 if ((failed == -1) || 5604 if ((failed == -1) ||
5493 (name == NULL)) { 5605 (name == NULL)) {
5494 if (CUR == '>') 5606 if (CUR == '>')
5495 NEXT; 5607 NEXT;
5496 break; 5608 break;
5497 } 5609 }
5498 5610
5499 /* 5611 /*
(...skipping 29 matching lines...) Expand all
5529 name, NULL); 5641 name, NULL);
5530 5642
5531 /* 5643 /*
5532 * end of parsing of this node. 5644 * end of parsing of this node.
5533 */ 5645 */
5534 if (xmlStrEqual(name, ctxt->name)) { 5646 if (xmlStrEqual(name, ctxt->name)) {
5535 nodePop(ctxt); 5647 nodePop(ctxt);
5536 htmlnamePop(ctxt); 5648 htmlnamePop(ctxt);
5537 } 5649 }
5538 5650
5651 if (ctxt->record_info)
5652 htmlNodeInfoPush(ctxt, &node_info);
5653
5539 ctxt->instate = XML_PARSER_CONTENT; 5654 ctxt->instate = XML_PARSER_CONTENT;
5540 #ifdef DEBUG_PUSH 5655 #ifdef DEBUG_PUSH
5541 xmlGenericError(xmlGenericErrorContext, 5656 xmlGenericError(xmlGenericErrorContext,
5542 "HPP: entering CONTENT\n"); 5657 "HPP: entering CONTENT\n");
5543 #endif 5658 #endif
5544 break; 5659 break;
5545 } 5660 }
5546 5661
5547 /* 5662 /*
5548 * Check for an Empty Element from DTD definition 5663 * Check for an Empty Element from DTD definition
5549 */ 5664 */
5550 if ((info != NULL) && (info->empty)) { 5665 if ((info != NULL) && (info->empty)) {
5551 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) 5666 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
5552 ctxt->sax->endElement(ctxt->userData, name); 5667 ctxt->sax->endElement(ctxt->userData, name);
5553 htmlnamePop(ctxt); 5668 htmlnamePop(ctxt);
5554 } 5669 }
5670
5671 if (ctxt->record_info)
5672 htmlNodeInfoPush(ctxt, &node_info);
5673
5555 ctxt->instate = XML_PARSER_CONTENT; 5674 ctxt->instate = XML_PARSER_CONTENT;
5556 #ifdef DEBUG_PUSH 5675 #ifdef DEBUG_PUSH
5557 xmlGenericError(xmlGenericErrorContext, 5676 xmlGenericError(xmlGenericErrorContext,
5558 "HPP: entering CONTENT\n"); 5677 "HPP: entering CONTENT\n");
5559 #endif 5678 #endif
5560 break; 5679 break;
5561 } 5680 }
5562 case XML_PARSER_CONTENT: { 5681 case XML_PARSER_CONTENT: {
5563 long cons; 5682 long cons;
5564 /* 5683 /*
5565 * Handle preparsed entities and charRef 5684 * Handle preparsed entities and charRef
5566 */ 5685 */
5567 if (ctxt->token != 0) { 5686 if (ctxt->token != 0) {
5568 xmlChar chr[2] = { 0 , 0 } ; 5687 xmlChar chr[2] = { 0 , 0 } ;
5569 5688
5570 chr[0] = (xmlChar) ctxt->token; 5689 chr[0] = (xmlChar) ctxt->token;
5571 htmlCheckParagraph(ctxt); 5690 htmlCheckParagraph(ctxt);
5572 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) 5691 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
5573 ctxt->sax->characters(ctxt->userData, chr, 1); 5692 ctxt->sax->characters(ctxt->userData, chr, 1);
5574 ctxt->token = 0; 5693 ctxt->token = 0;
5575 ctxt->checkIndex = 0; 5694 ctxt->checkIndex = 0;
5576 } 5695 }
5577 if ((avail == 1) && (terminate)) { 5696 if ((avail == 1) && (terminate)) {
5578 cur = in->cur[0]; 5697 cur = in->cur[0];
5579 if ((cur != '<') && (cur != '&')) { 5698 if ((cur != '<') && (cur != '&')) {
5580 if (ctxt->sax != NULL) { 5699 if (ctxt->sax != NULL) {
5581 if (IS_BLANK_CH(cur)) { 5700 if (IS_BLANK_CH(cur)) {
5582 » » » » if (ctxt->sax->ignorableWhitespace != NULL) 5701 » » » » if (ctxt->keepBlanks) {
5583 » » » » ctxt->sax->ignorableWhitespace( 5702 » » » » if (ctxt->sax->characters != NULL)
5584 » » » » » ctxt->userData, &cur, 1); 5703 » » » » » ctxt->sax->characters(
5704 » » » » » » ctxt->userData, &cur, 1);
5705 » » » » } else {
5706 » » » » if (ctxt->sax->ignorableWhitespace != NULL)
5707 » » » » » ctxt->sax->ignorableWhitespace(
5708 » » » » » » ctxt->userData, &cur, 1);
5709 » » » » }
5585 } else { 5710 } else {
5586 htmlCheckParagraph(ctxt); 5711 htmlCheckParagraph(ctxt);
5587 if (ctxt->sax->characters != NULL) 5712 if (ctxt->sax->characters != NULL)
5588 ctxt->sax->characters( 5713 ctxt->sax->characters(
5589 ctxt->userData, &cur, 1); 5714 ctxt->userData, &cur, 1);
5590 } 5715 }
5591 } 5716 }
5592 ctxt->token = 0; 5717 ctxt->token = 0;
5593 ctxt->checkIndex = 0; 5718 ctxt->checkIndex = 0;
5594 in->cur++; 5719 in->cur++;
5595 break; 5720 break;
5596 } 5721 }
5597 } 5722 }
5598 if (avail < 2) 5723 if (avail < 2)
5599 goto done; 5724 goto done;
5600 cur = in->cur[0]; 5725 cur = in->cur[0];
5601 next = in->cur[1]; 5726 next = in->cur[1];
5602 cons = ctxt->nbChars; 5727 cons = ctxt->nbChars;
5603 if ((xmlStrEqual(ctxt->name, BAD_CAST"script")) || 5728 if ((xmlStrEqual(ctxt->name, BAD_CAST"script")) ||
5604 (xmlStrEqual(ctxt->name, BAD_CAST"style"))) { 5729 (xmlStrEqual(ctxt->name, BAD_CAST"style"))) {
5605 /* 5730 /*
5606 * Handle SCRIPT/STYLE separately 5731 * Handle SCRIPT/STYLE separately
5607 */ 5732 */
5608 if (!terminate) { 5733 if (!terminate) {
5609 int idx; 5734 int idx;
5610 xmlChar val; 5735 xmlChar val;
5611 5736
5612 » » » idx = htmlParseLookupSequence(ctxt, '<', '/', 0, 0, 1); 5737 » » » idx = htmlParseLookupSequence(ctxt, '<', '/', 0, 0, 0);
5613 if (idx < 0) 5738 if (idx < 0)
5614 goto done; 5739 goto done;
5615 val = in->cur[idx + 2]; 5740 val = in->cur[idx + 2];
5616 if (val == 0) /* bad cut of input */ 5741 if (val == 0) /* bad cut of input */
5617 goto done; 5742 goto done;
5618 } 5743 }
5619 htmlParseScript(ctxt); 5744 htmlParseScript(ctxt);
5620 if ((cur == '<') && (next == '/')) { 5745 if ((cur == '<') && (next == '/')) {
5621 ctxt->instate = XML_PARSER_END_TAG; 5746 ctxt->instate = XML_PARSER_END_TAG;
5622 ctxt->checkIndex = 0; 5747 ctxt->checkIndex = 0;
(...skipping 236 matching lines...) Expand 10 before | Expand all | Expand 10 after
5859 htmlAutoCloseOnEnd(ctxt); 5984 htmlAutoCloseOnEnd(ctxt);
5860 if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { 5985 if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {
5861 /* 5986 /*
5862 * SAX: end of the document processing. 5987 * SAX: end of the document processing.
5863 */ 5988 */
5864 ctxt->instate = XML_PARSER_EOF; 5989 ctxt->instate = XML_PARSER_EOF;
5865 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) 5990 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
5866 ctxt->sax->endDocument(ctxt->userData); 5991 ctxt->sax->endDocument(ctxt->userData);
5867 } 5992 }
5868 } 5993 }
5869 if ((ctxt->myDoc != NULL) && 5994 if ((!(ctxt->options & HTML_PARSE_NODEFDTD)) && (ctxt->myDoc != NULL) &&
5870 ((terminate) || (ctxt->instate == XML_PARSER_EOF) || 5995 ((terminate) || (ctxt->instate == XML_PARSER_EOF) ||
5871 (ctxt->instate == XML_PARSER_EPILOG))) { 5996 (ctxt->instate == XML_PARSER_EPILOG))) {
5872 xmlDtdPtr dtd; 5997 xmlDtdPtr dtd;
5873 dtd = xmlGetIntSubset(ctxt->myDoc); 5998 dtd = xmlGetIntSubset(ctxt->myDoc);
5874 if (dtd == NULL) 5999 if (dtd == NULL)
5875 ctxt->myDoc->intSubset = 6000 ctxt->myDoc->intSubset =
5876 xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html", 6001 xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html",
5877 BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN", 6002 BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN",
5878 BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd"); 6003 BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd");
5879 } 6004 }
(...skipping 17 matching lines...) Expand all
5897 int 6022 int
5898 htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size, 6023 htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size,
5899 int terminate) { 6024 int terminate) {
5900 if ((ctxt == NULL) || (ctxt->input == NULL)) { 6025 if ((ctxt == NULL) || (ctxt->input == NULL)) {
5901 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, 6026 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
5902 "htmlParseChunk: context error\n", NULL, NULL); 6027 "htmlParseChunk: context error\n", NULL, NULL);
5903 return(XML_ERR_INTERNAL_ERROR); 6028 return(XML_ERR_INTERNAL_ERROR);
5904 } 6029 }
5905 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 6030 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
5906 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { 6031 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
5907 » int base = ctxt->input->base - ctxt->input->buf->buffer->content; 6032 » size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
5908 » int cur = ctxt->input->cur - ctxt->input->base; 6033 » size_t cur = ctxt->input->cur - ctxt->input->base;
5909 int res; 6034 int res;
5910 6035
5911 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 6036 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
5912 if (res < 0) { 6037 if (res < 0) {
5913 ctxt->errNo = XML_PARSER_EOF; 6038 ctxt->errNo = XML_PARSER_EOF;
5914 ctxt->disableSAX = 1; 6039 ctxt->disableSAX = 1;
5915 return (XML_PARSER_EOF); 6040 return (XML_PARSER_EOF);
5916 } 6041 }
5917 » ctxt->input->base = ctxt->input->buf->buffer->content + base; 6042 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
5918 » ctxt->input->cur = ctxt->input->base + cur;
5919 » ctxt->input->end =
5920 » &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
5921 #ifdef DEBUG_PUSH 6043 #ifdef DEBUG_PUSH
5922 xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); 6044 xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);
5923 #endif 6045 #endif
5924 6046
5925 #if 0 6047 #if 0
5926 if ((terminate) || (ctxt->input->buf->buffer->use > 80)) 6048 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
5927 htmlParseTryOrFinish(ctxt, terminate); 6049 htmlParseTryOrFinish(ctxt, terminate);
5928 #endif 6050 #endif
5929 } else if (ctxt->instate != XML_PARSER_EOF) { 6051 } else if (ctxt->instate != XML_PARSER_EOF) {
5930 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { 6052 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
5931 xmlParserInputBufferPtr in = ctxt->input->buf; 6053 xmlParserInputBufferPtr in = ctxt->input->buf;
5932 if ((in->encoder != NULL) && (in->buffer != NULL) && 6054 if ((in->encoder != NULL) && (in->buffer != NULL) &&
5933 (in->raw != NULL)) { 6055 (in->raw != NULL)) {
5934 int nbchars; 6056 int nbchars;
6057 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
6058 size_t current = ctxt->input->cur - ctxt->input->base;
5935 6059
5936 » » nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw); 6060 » » nbchars = xmlCharEncInput(in, terminate);
5937 if (nbchars < 0) { 6061 if (nbchars < 0) {
5938 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, 6062 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
5939 "encoder error\n", NULL, NULL); 6063 "encoder error\n", NULL, NULL);
5940 return(XML_ERR_INVALID_ENCODING); 6064 return(XML_ERR_INVALID_ENCODING);
5941 } 6065 }
6066 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
5942 } 6067 }
5943 } 6068 }
5944 } 6069 }
5945 htmlParseTryOrFinish(ctxt, terminate); 6070 htmlParseTryOrFinish(ctxt, terminate);
5946 if (terminate) { 6071 if (terminate) {
5947 if ((ctxt->instate != XML_PARSER_EOF) && 6072 if ((ctxt->instate != XML_PARSER_EOF) &&
5948 (ctxt->instate != XML_PARSER_EPILOG) && 6073 (ctxt->instate != XML_PARSER_EPILOG) &&
5949 (ctxt->instate != XML_PARSER_MISC)) { 6074 (ctxt->instate != XML_PARSER_MISC)) {
5950 ctxt->errNo = XML_ERR_DOCUMENT_END; 6075 ctxt->errNo = XML_ERR_DOCUMENT_END;
5951 ctxt->wellFormed = 0; 6076 ctxt->wellFormed = 0;
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after
6025 xmlFree(buf); 6150 xmlFree(buf);
6026 return(NULL); 6151 return(NULL);
6027 } 6152 }
6028 6153
6029 if (filename == NULL) 6154 if (filename == NULL)
6030 inputStream->filename = NULL; 6155 inputStream->filename = NULL;
6031 else 6156 else
6032 inputStream->filename = (char *) 6157 inputStream->filename = (char *)
6033 xmlCanonicPath((const xmlChar *) filename); 6158 xmlCanonicPath((const xmlChar *) filename);
6034 inputStream->buf = buf; 6159 inputStream->buf = buf;
6035 inputStream->base = inputStream->buf->buffer->content; 6160 xmlBufResetInput(buf->buffer, inputStream);
6036 inputStream->cur = inputStream->buf->buffer->content;
6037 inputStream->end =
6038 » &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
6039 6161
6040 inputPush(ctxt, inputStream); 6162 inputPush(ctxt, inputStream);
6041 6163
6042 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && 6164 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
6043 (ctxt->input->buf != NULL)) { 6165 (ctxt->input->buf != NULL)) {
6044 » int base = ctxt->input->base - ctxt->input->buf->buffer->content; 6166 » size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
6045 » int cur = ctxt->input->cur - ctxt->input->base; 6167 » size_t cur = ctxt->input->cur - ctxt->input->base;
6046 6168
6047 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); 6169 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
6048 6170
6049 » ctxt->input->base = ctxt->input->buf->buffer->content + base; 6171 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
6050 » ctxt->input->cur = ctxt->input->base + cur;
6051 » ctxt->input->end =
6052 » &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
6053 #ifdef DEBUG_PUSH 6172 #ifdef DEBUG_PUSH
6054 xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); 6173 xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);
6055 #endif 6174 #endif
6056 } 6175 }
6057 ctxt->progressive = 1; 6176 ctxt->progressive = 1;
6058 6177
6059 return(ctxt); 6178 return(ctxt);
6060 } 6179 }
6061 #endif /* LIBXML_PUSH_ENABLED */ 6180 #endif /* LIBXML_PUSH_ENABLED */
6062 6181
(...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after
6162 xmlFree(canonicFilename); 6281 xmlFree(canonicFilename);
6163 if (inputStream == NULL) { 6282 if (inputStream == NULL) {
6164 xmlFreeParserCtxt(ctxt); 6283 xmlFreeParserCtxt(ctxt);
6165 return(NULL); 6284 return(NULL);
6166 } 6285 }
6167 6286
6168 inputPush(ctxt, inputStream); 6287 inputPush(ctxt, inputStream);
6169 6288
6170 /* set encoding */ 6289 /* set encoding */
6171 if (encoding) { 6290 if (encoding) {
6172 content = xmlMallocAtomic (xmlStrlen(content_line) + strlen(encoding) + 1); 6291 size_t l = strlen(encoding);
6173 » if (content) { 6292
6174 » strcpy ((char *)content, (char *)content_line); 6293 » if (l < 1000) {
6175 strcat ((char *)content, (char *)encoding); 6294 » content = xmlMallocAtomic (xmlStrlen(content_line) + l + 1);
6176 htmlCheckEncoding (ctxt, content); 6295 » if (content) {
6177 » xmlFree (content); 6296 » » strcpy ((char *)content, (char *)content_line);
6297 » » strcat ((char *)content, (char *)encoding);
6298 » » htmlCheckEncoding (ctxt, content);
6299 » » xmlFree (content);
6300 » }
6178 } 6301 }
6179 } 6302 }
6180 6303
6181 return(ctxt); 6304 return(ctxt);
6182 } 6305 }
6183 6306
6184 /** 6307 /**
6185 * htmlSAXParseFile: 6308 * htmlSAXParseFile:
6186 * @filename: the filename 6309 * @filename: the filename
6187 * @encoding: a free form C string describing the HTML document encoding, or NULL 6310 * @encoding: a free form C string describing the HTML document encoding, or NULL
(...skipping 256 matching lines...) Expand 10 before | Expand all | Expand 10 after
6444 ctxt->standalone = -1; 6567 ctxt->standalone = -1;
6445 ctxt->hasExternalSubset = 0; 6568 ctxt->hasExternalSubset = 0;
6446 ctxt->hasPErefs = 0; 6569 ctxt->hasPErefs = 0;
6447 ctxt->html = 1; 6570 ctxt->html = 1;
6448 ctxt->external = 0; 6571 ctxt->external = 0;
6449 ctxt->instate = XML_PARSER_START; 6572 ctxt->instate = XML_PARSER_START;
6450 ctxt->token = 0; 6573 ctxt->token = 0;
6451 6574
6452 ctxt->wellFormed = 1; 6575 ctxt->wellFormed = 1;
6453 ctxt->nsWellFormed = 1; 6576 ctxt->nsWellFormed = 1;
6577 ctxt->disableSAX = 0;
6454 ctxt->valid = 1; 6578 ctxt->valid = 1;
6455 ctxt->vctxt.userData = ctxt; 6579 ctxt->vctxt.userData = ctxt;
6456 ctxt->vctxt.error = xmlParserValidityError; 6580 ctxt->vctxt.error = xmlParserValidityError;
6457 ctxt->vctxt.warning = xmlParserValidityWarning; 6581 ctxt->vctxt.warning = xmlParserValidityWarning;
6458 ctxt->record_info = 0; 6582 ctxt->record_info = 0;
6459 ctxt->nbChars = 0; 6583 ctxt->nbChars = 0;
6460 ctxt->checkIndex = 0; 6584 ctxt->checkIndex = 0;
6461 ctxt->inSubset = 0; 6585 ctxt->inSubset = 0;
6462 ctxt->errNo = XML_ERR_OK; 6586 ctxt->errNo = XML_ERR_OK;
6463 ctxt->depth = 0; 6587 ctxt->depth = 0;
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after
6523 } else 6647 } else
6524 ctxt->recovery = 0; 6648 ctxt->recovery = 0;
6525 if (options & HTML_PARSE_COMPACT) { 6649 if (options & HTML_PARSE_COMPACT) {
6526 ctxt->options |= HTML_PARSE_COMPACT; 6650 ctxt->options |= HTML_PARSE_COMPACT;
6527 options -= HTML_PARSE_COMPACT; 6651 options -= HTML_PARSE_COMPACT;
6528 } 6652 }
6529 if (options & XML_PARSE_HUGE) { 6653 if (options & XML_PARSE_HUGE) {
6530 ctxt->options |= XML_PARSE_HUGE; 6654 ctxt->options |= XML_PARSE_HUGE;
6531 options -= XML_PARSE_HUGE; 6655 options -= XML_PARSE_HUGE;
6532 } 6656 }
6657 if (options & HTML_PARSE_NODEFDTD) {
6658 ctxt->options |= HTML_PARSE_NODEFDTD;
6659 options -= HTML_PARSE_NODEFDTD;
6660 }
6661 if (options & HTML_PARSE_IGNORE_ENC) {
6662 ctxt->options |= HTML_PARSE_IGNORE_ENC;
6663 options -= HTML_PARSE_IGNORE_ENC;
6664 }
6665 if (options & HTML_PARSE_NOIMPLIED) {
6666 ctxt->options |= HTML_PARSE_NOIMPLIED;
6667 options -= HTML_PARSE_NOIMPLIED;
6668 }
6533 ctxt->dictNames = 0; 6669 ctxt->dictNames = 0;
6534 return (options); 6670 return (options);
6535 } 6671 }
6536 6672
6537 /** 6673 /**
6538 * htmlDoRead: 6674 * htmlDoRead:
6539 * @ctxt: an HTML parser context 6675 * @ctxt: an HTML parser context
6540 * @URL: the base URL to use for the document 6676 * @URL: the base URL to use for the document
6541 * @encoding: the document encoding, or NULL 6677 * @encoding: the document encoding, or NULL
6542 * @options: a combination of htmlParserOption(s) 6678 * @options: a combination of htmlParserOption(s)
(...skipping 126 matching lines...) Expand 10 before | Expand all | Expand 10 after
6669 */ 6805 */
6670 htmlDocPtr 6806 htmlDocPtr
6671 htmlReadFd(int fd, const char *URL, const char *encoding, int options) 6807 htmlReadFd(int fd, const char *URL, const char *encoding, int options)
6672 { 6808 {
6673 htmlParserCtxtPtr ctxt; 6809 htmlParserCtxtPtr ctxt;
6674 xmlParserInputBufferPtr input; 6810 xmlParserInputBufferPtr input;
6675 xmlParserInputPtr stream; 6811 xmlParserInputPtr stream;
6676 6812
6677 if (fd < 0) 6813 if (fd < 0)
6678 return (NULL); 6814 return (NULL);
6815 xmlInitParser();
6679 6816
6680 xmlInitParser(); 6817 xmlInitParser();
6681 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 6818 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
6682 if (input == NULL) 6819 if (input == NULL)
6683 return (NULL); 6820 return (NULL);
6684 ctxt = xmlNewParserCtxt(); 6821 ctxt = xmlNewParserCtxt();
6685 if (ctxt == NULL) { 6822 if (ctxt == NULL) {
6686 xmlFreeParserInputBuffer(input); 6823 xmlFreeParserInputBuffer(input);
6687 return (NULL); 6824 return (NULL);
6688 } 6825 }
(...skipping 27 matching lines...) Expand all
6716 htmlParserCtxtPtr ctxt; 6853 htmlParserCtxtPtr ctxt;
6717 xmlParserInputBufferPtr input; 6854 xmlParserInputBufferPtr input;
6718 xmlParserInputPtr stream; 6855 xmlParserInputPtr stream;
6719 6856
6720 if (ioread == NULL) 6857 if (ioread == NULL)
6721 return (NULL); 6858 return (NULL);
6722 xmlInitParser(); 6859 xmlInitParser();
6723 6860
6724 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 6861 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
6725 XML_CHAR_ENCODING_NONE); 6862 XML_CHAR_ENCODING_NONE);
6726 if (input == NULL) 6863 if (input == NULL) {
6864 if (ioclose != NULL)
6865 ioclose(ioctx);
6727 return (NULL); 6866 return (NULL);
6867 }
6728 ctxt = htmlNewParserCtxt(); 6868 ctxt = htmlNewParserCtxt();
6729 if (ctxt == NULL) { 6869 if (ctxt == NULL) {
6730 xmlFreeParserInputBuffer(input); 6870 xmlFreeParserInputBuffer(input);
6731 return (NULL); 6871 return (NULL);
6732 } 6872 }
6733 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 6873 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
6734 if (stream == NULL) { 6874 if (stream == NULL) {
6735 xmlFreeParserInputBuffer(input); 6875 xmlFreeParserInputBuffer(input);
6736 xmlFreeParserCtxt(ctxt); 6876 xmlFreeParserCtxt(ctxt);
6737 return (NULL); 6877 return (NULL);
(...skipping 18 matching lines...) Expand all
6756 htmlDocPtr 6896 htmlDocPtr
6757 htmlCtxtReadDoc(htmlParserCtxtPtr ctxt, const xmlChar * cur, 6897 htmlCtxtReadDoc(htmlParserCtxtPtr ctxt, const xmlChar * cur,
6758 const char *URL, const char *encoding, int options) 6898 const char *URL, const char *encoding, int options)
6759 { 6899 {
6760 xmlParserInputPtr stream; 6900 xmlParserInputPtr stream;
6761 6901
6762 if (cur == NULL) 6902 if (cur == NULL)
6763 return (NULL); 6903 return (NULL);
6764 if (ctxt == NULL) 6904 if (ctxt == NULL)
6765 return (NULL); 6905 return (NULL);
6906 xmlInitParser();
6766 6907
6767 htmlCtxtReset(ctxt); 6908 htmlCtxtReset(ctxt);
6768 6909
6769 stream = xmlNewStringInputStream(ctxt, cur); 6910 stream = xmlNewStringInputStream(ctxt, cur);
6770 if (stream == NULL) { 6911 if (stream == NULL) {
6771 return (NULL); 6912 return (NULL);
6772 } 6913 }
6773 inputPush(ctxt, stream); 6914 inputPush(ctxt, stream);
6774 return (htmlDoRead(ctxt, URL, encoding, options, 1)); 6915 return (htmlDoRead(ctxt, URL, encoding, options, 1));
6775 } 6916 }
(...skipping 13 matching lines...) Expand all
6789 htmlDocPtr 6930 htmlDocPtr
6790 htmlCtxtReadFile(htmlParserCtxtPtr ctxt, const char *filename, 6931 htmlCtxtReadFile(htmlParserCtxtPtr ctxt, const char *filename,
6791 const char *encoding, int options) 6932 const char *encoding, int options)
6792 { 6933 {
6793 xmlParserInputPtr stream; 6934 xmlParserInputPtr stream;
6794 6935
6795 if (filename == NULL) 6936 if (filename == NULL)
6796 return (NULL); 6937 return (NULL);
6797 if (ctxt == NULL) 6938 if (ctxt == NULL)
6798 return (NULL); 6939 return (NULL);
6940 xmlInitParser();
6799 6941
6800 htmlCtxtReset(ctxt); 6942 htmlCtxtReset(ctxt);
6801 6943
6802 stream = xmlLoadExternalEntity(filename, NULL, ctxt); 6944 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
6803 if (stream == NULL) { 6945 if (stream == NULL) {
6804 return (NULL); 6946 return (NULL);
6805 } 6947 }
6806 inputPush(ctxt, stream); 6948 inputPush(ctxt, stream);
6807 return (htmlDoRead(ctxt, NULL, encoding, options, 1)); 6949 return (htmlDoRead(ctxt, NULL, encoding, options, 1));
6808 } 6950 }
(...skipping 16 matching lines...) Expand all
6825 htmlCtxtReadMemory(htmlParserCtxtPtr ctxt, const char *buffer, int size, 6967 htmlCtxtReadMemory(htmlParserCtxtPtr ctxt, const char *buffer, int size,
6826 const char *URL, const char *encoding, int options) 6968 const char *URL, const char *encoding, int options)
6827 { 6969 {
6828 xmlParserInputBufferPtr input; 6970 xmlParserInputBufferPtr input;
6829 xmlParserInputPtr stream; 6971 xmlParserInputPtr stream;
6830 6972
6831 if (ctxt == NULL) 6973 if (ctxt == NULL)
6832 return (NULL); 6974 return (NULL);
6833 if (buffer == NULL) 6975 if (buffer == NULL)
6834 return (NULL); 6976 return (NULL);
6977 xmlInitParser();
6835 6978
6836 htmlCtxtReset(ctxt); 6979 htmlCtxtReset(ctxt);
6837 6980
6838 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); 6981 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
6839 if (input == NULL) { 6982 if (input == NULL) {
6840 return(NULL); 6983 return(NULL);
6841 } 6984 }
6842 6985
6843 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 6986 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
6844 if (stream == NULL) { 6987 if (stream == NULL) {
(...skipping 22 matching lines...) Expand all
6867 htmlCtxtReadFd(htmlParserCtxtPtr ctxt, int fd, 7010 htmlCtxtReadFd(htmlParserCtxtPtr ctxt, int fd,
6868 const char *URL, const char *encoding, int options) 7011 const char *URL, const char *encoding, int options)
6869 { 7012 {
6870 xmlParserInputBufferPtr input; 7013 xmlParserInputBufferPtr input;
6871 xmlParserInputPtr stream; 7014 xmlParserInputPtr stream;
6872 7015
6873 if (fd < 0) 7016 if (fd < 0)
6874 return (NULL); 7017 return (NULL);
6875 if (ctxt == NULL) 7018 if (ctxt == NULL)
6876 return (NULL); 7019 return (NULL);
7020 xmlInitParser();
6877 7021
6878 htmlCtxtReset(ctxt); 7022 htmlCtxtReset(ctxt);
6879 7023
6880 7024
6881 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); 7025 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
6882 if (input == NULL) 7026 if (input == NULL)
6883 return (NULL); 7027 return (NULL);
6884 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 7028 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
6885 if (stream == NULL) { 7029 if (stream == NULL) {
6886 xmlFreeParserInputBuffer(input); 7030 xmlFreeParserInputBuffer(input);
(...skipping 24 matching lines...) Expand all
6911 const char *URL, 7055 const char *URL,
6912 const char *encoding, int options) 7056 const char *encoding, int options)
6913 { 7057 {
6914 xmlParserInputBufferPtr input; 7058 xmlParserInputBufferPtr input;
6915 xmlParserInputPtr stream; 7059 xmlParserInputPtr stream;
6916 7060
6917 if (ioread == NULL) 7061 if (ioread == NULL)
6918 return (NULL); 7062 return (NULL);
6919 if (ctxt == NULL) 7063 if (ctxt == NULL)
6920 return (NULL); 7064 return (NULL);
7065 xmlInitParser();
6921 7066
6922 htmlCtxtReset(ctxt); 7067 htmlCtxtReset(ctxt);
6923 7068
6924 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, 7069 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
6925 XML_CHAR_ENCODING_NONE); 7070 XML_CHAR_ENCODING_NONE);
6926 if (input == NULL) 7071 if (input == NULL) {
7072 if (ioclose != NULL)
7073 ioclose(ioctx);
6927 return (NULL); 7074 return (NULL);
7075 }
6928 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); 7076 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
6929 if (stream == NULL) { 7077 if (stream == NULL) {
6930 xmlFreeParserInputBuffer(input); 7078 xmlFreeParserInputBuffer(input);
6931 return (NULL); 7079 return (NULL);
6932 } 7080 }
6933 inputPush(ctxt, stream); 7081 inputPush(ctxt, stream);
6934 return (htmlDoRead(ctxt, URL, encoding, options, 1)); 7082 return (htmlDoRead(ctxt, URL, encoding, options, 1));
6935 } 7083 }
6936 7084
6937 #define bottom_HTMLparser 7085 #define bottom_HTMLparser
6938 #include "elfgcchack.h" 7086 #include "elfgcchack.h"
6939 #endif /* LIBXML_HTML_ENABLED */ 7087 #endif /* LIBXML_HTML_ENABLED */
OLDNEW
« no previous file with comments | « third_party/libxml/src/DOCBparser.c ('k') | third_party/libxml/src/HTMLtree.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698