OLD | NEW |
1 /* | 1 /* |
2 * HTMLparser.c : an HTML 4.0 non-verifying parser | 2 * HTMLparser.c : an HTML 4.0 non-verifying parser |
3 * | 3 * |
4 * See Copyright for the status of this software. | 4 * See Copyright for the status of this software. |
5 * | 5 * |
6 * daniel@veillard.com | 6 * daniel@veillard.com |
7 */ | 7 */ |
8 | 8 |
9 #define IN_LIBXML | 9 #define IN_LIBXML |
10 #include "libxml.h" | 10 #include "libxml.h" |
(...skipping 26 matching lines...) Expand all Loading... |
37 #include <libxml/xmlerror.h> | 37 #include <libxml/xmlerror.h> |
38 #include <libxml/HTMLparser.h> | 38 #include <libxml/HTMLparser.h> |
39 #include <libxml/HTMLtree.h> | 39 #include <libxml/HTMLtree.h> |
40 #include <libxml/entities.h> | 40 #include <libxml/entities.h> |
41 #include <libxml/encoding.h> | 41 #include <libxml/encoding.h> |
42 #include <libxml/valid.h> | 42 #include <libxml/valid.h> |
43 #include <libxml/xmlIO.h> | 43 #include <libxml/xmlIO.h> |
44 #include <libxml/globals.h> | 44 #include <libxml/globals.h> |
45 #include <libxml/uri.h> | 45 #include <libxml/uri.h> |
46 | 46 |
| 47 #include "buf.h" |
| 48 #include "enc.h" |
| 49 |
47 #define HTML_MAX_NAMELEN 1000 | 50 #define HTML_MAX_NAMELEN 1000 |
48 #define HTML_PARSER_BIG_BUFFER_SIZE 1000 | 51 #define HTML_PARSER_BIG_BUFFER_SIZE 1000 |
49 #define HTML_PARSER_BUFFER_SIZE 100 | 52 #define HTML_PARSER_BUFFER_SIZE 100 |
50 | 53 |
51 /* #define DEBUG */ | 54 /* #define DEBUG */ |
52 /* #define DEBUG_PUSH */ | 55 /* #define DEBUG_PUSH */ |
53 | 56 |
54 static int htmlOmittedDefaultValue = 1; | 57 static int htmlOmittedDefaultValue = 1; |
55 | 58 |
56 xmlChar * htmlDecodeEntities(htmlParserCtxtPtr ctxt, int len, | 59 xmlChar * htmlDecodeEntities(htmlParserCtxtPtr ctxt, int len, |
(...skipping 663 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
720 static const char* const input_attrs[] = { ATTRS, "type", "name", "value", "chec
ked", "disabled", "readonly", "size", "maxlength", "src", "alt", "usemap", "isma
p", "tabindex", "accesskey", "onfocus", "onblur", "onselect", "onchange", "accep
t", NULL } ; | 723 static const char* const input_attrs[] = { ATTRS, "type", "name", "value", "chec
ked", "disabled", "readonly", "size", "maxlength", "src", "alt", "usemap", "isma
p", "tabindex", "accesskey", "onfocus", "onblur", "onselect", "onchange", "accep
t", NULL } ; |
721 static const char* const prompt_attrs[] = { COREATTRS, I18N, "prompt", NULL } ; | 724 static const char* const prompt_attrs[] = { COREATTRS, I18N, "prompt", NULL } ; |
722 static const char* const label_attrs[] = { ATTRS, "for", "accesskey", "onfocus",
"onblur", NULL } ; | 725 static const char* const label_attrs[] = { ATTRS, "for", "accesskey", "onfocus",
"onblur", NULL } ; |
723 static const char* const legend_attrs[] = { ATTRS, "accesskey", NULL } ; | 726 static const char* const legend_attrs[] = { ATTRS, "accesskey", NULL } ; |
724 static const char* const align_attr[] = { "align", NULL } ; | 727 static const char* const align_attr[] = { "align", NULL } ; |
725 static const char* const link_attrs[] = { ATTRS, "charset", "href", "hreflang",
"type", "rel", "rev", "media", NULL } ; | 728 static const char* const link_attrs[] = { ATTRS, "charset", "href", "hreflang",
"type", "rel", "rev", "media", NULL } ; |
726 static const char* const map_contents[] = { BLOCK, "area", NULL } ; | 729 static const char* const map_contents[] = { BLOCK, "area", NULL } ; |
727 static const char* const name_attr[] = { "name", NULL } ; | 730 static const char* const name_attr[] = { "name", NULL } ; |
728 static const char* const action_attr[] = { "action", NULL } ; | 731 static const char* const action_attr[] = { "action", NULL } ; |
729 static const char* const blockli_elt[] = { BLOCK, "li", NULL } ; | 732 static const char* const blockli_elt[] = { BLOCK, "li", NULL } ; |
730 static const char* const meta_attrs[] = { I18N, "http-equiv", "name", "scheme",
NULL } ; | 733 static const char* const meta_attrs[] = { I18N, "http-equiv", "name", "scheme",
"charset", NULL } ; |
731 static const char* const content_attr[] = { "content", NULL } ; | 734 static const char* const content_attr[] = { "content", NULL } ; |
732 static const char* const type_attr[] = { "type", NULL } ; | 735 static const char* const type_attr[] = { "type", NULL } ; |
733 static const char* const noframes_content[] = { "body", FLOW MODIFIER, NULL } ; | 736 static const char* const noframes_content[] = { "body", FLOW MODIFIER, NULL } ; |
734 static const char* const object_contents[] = { FLOW, "param", NULL } ; | 737 static const char* const object_contents[] = { FLOW, "param", NULL } ; |
735 static const char* const object_attrs[] = { ATTRS, "declare", "classid", "codeba
se", "data", "type", "codetype", "archive", "standby", "height", "width", "usema
p", "name", "tabindex", NULL } ; | 738 static const char* const object_attrs[] = { ATTRS, "declare", "classid", "codeba
se", "data", "type", "codetype", "archive", "standby", "height", "width", "usema
p", "name", "tabindex", NULL } ; |
736 static const char* const object_depr[] = { "align", "border", "hspace", "vspace"
, NULL } ; | 739 static const char* const object_depr[] = { "align", "border", "hspace", "vspace"
, NULL } ; |
737 static const char* const ol_attrs[] = { "type", "compact", "start", NULL} ; | 740 static const char* const ol_attrs[] = { "type", "compact", "start", NULL} ; |
738 static const char* const option_elt[] = { "option", NULL } ; | 741 static const char* const option_elt[] = { "option", NULL } ; |
739 static const char* const optgroup_attrs[] = { ATTRS, "disabled", NULL } ; | 742 static const char* const optgroup_attrs[] = { ATTRS, "disabled", NULL } ; |
740 static const char* const option_attrs[] = { ATTRS, "disabled", "label", "selecte
d", "value", NULL } ; | 743 static const char* const option_attrs[] = { ATTRS, "disabled", "label", "selecte
d", "value", NULL } ; |
(...skipping 332 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1073 "dt", "p", "menu", "dir", "address", "pre", "listing", "xmp", | 1076 "dt", "p", "menu", "dir", "address", "pre", "listing", "xmp", |
1074 "head", "dd", NULL, | 1077 "head", "dd", NULL, |
1075 "dd", "p", "menu", "dir", "address", "pre", "listing", "xmp", | 1078 "dd", "p", "menu", "dir", "address", "pre", "listing", "xmp", |
1076 "head", "dt", NULL, | 1079 "head", "dt", NULL, |
1077 "ul", "p", "head", "ol", "menu", "dir", "address", "pre", | 1080 "ul", "p", "head", "ol", "menu", "dir", "address", "pre", |
1078 "listing", "xmp", NULL, | 1081 "listing", "xmp", NULL, |
1079 "ol", "p", "head", "ul", NULL, | 1082 "ol", "p", "head", "ul", NULL, |
1080 "menu", "p", "head", "ul", NULL, | 1083 "menu", "p", "head", "ul", NULL, |
1081 "p", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", FONTSTYLE, NULL
, | 1084 "p", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", FONTSTYLE, NULL
, |
1082 "div", "p", "head", NULL, | 1085 "div", "p", "head", NULL, |
1083 "noscript",» "p", "head", NULL, | 1086 "noscript",» "p", NULL, |
1084 "center", "font", "b", "i", "p", "head", NULL, | 1087 "center", "font", "b", "i", "p", "head", NULL, |
1085 "a",» » "a", NULL, | 1088 "a",» » "a", "head", NULL, |
1086 "caption", "p", NULL, | 1089 "caption", "p", NULL, |
1087 "colgroup", "caption", "colgroup", "col", "p", NULL, | 1090 "colgroup", "caption", "colgroup", "col", "p", NULL, |
1088 "col", "caption", "col", "p", NULL, | 1091 "col", "caption", "col", "p", NULL, |
1089 "table", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", "pre", | 1092 "table", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", "pre", |
1090 "listing", "xmp", "a", NULL, | 1093 "listing", "xmp", "a", NULL, |
1091 "th", "th", "td", "p", "span", "font", "a", "b", "i", "u", NULL, | 1094 "th", "th", "td", "p", "span", "font", "a", "b", "i", "u", NULL, |
1092 "td", "th", "td", "p", "span", "font", "a", "b", "i", "u", NULL, | 1095 "td", "th", "td", "p", "span", "font", "a", "b", "i", "u", NULL, |
1093 "tr", "th", "td", "tr", "caption", "col", "colgroup", "p", NULL, | 1096 "tr", "th", "td", "tr", "caption", "col", "colgroup", "p", NULL, |
1094 "thead", "caption", "col", "colgroup", NULL, | 1097 "thead", "caption", "col", "colgroup", NULL, |
1095 "tfoot", "th", "td", "tr", "caption", "col", "colgroup", "thead", | 1098 "tfoot", "th", "td", "tr", "caption", "col", "colgroup", "thead", |
1096 "tbody", "p", NULL, | 1099 "tbody", "p", NULL, |
1097 "tbody", "th", "td", "tr", "caption", "col", "colgroup", "thead", | 1100 "tbody", "th", "td", "tr", "caption", "col", "colgroup", "thead", |
1098 "tfoot", "tbody", "p", NULL, | 1101 "tfoot", "tbody", "p", NULL, |
1099 "optgroup", "option", NULL, | 1102 "optgroup", "option", NULL, |
1100 "option", "option", NULL, | 1103 "option", "option", NULL, |
1101 "fieldset", "legend", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", | 1104 "fieldset", "legend", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", |
1102 "pre", "listing", "xmp", "a", NULL, | 1105 "pre", "listing", "xmp", "a", NULL, |
| 1106 /* most tags in FONTSTYLE, PHRASE and SPECIAL should close <head> */ |
| 1107 "tt", "head", NULL, |
| 1108 "i", "head", NULL, |
| 1109 "b", "head", NULL, |
| 1110 "u", "head", NULL, |
| 1111 "s", "head", NULL, |
| 1112 "strike", "head", NULL, |
| 1113 "big", "head", NULL, |
| 1114 "small", "head", NULL, |
| 1115 |
| 1116 "em", "head", NULL, |
| 1117 "strong", "head", NULL, |
| 1118 "dfn", "head", NULL, |
| 1119 "code", "head", NULL, |
| 1120 "samp", "head", NULL, |
| 1121 "kbd", "head", NULL, |
| 1122 "var", "head", NULL, |
| 1123 "cite", "head", NULL, |
| 1124 "abbr", "head", NULL, |
| 1125 "acronym", "head", NULL, |
| 1126 |
| 1127 /* "a" */ |
| 1128 "img", "head", NULL, |
| 1129 /* "applet" */ |
| 1130 /* "embed" */ |
| 1131 /* "object" */ |
| 1132 "font", "head", NULL, |
| 1133 /* "basefont" */ |
| 1134 "br", "head", NULL, |
| 1135 /* "script" */ |
| 1136 "map", "head", NULL, |
| 1137 "q", "head", NULL, |
| 1138 "sub", "head", NULL, |
| 1139 "sup", "head", NULL, |
| 1140 "span", "head", NULL, |
| 1141 "bdo", "head", NULL, |
| 1142 "iframe", "head", NULL, |
1103 NULL | 1143 NULL |
1104 }; | 1144 }; |
1105 | 1145 |
1106 /* | 1146 /* |
1107 * The list of HTML elements which are supposed not to have | 1147 * The list of HTML elements which are supposed not to have |
1108 * CDATA content and where a p element will be implied | 1148 * CDATA content and where a p element will be implied |
1109 * | 1149 * |
1110 * TODO: extend that list by reading the HTML SGML DTD on | 1150 * TODO: extend that list by reading the HTML SGML DTD on |
1111 * implied paragraph | 1151 * implied paragraph |
1112 */ | 1152 */ |
(...skipping 17 matching lines...) Expand all Loading... |
1130 "onmousemove", | 1170 "onmousemove", |
1131 "onmouseout", | 1171 "onmouseout", |
1132 "onkeypress", | 1172 "onkeypress", |
1133 "onkeydown", | 1173 "onkeydown", |
1134 "onkeyup", | 1174 "onkeyup", |
1135 "onload", | 1175 "onload", |
1136 "onunload", | 1176 "onunload", |
1137 "onfocus", | 1177 "onfocus", |
1138 "onblur", | 1178 "onblur", |
1139 "onsubmit", | 1179 "onsubmit", |
1140 "onrest", | 1180 "onreset", |
1141 "onchange", | 1181 "onchange", |
1142 "onselect" | 1182 "onselect" |
1143 }; | 1183 }; |
1144 | 1184 |
1145 /* | 1185 /* |
1146 * This table is used by the htmlparser to know what to do with | 1186 * This table is used by the htmlparser to know what to do with |
1147 * broken html pages. By assigning different priorities to different | 1187 * broken html pages. By assigning different priorities to different |
1148 * elements the parser can decide how to handle extra endtags. | 1188 * elements the parser can decide how to handle extra endtags. |
1149 * Endtags are only allowed to close elements with lower or equal | 1189 * Endtags are only allowed to close elements with lower or equal |
1150 * priority. | 1190 * priority. |
(...skipping 1729 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2880 ctxt->sax->characters(ctxt->userData, buf, nbchar); | 2920 ctxt->sax->characters(ctxt->userData, buf, nbchar); |
2881 } | 2921 } |
2882 nbchar = 0; | 2922 nbchar = 0; |
2883 } | 2923 } |
2884 GROW; | 2924 GROW; |
2885 NEXTL(l); | 2925 NEXTL(l); |
2886 cur = CUR_CHAR(l); | 2926 cur = CUR_CHAR(l); |
2887 } | 2927 } |
2888 | 2928 |
2889 if ((!(IS_CHAR_CH(cur))) && (!((cur == 0) && (ctxt->progressive)))) { | 2929 if ((!(IS_CHAR_CH(cur))) && (!((cur == 0) && (ctxt->progressive)))) { |
2890 » htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR, | 2930 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR, |
2891 » "Invalid char in CDATA 0x%X\n", cur); | 2931 "Invalid char in CDATA 0x%X\n", cur); |
2892 » NEXT; | 2932 if (ctxt->input->cur < ctxt->input->end) { |
| 2933 NEXT; |
| 2934 } |
2893 } | 2935 } |
2894 | 2936 |
2895 if ((nbchar != 0) && (ctxt->sax != NULL) && (!ctxt->disableSAX)) { | 2937 if ((nbchar != 0) && (ctxt->sax != NULL) && (!ctxt->disableSAX)) { |
2896 if (ctxt->sax->cdataBlock!= NULL) { | 2938 if (ctxt->sax->cdataBlock!= NULL) { |
2897 /* | 2939 /* |
2898 * Insert as CDATA, which is the same as HTML_PRESERVE_NODE | 2940 * Insert as CDATA, which is the same as HTML_PRESERVE_NODE |
2899 */ | 2941 */ |
2900 ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar); | 2942 ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar); |
2901 } else if (ctxt->sax->characters != NULL) { | 2943 } else if (ctxt->sax->characters != NULL) { |
2902 ctxt->sax->characters(ctxt->userData, buf, nbchar); | 2944 ctxt->sax->characters(ctxt->userData, buf, nbchar); |
(...skipping 29 matching lines...) Expand all Loading... |
2932 "Invalid char in CDATA 0x%X\n", cur); | 2974 "Invalid char in CDATA 0x%X\n", cur); |
2933 } else { | 2975 } else { |
2934 COPY_BUF(l,buf,nbchar,cur); | 2976 COPY_BUF(l,buf,nbchar,cur); |
2935 } | 2977 } |
2936 if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) { | 2978 if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) { |
2937 /* | 2979 /* |
2938 * Ok the segment is to be consumed as chars. | 2980 * Ok the segment is to be consumed as chars. |
2939 */ | 2981 */ |
2940 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { | 2982 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { |
2941 if (areBlanks(ctxt, buf, nbchar)) { | 2983 if (areBlanks(ctxt, buf, nbchar)) { |
2942 » » if (ctxt->sax->ignorableWhitespace != NULL) | 2984 » » if (ctxt->keepBlanks) { |
2943 » » » ctxt->sax->ignorableWhitespace(ctxt->userData, | 2985 » » » if (ctxt->sax->characters != NULL) |
2944 » » » buf, nbchar); | 2986 » » » ctxt->sax->characters(ctxt->userData, buf, nbchar); |
| 2987 » » } else { |
| 2988 » » » if (ctxt->sax->ignorableWhitespace != NULL) |
| 2989 » » » ctxt->sax->ignorableWhitespace(ctxt->userData, |
| 2990 » » » buf, nbchar); |
| 2991 » » } |
2945 } else { | 2992 } else { |
2946 htmlCheckParagraph(ctxt); | 2993 htmlCheckParagraph(ctxt); |
2947 if (ctxt->sax->characters != NULL) | 2994 if (ctxt->sax->characters != NULL) |
2948 ctxt->sax->characters(ctxt->userData, buf, nbchar); | 2995 ctxt->sax->characters(ctxt->userData, buf, nbchar); |
2949 } | 2996 } |
2950 } | 2997 } |
2951 nbchar = 0; | 2998 nbchar = 0; |
2952 } | 2999 } |
2953 NEXTL(l); | 3000 NEXTL(l); |
2954 chunk++; | 3001 chunk++; |
(...skipping 10 matching lines...) Expand all Loading... |
2965 } | 3012 } |
2966 } | 3013 } |
2967 if (nbchar != 0) { | 3014 if (nbchar != 0) { |
2968 buf[nbchar] = 0; | 3015 buf[nbchar] = 0; |
2969 | 3016 |
2970 /* | 3017 /* |
2971 * Ok the segment is to be consumed as chars. | 3018 * Ok the segment is to be consumed as chars. |
2972 */ | 3019 */ |
2973 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { | 3020 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { |
2974 if (areBlanks(ctxt, buf, nbchar)) { | 3021 if (areBlanks(ctxt, buf, nbchar)) { |
2975 » » if (ctxt->sax->ignorableWhitespace != NULL) | 3022 » » if (ctxt->keepBlanks) { |
2976 » » ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); | 3023 » » if (ctxt->sax->characters != NULL) |
| 3024 » » » ctxt->sax->characters(ctxt->userData, buf, nbchar); |
| 3025 » » } else { |
| 3026 » » if (ctxt->sax->ignorableWhitespace != NULL) |
| 3027 » » » ctxt->sax->ignorableWhitespace(ctxt->userData, |
| 3028 » » » buf, nbchar); |
| 3029 » » } |
2977 } else { | 3030 } else { |
2978 htmlCheckParagraph(ctxt); | 3031 htmlCheckParagraph(ctxt); |
2979 if (ctxt->sax->characters != NULL) | 3032 if (ctxt->sax->characters != NULL) |
2980 ctxt->sax->characters(ctxt->userData, buf, nbchar); | 3033 ctxt->sax->characters(ctxt->userData, buf, nbchar); |
2981 } | 3034 } |
2982 } | 3035 } |
2983 } else { | 3036 } else { |
2984 /* | 3037 /* |
2985 * Loop detection | 3038 * Loop detection |
2986 */ | 3039 */ |
(...skipping 281 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3268 SKIP(3); | 3321 SKIP(3); |
3269 while (CUR != ';') { | 3322 while (CUR != ';') { |
3270 if ((CUR >= '0') && (CUR <= '9')) | 3323 if ((CUR >= '0') && (CUR <= '9')) |
3271 val = val * 16 + (CUR - '0'); | 3324 val = val * 16 + (CUR - '0'); |
3272 else if ((CUR >= 'a') && (CUR <= 'f')) | 3325 else if ((CUR >= 'a') && (CUR <= 'f')) |
3273 val = val * 16 + (CUR - 'a') + 10; | 3326 val = val * 16 + (CUR - 'a') + 10; |
3274 else if ((CUR >= 'A') && (CUR <= 'F')) | 3327 else if ((CUR >= 'A') && (CUR <= 'F')) |
3275 val = val * 16 + (CUR - 'A') + 10; | 3328 val = val * 16 + (CUR - 'A') + 10; |
3276 else { | 3329 else { |
3277 htmlParseErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, | 3330 htmlParseErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, |
3278 » » "htmlParseCharRef: missing semicolumn\n", | 3331 » » "htmlParseCharRef: missing semicolon\n", |
3279 NULL, NULL); | 3332 NULL, NULL); |
3280 break; | 3333 break; |
3281 } | 3334 } |
3282 NEXT; | 3335 NEXT; |
3283 } | 3336 } |
3284 if (CUR == ';') | 3337 if (CUR == ';') |
3285 NEXT; | 3338 NEXT; |
3286 } else if ((CUR == '&') && (NXT(1) == '#')) { | 3339 } else if ((CUR == '&') && (NXT(1) == '#')) { |
3287 SKIP(2); | 3340 SKIP(2); |
3288 while (CUR != ';') { | 3341 while (CUR != ';') { |
3289 if ((CUR >= '0') && (CUR <= '9')) | 3342 if ((CUR >= '0') && (CUR <= '9')) |
3290 val = val * 10 + (CUR - '0'); | 3343 val = val * 10 + (CUR - '0'); |
3291 else { | 3344 else { |
3292 htmlParseErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, | 3345 htmlParseErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, |
3293 » » "htmlParseCharRef: missing semicolumn\n", | 3346 » » "htmlParseCharRef: missing semicolon\n", |
3294 NULL, NULL); | 3347 NULL, NULL); |
3295 break; | 3348 break; |
3296 } | 3349 } |
3297 NEXT; | 3350 NEXT; |
3298 } | 3351 } |
3299 if (CUR == ';') | 3352 if (CUR == ';') |
3300 NEXT; | 3353 NEXT; |
3301 } else { | 3354 } else { |
3302 htmlParseErr(ctxt, XML_ERR_INVALID_CHARREF, | 3355 htmlParseErr(ctxt, XML_ERR_INVALID_CHARREF, |
3303 "htmlParseCharRef: invalid value\n", NULL, NULL); | 3356 "htmlParseCharRef: invalid value\n", NULL, NULL); |
(...skipping 122 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3426 NEXT; | 3479 NEXT; |
3427 SKIP_BLANKS; | 3480 SKIP_BLANKS; |
3428 val = htmlParseAttValue(ctxt); | 3481 val = htmlParseAttValue(ctxt); |
3429 } | 3482 } |
3430 | 3483 |
3431 *value = val; | 3484 *value = val; |
3432 return(name); | 3485 return(name); |
3433 } | 3486 } |
3434 | 3487 |
3435 /** | 3488 /** |
3436 * htmlCheckEncoding: | 3489 * htmlCheckEncodingDirect: |
3437 * @ctxt: an HTML parser context | 3490 * @ctxt: an HTML parser context |
3438 * @attvalue: the attribute value | 3491 * @attvalue: the attribute value |
3439 * | 3492 * |
3440 * Checks an http-equiv attribute from a Meta tag to detect | 3493 * Checks an attribute value to detect |
3441 * the encoding | 3494 * the encoding |
3442 * If a new encoding is detected the parser is switched to decode | 3495 * If a new encoding is detected the parser is switched to decode |
3443 * it and pass UTF8 | 3496 * it and pass UTF8 |
3444 */ | 3497 */ |
3445 static void | 3498 static void |
3446 htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) { | 3499 htmlCheckEncodingDirect(htmlParserCtxtPtr ctxt, const xmlChar *encoding) { |
3447 const xmlChar *encoding; | |
3448 | 3500 |
3449 if ((ctxt == NULL) || (attvalue == NULL)) | 3501 if ((ctxt == NULL) || (encoding == NULL) || |
| 3502 (ctxt->options & HTML_PARSE_IGNORE_ENC)) |
3450 return; | 3503 return; |
3451 | 3504 |
3452 /* do not change encoding */ | 3505 /* do not change encoding */ |
3453 if (ctxt->input->encoding != NULL) | 3506 if (ctxt->input->encoding != NULL) |
3454 return; | 3507 return; |
3455 | 3508 |
3456 encoding = xmlStrcasestr(attvalue, BAD_CAST"charset="); | |
3457 if (encoding != NULL) { | |
3458 encoding += 8; | |
3459 } else { | |
3460 encoding = xmlStrcasestr(attvalue, BAD_CAST"charset ="); | |
3461 if (encoding != NULL) | |
3462 encoding += 9; | |
3463 } | |
3464 if (encoding != NULL) { | 3509 if (encoding != NULL) { |
3465 xmlCharEncoding enc; | 3510 xmlCharEncoding enc; |
3466 xmlCharEncodingHandlerPtr handler; | 3511 xmlCharEncodingHandlerPtr handler; |
3467 | 3512 |
3468 while ((*encoding == ' ') || (*encoding == '\t')) encoding++; | 3513 while ((*encoding == ' ') || (*encoding == '\t')) encoding++; |
3469 | 3514 |
3470 if (ctxt->input->encoding != NULL) | 3515 if (ctxt->input->encoding != NULL) |
3471 xmlFree((xmlChar *) ctxt->input->encoding); | 3516 xmlFree((xmlChar *) ctxt->input->encoding); |
3472 ctxt->input->encoding = xmlStrdup(encoding); | 3517 ctxt->input->encoding = xmlStrdup(encoding); |
3473 | 3518 |
(...skipping 17 matching lines...) Expand all Loading... |
3491 ctxt->charset = XML_CHAR_ENCODING_UTF8; | 3536 ctxt->charset = XML_CHAR_ENCODING_UTF8; |
3492 } else { | 3537 } else { |
3493 /* | 3538 /* |
3494 * fallback for unknown encodings | 3539 * fallback for unknown encodings |
3495 */ | 3540 */ |
3496 handler = xmlFindCharEncodingHandler((const char *) encoding); | 3541 handler = xmlFindCharEncodingHandler((const char *) encoding); |
3497 if (handler != NULL) { | 3542 if (handler != NULL) { |
3498 xmlSwitchToEncoding(ctxt, handler); | 3543 xmlSwitchToEncoding(ctxt, handler); |
3499 ctxt->charset = XML_CHAR_ENCODING_UTF8; | 3544 ctxt->charset = XML_CHAR_ENCODING_UTF8; |
3500 } else { | 3545 } else { |
3501 » » ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; | 3546 » » htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, |
| 3547 » » "htmlCheckEncoding: unknown encoding %s\n", |
| 3548 » » » encoding, NULL); |
3502 } | 3549 } |
3503 } | 3550 } |
3504 | 3551 |
3505 if ((ctxt->input->buf != NULL) && | 3552 if ((ctxt->input->buf != NULL) && |
3506 (ctxt->input->buf->encoder != NULL) && | 3553 (ctxt->input->buf->encoder != NULL) && |
3507 (ctxt->input->buf->raw != NULL) && | 3554 (ctxt->input->buf->raw != NULL) && |
3508 (ctxt->input->buf->buffer != NULL)) { | 3555 (ctxt->input->buf->buffer != NULL)) { |
3509 int nbchars; | 3556 int nbchars; |
3510 int processed; | 3557 int processed; |
3511 | 3558 |
3512 /* | 3559 /* |
3513 * convert as much as possible to the parser reading buffer. | 3560 * convert as much as possible to the parser reading buffer. |
3514 */ | 3561 */ |
3515 processed = ctxt->input->cur - ctxt->input->base; | 3562 processed = ctxt->input->cur - ctxt->input->base; |
3516 » xmlBufferShrink(ctxt->input->buf->buffer, processed); | 3563 » xmlBufShrink(ctxt->input->buf->buffer, processed); |
3517 » nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder, | 3564 » nbchars = xmlCharEncInput(ctxt->input->buf, 1); |
3518 » » ctxt->input->buf->buffer, | |
3519 » » » » ctxt->input->buf->raw); | |
3520 if (nbchars < 0) { | 3565 if (nbchars < 0) { |
3521 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, | 3566 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, |
3522 "htmlCheckEncoding: encoder error\n", | 3567 "htmlCheckEncoding: encoder error\n", |
3523 NULL, NULL); | 3568 NULL, NULL); |
3524 } | 3569 } |
3525 » ctxt->input->base = | 3570 xmlBufResetInput(ctxt->input->buf->buffer, ctxt->input); |
3526 » ctxt->input->cur = ctxt->input->buf->buffer->content; | |
3527 ctxt->input->end = | |
3528 &ctxt->input->base[ctxt->input->buf->buffer->use]; | |
3529 } | 3571 } |
3530 } | 3572 } |
3531 } | 3573 } |
3532 | 3574 |
3533 /** | 3575 /** |
| 3576 * htmlCheckEncoding: |
| 3577 * @ctxt: an HTML parser context |
| 3578 * @attvalue: the attribute value |
| 3579 * |
| 3580 * Checks an http-equiv attribute from a Meta tag to detect |
| 3581 * the encoding |
| 3582 * If a new encoding is detected the parser is switched to decode |
| 3583 * it and pass UTF8 |
| 3584 */ |
| 3585 static void |
| 3586 htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) { |
| 3587 const xmlChar *encoding; |
| 3588 |
| 3589 if (!attvalue) |
| 3590 return; |
| 3591 |
| 3592 encoding = xmlStrcasestr(attvalue, BAD_CAST"charset"); |
| 3593 if (encoding != NULL) { |
| 3594 encoding += 7; |
| 3595 } |
| 3596 /* |
| 3597 * blanks follow "charset": look for the first '=' in attvalue |
| 3598 */ |
| 3599 if (encoding && IS_BLANK_CH(*encoding)) |
| 3600 encoding = xmlStrcasestr(attvalue, BAD_CAST"="); |
| 3601 if (encoding && *encoding == '=') { |
| 3602 encoding ++; |
| 3603 htmlCheckEncodingDirect(ctxt, encoding); |
| 3604 } |
| 3605 } |
| 3606 |
| 3607 /** |
3534 * htmlCheckMeta: | 3608 * htmlCheckMeta: |
3535 * @ctxt: an HTML parser context | 3609 * @ctxt: an HTML parser context |
3536 * @atts: the attributes values | 3610 * @atts: the attributes values |
3537 * | 3611 * |
3538 * Checks the attributes of a Meta tag | 3612 * Checks the attributes of a Meta tag |
3539 */ | 3613 */ |
3540 static void | 3614 static void |
3541 htmlCheckMeta(htmlParserCtxtPtr ctxt, const xmlChar **atts) { | 3615 htmlCheckMeta(htmlParserCtxtPtr ctxt, const xmlChar **atts) { |
3542 int i; | 3616 int i; |
3543 const xmlChar *att, *value; | 3617 const xmlChar *att, *value; |
3544 int http = 0; | 3618 int http = 0; |
3545 const xmlChar *content = NULL; | 3619 const xmlChar *content = NULL; |
3546 | 3620 |
3547 if ((ctxt == NULL) || (atts == NULL)) | 3621 if ((ctxt == NULL) || (atts == NULL)) |
3548 return; | 3622 return; |
3549 | 3623 |
3550 i = 0; | 3624 i = 0; |
3551 att = atts[i++]; | 3625 att = atts[i++]; |
3552 while (att != NULL) { | 3626 while (att != NULL) { |
3553 value = atts[i++]; | 3627 value = atts[i++]; |
3554 if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"http-equiv")) | 3628 if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"http-equiv")) |
3555 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type"))) | 3629 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type"))) |
3556 http = 1; | 3630 http = 1; |
| 3631 else if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"charset"))) |
| 3632 htmlCheckEncodingDirect(ctxt, value); |
3557 else if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"content"))) | 3633 else if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"content"))) |
3558 content = value; | 3634 content = value; |
3559 att = atts[i++]; | 3635 att = atts[i++]; |
3560 } | 3636 } |
3561 if ((http) && (content != NULL)) | 3637 if ((http) && (content != NULL)) |
3562 htmlCheckEncoding(ctxt, content); | 3638 htmlCheckEncoding(ctxt, content); |
3563 | 3639 |
3564 } | 3640 } |
3565 | 3641 |
3566 /** | 3642 /** |
(...skipping 21 matching lines...) Expand all Loading... |
3588 const xmlChar *name; | 3664 const xmlChar *name; |
3589 const xmlChar *attname; | 3665 const xmlChar *attname; |
3590 xmlChar *attvalue; | 3666 xmlChar *attvalue; |
3591 const xmlChar **atts; | 3667 const xmlChar **atts; |
3592 int nbatts = 0; | 3668 int nbatts = 0; |
3593 int maxatts; | 3669 int maxatts; |
3594 int meta = 0; | 3670 int meta = 0; |
3595 int i; | 3671 int i; |
3596 int discardtag = 0; | 3672 int discardtag = 0; |
3597 | 3673 |
3598 if (ctxt->instate == XML_PARSER_EOF) | |
3599 return(-1); | |
3600 if ((ctxt == NULL) || (ctxt->input == NULL)) { | 3674 if ((ctxt == NULL) || (ctxt->input == NULL)) { |
3601 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, | 3675 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, |
3602 "htmlParseStartTag: context error\n", NULL, NULL); | 3676 "htmlParseStartTag: context error\n", NULL, NULL); |
3603 return -1; | 3677 return -1; |
3604 } | 3678 } |
| 3679 if (ctxt->instate == XML_PARSER_EOF) |
| 3680 return(-1); |
3605 if (CUR != '<') return -1; | 3681 if (CUR != '<') return -1; |
3606 NEXT; | 3682 NEXT; |
3607 | 3683 |
3608 atts = ctxt->atts; | 3684 atts = ctxt->atts; |
3609 maxatts = ctxt->maxatts; | 3685 maxatts = ctxt->maxatts; |
3610 | 3686 |
3611 GROW; | 3687 GROW; |
3612 name = htmlParseHTMLName(ctxt); | 3688 name = htmlParseHTMLName(ctxt); |
3613 if (name == NULL) { | 3689 if (name == NULL) { |
3614 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED, | 3690 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED, |
(...skipping 261 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3876 } | 3952 } |
3877 } | 3953 } |
3878 | 3954 |
3879 /* | 3955 /* |
3880 * SAX: End of Tag | 3956 * SAX: End of Tag |
3881 */ | 3957 */ |
3882 oldname = ctxt->name; | 3958 oldname = ctxt->name; |
3883 if ((oldname != NULL) && (xmlStrEqual(oldname, name))) { | 3959 if ((oldname != NULL) && (xmlStrEqual(oldname, name))) { |
3884 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) | 3960 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) |
3885 ctxt->sax->endElement(ctxt->userData, name); | 3961 ctxt->sax->endElement(ctxt->userData, name); |
| 3962 htmlNodeInfoPop(ctxt); |
3886 htmlnamePop(ctxt); | 3963 htmlnamePop(ctxt); |
3887 ret = 1; | 3964 ret = 1; |
3888 } else { | 3965 } else { |
3889 ret = 0; | 3966 ret = 0; |
3890 } | 3967 } |
3891 | 3968 |
3892 return (ret); | 3969 return (ret); |
3893 } | 3970 } |
3894 | 3971 |
3895 | 3972 |
(...skipping 386 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4282 * | 4359 * |
4283 * [39] element ::= EmptyElemTag | STag content ETag | 4360 * [39] element ::= EmptyElemTag | STag content ETag |
4284 * | 4361 * |
4285 * [41] Attribute ::= Name Eq AttValue | 4362 * [41] Attribute ::= Name Eq AttValue |
4286 */ | 4363 */ |
4287 | 4364 |
4288 static void | 4365 static void |
4289 htmlParseElementInternal(htmlParserCtxtPtr ctxt) { | 4366 htmlParseElementInternal(htmlParserCtxtPtr ctxt) { |
4290 const xmlChar *name; | 4367 const xmlChar *name; |
4291 const htmlElemDesc * info; | 4368 const htmlElemDesc * info; |
4292 htmlParserNodeInfo node_info; | 4369 htmlParserNodeInfo node_info = { 0, }; |
4293 int failed; | 4370 int failed; |
4294 | 4371 |
4295 if ((ctxt == NULL) || (ctxt->input == NULL)) { | 4372 if ((ctxt == NULL) || (ctxt->input == NULL)) { |
4296 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, | 4373 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, |
4297 "htmlParseElementInternal: context error\n", NULL, NULL); | 4374 "htmlParseElementInternal: context error\n", NULL, NULL); |
4298 return; | 4375 return; |
4299 } | 4376 } |
4300 | 4377 |
4301 if (ctxt->instate == XML_PARSER_EOF) | 4378 if (ctxt->instate == XML_PARSER_EOF) |
4302 return; | 4379 return; |
(...skipping 360 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4663 if (CUR == 0) | 4740 if (CUR == 0) |
4664 htmlAutoCloseOnEnd(ctxt); | 4741 htmlAutoCloseOnEnd(ctxt); |
4665 | 4742 |
4666 | 4743 |
4667 /* | 4744 /* |
4668 * SAX: end of the document processing. | 4745 * SAX: end of the document processing. |
4669 */ | 4746 */ |
4670 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) | 4747 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) |
4671 ctxt->sax->endDocument(ctxt->userData); | 4748 ctxt->sax->endDocument(ctxt->userData); |
4672 | 4749 |
4673 if (ctxt->myDoc != NULL) { | 4750 if ((!(ctxt->options & HTML_PARSE_NODEFDTD)) && (ctxt->myDoc != NULL)) { |
4674 dtd = xmlGetIntSubset(ctxt->myDoc); | 4751 dtd = xmlGetIntSubset(ctxt->myDoc); |
4675 if (dtd == NULL) | 4752 if (dtd == NULL) |
4676 ctxt->myDoc->intSubset = | 4753 ctxt->myDoc->intSubset = |
4677 xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html", | 4754 xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html", |
4678 BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN", | 4755 BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN", |
4679 BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd"); | 4756 BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd"); |
4680 } | 4757 } |
4681 if (! ctxt->wellFormed) return(-1); | 4758 if (! ctxt->wellFormed) return(-1); |
4682 return(0); | 4759 return(0); |
4683 } | 4760 } |
(...skipping 184 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4868 if (buf == NULL) return(NULL); | 4945 if (buf == NULL) return(NULL); |
4869 | 4946 |
4870 input = xmlNewInputStream(ctxt); | 4947 input = xmlNewInputStream(ctxt); |
4871 if (input == NULL) { | 4948 if (input == NULL) { |
4872 xmlFreeParserCtxt(ctxt); | 4949 xmlFreeParserCtxt(ctxt); |
4873 return(NULL); | 4950 return(NULL); |
4874 } | 4951 } |
4875 | 4952 |
4876 input->filename = NULL; | 4953 input->filename = NULL; |
4877 input->buf = buf; | 4954 input->buf = buf; |
4878 input->base = input->buf->buffer->content; | 4955 xmlBufResetInput(buf->buffer, input); |
4879 input->cur = input->buf->buffer->content; | |
4880 input->end = &input->buf->buffer->content[input->buf->buffer->use]; | |
4881 | 4956 |
4882 inputPush(ctxt, input); | 4957 inputPush(ctxt, input); |
4883 return(ctxt); | 4958 return(ctxt); |
4884 } | 4959 } |
4885 | 4960 |
4886 /** | 4961 /** |
4887 * htmlCreateDocParserCtxt: | 4962 * htmlCreateDocParserCtxt: |
4888 * @cur: a pointer to an array of xmlChar | 4963 * @cur: a pointer to an array of xmlChar |
4889 * @encoding: a free form C string describing the HTML document encoding, or NU
LL | 4964 * @encoding: a free form C string describing the HTML document encoding, or NU
LL |
4890 * | 4965 * |
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4987 if (base < 0) | 5062 if (base < 0) |
4988 return (-1); | 5063 return (-1); |
4989 | 5064 |
4990 if (ctxt->checkIndex > base) | 5065 if (ctxt->checkIndex > base) |
4991 base = ctxt->checkIndex; | 5066 base = ctxt->checkIndex; |
4992 | 5067 |
4993 if (in->buf == NULL) { | 5068 if (in->buf == NULL) { |
4994 buf = in->base; | 5069 buf = in->base; |
4995 len = in->length; | 5070 len = in->length; |
4996 } else { | 5071 } else { |
4997 buf = in->buf->buffer->content; | 5072 buf = xmlBufContent(in->buf->buffer); |
4998 len = in->buf->buffer->use; | 5073 len = xmlBufUse(in->buf->buffer); |
4999 } | 5074 } |
5000 | 5075 |
5001 /* take into account the sequence length */ | 5076 /* take into account the sequence length */ |
5002 if (third) | 5077 if (third) |
5003 len -= 2; | 5078 len -= 2; |
5004 else if (next) | 5079 else if (next) |
5005 len--; | 5080 len--; |
5006 for (; base < len; base++) { | 5081 for (; base < len; base++) { |
5007 if ((!incomment) && (base + 4 < len) && (!iscomment)) { | 5082 if ((!incomment) && (base + 4 < len) && (!iscomment)) { |
5008 if ((buf[base] == '<') && (buf[base + 1] == '!') && | 5083 if ((buf[base] == '<') && (buf[base + 1] == '!') && |
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5080 #endif | 5155 #endif |
5081 return (-1); | 5156 return (-1); |
5082 } | 5157 } |
5083 | 5158 |
5084 /** | 5159 /** |
5085 * htmlParseLookupChars: | 5160 * htmlParseLookupChars: |
5086 * @ctxt: an HTML parser context | 5161 * @ctxt: an HTML parser context |
5087 * @stop: Array of chars, which stop the lookup. | 5162 * @stop: Array of chars, which stop the lookup. |
5088 * @stopLen: Length of stop-Array | 5163 * @stopLen: Length of stop-Array |
5089 * | 5164 * |
5090 * Try to find if any char of the stop-Array is available in the input | 5165 * Try to find if any char of the stop-Array is available in the input |
5091 * stream. | 5166 * stream. |
5092 * This function has a side effect of (possibly) incrementing ctxt->checkIndex | 5167 * This function has a side effect of (possibly) incrementing ctxt->checkIndex |
5093 * to avoid rescanning sequences of bytes, it DOES change the state of the | 5168 * to avoid rescanning sequences of bytes, it DOES change the state of the |
5094 * parser, do not use liberally. | 5169 * parser, do not use liberally. |
5095 * | 5170 * |
5096 * Returns the index to the current parsing point if a stopChar | 5171 * Returns the index to the current parsing point if a stopChar |
5097 * is available, -1 otherwise. | 5172 * is available, -1 otherwise. |
5098 */ | 5173 */ |
5099 static int | 5174 static int |
5100 htmlParseLookupChars(htmlParserCtxtPtr ctxt, const xmlChar * stop, | 5175 htmlParseLookupChars(htmlParserCtxtPtr ctxt, const xmlChar * stop, |
5101 int stopLen) | 5176 int stopLen) |
5102 { | 5177 { |
5103 int base, len; | 5178 int base, len; |
5104 htmlParserInputPtr in; | 5179 htmlParserInputPtr in; |
5105 const xmlChar *buf; | 5180 const xmlChar *buf; |
5106 int incomment = 0; | 5181 int incomment = 0; |
5107 int i; | 5182 int i; |
5108 | 5183 |
5109 in = ctxt->input; | 5184 in = ctxt->input; |
5110 if (in == NULL) | 5185 if (in == NULL) |
5111 return (-1); | 5186 return (-1); |
5112 | 5187 |
5113 base = in->cur - in->base; | 5188 base = in->cur - in->base; |
5114 if (base < 0) | 5189 if (base < 0) |
5115 return (-1); | 5190 return (-1); |
5116 | 5191 |
5117 if (ctxt->checkIndex > base) | 5192 if (ctxt->checkIndex > base) |
5118 base = ctxt->checkIndex; | 5193 base = ctxt->checkIndex; |
5119 | 5194 |
5120 if (in->buf == NULL) { | 5195 if (in->buf == NULL) { |
5121 buf = in->base; | 5196 buf = in->base; |
5122 len = in->length; | 5197 len = in->length; |
5123 } else { | 5198 } else { |
5124 buf = in->buf->buffer->content; | 5199 buf = xmlBufContent(in->buf->buffer); |
5125 len = in->buf->buffer->use; | 5200 len = xmlBufUse(in->buf->buffer); |
5126 } | 5201 } |
5127 | 5202 |
5128 for (; base < len; base++) { | 5203 for (; base < len; base++) { |
5129 if (!incomment && (base + 4 < len)) { | 5204 if (!incomment && (base + 4 < len)) { |
5130 if ((buf[base] == '<') && (buf[base + 1] == '!') && | 5205 if ((buf[base] == '<') && (buf[base + 1] == '!') && |
5131 (buf[base + 2] == '-') && (buf[base + 3] == '-')) { | 5206 (buf[base + 2] == '-') && (buf[base + 3] == '-')) { |
5132 incomment = 1; | 5207 incomment = 1; |
5133 /* do not increment past <! - some people use <!--> */ | 5208 /* do not increment past <! - some people use <!--> */ |
5134 base += 2; | 5209 base += 2; |
5135 } | 5210 } |
(...skipping 28 matching lines...) Expand all Loading... |
5164 * | 5239 * |
5165 * Returns zero if no parsing was possible | 5240 * Returns zero if no parsing was possible |
5166 */ | 5241 */ |
5167 static int | 5242 static int |
5168 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { | 5243 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { |
5169 int ret = 0; | 5244 int ret = 0; |
5170 htmlParserInputPtr in; | 5245 htmlParserInputPtr in; |
5171 int avail = 0; | 5246 int avail = 0; |
5172 xmlChar cur, next; | 5247 xmlChar cur, next; |
5173 | 5248 |
| 5249 htmlParserNodeInfo node_info; |
| 5250 |
5174 #ifdef DEBUG_PUSH | 5251 #ifdef DEBUG_PUSH |
5175 switch (ctxt->instate) { | 5252 switch (ctxt->instate) { |
5176 case XML_PARSER_EOF: | 5253 case XML_PARSER_EOF: |
5177 xmlGenericError(xmlGenericErrorContext, | 5254 xmlGenericError(xmlGenericErrorContext, |
5178 "HPP: try EOF\n"); break; | 5255 "HPP: try EOF\n"); break; |
5179 case XML_PARSER_START: | 5256 case XML_PARSER_START: |
5180 xmlGenericError(xmlGenericErrorContext, | 5257 xmlGenericError(xmlGenericErrorContext, |
5181 "HPP: try START\n"); break; | 5258 "HPP: try START\n"); break; |
5182 case XML_PARSER_MISC: | 5259 case XML_PARSER_MISC: |
5183 xmlGenericError(xmlGenericErrorContext, | 5260 xmlGenericError(xmlGenericErrorContext, |
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5224 } | 5301 } |
5225 #endif | 5302 #endif |
5226 | 5303 |
5227 while (1) { | 5304 while (1) { |
5228 | 5305 |
5229 in = ctxt->input; | 5306 in = ctxt->input; |
5230 if (in == NULL) break; | 5307 if (in == NULL) break; |
5231 if (in->buf == NULL) | 5308 if (in->buf == NULL) |
5232 avail = in->length - (in->cur - in->base); | 5309 avail = in->length - (in->cur - in->base); |
5233 else | 5310 else |
5234 » avail = in->buf->buffer->use - (in->cur - in->base); | 5311 » avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base); |
5235 if ((avail == 0) && (terminate)) { | 5312 if ((avail == 0) && (terminate)) { |
5236 htmlAutoCloseOnEnd(ctxt); | 5313 htmlAutoCloseOnEnd(ctxt); |
5237 if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { | 5314 if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { |
5238 /* | 5315 /* |
5239 * SAX: end of the document processing. | 5316 * SAX: end of the document processing. |
5240 */ | 5317 */ |
5241 ctxt->instate = XML_PARSER_EOF; | 5318 ctxt->instate = XML_PARSER_EOF; |
5242 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) | 5319 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) |
5243 ctxt->sax->endDocument(ctxt->userData); | 5320 ctxt->sax->endDocument(ctxt->userData); |
5244 } | 5321 } |
(...skipping 15 matching lines...) Expand all Loading... |
5260 case XML_PARSER_START: | 5337 case XML_PARSER_START: |
5261 /* | 5338 /* |
5262 * Very first chars read from the document flow. | 5339 * Very first chars read from the document flow. |
5263 */ | 5340 */ |
5264 cur = in->cur[0]; | 5341 cur = in->cur[0]; |
5265 if (IS_BLANK_CH(cur)) { | 5342 if (IS_BLANK_CH(cur)) { |
5266 SKIP_BLANKS; | 5343 SKIP_BLANKS; |
5267 if (in->buf == NULL) | 5344 if (in->buf == NULL) |
5268 avail = in->length - (in->cur - in->base); | 5345 avail = in->length - (in->cur - in->base); |
5269 else | 5346 else |
5270 » » » avail = in->buf->buffer->use - (in->cur - in->base); | 5347 » » » avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base
); |
5271 } | 5348 } |
5272 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) | 5349 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) |
5273 ctxt->sax->setDocumentLocator(ctxt->userData, | 5350 ctxt->sax->setDocumentLocator(ctxt->userData, |
5274 &xmlDefaultSAXLocator); | 5351 &xmlDefaultSAXLocator); |
5275 if ((ctxt->sax) && (ctxt->sax->startDocument) && | 5352 if ((ctxt->sax) && (ctxt->sax->startDocument) && |
5276 (!ctxt->disableSAX)) | 5353 (!ctxt->disableSAX)) |
5277 ctxt->sax->startDocument(ctxt->userData); | 5354 ctxt->sax->startDocument(ctxt->userData); |
5278 | 5355 |
5279 cur = in->cur[0]; | 5356 cur = in->cur[0]; |
5280 next = in->cur[1]; | 5357 next = in->cur[1]; |
(...skipping 21 matching lines...) Expand all Loading... |
5302 xmlGenericError(xmlGenericErrorContext, | 5379 xmlGenericError(xmlGenericErrorContext, |
5303 "HPP: entering MISC\n"); | 5380 "HPP: entering MISC\n"); |
5304 #endif | 5381 #endif |
5305 } | 5382 } |
5306 break; | 5383 break; |
5307 case XML_PARSER_MISC: | 5384 case XML_PARSER_MISC: |
5308 SKIP_BLANKS; | 5385 SKIP_BLANKS; |
5309 if (in->buf == NULL) | 5386 if (in->buf == NULL) |
5310 avail = in->length - (in->cur - in->base); | 5387 avail = in->length - (in->cur - in->base); |
5311 else | 5388 else |
5312 » » avail = in->buf->buffer->use - (in->cur - in->base); | 5389 » » avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base); |
5313 » » if (avail < 2) | 5390 » » /* |
| 5391 » » * no chars in buffer |
| 5392 » » */ |
| 5393 » » if (avail < 1) |
5314 goto done; | 5394 goto done; |
| 5395 /* |
                                                                            | 5396                  * not enough chars in buffer                          |
| 5397 */ |
| 5398 if (avail < 2) { |
| 5399 if (!terminate) |
| 5400 goto done; |
| 5401 else |
| 5402 next = ' '; |
| 5403 } else { |
| 5404 next = in->cur[1]; |
| 5405 } |
5315 cur = in->cur[0]; | 5406 cur = in->cur[0]; |
5316 next = in->cur[1]; | |
5317 if ((cur == '<') && (next == '!') && | 5407 if ((cur == '<') && (next == '!') && |
5318 (in->cur[2] == '-') && (in->cur[3] == '-')) { | 5408 (in->cur[2] == '-') && (in->cur[3] == '-')) { |
5319 if ((!terminate) && | 5409 if ((!terminate) && |
5320 (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0)
) | 5410 (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0)
) |
5321 goto done; | 5411 goto done; |
5322 #ifdef DEBUG_PUSH | 5412 #ifdef DEBUG_PUSH |
5323 xmlGenericError(xmlGenericErrorContext, | 5413 xmlGenericError(xmlGenericErrorContext, |
5324 "HPP: Parsing Comment\n"); | 5414 "HPP: Parsing Comment\n"); |
5325 #endif | 5415 #endif |
5326 htmlParseComment(ctxt); | 5416 htmlParseComment(ctxt); |
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5362 xmlGenericError(xmlGenericErrorContext, | 5452 xmlGenericError(xmlGenericErrorContext, |
5363 "HPP: entering START_TAG\n"); | 5453 "HPP: entering START_TAG\n"); |
5364 #endif | 5454 #endif |
5365 } | 5455 } |
5366 break; | 5456 break; |
5367 case XML_PARSER_PROLOG: | 5457 case XML_PARSER_PROLOG: |
5368 SKIP_BLANKS; | 5458 SKIP_BLANKS; |
5369 if (in->buf == NULL) | 5459 if (in->buf == NULL) |
5370 avail = in->length - (in->cur - in->base); | 5460 avail = in->length - (in->cur - in->base); |
5371 else | 5461 else |
5372 » » avail = in->buf->buffer->use - (in->cur - in->base); | 5462 » » avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base); |
5373 if (avail < 2) | 5463 if (avail < 2) |
5374 goto done; | 5464 goto done; |
5375 cur = in->cur[0]; | 5465 cur = in->cur[0]; |
5376 next = in->cur[1]; | 5466 next = in->cur[1]; |
5377 if ((cur == '<') && (next == '!') && | 5467 if ((cur == '<') && (next == '!') && |
5378 (in->cur[2] == '-') && (in->cur[3] == '-')) { | 5468 (in->cur[2] == '-') && (in->cur[3] == '-')) { |
5379 if ((!terminate) && | 5469 if ((!terminate) && |
5380 (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0)
) | 5470 (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0)
) |
5381 goto done; | 5471 goto done; |
5382 #ifdef DEBUG_PUSH | 5472 #ifdef DEBUG_PUSH |
(...skipping 20 matching lines...) Expand all Loading... |
5403 #ifdef DEBUG_PUSH | 5493 #ifdef DEBUG_PUSH |
5404 xmlGenericError(xmlGenericErrorContext, | 5494 xmlGenericError(xmlGenericErrorContext, |
5405 "HPP: entering START_TAG\n"); | 5495 "HPP: entering START_TAG\n"); |
5406 #endif | 5496 #endif |
5407 } | 5497 } |
5408 break; | 5498 break; |
5409 case XML_PARSER_EPILOG: | 5499 case XML_PARSER_EPILOG: |
5410 if (in->buf == NULL) | 5500 if (in->buf == NULL) |
5411 avail = in->length - (in->cur - in->base); | 5501 avail = in->length - (in->cur - in->base); |
5412 else | 5502 else |
5413 » » avail = in->buf->buffer->use - (in->cur - in->base); | 5503 » » avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base); |
5414 if (avail < 1) | 5504 if (avail < 1) |
5415 goto done; | 5505 goto done; |
5416 cur = in->cur[0]; | 5506 cur = in->cur[0]; |
5417 if (IS_BLANK_CH(cur)) { | 5507 if (IS_BLANK_CH(cur)) { |
5418 htmlParseCharData(ctxt); | 5508 htmlParseCharData(ctxt); |
5419 goto done; | 5509 goto done; |
5420 } | 5510 } |
5421 if (avail < 2) | 5511 if (avail < 2) |
5422 goto done; | 5512 goto done; |
5423 next = in->cur[1]; | 5513 next = in->cur[1]; |
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5456 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) | 5546 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) |
5457 ctxt->sax->endDocument(ctxt->userData); | 5547 ctxt->sax->endDocument(ctxt->userData); |
5458 goto done; | 5548 goto done; |
5459 } | 5549 } |
5460 break; | 5550 break; |
5461 case XML_PARSER_START_TAG: { | 5551 case XML_PARSER_START_TAG: { |
5462 const xmlChar *name; | 5552 const xmlChar *name; |
5463 int failed; | 5553 int failed; |
5464 const htmlElemDesc * info; | 5554 const htmlElemDesc * info; |
5465 | 5555 |
5466 » » if (avail < 2) | 5556 » » /* |
| 5557 » » * no chars in buffer |
| 5558 » » */ |
| 5559 » » if (avail < 1) |
5467 goto done; | 5560 goto done; |
| 5561 /* |
| 5562 * not enouth chars in buffer |
| 5563 */ |
| 5564 if (avail < 2) { |
| 5565 if (!terminate) |
| 5566 goto done; |
| 5567 else |
| 5568 next = ' '; |
| 5569 } else { |
| 5570 next = in->cur[1]; |
| 5571 } |
5468 cur = in->cur[0]; | 5572 cur = in->cur[0]; |
5469 if (cur != '<') { | 5573 if (cur != '<') { |
5470 ctxt->instate = XML_PARSER_CONTENT; | 5574 ctxt->instate = XML_PARSER_CONTENT; |
5471 #ifdef DEBUG_PUSH | 5575 #ifdef DEBUG_PUSH |
5472 xmlGenericError(xmlGenericErrorContext, | 5576 xmlGenericError(xmlGenericErrorContext, |
5473 "HPP: entering CONTENT\n"); | 5577 "HPP: entering CONTENT\n"); |
5474 #endif | 5578 #endif |
5475 break; | 5579 break; |
5476 } | 5580 } |
5477 » » if (in->cur[1] == '/') { | 5581 » » if (next == '/') { |
5478 ctxt->instate = XML_PARSER_END_TAG; | 5582 ctxt->instate = XML_PARSER_END_TAG; |
5479 ctxt->checkIndex = 0; | 5583 ctxt->checkIndex = 0; |
5480 #ifdef DEBUG_PUSH | 5584 #ifdef DEBUG_PUSH |
5481 xmlGenericError(xmlGenericErrorContext, | 5585 xmlGenericError(xmlGenericErrorContext, |
5482 "HPP: entering END_TAG\n"); | 5586 "HPP: entering END_TAG\n"); |
5483 #endif | 5587 #endif |
5484 break; | 5588 break; |
5485 } | 5589 } |
5486 if ((!terminate) && | 5590 if ((!terminate) && |
5487 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) | 5591 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) |
5488 goto done; | 5592 goto done; |
5489 | 5593 |
| 5594 /* Capture start position */ |
| 5595 if (ctxt->record_info) { |
| 5596 node_info.begin_pos = ctxt->input->consumed + |
| 5597 (CUR_PTR - ctxt->input->base); |
| 5598 node_info.begin_line = ctxt->input->line; |
| 5599 } |
| 5600 |
| 5601 |
5490 failed = htmlParseStartTag(ctxt); | 5602 failed = htmlParseStartTag(ctxt); |
5491 name = ctxt->name; | 5603 name = ctxt->name; |
5492 if ((failed == -1) || | 5604 if ((failed == -1) || |
5493 (name == NULL)) { | 5605 (name == NULL)) { |
5494 if (CUR == '>') | 5606 if (CUR == '>') |
5495 NEXT; | 5607 NEXT; |
5496 break; | 5608 break; |
5497 } | 5609 } |
5498 | 5610 |
5499 /* | 5611 /* |
(...skipping 29 matching lines...) Expand all Loading... |
5529 name, NULL); | 5641 name, NULL); |
5530 | 5642 |
5531 /* | 5643 /* |
5532 * end of parsing of this node. | 5644 * end of parsing of this node. |
5533 */ | 5645 */ |
5534 if (xmlStrEqual(name, ctxt->name)) { | 5646 if (xmlStrEqual(name, ctxt->name)) { |
5535 nodePop(ctxt); | 5647 nodePop(ctxt); |
5536 htmlnamePop(ctxt); | 5648 htmlnamePop(ctxt); |
5537 } | 5649 } |
5538 | 5650 |
| 5651 if (ctxt->record_info) |
| 5652 htmlNodeInfoPush(ctxt, &node_info); |
| 5653 |
5539 ctxt->instate = XML_PARSER_CONTENT; | 5654 ctxt->instate = XML_PARSER_CONTENT; |
5540 #ifdef DEBUG_PUSH | 5655 #ifdef DEBUG_PUSH |
5541 xmlGenericError(xmlGenericErrorContext, | 5656 xmlGenericError(xmlGenericErrorContext, |
5542 "HPP: entering CONTENT\n"); | 5657 "HPP: entering CONTENT\n"); |
5543 #endif | 5658 #endif |
5544 break; | 5659 break; |
5545 } | 5660 } |
5546 | 5661 |
5547 /* | 5662 /* |
5548 * Check for an Empty Element from DTD definition | 5663 * Check for an Empty Element from DTD definition |
5549 */ | 5664 */ |
5550 if ((info != NULL) && (info->empty)) { | 5665 if ((info != NULL) && (info->empty)) { |
5551 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) | 5666 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) |
5552 ctxt->sax->endElement(ctxt->userData, name); | 5667 ctxt->sax->endElement(ctxt->userData, name); |
5553 htmlnamePop(ctxt); | 5668 htmlnamePop(ctxt); |
5554 } | 5669 } |
| 5670 |
| 5671 if (ctxt->record_info) |
| 5672 htmlNodeInfoPush(ctxt, &node_info); |
| 5673 |
5555 ctxt->instate = XML_PARSER_CONTENT; | 5674 ctxt->instate = XML_PARSER_CONTENT; |
5556 #ifdef DEBUG_PUSH | 5675 #ifdef DEBUG_PUSH |
5557 xmlGenericError(xmlGenericErrorContext, | 5676 xmlGenericError(xmlGenericErrorContext, |
5558 "HPP: entering CONTENT\n"); | 5677 "HPP: entering CONTENT\n"); |
5559 #endif | 5678 #endif |
5560 break; | 5679 break; |
5561 } | 5680 } |
5562 case XML_PARSER_CONTENT: { | 5681 case XML_PARSER_CONTENT: { |
5563 long cons; | 5682 long cons; |
5564 /* | 5683 /* |
5565 * Handle preparsed entities and charRef | 5684 * Handle preparsed entities and charRef |
5566 */ | 5685 */ |
5567 if (ctxt->token != 0) { | 5686 if (ctxt->token != 0) { |
5568 xmlChar chr[2] = { 0 , 0 } ; | 5687 xmlChar chr[2] = { 0 , 0 } ; |
5569 | 5688 |
5570 chr[0] = (xmlChar) ctxt->token; | 5689 chr[0] = (xmlChar) ctxt->token; |
5571 htmlCheckParagraph(ctxt); | 5690 htmlCheckParagraph(ctxt); |
5572 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) | 5691 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) |
5573 ctxt->sax->characters(ctxt->userData, chr, 1); | 5692 ctxt->sax->characters(ctxt->userData, chr, 1); |
5574 ctxt->token = 0; | 5693 ctxt->token = 0; |
5575 ctxt->checkIndex = 0; | 5694 ctxt->checkIndex = 0; |
5576 } | 5695 } |
5577 if ((avail == 1) && (terminate)) { | 5696 if ((avail == 1) && (terminate)) { |
5578 cur = in->cur[0]; | 5697 cur = in->cur[0]; |
5579 if ((cur != '<') && (cur != '&')) { | 5698 if ((cur != '<') && (cur != '&')) { |
5580 if (ctxt->sax != NULL) { | 5699 if (ctxt->sax != NULL) { |
5581 if (IS_BLANK_CH(cur)) { | 5700 if (IS_BLANK_CH(cur)) { |
5582 » » » » if (ctxt->sax->ignorableWhitespace != NULL) | 5701 » » » » if (ctxt->keepBlanks) { |
5583 » » » » ctxt->sax->ignorableWhitespace( | 5702 » » » » if (ctxt->sax->characters != NULL) |
5584 » » » » » ctxt->userData, &cur, 1); | 5703 » » » » » ctxt->sax->characters( |
| 5704 » » » » » » ctxt->userData, &cur, 1); |
| 5705 » » » » } else { |
| 5706 » » » » if (ctxt->sax->ignorableWhitespace != NULL) |
| 5707 » » » » » ctxt->sax->ignorableWhitespace( |
| 5708 » » » » » » ctxt->userData, &cur, 1); |
| 5709 » » » » } |
5585 } else { | 5710 } else { |
5586 htmlCheckParagraph(ctxt); | 5711 htmlCheckParagraph(ctxt); |
5587 if (ctxt->sax->characters != NULL) | 5712 if (ctxt->sax->characters != NULL) |
5588 ctxt->sax->characters( | 5713 ctxt->sax->characters( |
5589 ctxt->userData, &cur, 1); | 5714 ctxt->userData, &cur, 1); |
5590 } | 5715 } |
5591 } | 5716 } |
5592 ctxt->token = 0; | 5717 ctxt->token = 0; |
5593 ctxt->checkIndex = 0; | 5718 ctxt->checkIndex = 0; |
5594 in->cur++; | 5719 in->cur++; |
5595 break; | 5720 break; |
5596 } | 5721 } |
5597 } | 5722 } |
5598 if (avail < 2) | 5723 if (avail < 2) |
5599 goto done; | 5724 goto done; |
5600 cur = in->cur[0]; | 5725 cur = in->cur[0]; |
5601 next = in->cur[1]; | 5726 next = in->cur[1]; |
5602 cons = ctxt->nbChars; | 5727 cons = ctxt->nbChars; |
5603 if ((xmlStrEqual(ctxt->name, BAD_CAST"script")) || | 5728 if ((xmlStrEqual(ctxt->name, BAD_CAST"script")) || |
5604 (xmlStrEqual(ctxt->name, BAD_CAST"style"))) { | 5729 (xmlStrEqual(ctxt->name, BAD_CAST"style"))) { |
5605 /* | 5730 /* |
5606 * Handle SCRIPT/STYLE separately | 5731 * Handle SCRIPT/STYLE separately |
5607 */ | 5732 */ |
5608 if (!terminate) { | 5733 if (!terminate) { |
5609 int idx; | 5734 int idx; |
5610 xmlChar val; | 5735 xmlChar val; |
5611 | 5736 |
5612 » » » idx = htmlParseLookupSequence(ctxt, '<', '/', 0, 0, 1); | 5737 » » » idx = htmlParseLookupSequence(ctxt, '<', '/', 0, 0, 0); |
5613 if (idx < 0) | 5738 if (idx < 0) |
5614 goto done; | 5739 goto done; |
5615 val = in->cur[idx + 2]; | 5740 val = in->cur[idx + 2]; |
5616 if (val == 0) /* bad cut of input */ | 5741 if (val == 0) /* bad cut of input */ |
5617 goto done; | 5742 goto done; |
5618 } | 5743 } |
5619 htmlParseScript(ctxt); | 5744 htmlParseScript(ctxt); |
5620 if ((cur == '<') && (next == '/')) { | 5745 if ((cur == '<') && (next == '/')) { |
5621 ctxt->instate = XML_PARSER_END_TAG; | 5746 ctxt->instate = XML_PARSER_END_TAG; |
5622 ctxt->checkIndex = 0; | 5747 ctxt->checkIndex = 0; |
(...skipping 236 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5859 htmlAutoCloseOnEnd(ctxt); | 5984 htmlAutoCloseOnEnd(ctxt); |
5860 if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { | 5985 if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { |
5861 /* | 5986 /* |
5862 * SAX: end of the document processing. | 5987 * SAX: end of the document processing. |
5863 */ | 5988 */ |
5864 ctxt->instate = XML_PARSER_EOF; | 5989 ctxt->instate = XML_PARSER_EOF; |
5865 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) | 5990 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) |
5866 ctxt->sax->endDocument(ctxt->userData); | 5991 ctxt->sax->endDocument(ctxt->userData); |
5867 } | 5992 } |
5868 } | 5993 } |
5869 if ((ctxt->myDoc != NULL) && | 5994 if ((!(ctxt->options & HTML_PARSE_NODEFDTD)) && (ctxt->myDoc != NULL) && |
5870 ((terminate) || (ctxt->instate == XML_PARSER_EOF) || | 5995 ((terminate) || (ctxt->instate == XML_PARSER_EOF) || |
5871 (ctxt->instate == XML_PARSER_EPILOG))) { | 5996 (ctxt->instate == XML_PARSER_EPILOG))) { |
5872 xmlDtdPtr dtd; | 5997 xmlDtdPtr dtd; |
5873 dtd = xmlGetIntSubset(ctxt->myDoc); | 5998 dtd = xmlGetIntSubset(ctxt->myDoc); |
5874 if (dtd == NULL) | 5999 if (dtd == NULL) |
5875 ctxt->myDoc->intSubset = | 6000 ctxt->myDoc->intSubset = |
5876 xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html", | 6001 xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html", |
5877 BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN", | 6002 BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN", |
5878 BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd"); | 6003 BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd"); |
5879 } | 6004 } |
(...skipping 17 matching lines...) Expand all Loading... |
5897 int | 6022 int |
5898 htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size, | 6023 htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size, |
5899 int terminate) { | 6024 int terminate) { |
5900 if ((ctxt == NULL) || (ctxt->input == NULL)) { | 6025 if ((ctxt == NULL) || (ctxt->input == NULL)) { |
5901 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, | 6026 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, |
5902 "htmlParseChunk: context error\n", NULL, NULL); | 6027 "htmlParseChunk: context error\n", NULL, NULL); |
5903 return(XML_ERR_INTERNAL_ERROR); | 6028 return(XML_ERR_INTERNAL_ERROR); |
5904 } | 6029 } |
5905 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && | 6030 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && |
5906 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { | 6031 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { |
5907 » int base = ctxt->input->base - ctxt->input->buf->buffer->content; | 6032 » size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); |
5908 » int cur = ctxt->input->cur - ctxt->input->base; | 6033 » size_t cur = ctxt->input->cur - ctxt->input->base; |
5909 int res; | 6034 int res; |
5910 | 6035 |
5911 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk); | 6036 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk); |
5912 if (res < 0) { | 6037 if (res < 0) { |
5913 ctxt->errNo = XML_PARSER_EOF; | 6038 ctxt->errNo = XML_PARSER_EOF; |
5914 ctxt->disableSAX = 1; | 6039 ctxt->disableSAX = 1; |
5915 return (XML_PARSER_EOF); | 6040 return (XML_PARSER_EOF); |
5916 } | 6041 } |
5917 » ctxt->input->base = ctxt->input->buf->buffer->content + base; | 6042 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); |
5918 » ctxt->input->cur = ctxt->input->base + cur; | |
5919 » ctxt->input->end = | |
5920 » &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; | |
5921 #ifdef DEBUG_PUSH | 6043 #ifdef DEBUG_PUSH |
5922 xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); | 6044 xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); |
5923 #endif | 6045 #endif |
5924 | 6046 |
5925 #if 0 | 6047 #if 0 |
5926 if ((terminate) || (ctxt->input->buf->buffer->use > 80)) | 6048 if ((terminate) || (ctxt->input->buf->buffer->use > 80)) |
5927 htmlParseTryOrFinish(ctxt, terminate); | 6049 htmlParseTryOrFinish(ctxt, terminate); |
5928 #endif | 6050 #endif |
5929 } else if (ctxt->instate != XML_PARSER_EOF) { | 6051 } else if (ctxt->instate != XML_PARSER_EOF) { |
5930 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { | 6052 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { |
5931 xmlParserInputBufferPtr in = ctxt->input->buf; | 6053 xmlParserInputBufferPtr in = ctxt->input->buf; |
5932 if ((in->encoder != NULL) && (in->buffer != NULL) && | 6054 if ((in->encoder != NULL) && (in->buffer != NULL) && |
5933 (in->raw != NULL)) { | 6055 (in->raw != NULL)) { |
5934 int nbchars; | 6056 int nbchars; |
| 6057 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input); |
| 6058 size_t current = ctxt->input->cur - ctxt->input->base; |
5935 | 6059 |
5936 » » nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw); | 6060 » » nbchars = xmlCharEncInput(in, terminate); |
5937 if (nbchars < 0) { | 6061 if (nbchars < 0) { |
5938 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, | 6062 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, |
5939 "encoder error\n", NULL, NULL); | 6063 "encoder error\n", NULL, NULL); |
5940 return(XML_ERR_INVALID_ENCODING); | 6064 return(XML_ERR_INVALID_ENCODING); |
5941 } | 6065 } |
| 6066 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current); |
5942 } | 6067 } |
5943 } | 6068 } |
5944 } | 6069 } |
5945 htmlParseTryOrFinish(ctxt, terminate); | 6070 htmlParseTryOrFinish(ctxt, terminate); |
5946 if (terminate) { | 6071 if (terminate) { |
5947 if ((ctxt->instate != XML_PARSER_EOF) && | 6072 if ((ctxt->instate != XML_PARSER_EOF) && |
5948 (ctxt->instate != XML_PARSER_EPILOG) && | 6073 (ctxt->instate != XML_PARSER_EPILOG) && |
5949 (ctxt->instate != XML_PARSER_MISC)) { | 6074 (ctxt->instate != XML_PARSER_MISC)) { |
5950 ctxt->errNo = XML_ERR_DOCUMENT_END; | 6075 ctxt->errNo = XML_ERR_DOCUMENT_END; |
5951 ctxt->wellFormed = 0; | 6076 ctxt->wellFormed = 0; |
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6025 xmlFree(buf); | 6150 xmlFree(buf); |
6026 return(NULL); | 6151 return(NULL); |
6027 } | 6152 } |
6028 | 6153 |
6029 if (filename == NULL) | 6154 if (filename == NULL) |
6030 inputStream->filename = NULL; | 6155 inputStream->filename = NULL; |
6031 else | 6156 else |
6032 inputStream->filename = (char *) | 6157 inputStream->filename = (char *) |
6033 xmlCanonicPath((const xmlChar *) filename); | 6158 xmlCanonicPath((const xmlChar *) filename); |
6034 inputStream->buf = buf; | 6159 inputStream->buf = buf; |
6035 inputStream->base = inputStream->buf->buffer->content; | 6160 xmlBufResetInput(buf->buffer, inputStream); |
6036 inputStream->cur = inputStream->buf->buffer->content; | |
6037 inputStream->end = | |
6038 » &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; | |
6039 | 6161 |
6040 inputPush(ctxt, inputStream); | 6162 inputPush(ctxt, inputStream); |
6041 | 6163 |
6042 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && | 6164 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && |
6043 (ctxt->input->buf != NULL)) { | 6165 (ctxt->input->buf != NULL)) { |
6044 » int base = ctxt->input->base - ctxt->input->buf->buffer->content; | 6166 » size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); |
6045 » int cur = ctxt->input->cur - ctxt->input->base; | 6167 » size_t cur = ctxt->input->cur - ctxt->input->base; |
6046 | 6168 |
6047 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); | 6169 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); |
6048 | 6170 |
6049 » ctxt->input->base = ctxt->input->buf->buffer->content + base; | 6171 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); |
6050 » ctxt->input->cur = ctxt->input->base + cur; | |
6051 » ctxt->input->end = | |
6052 » &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; | |
6053 #ifdef DEBUG_PUSH | 6172 #ifdef DEBUG_PUSH |
6054 xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); | 6173 xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); |
6055 #endif | 6174 #endif |
6056 } | 6175 } |
6057 ctxt->progressive = 1; | 6176 ctxt->progressive = 1; |
6058 | 6177 |
6059 return(ctxt); | 6178 return(ctxt); |
6060 } | 6179 } |
6061 #endif /* LIBXML_PUSH_ENABLED */ | 6180 #endif /* LIBXML_PUSH_ENABLED */ |
6062 | 6181 |
(...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6162 xmlFree(canonicFilename); | 6281 xmlFree(canonicFilename); |
6163 if (inputStream == NULL) { | 6282 if (inputStream == NULL) { |
6164 xmlFreeParserCtxt(ctxt); | 6283 xmlFreeParserCtxt(ctxt); |
6165 return(NULL); | 6284 return(NULL); |
6166 } | 6285 } |
6167 | 6286 |
6168 inputPush(ctxt, inputStream); | 6287 inputPush(ctxt, inputStream); |
6169 | 6288 |
6170 /* set encoding */ | 6289 /* set encoding */ |
6171 if (encoding) { | 6290 if (encoding) { |
6172 content = xmlMallocAtomic (xmlStrlen(content_line) + strlen(encoding) +
1); | 6291 size_t l = strlen(encoding); |
6173 » if (content) { | 6292 |
6174 » strcpy ((char *)content, (char *)content_line); | 6293 » if (l < 1000) { |
6175 strcat ((char *)content, (char *)encoding); | 6294 » content = xmlMallocAtomic (xmlStrlen(content_line) + l + 1); |
6176 htmlCheckEncoding (ctxt, content); | 6295 » if (content) { |
6177 » xmlFree (content); | 6296 » » strcpy ((char *)content, (char *)content_line); |
| 6297 » » strcat ((char *)content, (char *)encoding); |
| 6298 » » htmlCheckEncoding (ctxt, content); |
| 6299 » » xmlFree (content); |
| 6300 » } |
6178 } | 6301 } |
6179 } | 6302 } |
6180 | 6303 |
6181 return(ctxt); | 6304 return(ctxt); |
6182 } | 6305 } |
6183 | 6306 |
6184 /** | 6307 /** |
6185 * htmlSAXParseFile: | 6308 * htmlSAXParseFile: |
6186 * @filename: the filename | 6309 * @filename: the filename |
6187 * @encoding: a free form C string describing the HTML document encoding, or NU
LL | 6310 * @encoding: a free form C string describing the HTML document encoding, or NU
LL |
(...skipping 256 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6444 ctxt->standalone = -1; | 6567 ctxt->standalone = -1; |
6445 ctxt->hasExternalSubset = 0; | 6568 ctxt->hasExternalSubset = 0; |
6446 ctxt->hasPErefs = 0; | 6569 ctxt->hasPErefs = 0; |
6447 ctxt->html = 1; | 6570 ctxt->html = 1; |
6448 ctxt->external = 0; | 6571 ctxt->external = 0; |
6449 ctxt->instate = XML_PARSER_START; | 6572 ctxt->instate = XML_PARSER_START; |
6450 ctxt->token = 0; | 6573 ctxt->token = 0; |
6451 | 6574 |
6452 ctxt->wellFormed = 1; | 6575 ctxt->wellFormed = 1; |
6453 ctxt->nsWellFormed = 1; | 6576 ctxt->nsWellFormed = 1; |
| 6577 ctxt->disableSAX = 0; |
6454 ctxt->valid = 1; | 6578 ctxt->valid = 1; |
6455 ctxt->vctxt.userData = ctxt; | 6579 ctxt->vctxt.userData = ctxt; |
6456 ctxt->vctxt.error = xmlParserValidityError; | 6580 ctxt->vctxt.error = xmlParserValidityError; |
6457 ctxt->vctxt.warning = xmlParserValidityWarning; | 6581 ctxt->vctxt.warning = xmlParserValidityWarning; |
6458 ctxt->record_info = 0; | 6582 ctxt->record_info = 0; |
6459 ctxt->nbChars = 0; | 6583 ctxt->nbChars = 0; |
6460 ctxt->checkIndex = 0; | 6584 ctxt->checkIndex = 0; |
6461 ctxt->inSubset = 0; | 6585 ctxt->inSubset = 0; |
6462 ctxt->errNo = XML_ERR_OK; | 6586 ctxt->errNo = XML_ERR_OK; |
6463 ctxt->depth = 0; | 6587 ctxt->depth = 0; |
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6523 } else | 6647 } else |
6524 ctxt->recovery = 0; | 6648 ctxt->recovery = 0; |
6525 if (options & HTML_PARSE_COMPACT) { | 6649 if (options & HTML_PARSE_COMPACT) { |
6526 ctxt->options |= HTML_PARSE_COMPACT; | 6650 ctxt->options |= HTML_PARSE_COMPACT; |
6527 options -= HTML_PARSE_COMPACT; | 6651 options -= HTML_PARSE_COMPACT; |
6528 } | 6652 } |
6529 if (options & XML_PARSE_HUGE) { | 6653 if (options & XML_PARSE_HUGE) { |
6530 ctxt->options |= XML_PARSE_HUGE; | 6654 ctxt->options |= XML_PARSE_HUGE; |
6531 options -= XML_PARSE_HUGE; | 6655 options -= XML_PARSE_HUGE; |
6532 } | 6656 } |
| 6657 if (options & HTML_PARSE_NODEFDTD) { |
| 6658 ctxt->options |= HTML_PARSE_NODEFDTD; |
| 6659 options -= HTML_PARSE_NODEFDTD; |
| 6660 } |
| 6661 if (options & HTML_PARSE_IGNORE_ENC) { |
| 6662 ctxt->options |= HTML_PARSE_IGNORE_ENC; |
| 6663 options -= HTML_PARSE_IGNORE_ENC; |
| 6664 } |
| 6665 if (options & HTML_PARSE_NOIMPLIED) { |
| 6666 ctxt->options |= HTML_PARSE_NOIMPLIED; |
| 6667 options -= HTML_PARSE_NOIMPLIED; |
| 6668 } |
6533 ctxt->dictNames = 0; | 6669 ctxt->dictNames = 0; |
6534 return (options); | 6670 return (options); |
6535 } | 6671 } |
6536 | 6672 |
6537 /** | 6673 /** |
6538 * htmlDoRead: | 6674 * htmlDoRead: |
6539 * @ctxt: an HTML parser context | 6675 * @ctxt: an HTML parser context |
6540 * @URL: the base URL to use for the document | 6676 * @URL: the base URL to use for the document |
6541 * @encoding: the document encoding, or NULL | 6677 * @encoding: the document encoding, or NULL |
6542 * @options: a combination of htmlParserOption(s) | 6678 * @options: a combination of htmlParserOption(s) |
(...skipping 126 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6669 */ | 6805 */ |
6670 htmlDocPtr | 6806 htmlDocPtr |
6671 htmlReadFd(int fd, const char *URL, const char *encoding, int options) | 6807 htmlReadFd(int fd, const char *URL, const char *encoding, int options) |
6672 { | 6808 { |
6673 htmlParserCtxtPtr ctxt; | 6809 htmlParserCtxtPtr ctxt; |
6674 xmlParserInputBufferPtr input; | 6810 xmlParserInputBufferPtr input; |
6675 xmlParserInputPtr stream; | 6811 xmlParserInputPtr stream; |
6676 | 6812 |
6677 if (fd < 0) | 6813 if (fd < 0) |
6678 return (NULL); | 6814 return (NULL); |
| 6815 xmlInitParser(); |
6679 | 6816 |
6680 xmlInitParser(); | 6817 xmlInitParser(); |
6681 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); | 6818 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); |
6682 if (input == NULL) | 6819 if (input == NULL) |
6683 return (NULL); | 6820 return (NULL); |
6684 ctxt = xmlNewParserCtxt(); | 6821 ctxt = xmlNewParserCtxt(); |
6685 if (ctxt == NULL) { | 6822 if (ctxt == NULL) { |
6686 xmlFreeParserInputBuffer(input); | 6823 xmlFreeParserInputBuffer(input); |
6687 return (NULL); | 6824 return (NULL); |
6688 } | 6825 } |
(...skipping 27 matching lines...) Expand all Loading... |
6716 htmlParserCtxtPtr ctxt; | 6853 htmlParserCtxtPtr ctxt; |
6717 xmlParserInputBufferPtr input; | 6854 xmlParserInputBufferPtr input; |
6718 xmlParserInputPtr stream; | 6855 xmlParserInputPtr stream; |
6719 | 6856 |
6720 if (ioread == NULL) | 6857 if (ioread == NULL) |
6721 return (NULL); | 6858 return (NULL); |
6722 xmlInitParser(); | 6859 xmlInitParser(); |
6723 | 6860 |
6724 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, | 6861 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, |
6725 XML_CHAR_ENCODING_NONE); | 6862 XML_CHAR_ENCODING_NONE); |
6726 if (input == NULL) | 6863 if (input == NULL) { |
| 6864 if (ioclose != NULL) |
| 6865 ioclose(ioctx); |
6727 return (NULL); | 6866 return (NULL); |
| 6867 } |
6728 ctxt = htmlNewParserCtxt(); | 6868 ctxt = htmlNewParserCtxt(); |
6729 if (ctxt == NULL) { | 6869 if (ctxt == NULL) { |
6730 xmlFreeParserInputBuffer(input); | 6870 xmlFreeParserInputBuffer(input); |
6731 return (NULL); | 6871 return (NULL); |
6732 } | 6872 } |
6733 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); | 6873 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); |
6734 if (stream == NULL) { | 6874 if (stream == NULL) { |
6735 xmlFreeParserInputBuffer(input); | 6875 xmlFreeParserInputBuffer(input); |
6736 xmlFreeParserCtxt(ctxt); | 6876 xmlFreeParserCtxt(ctxt); |
6737 return (NULL); | 6877 return (NULL); |
(...skipping 18 matching lines...) Expand all Loading... |
6756 htmlDocPtr | 6896 htmlDocPtr |
6757 htmlCtxtReadDoc(htmlParserCtxtPtr ctxt, const xmlChar * cur, | 6897 htmlCtxtReadDoc(htmlParserCtxtPtr ctxt, const xmlChar * cur, |
6758 const char *URL, const char *encoding, int options) | 6898 const char *URL, const char *encoding, int options) |
6759 { | 6899 { |
6760 xmlParserInputPtr stream; | 6900 xmlParserInputPtr stream; |
6761 | 6901 |
6762 if (cur == NULL) | 6902 if (cur == NULL) |
6763 return (NULL); | 6903 return (NULL); |
6764 if (ctxt == NULL) | 6904 if (ctxt == NULL) |
6765 return (NULL); | 6905 return (NULL); |
| 6906 xmlInitParser(); |
6766 | 6907 |
6767 htmlCtxtReset(ctxt); | 6908 htmlCtxtReset(ctxt); |
6768 | 6909 |
6769 stream = xmlNewStringInputStream(ctxt, cur); | 6910 stream = xmlNewStringInputStream(ctxt, cur); |
6770 if (stream == NULL) { | 6911 if (stream == NULL) { |
6771 return (NULL); | 6912 return (NULL); |
6772 } | 6913 } |
6773 inputPush(ctxt, stream); | 6914 inputPush(ctxt, stream); |
6774 return (htmlDoRead(ctxt, URL, encoding, options, 1)); | 6915 return (htmlDoRead(ctxt, URL, encoding, options, 1)); |
6775 } | 6916 } |
(...skipping 13 matching lines...) Expand all Loading... |
6789 htmlDocPtr | 6930 htmlDocPtr |
6790 htmlCtxtReadFile(htmlParserCtxtPtr ctxt, const char *filename, | 6931 htmlCtxtReadFile(htmlParserCtxtPtr ctxt, const char *filename, |
6791 const char *encoding, int options) | 6932 const char *encoding, int options) |
6792 { | 6933 { |
6793 xmlParserInputPtr stream; | 6934 xmlParserInputPtr stream; |
6794 | 6935 |
6795 if (filename == NULL) | 6936 if (filename == NULL) |
6796 return (NULL); | 6937 return (NULL); |
6797 if (ctxt == NULL) | 6938 if (ctxt == NULL) |
6798 return (NULL); | 6939 return (NULL); |
| 6940 xmlInitParser(); |
6799 | 6941 |
6800 htmlCtxtReset(ctxt); | 6942 htmlCtxtReset(ctxt); |
6801 | 6943 |
6802 stream = xmlLoadExternalEntity(filename, NULL, ctxt); | 6944 stream = xmlLoadExternalEntity(filename, NULL, ctxt); |
6803 if (stream == NULL) { | 6945 if (stream == NULL) { |
6804 return (NULL); | 6946 return (NULL); |
6805 } | 6947 } |
6806 inputPush(ctxt, stream); | 6948 inputPush(ctxt, stream); |
6807 return (htmlDoRead(ctxt, NULL, encoding, options, 1)); | 6949 return (htmlDoRead(ctxt, NULL, encoding, options, 1)); |
6808 } | 6950 } |
(...skipping 16 matching lines...) Expand all Loading... |
6825 htmlCtxtReadMemory(htmlParserCtxtPtr ctxt, const char *buffer, int size, | 6967 htmlCtxtReadMemory(htmlParserCtxtPtr ctxt, const char *buffer, int size, |
6826 const char *URL, const char *encoding, int options) | 6968 const char *URL, const char *encoding, int options) |
6827 { | 6969 { |
6828 xmlParserInputBufferPtr input; | 6970 xmlParserInputBufferPtr input; |
6829 xmlParserInputPtr stream; | 6971 xmlParserInputPtr stream; |
6830 | 6972 |
6831 if (ctxt == NULL) | 6973 if (ctxt == NULL) |
6832 return (NULL); | 6974 return (NULL); |
6833 if (buffer == NULL) | 6975 if (buffer == NULL) |
6834 return (NULL); | 6976 return (NULL); |
| 6977 xmlInitParser(); |
6835 | 6978 |
6836 htmlCtxtReset(ctxt); | 6979 htmlCtxtReset(ctxt); |
6837 | 6980 |
6838 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); | 6981 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); |
6839 if (input == NULL) { | 6982 if (input == NULL) { |
6840 return(NULL); | 6983 return(NULL); |
6841 } | 6984 } |
6842 | 6985 |
6843 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); | 6986 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); |
6844 if (stream == NULL) { | 6987 if (stream == NULL) { |
(...skipping 22 matching lines...) Expand all Loading... |
6867 htmlCtxtReadFd(htmlParserCtxtPtr ctxt, int fd, | 7010 htmlCtxtReadFd(htmlParserCtxtPtr ctxt, int fd, |
6868 const char *URL, const char *encoding, int options) | 7011 const char *URL, const char *encoding, int options) |
6869 { | 7012 { |
6870 xmlParserInputBufferPtr input; | 7013 xmlParserInputBufferPtr input; |
6871 xmlParserInputPtr stream; | 7014 xmlParserInputPtr stream; |
6872 | 7015 |
6873 if (fd < 0) | 7016 if (fd < 0) |
6874 return (NULL); | 7017 return (NULL); |
6875 if (ctxt == NULL) | 7018 if (ctxt == NULL) |
6876 return (NULL); | 7019 return (NULL); |
| 7020 xmlInitParser(); |
6877 | 7021 |
6878 htmlCtxtReset(ctxt); | 7022 htmlCtxtReset(ctxt); |
6879 | 7023 |
6880 | 7024 |
6881 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); | 7025 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); |
6882 if (input == NULL) | 7026 if (input == NULL) |
6883 return (NULL); | 7027 return (NULL); |
6884 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); | 7028 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); |
6885 if (stream == NULL) { | 7029 if (stream == NULL) { |
6886 xmlFreeParserInputBuffer(input); | 7030 xmlFreeParserInputBuffer(input); |
(...skipping 24 matching lines...) Expand all Loading... |
6911 const char *URL, | 7055 const char *URL, |
6912 const char *encoding, int options) | 7056 const char *encoding, int options) |
6913 { | 7057 { |
6914 xmlParserInputBufferPtr input; | 7058 xmlParserInputBufferPtr input; |
6915 xmlParserInputPtr stream; | 7059 xmlParserInputPtr stream; |
6916 | 7060 |
6917 if (ioread == NULL) | 7061 if (ioread == NULL) |
6918 return (NULL); | 7062 return (NULL); |
6919 if (ctxt == NULL) | 7063 if (ctxt == NULL) |
6920 return (NULL); | 7064 return (NULL); |
| 7065 xmlInitParser(); |
6921 | 7066 |
6922 htmlCtxtReset(ctxt); | 7067 htmlCtxtReset(ctxt); |
6923 | 7068 |
6924 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, | 7069 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, |
6925 XML_CHAR_ENCODING_NONE); | 7070 XML_CHAR_ENCODING_NONE); |
6926 if (input == NULL) | 7071 if (input == NULL) { |
| 7072 if (ioclose != NULL) |
| 7073 ioclose(ioctx); |
6927 return (NULL); | 7074 return (NULL); |
| 7075 } |
6928 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); | 7076 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); |
6929 if (stream == NULL) { | 7077 if (stream == NULL) { |
6930 xmlFreeParserInputBuffer(input); | 7078 xmlFreeParserInputBuffer(input); |
6931 return (NULL); | 7079 return (NULL); |
6932 } | 7080 } |
6933 inputPush(ctxt, stream); | 7081 inputPush(ctxt, stream); |
6934 return (htmlDoRead(ctxt, URL, encoding, options, 1)); | 7082 return (htmlDoRead(ctxt, URL, encoding, options, 1)); |
6935 } | 7083 } |
6936 | 7084 |
6937 #define bottom_HTMLparser | 7085 #define bottom_HTMLparser |
6938 #include "elfgcchack.h" | 7086 #include "elfgcchack.h" |
6939 #endif /* LIBXML_HTML_ENABLED */ | 7087 #endif /* LIBXML_HTML_ENABLED */ |
OLD | NEW |