| OLD | NEW |
| 1 /* | 1 /* |
| 2 * HTMLparser.c : an HTML 4.0 non-verifying parser | 2 * HTMLparser.c : an HTML 4.0 non-verifying parser |
| 3 * | 3 * |
| 4 * See Copyright for the status of this software. | 4 * See Copyright for the status of this software. |
| 5 * | 5 * |
| 6 * daniel@veillard.com | 6 * daniel@veillard.com |
| 7 */ | 7 */ |
| 8 | 8 |
| 9 #define IN_LIBXML | 9 #define IN_LIBXML |
| 10 #include "libxml.h" | 10 #include "libxml.h" |
| (...skipping 26 matching lines...) Expand all Loading... |
| 37 #include <libxml/xmlerror.h> | 37 #include <libxml/xmlerror.h> |
| 38 #include <libxml/HTMLparser.h> | 38 #include <libxml/HTMLparser.h> |
| 39 #include <libxml/HTMLtree.h> | 39 #include <libxml/HTMLtree.h> |
| 40 #include <libxml/entities.h> | 40 #include <libxml/entities.h> |
| 41 #include <libxml/encoding.h> | 41 #include <libxml/encoding.h> |
| 42 #include <libxml/valid.h> | 42 #include <libxml/valid.h> |
| 43 #include <libxml/xmlIO.h> | 43 #include <libxml/xmlIO.h> |
| 44 #include <libxml/globals.h> | 44 #include <libxml/globals.h> |
| 45 #include <libxml/uri.h> | 45 #include <libxml/uri.h> |
| 46 | 46 |
| 47 #include "buf.h" |
| 48 #include "enc.h" |
| 49 |
| 47 #define HTML_MAX_NAMELEN 1000 | 50 #define HTML_MAX_NAMELEN 1000 |
| 48 #define HTML_PARSER_BIG_BUFFER_SIZE 1000 | 51 #define HTML_PARSER_BIG_BUFFER_SIZE 1000 |
| 49 #define HTML_PARSER_BUFFER_SIZE 100 | 52 #define HTML_PARSER_BUFFER_SIZE 100 |
| 50 | 53 |
| 51 /* #define DEBUG */ | 54 /* #define DEBUG */ |
| 52 /* #define DEBUG_PUSH */ | 55 /* #define DEBUG_PUSH */ |
| 53 | 56 |
| 54 static int htmlOmittedDefaultValue = 1; | 57 static int htmlOmittedDefaultValue = 1; |
| 55 | 58 |
| 56 xmlChar * htmlDecodeEntities(htmlParserCtxtPtr ctxt, int len, | 59 xmlChar * htmlDecodeEntities(htmlParserCtxtPtr ctxt, int len, |
| (...skipping 663 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 720 static const char* const input_attrs[] = { ATTRS, "type", "name", "value", "checked", "disabled", "readonly", "size", "maxlength", "src", "alt", "usemap", "ismap", "tabindex", "accesskey", "onfocus", "onblur", "onselect", "onchange", "accept", NULL } ; | 723 static const char* const input_attrs[] = { ATTRS, "type", "name", "value", "checked", "disabled", "readonly", "size", "maxlength", "src", "alt", "usemap", "ismap", "tabindex", "accesskey", "onfocus", "onblur", "onselect", "onchange", "accept", NULL } ; |
| 721 static const char* const prompt_attrs[] = { COREATTRS, I18N, "prompt", NULL } ; | 724 static const char* const prompt_attrs[] = { COREATTRS, I18N, "prompt", NULL } ; |
| 722 static const char* const label_attrs[] = { ATTRS, "for", "accesskey", "onfocus", "onblur", NULL } ; | 725 static const char* const label_attrs[] = { ATTRS, "for", "accesskey", "onfocus", "onblur", NULL } ; |
| 723 static const char* const legend_attrs[] = { ATTRS, "accesskey", NULL } ; | 726 static const char* const legend_attrs[] = { ATTRS, "accesskey", NULL } ; |
| 724 static const char* const align_attr[] = { "align", NULL } ; | 727 static const char* const align_attr[] = { "align", NULL } ; |
| 725 static const char* const link_attrs[] = { ATTRS, "charset", "href", "hreflang", "type", "rel", "rev", "media", NULL } ; | 728 static const char* const link_attrs[] = { ATTRS, "charset", "href", "hreflang", "type", "rel", "rev", "media", NULL } ; |
| 726 static const char* const map_contents[] = { BLOCK, "area", NULL } ; | 729 static const char* const map_contents[] = { BLOCK, "area", NULL } ; |
| 727 static const char* const name_attr[] = { "name", NULL } ; | 730 static const char* const name_attr[] = { "name", NULL } ; |
| 728 static const char* const action_attr[] = { "action", NULL } ; | 731 static const char* const action_attr[] = { "action", NULL } ; |
| 729 static const char* const blockli_elt[] = { BLOCK, "li", NULL } ; | 732 static const char* const blockli_elt[] = { BLOCK, "li", NULL } ; |
| 730 static const char* const meta_attrs[] = { I18N, "http-equiv", "name", "scheme", NULL } ; | 733 static const char* const meta_attrs[] = { I18N, "http-equiv", "name", "scheme", "charset", NULL } ; |
| 731 static const char* const content_attr[] = { "content", NULL } ; | 734 static const char* const content_attr[] = { "content", NULL } ; |
| 732 static const char* const type_attr[] = { "type", NULL } ; | 735 static const char* const type_attr[] = { "type", NULL } ; |
| 733 static const char* const noframes_content[] = { "body", FLOW MODIFIER, NULL } ; | 736 static const char* const noframes_content[] = { "body", FLOW MODIFIER, NULL } ; |
| 734 static const char* const object_contents[] = { FLOW, "param", NULL } ; | 737 static const char* const object_contents[] = { FLOW, "param", NULL } ; |
| 735 static const char* const object_attrs[] = { ATTRS, "declare", "classid", "codebase", "data", "type", "codetype", "archive", "standby", "height", "width", "usemap", "name", "tabindex", NULL } ; | 738 static const char* const object_attrs[] = { ATTRS, "declare", "classid", "codebase", "data", "type", "codetype", "archive", "standby", "height", "width", "usemap", "name", "tabindex", NULL } ; |
| 736 static const char* const object_depr[] = { "align", "border", "hspace", "vspace", NULL } ; | 739 static const char* const object_depr[] = { "align", "border", "hspace", "vspace", NULL } ; |
| 737 static const char* const ol_attrs[] = { "type", "compact", "start", NULL} ; | 740 static const char* const ol_attrs[] = { "type", "compact", "start", NULL} ; |
| 738 static const char* const option_elt[] = { "option", NULL } ; | 741 static const char* const option_elt[] = { "option", NULL } ; |
| 739 static const char* const optgroup_attrs[] = { ATTRS, "disabled", NULL } ; | 742 static const char* const optgroup_attrs[] = { ATTRS, "disabled", NULL } ; |
| 740 static const char* const option_attrs[] = { ATTRS, "disabled", "label", "selected", "value", NULL } ; | 743 static const char* const option_attrs[] = { ATTRS, "disabled", "label", "selected", "value", NULL } ; |
| (...skipping 332 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1073 "dt", "p", "menu", "dir", "address", "pre", "listing", "xmp", | 1076 "dt", "p", "menu", "dir", "address", "pre", "listing", "xmp", |
| 1074 "head", "dd", NULL, | 1077 "head", "dd", NULL, |
| 1075 "dd", "p", "menu", "dir", "address", "pre", "listing", "xmp", | 1078 "dd", "p", "menu", "dir", "address", "pre", "listing", "xmp", |
| 1076 "head", "dt", NULL, | 1079 "head", "dt", NULL, |
| 1077 "ul", "p", "head", "ol", "menu", "dir", "address", "pre", | 1080 "ul", "p", "head", "ol", "menu", "dir", "address", "pre", |
| 1078 "listing", "xmp", NULL, | 1081 "listing", "xmp", NULL, |
| 1079 "ol", "p", "head", "ul", NULL, | 1082 "ol", "p", "head", "ul", NULL, |
| 1080 "menu", "p", "head", "ul", NULL, | 1083 "menu", "p", "head", "ul", NULL, |
| 1081 "p", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", FONTSTYLE, NULL, | 1084 "p", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", FONTSTYLE, NULL, |
| 1082 "div", "p", "head", NULL, | 1085 "div", "p", "head", NULL, |
| 1083 "noscript",» "p", "head", NULL, | 1086 "noscript",» "p", NULL, |
| 1084 "center", "font", "b", "i", "p", "head", NULL, | 1087 "center", "font", "b", "i", "p", "head", NULL, |
| 1085 "a",» » "a", NULL, | 1088 "a",» » "a", "head", NULL, |
| 1086 "caption", "p", NULL, | 1089 "caption", "p", NULL, |
| 1087 "colgroup", "caption", "colgroup", "col", "p", NULL, | 1090 "colgroup", "caption", "colgroup", "col", "p", NULL, |
| 1088 "col", "caption", "col", "p", NULL, | 1091 "col", "caption", "col", "p", NULL, |
| 1089 "table", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", "pre", | 1092 "table", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", "pre", |
| 1090 "listing", "xmp", "a", NULL, | 1093 "listing", "xmp", "a", NULL, |
| 1091 "th", "th", "td", "p", "span", "font", "a", "b", "i", "u", NULL, | 1094 "th", "th", "td", "p", "span", "font", "a", "b", "i", "u", NULL, |
| 1092 "td", "th", "td", "p", "span", "font", "a", "b", "i", "u", NULL, | 1095 "td", "th", "td", "p", "span", "font", "a", "b", "i", "u", NULL, |
| 1093 "tr", "th", "td", "tr", "caption", "col", "colgroup", "p", NULL, | 1096 "tr", "th", "td", "tr", "caption", "col", "colgroup", "p", NULL, |
| 1094 "thead", "caption", "col", "colgroup", NULL, | 1097 "thead", "caption", "col", "colgroup", NULL, |
| 1095 "tfoot", "th", "td", "tr", "caption", "col", "colgroup", "thead", | 1098 "tfoot", "th", "td", "tr", "caption", "col", "colgroup", "thead", |
| 1096 "tbody", "p", NULL, | 1099 "tbody", "p", NULL, |
| 1097 "tbody", "th", "td", "tr", "caption", "col", "colgroup", "thead", | 1100 "tbody", "th", "td", "tr", "caption", "col", "colgroup", "thead", |
| 1098 "tfoot", "tbody", "p", NULL, | 1101 "tfoot", "tbody", "p", NULL, |
| 1099 "optgroup", "option", NULL, | 1102 "optgroup", "option", NULL, |
| 1100 "option", "option", NULL, | 1103 "option", "option", NULL, |
| 1101 "fieldset", "legend", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", | 1104 "fieldset", "legend", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", |
| 1102 "pre", "listing", "xmp", "a", NULL, | 1105 "pre", "listing", "xmp", "a", NULL, |
| 1106 /* most tags in FONTSTYLE, PHRASE and SPECIAL should close <head> */ |
| 1107 "tt", "head", NULL, |
| 1108 "i", "head", NULL, |
| 1109 "b", "head", NULL, |
| 1110 "u", "head", NULL, |
| 1111 "s", "head", NULL, |
| 1112 "strike", "head", NULL, |
| 1113 "big", "head", NULL, |
| 1114 "small", "head", NULL, |
| 1115 |
| 1116 "em", "head", NULL, |
| 1117 "strong", "head", NULL, |
| 1118 "dfn", "head", NULL, |
| 1119 "code", "head", NULL, |
| 1120 "samp", "head", NULL, |
| 1121 "kbd", "head", NULL, |
| 1122 "var", "head", NULL, |
| 1123 "cite", "head", NULL, |
| 1124 "abbr", "head", NULL, |
| 1125 "acronym", "head", NULL, |
| 1126 |
| 1127 /* "a" */ |
| 1128 "img", "head", NULL, |
| 1129 /* "applet" */ |
| 1130 /* "embed" */ |
| 1131 /* "object" */ |
| 1132 "font", "head", NULL, |
| 1133 /* "basefont" */ |
| 1134 "br", "head", NULL, |
| 1135 /* "script" */ |
| 1136 "map", "head", NULL, |
| 1137 "q", "head", NULL, |
| 1138 "sub", "head", NULL, |
| 1139 "sup", "head", NULL, |
| 1140 "span", "head", NULL, |
| 1141 "bdo", "head", NULL, |
| 1142 "iframe", "head", NULL, |
| 1103 NULL | 1143 NULL |
| 1104 }; | 1144 }; |
| 1105 | 1145 |
| 1106 /* | 1146 /* |
| 1107 * The list of HTML elements which are supposed not to have | 1147 * The list of HTML elements which are supposed not to have |
| 1108 * CDATA content and where a p element will be implied | 1148 * CDATA content and where a p element will be implied |
| 1109 * | 1149 * |
| 1110 * TODO: extend that list by reading the HTML SGML DTD on | 1150 * TODO: extend that list by reading the HTML SGML DTD on |
| 1111 * implied paragraph | 1151 * implied paragraph |
| 1112 */ | 1152 */ |
| (...skipping 17 matching lines...) Expand all Loading... |
| 1130 "onmousemove", | 1170 "onmousemove", |
| 1131 "onmouseout", | 1171 "onmouseout", |
| 1132 "onkeypress", | 1172 "onkeypress", |
| 1133 "onkeydown", | 1173 "onkeydown", |
| 1134 "onkeyup", | 1174 "onkeyup", |
| 1135 "onload", | 1175 "onload", |
| 1136 "onunload", | 1176 "onunload", |
| 1137 "onfocus", | 1177 "onfocus", |
| 1138 "onblur", | 1178 "onblur", |
| 1139 "onsubmit", | 1179 "onsubmit", |
| 1140 "onrest", | 1180 "onreset", |
| 1141 "onchange", | 1181 "onchange", |
| 1142 "onselect" | 1182 "onselect" |
| 1143 }; | 1183 }; |
| 1144 | 1184 |
| 1145 /* | 1185 /* |
| 1146 * This table is used by the htmlparser to know what to do with | 1186 * This table is used by the htmlparser to know what to do with |
| 1147 * broken html pages. By assigning different priorities to different | 1187 * broken html pages. By assigning different priorities to different |
| 1148 * elements the parser can decide how to handle extra endtags. | 1188 * elements the parser can decide how to handle extra endtags. |
| 1149 * Endtags are only allowed to close elements with lower or equal | 1189 * Endtags are only allowed to close elements with lower or equal |
| 1150 * priority. | 1190 * priority. |
| (...skipping 1729 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2880 ctxt->sax->characters(ctxt->userData, buf, nbchar); | 2920 ctxt->sax->characters(ctxt->userData, buf, nbchar); |
| 2881 } | 2921 } |
| 2882 nbchar = 0; | 2922 nbchar = 0; |
| 2883 } | 2923 } |
| 2884 GROW; | 2924 GROW; |
| 2885 NEXTL(l); | 2925 NEXTL(l); |
| 2886 cur = CUR_CHAR(l); | 2926 cur = CUR_CHAR(l); |
| 2887 } | 2927 } |
| 2888 | 2928 |
| 2889 if ((!(IS_CHAR_CH(cur))) && (!((cur == 0) && (ctxt->progressive)))) { | 2929 if ((!(IS_CHAR_CH(cur))) && (!((cur == 0) && (ctxt->progressive)))) { |
| 2890 » htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR, | 2930 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR, |
| 2891 » "Invalid char in CDATA 0x%X\n", cur); | 2931 "Invalid char in CDATA 0x%X\n", cur); |
| 2892 » NEXT; | 2932 if (ctxt->input->cur < ctxt->input->end) { |
| 2933 NEXT; |
| 2934 } |
| 2893 } | 2935 } |
| 2894 | 2936 |
| 2895 if ((nbchar != 0) && (ctxt->sax != NULL) && (!ctxt->disableSAX)) { | 2937 if ((nbchar != 0) && (ctxt->sax != NULL) && (!ctxt->disableSAX)) { |
| 2896 if (ctxt->sax->cdataBlock!= NULL) { | 2938 if (ctxt->sax->cdataBlock!= NULL) { |
| 2897 /* | 2939 /* |
| 2898 * Insert as CDATA, which is the same as HTML_PRESERVE_NODE | 2940 * Insert as CDATA, which is the same as HTML_PRESERVE_NODE |
| 2899 */ | 2941 */ |
| 2900 ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar); | 2942 ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar); |
| 2901 } else if (ctxt->sax->characters != NULL) { | 2943 } else if (ctxt->sax->characters != NULL) { |
| 2902 ctxt->sax->characters(ctxt->userData, buf, nbchar); | 2944 ctxt->sax->characters(ctxt->userData, buf, nbchar); |
| (...skipping 29 matching lines...) Expand all Loading... |
| 2932 "Invalid char in CDATA 0x%X\n", cur); | 2974 "Invalid char in CDATA 0x%X\n", cur); |
| 2933 } else { | 2975 } else { |
| 2934 COPY_BUF(l,buf,nbchar,cur); | 2976 COPY_BUF(l,buf,nbchar,cur); |
| 2935 } | 2977 } |
| 2936 if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) { | 2978 if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) { |
| 2937 /* | 2979 /* |
| 2938 * Ok the segment is to be consumed as chars. | 2980 * Ok the segment is to be consumed as chars. |
| 2939 */ | 2981 */ |
| 2940 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { | 2982 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { |
| 2941 if (areBlanks(ctxt, buf, nbchar)) { | 2983 if (areBlanks(ctxt, buf, nbchar)) { |
| 2942 » » if (ctxt->sax->ignorableWhitespace != NULL) | 2984 » » if (ctxt->keepBlanks) { |
| 2943 » » » ctxt->sax->ignorableWhitespace(ctxt->userData, | 2985 » » » if (ctxt->sax->characters != NULL) |
| 2944 » » » buf, nbchar); | 2986 » » » ctxt->sax->characters(ctxt->userData, buf, nbchar); |
| 2987 » » } else { |
| 2988 » » » if (ctxt->sax->ignorableWhitespace != NULL) |
| 2989 » » » ctxt->sax->ignorableWhitespace(ctxt->userData, |
| 2990 » » » buf, nbchar); |
| 2991 » » } |
| 2945 } else { | 2992 } else { |
| 2946 htmlCheckParagraph(ctxt); | 2993 htmlCheckParagraph(ctxt); |
| 2947 if (ctxt->sax->characters != NULL) | 2994 if (ctxt->sax->characters != NULL) |
| 2948 ctxt->sax->characters(ctxt->userData, buf, nbchar); | 2995 ctxt->sax->characters(ctxt->userData, buf, nbchar); |
| 2949 } | 2996 } |
| 2950 } | 2997 } |
| 2951 nbchar = 0; | 2998 nbchar = 0; |
| 2952 } | 2999 } |
| 2953 NEXTL(l); | 3000 NEXTL(l); |
| 2954 chunk++; | 3001 chunk++; |
| (...skipping 10 matching lines...) Expand all Loading... |
| 2965 } | 3012 } |
| 2966 } | 3013 } |
| 2967 if (nbchar != 0) { | 3014 if (nbchar != 0) { |
| 2968 buf[nbchar] = 0; | 3015 buf[nbchar] = 0; |
| 2969 | 3016 |
| 2970 /* | 3017 /* |
| 2971 * Ok the segment is to be consumed as chars. | 3018 * Ok the segment is to be consumed as chars. |
| 2972 */ | 3019 */ |
| 2973 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { | 3020 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { |
| 2974 if (areBlanks(ctxt, buf, nbchar)) { | 3021 if (areBlanks(ctxt, buf, nbchar)) { |
| 2975 » » if (ctxt->sax->ignorableWhitespace != NULL) | 3022 » » if (ctxt->keepBlanks) { |
| 2976 » » ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); | 3023 » » if (ctxt->sax->characters != NULL) |
| 3024 » » » ctxt->sax->characters(ctxt->userData, buf, nbchar); |
| 3025 » » } else { |
| 3026 » » if (ctxt->sax->ignorableWhitespace != NULL) |
| 3027 » » » ctxt->sax->ignorableWhitespace(ctxt->userData, |
| 3028 » » » buf, nbchar); |
| 3029 » » } |
| 2977 } else { | 3030 } else { |
| 2978 htmlCheckParagraph(ctxt); | 3031 htmlCheckParagraph(ctxt); |
| 2979 if (ctxt->sax->characters != NULL) | 3032 if (ctxt->sax->characters != NULL) |
| 2980 ctxt->sax->characters(ctxt->userData, buf, nbchar); | 3033 ctxt->sax->characters(ctxt->userData, buf, nbchar); |
| 2981 } | 3034 } |
| 2982 } | 3035 } |
| 2983 } else { | 3036 } else { |
| 2984 /* | 3037 /* |
| 2985 * Loop detection | 3038 * Loop detection |
| 2986 */ | 3039 */ |
| (...skipping 281 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3268 SKIP(3); | 3321 SKIP(3); |
| 3269 while (CUR != ';') { | 3322 while (CUR != ';') { |
| 3270 if ((CUR >= '0') && (CUR <= '9')) | 3323 if ((CUR >= '0') && (CUR <= '9')) |
| 3271 val = val * 16 + (CUR - '0'); | 3324 val = val * 16 + (CUR - '0'); |
| 3272 else if ((CUR >= 'a') && (CUR <= 'f')) | 3325 else if ((CUR >= 'a') && (CUR <= 'f')) |
| 3273 val = val * 16 + (CUR - 'a') + 10; | 3326 val = val * 16 + (CUR - 'a') + 10; |
| 3274 else if ((CUR >= 'A') && (CUR <= 'F')) | 3327 else if ((CUR >= 'A') && (CUR <= 'F')) |
| 3275 val = val * 16 + (CUR - 'A') + 10; | 3328 val = val * 16 + (CUR - 'A') + 10; |
| 3276 else { | 3329 else { |
| 3277 htmlParseErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, | 3330 htmlParseErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, |
| 3278 » » "htmlParseCharRef: missing semicolumn\n", | 3331 » » "htmlParseCharRef: missing semicolon\n", |
| 3279 NULL, NULL); | 3332 NULL, NULL); |
| 3280 break; | 3333 break; |
| 3281 } | 3334 } |
| 3282 NEXT; | 3335 NEXT; |
| 3283 } | 3336 } |
| 3284 if (CUR == ';') | 3337 if (CUR == ';') |
| 3285 NEXT; | 3338 NEXT; |
| 3286 } else if ((CUR == '&') && (NXT(1) == '#')) { | 3339 } else if ((CUR == '&') && (NXT(1) == '#')) { |
| 3287 SKIP(2); | 3340 SKIP(2); |
| 3288 while (CUR != ';') { | 3341 while (CUR != ';') { |
| 3289 if ((CUR >= '0') && (CUR <= '9')) | 3342 if ((CUR >= '0') && (CUR <= '9')) |
| 3290 val = val * 10 + (CUR - '0'); | 3343 val = val * 10 + (CUR - '0'); |
| 3291 else { | 3344 else { |
| 3292 htmlParseErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, | 3345 htmlParseErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, |
| 3293 » » "htmlParseCharRef: missing semicolumn\n", | 3346 » » "htmlParseCharRef: missing semicolon\n", |
| 3294 NULL, NULL); | 3347 NULL, NULL); |
| 3295 break; | 3348 break; |
| 3296 } | 3349 } |
| 3297 NEXT; | 3350 NEXT; |
| 3298 } | 3351 } |
| 3299 if (CUR == ';') | 3352 if (CUR == ';') |
| 3300 NEXT; | 3353 NEXT; |
| 3301 } else { | 3354 } else { |
| 3302 htmlParseErr(ctxt, XML_ERR_INVALID_CHARREF, | 3355 htmlParseErr(ctxt, XML_ERR_INVALID_CHARREF, |
| 3303 "htmlParseCharRef: invalid value\n", NULL, NULL); | 3356 "htmlParseCharRef: invalid value\n", NULL, NULL); |
| (...skipping 122 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3426 NEXT; | 3479 NEXT; |
| 3427 SKIP_BLANKS; | 3480 SKIP_BLANKS; |
| 3428 val = htmlParseAttValue(ctxt); | 3481 val = htmlParseAttValue(ctxt); |
| 3429 } | 3482 } |
| 3430 | 3483 |
| 3431 *value = val; | 3484 *value = val; |
| 3432 return(name); | 3485 return(name); |
| 3433 } | 3486 } |
| 3434 | 3487 |
| 3435 /** | 3488 /** |
| 3436 * htmlCheckEncoding: | 3489 * htmlCheckEncodingDirect: |
| 3437 * @ctxt: an HTML parser context | 3490 * @ctxt: an HTML parser context |
| 3438 * @attvalue: the attribute value | 3491 * @encoding: the encoding name |
| 3439 * | 3492 * |
| 3440 * Checks an http-equiv attribute from a Meta tag to detect | 3493 * Checks an attribute value to detect |
| 3441 * the encoding | 3494 * the encoding |
| 3442 * If a new encoding is detected the parser is switched to decode | 3495 * If a new encoding is detected the parser is switched to decode |
| 3443 * it and pass UTF8 | 3496 * it and pass UTF8 |
| 3444 */ | 3497 */ |
| 3445 static void | 3498 static void |
| 3446 htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) { | 3499 htmlCheckEncodingDirect(htmlParserCtxtPtr ctxt, const xmlChar *encoding) { |
| 3447 const xmlChar *encoding; | |
| 3448 | 3500 |
| 3449 if ((ctxt == NULL) || (attvalue == NULL)) | 3501 if ((ctxt == NULL) || (encoding == NULL) || |
| 3502 (ctxt->options & HTML_PARSE_IGNORE_ENC)) |
| 3450 return; | 3503 return; |
| 3451 | 3504 |
| 3452 /* do not change encoding */ | 3505 /* do not change encoding */ |
| 3453 if (ctxt->input->encoding != NULL) | 3506 if (ctxt->input->encoding != NULL) |
| 3454 return; | 3507 return; |
| 3455 | 3508 |
| 3456 encoding = xmlStrcasestr(attvalue, BAD_CAST"charset="); | |
| 3457 if (encoding != NULL) { | |
| 3458 encoding += 8; | |
| 3459 } else { | |
| 3460 encoding = xmlStrcasestr(attvalue, BAD_CAST"charset ="); | |
| 3461 if (encoding != NULL) | |
| 3462 encoding += 9; | |
| 3463 } | |
| 3464 if (encoding != NULL) { | 3509 if (encoding != NULL) { |
| 3465 xmlCharEncoding enc; | 3510 xmlCharEncoding enc; |
| 3466 xmlCharEncodingHandlerPtr handler; | 3511 xmlCharEncodingHandlerPtr handler; |
| 3467 | 3512 |
| 3468 while ((*encoding == ' ') || (*encoding == '\t')) encoding++; | 3513 while ((*encoding == ' ') || (*encoding == '\t')) encoding++; |
| 3469 | 3514 |
| 3470 if (ctxt->input->encoding != NULL) | 3515 if (ctxt->input->encoding != NULL) |
| 3471 xmlFree((xmlChar *) ctxt->input->encoding); | 3516 xmlFree((xmlChar *) ctxt->input->encoding); |
| 3472 ctxt->input->encoding = xmlStrdup(encoding); | 3517 ctxt->input->encoding = xmlStrdup(encoding); |
| 3473 | 3518 |
| (...skipping 17 matching lines...) Expand all Loading... |
| 3491 ctxt->charset = XML_CHAR_ENCODING_UTF8; | 3536 ctxt->charset = XML_CHAR_ENCODING_UTF8; |
| 3492 } else { | 3537 } else { |
| 3493 /* | 3538 /* |
| 3494 * fallback for unknown encodings | 3539 * fallback for unknown encodings |
| 3495 */ | 3540 */ |
| 3496 handler = xmlFindCharEncodingHandler((const char *) encoding); | 3541 handler = xmlFindCharEncodingHandler((const char *) encoding); |
| 3497 if (handler != NULL) { | 3542 if (handler != NULL) { |
| 3498 xmlSwitchToEncoding(ctxt, handler); | 3543 xmlSwitchToEncoding(ctxt, handler); |
| 3499 ctxt->charset = XML_CHAR_ENCODING_UTF8; | 3544 ctxt->charset = XML_CHAR_ENCODING_UTF8; |
| 3500 } else { | 3545 } else { |
| 3501 » » ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING; | 3546 » » htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING, |
| 3547 » » "htmlCheckEncoding: unknown encoding %s\n", |
| 3548 » » » encoding, NULL); |
| 3502 } | 3549 } |
| 3503 } | 3550 } |
| 3504 | 3551 |
| 3505 if ((ctxt->input->buf != NULL) && | 3552 if ((ctxt->input->buf != NULL) && |
| 3506 (ctxt->input->buf->encoder != NULL) && | 3553 (ctxt->input->buf->encoder != NULL) && |
| 3507 (ctxt->input->buf->raw != NULL) && | 3554 (ctxt->input->buf->raw != NULL) && |
| 3508 (ctxt->input->buf->buffer != NULL)) { | 3555 (ctxt->input->buf->buffer != NULL)) { |
| 3509 int nbchars; | 3556 int nbchars; |
| 3510 int processed; | 3557 int processed; |
| 3511 | 3558 |
| 3512 /* | 3559 /* |
| 3513 * convert as much as possible to the parser reading buffer. | 3560 * convert as much as possible to the parser reading buffer. |
| 3514 */ | 3561 */ |
| 3515 processed = ctxt->input->cur - ctxt->input->base; | 3562 processed = ctxt->input->cur - ctxt->input->base; |
| 3516 » xmlBufferShrink(ctxt->input->buf->buffer, processed); | 3563 » xmlBufShrink(ctxt->input->buf->buffer, processed); |
| 3517 » nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder, | 3564 » nbchars = xmlCharEncInput(ctxt->input->buf, 1); |
| 3518 » » ctxt->input->buf->buffer, | |
| 3519 » » » » ctxt->input->buf->raw); | |
| 3520 if (nbchars < 0) { | 3565 if (nbchars < 0) { |
| 3521 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, | 3566 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, |
| 3522 "htmlCheckEncoding: encoder error\n", | 3567 "htmlCheckEncoding: encoder error\n", |
| 3523 NULL, NULL); | 3568 NULL, NULL); |
| 3524 } | 3569 } |
| 3525 » ctxt->input->base = | 3570 xmlBufResetInput(ctxt->input->buf->buffer, ctxt->input); |
| 3526 » ctxt->input->cur = ctxt->input->buf->buffer->content; | |
| 3527 ctxt->input->end = | |
| 3528 &ctxt->input->base[ctxt->input->buf->buffer->use]; | |
| 3529 } | 3571 } |
| 3530 } | 3572 } |
| 3531 } | 3573 } |
| 3532 | 3574 |
| 3533 /** | 3575 /** |
| 3576 * htmlCheckEncoding: |
| 3577 * @ctxt: an HTML parser context |
| 3578 * @attvalue: the attribute value |
| 3579 * |
| 3580 * Checks an http-equiv attribute from a Meta tag to detect |
| 3581 * the encoding |
| 3582 * If a new encoding is detected the parser is switched to decode |
| 3583 * it and pass UTF8 |
| 3584 */ |
| 3585 static void |
| 3586 htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) { |
| 3587 const xmlChar *encoding; |
| 3588 |
| 3589 if (!attvalue) |
| 3590 return; |
| 3591 |
| 3592 encoding = xmlStrcasestr(attvalue, BAD_CAST"charset"); |
| 3593 if (encoding != NULL) { |
| 3594 encoding += 7; |
| 3595 } |
| 3596 /* |
| 3597 * skip blank |
| 3598 */ |
| 3599 if (encoding && IS_BLANK_CH(*encoding)) |
| 3600 encoding = xmlStrcasestr(attvalue, BAD_CAST"="); |
| 3601 if (encoding && *encoding == '=') { |
| 3602 encoding ++; |
| 3603 htmlCheckEncodingDirect(ctxt, encoding); |
| 3604 } |
| 3605 } |
| 3606 |
| 3607 /** |
| 3534 * htmlCheckMeta: | 3608 * htmlCheckMeta: |
| 3535 * @ctxt: an HTML parser context | 3609 * @ctxt: an HTML parser context |
| 3536 * @atts: the attributes values | 3610 * @atts: the attributes values |
| 3537 * | 3611 * |
| 3538 * Checks the attributes of a Meta tag | 3612 * Checks the attributes of a Meta tag |
| 3539 */ | 3613 */ |
| 3540 static void | 3614 static void |
| 3541 htmlCheckMeta(htmlParserCtxtPtr ctxt, const xmlChar **atts) { | 3615 htmlCheckMeta(htmlParserCtxtPtr ctxt, const xmlChar **atts) { |
| 3542 int i; | 3616 int i; |
| 3543 const xmlChar *att, *value; | 3617 const xmlChar *att, *value; |
| 3544 int http = 0; | 3618 int http = 0; |
| 3545 const xmlChar *content = NULL; | 3619 const xmlChar *content = NULL; |
| 3546 | 3620 |
| 3547 if ((ctxt == NULL) || (atts == NULL)) | 3621 if ((ctxt == NULL) || (atts == NULL)) |
| 3548 return; | 3622 return; |
| 3549 | 3623 |
| 3550 i = 0; | 3624 i = 0; |
| 3551 att = atts[i++]; | 3625 att = atts[i++]; |
| 3552 while (att != NULL) { | 3626 while (att != NULL) { |
| 3553 value = atts[i++]; | 3627 value = atts[i++]; |
| 3554 if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"http-equiv")) | 3628 if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"http-equiv")) |
| 3555 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type"))) | 3629 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type"))) |
| 3556 http = 1; | 3630 http = 1; |
| 3631 else if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"charset"))) |
| 3632 htmlCheckEncodingDirect(ctxt, value); |
| 3557 else if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"content"))) | 3633 else if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"content"))) |
| 3558 content = value; | 3634 content = value; |
| 3559 att = atts[i++]; | 3635 att = atts[i++]; |
| 3560 } | 3636 } |
| 3561 if ((http) && (content != NULL)) | 3637 if ((http) && (content != NULL)) |
| 3562 htmlCheckEncoding(ctxt, content); | 3638 htmlCheckEncoding(ctxt, content); |
| 3563 | 3639 |
| 3564 } | 3640 } |
| 3565 | 3641 |
| 3566 /** | 3642 /** |
| (...skipping 21 matching lines...) Expand all Loading... |
| 3588 const xmlChar *name; | 3664 const xmlChar *name; |
| 3589 const xmlChar *attname; | 3665 const xmlChar *attname; |
| 3590 xmlChar *attvalue; | 3666 xmlChar *attvalue; |
| 3591 const xmlChar **atts; | 3667 const xmlChar **atts; |
| 3592 int nbatts = 0; | 3668 int nbatts = 0; |
| 3593 int maxatts; | 3669 int maxatts; |
| 3594 int meta = 0; | 3670 int meta = 0; |
| 3595 int i; | 3671 int i; |
| 3596 int discardtag = 0; | 3672 int discardtag = 0; |
| 3597 | 3673 |
| 3598 if (ctxt->instate == XML_PARSER_EOF) | |
| 3599 return(-1); | |
| 3600 if ((ctxt == NULL) || (ctxt->input == NULL)) { | 3674 if ((ctxt == NULL) || (ctxt->input == NULL)) { |
| 3601 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, | 3675 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, |
| 3602 "htmlParseStartTag: context error\n", NULL, NULL); | 3676 "htmlParseStartTag: context error\n", NULL, NULL); |
| 3603 return -1; | 3677 return -1; |
| 3604 } | 3678 } |
| 3679 if (ctxt->instate == XML_PARSER_EOF) |
| 3680 return(-1); |
| 3605 if (CUR != '<') return -1; | 3681 if (CUR != '<') return -1; |
| 3606 NEXT; | 3682 NEXT; |
| 3607 | 3683 |
| 3608 atts = ctxt->atts; | 3684 atts = ctxt->atts; |
| 3609 maxatts = ctxt->maxatts; | 3685 maxatts = ctxt->maxatts; |
| 3610 | 3686 |
| 3611 GROW; | 3687 GROW; |
| 3612 name = htmlParseHTMLName(ctxt); | 3688 name = htmlParseHTMLName(ctxt); |
| 3613 if (name == NULL) { | 3689 if (name == NULL) { |
| 3614 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED, | 3690 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED, |
| (...skipping 261 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3876 } | 3952 } |
| 3877 } | 3953 } |
| 3878 | 3954 |
| 3879 /* | 3955 /* |
| 3880 * SAX: End of Tag | 3956 * SAX: End of Tag |
| 3881 */ | 3957 */ |
| 3882 oldname = ctxt->name; | 3958 oldname = ctxt->name; |
| 3883 if ((oldname != NULL) && (xmlStrEqual(oldname, name))) { | 3959 if ((oldname != NULL) && (xmlStrEqual(oldname, name))) { |
| 3884 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) | 3960 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) |
| 3885 ctxt->sax->endElement(ctxt->userData, name); | 3961 ctxt->sax->endElement(ctxt->userData, name); |
| 3962 htmlNodeInfoPop(ctxt); |
| 3886 htmlnamePop(ctxt); | 3963 htmlnamePop(ctxt); |
| 3887 ret = 1; | 3964 ret = 1; |
| 3888 } else { | 3965 } else { |
| 3889 ret = 0; | 3966 ret = 0; |
| 3890 } | 3967 } |
| 3891 | 3968 |
| 3892 return (ret); | 3969 return (ret); |
| 3893 } | 3970 } |
| 3894 | 3971 |
| 3895 | 3972 |
| (...skipping 386 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4282 * | 4359 * |
| 4283 * [39] element ::= EmptyElemTag | STag content ETag | 4360 * [39] element ::= EmptyElemTag | STag content ETag |
| 4284 * | 4361 * |
| 4285 * [41] Attribute ::= Name Eq AttValue | 4362 * [41] Attribute ::= Name Eq AttValue |
| 4286 */ | 4363 */ |
| 4287 | 4364 |
| 4288 static void | 4365 static void |
| 4289 htmlParseElementInternal(htmlParserCtxtPtr ctxt) { | 4366 htmlParseElementInternal(htmlParserCtxtPtr ctxt) { |
| 4290 const xmlChar *name; | 4367 const xmlChar *name; |
| 4291 const htmlElemDesc * info; | 4368 const htmlElemDesc * info; |
| 4292 htmlParserNodeInfo node_info; | 4369 htmlParserNodeInfo node_info = { 0, }; |
| 4293 int failed; | 4370 int failed; |
| 4294 | 4371 |
| 4295 if ((ctxt == NULL) || (ctxt->input == NULL)) { | 4372 if ((ctxt == NULL) || (ctxt->input == NULL)) { |
| 4296 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, | 4373 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, |
| 4297 "htmlParseElementInternal: context error\n", NULL, NULL); | 4374 "htmlParseElementInternal: context error\n", NULL, NULL); |
| 4298 return; | 4375 return; |
| 4299 } | 4376 } |
| 4300 | 4377 |
| 4301 if (ctxt->instate == XML_PARSER_EOF) | 4378 if (ctxt->instate == XML_PARSER_EOF) |
| 4302 return; | 4379 return; |
| (...skipping 360 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4663 if (CUR == 0) | 4740 if (CUR == 0) |
| 4664 htmlAutoCloseOnEnd(ctxt); | 4741 htmlAutoCloseOnEnd(ctxt); |
| 4665 | 4742 |
| 4666 | 4743 |
| 4667 /* | 4744 /* |
| 4668 * SAX: end of the document processing. | 4745 * SAX: end of the document processing. |
| 4669 */ | 4746 */ |
| 4670 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) | 4747 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) |
| 4671 ctxt->sax->endDocument(ctxt->userData); | 4748 ctxt->sax->endDocument(ctxt->userData); |
| 4672 | 4749 |
| 4673 if (ctxt->myDoc != NULL) { | 4750 if ((!(ctxt->options & HTML_PARSE_NODEFDTD)) && (ctxt->myDoc != NULL)) { |
| 4674 dtd = xmlGetIntSubset(ctxt->myDoc); | 4751 dtd = xmlGetIntSubset(ctxt->myDoc); |
| 4675 if (dtd == NULL) | 4752 if (dtd == NULL) |
| 4676 ctxt->myDoc->intSubset = | 4753 ctxt->myDoc->intSubset = |
| 4677 xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html", | 4754 xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html", |
| 4678 BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN", | 4755 BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN", |
| 4679 BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd"); | 4756 BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd"); |
| 4680 } | 4757 } |
| 4681 if (! ctxt->wellFormed) return(-1); | 4758 if (! ctxt->wellFormed) return(-1); |
| 4682 return(0); | 4759 return(0); |
| 4683 } | 4760 } |
| (...skipping 184 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4868 if (buf == NULL) return(NULL); | 4945 if (buf == NULL) return(NULL); |
| 4869 | 4946 |
| 4870 input = xmlNewInputStream(ctxt); | 4947 input = xmlNewInputStream(ctxt); |
| 4871 if (input == NULL) { | 4948 if (input == NULL) { |
| 4872 xmlFreeParserCtxt(ctxt); | 4949 xmlFreeParserCtxt(ctxt); |
| 4873 return(NULL); | 4950 return(NULL); |
| 4874 } | 4951 } |
| 4875 | 4952 |
| 4876 input->filename = NULL; | 4953 input->filename = NULL; |
| 4877 input->buf = buf; | 4954 input->buf = buf; |
| 4878 input->base = input->buf->buffer->content; | 4955 xmlBufResetInput(buf->buffer, input); |
| 4879 input->cur = input->buf->buffer->content; | |
| 4880 input->end = &input->buf->buffer->content[input->buf->buffer->use]; | |
| 4881 | 4956 |
| 4882 inputPush(ctxt, input); | 4957 inputPush(ctxt, input); |
| 4883 return(ctxt); | 4958 return(ctxt); |
| 4884 } | 4959 } |
| 4885 | 4960 |
| 4886 /** | 4961 /** |
| 4887 * htmlCreateDocParserCtxt: | 4962 * htmlCreateDocParserCtxt: |
| 4888 * @cur: a pointer to an array of xmlChar | 4963 * @cur: a pointer to an array of xmlChar |
| 4889 * @encoding: a free form C string describing the HTML document encoding, or NU
LL | 4964 * @encoding: a free form C string describing the HTML document encoding, or NU
LL |
| 4890 * | 4965 * |
| (...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4987 if (base < 0) | 5062 if (base < 0) |
| 4988 return (-1); | 5063 return (-1); |
| 4989 | 5064 |
| 4990 if (ctxt->checkIndex > base) | 5065 if (ctxt->checkIndex > base) |
| 4991 base = ctxt->checkIndex; | 5066 base = ctxt->checkIndex; |
| 4992 | 5067 |
| 4993 if (in->buf == NULL) { | 5068 if (in->buf == NULL) { |
| 4994 buf = in->base; | 5069 buf = in->base; |
| 4995 len = in->length; | 5070 len = in->length; |
| 4996 } else { | 5071 } else { |
| 4997 buf = in->buf->buffer->content; | 5072 buf = xmlBufContent(in->buf->buffer); |
| 4998 len = in->buf->buffer->use; | 5073 len = xmlBufUse(in->buf->buffer); |
| 4999 } | 5074 } |
| 5000 | 5075 |
| 5001 /* take into account the sequence length */ | 5076 /* take into account the sequence length */ |
| 5002 if (third) | 5077 if (third) |
| 5003 len -= 2; | 5078 len -= 2; |
| 5004 else if (next) | 5079 else if (next) |
| 5005 len--; | 5080 len--; |
| 5006 for (; base < len; base++) { | 5081 for (; base < len; base++) { |
| 5007 if ((!incomment) && (base + 4 < len) && (!iscomment)) { | 5082 if ((!incomment) && (base + 4 < len) && (!iscomment)) { |
| 5008 if ((buf[base] == '<') && (buf[base + 1] == '!') && | 5083 if ((buf[base] == '<') && (buf[base + 1] == '!') && |
| (...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5080 #endif | 5155 #endif |
| 5081 return (-1); | 5156 return (-1); |
| 5082 } | 5157 } |
| 5083 | 5158 |
| 5084 /** | 5159 /** |
| 5085 * htmlParseLookupChars: | 5160 * htmlParseLookupChars: |
| 5086 * @ctxt: an HTML parser context | 5161 * @ctxt: an HTML parser context |
| 5087 * @stop: Array of chars, which stop the lookup. | 5162 * @stop: Array of chars, which stop the lookup. |
| 5088 * @stopLen: Length of stop-Array | 5163 * @stopLen: Length of stop-Array |
| 5089 * | 5164 * |
| 5090 * Try to find if any char of the stop-Array is available in the input | 5165 * Try to find if any char of the stop-Array is available in the input |
| 5091 * stream. | 5166 * stream. |
| 5092 * This function has a side effect of (possibly) incrementing ctxt->checkIndex | 5167 * This function has a side effect of (possibly) incrementing ctxt->checkIndex |
| 5093 * to avoid rescanning sequences of bytes, it DOES change the state of the | 5168 * to avoid rescanning sequences of bytes, it DOES change the state of the |
| 5094 * parser, do not use liberally. | 5169 * parser, do not use liberally. |
| 5095 * | 5170 * |
| 5096 * Returns the index to the current parsing point if a stopChar | 5171 * Returns the index to the current parsing point if a stopChar |
| 5097 * is available, -1 otherwise. | 5172 * is available, -1 otherwise. |
| 5098 */ | 5173 */ |
| 5099 static int | 5174 static int |
| 5100 htmlParseLookupChars(htmlParserCtxtPtr ctxt, const xmlChar * stop, | 5175 htmlParseLookupChars(htmlParserCtxtPtr ctxt, const xmlChar * stop, |
| 5101 int stopLen) | 5176 int stopLen) |
| 5102 { | 5177 { |
| 5103 int base, len; | 5178 int base, len; |
| 5104 htmlParserInputPtr in; | 5179 htmlParserInputPtr in; |
| 5105 const xmlChar *buf; | 5180 const xmlChar *buf; |
| 5106 int incomment = 0; | 5181 int incomment = 0; |
| 5107 int i; | 5182 int i; |
| 5108 | 5183 |
| 5109 in = ctxt->input; | 5184 in = ctxt->input; |
| 5110 if (in == NULL) | 5185 if (in == NULL) |
| 5111 return (-1); | 5186 return (-1); |
| 5112 | 5187 |
| 5113 base = in->cur - in->base; | 5188 base = in->cur - in->base; |
| 5114 if (base < 0) | 5189 if (base < 0) |
| 5115 return (-1); | 5190 return (-1); |
| 5116 | 5191 |
| 5117 if (ctxt->checkIndex > base) | 5192 if (ctxt->checkIndex > base) |
| 5118 base = ctxt->checkIndex; | 5193 base = ctxt->checkIndex; |
| 5119 | 5194 |
| 5120 if (in->buf == NULL) { | 5195 if (in->buf == NULL) { |
| 5121 buf = in->base; | 5196 buf = in->base; |
| 5122 len = in->length; | 5197 len = in->length; |
| 5123 } else { | 5198 } else { |
| 5124 buf = in->buf->buffer->content; | 5199 buf = xmlBufContent(in->buf->buffer); |
| 5125 len = in->buf->buffer->use; | 5200 len = xmlBufUse(in->buf->buffer); |
| 5126 } | 5201 } |
| 5127 | 5202 |
| 5128 for (; base < len; base++) { | 5203 for (; base < len; base++) { |
| 5129 if (!incomment && (base + 4 < len)) { | 5204 if (!incomment && (base + 4 < len)) { |
| 5130 if ((buf[base] == '<') && (buf[base + 1] == '!') && | 5205 if ((buf[base] == '<') && (buf[base + 1] == '!') && |
| 5131 (buf[base + 2] == '-') && (buf[base + 3] == '-')) { | 5206 (buf[base + 2] == '-') && (buf[base + 3] == '-')) { |
| 5132 incomment = 1; | 5207 incomment = 1; |
| 5133 /* do not increment past <! - some people use <!--> */ | 5208 /* do not increment past <! - some people use <!--> */ |
| 5134 base += 2; | 5209 base += 2; |
| 5135 } | 5210 } |
| (...skipping 28 matching lines...) Expand all Loading... |
| 5164 * | 5239 * |
| 5165 * Returns zero if no parsing was possible | 5240 * Returns zero if no parsing was possible |
| 5166 */ | 5241 */ |
| 5167 static int | 5242 static int |
| 5168 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { | 5243 htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { |
| 5169 int ret = 0; | 5244 int ret = 0; |
| 5170 htmlParserInputPtr in; | 5245 htmlParserInputPtr in; |
| 5171 int avail = 0; | 5246 int avail = 0; |
| 5172 xmlChar cur, next; | 5247 xmlChar cur, next; |
| 5173 | 5248 |
| 5249 htmlParserNodeInfo node_info; |
| 5250 |
| 5174 #ifdef DEBUG_PUSH | 5251 #ifdef DEBUG_PUSH |
| 5175 switch (ctxt->instate) { | 5252 switch (ctxt->instate) { |
| 5176 case XML_PARSER_EOF: | 5253 case XML_PARSER_EOF: |
| 5177 xmlGenericError(xmlGenericErrorContext, | 5254 xmlGenericError(xmlGenericErrorContext, |
| 5178 "HPP: try EOF\n"); break; | 5255 "HPP: try EOF\n"); break; |
| 5179 case XML_PARSER_START: | 5256 case XML_PARSER_START: |
| 5180 xmlGenericError(xmlGenericErrorContext, | 5257 xmlGenericError(xmlGenericErrorContext, |
| 5181 "HPP: try START\n"); break; | 5258 "HPP: try START\n"); break; |
| 5182 case XML_PARSER_MISC: | 5259 case XML_PARSER_MISC: |
| 5183 xmlGenericError(xmlGenericErrorContext, | 5260 xmlGenericError(xmlGenericErrorContext, |
| (...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5224 } | 5301 } |
| 5225 #endif | 5302 #endif |
| 5226 | 5303 |
| 5227 while (1) { | 5304 while (1) { |
| 5228 | 5305 |
| 5229 in = ctxt->input; | 5306 in = ctxt->input; |
| 5230 if (in == NULL) break; | 5307 if (in == NULL) break; |
| 5231 if (in->buf == NULL) | 5308 if (in->buf == NULL) |
| 5232 avail = in->length - (in->cur - in->base); | 5309 avail = in->length - (in->cur - in->base); |
| 5233 else | 5310 else |
| 5234 » avail = in->buf->buffer->use - (in->cur - in->base); | 5311 » avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base); |
| 5235 if ((avail == 0) && (terminate)) { | 5312 if ((avail == 0) && (terminate)) { |
| 5236 htmlAutoCloseOnEnd(ctxt); | 5313 htmlAutoCloseOnEnd(ctxt); |
| 5237 if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { | 5314 if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { |
| 5238 /* | 5315 /* |
| 5239 * SAX: end of the document processing. | 5316 * SAX: end of the document processing. |
| 5240 */ | 5317 */ |
| 5241 ctxt->instate = XML_PARSER_EOF; | 5318 ctxt->instate = XML_PARSER_EOF; |
| 5242 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) | 5319 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) |
| 5243 ctxt->sax->endDocument(ctxt->userData); | 5320 ctxt->sax->endDocument(ctxt->userData); |
| 5244 } | 5321 } |
| (...skipping 15 matching lines...) Expand all Loading... |
| 5260 case XML_PARSER_START: | 5337 case XML_PARSER_START: |
| 5261 /* | 5338 /* |
| 5262 * Very first chars read from the document flow. | 5339 * Very first chars read from the document flow. |
| 5263 */ | 5340 */ |
| 5264 cur = in->cur[0]; | 5341 cur = in->cur[0]; |
| 5265 if (IS_BLANK_CH(cur)) { | 5342 if (IS_BLANK_CH(cur)) { |
| 5266 SKIP_BLANKS; | 5343 SKIP_BLANKS; |
| 5267 if (in->buf == NULL) | 5344 if (in->buf == NULL) |
| 5268 avail = in->length - (in->cur - in->base); | 5345 avail = in->length - (in->cur - in->base); |
| 5269 else | 5346 else |
| 5270 » » » avail = in->buf->buffer->use - (in->cur - in->base); | 5347 » » » avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base
); |
| 5271 } | 5348 } |
| 5272 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) | 5349 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) |
| 5273 ctxt->sax->setDocumentLocator(ctxt->userData, | 5350 ctxt->sax->setDocumentLocator(ctxt->userData, |
| 5274 &xmlDefaultSAXLocator); | 5351 &xmlDefaultSAXLocator); |
| 5275 if ((ctxt->sax) && (ctxt->sax->startDocument) && | 5352 if ((ctxt->sax) && (ctxt->sax->startDocument) && |
| 5276 (!ctxt->disableSAX)) | 5353 (!ctxt->disableSAX)) |
| 5277 ctxt->sax->startDocument(ctxt->userData); | 5354 ctxt->sax->startDocument(ctxt->userData); |
| 5278 | 5355 |
| 5279 cur = in->cur[0]; | 5356 cur = in->cur[0]; |
| 5280 next = in->cur[1]; | 5357 next = in->cur[1]; |
| (...skipping 21 matching lines...) Expand all Loading... |
| 5302 xmlGenericError(xmlGenericErrorContext, | 5379 xmlGenericError(xmlGenericErrorContext, |
| 5303 "HPP: entering MISC\n"); | 5380 "HPP: entering MISC\n"); |
| 5304 #endif | 5381 #endif |
| 5305 } | 5382 } |
| 5306 break; | 5383 break; |
| 5307 case XML_PARSER_MISC: | 5384 case XML_PARSER_MISC: |
| 5308 SKIP_BLANKS; | 5385 SKIP_BLANKS; |
| 5309 if (in->buf == NULL) | 5386 if (in->buf == NULL) |
| 5310 avail = in->length - (in->cur - in->base); | 5387 avail = in->length - (in->cur - in->base); |
| 5311 else | 5388 else |
| 5312 » » avail = in->buf->buffer->use - (in->cur - in->base); | 5389 » » avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base); |
| 5313 » » if (avail < 2) | 5390 » » /* |
| 5391 » » * no chars in buffer |
| 5392 » » */ |
| 5393 » » if (avail < 1) |
| 5314 goto done; | 5394 goto done; |
| 5395 /* |
| 5396 * not enough chars in buffer |
| 5397 */ |
| 5398 if (avail < 2) { |
| 5399 if (!terminate) |
| 5400 goto done; |
| 5401 else |
| 5402 next = ' '; |
| 5403 } else { |
| 5404 next = in->cur[1]; |
| 5405 } |
| 5315 cur = in->cur[0]; | 5406 cur = in->cur[0]; |
| 5316 next = in->cur[1]; | |
| 5317 if ((cur == '<') && (next == '!') && | 5407 if ((cur == '<') && (next == '!') && |
| 5318 (in->cur[2] == '-') && (in->cur[3] == '-')) { | 5408 (in->cur[2] == '-') && (in->cur[3] == '-')) { |
| 5319 if ((!terminate) && | 5409 if ((!terminate) && |
| 5320 (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0)
) | 5410 (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0)
) |
| 5321 goto done; | 5411 goto done; |
| 5322 #ifdef DEBUG_PUSH | 5412 #ifdef DEBUG_PUSH |
| 5323 xmlGenericError(xmlGenericErrorContext, | 5413 xmlGenericError(xmlGenericErrorContext, |
| 5324 "HPP: Parsing Comment\n"); | 5414 "HPP: Parsing Comment\n"); |
| 5325 #endif | 5415 #endif |
| 5326 htmlParseComment(ctxt); | 5416 htmlParseComment(ctxt); |
| (...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5362 xmlGenericError(xmlGenericErrorContext, | 5452 xmlGenericError(xmlGenericErrorContext, |
| 5363 "HPP: entering START_TAG\n"); | 5453 "HPP: entering START_TAG\n"); |
| 5364 #endif | 5454 #endif |
| 5365 } | 5455 } |
| 5366 break; | 5456 break; |
| 5367 case XML_PARSER_PROLOG: | 5457 case XML_PARSER_PROLOG: |
| 5368 SKIP_BLANKS; | 5458 SKIP_BLANKS; |
| 5369 if (in->buf == NULL) | 5459 if (in->buf == NULL) |
| 5370 avail = in->length - (in->cur - in->base); | 5460 avail = in->length - (in->cur - in->base); |
| 5371 else | 5461 else |
| 5372 » » avail = in->buf->buffer->use - (in->cur - in->base); | 5462 » » avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base); |
| 5373 if (avail < 2) | 5463 if (avail < 2) |
| 5374 goto done; | 5464 goto done; |
| 5375 cur = in->cur[0]; | 5465 cur = in->cur[0]; |
| 5376 next = in->cur[1]; | 5466 next = in->cur[1]; |
| 5377 if ((cur == '<') && (next == '!') && | 5467 if ((cur == '<') && (next == '!') && |
| 5378 (in->cur[2] == '-') && (in->cur[3] == '-')) { | 5468 (in->cur[2] == '-') && (in->cur[3] == '-')) { |
| 5379 if ((!terminate) && | 5469 if ((!terminate) && |
| 5380 (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0)
) | 5470 (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0)
) |
| 5381 goto done; | 5471 goto done; |
| 5382 #ifdef DEBUG_PUSH | 5472 #ifdef DEBUG_PUSH |
| (...skipping 20 matching lines...) Expand all Loading... |
| 5403 #ifdef DEBUG_PUSH | 5493 #ifdef DEBUG_PUSH |
| 5404 xmlGenericError(xmlGenericErrorContext, | 5494 xmlGenericError(xmlGenericErrorContext, |
| 5405 "HPP: entering START_TAG\n"); | 5495 "HPP: entering START_TAG\n"); |
| 5406 #endif | 5496 #endif |
| 5407 } | 5497 } |
| 5408 break; | 5498 break; |
| 5409 case XML_PARSER_EPILOG: | 5499 case XML_PARSER_EPILOG: |
| 5410 if (in->buf == NULL) | 5500 if (in->buf == NULL) |
| 5411 avail = in->length - (in->cur - in->base); | 5501 avail = in->length - (in->cur - in->base); |
| 5412 else | 5502 else |
| 5413 » » avail = in->buf->buffer->use - (in->cur - in->base); | 5503 » » avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base); |
| 5414 if (avail < 1) | 5504 if (avail < 1) |
| 5415 goto done; | 5505 goto done; |
| 5416 cur = in->cur[0]; | 5506 cur = in->cur[0]; |
| 5417 if (IS_BLANK_CH(cur)) { | 5507 if (IS_BLANK_CH(cur)) { |
| 5418 htmlParseCharData(ctxt); | 5508 htmlParseCharData(ctxt); |
| 5419 goto done; | 5509 goto done; |
| 5420 } | 5510 } |
| 5421 if (avail < 2) | 5511 if (avail < 2) |
| 5422 goto done; | 5512 goto done; |
| 5423 next = in->cur[1]; | 5513 next = in->cur[1]; |
| (...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5456 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) | 5546 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) |
| 5457 ctxt->sax->endDocument(ctxt->userData); | 5547 ctxt->sax->endDocument(ctxt->userData); |
| 5458 goto done; | 5548 goto done; |
| 5459 } | 5549 } |
| 5460 break; | 5550 break; |
| 5461 case XML_PARSER_START_TAG: { | 5551 case XML_PARSER_START_TAG: { |
| 5462 const xmlChar *name; | 5552 const xmlChar *name; |
| 5463 int failed; | 5553 int failed; |
| 5464 const htmlElemDesc * info; | 5554 const htmlElemDesc * info; |
| 5465 | 5555 |
| 5466 » » if (avail < 2) | 5556 » » /* |
| 5557 » » * no chars in buffer |
| 5558 » » */ |
| 5559 » » if (avail < 1) |
| 5467 goto done; | 5560 goto done; |
| 5561 /* |
| 5562 * not enough chars in buffer |
| 5563 */ |
| 5564 if (avail < 2) { |
| 5565 if (!terminate) |
| 5566 goto done; |
| 5567 else |
| 5568 next = ' '; |
| 5569 } else { |
| 5570 next = in->cur[1]; |
| 5571 } |
| 5468 cur = in->cur[0]; | 5572 cur = in->cur[0]; |
| 5469 if (cur != '<') { | 5573 if (cur != '<') { |
| 5470 ctxt->instate = XML_PARSER_CONTENT; | 5574 ctxt->instate = XML_PARSER_CONTENT; |
| 5471 #ifdef DEBUG_PUSH | 5575 #ifdef DEBUG_PUSH |
| 5472 xmlGenericError(xmlGenericErrorContext, | 5576 xmlGenericError(xmlGenericErrorContext, |
| 5473 "HPP: entering CONTENT\n"); | 5577 "HPP: entering CONTENT\n"); |
| 5474 #endif | 5578 #endif |
| 5475 break; | 5579 break; |
| 5476 } | 5580 } |
| 5477 » » if (in->cur[1] == '/') { | 5581 » » if (next == '/') { |
| 5478 ctxt->instate = XML_PARSER_END_TAG; | 5582 ctxt->instate = XML_PARSER_END_TAG; |
| 5479 ctxt->checkIndex = 0; | 5583 ctxt->checkIndex = 0; |
| 5480 #ifdef DEBUG_PUSH | 5584 #ifdef DEBUG_PUSH |
| 5481 xmlGenericError(xmlGenericErrorContext, | 5585 xmlGenericError(xmlGenericErrorContext, |
| 5482 "HPP: entering END_TAG\n"); | 5586 "HPP: entering END_TAG\n"); |
| 5483 #endif | 5587 #endif |
| 5484 break; | 5588 break; |
| 5485 } | 5589 } |
| 5486 if ((!terminate) && | 5590 if ((!terminate) && |
| 5487 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) | 5591 (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) |
| 5488 goto done; | 5592 goto done; |
| 5489 | 5593 |
| 5594 /* Capture start position */ |
| 5595 if (ctxt->record_info) { |
| 5596 node_info.begin_pos = ctxt->input->consumed + |
| 5597 (CUR_PTR - ctxt->input->base); |
| 5598 node_info.begin_line = ctxt->input->line; |
| 5599 } |
| 5600 |
| 5601 |
| 5490 failed = htmlParseStartTag(ctxt); | 5602 failed = htmlParseStartTag(ctxt); |
| 5491 name = ctxt->name; | 5603 name = ctxt->name; |
| 5492 if ((failed == -1) || | 5604 if ((failed == -1) || |
| 5493 (name == NULL)) { | 5605 (name == NULL)) { |
| 5494 if (CUR == '>') | 5606 if (CUR == '>') |
| 5495 NEXT; | 5607 NEXT; |
| 5496 break; | 5608 break; |
| 5497 } | 5609 } |
| 5498 | 5610 |
| 5499 /* | 5611 /* |
| (...skipping 29 matching lines...) Expand all Loading... |
| 5529 name, NULL); | 5641 name, NULL); |
| 5530 | 5642 |
| 5531 /* | 5643 /* |
| 5532 * end of parsing of this node. | 5644 * end of parsing of this node. |
| 5533 */ | 5645 */ |
| 5534 if (xmlStrEqual(name, ctxt->name)) { | 5646 if (xmlStrEqual(name, ctxt->name)) { |
| 5535 nodePop(ctxt); | 5647 nodePop(ctxt); |
| 5536 htmlnamePop(ctxt); | 5648 htmlnamePop(ctxt); |
| 5537 } | 5649 } |
| 5538 | 5650 |
| 5651 if (ctxt->record_info) |
| 5652 htmlNodeInfoPush(ctxt, &node_info); |
| 5653 |
| 5539 ctxt->instate = XML_PARSER_CONTENT; | 5654 ctxt->instate = XML_PARSER_CONTENT; |
| 5540 #ifdef DEBUG_PUSH | 5655 #ifdef DEBUG_PUSH |
| 5541 xmlGenericError(xmlGenericErrorContext, | 5656 xmlGenericError(xmlGenericErrorContext, |
| 5542 "HPP: entering CONTENT\n"); | 5657 "HPP: entering CONTENT\n"); |
| 5543 #endif | 5658 #endif |
| 5544 break; | 5659 break; |
| 5545 } | 5660 } |
| 5546 | 5661 |
| 5547 /* | 5662 /* |
| 5548 * Check for an Empty Element from DTD definition | 5663 * Check for an Empty Element from DTD definition |
| 5549 */ | 5664 */ |
| 5550 if ((info != NULL) && (info->empty)) { | 5665 if ((info != NULL) && (info->empty)) { |
| 5551 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) | 5666 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) |
| 5552 ctxt->sax->endElement(ctxt->userData, name); | 5667 ctxt->sax->endElement(ctxt->userData, name); |
| 5553 htmlnamePop(ctxt); | 5668 htmlnamePop(ctxt); |
| 5554 } | 5669 } |
| 5670 |
| 5671 if (ctxt->record_info) |
| 5672 htmlNodeInfoPush(ctxt, &node_info); |
| 5673 |
| 5555 ctxt->instate = XML_PARSER_CONTENT; | 5674 ctxt->instate = XML_PARSER_CONTENT; |
| 5556 #ifdef DEBUG_PUSH | 5675 #ifdef DEBUG_PUSH |
| 5557 xmlGenericError(xmlGenericErrorContext, | 5676 xmlGenericError(xmlGenericErrorContext, |
| 5558 "HPP: entering CONTENT\n"); | 5677 "HPP: entering CONTENT\n"); |
| 5559 #endif | 5678 #endif |
| 5560 break; | 5679 break; |
| 5561 } | 5680 } |
| 5562 case XML_PARSER_CONTENT: { | 5681 case XML_PARSER_CONTENT: { |
| 5563 long cons; | 5682 long cons; |
| 5564 /* | 5683 /* |
| 5565 * Handle preparsed entities and charRef | 5684 * Handle preparsed entities and charRef |
| 5566 */ | 5685 */ |
| 5567 if (ctxt->token != 0) { | 5686 if (ctxt->token != 0) { |
| 5568 xmlChar chr[2] = { 0 , 0 } ; | 5687 xmlChar chr[2] = { 0 , 0 } ; |
| 5569 | 5688 |
| 5570 chr[0] = (xmlChar) ctxt->token; | 5689 chr[0] = (xmlChar) ctxt->token; |
| 5571 htmlCheckParagraph(ctxt); | 5690 htmlCheckParagraph(ctxt); |
| 5572 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) | 5691 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) |
| 5573 ctxt->sax->characters(ctxt->userData, chr, 1); | 5692 ctxt->sax->characters(ctxt->userData, chr, 1); |
| 5574 ctxt->token = 0; | 5693 ctxt->token = 0; |
| 5575 ctxt->checkIndex = 0; | 5694 ctxt->checkIndex = 0; |
| 5576 } | 5695 } |
| 5577 if ((avail == 1) && (terminate)) { | 5696 if ((avail == 1) && (terminate)) { |
| 5578 cur = in->cur[0]; | 5697 cur = in->cur[0]; |
| 5579 if ((cur != '<') && (cur != '&')) { | 5698 if ((cur != '<') && (cur != '&')) { |
| 5580 if (ctxt->sax != NULL) { | 5699 if (ctxt->sax != NULL) { |
| 5581 if (IS_BLANK_CH(cur)) { | 5700 if (IS_BLANK_CH(cur)) { |
| 5582 » » » » if (ctxt->sax->ignorableWhitespace != NULL) | 5701 » » » » if (ctxt->keepBlanks) { |
| 5583 » » » » ctxt->sax->ignorableWhitespace( | 5702 » » » » if (ctxt->sax->characters != NULL) |
| 5584 » » » » » ctxt->userData, &cur, 1); | 5703 » » » » » ctxt->sax->characters( |
| 5704 » » » » » » ctxt->userData, &cur, 1); |
| 5705 » » » » } else { |
| 5706 » » » » if (ctxt->sax->ignorableWhitespace != NULL) |
| 5707 » » » » » ctxt->sax->ignorableWhitespace( |
| 5708 » » » » » » ctxt->userData, &cur, 1); |
| 5709 » » » » } |
| 5585 } else { | 5710 } else { |
| 5586 htmlCheckParagraph(ctxt); | 5711 htmlCheckParagraph(ctxt); |
| 5587 if (ctxt->sax->characters != NULL) | 5712 if (ctxt->sax->characters != NULL) |
| 5588 ctxt->sax->characters( | 5713 ctxt->sax->characters( |
| 5589 ctxt->userData, &cur, 1); | 5714 ctxt->userData, &cur, 1); |
| 5590 } | 5715 } |
| 5591 } | 5716 } |
| 5592 ctxt->token = 0; | 5717 ctxt->token = 0; |
| 5593 ctxt->checkIndex = 0; | 5718 ctxt->checkIndex = 0; |
| 5594 in->cur++; | 5719 in->cur++; |
| 5595 break; | 5720 break; |
| 5596 } | 5721 } |
| 5597 } | 5722 } |
| 5598 if (avail < 2) | 5723 if (avail < 2) |
| 5599 goto done; | 5724 goto done; |
| 5600 cur = in->cur[0]; | 5725 cur = in->cur[0]; |
| 5601 next = in->cur[1]; | 5726 next = in->cur[1]; |
| 5602 cons = ctxt->nbChars; | 5727 cons = ctxt->nbChars; |
| 5603 if ((xmlStrEqual(ctxt->name, BAD_CAST"script")) || | 5728 if ((xmlStrEqual(ctxt->name, BAD_CAST"script")) || |
| 5604 (xmlStrEqual(ctxt->name, BAD_CAST"style"))) { | 5729 (xmlStrEqual(ctxt->name, BAD_CAST"style"))) { |
| 5605 /* | 5730 /* |
| 5606 * Handle SCRIPT/STYLE separately | 5731 * Handle SCRIPT/STYLE separately |
| 5607 */ | 5732 */ |
| 5608 if (!terminate) { | 5733 if (!terminate) { |
| 5609 int idx; | 5734 int idx; |
| 5610 xmlChar val; | 5735 xmlChar val; |
| 5611 | 5736 |
| 5612 » » » idx = htmlParseLookupSequence(ctxt, '<', '/', 0, 0, 1); | 5737 » » » idx = htmlParseLookupSequence(ctxt, '<', '/', 0, 0, 0); |
| 5613 if (idx < 0) | 5738 if (idx < 0) |
| 5614 goto done; | 5739 goto done; |
| 5615 val = in->cur[idx + 2]; | 5740 val = in->cur[idx + 2]; |
| 5616 if (val == 0) /* bad cut of input */ | 5741 if (val == 0) /* bad cut of input */ |
| 5617 goto done; | 5742 goto done; |
| 5618 } | 5743 } |
| 5619 htmlParseScript(ctxt); | 5744 htmlParseScript(ctxt); |
| 5620 if ((cur == '<') && (next == '/')) { | 5745 if ((cur == '<') && (next == '/')) { |
| 5621 ctxt->instate = XML_PARSER_END_TAG; | 5746 ctxt->instate = XML_PARSER_END_TAG; |
| 5622 ctxt->checkIndex = 0; | 5747 ctxt->checkIndex = 0; |
| (...skipping 236 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5859 htmlAutoCloseOnEnd(ctxt); | 5984 htmlAutoCloseOnEnd(ctxt); |
| 5860 if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { | 5985 if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { |
| 5861 /* | 5986 /* |
| 5862 * SAX: end of the document processing. | 5987 * SAX: end of the document processing. |
| 5863 */ | 5988 */ |
| 5864 ctxt->instate = XML_PARSER_EOF; | 5989 ctxt->instate = XML_PARSER_EOF; |
| 5865 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) | 5990 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) |
| 5866 ctxt->sax->endDocument(ctxt->userData); | 5991 ctxt->sax->endDocument(ctxt->userData); |
| 5867 } | 5992 } |
| 5868 } | 5993 } |
| 5869 if ((ctxt->myDoc != NULL) && | 5994 if ((!(ctxt->options & HTML_PARSE_NODEFDTD)) && (ctxt->myDoc != NULL) && |
| 5870 ((terminate) || (ctxt->instate == XML_PARSER_EOF) || | 5995 ((terminate) || (ctxt->instate == XML_PARSER_EOF) || |
| 5871 (ctxt->instate == XML_PARSER_EPILOG))) { | 5996 (ctxt->instate == XML_PARSER_EPILOG))) { |
| 5872 xmlDtdPtr dtd; | 5997 xmlDtdPtr dtd; |
| 5873 dtd = xmlGetIntSubset(ctxt->myDoc); | 5998 dtd = xmlGetIntSubset(ctxt->myDoc); |
| 5874 if (dtd == NULL) | 5999 if (dtd == NULL) |
| 5875 ctxt->myDoc->intSubset = | 6000 ctxt->myDoc->intSubset = |
| 5876 xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html", | 6001 xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html", |
| 5877 BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN", | 6002 BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN", |
| 5878 BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd"); | 6003 BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd"); |
| 5879 } | 6004 } |
| (...skipping 17 matching lines...) Expand all Loading... |
| 5897 int | 6022 int |
| 5898 htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size, | 6023 htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size, |
| 5899 int terminate) { | 6024 int terminate) { |
| 5900 if ((ctxt == NULL) || (ctxt->input == NULL)) { | 6025 if ((ctxt == NULL) || (ctxt->input == NULL)) { |
| 5901 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, | 6026 htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, |
| 5902 "htmlParseChunk: context error\n", NULL, NULL); | 6027 "htmlParseChunk: context error\n", NULL, NULL); |
| 5903 return(XML_ERR_INTERNAL_ERROR); | 6028 return(XML_ERR_INTERNAL_ERROR); |
| 5904 } | 6029 } |
| 5905 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && | 6030 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && |
| 5906 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { | 6031 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) { |
| 5907 » int base = ctxt->input->base - ctxt->input->buf->buffer->content; | 6032 » size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); |
| 5908 » int cur = ctxt->input->cur - ctxt->input->base; | 6033 » size_t cur = ctxt->input->cur - ctxt->input->base; |
| 5909 int res; | 6034 int res; |
| 5910 | 6035 |
| 5911 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk); | 6036 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk); |
| 5912 if (res < 0) { | 6037 if (res < 0) { |
| 5913 ctxt->errNo = XML_PARSER_EOF; | 6038 ctxt->errNo = XML_PARSER_EOF; |
| 5914 ctxt->disableSAX = 1; | 6039 ctxt->disableSAX = 1; |
| 5915 return (XML_PARSER_EOF); | 6040 return (XML_PARSER_EOF); |
| 5916 } | 6041 } |
| 5917 » ctxt->input->base = ctxt->input->buf->buffer->content + base; | 6042 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); |
| 5918 » ctxt->input->cur = ctxt->input->base + cur; | |
| 5919 » ctxt->input->end = | |
| 5920 » &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; | |
| 5921 #ifdef DEBUG_PUSH | 6043 #ifdef DEBUG_PUSH |
| 5922 xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); | 6044 xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); |
| 5923 #endif | 6045 #endif |
| 5924 | 6046 |
| 5925 #if 0 | 6047 #if 0 |
| 5926 if ((terminate) || (ctxt->input->buf->buffer->use > 80)) | 6048 if ((terminate) || (ctxt->input->buf->buffer->use > 80)) |
| 5927 htmlParseTryOrFinish(ctxt, terminate); | 6049 htmlParseTryOrFinish(ctxt, terminate); |
| 5928 #endif | 6050 #endif |
| 5929 } else if (ctxt->instate != XML_PARSER_EOF) { | 6051 } else if (ctxt->instate != XML_PARSER_EOF) { |
| 5930 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { | 6052 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) { |
| 5931 xmlParserInputBufferPtr in = ctxt->input->buf; | 6053 xmlParserInputBufferPtr in = ctxt->input->buf; |
| 5932 if ((in->encoder != NULL) && (in->buffer != NULL) && | 6054 if ((in->encoder != NULL) && (in->buffer != NULL) && |
| 5933 (in->raw != NULL)) { | 6055 (in->raw != NULL)) { |
| 5934 int nbchars; | 6056 int nbchars; |
| 6057 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input); |
| 6058 size_t current = ctxt->input->cur - ctxt->input->base; |
| 5935 | 6059 |
| 5936 » » nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw); | 6060 » » nbchars = xmlCharEncInput(in, terminate); |
| 5937 if (nbchars < 0) { | 6061 if (nbchars < 0) { |
| 5938 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, | 6062 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, |
| 5939 "encoder error\n", NULL, NULL); | 6063 "encoder error\n", NULL, NULL); |
| 5940 return(XML_ERR_INVALID_ENCODING); | 6064 return(XML_ERR_INVALID_ENCODING); |
| 5941 } | 6065 } |
| 6066 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current); |
| 5942 } | 6067 } |
| 5943 } | 6068 } |
| 5944 } | 6069 } |
| 5945 htmlParseTryOrFinish(ctxt, terminate); | 6070 htmlParseTryOrFinish(ctxt, terminate); |
| 5946 if (terminate) { | 6071 if (terminate) { |
| 5947 if ((ctxt->instate != XML_PARSER_EOF) && | 6072 if ((ctxt->instate != XML_PARSER_EOF) && |
| 5948 (ctxt->instate != XML_PARSER_EPILOG) && | 6073 (ctxt->instate != XML_PARSER_EPILOG) && |
| 5949 (ctxt->instate != XML_PARSER_MISC)) { | 6074 (ctxt->instate != XML_PARSER_MISC)) { |
| 5950 ctxt->errNo = XML_ERR_DOCUMENT_END; | 6075 ctxt->errNo = XML_ERR_DOCUMENT_END; |
| 5951 ctxt->wellFormed = 0; | 6076 ctxt->wellFormed = 0; |
| (...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6025 xmlFree(buf); | 6150 xmlFree(buf); |
| 6026 return(NULL); | 6151 return(NULL); |
| 6027 } | 6152 } |
| 6028 | 6153 |
| 6029 if (filename == NULL) | 6154 if (filename == NULL) |
| 6030 inputStream->filename = NULL; | 6155 inputStream->filename = NULL; |
| 6031 else | 6156 else |
| 6032 inputStream->filename = (char *) | 6157 inputStream->filename = (char *) |
| 6033 xmlCanonicPath((const xmlChar *) filename); | 6158 xmlCanonicPath((const xmlChar *) filename); |
| 6034 inputStream->buf = buf; | 6159 inputStream->buf = buf; |
| 6035 inputStream->base = inputStream->buf->buffer->content; | 6160 xmlBufResetInput(buf->buffer, inputStream); |
| 6036 inputStream->cur = inputStream->buf->buffer->content; | |
| 6037 inputStream->end = | |
| 6038 » &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; | |
| 6039 | 6161 |
| 6040 inputPush(ctxt, inputStream); | 6162 inputPush(ctxt, inputStream); |
| 6041 | 6163 |
| 6042 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && | 6164 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && |
| 6043 (ctxt->input->buf != NULL)) { | 6165 (ctxt->input->buf != NULL)) { |
| 6044 » int base = ctxt->input->base - ctxt->input->buf->buffer->content; | 6166 » size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input); |
| 6045 » int cur = ctxt->input->cur - ctxt->input->base; | 6167 » size_t cur = ctxt->input->cur - ctxt->input->base; |
| 6046 | 6168 |
| 6047 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); | 6169 xmlParserInputBufferPush(ctxt->input->buf, size, chunk); |
| 6048 | 6170 |
| 6049 » ctxt->input->base = ctxt->input->buf->buffer->content + base; | 6171 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); |
| 6050 » ctxt->input->cur = ctxt->input->base + cur; | |
| 6051 » ctxt->input->end = | |
| 6052 » &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use]; | |
| 6053 #ifdef DEBUG_PUSH | 6172 #ifdef DEBUG_PUSH |
| 6054 xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); | 6173 xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); |
| 6055 #endif | 6174 #endif |
| 6056 } | 6175 } |
| 6057 ctxt->progressive = 1; | 6176 ctxt->progressive = 1; |
| 6058 | 6177 |
| 6059 return(ctxt); | 6178 return(ctxt); |
| 6060 } | 6179 } |
| 6061 #endif /* LIBXML_PUSH_ENABLED */ | 6180 #endif /* LIBXML_PUSH_ENABLED */ |
| 6062 | 6181 |
| (...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6162 xmlFree(canonicFilename); | 6281 xmlFree(canonicFilename); |
| 6163 if (inputStream == NULL) { | 6282 if (inputStream == NULL) { |
| 6164 xmlFreeParserCtxt(ctxt); | 6283 xmlFreeParserCtxt(ctxt); |
| 6165 return(NULL); | 6284 return(NULL); |
| 6166 } | 6285 } |
| 6167 | 6286 |
| 6168 inputPush(ctxt, inputStream); | 6287 inputPush(ctxt, inputStream); |
| 6169 | 6288 |
| 6170 /* set encoding */ | 6289 /* set encoding */ |
| 6171 if (encoding) { | 6290 if (encoding) { |
| 6172 content = xmlMallocAtomic (xmlStrlen(content_line) + strlen(encoding) + 1); | 6291 size_t l = strlen(encoding); |
| 6173 » if (content) { | 6292 |
| 6174 » strcpy ((char *)content, (char *)content_line); | 6293 » if (l < 1000) { |
| 6175 strcat ((char *)content, (char *)encoding); | 6294 » content = xmlMallocAtomic (xmlStrlen(content_line) + l + 1); |
| 6176 htmlCheckEncoding (ctxt, content); | 6295 » if (content) { |
| 6177 » xmlFree (content); | 6296 » » strcpy ((char *)content, (char *)content_line); |
| 6297 » » strcat ((char *)content, (char *)encoding); |
| 6298 » » htmlCheckEncoding (ctxt, content); |
| 6299 » » xmlFree (content); |
| 6300 » } |
| 6178 } | 6301 } |
| 6179 } | 6302 } |
| 6180 | 6303 |
| 6181 return(ctxt); | 6304 return(ctxt); |
| 6182 } | 6305 } |
| 6183 | 6306 |
| 6184 /** | 6307 /** |
| 6185 * htmlSAXParseFile: | 6308 * htmlSAXParseFile: |
| 6186 * @filename: the filename | 6309 * @filename: the filename |
| 6187 * @encoding: a free form C string describing the HTML document encoding, or NULL | 6310 * @encoding: a free form C string describing the HTML document encoding, or NULL |
| (...skipping 256 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6444 ctxt->standalone = -1; | 6567 ctxt->standalone = -1; |
| 6445 ctxt->hasExternalSubset = 0; | 6568 ctxt->hasExternalSubset = 0; |
| 6446 ctxt->hasPErefs = 0; | 6569 ctxt->hasPErefs = 0; |
| 6447 ctxt->html = 1; | 6570 ctxt->html = 1; |
| 6448 ctxt->external = 0; | 6571 ctxt->external = 0; |
| 6449 ctxt->instate = XML_PARSER_START; | 6572 ctxt->instate = XML_PARSER_START; |
| 6450 ctxt->token = 0; | 6573 ctxt->token = 0; |
| 6451 | 6574 |
| 6452 ctxt->wellFormed = 1; | 6575 ctxt->wellFormed = 1; |
| 6453 ctxt->nsWellFormed = 1; | 6576 ctxt->nsWellFormed = 1; |
| 6577 ctxt->disableSAX = 0; |
| 6454 ctxt->valid = 1; | 6578 ctxt->valid = 1; |
| 6455 ctxt->vctxt.userData = ctxt; | 6579 ctxt->vctxt.userData = ctxt; |
| 6456 ctxt->vctxt.error = xmlParserValidityError; | 6580 ctxt->vctxt.error = xmlParserValidityError; |
| 6457 ctxt->vctxt.warning = xmlParserValidityWarning; | 6581 ctxt->vctxt.warning = xmlParserValidityWarning; |
| 6458 ctxt->record_info = 0; | 6582 ctxt->record_info = 0; |
| 6459 ctxt->nbChars = 0; | 6583 ctxt->nbChars = 0; |
| 6460 ctxt->checkIndex = 0; | 6584 ctxt->checkIndex = 0; |
| 6461 ctxt->inSubset = 0; | 6585 ctxt->inSubset = 0; |
| 6462 ctxt->errNo = XML_ERR_OK; | 6586 ctxt->errNo = XML_ERR_OK; |
| 6463 ctxt->depth = 0; | 6587 ctxt->depth = 0; |
| (...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6523 } else | 6647 } else |
| 6524 ctxt->recovery = 0; | 6648 ctxt->recovery = 0; |
| 6525 if (options & HTML_PARSE_COMPACT) { | 6649 if (options & HTML_PARSE_COMPACT) { |
| 6526 ctxt->options |= HTML_PARSE_COMPACT; | 6650 ctxt->options |= HTML_PARSE_COMPACT; |
| 6527 options -= HTML_PARSE_COMPACT; | 6651 options -= HTML_PARSE_COMPACT; |
| 6528 } | 6652 } |
| 6529 if (options & XML_PARSE_HUGE) { | 6653 if (options & XML_PARSE_HUGE) { |
| 6530 ctxt->options |= XML_PARSE_HUGE; | 6654 ctxt->options |= XML_PARSE_HUGE; |
| 6531 options -= XML_PARSE_HUGE; | 6655 options -= XML_PARSE_HUGE; |
| 6532 } | 6656 } |
| 6657 if (options & HTML_PARSE_NODEFDTD) { |
| 6658 ctxt->options |= HTML_PARSE_NODEFDTD; |
| 6659 options -= HTML_PARSE_NODEFDTD; |
| 6660 } |
| 6661 if (options & HTML_PARSE_IGNORE_ENC) { |
| 6662 ctxt->options |= HTML_PARSE_IGNORE_ENC; |
| 6663 options -= HTML_PARSE_IGNORE_ENC; |
| 6664 } |
| 6665 if (options & HTML_PARSE_NOIMPLIED) { |
| 6666 ctxt->options |= HTML_PARSE_NOIMPLIED; |
| 6667 options -= HTML_PARSE_NOIMPLIED; |
| 6668 } |
| 6533 ctxt->dictNames = 0; | 6669 ctxt->dictNames = 0; |
| 6534 return (options); | 6670 return (options); |
| 6535 } | 6671 } |
| 6536 | 6672 |
| 6537 /** | 6673 /** |
| 6538 * htmlDoRead: | 6674 * htmlDoRead: |
| 6539 * @ctxt: an HTML parser context | 6675 * @ctxt: an HTML parser context |
| 6540 * @URL: the base URL to use for the document | 6676 * @URL: the base URL to use for the document |
| 6541 * @encoding: the document encoding, or NULL | 6677 * @encoding: the document encoding, or NULL |
| 6542 * @options: a combination of htmlParserOption(s) | 6678 * @options: a combination of htmlParserOption(s) |
| (...skipping 126 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6669 */ | 6805 */ |
| 6670 htmlDocPtr | 6806 htmlDocPtr |
| 6671 htmlReadFd(int fd, const char *URL, const char *encoding, int options) | 6807 htmlReadFd(int fd, const char *URL, const char *encoding, int options) |
| 6672 { | 6808 { |
| 6673 htmlParserCtxtPtr ctxt; | 6809 htmlParserCtxtPtr ctxt; |
| 6674 xmlParserInputBufferPtr input; | 6810 xmlParserInputBufferPtr input; |
| 6675 xmlParserInputPtr stream; | 6811 xmlParserInputPtr stream; |
| 6676 | 6812 |
| 6677 if (fd < 0) | 6813 if (fd < 0) |
| 6678 return (NULL); | 6814 return (NULL); |
| 6815 xmlInitParser(); |
| 6679 | 6816 |
| 6680 xmlInitParser(); | 6817 xmlInitParser(); |
| 6681 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); | 6818 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); |
| 6682 if (input == NULL) | 6819 if (input == NULL) |
| 6683 return (NULL); | 6820 return (NULL); |
| 6684 ctxt = xmlNewParserCtxt(); | 6821 ctxt = xmlNewParserCtxt(); |
| 6685 if (ctxt == NULL) { | 6822 if (ctxt == NULL) { |
| 6686 xmlFreeParserInputBuffer(input); | 6823 xmlFreeParserInputBuffer(input); |
| 6687 return (NULL); | 6824 return (NULL); |
| 6688 } | 6825 } |
| (...skipping 27 matching lines...) Expand all Loading... |
| 6716 htmlParserCtxtPtr ctxt; | 6853 htmlParserCtxtPtr ctxt; |
| 6717 xmlParserInputBufferPtr input; | 6854 xmlParserInputBufferPtr input; |
| 6718 xmlParserInputPtr stream; | 6855 xmlParserInputPtr stream; |
| 6719 | 6856 |
| 6720 if (ioread == NULL) | 6857 if (ioread == NULL) |
| 6721 return (NULL); | 6858 return (NULL); |
| 6722 xmlInitParser(); | 6859 xmlInitParser(); |
| 6723 | 6860 |
| 6724 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, | 6861 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, |
| 6725 XML_CHAR_ENCODING_NONE); | 6862 XML_CHAR_ENCODING_NONE); |
| 6726 if (input == NULL) | 6863 if (input == NULL) { |
| 6864 if (ioclose != NULL) |
| 6865 ioclose(ioctx); |
| 6727 return (NULL); | 6866 return (NULL); |
| 6867 } |
| 6728 ctxt = htmlNewParserCtxt(); | 6868 ctxt = htmlNewParserCtxt(); |
| 6729 if (ctxt == NULL) { | 6869 if (ctxt == NULL) { |
| 6730 xmlFreeParserInputBuffer(input); | 6870 xmlFreeParserInputBuffer(input); |
| 6731 return (NULL); | 6871 return (NULL); |
| 6732 } | 6872 } |
| 6733 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); | 6873 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); |
| 6734 if (stream == NULL) { | 6874 if (stream == NULL) { |
| 6735 xmlFreeParserInputBuffer(input); | 6875 xmlFreeParserInputBuffer(input); |
| 6736 xmlFreeParserCtxt(ctxt); | 6876 xmlFreeParserCtxt(ctxt); |
| 6737 return (NULL); | 6877 return (NULL); |
| (...skipping 18 matching lines...) Expand all Loading... |
| 6756 htmlDocPtr | 6896 htmlDocPtr |
| 6757 htmlCtxtReadDoc(htmlParserCtxtPtr ctxt, const xmlChar * cur, | 6897 htmlCtxtReadDoc(htmlParserCtxtPtr ctxt, const xmlChar * cur, |
| 6758 const char *URL, const char *encoding, int options) | 6898 const char *URL, const char *encoding, int options) |
| 6759 { | 6899 { |
| 6760 xmlParserInputPtr stream; | 6900 xmlParserInputPtr stream; |
| 6761 | 6901 |
| 6762 if (cur == NULL) | 6902 if (cur == NULL) |
| 6763 return (NULL); | 6903 return (NULL); |
| 6764 if (ctxt == NULL) | 6904 if (ctxt == NULL) |
| 6765 return (NULL); | 6905 return (NULL); |
| 6906 xmlInitParser(); |
| 6766 | 6907 |
| 6767 htmlCtxtReset(ctxt); | 6908 htmlCtxtReset(ctxt); |
| 6768 | 6909 |
| 6769 stream = xmlNewStringInputStream(ctxt, cur); | 6910 stream = xmlNewStringInputStream(ctxt, cur); |
| 6770 if (stream == NULL) { | 6911 if (stream == NULL) { |
| 6771 return (NULL); | 6912 return (NULL); |
| 6772 } | 6913 } |
| 6773 inputPush(ctxt, stream); | 6914 inputPush(ctxt, stream); |
| 6774 return (htmlDoRead(ctxt, URL, encoding, options, 1)); | 6915 return (htmlDoRead(ctxt, URL, encoding, options, 1)); |
| 6775 } | 6916 } |
| (...skipping 13 matching lines...) Expand all Loading... |
| 6789 htmlDocPtr | 6930 htmlDocPtr |
| 6790 htmlCtxtReadFile(htmlParserCtxtPtr ctxt, const char *filename, | 6931 htmlCtxtReadFile(htmlParserCtxtPtr ctxt, const char *filename, |
| 6791 const char *encoding, int options) | 6932 const char *encoding, int options) |
| 6792 { | 6933 { |
| 6793 xmlParserInputPtr stream; | 6934 xmlParserInputPtr stream; |
| 6794 | 6935 |
| 6795 if (filename == NULL) | 6936 if (filename == NULL) |
| 6796 return (NULL); | 6937 return (NULL); |
| 6797 if (ctxt == NULL) | 6938 if (ctxt == NULL) |
| 6798 return (NULL); | 6939 return (NULL); |
| 6940 xmlInitParser(); |
| 6799 | 6941 |
| 6800 htmlCtxtReset(ctxt); | 6942 htmlCtxtReset(ctxt); |
| 6801 | 6943 |
| 6802 stream = xmlLoadExternalEntity(filename, NULL, ctxt); | 6944 stream = xmlLoadExternalEntity(filename, NULL, ctxt); |
| 6803 if (stream == NULL) { | 6945 if (stream == NULL) { |
| 6804 return (NULL); | 6946 return (NULL); |
| 6805 } | 6947 } |
| 6806 inputPush(ctxt, stream); | 6948 inputPush(ctxt, stream); |
| 6807 return (htmlDoRead(ctxt, NULL, encoding, options, 1)); | 6949 return (htmlDoRead(ctxt, NULL, encoding, options, 1)); |
| 6808 } | 6950 } |
| (...skipping 16 matching lines...) Expand all Loading... |
| 6825 htmlCtxtReadMemory(htmlParserCtxtPtr ctxt, const char *buffer, int size, | 6967 htmlCtxtReadMemory(htmlParserCtxtPtr ctxt, const char *buffer, int size, |
| 6826 const char *URL, const char *encoding, int options) | 6968 const char *URL, const char *encoding, int options) |
| 6827 { | 6969 { |
| 6828 xmlParserInputBufferPtr input; | 6970 xmlParserInputBufferPtr input; |
| 6829 xmlParserInputPtr stream; | 6971 xmlParserInputPtr stream; |
| 6830 | 6972 |
| 6831 if (ctxt == NULL) | 6973 if (ctxt == NULL) |
| 6832 return (NULL); | 6974 return (NULL); |
| 6833 if (buffer == NULL) | 6975 if (buffer == NULL) |
| 6834 return (NULL); | 6976 return (NULL); |
| 6977 xmlInitParser(); |
| 6835 | 6978 |
| 6836 htmlCtxtReset(ctxt); | 6979 htmlCtxtReset(ctxt); |
| 6837 | 6980 |
| 6838 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); | 6981 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); |
| 6839 if (input == NULL) { | 6982 if (input == NULL) { |
| 6840 return(NULL); | 6983 return(NULL); |
| 6841 } | 6984 } |
| 6842 | 6985 |
| 6843 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); | 6986 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); |
| 6844 if (stream == NULL) { | 6987 if (stream == NULL) { |
| (...skipping 22 matching lines...) Expand all Loading... |
| 6867 htmlCtxtReadFd(htmlParserCtxtPtr ctxt, int fd, | 7010 htmlCtxtReadFd(htmlParserCtxtPtr ctxt, int fd, |
| 6868 const char *URL, const char *encoding, int options) | 7011 const char *URL, const char *encoding, int options) |
| 6869 { | 7012 { |
| 6870 xmlParserInputBufferPtr input; | 7013 xmlParserInputBufferPtr input; |
| 6871 xmlParserInputPtr stream; | 7014 xmlParserInputPtr stream; |
| 6872 | 7015 |
| 6873 if (fd < 0) | 7016 if (fd < 0) |
| 6874 return (NULL); | 7017 return (NULL); |
| 6875 if (ctxt == NULL) | 7018 if (ctxt == NULL) |
| 6876 return (NULL); | 7019 return (NULL); |
| 7020 xmlInitParser(); |
| 6877 | 7021 |
| 6878 htmlCtxtReset(ctxt); | 7022 htmlCtxtReset(ctxt); |
| 6879 | 7023 |
| 6880 | 7024 |
| 6881 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); | 7025 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); |
| 6882 if (input == NULL) | 7026 if (input == NULL) |
| 6883 return (NULL); | 7027 return (NULL); |
| 6884 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); | 7028 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); |
| 6885 if (stream == NULL) { | 7029 if (stream == NULL) { |
| 6886 xmlFreeParserInputBuffer(input); | 7030 xmlFreeParserInputBuffer(input); |
| (...skipping 24 matching lines...) Expand all Loading... |
| 6911 const char *URL, | 7055 const char *URL, |
| 6912 const char *encoding, int options) | 7056 const char *encoding, int options) |
| 6913 { | 7057 { |
| 6914 xmlParserInputBufferPtr input; | 7058 xmlParserInputBufferPtr input; |
| 6915 xmlParserInputPtr stream; | 7059 xmlParserInputPtr stream; |
| 6916 | 7060 |
| 6917 if (ioread == NULL) | 7061 if (ioread == NULL) |
| 6918 return (NULL); | 7062 return (NULL); |
| 6919 if (ctxt == NULL) | 7063 if (ctxt == NULL) |
| 6920 return (NULL); | 7064 return (NULL); |
| 7065 xmlInitParser(); |
| 6921 | 7066 |
| 6922 htmlCtxtReset(ctxt); | 7067 htmlCtxtReset(ctxt); |
| 6923 | 7068 |
| 6924 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, | 7069 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, |
| 6925 XML_CHAR_ENCODING_NONE); | 7070 XML_CHAR_ENCODING_NONE); |
| 6926 if (input == NULL) | 7071 if (input == NULL) { |
| 7072 if (ioclose != NULL) |
| 7073 ioclose(ioctx); |
| 6927 return (NULL); | 7074 return (NULL); |
| 7075 } |
| 6928 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); | 7076 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE); |
| 6929 if (stream == NULL) { | 7077 if (stream == NULL) { |
| 6930 xmlFreeParserInputBuffer(input); | 7078 xmlFreeParserInputBuffer(input); |
| 6931 return (NULL); | 7079 return (NULL); |
| 6932 } | 7080 } |
| 6933 inputPush(ctxt, stream); | 7081 inputPush(ctxt, stream); |
| 6934 return (htmlDoRead(ctxt, URL, encoding, options, 1)); | 7082 return (htmlDoRead(ctxt, URL, encoding, options, 1)); |
| 6935 } | 7083 } |
| 6936 | 7084 |
| 6937 #define bottom_HTMLparser | 7085 #define bottom_HTMLparser |
| 6938 #include "elfgcchack.h" | 7086 #include "elfgcchack.h" |
| 6939 #endif /* LIBXML_HTML_ENABLED */ | 7087 #endif /* LIBXML_HTML_ENABLED */ |
| OLD | NEW |