OLD | NEW |
| (Empty) |
1 /** | |
2 * markupsafe._speedups | |
3 * ~~~~~~~~~~~~~~~~~~~~ | |
4 * | |
5 * This module implements functions for automatic escaping in C for better | |
6 * performance. | |
7 * | |
8 * :copyright: (c) 2010 by Armin Ronacher. | |
9 * :license: BSD. | |
10 */ | |
11 | |
12 #include <Python.h> | |
13 | |
/* Number of slots in the lookup tables below.  The tables are indexed by
 * character code; '>' (code 62) is the highest character that is escaped. */
#define ESCAPED_CHARS_TABLE_SIZE 63

/* Decode the ASCII C string x into a unicode object and yield a pointer to
 * that object's Py_UNICODE buffer.  The unicode object itself is not kept
 * or released by the macro.  x is evaluated twice, so pass a plain string.
 * The expansion is an expression: the caller supplies the semicolon. */
#define UNICHR(x) \
    (PyUnicode_AS_UNICODE((PyUnicodeObject*)PyUnicode_DecodeASCII( \
        (x), strlen(x), NULL)))
16 | |
/* Compatibility shim: when PY_VERSION_HEX is below 0x02050000 and nothing
 * has defined PY_SSIZE_T_MIN yet, provide Py_ssize_t as a plain int along
 * with its limits. */
#if !defined(PY_SSIZE_T_MIN) && (PY_VERSION_HEX < 0x02050000)
#  define PY_SSIZE_T_MIN INT_MIN
#  define PY_SSIZE_T_MAX INT_MAX
typedef int Py_ssize_t;
#endif
22 | |
23 | |
24 static PyObject* markup; | |
25 static Py_ssize_t escaped_chars_delta_len[ESCAPED_CHARS_TABLE_SIZE]; | |
26 static Py_UNICODE *escaped_chars_repl[ESCAPED_CHARS_TABLE_SIZE]; | |
27 | |
28 static int | |
29 init_constants(void) | |
30 { | |
31 PyObject *module; | |
32 /* happing of characters to replace */ | |
33 escaped_chars_repl['"'] = UNICHR("""); | |
34 escaped_chars_repl['\''] = UNICHR("'"); | |
35 escaped_chars_repl['&'] = UNICHR("&"); | |
36 escaped_chars_repl['<'] = UNICHR("<"); | |
37 escaped_chars_repl['>'] = UNICHR(">"); | |
38 | |
39 /* lengths of those characters when replaced - 1 */ | |
40 memset(escaped_chars_delta_len, 0, sizeof (escaped_chars_delta_len)); | |
41 escaped_chars_delta_len['"'] = escaped_chars_delta_len['\''] = \ | |
42 escaped_chars_delta_len['&'] = 4; | |
43 escaped_chars_delta_len['<'] = escaped_chars_delta_len['>'] = 3; | |
44 | |
45 /* import markup type so that we can mark the return value */ | |
46 module = PyImport_ImportModule("markupsafe"); | |
47 if (!module) | |
48 return 0; | |
49 markup = PyObject_GetAttrString(module, "Markup"); | |
50 Py_DECREF(module); | |
51 | |
52 return 1; | |
53 } | |
54 | |
55 static PyObject* | |
56 escape_unicode(PyUnicodeObject *in) | |
57 { | |
58 PyUnicodeObject *out; | |
59 Py_UNICODE *inp = PyUnicode_AS_UNICODE(in); | |
60 const Py_UNICODE *inp_end = PyUnicode_AS_UNICODE(in) + PyUnicode_GET_SIZ
E(in); | |
61 Py_UNICODE *next_escp; | |
62 Py_UNICODE *outp; | |
63 Py_ssize_t delta=0, erepl=0, delta_len=0; | |
64 | |
65 /* First we need to figure out how long the escaped string will be */ | |
66 while (*(inp) || inp < inp_end) { | |
67 if (*inp < ESCAPED_CHARS_TABLE_SIZE) { | |
68 delta += escaped_chars_delta_len[*inp]; | |
69 erepl += !!escaped_chars_delta_len[*inp]; | |
70 } | |
71 ++inp; | |
72 } | |
73 | |
74 /* Do we need to escape anything at all? */ | |
75 if (!erepl) { | |
76 Py_INCREF(in); | |
77 return (PyObject*)in; | |
78 } | |
79 | |
80 out = (PyUnicodeObject*)PyUnicode_FromUnicode(NULL, PyUnicode_GET_SIZE(i
n) + delta); | |
81 if (!out) | |
82 return NULL; | |
83 | |
84 outp = PyUnicode_AS_UNICODE(out); | |
85 inp = PyUnicode_AS_UNICODE(in); | |
86 while (erepl-- > 0) { | |
87 /* look for the next substitution */ | |
88 next_escp = inp; | |
89 while (next_escp < inp_end) { | |
90 if (*next_escp < ESCAPED_CHARS_TABLE_SIZE && | |
91 (delta_len = escaped_chars_delta_len[*next_escp])) { | |
92 ++delta_len; | |
93 break; | |
94 } | |
95 ++next_escp; | |
96 } | |
97 | |
98 if (next_escp > inp) { | |
99 /* copy unescaped chars between inp and next_escp */ | |
100 Py_UNICODE_COPY(outp, inp, next_escp-inp); | |
101 outp += next_escp - inp; | |
102 } | |
103 | |
104 /* escape 'next_escp' */ | |
105 Py_UNICODE_COPY(outp, escaped_chars_repl[*next_escp], delta_len)
; | |
106 outp += delta_len; | |
107 | |
108 inp = next_escp + 1; | |
109 } | |
110 if (inp < inp_end) | |
111 Py_UNICODE_COPY(outp, inp, PyUnicode_GET_SIZE(in) - (inp - PyUni
code_AS_UNICODE(in))); | |
112 | |
113 return (PyObject*)out; | |
114 } | |
115 | |
116 | |
117 static PyObject* | |
118 escape(PyObject *self, PyObject *text) | |
119 { | |
120 PyObject *s = NULL, *rv = NULL, *html; | |
121 | |
122 /* we don't have to escape integers, bools or floats */ | |
123 if (PyLong_CheckExact(text) || | |
124 #if PY_MAJOR_VERSION < 3 | |
125 PyInt_CheckExact(text) || | |
126 #endif | |
127 PyFloat_CheckExact(text) || PyBool_Check(text) || | |
128 text == Py_None) | |
129 return PyObject_CallFunctionObjArgs(markup, text, NULL); | |
130 | |
131 /* if the object has an __html__ method that performs the escaping */ | |
132 html = PyObject_GetAttrString(text, "__html__"); | |
133 if (html) { | |
134 rv = PyObject_CallObject(html, NULL); | |
135 Py_DECREF(html); | |
136 return rv; | |
137 } | |
138 | |
139 /* otherwise make the object unicode if it isn't, then escape */ | |
140 PyErr_Clear(); | |
141 if (!PyUnicode_Check(text)) { | |
142 #if PY_MAJOR_VERSION < 3 | |
143 PyObject *unicode = PyObject_Unicode(text); | |
144 #else | |
145 PyObject *unicode = PyObject_Str(text); | |
146 #endif | |
147 if (!unicode) | |
148 return NULL; | |
149 s = escape_unicode((PyUnicodeObject*)unicode); | |
150 Py_DECREF(unicode); | |
151 } | |
152 else | |
153 s = escape_unicode((PyUnicodeObject*)text); | |
154 | |
155 /* convert the unicode string into a markup object. */ | |
156 rv = PyObject_CallFunctionObjArgs(markup, (PyObject*)s, NULL); | |
157 Py_DECREF(s); | |
158 return rv; | |
159 } | |
160 | |
161 | |
162 static PyObject* | |
163 escape_silent(PyObject *self, PyObject *text) | |
164 { | |
165 if (text != Py_None) | |
166 return escape(self, text); | |
167 return PyObject_CallFunctionObjArgs(markup, NULL); | |
168 } | |
169 | |
170 | |
171 static PyObject* | |
172 soft_unicode(PyObject *self, PyObject *s) | |
173 { | |
174 if (!PyUnicode_Check(s)) | |
175 #if PY_MAJOR_VERSION < 3 | |
176 return PyObject_Unicode(s); | |
177 #else | |
178 return PyObject_Str(s); | |
179 #endif | |
180 Py_INCREF(s); | |
181 return s; | |
182 } | |
183 | |
184 | |
185 static PyMethodDef module_methods[] = { | |
186 {"escape", (PyCFunction)escape, METH_O, | |
187 "escape(s) -> markup\n\n" | |
188 "Convert the characters &, <, >, ', and \" in string s to HTML-safe\n" | |
189 "sequences. Use this if you need to display text that might contain\n" | |
190 "such characters in HTML. Marks return value as markup string."}, | |
191 {"escape_silent", (PyCFunction)escape_silent, METH_O, | |
192 "escape_silent(s) -> markup\n\n" | |
193 "Like escape but converts None to an empty string."}, | |
194 {"soft_unicode", (PyCFunction)soft_unicode, METH_O, | |
195 "soft_unicode(object) -> string\n\n" | |
196 "Make a string unicode if it isn't already. That way a markup\n" | |
197 "string is not converted back to unicode."}, | |
198 {NULL, NULL, 0, NULL} /* Sentinel */ | |
199 }; | |
200 | |
201 | |
202 #if PY_MAJOR_VERSION < 3 | |
203 | |
204 #ifndef PyMODINIT_FUNC /* declarations for DLL import/export */ | |
205 #define PyMODINIT_FUNC void | |
206 #endif | |
207 PyMODINIT_FUNC | |
208 init_speedups(void) | |
209 { | |
210 if (!init_constants()) | |
211 return; | |
212 | |
213 Py_InitModule3("markupsafe._speedups", module_methods, ""); | |
214 } | |
215 | |
216 #else /* Python 3.x module initialization */ | |
217 | |
218 static struct PyModuleDef module_definition = { | |
219 PyModuleDef_HEAD_INIT, | |
220 "markupsafe._speedups", | |
221 NULL, | |
222 -1, | |
223 module_methods, | |
224 NULL, | |
225 NULL, | |
226 NULL, | |
227 NULL | |
228 }; | |
229 | |
230 PyMODINIT_FUNC | |
231 PyInit__speedups(void) | |
232 { | |
233 if (!init_constants()) | |
234 return NULL; | |
235 | |
236 return PyModule_Create(&module_definition); | |
237 } | |
238 | |
239 #endif | |
OLD | NEW |