OLD | NEW |
(Empty) | |
/**
 * markupsafe._speedups
 * ~~~~~~~~~~~~~~~~~~~~
 *
 * This module implements functions for automatic escaping in C for better
 * performance.
 *
 * :copyright: (c) 2010 by Armin Ronacher.
 * :license: BSD.
 */
| 11 |
| 12 #include <Python.h> |
| 13 |
/* Number of entries in the escape lookup tables.  The tables are indexed by
 * character code; the highest escaped character is '>' (62), so 63 entries
 * are enough. */
#define ESCAPED_CHARS_TABLE_SIZE 63

/* Decode the ASCII C string `x` into a new unicode object and yield a pointer
 * to its Py_UNICODE buffer.  The unicode object is never released, so the
 * buffer stays valid for the lifetime of the process.
 *
 * The expansion is a plain parenthesized expression (no trailing semicolon)
 * so it can be used anywhere an expression is expected.
 * NOTE(review): a failed decode is not detected here; callers only pass
 * short ASCII literals. */
#define UNICHR(x) (PyUnicode_AS_UNICODE((PyUnicodeObject*)PyUnicode_DecodeASCII( \
    (x), strlen(x), NULL)))
| 16 |
| 17 #if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN) |
| 18 typedef int Py_ssize_t; |
| 19 #define PY_SSIZE_T_MAX INT_MAX |
| 20 #define PY_SSIZE_T_MIN INT_MIN |
| 21 #endif |
| 22 |
| 23 |
| 24 static PyObject* markup; |
| 25 static Py_ssize_t escaped_chars_delta_len[ESCAPED_CHARS_TABLE_SIZE]; |
| 26 static Py_UNICODE *escaped_chars_repl[ESCAPED_CHARS_TABLE_SIZE]; |
| 27 |
| 28 static int |
| 29 init_constants(void) |
| 30 { |
| 31 PyObject *module; |
| 32 /* happing of characters to replace */ |
| 33 escaped_chars_repl['"'] = UNICHR("""); |
| 34 escaped_chars_repl['\''] = UNICHR("'"); |
| 35 escaped_chars_repl['&'] = UNICHR("&"); |
| 36 escaped_chars_repl['<'] = UNICHR("<"); |
| 37 escaped_chars_repl['>'] = UNICHR(">"); |
| 38 |
| 39 /* lengths of those characters when replaced - 1 */ |
| 40 memset(escaped_chars_delta_len, 0, sizeof (escaped_chars_delta_len)); |
| 41 escaped_chars_delta_len['"'] = escaped_chars_delta_len['\''] = \ |
| 42 escaped_chars_delta_len['&'] = 4; |
| 43 escaped_chars_delta_len['<'] = escaped_chars_delta_len['>'] = 3; |
| 44 |
| 45 /* import markup type so that we can mark the return value */ |
| 46 module = PyImport_ImportModule("markupsafe"); |
| 47 if (!module) |
| 48 return 0; |
| 49 markup = PyObject_GetAttrString(module, "Markup"); |
| 50 Py_DECREF(module); |
| 51 |
| 52 return 1; |
| 53 } |
| 54 |
| 55 static PyObject* |
| 56 escape_unicode(PyUnicodeObject *in) |
| 57 { |
| 58 PyUnicodeObject *out; |
| 59 Py_UNICODE *inp = PyUnicode_AS_UNICODE(in); |
| 60 const Py_UNICODE *inp_end = PyUnicode_AS_UNICODE(in) + PyUnicode_GET_SIZ
E(in); |
| 61 Py_UNICODE *next_escp; |
| 62 Py_UNICODE *outp; |
| 63 Py_ssize_t delta=0, erepl=0, delta_len=0; |
| 64 |
| 65 /* First we need to figure out how long the escaped string will be */ |
| 66 while (*(inp) || inp < inp_end) { |
| 67 if (*inp < ESCAPED_CHARS_TABLE_SIZE) { |
| 68 delta += escaped_chars_delta_len[*inp]; |
| 69 erepl += !!escaped_chars_delta_len[*inp]; |
| 70 } |
| 71 ++inp; |
| 72 } |
| 73 |
| 74 /* Do we need to escape anything at all? */ |
| 75 if (!erepl) { |
| 76 Py_INCREF(in); |
| 77 return (PyObject*)in; |
| 78 } |
| 79 |
| 80 out = (PyUnicodeObject*)PyUnicode_FromUnicode(NULL, PyUnicode_GET_SIZE(i
n) + delta); |
| 81 if (!out) |
| 82 return NULL; |
| 83 |
| 84 outp = PyUnicode_AS_UNICODE(out); |
| 85 inp = PyUnicode_AS_UNICODE(in); |
| 86 while (erepl-- > 0) { |
| 87 /* look for the next substitution */ |
| 88 next_escp = inp; |
| 89 while (next_escp < inp_end) { |
| 90 if (*next_escp < ESCAPED_CHARS_TABLE_SIZE && |
| 91 (delta_len = escaped_chars_delta_len[*next_escp])) { |
| 92 ++delta_len; |
| 93 break; |
| 94 } |
| 95 ++next_escp; |
| 96 } |
| 97 |
| 98 if (next_escp > inp) { |
| 99 /* copy unescaped chars between inp and next_escp */ |
| 100 Py_UNICODE_COPY(outp, inp, next_escp-inp); |
| 101 outp += next_escp - inp; |
| 102 } |
| 103 |
| 104 /* escape 'next_escp' */ |
| 105 Py_UNICODE_COPY(outp, escaped_chars_repl[*next_escp], delta_len)
; |
| 106 outp += delta_len; |
| 107 |
| 108 inp = next_escp + 1; |
| 109 } |
| 110 if (inp < inp_end) |
| 111 Py_UNICODE_COPY(outp, inp, PyUnicode_GET_SIZE(in) - (inp - PyUni
code_AS_UNICODE(in))); |
| 112 |
| 113 return (PyObject*)out; |
| 114 } |
| 115 |
| 116 |
| 117 static PyObject* |
| 118 escape(PyObject *self, PyObject *text) |
| 119 { |
| 120 PyObject *s = NULL, *rv = NULL, *html; |
| 121 |
| 122 /* we don't have to escape integers, bools or floats */ |
| 123 if (PyLong_CheckExact(text) || |
| 124 #if PY_MAJOR_VERSION < 3 |
| 125 PyInt_CheckExact(text) || |
| 126 #endif |
| 127 PyFloat_CheckExact(text) || PyBool_Check(text) || |
| 128 text == Py_None) |
| 129 return PyObject_CallFunctionObjArgs(markup, text, NULL); |
| 130 |
| 131 /* if the object has an __html__ method that performs the escaping */ |
| 132 html = PyObject_GetAttrString(text, "__html__"); |
| 133 if (html) { |
| 134 rv = PyObject_CallObject(html, NULL); |
| 135 Py_DECREF(html); |
| 136 return rv; |
| 137 } |
| 138 |
| 139 /* otherwise make the object unicode if it isn't, then escape */ |
| 140 PyErr_Clear(); |
| 141 if (!PyUnicode_Check(text)) { |
| 142 #if PY_MAJOR_VERSION < 3 |
| 143 PyObject *unicode = PyObject_Unicode(text); |
| 144 #else |
| 145 PyObject *unicode = PyObject_Str(text); |
| 146 #endif |
| 147 if (!unicode) |
| 148 return NULL; |
| 149 s = escape_unicode((PyUnicodeObject*)unicode); |
| 150 Py_DECREF(unicode); |
| 151 } |
| 152 else |
| 153 s = escape_unicode((PyUnicodeObject*)text); |
| 154 |
| 155 /* convert the unicode string into a markup object. */ |
| 156 rv = PyObject_CallFunctionObjArgs(markup, (PyObject*)s, NULL); |
| 157 Py_DECREF(s); |
| 158 return rv; |
| 159 } |
| 160 |
| 161 |
| 162 static PyObject* |
| 163 escape_silent(PyObject *self, PyObject *text) |
| 164 { |
| 165 if (text != Py_None) |
| 166 return escape(self, text); |
| 167 return PyObject_CallFunctionObjArgs(markup, NULL); |
| 168 } |
| 169 |
| 170 |
| 171 static PyObject* |
| 172 soft_unicode(PyObject *self, PyObject *s) |
| 173 { |
| 174 if (!PyUnicode_Check(s)) |
| 175 #if PY_MAJOR_VERSION < 3 |
| 176 return PyObject_Unicode(s); |
| 177 #else |
| 178 return PyObject_Str(s); |
| 179 #endif |
| 180 Py_INCREF(s); |
| 181 return s; |
| 182 } |
| 183 |
| 184 |
| 185 static PyMethodDef module_methods[] = { |
| 186 {"escape", (PyCFunction)escape, METH_O, |
| 187 "escape(s) -> markup\n\n" |
| 188 "Convert the characters &, <, >, ', and \" in string s to HTML-safe\n" |
| 189 "sequences. Use this if you need to display text that might contain\n" |
| 190 "such characters in HTML. Marks return value as markup string."}, |
| 191 {"escape_silent", (PyCFunction)escape_silent, METH_O, |
| 192 "escape_silent(s) -> markup\n\n" |
| 193 "Like escape but converts None to an empty string."}, |
| 194 {"soft_unicode", (PyCFunction)soft_unicode, METH_O, |
| 195 "soft_unicode(object) -> string\n\n" |
| 196 "Make a string unicode if it isn't already. That way a markup\n" |
| 197 "string is not converted back to unicode."}, |
| 198 {NULL, NULL, 0, NULL} /* Sentinel */ |
| 199 }; |
| 200 |
| 201 |
| 202 #if PY_MAJOR_VERSION < 3 |
| 203 |
| 204 #ifndef PyMODINIT_FUNC /* declarations for DLL import/export */ |
| 205 #define PyMODINIT_FUNC void |
| 206 #endif |
| 207 PyMODINIT_FUNC |
| 208 init_speedups(void) |
| 209 { |
| 210 if (!init_constants()) |
| 211 return; |
| 212 |
| 213 Py_InitModule3("markupsafe._speedups", module_methods, ""); |
| 214 } |
| 215 |
| 216 #else /* Python 3.x module initialization */ |
| 217 |
| 218 static struct PyModuleDef module_definition = { |
| 219 PyModuleDef_HEAD_INIT, |
| 220 "markupsafe._speedups", |
| 221 NULL, |
| 222 -1, |
| 223 module_methods, |
| 224 NULL, |
| 225 NULL, |
| 226 NULL, |
| 227 NULL |
| 228 }; |
| 229 |
| 230 PyMODINIT_FUNC |
| 231 PyInit__speedups(void) |
| 232 { |
| 233 if (!init_constants()) |
| 234 return NULL; |
| 235 |
| 236 return PyModule_Create(&module_definition); |
| 237 } |
| 238 |
| 239 #endif |
OLD | NEW |