OLD | NEW |
(Empty) | |
| 1 cdef extern from *: |
| 2 # Return true if the object o is a Unicode object or an instance |
| 3 # of a Unicode subtype. Changed in version 2.2: Allowed subtypes |
| 4 # to be accepted. |
| 5 bint PyUnicode_Check(object o) |
| 6 |
| 7 # Return true if the object o is a Unicode object, but not an |
| 8 # instance of a subtype. New in version 2.2. |
| 9 bint PyUnicode_CheckExact(object o) |
| 10 |
| 11 # Return the size of the object. o has to be a PyUnicodeObject |
| 12 # (not checked). |
| 13 Py_ssize_t PyUnicode_GET_SIZE(object o) |
| 14 |
| 15 # Return the size of the object's internal buffer in bytes. o has |
| 16 # to be a PyUnicodeObject (not checked). |
| 17 Py_ssize_t PyUnicode_GET_DATA_SIZE(object o) |
| 18 |
| 19 # Return a pointer to the internal Py_UNICODE buffer of the |
| 20 # object. o has to be a PyUnicodeObject (not checked). |
| 21 Py_UNICODE* PyUnicode_AS_UNICODE(object o) |
| 22 |
| 23 # Return a pointer to the internal buffer of the object. o has to |
| 24 # be a PyUnicodeObject (not checked). |
| 25 char* PyUnicode_AS_DATA(object o) |
| 26 |
| 27 # Return 1 or 0 depending on whether ch is a whitespace character. |
| 28 bint Py_UNICODE_ISSPACE(Py_UNICODE ch) |
| 29 |
| 30 # Return 1 or 0 depending on whether ch is a lowercase character. |
| 31 bint Py_UNICODE_ISLOWER(Py_UNICODE ch) |
| 32 |
| 33 # Return 1 or 0 depending on whether ch is an uppercase character. |
| 34 bint Py_UNICODE_ISUPPER(Py_UNICODE ch) |
| 35 |
| 36 # Return 1 or 0 depending on whether ch is a titlecase character. |
| 37 bint Py_UNICODE_ISTITLE(Py_UNICODE ch) |
| 38 |
| 39 # Return 1 or 0 depending on whether ch is a linebreak character. |
| 40 bint Py_UNICODE_ISLINEBREAK(Py_UNICODE ch) |
| 41 |
| 42 # Return 1 or 0 depending on whether ch is a decimal character. |
| 43 bint Py_UNICODE_ISDECIMAL(Py_UNICODE ch) |
| 44 |
| 45 # Return 1 or 0 depending on whether ch is a digit character. |
| 46 bint Py_UNICODE_ISDIGIT(Py_UNICODE ch) |
| 47 |
| 48 # Return 1 or 0 depending on whether ch is a numeric character. |
| 49 bint Py_UNICODE_ISNUMERIC(Py_UNICODE ch) |
| 50 |
| 51 # Return 1 or 0 depending on whether ch is an alphabetic character. |
| 52 bint Py_UNICODE_ISALPHA(Py_UNICODE ch) |
| 53 |
| 54 # Return 1 or 0 depending on whether ch is an alphanumeric character. |
| 55 bint Py_UNICODE_ISALNUM(Py_UNICODE ch) |
| 56 |
| 57 # Return the character ch converted to lower case. |
| 58 Py_UNICODE Py_UNICODE_TOLOWER(Py_UNICODE ch) |
| 59 |
| 60 # Return the character ch converted to upper case. |
| 61 Py_UNICODE Py_UNICODE_TOUPPER(Py_UNICODE ch) |
| 62 |
| 63 # Return the character ch converted to title case. |
| 64 Py_UNICODE Py_UNICODE_TOTITLE(Py_UNICODE ch) |
| 65 |
| 66 # Return the character ch converted to a decimal positive |
| 67 # integer. Return -1 if this is not possible. This macro does not |
| 68 # raise exceptions. |
| 69 int Py_UNICODE_TODECIMAL(Py_UNICODE ch) |
| 70 |
| 71 # Return the character ch converted to a single digit |
| 72 # integer. Return -1 if this is not possible. This macro does not |
| 73 # raise exceptions. |
| 74 int Py_UNICODE_TODIGIT(Py_UNICODE ch) |
| 75 |
| 76 # Return the character ch converted to a double. Return -1.0 if |
| 77 # this is not possible. This macro does not raise exceptions. |
| 78 double Py_UNICODE_TONUMERIC(Py_UNICODE ch) |
| 79 |
| 80 # To create Unicode objects and access their basic sequence |
| 81 # properties, use these APIs: |
| 82 |
| 83 # Create a Unicode Object from the Py_UNICODE buffer u of the |
| 84 # given size. u may be NULL which causes the contents to be |
| 85 # undefined. It is the user's responsibility to fill in the needed |
| 86 # data. The buffer is copied into the new object. If the buffer is |
| 87 # not NULL, the return value might be a shared object. Therefore, |
| 88 # modification of the resulting Unicode object is only allowed |
| 89 # when u is NULL. |
| 90 object PyUnicode_FromUnicode(Py_UNICODE *u, Py_ssize_t size) |
| 91 |
| 92 # Create a Unicode Object from the given Unicode code point ordinal. |
| 93 # |
| 94 # The ordinal must be in range(0x10000) on narrow Python builds |
| 95 # (UCS2), and range(0x110000) on wide builds (UCS4). A ValueError |
| 96 # is raised in case it is not. |
| 97 object PyUnicode_FromOrdinal(int ordinal) |
| 98 |
| 99 # Return a read-only pointer to the Unicode object's internal |
| 100 # Py_UNICODE buffer, NULL if o is not a Unicode object. |
| 101 Py_UNICODE* PyUnicode_AsUnicode(object o) except NULL |
| 102 |
| 103 # Return the length of the Unicode object. |
| 104 Py_ssize_t PyUnicode_GetSize(object o) except -1 |
| 105 |
| 106 # Coerce an encoded object o to a Unicode object and return a |
| 107 # reference with incremented refcount. |
| 108 # String and other char buffer compatible objects are decoded |
| 109 # according to the given encoding and using the error handling |
| 110 # defined by errors. Both can be NULL to have the interface use |
| 111 # the default values (see the next section for details). |
| 112 # All other objects, including Unicode objects, cause a TypeError |
| 113 # to be set. |
| 114 object PyUnicode_FromEncodedObject(object o, char *encoding, char *errors) |
| 115 |
| 116 # Shortcut for PyUnicode_FromEncodedObject(obj, NULL, "strict") |
| 117 # which is used throughout the interpreter whenever coercion to |
| 118 # Unicode is needed. |
| 119 object PyUnicode_FromObject(object obj) |
| 120 |
| 121 # If the platform supports wchar_t and provides a header file |
| 122 # wchar.h, Python can interface directly to this type using the |
| 123 # following functions. Support is optimized if Python's own |
| 124 # Py_UNICODE type is identical to the system's wchar_t. |
| 125 |
| 126 #ctypedef int wchar_t |
| 127 |
| 128 # Create a Unicode object from the wchar_t buffer w of the given |
| 129 # size. Return NULL on failure. |
| 130 #PyObject* PyUnicode_FromWideChar(wchar_t *w, Py_ssize_t size) |
| 131 |
| 132 #Py_ssize_t PyUnicode_AsWideChar(object o, wchar_t *w, Py_ssize_t size) |
| 133 |
| 134 # Codecs |
| 135 |
| 136 # Create a Unicode object by decoding size bytes of the encoded |
| 137 # string s. encoding and errors have the same meaning as the |
| 138 # parameters of the same name in the unicode() builtin |
| 139 # function. The codec to be used is looked up using the Python |
| 140 # codec registry. Return NULL if an exception was raised by the |
| 141 # codec. |
| 142 object PyUnicode_Decode(char *s, Py_ssize_t size, char *encoding, char *erro
rs) |
| 143 |
| 144 # Encode the Py_UNICODE buffer of the given size and return a |
| 145 # Python string object. encoding and errors have the same meaning |
| 146 # as the parameters of the same name in the Unicode encode() |
| 147 # method. The codec to be used is looked up using the Python codec |
| 148 # registry. Return NULL if an exception was raised by the codec. |
| 149 object PyUnicode_Encode(Py_UNICODE *s, Py_ssize_t size, |
| 150 char *encoding, char *errors) |
| 151 |
| 152 # Encode a Unicode object and return the result as Python string |
| 153 # object. encoding and errors have the same meaning as the |
| 154 # parameters of the same name in the Unicode encode() method. The |
| 155 # codec to be used is looked up using the Python codec |
| 156 # registry. Return NULL if an exception was raised by the codec. |
| 157 object PyUnicode_AsEncodedString(object unicode, char *encoding, char *error
s) |
| 158 |
| 159 # These are the UTF-8 codec APIs: |
| 160 |
| 161 # Create a Unicode object by decoding size bytes of the UTF-8 |
| 162 # encoded string s. Return NULL if an exception was raised by the |
| 163 # codec. |
| 164 object PyUnicode_DecodeUTF8(char *s, Py_ssize_t size, char *errors) |
| 165 |
| 166 # If consumed is NULL, behave like PyUnicode_DecodeUTF8(). If |
| 167 # consumed is not NULL, trailing incomplete UTF-8 byte sequences |
| 168 # will not be treated as an error. Those bytes will not be decoded |
| 169 # and the number of bytes that have been decoded will be stored in |
| 170 # consumed. New in version 2.4. |
| 171 object PyUnicode_DecodeUTF8Stateful(char *s, Py_ssize_t size, char *errors,
Py_ssize_t *consumed) |
| 172 |
| 173 # Encode the Py_UNICODE buffer of the given size using UTF-8 and |
| 174 # return a Python string object. Return NULL if an exception was |
| 175 # raised by the codec. |
| 176 object PyUnicode_EncodeUTF8(Py_UNICODE *s, Py_ssize_t size, char *errors) |
| 177 |
| 178 # Encode a Unicode object using UTF-8 and return the result as Python strin
g object. Error handling is ``strict''. Return NULL if an exception was raised b
y the codec. |
| 179 object PyUnicode_AsUTF8String(object unicode) |
| 180 |
| 181 # These are the UTF-16 codec APIs: |
| 182 |
| 183 # Decode length bytes from a UTF-16 encoded buffer string and |
| 184 # return the corresponding Unicode object. errors (if non-NULL) |
| 185 # defines the error handling. It defaults to ``strict''. |
| 186 # |
| 187 # If byteorder is non-NULL, the decoder starts decoding using the |
| 188 # given byte order: |
| 189 # |
| 190 # *byteorder == -1: little endian |
| 191 # *byteorder == 0: native order |
| 192 # *byteorder == 1: big endian |
| 193 # |
| 194 # and then switches if the first two bytes of the input data are a |
| 195 # byte order mark (BOM) and the specified byte order is native |
| 196 # order. This BOM is not copied into the resulting Unicode |
| 197 # string. After completion, *byteorder is set to the current byte |
| 198 # order at the end of input data. |
| 199 # |
| 200 # If byteorder is NULL, the codec starts in native order mode. |
| 201 object PyUnicode_DecodeUTF16(char *s, Py_ssize_t size, char *errors, int *by
teorder) |
| 202 |
| 203 # If consumed is NULL, behave like PyUnicode_DecodeUTF16(). If |
| 204 # consumed is not NULL, PyUnicode_DecodeUTF16Stateful() will not |
| 205 # treat trailing incomplete UTF-16 byte sequences (such as an odd |
| 206 # number of bytes or a split surrogate pair) as an error. Those |
| 207 # bytes will not be decoded and the number of bytes that have been |
| 208 # decoded will be stored in consumed. New in version 2.4. |
| 209 object PyUnicode_DecodeUTF16Stateful(char *s, Py_ssize_t size, char *errors,
int *byteorder, Py_ssize_t *consumed) |
| 210 |
| 211 # Return a Python string object holding the UTF-16 encoded value |
| 212 # of the Unicode data in s. If byteorder is not 0, output is |
| 213 # written according to the following byte order: |
| 214 # |
| 215 # byteorder == -1: little endian |
| 216 # byteorder == 0: native byte order (writes a BOM mark) |
| 217 # byteorder == 1: big endian |
| 218 # |
| 219 # If byteorder is 0, the output string will always start with the |
| 220 # Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark |
| 221 # is prepended. |
| 222 # |
| 223 # If Py_UNICODE_WIDE is defined, a single Py_UNICODE value may get |
| 224 # represented as a surrogate pair. If it is not defined, each |
| 225 # Py_UNICODE value is interpreted as a UCS-2 character. |
| 226 object PyUnicode_EncodeUTF16(Py_UNICODE *s, Py_ssize_t size, char *errors, i
nt byteorder) |
| 227 |
| 228 # Return a Python string using the UTF-16 encoding in native byte |
| 229 # order. The string always starts with a BOM mark. Error handling |
| 230 # is ``strict''. Return NULL if an exception was raised by the |
| 231 # codec. |
| 232 object PyUnicode_AsUTF16String(object unicode) |
| 233 |
| 234 # These are the ``Unicode Escape'' codec APIs: |
| 235 |
| 236 # Create a Unicode object by decoding size bytes of the |
| 237 # Unicode-Escape encoded string s. Return NULL if an exception was |
| 238 # raised by the codec. |
| 239 object PyUnicode_DecodeUnicodeEscape(char *s, Py_ssize_t size, char *errors) |
| 240 |
| 241 # Encode the Py_UNICODE buffer of the given size using |
| 242 # Unicode-Escape and return a Python string object. Return NULL if |
| 243 # an exception was raised by the codec. |
| 244 object PyUnicode_EncodeUnicodeEscape(Py_UNICODE *s, Py_ssize_t size) |
| 245 |
| 246 # Encode a Unicode object using Unicode-Escape and return the |
| 247 # result as Python string object. Error handling is |
| 248 # ``strict''. Return NULL if an exception was raised by the codec. |
| 249 object PyUnicode_AsUnicodeEscapeString(object unicode) |
| 250 |
| 251 # These are the ``Raw Unicode Escape'' codec APIs: |
| 252 |
| 253 # Create a Unicode object by decoding size bytes of the |
| 254 # Raw-Unicode-Escape encoded string s. Return NULL if an exception |
| 255 # was raised by the codec. |
| 256 object PyUnicode_DecodeRawUnicodeEscape(char *s, Py_ssize_t size, char *erro
rs) |
| 257 |
| 258 # Encode the Py_UNICODE buffer of the given size using |
| 259 # Raw-Unicode-Escape and return a Python string object. Return |
| 260 # NULL if an exception was raised by the codec. |
| 261 object PyUnicode_EncodeRawUnicodeEscape(Py_UNICODE *s, Py_ssize_t size, char
*errors) |
| 262 |
| 263 # Encode a Unicode object using Raw-Unicode-Escape and return the |
| 264 # result as Python string object. Error handling is |
| 265 # ``strict''. Return NULL if an exception was raised by the codec. |
| 266 object PyUnicode_AsRawUnicodeEscapeString(object unicode) |
| 267 |
| 268 # These are the Latin-1 codec APIs: Latin-1 corresponds to the first 256 Unicode
ordinals and only these are accepted by the codecs during encoding. |
| 269 |
| 270 # Create a Unicode object by decoding size bytes of the Latin-1 |
| 271 # encoded string s. Return NULL if an exception was raised by the |
| 272 # codec. |
| 273 object PyUnicode_DecodeLatin1(char *s, Py_ssize_t size, char *errors) |
| 274 |
| 275 # Encode the Py_UNICODE buffer of the given size using Latin-1 and |
| 276 # return a Python string object. Return NULL if an exception was |
| 277 # raised by the codec. |
| 278 object PyUnicode_EncodeLatin1(Py_UNICODE *s, Py_ssize_t size, char *errors) |
| 279 |
| 280 # Encode a Unicode object using Latin-1 and return the result as |
| 281 # Python string object. Error handling is ``strict''. Return NULL |
| 282 # if an exception was raised by the codec. |
| 283 object PyUnicode_AsLatin1String(object unicode) |
| 284 |
| 285 # These are the ASCII codec APIs. Only 7-bit ASCII data is |
| 286 # accepted. All other codes generate errors. |
| 287 |
| 288 # Create a Unicode object by decoding size bytes of the ASCII |
| 289 # encoded string s. Return NULL if an exception was raised by the |
| 290 # codec. |
| 291 object PyUnicode_DecodeASCII(char *s, Py_ssize_t size, char *errors) |
| 292 |
| 293 # Encode the Py_UNICODE buffer of the given size using ASCII and |
| 294 # return a Python string object. Return NULL if an exception was |
| 295 # raised by the codec. |
| 296 object PyUnicode_EncodeASCII(Py_UNICODE *s, Py_ssize_t size, char *errors) |
| 297 |
| 298 # Encode a Unicode object using ASCII and return the result as |
| 299 # Python string object. Error handling is ``strict''. Return NULL |
| 300 # if an exception was raised by the codec. |
| 301 object PyUnicode_AsASCIIString(object o) |
| 302 |
| 303 # These are the mapping codec APIs: |
| 304 # |
| 305 # This codec is special in that it can be used to implement many |
| 306 # different codecs (and this is in fact what was done to obtain most |
| 307 # of the standard codecs included in the encodings package). The codec |
| 308 # uses mapping to encode and decode characters. |
| 309 # |
| 310 # Decoding mappings must map single string characters to single |
| 311 # Unicode characters, integers (which are then interpreted as Unicode |
| 312 # ordinals) or None (meaning "undefined mapping" and causing an |
| 313 # error). |
| 314 # |
| 315 # Encoding mappings must map single Unicode characters to single |
| 316 # string characters, integers (which are then interpreted as Latin-1 |
| 317 # ordinals) or None (meaning "undefined mapping" and causing an |
| 318 # error). |
| 319 # |
| 320 # The mapping objects provided must only support the __getitem__ |
| 321 # mapping interface. |
| 322 # |
| 323 # If a character lookup fails with a LookupError, the character is |
| 324 # copied as-is meaning that its ordinal value will be interpreted as |
| 325 # Unicode or Latin-1 ordinal resp. Because of this, mappings only need |
| 326 # to contain those mappings which map characters to different code |
| 327 # points. |
| 328 |
| 329 # Create a Unicode object by decoding size bytes of the encoded |
| 330 # string s using the given mapping object. Return NULL if an |
| 331 # exception was raised by the codec. If mapping is NULL latin-1 |
| 332 # decoding will be done. Else it can be a dictionary mapping byte |
| 333 # or a unicode string, which is treated as a lookup table. Byte |
| 334 # values greater than the length of the string and U+FFFE |
| 335 # "characters" are treated as "undefined mapping". Changed in |
| 336 # version 2.4: Allowed unicode string as mapping argument. |
| 337 object PyUnicode_DecodeCharmap(char *s, Py_ssize_t size, object mapping, cha
r *errors) |
| 338 |
| 339 # Encode the Py_UNICODE buffer of the given size using the given |
| 340 # mapping object and return a Python string object. Return NULL if |
| 341 # an exception was raised by the codec. |
| 342 object PyUnicode_EncodeCharmap(Py_UNICODE *s, Py_ssize_t size, object mappin
g, char *errors) |
| 343 |
| 344 # Encode a Unicode object using the given mapping object and |
| 345 # return the result as Python string object. Error handling is |
| 346 # ``strict''. Return NULL if an exception was raised by the codec. |
| 347 object PyUnicode_AsCharmapString(object o, object mapping) |
| 348 |
| 349 # The following codec API is special in that it maps Unicode to Unicode. |
| 350 |
| 351 # Translate a Py_UNICODE buffer of the given length by applying a |
| 352 # character mapping table to it and return the resulting Unicode |
| 353 # object. Return NULL when an exception was raised by the codec. |
| 354 # |
| 355 # The mapping table must map Unicode ordinal integers to Unicode |
| 356 # ordinal integers or None (causing deletion of the character). |
| 357 # |
| 358 # Mapping tables need only provide the __getitem__() interface; |
| 359 # dictionaries and sequences work well. Unmapped character |
| 360 # ordinals (ones which cause a LookupError) are left untouched and |
| 361 # are copied as-is. |
| 362 object PyUnicode_TranslateCharmap(Py_UNICODE *s, Py_ssize_t size, |
| 363 object table, char *errors) |
| 364 |
| 365 # These are the MBCS codec APIs. They are currently only available on |
| 366 # Windows and use the Win32 MBCS converters to implement the |
| 367 # conversions. Note that MBCS (or DBCS) is a class of encodings, not |
| 368 # just one. The target encoding is defined by the user settings on the |
| 369 # machine running the codec. |
| 370 |
| 371 # Create a Unicode object by decoding size bytes of the MBCS |
| 372 # encoded string s. Return NULL if an exception was raised by the |
| 373 # codec. |
| 374 object PyUnicode_DecodeMBCS(char *s, Py_ssize_t size, char *errors) |
| 375 |
| 376 # If consumed is NULL, behave like PyUnicode_DecodeMBCS(). If |
| 377 # consumed is not NULL, PyUnicode_DecodeMBCSStateful() will not |
| 378 # decode trailing lead byte and the number of bytes that have been |
| 379 # decoded will be stored in consumed. New in version 2.5. |
| 380 # NOTE: Python 2.x uses 'int' values for 'size' and 'consumed' (changed in 3
.0) |
| 381 object PyUnicode_DecodeMBCSStateful(char *s, Py_ssize_t size, char *errors,
Py_ssize_t *consumed) |
| 382 |
| 383 # Encode the Py_UNICODE buffer of the given size using MBCS and |
| 384 # return a Python string object. Return NULL if an exception was |
| 385 # raised by the codec. |
| 386 object PyUnicode_EncodeMBCS(Py_UNICODE *s, Py_ssize_t size, char *errors) |
| 387 |
| 388 # Encode a Unicode object using MBCS and return the result as |
| 389 # Python string object. Error handling is ``strict''. Return NULL |
| 390 # if an exception was raised by the codec. |
| 391 object PyUnicode_AsMBCSString(object o) |
OLD | NEW |