| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 * Copyright 2011 - 2014 | |
| 3 * Andr\xe9 Malo or his licensors, as applicable | |
| 4 * | |
| 5 * Licensed under the Apache License, Version 2.0 (the "License"); | |
| 6 * you may not use this file except in compliance with the License. | |
| 7 * You may obtain a copy of the License at | |
| 8 * | |
| 9 * http://www.apache.org/licenses/LICENSE-2.0 | |
| 10 * | |
| 11 * Unless required by applicable law or agreed to in writing, software | |
| 12 * distributed under the License is distributed on an "AS IS" BASIS, | |
| 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 14 * See the License for the specific language governing permissions and | |
| 15 * limitations under the License. | |
| 16 */ | |
| 17 | |
| 18 #include "cext.h" | |
| 19 EXT_INIT_FUNC; | |
| 20 | |
/*
 * Character type the minifier works on: Py_UNICODE code units when built
 * with EXT3 defined, raw bytes otherwise.
 */
#ifdef EXT3
typedef Py_UNICODE rchar;
#else
typedef unsigned char rchar;
#endif

/* Cast a character constant to the working character type */
#define U(c) ((rchar)(c))
| 27 | |
| 28 typedef struct { | |
| 29 const rchar *start; | |
| 30 const rchar *sentinel; | |
| 31 const rchar *tsentinel; | |
| 32 Py_ssize_t at_group; | |
| 33 int in_macie5; | |
| 34 int in_rule; | |
| 35 int keep_bang_comments; | |
| 36 } rcssmin_ctx_t; | |
| 37 | |
/*
 * Tells copy_space() whether it may emit a separating blank at all
 */
typedef enum {
    NEED_SPACE_MAYBE = 0,  /* emit one if the surrounding characters need it */
    NEED_SPACE_NEVER       /* never emit one */
} need_space_flag;
| 42 | |
| 43 | |
/*
 * Character classes. Every ASCII character carries a bit set in
 * rcssmin_charmask[]; one bit per class.
 */
#define RCSSMIN_DULL_BIT         (1 << 0)
#define RCSSMIN_HEX_BIT          (1 << 1)
#define RCSSMIN_ESC_BIT          (1 << 2)
#define RCSSMIN_SPACE_BIT        (1 << 3)
#define RCSSMIN_STRING_DULL_BIT  (1 << 4)
#define RCSSMIN_NMCHAR_BIT       (1 << 5)
#define RCSSMIN_URI_DULL_BIT     (1 << 6)
#define RCSSMIN_PRE_CHAR_BIT     (1 << 7)
#define RCSSMIN_POST_CHAR_BIT    (1 << 8)

static const unsigned short rcssmin_charmask[128] = {
     21,  21,  21,  21,  21,  21,  21,  21,
     21,  28,   8,  21,   8,   8,  21,  21,
     21,  21,  21,  21,  21,  21,  21,  21,
     21,  21,  21,  21,  21,  21,  21,  21,
     28, 469,   4,  85,  85,  85,  85,   4,
    149, 277,  85, 469, 469, 117,  85,  84,
    115, 115, 115, 115, 115, 115, 115, 115,
    115, 115, 468, 340,  85, 469, 468,  85,
     84, 115, 115, 115, 115, 115, 115, 117,
    117, 117, 117, 117, 117, 117, 117, 117,
    117, 117, 117, 117, 117, 117, 117, 117,
    117, 117, 117, 213,   4, 341,  85, 117,
     85, 115, 115, 115, 115, 115, 115, 117,
    117, 117, 117, 117, 117, 117, 117, 117,
    117, 117, 117, 117, 117, 116, 117, 117,
    117, 117, 117, 468,  85, 468,  85,  21
};

/* Table lookup shared by all class tests (index is masked to 7 bits) */
#define RCSSMIN_CHARBITS_(c, bit) \
    (rcssmin_charmask[U(c) & 0x7F] & (bit))

/*
 * Class tests. Characters above 127 belong to the DULL, ESC, STRING_DULL,
 * NMCHAR and URI_DULL classes and to none of the others.
 */
#define RCSSMIN_IS_DULL(c) ((U(c) > 127) || \
    RCSSMIN_CHARBITS_(c, RCSSMIN_DULL_BIT))

#define RCSSMIN_IS_HEX(c) ((U(c) <= 127) && \
    RCSSMIN_CHARBITS_(c, RCSSMIN_HEX_BIT))

#define RCSSMIN_IS_ESC(c) ((U(c) > 127) || \
    RCSSMIN_CHARBITS_(c, RCSSMIN_ESC_BIT))

#define RCSSMIN_IS_SPACE(c) ((U(c) <= 127) && \
    RCSSMIN_CHARBITS_(c, RCSSMIN_SPACE_BIT))

#define RCSSMIN_IS_STRING_DULL(c) ((U(c) > 127) || \
    RCSSMIN_CHARBITS_(c, RCSSMIN_STRING_DULL_BIT))

#define RCSSMIN_IS_NMCHAR(c) ((U(c) > 127) || \
    RCSSMIN_CHARBITS_(c, RCSSMIN_NMCHAR_BIT))

#define RCSSMIN_IS_URI_DULL(c) ((U(c) > 127) || \
    RCSSMIN_CHARBITS_(c, RCSSMIN_URI_DULL_BIT))

#define RCSSMIN_IS_PRE_CHAR(c) ((U(c) <= 127) && \
    RCSSMIN_CHARBITS_(c, RCSSMIN_PRE_CHAR_BIT))

#define RCSSMIN_IS_POST_CHAR(c) ((U(c) <= 127) && \
    RCSSMIN_CHARBITS_(c, RCSSMIN_POST_CHAR_BIT))
| 99 | |
| 100 | |
| 101 static const rchar pattern_url[] = { | |
| 102 /*U('u'),*/ U('r'), U('l'), U('(') | |
| 103 }; | |
| 104 | |
| 105 static const rchar pattern_ie7[] = { | |
| 106 /*U('>'),*/ U('/'), U('*'), U('*'), U('/') | |
| 107 }; | |
| 108 | |
| 109 static const rchar pattern_media[] = { | |
| 110 U('m'), U('e'), U('d'), U('i'), U('a'), | |
| 111 U('M'), U('E'), U('D'), U('I'), U('A') | |
| 112 }; | |
| 113 | |
| 114 static const rchar pattern_document[] = { | |
| 115 U('d'), U('o'), U('c'), U('u'), U('m'), U('e'), U('n'), U('t'), | |
| 116 U('D'), U('O'), U('C'), U('U'), U('M'), U('E'), U('N'), U('T') | |
| 117 }; | |
| 118 | |
| 119 static const rchar pattern_supports[] = { | |
| 120 U('s'), U('u'), U('p'), U('p'), U('o'), U('r'), U('t'), U('s'), | |
| 121 U('S'), U('U'), U('P'), U('P'), U('O'), U('R'), U('T'), U('S') | |
| 122 }; | |
| 123 | |
| 124 static const rchar pattern_keyframes[] = { | |
| 125 U('k'), U('e'), U('y'), U('f'), U('r'), U('a'), U('m'), U('e'), U('s'), | |
| 126 U('K'), U('E'), U('Y'), U('F'), U('R'), U('A'), U('M'), U('E'), U('S') | |
| 127 }; | |
| 128 | |
| 129 static const rchar pattern_vendor_o[] = { | |
| 130 U('-'), U('o'), U('-'), | |
| 131 U('-'), U('O'), U('-') | |
| 132 }; | |
| 133 | |
| 134 static const rchar pattern_vendor_moz[] = { | |
| 135 U('-'), U('m'), U('o'), U('z'), U('-'), | |
| 136 U('-'), U('M'), U('O'), U('Z'), U('-') | |
| 137 }; | |
| 138 | |
| 139 static const rchar pattern_vendor_webkit[] = { | |
| 140 U('-'), U('w'), U('e'), U('b'), U('k'), U('i'), U('t'), U('-'), | |
| 141 U('-'), U('W'), U('E'), U('B'), U('K'), U('I'), U('T'), U('-') | |
| 142 }; | |
| 143 | |
| 144 static const rchar pattern_vendor_ms[] = { | |
| 145 U('-'), U('m'), U('s'), U('-'), | |
| 146 U('-'), U('M'), U('S'), U('-') | |
| 147 }; | |
| 148 | |
| 149 static const rchar pattern_first[] = { | |
| 150 U('f'), U('i'), U('r'), U('s'), U('t'), U('-'), U('l'), | |
| 151 U('F'), U('I'), U('R'), U('S'), U('T'), U('-'), U('L') | |
| 152 }; | |
| 153 | |
| 154 static const rchar pattern_line[] = { | |
| 155 U('i'), U('n'), U('e'), | |
| 156 U('I'), U('N'), U('E'), | |
| 157 }; | |
| 158 | |
| 159 static const rchar pattern_letter[] = { | |
| 160 U('e'), U('t'), U('t'), U('e'), U('r'), | |
| 161 U('E'), U('T'), U('T'), U('E'), U('R') | |
| 162 }; | |
| 163 | |
| 164 static const rchar pattern_macie5_init[] = { | |
| 165 U('/'), U('*'), U('\\'), U('*'), U('/') | |
| 166 }; | |
| 167 | |
| 168 static const rchar pattern_macie5_exit[] = { | |
| 169 U('/'), U('*'), U('*'), U('/') | |
| 170 }; | |
| 171 | |
| 172 /* | |
| 173 * Match a pattern (and copy immediately to target) | |
| 174 */ | |
| 175 #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
) | |
| 176 #pragma GCC diagnostic push | |
| 177 #pragma GCC diagnostic ignored "-Wstrict-overflow" | |
| 178 #endif | |
| 179 static int | |
| 180 copy_match(const rchar *pattern, const rchar *psentinel, | |
| 181 const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx) | |
| 182 { | |
| 183 const rchar *source = *source_; | |
| 184 rchar *target = *target_; | |
| 185 rchar c; | |
| 186 | |
| 187 while (pattern < psentinel | |
| 188 && source < ctx->sentinel && target < ctx->tsentinel | |
| 189 && ((c = *source++) == *pattern++)) | |
| 190 *target++ = c; | |
| 191 | |
| 192 *source_ = source; | |
| 193 *target_ = target; | |
| 194 | |
| 195 return (pattern == psentinel); | |
| 196 } | |
| 197 #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
) | |
| 198 #pragma GCC diagnostic pop | |
| 199 #endif | |
| 200 | |
| 201 #define MATCH(PAT, source, target, ctx) ( \ | |
| 202 copy_match(pattern_##PAT, \ | |
| 203 pattern_##PAT + sizeof(pattern_##PAT) / sizeof(rchar), \ | |
| 204 source, target, ctx) \ | |
| 205 ) | |
| 206 | |
| 207 | |
| 208 /* | |
| 209 * Match a pattern (and copy immediately to target) - CI version | |
| 210 */ | |
| 211 static int | |
| 212 copy_imatch(const rchar *pattern, const rchar *psentinel, | |
| 213 const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx) | |
| 214 { | |
| 215 const rchar *source = *source_, *pstart = pattern; | |
| 216 rchar *target = *target_; | |
| 217 rchar c; | |
| 218 | |
| 219 while (pattern < psentinel | |
| 220 && source < ctx->sentinel && target < ctx->tsentinel | |
| 221 && ((c = *source++) == *pattern | |
| 222 || c == pstart[(pattern - pstart) + (psentinel - pstart)])) { | |
| 223 ++pattern; | |
| 224 *target++ = c; | |
| 225 } | |
| 226 | |
| 227 *source_ = source; | |
| 228 *target_ = target; | |
| 229 | |
| 230 return (pattern == psentinel); | |
| 231 } | |
| 232 | |
| 233 #define IMATCH(PAT, source, target, ctx) ( \ | |
| 234 copy_imatch(pattern_##PAT, \ | |
| 235 pattern_##PAT + sizeof(pattern_##PAT) / sizeof(rchar) / 2, \ | |
| 236 source, target, ctx) \ | |
| 237 ) | |
| 238 | |
| 239 | |
| 240 /* | |
| 241 * Copy characters | |
| 242 */ | |
| 243 #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
) | |
| 244 #pragma GCC diagnostic push | |
| 245 #pragma GCC diagnostic ignored "-Wstrict-overflow" | |
| 246 #endif | |
| 247 static int | |
| 248 copy(const rchar *source, const rchar *sentinel, rchar **target_, | |
| 249 rcssmin_ctx_t *ctx) | |
| 250 { | |
| 251 rchar *target = *target_; | |
| 252 | |
| 253 while (source < sentinel && target < ctx->tsentinel) | |
| 254 *target++ = *source++; | |
| 255 | |
| 256 *target_ = target; | |
| 257 | |
| 258 return (source == sentinel); | |
| 259 } | |
| 260 #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
) | |
| 261 #pragma GCC diagnostic pop | |
| 262 #endif | |
| 263 | |
| 264 #define COPY_PAT(PAT, target, ctx) ( \ | |
| 265 copy(pattern_##PAT, \ | |
| 266 pattern_##PAT + sizeof(pattern_##PAT) / sizeof(rchar), \ | |
| 267 target, ctx) \ | |
| 268 ) | |
| 269 | |
| 270 | |
/*
 * The ABORT macros work with known local variables!
 *
 * They expect `source`, `target`, `source_`, `target_` and `ctx` in the
 * calling function and return from it. The local positions are written
 * back to the caller only if the target buffer is full while source
 * characters are still left; otherwise the caller's positions stay as they
 * were last stored.
 */
#define ABORT_(RET) do {                                           \
    if (source < ctx->sentinel && !(target < ctx->tsentinel)) {    \
        *source_ = source;                                         \
        *target_ = target;                                         \
    }                                                              \
    return RET;                                                    \
} while(0)


/* ABORT: for void functions; RABORT(RET): for functions returning RET */
#define CRAPPY_C90_COMPATIBLE_EMPTY
#define ABORT ABORT_(CRAPPY_C90_COMPATIBLE_EMPTY)
#define RABORT(RET) ABORT_((RET))
| 286 | |
| 287 | |
| 288 /* | |
| 289 * Copy escape | |
| 290 */ | |
| 291 static void | |
| 292 copy_escape(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx) | |
| 293 { | |
| 294 const rchar *source = *source_, *hsentinel; | |
| 295 rchar *target = *target_; | |
| 296 rchar c; | |
| 297 | |
| 298 *target++ = U('\\'); | |
| 299 *target_ = target; | |
| 300 | |
| 301 if (source < ctx->sentinel && target < ctx->tsentinel) { | |
| 302 c = *source++; | |
| 303 if (RCSSMIN_IS_ESC(c)) { | |
| 304 *target++ = c; | |
| 305 } | |
| 306 else if (RCSSMIN_IS_HEX(c)) { | |
| 307 *target++ = c; | |
| 308 | |
| 309 /* 6 hex chars max, one we got already */ | |
| 310 if (ctx->sentinel - source > 5) | |
| 311 hsentinel = source + 5; | |
| 312 else | |
| 313 hsentinel = ctx->sentinel; | |
| 314 | |
| 315 while (source < hsentinel && target < ctx->tsentinel | |
| 316 && (c = *source, RCSSMIN_IS_HEX(c))) { | |
| 317 ++source; | |
| 318 *target++ = c; | |
| 319 } | |
| 320 | |
| 321 /* One optional space after */ | |
| 322 if (source < ctx->sentinel && target < ctx->tsentinel) { | |
| 323 if (source == hsentinel) | |
| 324 c = *source; | |
| 325 if (RCSSMIN_IS_SPACE(c)) { | |
| 326 ++source; | |
| 327 *target++ = U(' '); | |
| 328 if (c == U('\r') && source < ctx->sentinel | |
| 329 && *source == U('\n')) | |
| 330 ++source; | |
| 331 } | |
| 332 } | |
| 333 } | |
| 334 } | |
| 335 | |
| 336 *target_ = target; | |
| 337 *source_ = source; | |
| 338 } | |
| 339 | |
| 340 | |
| 341 /* | |
| 342 * Copy string | |
| 343 */ | |
| 344 static void | |
| 345 copy_string(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx) | |
| 346 { | |
| 347 const rchar *source = *source_; | |
| 348 rchar *target = *target_; | |
| 349 rchar c, quote = source[-1]; | |
| 350 | |
| 351 *target++ = quote; | |
| 352 *target_ = target; | |
| 353 | |
| 354 while (source < ctx->sentinel && target < ctx->tsentinel) { | |
| 355 c = *target++ = *source++; | |
| 356 if (RCSSMIN_IS_STRING_DULL(c)) | |
| 357 continue; | |
| 358 | |
| 359 switch (c) { | |
| 360 case U('\''): case U('"'): | |
| 361 if (c == quote) { | |
| 362 *target_ = target; | |
| 363 *source_ = source; | |
| 364 return; | |
| 365 } | |
| 366 continue; | |
| 367 | |
| 368 case U('\\'): | |
| 369 if (source < ctx->sentinel && target < ctx->tsentinel) { | |
| 370 c = *source++; | |
| 371 switch (c) { | |
| 372 case U('\r'): | |
| 373 if (source < ctx->sentinel && *source == U('\n')) | |
| 374 ++source; | |
| 375 /* fall through */ | |
| 376 | |
| 377 case U('\n'): case U('\f'): | |
| 378 --target; | |
| 379 break; | |
| 380 | |
| 381 default: | |
| 382 *target++ = c; | |
| 383 } | |
| 384 } | |
| 385 continue; | |
| 386 } | |
| 387 break; /* forbidden characters */ | |
| 388 } | |
| 389 | |
| 390 ABORT; | |
| 391 } | |
| 392 | |
| 393 | |
| 394 /* | |
| 395 * Copy URI string | |
| 396 */ | |
| 397 static int | |
| 398 copy_uri_string(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx) | |
| 399 { | |
| 400 const rchar *source = *source_; | |
| 401 rchar *target = *target_; | |
| 402 rchar c, quote = source[-1]; | |
| 403 | |
| 404 *target++ = quote; | |
| 405 *target_ = target; | |
| 406 | |
| 407 while (source < ctx->sentinel && target < ctx->tsentinel) { | |
| 408 c = *source++; | |
| 409 if (RCSSMIN_IS_SPACE(c)) | |
| 410 continue; | |
| 411 *target++ = c; | |
| 412 if (RCSSMIN_IS_STRING_DULL(c)) | |
| 413 continue; | |
| 414 | |
| 415 switch (c) { | |
| 416 case U('\''): case U('"'): | |
| 417 if (c == quote) { | |
| 418 *target_ = target; | |
| 419 *source_ = source; | |
| 420 return 0; | |
| 421 } | |
| 422 continue; | |
| 423 | |
| 424 case U('\\'): | |
| 425 if (source < ctx->sentinel && target < ctx->tsentinel) { | |
| 426 c = *source; | |
| 427 switch (c) { | |
| 428 case U('\r'): | |
| 429 if ((source + 1) < ctx->sentinel && source[1] == U('\n')) | |
| 430 ++source; | |
| 431 /* fall through */ | |
| 432 | |
| 433 case U('\n'): case U('\f'): | |
| 434 --target; | |
| 435 ++source; | |
| 436 break; | |
| 437 | |
| 438 default: | |
| 439 --target; | |
| 440 copy_escape(&source, &target, ctx); | |
| 441 } | |
| 442 } | |
| 443 continue; | |
| 444 } | |
| 445 | |
| 446 break; /* forbidden characters */ | |
| 447 } | |
| 448 | |
| 449 RABORT(-1); | |
| 450 } | |
| 451 | |
| 452 | |
| 453 /* | |
| 454 * Copy URI (unquoted) | |
| 455 */ | |
| 456 static int | |
| 457 copy_uri_unquoted(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx) | |
| 458 { | |
| 459 const rchar *source = *source_; | |
| 460 rchar *target = *target_; | |
| 461 rchar c; | |
| 462 | |
| 463 *target++ = source[-1]; | |
| 464 *target_ = target; | |
| 465 | |
| 466 while (source < ctx->sentinel && target < ctx->tsentinel) { | |
| 467 c = *source++; | |
| 468 if (RCSSMIN_IS_SPACE(c)) | |
| 469 continue; | |
| 470 *target++ = c; | |
| 471 if (RCSSMIN_IS_URI_DULL(c)) | |
| 472 continue; | |
| 473 | |
| 474 switch (c) { | |
| 475 | |
| 476 case U(')'): | |
| 477 *target_ = target - 1; | |
| 478 *source_ = source - 1; | |
| 479 return 0; | |
| 480 | |
| 481 case U('\\'): | |
| 482 if (source < ctx->sentinel && target < ctx->tsentinel) { | |
| 483 c = *source; | |
| 484 switch (c) { | |
| 485 case U('\r'): | |
| 486 if ((source + 1) < ctx->sentinel && source[1] == U('\n')) | |
| 487 ++source; | |
| 488 /* fall through */ | |
| 489 | |
| 490 case U('\n'): case U('\f'): | |
| 491 --target; | |
| 492 ++source; | |
| 493 break; | |
| 494 | |
| 495 default: | |
| 496 --target; | |
| 497 copy_escape(&source, &target, ctx); | |
| 498 } | |
| 499 } | |
| 500 continue; | |
| 501 } | |
| 502 | |
| 503 break; /* forbidden characters */ | |
| 504 } | |
| 505 | |
| 506 RABORT(-1); | |
| 507 } | |
| 508 | |
| 509 | |
| 510 /* | |
| 511 * Copy url | |
| 512 */ | |
| 513 static void | |
| 514 copy_url(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx) | |
| 515 { | |
| 516 const rchar *source = *source_; | |
| 517 rchar *target = *target_; | |
| 518 rchar c; | |
| 519 | |
| 520 *target++ = U('u'); | |
| 521 *target_ = target; | |
| 522 | |
| 523 /* Must not be inside an identifier */ | |
| 524 if ((source != ctx->start + 1) && RCSSMIN_IS_NMCHAR(source[-2])) | |
| 525 return; | |
| 526 | |
| 527 if (!MATCH(url, &source, &target, ctx) | |
| 528 || !(source < ctx->sentinel && target < ctx->tsentinel)) | |
| 529 ABORT; | |
| 530 | |
| 531 while (source < ctx->sentinel && RCSSMIN_IS_SPACE(*source)) | |
| 532 ++source; | |
| 533 | |
| 534 if (!(source < ctx->sentinel)) | |
| 535 ABORT; | |
| 536 | |
| 537 c = *source++; | |
| 538 switch (c) { | |
| 539 case U('"'): case U('\''): | |
| 540 if (copy_uri_string(&source, &target, ctx) == -1) | |
| 541 ABORT; | |
| 542 | |
| 543 while (source < ctx->sentinel && RCSSMIN_IS_SPACE(*source)) | |
| 544 ++source; | |
| 545 break; | |
| 546 | |
| 547 default: | |
| 548 if (copy_uri_unquoted(&source, &target, ctx) == -1) | |
| 549 ABORT; | |
| 550 } | |
| 551 | |
| 552 if (!(source < ctx->sentinel && target < ctx->tsentinel)) | |
| 553 ABORT; | |
| 554 | |
| 555 if ((*target++ = *source++) != U(')')) | |
| 556 ABORT; | |
| 557 | |
| 558 *target_ = target; | |
| 559 *source_ = source; | |
| 560 } | |
| 561 | |
| 562 | |
| 563 /* | |
| 564 * Copy @-group | |
| 565 */ | |
| 566 static void | |
| 567 copy_at_group(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx) | |
| 568 { | |
| 569 const rchar *source = *source_; | |
| 570 rchar *target = *target_; | |
| 571 | |
| 572 *target++ = U('@'); | |
| 573 *target_ = target; | |
| 574 | |
| 575 #define REMATCH(what) ( \ | |
| 576 source = *source_, \ | |
| 577 target = *target_, \ | |
| 578 IMATCH(what, &source, &target, ctx) \ | |
| 579 ) | |
| 580 #define CMATCH(what) IMATCH(what, &source, &target, ctx) | |
| 581 | |
| 582 if (( !CMATCH(media) | |
| 583 && !REMATCH(supports) | |
| 584 && !REMATCH(document) | |
| 585 && !REMATCH(keyframes) | |
| 586 && !(REMATCH(vendor_webkit) && CMATCH(keyframes)) | |
| 587 && !(REMATCH(vendor_moz) && CMATCH(keyframes)) | |
| 588 && !(REMATCH(vendor_o) && CMATCH(keyframes)) | |
| 589 && !(REMATCH(vendor_ms) && CMATCH(keyframes))) | |
| 590 || !(source < ctx->sentinel && target < ctx->tsentinel) | |
| 591 || RCSSMIN_IS_NMCHAR(*source)) | |
| 592 ABORT; | |
| 593 | |
| 594 #undef CMATCH | |
| 595 #undef REMATCH | |
| 596 | |
| 597 ++ctx->at_group; | |
| 598 | |
| 599 *target_ = target; | |
| 600 *source_ = source; | |
| 601 } | |
| 602 | |
| 603 | |
| 604 /* | |
| 605 * Skip space | |
| 606 */ | |
| 607 static const rchar * | |
| 608 skip_space(const rchar *source, rcssmin_ctx_t *ctx) | |
| 609 { | |
| 610 const rchar *begin = source; | |
| 611 int res; | |
| 612 rchar c; | |
| 613 | |
| 614 while (source < ctx->sentinel) { | |
| 615 c = *source; | |
| 616 if (RCSSMIN_IS_SPACE(c)) { | |
| 617 ++source; | |
| 618 continue; | |
| 619 } | |
| 620 else if (c == U('/')) { | |
| 621 ++source; | |
| 622 if (!(source < ctx->sentinel && *source == U('*'))) { | |
| 623 --source; | |
| 624 break; | |
| 625 } | |
| 626 ++source; | |
| 627 res = 0; | |
| 628 while (source < ctx->sentinel) { | |
| 629 c = *source++; | |
| 630 if (c != U('*')) | |
| 631 continue; | |
| 632 if (!(source < ctx->sentinel)) | |
| 633 return begin; | |
| 634 if (*source != U('/')) | |
| 635 continue; | |
| 636 | |
| 637 /* Comment complete */ | |
| 638 ++source; | |
| 639 res = 1; | |
| 640 break; | |
| 641 } | |
| 642 if (!res) | |
| 643 return begin; | |
| 644 | |
| 645 continue; | |
| 646 } | |
| 647 | |
| 648 break; | |
| 649 } | |
| 650 | |
| 651 return source; | |
| 652 } | |
| 653 | |
| 654 | |
| 655 /* | |
| 656 * Copy space | |
| 657 */ | |
| 658 static void | |
| 659 copy_space(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx, | |
| 660 need_space_flag need_space) | |
| 661 { | |
| 662 const rchar *source = *source_, *end, *comment; | |
| 663 rchar *target = *target_; | |
| 664 int res; | |
| 665 rchar c; | |
| 666 | |
| 667 --source; | |
| 668 if (need_space == NEED_SPACE_MAYBE | |
| 669 && source > ctx->start | |
| 670 && !RCSSMIN_IS_PRE_CHAR(source[-1]) | |
| 671 && (end = skip_space(source, ctx)) < ctx->sentinel | |
| 672 && (!RCSSMIN_IS_POST_CHAR(*end) | |
| 673 || (*end == U(':') && !ctx->in_rule && !ctx->at_group))) { | |
| 674 | |
| 675 if (!(target < ctx->tsentinel)) | |
| 676 ABORT; | |
| 677 *target++ = U(' '); | |
| 678 } | |
| 679 | |
| 680 while (source < ctx->sentinel) { | |
| 681 switch (c = *source) { | |
| 682 | |
| 683 /* comment */ | |
| 684 case U('/'): | |
| 685 comment = source++; | |
| 686 if (!((source < ctx->sentinel && *source == U('*')))) { | |
| 687 --source; | |
| 688 break; | |
| 689 } | |
| 690 ++source; | |
| 691 res = 0; | |
| 692 while (source < ctx->sentinel) { | |
| 693 c = *source++; | |
| 694 if (c != U('*')) | |
| 695 continue; | |
| 696 if (!(source < ctx->sentinel)) | |
| 697 ABORT; | |
| 698 if (*source != U('/')) | |
| 699 continue; | |
| 700 | |
| 701 /* Comment complete */ | |
| 702 ++source; | |
| 703 res = 1; | |
| 704 | |
| 705 if (ctx->keep_bang_comments && comment[2] == U('!')) { | |
| 706 ctx->in_macie5 = (source[-3] == U('\\')); | |
| 707 if (!copy(comment, source, &target, ctx)) | |
| 708 ABORT; | |
| 709 } | |
| 710 else if (source[-3] == U('\\')) { | |
| 711 if (!ctx->in_macie5) { | |
| 712 if (!COPY_PAT(macie5_init, &target, ctx)) | |
| 713 ABORT; | |
| 714 } | |
| 715 ctx->in_macie5 = 1; | |
| 716 } | |
| 717 else if (ctx->in_macie5) { | |
| 718 if (!COPY_PAT(macie5_exit, &target, ctx)) | |
| 719 ABORT; | |
| 720 ctx->in_macie5 = 0; | |
| 721 } | |
| 722 /* else don't copy anything */ | |
| 723 break; | |
| 724 } | |
| 725 if (!res) | |
| 726 ABORT; | |
| 727 continue; | |
| 728 | |
| 729 /* space */ | |
| 730 case U(' '): case U('\t'): case U('\r'): case U('\n'): case U('\f'): | |
| 731 ++source; | |
| 732 continue; | |
| 733 } | |
| 734 | |
| 735 break; | |
| 736 } | |
| 737 | |
| 738 *source_ = source; | |
| 739 *target_ = target; | |
| 740 } | |
| 741 | |
| 742 | |
| 743 /* | |
| 744 * Copy space if comment | |
| 745 */ | |
| 746 static int | |
| 747 copy_space_comment(const rchar **source_, rchar **target_, | |
| 748 rcssmin_ctx_t *ctx, need_space_flag need_space) | |
| 749 { | |
| 750 const rchar *source = *source_; | |
| 751 rchar *target = *target_; | |
| 752 | |
| 753 if (source < ctx->sentinel && *source == U('*')) { | |
| 754 copy_space(source_, target_, ctx, need_space); | |
| 755 if (*source_ > source) | |
| 756 return 0; | |
| 757 } | |
| 758 if (!(target < ctx->tsentinel)) | |
| 759 RABORT(-1); | |
| 760 | |
| 761 *target++ = source[-1]; | |
| 762 | |
| 763 /* *source_ = source; <-- unchanged */ | |
| 764 *target_ = target; | |
| 765 | |
| 766 return -1; | |
| 767 } | |
| 768 | |
| 769 | |
| 770 /* | |
| 771 * Copy space if exists | |
| 772 */ | |
| 773 static int | |
| 774 copy_space_optional(const rchar **source_, rchar **target_, | |
| 775 rcssmin_ctx_t *ctx) | |
| 776 { | |
| 777 const rchar *source = *source_; | |
| 778 | |
| 779 if (!(source < ctx->sentinel)) | |
| 780 return -1; | |
| 781 | |
| 782 if (*source == U('/')) { | |
| 783 *source_ = source + 1; | |
| 784 return copy_space_comment(source_, target_, ctx, NEED_SPACE_NEVER); | |
| 785 } | |
| 786 else if (RCSSMIN_IS_SPACE(*source)) { | |
| 787 *source_ = source + 1; | |
| 788 copy_space(source_, target_, ctx, NEED_SPACE_NEVER); | |
| 789 return 0; | |
| 790 } | |
| 791 | |
| 792 return -1; | |
| 793 } | |
| 794 | |
| 795 | |
| 796 /* | |
| 797 * Copy :first-line|letter | |
| 798 */ | |
| 799 static void | |
| 800 copy_first(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx) | |
| 801 { | |
| 802 const rchar *source = *source_, *next, *source_fork; | |
| 803 rchar *target = *target_, *target_fork; | |
| 804 | |
| 805 *target++ = U(':'); | |
| 806 *target_ = target; | |
| 807 | |
| 808 if (!IMATCH(first, &source, &target, ctx) | |
| 809 || !(source < ctx->sentinel && target < ctx->tsentinel)) | |
| 810 ABORT; | |
| 811 | |
| 812 source_fork = source; | |
| 813 target_fork = target; | |
| 814 | |
| 815 if (!IMATCH(line, &source, &target, ctx)) { | |
| 816 source = source_fork; | |
| 817 target = target_fork; | |
| 818 | |
| 819 if (!IMATCH(letter, &source, &target, ctx) | |
| 820 || !(source < ctx->sentinel && target < ctx->tsentinel)) | |
| 821 ABORT; | |
| 822 } | |
| 823 | |
| 824 next = skip_space(source, ctx); | |
| 825 if (!(next < ctx->sentinel && target < ctx->tsentinel | |
| 826 && (*next == U('{') || *next == U(',')))) | |
| 827 ABORT; | |
| 828 | |
| 829 *target++ = U(' '); | |
| 830 *target_ = target; | |
| 831 *source_ = source; | |
| 832 (void)copy_space_optional(source_, target_, ctx); | |
| 833 } | |
| 834 | |
| 835 | |
| 836 /* | |
| 837 * Copy IE7 hack | |
| 838 */ | |
| 839 static void | |
| 840 copy_ie7hack(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx) | |
| 841 { | |
| 842 const rchar *source = *source_; | |
| 843 rchar *target = *target_; | |
| 844 | |
| 845 *target++ = U('>'); | |
| 846 *target_ = target; | |
| 847 | |
| 848 if (ctx->in_rule || ctx->at_group) | |
| 849 return; /* abort */ | |
| 850 | |
| 851 if (!MATCH(ie7, &source, &target, ctx)) | |
| 852 ABORT; | |
| 853 | |
| 854 ctx->in_macie5 = 0; | |
| 855 | |
| 856 *target_ = target; | |
| 857 *source_ = source; | |
| 858 | |
| 859 (void)copy_space_optional(source_, target_, ctx); | |
| 860 } | |
| 861 | |
| 862 | |
| 863 /* | |
| 864 * Copy semicolon; miss out duplicates or even this one (before '}') | |
| 865 */ | |
| 866 static void | |
| 867 copy_semicolon(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx) | |
| 868 { | |
| 869 const rchar *source = *source_, *begin, *end; | |
| 870 rchar *target = *target_; | |
| 871 | |
| 872 begin = source; | |
| 873 while (source < ctx->sentinel) { | |
| 874 end = skip_space(source, ctx); | |
| 875 if (!(end < ctx->sentinel)) { | |
| 876 if (!(target < ctx->tsentinel)) | |
| 877 ABORT; | |
| 878 *target++ = U(';'); | |
| 879 break; | |
| 880 } | |
| 881 switch (*end) { | |
| 882 case U(';'): | |
| 883 source = end + 1; | |
| 884 continue; | |
| 885 | |
| 886 case U('}'): | |
| 887 if (ctx->in_rule) | |
| 888 break; | |
| 889 | |
| 890 /* fall through */ | |
| 891 default: | |
| 892 if (!(target < ctx->tsentinel)) | |
| 893 ABORT; | |
| 894 *target++ = U(';'); | |
| 895 break; | |
| 896 } | |
| 897 | |
| 898 break; | |
| 899 } | |
| 900 | |
| 901 source = begin; | |
| 902 *target_ = target; | |
| 903 while (source < ctx->sentinel) { | |
| 904 if (*source == U(';')) { | |
| 905 ++source; | |
| 906 continue; | |
| 907 } | |
| 908 | |
| 909 if (copy_space_optional(&source, target_, ctx) == 0) | |
| 910 continue; | |
| 911 | |
| 912 break; | |
| 913 } | |
| 914 | |
| 915 *source_ = source; | |
| 916 } | |
| 917 | |
| 918 | |
| 919 /* | |
| 920 * Main function | |
| 921 * | |
| 922 * The return value determines the result length (kept in the target buffer). | |
| 923 * However, if the target buffer is too small, the return value is greater | |
| 924 * than tlength. The difference to tlength is the number of unconsumed source | |
| 925 * characters at the time the buffer was full. In this case you should resize | |
| 926 * the target buffer to the return value and call rcssmin again. Repeat as | |
| 927 * often as needed. | |
| 928 */ | |
| 929 static Py_ssize_t | |
| 930 rcssmin(const rchar *source, rchar *target, Py_ssize_t slength, | |
| 931 Py_ssize_t tlength, int keep_bang_comments) | |
| 932 { | |
| 933 rcssmin_ctx_t ctx_, *ctx = &ctx_; | |
| 934 const rchar *tstart = target; | |
| 935 rchar c; | |
| 936 | |
| 937 ctx->start = source; | |
| 938 ctx->sentinel = source + slength; | |
| 939 ctx->tsentinel = target + tlength; | |
| 940 ctx->at_group = 0; | |
| 941 ctx->in_macie5 = 0; | |
| 942 ctx->in_rule = 0; | |
| 943 ctx->keep_bang_comments = keep_bang_comments; | |
| 944 | |
| 945 while (source < ctx->sentinel && target < ctx->tsentinel) { | |
| 946 c = *source++; | |
| 947 if (RCSSMIN_IS_DULL(c)) { | |
| 948 *target++ = c; | |
| 949 continue; | |
| 950 } | |
| 951 else if (RCSSMIN_IS_SPACE(c)) { | |
| 952 copy_space(&source, &target, ctx, NEED_SPACE_MAYBE); | |
| 953 continue; | |
| 954 } | |
| 955 | |
| 956 switch (c) { | |
| 957 | |
| 958 /* Escape */ | |
| 959 case U('\\'): | |
| 960 copy_escape(&source, &target, ctx); | |
| 961 continue; | |
| 962 | |
| 963 /* String */ | |
| 964 case U('"'): case U('\''): | |
| 965 copy_string(&source, &target, ctx); | |
| 966 continue; | |
| 967 | |
| 968 /* URL */ | |
| 969 case U('u'): | |
| 970 copy_url(&source, &target, ctx); | |
| 971 continue; | |
| 972 | |
| 973 /* IE7hack */ | |
| 974 case U('>'): | |
| 975 copy_ie7hack(&source, &target, ctx); | |
| 976 continue; | |
| 977 | |
| 978 /* @-group */ | |
| 979 case U('@'): | |
| 980 copy_at_group(&source, &target, ctx); | |
| 981 continue; | |
| 982 | |
| 983 /* ; */ | |
| 984 case U(';'): | |
| 985 copy_semicolon(&source, &target, ctx); | |
| 986 continue; | |
| 987 | |
| 988 /* :first-line|letter followed by [{,] */ | |
| 989 /* (apparently needed for IE6) */ | |
| 990 case U(':'): | |
| 991 copy_first(&source, &target, ctx); | |
| 992 continue; | |
| 993 | |
| 994 /* { */ | |
| 995 case U('{'): | |
| 996 if (ctx->at_group) | |
| 997 --ctx->at_group; | |
| 998 else | |
| 999 ++ctx->in_rule; | |
| 1000 *target++ = c; | |
| 1001 continue; | |
| 1002 | |
| 1003 /* } */ | |
| 1004 case U('}'): | |
| 1005 if (ctx->in_rule) | |
| 1006 --ctx->in_rule; | |
| 1007 *target++ = c; | |
| 1008 continue; | |
| 1009 | |
| 1010 /* space starting with comment */ | |
| 1011 case U('/'): | |
| 1012 (void)copy_space_comment(&source, &target, ctx, NEED_SPACE_MAYBE); | |
| 1013 continue; | |
| 1014 | |
| 1015 /* Fallback: copy character. Better safe than sorry. Should not be | |
| 1016 * reached, though */ | |
| 1017 default: | |
| 1018 *target++ = c; | |
| 1019 continue; | |
| 1020 } | |
| 1021 } | |
| 1022 | |
| 1023 return | |
| 1024 (Py_ssize_t)(target - tstart) + (Py_ssize_t)(ctx->sentinel - source); | |
| 1025 } | |
| 1026 | |
| 1027 | |
| 1028 PyDoc_STRVAR(rcssmin_cssmin__doc__, | |
| 1029 "cssmin(style, keep_bang_comments=False)\n\ | |
| 1030 \n\ | |
| 1031 Minify CSS.\n\ | |
| 1032 \n\ | |
| 1033 :Note: This is a hand crafted C implementation built on the regex\n\ | |
| 1034 semantics.\n\ | |
| 1035 \n\ | |
| 1036 :Parameters:\n\ | |
| 1037 `style` : ``str``\n\ | |
| 1038 CSS to minify\n\ | |
| 1039 \n\ | |
| 1040 :Return: Minified style\n\ | |
| 1041 :Rtype: ``str``"); | |
| 1042 | |
| 1043 static PyObject * | |
| 1044 rcssmin_cssmin(PyObject *self, PyObject *args, PyObject *kwds) | |
| 1045 { | |
| 1046 PyObject *style, *keep_bang_comments_ = NULL, *result; | |
| 1047 static char *kwlist[] = {"style", "keep_bang_comments", NULL}; | |
| 1048 Py_ssize_t rlength, slength, length; | |
| 1049 int keep_bang_comments; | |
| 1050 #ifdef EXT2 | |
| 1051 int uni; | |
| 1052 #define UOBJ "O" | |
| 1053 #endif | |
| 1054 #ifdef EXT3 | |
| 1055 #define UOBJ "U" | |
| 1056 #endif | |
| 1057 | |
| 1058 if (!PyArg_ParseTupleAndKeywords(args, kwds, UOBJ "|O", kwlist, | |
| 1059 &style, &keep_bang_comments_)) | |
| 1060 return NULL; | |
| 1061 | |
| 1062 if (!keep_bang_comments_) | |
| 1063 keep_bang_comments = 0; | |
| 1064 else { | |
| 1065 keep_bang_comments = PyObject_IsTrue(keep_bang_comments_); | |
| 1066 if (keep_bang_comments == -1) | |
| 1067 return NULL; | |
| 1068 } | |
| 1069 | |
| 1070 #ifdef EXT2 | |
| 1071 if (PyUnicode_Check(style)) { | |
| 1072 if (!(style = PyUnicode_AsUTF8String(style))) | |
| 1073 return NULL; | |
| 1074 uni = 1; | |
| 1075 } | |
| 1076 else { | |
| 1077 if (!(style = PyObject_Str(style))) | |
| 1078 return NULL; | |
| 1079 uni = 0; | |
| 1080 } | |
| 1081 #endif | |
| 1082 | |
| 1083 #ifdef EXT3 | |
| 1084 Py_INCREF(style); | |
| 1085 #define PyString_GET_SIZE PyUnicode_GET_SIZE | |
| 1086 #define PyString_AS_STRING PyUnicode_AS_UNICODE | |
| 1087 #define _PyString_Resize PyUnicode_Resize | |
| 1088 #define PyString_FromStringAndSize PyUnicode_FromUnicode | |
| 1089 #endif | |
| 1090 | |
| 1091 rlength = slength = PyString_GET_SIZE(style); | |
| 1092 | |
| 1093 again: | |
| 1094 if (!(result = PyString_FromStringAndSize(NULL, rlength))) { | |
| 1095 Py_DECREF(style); | |
| 1096 return NULL; | |
| 1097 } | |
| 1098 Py_BEGIN_ALLOW_THREADS | |
| 1099 length = rcssmin((rchar *)PyString_AS_STRING(style), | |
| 1100 (rchar *)PyString_AS_STRING(result), | |
| 1101 slength, rlength, keep_bang_comments); | |
| 1102 Py_END_ALLOW_THREADS | |
| 1103 | |
| 1104 if (length > rlength) { | |
| 1105 Py_DECREF(result); | |
| 1106 rlength = length; | |
| 1107 goto again; | |
| 1108 } | |
| 1109 | |
| 1110 Py_DECREF(style); | |
| 1111 if (length < 0) { | |
| 1112 Py_DECREF(result); | |
| 1113 return NULL; | |
| 1114 } | |
| 1115 if (length != rlength && _PyString_Resize(&result, length) == -1) | |
| 1116 return NULL; | |
| 1117 | |
| 1118 #ifdef EXT2 | |
| 1119 if (uni) { | |
| 1120 style = PyUnicode_DecodeUTF8(PyString_AS_STRING(result), | |
| 1121 PyString_GET_SIZE(result), "strict"); | |
| 1122 Py_DECREF(result); | |
| 1123 if (!style) | |
| 1124 return NULL; | |
| 1125 result = style; | |
| 1126 } | |
| 1127 #endif | |
| 1128 return result; | |
| 1129 } | |
| 1130 | |
| 1131 /* ------------------------ BEGIN MODULE DEFINITION ------------------------ */ | |
| 1132 | |
| 1133 EXT_METHODS = { | |
| 1134 {"cssmin", | |
| 1135 (PyCFunction)rcssmin_cssmin, METH_VARARGS | METH_KEYWORDS, | |
| 1136 rcssmin_cssmin__doc__}, | |
| 1137 | |
| 1138 {NULL} /* Sentinel */ | |
| 1139 }; | |
| 1140 | |
| 1141 PyDoc_STRVAR(EXT_DOCS_VAR, | |
| 1142 "C implementation of rcssmin\n\ | |
| 1143 ===========================\n\ | |
| 1144 \n\ | |
| 1145 C implementation of rcssmin."); | |
| 1146 | |
| 1147 | |
| 1148 EXT_DEFINE(EXT_MODULE_NAME, EXT_METHODS_VAR, EXT_DOCS_VAR); | |
| 1149 | |
| 1150 EXT_INIT_FUNC { | |
| 1151 PyObject *m; | |
| 1152 | |
| 1153 /* Create the module and populate stuff */ | |
| 1154 if (!(m = EXT_CREATE(&EXT_DEFINE_VAR))) | |
| 1155 EXT_INIT_ERROR(NULL); | |
| 1156 | |
| 1157 EXT_ADD_UNICODE(m, "__author__", "Andr\xe9 Malo", "latin-1"); | |
| 1158 EXT_ADD_STRING(m, "__docformat__", "restructuredtext en"); | |
| 1159 | |
| 1160 EXT_INIT_RETURN(m); | |
| 1161 } | |
| 1162 | |
| 1163 /* ------------------------- END MODULE DEFINITION ------------------------- */ | |
| OLD | NEW |