OLD | NEW |
| (Empty) |
1 /* | |
2 * Copyright 2011 - 2014 | |
3 * André Malo or his licensors, as applicable | |
4 * | |
5 * Licensed under the Apache License, Version 2.0 (the "License"); | |
6 * you may not use this file except in compliance with the License. | |
7 * You may obtain a copy of the License at | |
8 * | |
9 * http://www.apache.org/licenses/LICENSE-2.0 | |
10 * | |
11 * Unless required by applicable law or agreed to in writing, software | |
12 * distributed under the License is distributed on an "AS IS" BASIS, | |
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 * See the License for the specific language governing permissions and | |
15 * limitations under the License. | |
16 */ | |
17 | |
18 #include "cext.h" | |
19 EXT_INIT_FUNC; | |
20 | |
/*
 * Character unit processed by the minifier: Py_UNICODE when EXT3 is
 * defined, a plain unsigned byte otherwise.
 */
#ifndef EXT3
typedef unsigned char rchar;
#else
typedef Py_UNICODE rchar;
#endif

/* Convert a character constant to the rchar type */
#define U(c) ((rchar)(c))
27 | |
28 typedef struct { | |
29 const rchar *start; | |
30 const rchar *sentinel; | |
31 const rchar *tsentinel; | |
32 Py_ssize_t at_group; | |
33 int in_macie5; | |
34 int in_rule; | |
35 int keep_bang_comments; | |
36 } rcssmin_ctx_t; | |
37 | |
/*
 * Tells copy_space() whether it may emit a separating blank at all
 */
typedef enum {
    NEED_SPACE_MAYBE = 0,  /* emit a blank if the context requires one */
    NEED_SPACE_NEVER = 1   /* never emit a blank */
} need_space_flag;
42 | |
43 | |
/* Character class bits stored in rcssmin_charmask[] */
#define RCSSMIN_DULL_BIT         0x001
#define RCSSMIN_HEX_BIT          0x002
#define RCSSMIN_ESC_BIT          0x004
#define RCSSMIN_SPACE_BIT        0x008
#define RCSSMIN_STRING_DULL_BIT  0x010
#define RCSSMIN_NMCHAR_BIT       0x020
#define RCSSMIN_URI_DULL_BIT     0x040
#define RCSSMIN_PRE_CHAR_BIT     0x080
#define RCSSMIN_POST_CHAR_BIT    0x100
53 | |
/*
 * Character classes for the 128 ASCII code points. Each entry is a
 * combination of the RCSSMIN_*_BIT flags; the table is indexed by the
 * character value.
 */
static const unsigned short rcssmin_charmask[128] = {
    /* 0x00 - 0x07 */  21,  21,  21,  21,  21,  21,  21,  21,
    /* 0x08 - 0x0F */  21,  28,   8,  21,   8,   8,  21,  21,
    /* 0x10 - 0x17 */  21,  21,  21,  21,  21,  21,  21,  21,
    /* 0x18 - 0x1F */  21,  21,  21,  21,  21,  21,  21,  21,
    /* 0x20 - 0x27 */  28, 469,   4,  85,  85,  85,  85,   4,
    /* 0x28 - 0x2F */ 149, 277,  85, 469, 469, 117,  85,  84,
    /* 0x30 - 0x37 */ 115, 115, 115, 115, 115, 115, 115, 115,
    /* 0x38 - 0x3F */ 115, 115, 468, 340,  85, 469, 468,  85,
    /* 0x40 - 0x47 */  84, 115, 115, 115, 115, 115, 115, 117,
    /* 0x48 - 0x4F */ 117, 117, 117, 117, 117, 117, 117, 117,
    /* 0x50 - 0x57 */ 117, 117, 117, 117, 117, 117, 117, 117,
    /* 0x58 - 0x5F */ 117, 117, 117, 213,   4, 341,  85, 117,
    /* 0x60 - 0x67 */  85, 115, 115, 115, 115, 115, 115, 117,
    /* 0x68 - 0x6F */ 117, 117, 117, 117, 117, 117, 117, 117,
    /* 0x70 - 0x77 */ 117, 117, 117, 117, 117, 116, 117, 117,
    /* 0x78 - 0x7F */ 117, 117, 117, 468,  85, 468,  85,  21
};
72 | |
/*
 * Character class tests.
 *
 * Two flavours exist: classes which are restricted to ASCII (characters
 * above 127 never belong to them) and classes which every character above
 * 127 belongs to implicitly. Note that the argument is evaluated more than
 * once.
 */
#define RCSSMIN_ASCII_CLASS_(c, bit) \
    ((U(c) <= 127) && (rcssmin_charmask[U(c) & 0x7F] & (bit)))

#define RCSSMIN_WIDE_CLASS_(c, bit) \
    ((U(c) > 127) || (rcssmin_charmask[U(c) & 0x7F] & (bit)))

#define RCSSMIN_IS_DULL(c)        RCSSMIN_WIDE_CLASS_(c, RCSSMIN_DULL_BIT)

#define RCSSMIN_IS_HEX(c)         RCSSMIN_ASCII_CLASS_(c, RCSSMIN_HEX_BIT)

#define RCSSMIN_IS_ESC(c)         RCSSMIN_WIDE_CLASS_(c, RCSSMIN_ESC_BIT)

#define RCSSMIN_IS_SPACE(c)       RCSSMIN_ASCII_CLASS_(c, RCSSMIN_SPACE_BIT)

#define RCSSMIN_IS_STRING_DULL(c) \
    RCSSMIN_WIDE_CLASS_(c, RCSSMIN_STRING_DULL_BIT)

#define RCSSMIN_IS_NMCHAR(c)      RCSSMIN_WIDE_CLASS_(c, RCSSMIN_NMCHAR_BIT)

#define RCSSMIN_IS_URI_DULL(c)    RCSSMIN_WIDE_CLASS_(c, RCSSMIN_URI_DULL_BIT)

#define RCSSMIN_IS_PRE_CHAR(c) \
    RCSSMIN_ASCII_CLASS_(c, RCSSMIN_PRE_CHAR_BIT)

#define RCSSMIN_IS_POST_CHAR(c) \
    RCSSMIN_ASCII_CLASS_(c, RCSSMIN_POST_CHAR_BIT)
99 | |
100 | |
101 static const rchar pattern_url[] = { | |
102 /*U('u'),*/ U('r'), U('l'), U('(') | |
103 }; | |
104 | |
105 static const rchar pattern_ie7[] = { | |
106 /*U('>'),*/ U('/'), U('*'), U('*'), U('/') | |
107 }; | |
108 | |
109 static const rchar pattern_media[] = { | |
110 U('m'), U('e'), U('d'), U('i'), U('a'), | |
111 U('M'), U('E'), U('D'), U('I'), U('A') | |
112 }; | |
113 | |
114 static const rchar pattern_document[] = { | |
115 U('d'), U('o'), U('c'), U('u'), U('m'), U('e'), U('n'), U('t'), | |
116 U('D'), U('O'), U('C'), U('U'), U('M'), U('E'), U('N'), U('T') | |
117 }; | |
118 | |
119 static const rchar pattern_supports[] = { | |
120 U('s'), U('u'), U('p'), U('p'), U('o'), U('r'), U('t'), U('s'), | |
121 U('S'), U('U'), U('P'), U('P'), U('O'), U('R'), U('T'), U('S') | |
122 }; | |
123 | |
124 static const rchar pattern_keyframes[] = { | |
125 U('k'), U('e'), U('y'), U('f'), U('r'), U('a'), U('m'), U('e'), U('s'), | |
126 U('K'), U('E'), U('Y'), U('F'), U('R'), U('A'), U('M'), U('E'), U('S') | |
127 }; | |
128 | |
129 static const rchar pattern_vendor_o[] = { | |
130 U('-'), U('o'), U('-'), | |
131 U('-'), U('O'), U('-') | |
132 }; | |
133 | |
134 static const rchar pattern_vendor_moz[] = { | |
135 U('-'), U('m'), U('o'), U('z'), U('-'), | |
136 U('-'), U('M'), U('O'), U('Z'), U('-') | |
137 }; | |
138 | |
139 static const rchar pattern_vendor_webkit[] = { | |
140 U('-'), U('w'), U('e'), U('b'), U('k'), U('i'), U('t'), U('-'), | |
141 U('-'), U('W'), U('E'), U('B'), U('K'), U('I'), U('T'), U('-') | |
142 }; | |
143 | |
144 static const rchar pattern_vendor_ms[] = { | |
145 U('-'), U('m'), U('s'), U('-'), | |
146 U('-'), U('M'), U('S'), U('-') | |
147 }; | |
148 | |
149 static const rchar pattern_first[] = { | |
150 U('f'), U('i'), U('r'), U('s'), U('t'), U('-'), U('l'), | |
151 U('F'), U('I'), U('R'), U('S'), U('T'), U('-'), U('L') | |
152 }; | |
153 | |
154 static const rchar pattern_line[] = { | |
155 U('i'), U('n'), U('e'), | |
156 U('I'), U('N'), U('E'), | |
157 }; | |
158 | |
159 static const rchar pattern_letter[] = { | |
160 U('e'), U('t'), U('t'), U('e'), U('r'), | |
161 U('E'), U('T'), U('T'), U('E'), U('R') | |
162 }; | |
163 | |
164 static const rchar pattern_macie5_init[] = { | |
165 U('/'), U('*'), U('\\'), U('*'), U('/') | |
166 }; | |
167 | |
168 static const rchar pattern_macie5_exit[] = { | |
169 U('/'), U('*'), U('*'), U('/') | |
170 }; | |
171 | |
172 /* | |
173 * Match a pattern (and copy immediately to target) | |
174 */ | |
175 #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
) | |
176 #pragma GCC diagnostic push | |
177 #pragma GCC diagnostic ignored "-Wstrict-overflow" | |
178 #endif | |
179 static int | |
180 copy_match(const rchar *pattern, const rchar *psentinel, | |
181 const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx) | |
182 { | |
183 const rchar *source = *source_; | |
184 rchar *target = *target_; | |
185 rchar c; | |
186 | |
187 while (pattern < psentinel | |
188 && source < ctx->sentinel && target < ctx->tsentinel | |
189 && ((c = *source++) == *pattern++)) | |
190 *target++ = c; | |
191 | |
192 *source_ = source; | |
193 *target_ = target; | |
194 | |
195 return (pattern == psentinel); | |
196 } | |
197 #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
) | |
198 #pragma GCC diagnostic pop | |
199 #endif | |
200 | |
201 #define MATCH(PAT, source, target, ctx) ( \ | |
202 copy_match(pattern_##PAT, \ | |
203 pattern_##PAT + sizeof(pattern_##PAT) / sizeof(rchar), \ | |
204 source, target, ctx) \ | |
205 ) | |
206 | |
207 | |
208 /* | |
209 * Match a pattern (and copy immediately to target) - CI version | |
210 */ | |
211 static int | |
212 copy_imatch(const rchar *pattern, const rchar *psentinel, | |
213 const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx) | |
214 { | |
215 const rchar *source = *source_, *pstart = pattern; | |
216 rchar *target = *target_; | |
217 rchar c; | |
218 | |
219 while (pattern < psentinel | |
220 && source < ctx->sentinel && target < ctx->tsentinel | |
221 && ((c = *source++) == *pattern | |
222 || c == pstart[(pattern - pstart) + (psentinel - pstart)])) { | |
223 ++pattern; | |
224 *target++ = c; | |
225 } | |
226 | |
227 *source_ = source; | |
228 *target_ = target; | |
229 | |
230 return (pattern == psentinel); | |
231 } | |
232 | |
233 #define IMATCH(PAT, source, target, ctx) ( \ | |
234 copy_imatch(pattern_##PAT, \ | |
235 pattern_##PAT + sizeof(pattern_##PAT) / sizeof(rchar) / 2, \ | |
236 source, target, ctx) \ | |
237 ) | |
238 | |
239 | |
240 /* | |
241 * Copy characters | |
242 */ | |
243 #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
) | |
244 #pragma GCC diagnostic push | |
245 #pragma GCC diagnostic ignored "-Wstrict-overflow" | |
246 #endif | |
247 static int | |
248 copy(const rchar *source, const rchar *sentinel, rchar **target_, | |
249 rcssmin_ctx_t *ctx) | |
250 { | |
251 rchar *target = *target_; | |
252 | |
253 while (source < sentinel && target < ctx->tsentinel) | |
254 *target++ = *source++; | |
255 | |
256 *target_ = target; | |
257 | |
258 return (source == sentinel); | |
259 } | |
260 #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
) | |
261 #pragma GCC diagnostic pop | |
262 #endif | |
263 | |
264 #define COPY_PAT(PAT, target, ctx) ( \ | |
265 copy(pattern_##PAT, \ | |
266 pattern_##PAT + sizeof(pattern_##PAT) / sizeof(rchar), \ | |
267 target, ctx) \ | |
268 ) | |
269 | |
270 | |
/*
 * The ABORT macros work with known local variables!
 *
 * They expect `source`, `target`, `source_`, `target_` and `ctx` to exist
 * in the calling function. The local cursors are written back only if the
 * function gives up because the target buffer is full while there is still
 * source left. In every other case the caller's cursors stay at whatever
 * was stored there before.
 */
#define ABORT_(RET) do {                        \
    if (source < ctx->sentinel) {               \
        if (!(target < ctx->tsentinel)) {       \
            *source_ = source;                  \
            *target_ = target;                  \
        }                                       \
    }                                           \
    return RET;                                 \
} while(0)


/* ABORT leaves a void function, RABORT(value) a value returning one */
#define CRAPPY_C90_COMPATIBLE_EMPTY
#define ABORT ABORT_(CRAPPY_C90_COMPATIBLE_EMPTY)
#define RABORT(RET) ABORT_((RET))
286 | |
287 | |
288 /* | |
289 * Copy escape | |
290 */ | |
291 static void | |
292 copy_escape(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx) | |
293 { | |
294 const rchar *source = *source_, *hsentinel; | |
295 rchar *target = *target_; | |
296 rchar c; | |
297 | |
298 *target++ = U('\\'); | |
299 *target_ = target; | |
300 | |
301 if (source < ctx->sentinel && target < ctx->tsentinel) { | |
302 c = *source++; | |
303 if (RCSSMIN_IS_ESC(c)) { | |
304 *target++ = c; | |
305 } | |
306 else if (RCSSMIN_IS_HEX(c)) { | |
307 *target++ = c; | |
308 | |
309 /* 6 hex chars max, one we got already */ | |
310 if (ctx->sentinel - source > 5) | |
311 hsentinel = source + 5; | |
312 else | |
313 hsentinel = ctx->sentinel; | |
314 | |
315 while (source < hsentinel && target < ctx->tsentinel | |
316 && (c = *source, RCSSMIN_IS_HEX(c))) { | |
317 ++source; | |
318 *target++ = c; | |
319 } | |
320 | |
321 /* One optional space after */ | |
322 if (source < ctx->sentinel && target < ctx->tsentinel) { | |
323 if (source == hsentinel) | |
324 c = *source; | |
325 if (RCSSMIN_IS_SPACE(c)) { | |
326 ++source; | |
327 *target++ = U(' '); | |
328 if (c == U('\r') && source < ctx->sentinel | |
329 && *source == U('\n')) | |
330 ++source; | |
331 } | |
332 } | |
333 } | |
334 } | |
335 | |
336 *target_ = target; | |
337 *source_ = source; | |
338 } | |
339 | |
340 | |
341 /* | |
342 * Copy string | |
343 */ | |
344 static void | |
345 copy_string(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx) | |
346 { | |
347 const rchar *source = *source_; | |
348 rchar *target = *target_; | |
349 rchar c, quote = source[-1]; | |
350 | |
351 *target++ = quote; | |
352 *target_ = target; | |
353 | |
354 while (source < ctx->sentinel && target < ctx->tsentinel) { | |
355 c = *target++ = *source++; | |
356 if (RCSSMIN_IS_STRING_DULL(c)) | |
357 continue; | |
358 | |
359 switch (c) { | |
360 case U('\''): case U('"'): | |
361 if (c == quote) { | |
362 *target_ = target; | |
363 *source_ = source; | |
364 return; | |
365 } | |
366 continue; | |
367 | |
368 case U('\\'): | |
369 if (source < ctx->sentinel && target < ctx->tsentinel) { | |
370 c = *source++; | |
371 switch (c) { | |
372 case U('\r'): | |
373 if (source < ctx->sentinel && *source == U('\n')) | |
374 ++source; | |
375 /* fall through */ | |
376 | |
377 case U('\n'): case U('\f'): | |
378 --target; | |
379 break; | |
380 | |
381 default: | |
382 *target++ = c; | |
383 } | |
384 } | |
385 continue; | |
386 } | |
387 break; /* forbidden characters */ | |
388 } | |
389 | |
390 ABORT; | |
391 } | |
392 | |
393 | |
394 /* | |
395 * Copy URI string | |
396 */ | |
397 static int | |
398 copy_uri_string(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx) | |
399 { | |
400 const rchar *source = *source_; | |
401 rchar *target = *target_; | |
402 rchar c, quote = source[-1]; | |
403 | |
404 *target++ = quote; | |
405 *target_ = target; | |
406 | |
407 while (source < ctx->sentinel && target < ctx->tsentinel) { | |
408 c = *source++; | |
409 if (RCSSMIN_IS_SPACE(c)) | |
410 continue; | |
411 *target++ = c; | |
412 if (RCSSMIN_IS_STRING_DULL(c)) | |
413 continue; | |
414 | |
415 switch (c) { | |
416 case U('\''): case U('"'): | |
417 if (c == quote) { | |
418 *target_ = target; | |
419 *source_ = source; | |
420 return 0; | |
421 } | |
422 continue; | |
423 | |
424 case U('\\'): | |
425 if (source < ctx->sentinel && target < ctx->tsentinel) { | |
426 c = *source; | |
427 switch (c) { | |
428 case U('\r'): | |
429 if ((source + 1) < ctx->sentinel && source[1] == U('\n')) | |
430 ++source; | |
431 /* fall through */ | |
432 | |
433 case U('\n'): case U('\f'): | |
434 --target; | |
435 ++source; | |
436 break; | |
437 | |
438 default: | |
439 --target; | |
440 copy_escape(&source, &target, ctx); | |
441 } | |
442 } | |
443 continue; | |
444 } | |
445 | |
446 break; /* forbidden characters */ | |
447 } | |
448 | |
449 RABORT(-1); | |
450 } | |
451 | |
452 | |
453 /* | |
454 * Copy URI (unquoted) | |
455 */ | |
456 static int | |
457 copy_uri_unquoted(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx) | |
458 { | |
459 const rchar *source = *source_; | |
460 rchar *target = *target_; | |
461 rchar c; | |
462 | |
463 *target++ = source[-1]; | |
464 *target_ = target; | |
465 | |
466 while (source < ctx->sentinel && target < ctx->tsentinel) { | |
467 c = *source++; | |
468 if (RCSSMIN_IS_SPACE(c)) | |
469 continue; | |
470 *target++ = c; | |
471 if (RCSSMIN_IS_URI_DULL(c)) | |
472 continue; | |
473 | |
474 switch (c) { | |
475 | |
476 case U(')'): | |
477 *target_ = target - 1; | |
478 *source_ = source - 1; | |
479 return 0; | |
480 | |
481 case U('\\'): | |
482 if (source < ctx->sentinel && target < ctx->tsentinel) { | |
483 c = *source; | |
484 switch (c) { | |
485 case U('\r'): | |
486 if ((source + 1) < ctx->sentinel && source[1] == U('\n')) | |
487 ++source; | |
488 /* fall through */ | |
489 | |
490 case U('\n'): case U('\f'): | |
491 --target; | |
492 ++source; | |
493 break; | |
494 | |
495 default: | |
496 --target; | |
497 copy_escape(&source, &target, ctx); | |
498 } | |
499 } | |
500 continue; | |
501 } | |
502 | |
503 break; /* forbidden characters */ | |
504 } | |
505 | |
506 RABORT(-1); | |
507 } | |
508 | |
509 | |
510 /* | |
511 * Copy url | |
512 */ | |
513 static void | |
514 copy_url(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx) | |
515 { | |
516 const rchar *source = *source_; | |
517 rchar *target = *target_; | |
518 rchar c; | |
519 | |
520 *target++ = U('u'); | |
521 *target_ = target; | |
522 | |
523 /* Must not be inside an identifier */ | |
524 if ((source != ctx->start + 1) && RCSSMIN_IS_NMCHAR(source[-2])) | |
525 return; | |
526 | |
527 if (!MATCH(url, &source, &target, ctx) | |
528 || !(source < ctx->sentinel && target < ctx->tsentinel)) | |
529 ABORT; | |
530 | |
531 while (source < ctx->sentinel && RCSSMIN_IS_SPACE(*source)) | |
532 ++source; | |
533 | |
534 if (!(source < ctx->sentinel)) | |
535 ABORT; | |
536 | |
537 c = *source++; | |
538 switch (c) { | |
539 case U('"'): case U('\''): | |
540 if (copy_uri_string(&source, &target, ctx) == -1) | |
541 ABORT; | |
542 | |
543 while (source < ctx->sentinel && RCSSMIN_IS_SPACE(*source)) | |
544 ++source; | |
545 break; | |
546 | |
547 default: | |
548 if (copy_uri_unquoted(&source, &target, ctx) == -1) | |
549 ABORT; | |
550 } | |
551 | |
552 if (!(source < ctx->sentinel && target < ctx->tsentinel)) | |
553 ABORT; | |
554 | |
555 if ((*target++ = *source++) != U(')')) | |
556 ABORT; | |
557 | |
558 *target_ = target; | |
559 *source_ = source; | |
560 } | |
561 | |
562 | |
563 /* | |
564 * Copy @-group | |
565 */ | |
566 static void | |
567 copy_at_group(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx) | |
568 { | |
569 const rchar *source = *source_; | |
570 rchar *target = *target_; | |
571 | |
572 *target++ = U('@'); | |
573 *target_ = target; | |
574 | |
575 #define REMATCH(what) ( \ | |
576 source = *source_, \ | |
577 target = *target_, \ | |
578 IMATCH(what, &source, &target, ctx) \ | |
579 ) | |
580 #define CMATCH(what) IMATCH(what, &source, &target, ctx) | |
581 | |
582 if (( !CMATCH(media) | |
583 && !REMATCH(supports) | |
584 && !REMATCH(document) | |
585 && !REMATCH(keyframes) | |
586 && !(REMATCH(vendor_webkit) && CMATCH(keyframes)) | |
587 && !(REMATCH(vendor_moz) && CMATCH(keyframes)) | |
588 && !(REMATCH(vendor_o) && CMATCH(keyframes)) | |
589 && !(REMATCH(vendor_ms) && CMATCH(keyframes))) | |
590 || !(source < ctx->sentinel && target < ctx->tsentinel) | |
591 || RCSSMIN_IS_NMCHAR(*source)) | |
592 ABORT; | |
593 | |
594 #undef CMATCH | |
595 #undef REMATCH | |
596 | |
597 ++ctx->at_group; | |
598 | |
599 *target_ = target; | |
600 *source_ = source; | |
601 } | |
602 | |
603 | |
604 /* | |
605 * Skip space | |
606 */ | |
607 static const rchar * | |
608 skip_space(const rchar *source, rcssmin_ctx_t *ctx) | |
609 { | |
610 const rchar *begin = source; | |
611 int res; | |
612 rchar c; | |
613 | |
614 while (source < ctx->sentinel) { | |
615 c = *source; | |
616 if (RCSSMIN_IS_SPACE(c)) { | |
617 ++source; | |
618 continue; | |
619 } | |
620 else if (c == U('/')) { | |
621 ++source; | |
622 if (!(source < ctx->sentinel && *source == U('*'))) { | |
623 --source; | |
624 break; | |
625 } | |
626 ++source; | |
627 res = 0; | |
628 while (source < ctx->sentinel) { | |
629 c = *source++; | |
630 if (c != U('*')) | |
631 continue; | |
632 if (!(source < ctx->sentinel)) | |
633 return begin; | |
634 if (*source != U('/')) | |
635 continue; | |
636 | |
637 /* Comment complete */ | |
638 ++source; | |
639 res = 1; | |
640 break; | |
641 } | |
642 if (!res) | |
643 return begin; | |
644 | |
645 continue; | |
646 } | |
647 | |
648 break; | |
649 } | |
650 | |
651 return source; | |
652 } | |
653 | |
654 | |
655 /* | |
656 * Copy space | |
657 */ | |
658 static void | |
659 copy_space(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx, | |
660 need_space_flag need_space) | |
661 { | |
662 const rchar *source = *source_, *end, *comment; | |
663 rchar *target = *target_; | |
664 int res; | |
665 rchar c; | |
666 | |
667 --source; | |
668 if (need_space == NEED_SPACE_MAYBE | |
669 && source > ctx->start | |
670 && !RCSSMIN_IS_PRE_CHAR(source[-1]) | |
671 && (end = skip_space(source, ctx)) < ctx->sentinel | |
672 && (!RCSSMIN_IS_POST_CHAR(*end) | |
673 || (*end == U(':') && !ctx->in_rule && !ctx->at_group))) { | |
674 | |
675 if (!(target < ctx->tsentinel)) | |
676 ABORT; | |
677 *target++ = U(' '); | |
678 } | |
679 | |
680 while (source < ctx->sentinel) { | |
681 switch (c = *source) { | |
682 | |
683 /* comment */ | |
684 case U('/'): | |
685 comment = source++; | |
686 if (!((source < ctx->sentinel && *source == U('*')))) { | |
687 --source; | |
688 break; | |
689 } | |
690 ++source; | |
691 res = 0; | |
692 while (source < ctx->sentinel) { | |
693 c = *source++; | |
694 if (c != U('*')) | |
695 continue; | |
696 if (!(source < ctx->sentinel)) | |
697 ABORT; | |
698 if (*source != U('/')) | |
699 continue; | |
700 | |
701 /* Comment complete */ | |
702 ++source; | |
703 res = 1; | |
704 | |
705 if (ctx->keep_bang_comments && comment[2] == U('!')) { | |
706 ctx->in_macie5 = (source[-3] == U('\\')); | |
707 if (!copy(comment, source, &target, ctx)) | |
708 ABORT; | |
709 } | |
710 else if (source[-3] == U('\\')) { | |
711 if (!ctx->in_macie5) { | |
712 if (!COPY_PAT(macie5_init, &target, ctx)) | |
713 ABORT; | |
714 } | |
715 ctx->in_macie5 = 1; | |
716 } | |
717 else if (ctx->in_macie5) { | |
718 if (!COPY_PAT(macie5_exit, &target, ctx)) | |
719 ABORT; | |
720 ctx->in_macie5 = 0; | |
721 } | |
722 /* else don't copy anything */ | |
723 break; | |
724 } | |
725 if (!res) | |
726 ABORT; | |
727 continue; | |
728 | |
729 /* space */ | |
730 case U(' '): case U('\t'): case U('\r'): case U('\n'): case U('\f'): | |
731 ++source; | |
732 continue; | |
733 } | |
734 | |
735 break; | |
736 } | |
737 | |
738 *source_ = source; | |
739 *target_ = target; | |
740 } | |
741 | |
742 | |
743 /* | |
744 * Copy space if comment | |
745 */ | |
746 static int | |
747 copy_space_comment(const rchar **source_, rchar **target_, | |
748 rcssmin_ctx_t *ctx, need_space_flag need_space) | |
749 { | |
750 const rchar *source = *source_; | |
751 rchar *target = *target_; | |
752 | |
753 if (source < ctx->sentinel && *source == U('*')) { | |
754 copy_space(source_, target_, ctx, need_space); | |
755 if (*source_ > source) | |
756 return 0; | |
757 } | |
758 if (!(target < ctx->tsentinel)) | |
759 RABORT(-1); | |
760 | |
761 *target++ = source[-1]; | |
762 | |
763 /* *source_ = source; <-- unchanged */ | |
764 *target_ = target; | |
765 | |
766 return -1; | |
767 } | |
768 | |
769 | |
770 /* | |
771 * Copy space if exists | |
772 */ | |
773 static int | |
774 copy_space_optional(const rchar **source_, rchar **target_, | |
775 rcssmin_ctx_t *ctx) | |
776 { | |
777 const rchar *source = *source_; | |
778 | |
779 if (!(source < ctx->sentinel)) | |
780 return -1; | |
781 | |
782 if (*source == U('/')) { | |
783 *source_ = source + 1; | |
784 return copy_space_comment(source_, target_, ctx, NEED_SPACE_NEVER); | |
785 } | |
786 else if (RCSSMIN_IS_SPACE(*source)) { | |
787 *source_ = source + 1; | |
788 copy_space(source_, target_, ctx, NEED_SPACE_NEVER); | |
789 return 0; | |
790 } | |
791 | |
792 return -1; | |
793 } | |
794 | |
795 | |
796 /* | |
797 * Copy :first-line|letter | |
798 */ | |
799 static void | |
800 copy_first(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx) | |
801 { | |
802 const rchar *source = *source_, *next, *source_fork; | |
803 rchar *target = *target_, *target_fork; | |
804 | |
805 *target++ = U(':'); | |
806 *target_ = target; | |
807 | |
808 if (!IMATCH(first, &source, &target, ctx) | |
809 || !(source < ctx->sentinel && target < ctx->tsentinel)) | |
810 ABORT; | |
811 | |
812 source_fork = source; | |
813 target_fork = target; | |
814 | |
815 if (!IMATCH(line, &source, &target, ctx)) { | |
816 source = source_fork; | |
817 target = target_fork; | |
818 | |
819 if (!IMATCH(letter, &source, &target, ctx) | |
820 || !(source < ctx->sentinel && target < ctx->tsentinel)) | |
821 ABORT; | |
822 } | |
823 | |
824 next = skip_space(source, ctx); | |
825 if (!(next < ctx->sentinel && target < ctx->tsentinel | |
826 && (*next == U('{') || *next == U(',')))) | |
827 ABORT; | |
828 | |
829 *target++ = U(' '); | |
830 *target_ = target; | |
831 *source_ = source; | |
832 (void)copy_space_optional(source_, target_, ctx); | |
833 } | |
834 | |
835 | |
836 /* | |
837 * Copy IE7 hack | |
838 */ | |
839 static void | |
840 copy_ie7hack(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx) | |
841 { | |
842 const rchar *source = *source_; | |
843 rchar *target = *target_; | |
844 | |
845 *target++ = U('>'); | |
846 *target_ = target; | |
847 | |
848 if (ctx->in_rule || ctx->at_group) | |
849 return; /* abort */ | |
850 | |
851 if (!MATCH(ie7, &source, &target, ctx)) | |
852 ABORT; | |
853 | |
854 ctx->in_macie5 = 0; | |
855 | |
856 *target_ = target; | |
857 *source_ = source; | |
858 | |
859 (void)copy_space_optional(source_, target_, ctx); | |
860 } | |
861 | |
862 | |
863 /* | |
864 * Copy semicolon; miss out duplicates or even this one (before '}') | |
865 */ | |
866 static void | |
867 copy_semicolon(const rchar **source_, rchar **target_, rcssmin_ctx_t *ctx) | |
868 { | |
869 const rchar *source = *source_, *begin, *end; | |
870 rchar *target = *target_; | |
871 | |
872 begin = source; | |
873 while (source < ctx->sentinel) { | |
874 end = skip_space(source, ctx); | |
875 if (!(end < ctx->sentinel)) { | |
876 if (!(target < ctx->tsentinel)) | |
877 ABORT; | |
878 *target++ = U(';'); | |
879 break; | |
880 } | |
881 switch (*end) { | |
882 case U(';'): | |
883 source = end + 1; | |
884 continue; | |
885 | |
886 case U('}'): | |
887 if (ctx->in_rule) | |
888 break; | |
889 | |
890 /* fall through */ | |
891 default: | |
892 if (!(target < ctx->tsentinel)) | |
893 ABORT; | |
894 *target++ = U(';'); | |
895 break; | |
896 } | |
897 | |
898 break; | |
899 } | |
900 | |
901 source = begin; | |
902 *target_ = target; | |
903 while (source < ctx->sentinel) { | |
904 if (*source == U(';')) { | |
905 ++source; | |
906 continue; | |
907 } | |
908 | |
909 if (copy_space_optional(&source, target_, ctx) == 0) | |
910 continue; | |
911 | |
912 break; | |
913 } | |
914 | |
915 *source_ = source; | |
916 } | |
917 | |
918 | |
919 /* | |
920 * Main function | |
921 * | |
922 * The return value determines the result length (kept in the target buffer). | |
923 * However, if the target buffer is too small, the return value is greater | |
924 * than tlength. The difference to tlength is the number of unconsumed source | |
925 * characters at the time the buffer was full. In this case you should resize | |
926 * the target buffer to the return value and call rcssmin again. Repeat as | |
927 * often as needed. | |
928 */ | |
929 static Py_ssize_t | |
930 rcssmin(const rchar *source, rchar *target, Py_ssize_t slength, | |
931 Py_ssize_t tlength, int keep_bang_comments) | |
932 { | |
933 rcssmin_ctx_t ctx_, *ctx = &ctx_; | |
934 const rchar *tstart = target; | |
935 rchar c; | |
936 | |
937 ctx->start = source; | |
938 ctx->sentinel = source + slength; | |
939 ctx->tsentinel = target + tlength; | |
940 ctx->at_group = 0; | |
941 ctx->in_macie5 = 0; | |
942 ctx->in_rule = 0; | |
943 ctx->keep_bang_comments = keep_bang_comments; | |
944 | |
945 while (source < ctx->sentinel && target < ctx->tsentinel) { | |
946 c = *source++; | |
947 if (RCSSMIN_IS_DULL(c)) { | |
948 *target++ = c; | |
949 continue; | |
950 } | |
951 else if (RCSSMIN_IS_SPACE(c)) { | |
952 copy_space(&source, &target, ctx, NEED_SPACE_MAYBE); | |
953 continue; | |
954 } | |
955 | |
956 switch (c) { | |
957 | |
958 /* Escape */ | |
959 case U('\\'): | |
960 copy_escape(&source, &target, ctx); | |
961 continue; | |
962 | |
963 /* String */ | |
964 case U('"'): case U('\''): | |
965 copy_string(&source, &target, ctx); | |
966 continue; | |
967 | |
968 /* URL */ | |
969 case U('u'): | |
970 copy_url(&source, &target, ctx); | |
971 continue; | |
972 | |
973 /* IE7hack */ | |
974 case U('>'): | |
975 copy_ie7hack(&source, &target, ctx); | |
976 continue; | |
977 | |
978 /* @-group */ | |
979 case U('@'): | |
980 copy_at_group(&source, &target, ctx); | |
981 continue; | |
982 | |
983 /* ; */ | |
984 case U(';'): | |
985 copy_semicolon(&source, &target, ctx); | |
986 continue; | |
987 | |
988 /* :first-line|letter followed by [{,] */ | |
989 /* (apparently needed for IE6) */ | |
990 case U(':'): | |
991 copy_first(&source, &target, ctx); | |
992 continue; | |
993 | |
994 /* { */ | |
995 case U('{'): | |
996 if (ctx->at_group) | |
997 --ctx->at_group; | |
998 else | |
999 ++ctx->in_rule; | |
1000 *target++ = c; | |
1001 continue; | |
1002 | |
1003 /* } */ | |
1004 case U('}'): | |
1005 if (ctx->in_rule) | |
1006 --ctx->in_rule; | |
1007 *target++ = c; | |
1008 continue; | |
1009 | |
1010 /* space starting with comment */ | |
1011 case U('/'): | |
1012 (void)copy_space_comment(&source, &target, ctx, NEED_SPACE_MAYBE); | |
1013 continue; | |
1014 | |
1015 /* Fallback: copy character. Better safe than sorry. Should not be | |
1016 * reached, though */ | |
1017 default: | |
1018 *target++ = c; | |
1019 continue; | |
1020 } | |
1021 } | |
1022 | |
1023 return | |
1024 (Py_ssize_t)(target - tstart) + (Py_ssize_t)(ctx->sentinel - source); | |
1025 } | |
1026 | |
1027 | |
1028 PyDoc_STRVAR(rcssmin_cssmin__doc__, | |
1029 "cssmin(style, keep_bang_comments=False)\n\ | |
1030 \n\ | |
1031 Minify CSS.\n\ | |
1032 \n\ | |
1033 :Note: This is a hand crafted C implementation built on the regex\n\ | |
1034 semantics.\n\ | |
1035 \n\ | |
1036 :Parameters:\n\ | |
1037 `style` : ``str``\n\ | |
1038 CSS to minify\n\ | |
1039 \n\ | |
1040 :Return: Minified style\n\ | |
1041 :Rtype: ``str``"); | |
1042 | |
1043 static PyObject * | |
1044 rcssmin_cssmin(PyObject *self, PyObject *args, PyObject *kwds) | |
1045 { | |
1046 PyObject *style, *keep_bang_comments_ = NULL, *result; | |
1047 static char *kwlist[] = {"style", "keep_bang_comments", NULL}; | |
1048 Py_ssize_t rlength, slength, length; | |
1049 int keep_bang_comments; | |
1050 #ifdef EXT2 | |
1051 int uni; | |
1052 #define UOBJ "O" | |
1053 #endif | |
1054 #ifdef EXT3 | |
1055 #define UOBJ "U" | |
1056 #endif | |
1057 | |
1058 if (!PyArg_ParseTupleAndKeywords(args, kwds, UOBJ "|O", kwlist, | |
1059 &style, &keep_bang_comments_)) | |
1060 return NULL; | |
1061 | |
1062 if (!keep_bang_comments_) | |
1063 keep_bang_comments = 0; | |
1064 else { | |
1065 keep_bang_comments = PyObject_IsTrue(keep_bang_comments_); | |
1066 if (keep_bang_comments == -1) | |
1067 return NULL; | |
1068 } | |
1069 | |
1070 #ifdef EXT2 | |
1071 if (PyUnicode_Check(style)) { | |
1072 if (!(style = PyUnicode_AsUTF8String(style))) | |
1073 return NULL; | |
1074 uni = 1; | |
1075 } | |
1076 else { | |
1077 if (!(style = PyObject_Str(style))) | |
1078 return NULL; | |
1079 uni = 0; | |
1080 } | |
1081 #endif | |
1082 | |
1083 #ifdef EXT3 | |
1084 Py_INCREF(style); | |
1085 #define PyString_GET_SIZE PyUnicode_GET_SIZE | |
1086 #define PyString_AS_STRING PyUnicode_AS_UNICODE | |
1087 #define _PyString_Resize PyUnicode_Resize | |
1088 #define PyString_FromStringAndSize PyUnicode_FromUnicode | |
1089 #endif | |
1090 | |
1091 rlength = slength = PyString_GET_SIZE(style); | |
1092 | |
1093 again: | |
1094 if (!(result = PyString_FromStringAndSize(NULL, rlength))) { | |
1095 Py_DECREF(style); | |
1096 return NULL; | |
1097 } | |
1098 Py_BEGIN_ALLOW_THREADS | |
1099 length = rcssmin((rchar *)PyString_AS_STRING(style), | |
1100 (rchar *)PyString_AS_STRING(result), | |
1101 slength, rlength, keep_bang_comments); | |
1102 Py_END_ALLOW_THREADS | |
1103 | |
1104 if (length > rlength) { | |
1105 Py_DECREF(result); | |
1106 rlength = length; | |
1107 goto again; | |
1108 } | |
1109 | |
1110 Py_DECREF(style); | |
1111 if (length < 0) { | |
1112 Py_DECREF(result); | |
1113 return NULL; | |
1114 } | |
1115 if (length != rlength && _PyString_Resize(&result, length) == -1) | |
1116 return NULL; | |
1117 | |
1118 #ifdef EXT2 | |
1119 if (uni) { | |
1120 style = PyUnicode_DecodeUTF8(PyString_AS_STRING(result), | |
1121 PyString_GET_SIZE(result), "strict"); | |
1122 Py_DECREF(result); | |
1123 if (!style) | |
1124 return NULL; | |
1125 result = style; | |
1126 } | |
1127 #endif | |
1128 return result; | |
1129 } | |
1130 | |
1131 /* ------------------------ BEGIN MODULE DEFINITION ------------------------ */ | |
1132 | |
1133 EXT_METHODS = { | |
1134 {"cssmin", | |
1135 (PyCFunction)rcssmin_cssmin, METH_VARARGS | METH_KEYWORDS, | |
1136 rcssmin_cssmin__doc__}, | |
1137 | |
1138 {NULL} /* Sentinel */ | |
1139 }; | |
1140 | |
1141 PyDoc_STRVAR(EXT_DOCS_VAR, | |
1142 "C implementation of rcssmin\n\ | |
1143 ===========================\n\ | |
1144 \n\ | |
1145 C implementation of rcssmin."); | |
1146 | |
1147 | |
1148 EXT_DEFINE(EXT_MODULE_NAME, EXT_METHODS_VAR, EXT_DOCS_VAR); | |
1149 | |
1150 EXT_INIT_FUNC { | |
1151 PyObject *m; | |
1152 | |
1153 /* Create the module and populate stuff */ | |
1154 if (!(m = EXT_CREATE(&EXT_DEFINE_VAR))) | |
1155 EXT_INIT_ERROR(NULL); | |
1156 | |
1157 EXT_ADD_UNICODE(m, "__author__", "Andr\xe9 Malo", "latin-1"); | |
1158 EXT_ADD_STRING(m, "__docformat__", "restructuredtext en"); | |
1159 | |
1160 EXT_INIT_RETURN(m); | |
1161 } | |
1162 | |
1163 /* ------------------------- END MODULE DEFINITION ------------------------- */ | |
OLD | NEW |