OLD | NEW |
1 // Copyright 2010 Google Inc. | 1 // Copyright 2010 Google Inc. All Rights Reserved. |
2 // | 2 // |
3 // This code is licensed under the same terms as WebM: | 3 // This code is licensed under the same terms as WebM: |
4 // Software License Agreement: http://www.webmproject.org/license/software/ | 4 // Software License Agreement: http://www.webmproject.org/license/software/ |
5 // Additional IP Rights Grant: http://www.webmproject.org/license/additional/ | 5 // Additional IP Rights Grant: http://www.webmproject.org/license/additional/ |
6 // ----------------------------------------------------------------------------- | 6 // ----------------------------------------------------------------------------- |
7 // | 7 // |
8 // Speed-critical decoding functions. | 8 // Speed-critical decoding functions. |
9 // | 9 // |
10 // Author: Skal (pascal.massimino@gmail.com) | 10 // Author: Skal (pascal.massimino@gmail.com) |
11 | 11 |
(...skipping 30 matching lines...) Expand all Loading... |
42 for (i = -112; i <= 112; ++i) { | 42 for (i = -112; i <= 112; ++i) { |
43 sclip2[112 + i] = (i < -16) ? -16 : (i > 15) ? 15 : i; | 43 sclip2[112 + i] = (i < -16) ? -16 : (i > 15) ? 15 : i; |
44 } | 44 } |
45 for (i = -255; i <= 255 + 255; ++i) { | 45 for (i = -255; i <= 255 + 255; ++i) { |
46 clip1[255 + i] = (i < 0) ? 0 : (i > 255) ? 255 : i; | 46 clip1[255 + i] = (i < 0) ? 0 : (i > 255) ? 255 : i; |
47 } | 47 } |
48 tables_ok = 1; | 48 tables_ok = 1; |
49 } | 49 } |
50 } | 50 } |
51 | 51 |
// Clamps an integer to the 8-bit range [0, 255].
// If no bit outside the low 8 is set, v is returned unchanged; otherwise
// negative values map to 0 and all remaining (too large) values map to 255.
52 static inline uint8_t clip_8b(int v) { | 52 static WEBP_INLINE uint8_t clip_8b(int v) { |
53 return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255; | 53 return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255; |
54 } | 54 } |
55 | 55 |
56 //------------------------------------------------------------------------------ | 56 //------------------------------------------------------------------------------ |
57 // Transforms (Paragraph 14.4) | 57 // Transforms (Paragraph 14.4) |
58 | 58 |
// STORE(x, y, v): adds (v >> 3) to the pixel at column x, row y of 'dst'
// (rows are BPS bytes apart) and writes the sum back clamped by clip_8b().
// A variable named 'dst' must be in scope where the macro is expanded.
// NOTE(review): 'x' and 'y' are not parenthesised in the expansion, so
// callers should pass simple expressions only.
59 #define STORE(x, y, v) \ | 59 #define STORE(x, y, v) \ |
60 dst[x + y * BPS] = clip_8b(dst[x + y * BPS] + ((v) >> 3)) | 60 dst[x + y * BPS] = clip_8b(dst[x + y * BPS] + ((v) >> 3)) |
61 | 61 |
62 static const int kC1 = 20091 + (1 << 16); | 62 static const int kC1 = 20091 + (1 << 16); |
(...skipping 101 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
164 } | 164 } |
165 } | 165 } |
166 | 166 |
// Function pointer used to invoke the WHT transform; it is initialised here
// to the plain-C TransformWHT implementation.
167 void (*VP8TransformWHT)(const int16_t* in, int16_t* out) = TransformWHT; | 167 void (*VP8TransformWHT)(const int16_t* in, int16_t* out) = TransformWHT; |
168 | 168 |
169 //------------------------------------------------------------------------------ | 169 //------------------------------------------------------------------------------ |
170 // Intra predictions | 170 // Intra predictions |
171 | 171 |
// DST(x, y): the byte at column x, row y relative to a 'dst' pointer that
// must be in scope at the point of use; consecutive rows are BPS bytes apart.
172 #define DST(x, y) dst[(x) + (y) * BPS] | 172 #define DST(x, y) dst[(x) + (y) * BPS] |
173 | 173 |
174 static inline void TrueMotion(uint8_t *dst, int size) { | 174 static WEBP_INLINE void TrueMotion(uint8_t *dst, int size) { |
175 const uint8_t* top = dst - BPS; | 175 const uint8_t* top = dst - BPS; |
176 const uint8_t* const clip0 = clip1 + 255 - top[-1]; | 176 const uint8_t* const clip0 = clip1 + 255 - top[-1]; |
177 int y; | 177 int y; |
178 for (y = 0; y < size; ++y) { | 178 for (y = 0; y < size; ++y) { |
179 const uint8_t* const clip = clip0 + dst[-1]; | 179 const uint8_t* const clip = clip0 + dst[-1]; |
180 int x; | 180 int x; |
181 for (x = 0; x < size; ++x) { | 181 for (x = 0; x < size; ++x) { |
182 dst[x] = clip[top[x]]; | 182 dst[x] = clip[top[x]]; |
183 } | 183 } |
184 dst += BPS; | 184 dst += BPS; |
(...skipping 14 matching lines...) Expand all Loading... |
199 } | 199 } |
200 | 200 |
// Horizontal prediction for a 16x16 block: each of the 16 rows is filled
// with the byte stored immediately to its left (dst[-1]). Rows are BPS bytes
// apart, so the byte at dst[-1] must be readable for every row.
201 static void HE16(uint8_t *dst) { // horizontal | 201 static void HE16(uint8_t *dst) { // horizontal |
202 int j; | 202 int j; |
203 for (j = 16; j > 0; --j) { | 203 for (j = 16; j > 0; --j) { |
204 memset(dst, dst[-1], 16); | 204 memset(dst, dst[-1], 16); |
205 dst += BPS; | 205 dst += BPS; |
206 } | 206 } |
207 } | 207 } |
208 | 208 |
// Fills a 16x16 block starting at 'dst' (row pitch BPS) with the value 'v'.
// memset() stores 'v' converted to unsigned char, so only its low 8 bits are
// written.
209 static inline void Put16(int v, uint8_t* dst) { | 209 static WEBP_INLINE void Put16(int v, uint8_t* dst) { |
210 int j; | 210 int j; |
211 for (j = 0; j < 16; ++j) { | 211 for (j = 0; j < 16; ++j) { |
212 memset(dst + j * BPS, v, 16); | 212 memset(dst + j * BPS, v, 16); |
213 } | 213 } |
214 } | 214 } |
215 | 215 |
216 static void DC16(uint8_t *dst) { // DC | 216 static void DC16(uint8_t *dst) { // DC |
217 int DC = 16; | 217 int DC = 16; |
218 int j; | 218 int j; |
219 for (j = 0; j < 16; ++j) { | 219 for (j = 0; j < 16; ++j) { |
(...skipping 199 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
419 | 419 |
// Horizontal prediction for an 8x8 chroma block: each of the 8 rows is
// filled with the byte stored immediately to its left (dst[-1]). Rows are
// BPS bytes apart.
420 static void HE8uv(uint8_t *dst) { // horizontal | 420 static void HE8uv(uint8_t *dst) { // horizontal |
421 int j; | 421 int j; |
422 for (j = 0; j < 8; ++j) { | 422 for (j = 0; j < 8; ++j) { |
423 memset(dst, dst[-1], 8); | 423 memset(dst, dst[-1], 8); |
424 dst += BPS; | 424 dst += BPS; |
425 } | 425 } |
426 } | 426 } |
427 | 427 |
// Writes the 8 bytes of 'v' to the start of each of 8 consecutive rows of
// 'dst' (row pitch BPS), i.e. fills an 8x8 block when every byte of 'v' is
// the same value.
// NOTE(review): the store goes through a uint64_t* obtained by casting a
// uint8_t*; this assumes the target tolerates that access (alignment of
// dst + j * BPS and type punning) -- confirm, or consider memcpy().
428 // helper for chroma-DC predictions | 428 // helper for chroma-DC predictions |
429 static inline void Put8x8uv(uint64_t v, uint8_t* dst) { | 429 static WEBP_INLINE void Put8x8uv(uint64_t v, uint8_t* dst) { |
430 int j; | 430 int j; |
431 for (j = 0; j < 8; ++j) { | 431 for (j = 0; j < 8; ++j) { |
432 *(uint64_t*)(dst + j * BPS) = v; | 432 *(uint64_t*)(dst + j * BPS) = v; |
433 } | 433 } |
434 } | 434 } |
435 | 435 |
436 static void DC8uv(uint8_t *dst) { // DC | 436 static void DC8uv(uint8_t *dst) { // DC |
437 int dc0 = 8; | 437 int dc0 = 8; |
438 int i; | 438 int i; |
439 for (i = 0; i < 8; ++i) { | 439 for (i = 0; i < 8; ++i) { |
(...skipping 20 matching lines...) Expand all Loading... |
460 Put8x8uv((uint64_t)((dc0 >> 3) * 0x0101010101010101ULL), dst); | 460 Put8x8uv((uint64_t)((dc0 >> 3) * 0x0101010101010101ULL), dst); |
461 } | 461 } |
462 | 462 |
// DC prediction for an 8x8 chroma block when neither top nor left samples
// are used: every byte of the block is set to the constant 0x80 (128).
463 static void DC8uvNoTopLeft(uint8_t *dst) { // DC with nothing | 463 static void DC8uvNoTopLeft(uint8_t *dst) { // DC with nothing |
464 Put8x8uv(0x8080808080808080ULL, dst); | 464 Put8x8uv(0x8080808080808080ULL, dst); |
465 } | 465 } |
466 | 466 |
467 //------------------------------------------------------------------------------ | 467 //------------------------------------------------------------------------------ |
468 // default C implementations | 468 // default C implementations |
469 | 469 |
// Table of the ten 4x4 luma predictors, in the order
// DC, TM, VE, HE, RD, VR, LD, VL, HD, HU.
// NOTE(review): presumably indexed by the 4x4 intra-mode enum, so the order
// must match that enum -- confirm against its declaration.
470 VP8PredFunc VP8PredLuma4[/* NUM_BMODES */] = { | 470 const VP8PredFunc VP8PredLuma4[NUM_BMODES] = { |
471 DC4, TM4, VE4, HE4, RD4, VR4, LD4, VL4, HD4, HU4 | 471 DC4, TM4, VE4, HE4, RD4, VR4, LD4, VL4, HD4, HU4 |
472 }; | 472 }; |
473 | 473 |
// Table of the seven 16x16 luma predictors: the four regular modes
// (DC, TM, VE, HE) followed by the three DC variants for a missing top,
// a missing left, or both.
// NOTE(review): presumably indexed by the 16x16 mode enum -- confirm order.
474 VP8PredFunc VP8PredLuma16[/*NUM_B_DC_MODES */] = { | 474 const VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES] = { |
475 DC16, TM16, VE16, HE16, | 475 DC16, TM16, VE16, HE16, |
476 DC16NoTop, DC16NoLeft, DC16NoTopLeft | 476 DC16NoTop, DC16NoLeft, DC16NoTopLeft |
477 }; | 477 }; |
478 | 478 |
// Table of the seven 8x8 chroma predictors: the four regular modes
// (DC, TM, VE, HE) followed by the three DC variants for a missing top,
// a missing left, or both.
// NOTE(review): presumably indexed by the same mode enum as the 16x16
// luma table -- confirm order.
479 VP8PredFunc VP8PredChroma8[/*NUM_B_DC_MODES */] = { | 479 const VP8PredFunc VP8PredChroma8[NUM_B_DC_MODES] = { |
480 DC8uv, TM8uv, VE8uv, HE8uv, | 480 DC8uv, TM8uv, VE8uv, HE8uv, |
481 DC8uvNoTop, DC8uvNoLeft, DC8uvNoTopLeft | 481 DC8uvNoTop, DC8uvNoLeft, DC8uvNoTopLeft |
482 }; | 482 }; |
483 | 483 |
484 //------------------------------------------------------------------------------ | 484 //------------------------------------------------------------------------------ |
485 // Edge filtering functions | 485 // Edge filtering functions |
486 | 486 |
// Edge filter that reads the four samples p1, p0, q0, q1 located at
// p[-2*step] .. p[step] and rewrites only the two samples next to the edge
// (p[-step] and p[0]).
// The raw adjustment is 3 * (q0 - p0) plus a table-clipped (p1 - q1); it is
// rounded two ways ((a + 4) >> 3 and (a + 3) >> 3), limited through sclip2,
// then added to p0 and subtracted from q0, with clip1 keeping both results
// in 0..255.
// 'step' is the distance in bytes between successive samples across the edge.
// The sclip1/sclip2/clip1 lookup tables must have been initialised first.
487 // 4 pixels in, 2 pixels out | 487 // 4 pixels in, 2 pixels out |
488 static inline void do_filter2(uint8_t* p, int step) { | 488 static WEBP_INLINE void do_filter2(uint8_t* p, int step) { |
489 const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step]; | 489 const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step]; |
490 const int a = 3 * (q0 - p0) + sclip1[1020 + p1 - q1]; | 490 const int a = 3 * (q0 - p0) + sclip1[1020 + p1 - q1]; |
491 const int a1 = sclip2[112 + ((a + 4) >> 3)]; | 491 const int a1 = sclip2[112 + ((a + 4) >> 3)]; |
492 const int a2 = sclip2[112 + ((a + 3) >> 3)]; | 492 const int a2 = sclip2[112 + ((a + 3) >> 3)]; |
493 p[-step] = clip1[255 + p0 + a2]; | 493 p[-step] = clip1[255 + p0 + a2]; |
494 p[ 0] = clip1[255 + q0 - a1]; | 494 p[ 0] = clip1[255 + q0 - a1]; |
495 } | 495 } |
496 | 496 |
// Edge filter that reads p1, p0, q0, q1 (at p[-2*step] .. p[step]) and
// rewrites all four of them.
// The adjustment is derived from 3 * (q0 - p0) only: a1 and a2 are its two
// rounded, sclip2-limited versions applied to q0 and p0, and a3 = (a1 + 1) >> 1
// is the half-strength correction applied to the outer samples p1 and q1.
// clip1 keeps every written value in 0..255.
// 'step' is the distance in bytes between successive samples across the edge.
497 // 4 pixels in, 4 pixels out | 497 // 4 pixels in, 4 pixels out |
498 static inline void do_filter4(uint8_t* p, int step) { | 498 static WEBP_INLINE void do_filter4(uint8_t* p, int step) { |
499 const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step]; | 499 const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step]; |
500 const int a = 3 * (q0 - p0); | 500 const int a = 3 * (q0 - p0); |
501 const int a1 = sclip2[112 + ((a + 4) >> 3)]; | 501 const int a1 = sclip2[112 + ((a + 4) >> 3)]; |
502 const int a2 = sclip2[112 + ((a + 3) >> 3)]; | 502 const int a2 = sclip2[112 + ((a + 3) >> 3)]; |
503 const int a3 = (a1 + 1) >> 1; | 503 const int a3 = (a1 + 1) >> 1; |
504 p[-2*step] = clip1[255 + p1 + a3]; | 504 p[-2*step] = clip1[255 + p1 + a3]; |
505 p[- step] = clip1[255 + p0 + a2]; | 505 p[- step] = clip1[255 + p0 + a2]; |
506 p[ 0] = clip1[255 + q0 - a1]; | 506 p[ 0] = clip1[255 + q0 - a1]; |
507 p[ step] = clip1[255 + q1 - a3]; | 507 p[ step] = clip1[255 + q1 - a3]; |
508 } | 508 } |
509 | 509 |
// Edge filter that reads the six samples p2, p1, p0, q0, q1, q2 (at
// p[-3*step] .. p[2*step]) and rewrites all six.
// The base value 'a' is 3 * (q0 - p0) plus a table-clipped (p1 - q1), itself
// passed through sclip1. Three corrections with weights 27/128, 18/128 and
// 9/128 (each with +63 rounding) are applied to the inner, middle and outer
// sample pairs respectively: added on the p side, subtracted on the q side.
// clip1 keeps every written value in 0..255.
// 'step' is the distance in bytes between successive samples across the edge.
510 // 6 pixels in, 6 pixels out | 510 // 6 pixels in, 6 pixels out |
511 static inline void do_filter6(uint8_t* p, int step) { | 511 static WEBP_INLINE void do_filter6(uint8_t* p, int step) { |
512 const int p2 = p[-3*step], p1 = p[-2*step], p0 = p[-step]; | 512 const int p2 = p[-3*step], p1 = p[-2*step], p0 = p[-step]; |
513 const int q0 = p[0], q1 = p[step], q2 = p[2*step]; | 513 const int q0 = p[0], q1 = p[step], q2 = p[2*step]; |
514 const int a = sclip1[1020 + 3 * (q0 - p0) + sclip1[1020 + p1 - q1]]; | 514 const int a = sclip1[1020 + 3 * (q0 - p0) + sclip1[1020 + p1 - q1]]; |
515 const int a1 = (27 * a + 63) >> 7; // eq. to ((3 * a + 7) * 9) >> 7 | 515 const int a1 = (27 * a + 63) >> 7; // eq. to ((3 * a + 7) * 9) >> 7 |
516 const int a2 = (18 * a + 63) >> 7; // eq. to ((2 * a + 7) * 9) >> 7 | 516 const int a2 = (18 * a + 63) >> 7; // eq. to ((2 * a + 7) * 9) >> 7 |
517 const int a3 = (9 * a + 63) >> 7; // eq. to ((1 * a + 7) * 9) >> 7 | 517 const int a3 = (9 * a + 63) >> 7; // eq. to ((1 * a + 7) * 9) >> 7 |
518 p[-3*step] = clip1[255 + p2 + a3]; | 518 p[-3*step] = clip1[255 + p2 + a3]; |
519 p[-2*step] = clip1[255 + p1 + a2]; | 519 p[-2*step] = clip1[255 + p1 + a2]; |
520 p[- step] = clip1[255 + p0 + a1]; | 520 p[- step] = clip1[255 + p0 + a1]; |
521 p[ 0] = clip1[255 + q0 - a1]; | 521 p[ 0] = clip1[255 + q0 - a1]; |
522 p[ step] = clip1[255 + q1 - a2]; | 522 p[ step] = clip1[255 + q1 - a2]; |
523 p[ 2*step] = clip1[255 + q2 - a3]; | 523 p[ 2*step] = clip1[255 + q2 - a3]; |
524 } | 524 } |
525 | 525 |
// Returns non-zero when the abs0 lookup for (p1 - p0) or for (q1 - q0)
// exceeds 'thresh', where p1, p0, q0, q1 are the samples at
// p[-2*step] .. p[step]. FilterLoop26/FilterLoop24 use the result to pick
// the two-sample filter over the wider one.
// NOTE(review): abs0 is presumably an absolute-value table centred at index
// 255 -- its initialisation is not visible here; confirm.
526 static inline int hev(const uint8_t* p, int step, int thresh) { | 526 static WEBP_INLINE int hev(const uint8_t* p, int step, int thresh) { |
527 const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step]; | 527 const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step]; |
528 return (abs0[255 + p1 - p0] > thresh) || (abs0[255 + q1 - q0] > thresh); | 528 return (abs0[255 + p1 - p0] > thresh) || (abs0[255 + q1 - q0] > thresh); |
529 } | 529 } |
530 | 530 |
// Returns non-zero when 2 * abs0[p0 - q0] + abs1[p1 - q1] is at most
// 'thresh' (table indices are offset by 255), using the samples at
// p[-2*step] .. p[step].
// NOTE(review): abs0/abs1 are lookup tables whose contents are not visible
// here; abs1 presumably holds a scaled absolute value -- confirm.
531 static inline int needs_filter(const uint8_t* p, int step, int thresh) { | 531 static WEBP_INLINE int needs_filter(const uint8_t* p, int step, int thresh) { |
532 const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step]; | 532 const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step]; |
533 return (2 * abs0[255 + p0 - q0] + abs1[255 + p1 - q1]) <= thresh; | 533 return (2 * abs0[255 + p0 - q0] + abs1[255 + p1 - q1]) <= thresh; |
534 } | 534 } |
535 | 535 |
// Stricter filtering test over the eight samples p3..q3 located at
// p[-4*step] .. p[3*step].
// Returns 0 as soon as the edge measure 2 * abs0[p0 - q0] + abs1[p1 - q1]
// exceeds 't'. Otherwise returns non-zero only if every one of the six
// neighbouring-sample differences on either side of the edge (p3-p2, p2-p1,
// p1-p0, q3-q2, q2-q1, q1-q0), looked up through abs0, is at most 'it'.
536 static inline int needs_filter2(const uint8_t* p, int step, int t, int it) { | 536 static WEBP_INLINE int needs_filter2(const uint8_t* p, |
| 537 int step, int t, int it) { |
537 const int p3 = p[-4*step], p2 = p[-3*step], p1 = p[-2*step], p0 = p[-step]; | 538 const int p3 = p[-4*step], p2 = p[-3*step], p1 = p[-2*step], p0 = p[-step]; |
538 const int q0 = p[0], q1 = p[step], q2 = p[2*step], q3 = p[3*step]; | 539 const int q0 = p[0], q1 = p[step], q2 = p[2*step], q3 = p[3*step]; |
539 if ((2 * abs0[255 + p0 - q0] + abs1[255 + p1 - q1]) > t) | 540 if ((2 * abs0[255 + p0 - q0] + abs1[255 + p1 - q1]) > t) |
540 return 0; | 541 return 0; |
541 return abs0[255 + p3 - p2] <= it && abs0[255 + p2 - p1] <= it && | 542 return abs0[255 + p3 - p2] <= it && abs0[255 + p2 - p1] <= it && |
542 abs0[255 + p1 - p0] <= it && abs0[255 + q3 - q2] <= it && | 543 abs0[255 + p1 - p0] <= it && abs0[255 + q3 - q2] <= it && |
543 abs0[255 + q2 - q1] <= it && abs0[255 + q1 - q0] <= it; | 544 abs0[255 + q2 - q1] <= it && abs0[255 + q1 - q0] <= it; |
544 } | 545 } |
545 | 546 |
546 //------------------------------------------------------------------------------ | 547 //------------------------------------------------------------------------------ |
(...skipping 29 matching lines...) Expand all Loading... |
576 int k; | 577 int k; |
577 for (k = 3; k > 0; --k) { | 578 for (k = 3; k > 0; --k) { |
578 p += 4; | 579 p += 4; |
579 SimpleHFilter16(p, stride, thresh); | 580 SimpleHFilter16(p, stride, thresh); |
580 } | 581 } |
581 } | 582 } |
582 | 583 |
583 //------------------------------------------------------------------------------ | 584 //------------------------------------------------------------------------------ |
584 // Complex In-loop filtering (Paragraph 15.3) | 585 // Complex In-loop filtering (Paragraph 15.3) |
585 | 586 |
// Walks 'size' positions along an edge, advancing 'p' by 'vstride' each
// time, and filters across the edge using 'hstride' as the sample step.
// At each position that passes needs_filter2(thresh, ithresh), do_filter2
// is applied when hev(hev_thresh) reports true and do_filter6 otherwise;
// positions that fail the test are left unmodified.
586 static inline void FilterLoop26(uint8_t* p, int hstride, int vstride, int size, | 587 static WEBP_INLINE void FilterLoop26(uint8_t* p, |
587 int thresh, int ithresh, int hev_thresh) { | 588 int hstride, int vstride, int size, |
| 589 int thresh, int ithresh, int hev_thresh) { |
588 while (size-- > 0) { | 590 while (size-- > 0) { |
589 if (needs_filter2(p, hstride, thresh, ithresh)) { | 591 if (needs_filter2(p, hstride, thresh, ithresh)) { |
590 if (hev(p, hstride, hev_thresh)) { | 592 if (hev(p, hstride, hev_thresh)) { |
591 do_filter2(p, hstride); | 593 do_filter2(p, hstride); |
592 } else { | 594 } else { |
593 do_filter6(p, hstride); | 595 do_filter6(p, hstride); |
594 } | 596 } |
595 } | 597 } |
596 p += vstride; | 598 p += vstride; |
597 } | 599 } |
598 } | 600 } |
599 | 601 |
600 static inline void FilterLoop24(uint8_t* p, int hstride, int vstride, int size, | 602 static WEBP_INLINE void FilterLoop24(uint8_t* p, |
601 int thresh, int ithresh, int hev_thresh) { | 603 int hstride, int vstride, int size, |
| 604 int thresh, int ithresh, int hev_thresh) { |
602 while (size-- > 0) { | 605 while (size-- > 0) { |
603 if (needs_filter2(p, hstride, thresh, ithresh)) { | 606 if (needs_filter2(p, hstride, thresh, ithresh)) { |
604 if (hev(p, hstride, hev_thresh)) { | 607 if (hev(p, hstride, hev_thresh)) { |
605 do_filter2(p, hstride); | 608 do_filter2(p, hstride); |
606 } else { | 609 } else { |
607 do_filter4(p, hstride); | 610 do_filter4(p, hstride); |
608 } | 611 } |
609 } | 612 } |
610 p += vstride; | 613 p += vstride; |
611 } | 614 } |
(...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
705 VP8HFilter16i = HFilter16i; | 708 VP8HFilter16i = HFilter16i; |
706 VP8VFilter8i = VFilter8i; | 709 VP8VFilter8i = VFilter8i; |
707 VP8HFilter8i = HFilter8i; | 710 VP8HFilter8i = HFilter8i; |
708 VP8SimpleVFilter16 = SimpleVFilter16; | 711 VP8SimpleVFilter16 = SimpleVFilter16; |
709 VP8SimpleHFilter16 = SimpleHFilter16; | 712 VP8SimpleHFilter16 = SimpleHFilter16; |
710 VP8SimpleVFilter16i = SimpleVFilter16i; | 713 VP8SimpleVFilter16i = SimpleVFilter16i; |
711 VP8SimpleHFilter16i = SimpleHFilter16i; | 714 VP8SimpleHFilter16i = SimpleHFilter16i; |
712 | 715 |
713 // If defined, use CPUInfo() to overwrite some pointers with faster versions. | 716 // If defined, use CPUInfo() to overwrite some pointers with faster versions. |
714 if (VP8GetCPUInfo) { | 717 if (VP8GetCPUInfo) { |
715 #if defined(__SSE2__) || defined(_MSC_VER) | 718 #if defined(WEBP_USE_SSE2) |
716 if (VP8GetCPUInfo(kSSE2)) { | 719 if (VP8GetCPUInfo(kSSE2)) { |
717 VP8DspInitSSE2(); | 720 VP8DspInitSSE2(); |
718 } | 721 } |
719 #elif defined(__GNUC__) && defined(__ARM_NEON__) | 722 #elif defined(WEBP_USE_NEON) |
720 if (VP8GetCPUInfo(kNEON)) { | 723 if (VP8GetCPUInfo(kNEON)) { |
721 VP8DspInitNEON(); | 724 VP8DspInitNEON(); |
722 } | 725 } |
723 #endif | 726 #endif |
724 } | 727 } |
725 } | 728 } |
726 | 729 |
727 #if defined(__cplusplus) || defined(c_plusplus) | 730 #if defined(__cplusplus) || defined(c_plusplus) |
728 } // extern "C" | 731 } // extern "C" |
729 #endif | 732 #endif |
OLD | NEW |