| OLD | NEW |
| 1 // Copyright 2010 Google Inc. | 1 // Copyright 2010 Google Inc. All Rights Reserved. |
| 2 // | 2 // |
| 3 // This code is licensed under the same terms as WebM: | 3 // This code is licensed under the same terms as WebM: |
| 4 // Software License Agreement: http://www.webmproject.org/license/software/ | 4 // Software License Agreement: http://www.webmproject.org/license/software/ |
| 5 // Additional IP Rights Grant: http://www.webmproject.org/license/additional/ | 5 // Additional IP Rights Grant: http://www.webmproject.org/license/additional/ |
| 6 // ----------------------------------------------------------------------------- | 6 // ----------------------------------------------------------------------------- |
| 7 // | 7 // |
| 8 // Speed-critical decoding functions. | 8 // Speed-critical decoding functions. |
| 9 // | 9 // |
| 10 // Author: Skal (pascal.massimino@gmail.com) | 10 // Author: Skal (pascal.massimino@gmail.com) |
| 11 | 11 |
| (...skipping 30 matching lines...) Expand all Loading... |
| 42 for (i = -112; i <= 112; ++i) { | 42 for (i = -112; i <= 112; ++i) { |
| 43 sclip2[112 + i] = (i < -16) ? -16 : (i > 15) ? 15 : i; | 43 sclip2[112 + i] = (i < -16) ? -16 : (i > 15) ? 15 : i; |
| 44 } | 44 } |
| 45 for (i = -255; i <= 255 + 255; ++i) { | 45 for (i = -255; i <= 255 + 255; ++i) { |
| 46 clip1[255 + i] = (i < 0) ? 0 : (i > 255) ? 255 : i; | 46 clip1[255 + i] = (i < 0) ? 0 : (i > 255) ? 255 : i; |
| 47 } | 47 } |
| 48 tables_ok = 1; | 48 tables_ok = 1; |
| 49 } | 49 } |
| 50 } | 50 } |
| 51 | 51 |
| 52 static inline uint8_t clip_8b(int v) { | 52 static WEBP_INLINE uint8_t clip_8b(int v) { |
| 53 return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255; | 53 return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255; |
| 54 } | 54 } |
| 55 | 55 |
| 56 //------------------------------------------------------------------------------ | 56 //------------------------------------------------------------------------------ |
| 57 // Transforms (Paragraph 14.4) | 57 // Transforms (Paragraph 14.4) |
| 58 | 58 |
| 59 #define STORE(x, y, v) \ | 59 #define STORE(x, y, v) \ |
| 60 dst[x + y * BPS] = clip_8b(dst[x + y * BPS] + ((v) >> 3)) | 60 dst[x + y * BPS] = clip_8b(dst[x + y * BPS] + ((v) >> 3)) |
| 61 | 61 |
| 62 static const int kC1 = 20091 + (1 << 16); | 62 static const int kC1 = 20091 + (1 << 16); |
| (...skipping 101 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 164 } | 164 } |
| 165 } | 165 } |
| 166 | 166 |
| 167 void (*VP8TransformWHT)(const int16_t* in, int16_t* out) = TransformWHT; | 167 void (*VP8TransformWHT)(const int16_t* in, int16_t* out) = TransformWHT; |
| 168 | 168 |
| 169 //------------------------------------------------------------------------------ | 169 //------------------------------------------------------------------------------ |
| 170 // Intra predictions | 170 // Intra predictions |
| 171 | 171 |
| 172 #define DST(x, y) dst[(x) + (y) * BPS] | 172 #define DST(x, y) dst[(x) + (y) * BPS] |
| 173 | 173 |
| 174 static inline void TrueMotion(uint8_t *dst, int size) { | 174 static WEBP_INLINE void TrueMotion(uint8_t *dst, int size) { |
| 175 const uint8_t* top = dst - BPS; | 175 const uint8_t* top = dst - BPS; |
| 176 const uint8_t* const clip0 = clip1 + 255 - top[-1]; | 176 const uint8_t* const clip0 = clip1 + 255 - top[-1]; |
| 177 int y; | 177 int y; |
| 178 for (y = 0; y < size; ++y) { | 178 for (y = 0; y < size; ++y) { |
| 179 const uint8_t* const clip = clip0 + dst[-1]; | 179 const uint8_t* const clip = clip0 + dst[-1]; |
| 180 int x; | 180 int x; |
| 181 for (x = 0; x < size; ++x) { | 181 for (x = 0; x < size; ++x) { |
| 182 dst[x] = clip[top[x]]; | 182 dst[x] = clip[top[x]]; |
| 183 } | 183 } |
| 184 dst += BPS; | 184 dst += BPS; |
| (...skipping 14 matching lines...) Expand all Loading... |
| 199 } | 199 } |
| 200 | 200 |
| 201 static void HE16(uint8_t *dst) { // horizontal | 201 static void HE16(uint8_t *dst) { // horizontal |
| 202 int j; | 202 int j; |
| 203 for (j = 16; j > 0; --j) { | 203 for (j = 16; j > 0; --j) { |
| 204 memset(dst, dst[-1], 16); | 204 memset(dst, dst[-1], 16); |
| 205 dst += BPS; | 205 dst += BPS; |
| 206 } | 206 } |
| 207 } | 207 } |
| 208 | 208 |
| 209 static inline void Put16(int v, uint8_t* dst) { | 209 static WEBP_INLINE void Put16(int v, uint8_t* dst) { |
| 210 int j; | 210 int j; |
| 211 for (j = 0; j < 16; ++j) { | 211 for (j = 0; j < 16; ++j) { |
| 212 memset(dst + j * BPS, v, 16); | 212 memset(dst + j * BPS, v, 16); |
| 213 } | 213 } |
| 214 } | 214 } |
| 215 | 215 |
| 216 static void DC16(uint8_t *dst) { // DC | 216 static void DC16(uint8_t *dst) { // DC |
| 217 int DC = 16; | 217 int DC = 16; |
| 218 int j; | 218 int j; |
| 219 for (j = 0; j < 16; ++j) { | 219 for (j = 0; j < 16; ++j) { |
| (...skipping 199 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 419 | 419 |
| 420 static void HE8uv(uint8_t *dst) { // horizontal | 420 static void HE8uv(uint8_t *dst) { // horizontal |
| 421 int j; | 421 int j; |
| 422 for (j = 0; j < 8; ++j) { | 422 for (j = 0; j < 8; ++j) { |
| 423 memset(dst, dst[-1], 8); | 423 memset(dst, dst[-1], 8); |
| 424 dst += BPS; | 424 dst += BPS; |
| 425 } | 425 } |
| 426 } | 426 } |
| 427 | 427 |
| 428 // helper for chroma-DC predictions | 428 // helper for chroma-DC predictions |
| 429 static inline void Put8x8uv(uint64_t v, uint8_t* dst) { | 429 static WEBP_INLINE void Put8x8uv(uint64_t v, uint8_t* dst) { |
| 430 int j; | 430 int j; |
| 431 for (j = 0; j < 8; ++j) { | 431 for (j = 0; j < 8; ++j) { |
| 432 *(uint64_t*)(dst + j * BPS) = v; | 432 *(uint64_t*)(dst + j * BPS) = v; |
| 433 } | 433 } |
| 434 } | 434 } |
| 435 | 435 |
| 436 static void DC8uv(uint8_t *dst) { // DC | 436 static void DC8uv(uint8_t *dst) { // DC |
| 437 int dc0 = 8; | 437 int dc0 = 8; |
| 438 int i; | 438 int i; |
| 439 for (i = 0; i < 8; ++i) { | 439 for (i = 0; i < 8; ++i) { |
| (...skipping 20 matching lines...) Expand all Loading... |
| 460 Put8x8uv((uint64_t)((dc0 >> 3) * 0x0101010101010101ULL), dst); | 460 Put8x8uv((uint64_t)((dc0 >> 3) * 0x0101010101010101ULL), dst); |
| 461 } | 461 } |
| 462 | 462 |
| 463 static void DC8uvNoTopLeft(uint8_t *dst) { // DC with nothing | 463 static void DC8uvNoTopLeft(uint8_t *dst) { // DC with nothing |
| 464 Put8x8uv(0x8080808080808080ULL, dst); | 464 Put8x8uv(0x8080808080808080ULL, dst); |
| 465 } | 465 } |
| 466 | 466 |
| 467 //------------------------------------------------------------------------------ | 467 //------------------------------------------------------------------------------ |
| 468 // default C implementations | 468 // default C implementations |
| 469 | 469 |
| 470 VP8PredFunc VP8PredLuma4[/* NUM_BMODES */] = { | 470 const VP8PredFunc VP8PredLuma4[NUM_BMODES] = { |
| 471 DC4, TM4, VE4, HE4, RD4, VR4, LD4, VL4, HD4, HU4 | 471 DC4, TM4, VE4, HE4, RD4, VR4, LD4, VL4, HD4, HU4 |
| 472 }; | 472 }; |
| 473 | 473 |
| 474 VP8PredFunc VP8PredLuma16[/*NUM_B_DC_MODES */] = { | 474 const VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES] = { |
| 475 DC16, TM16, VE16, HE16, | 475 DC16, TM16, VE16, HE16, |
| 476 DC16NoTop, DC16NoLeft, DC16NoTopLeft | 476 DC16NoTop, DC16NoLeft, DC16NoTopLeft |
| 477 }; | 477 }; |
| 478 | 478 |
| 479 VP8PredFunc VP8PredChroma8[/*NUM_B_DC_MODES */] = { | 479 const VP8PredFunc VP8PredChroma8[NUM_B_DC_MODES] = { |
| 480 DC8uv, TM8uv, VE8uv, HE8uv, | 480 DC8uv, TM8uv, VE8uv, HE8uv, |
| 481 DC8uvNoTop, DC8uvNoLeft, DC8uvNoTopLeft | 481 DC8uvNoTop, DC8uvNoLeft, DC8uvNoTopLeft |
| 482 }; | 482 }; |
| 483 | 483 |
| 484 //------------------------------------------------------------------------------ | 484 //------------------------------------------------------------------------------ |
| 485 // Edge filtering functions | 485 // Edge filtering functions |
| 486 | 486 |
| 487 // 4 pixels in, 2 pixels out | 487 // 4 pixels in, 2 pixels out |
| 488 static inline void do_filter2(uint8_t* p, int step) { | 488 static WEBP_INLINE void do_filter2(uint8_t* p, int step) { |
| 489 const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step]; | 489 const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step]; |
| 490 const int a = 3 * (q0 - p0) + sclip1[1020 + p1 - q1]; | 490 const int a = 3 * (q0 - p0) + sclip1[1020 + p1 - q1]; |
| 491 const int a1 = sclip2[112 + ((a + 4) >> 3)]; | 491 const int a1 = sclip2[112 + ((a + 4) >> 3)]; |
| 492 const int a2 = sclip2[112 + ((a + 3) >> 3)]; | 492 const int a2 = sclip2[112 + ((a + 3) >> 3)]; |
| 493 p[-step] = clip1[255 + p0 + a2]; | 493 p[-step] = clip1[255 + p0 + a2]; |
| 494 p[ 0] = clip1[255 + q0 - a1]; | 494 p[ 0] = clip1[255 + q0 - a1]; |
| 495 } | 495 } |
| 496 | 496 |
| 497 // 4 pixels in, 4 pixels out | 497 // 4 pixels in, 4 pixels out |
| 498 static inline void do_filter4(uint8_t* p, int step) { | 498 static WEBP_INLINE void do_filter4(uint8_t* p, int step) { |
| 499 const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step]; | 499 const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step]; |
| 500 const int a = 3 * (q0 - p0); | 500 const int a = 3 * (q0 - p0); |
| 501 const int a1 = sclip2[112 + ((a + 4) >> 3)]; | 501 const int a1 = sclip2[112 + ((a + 4) >> 3)]; |
| 502 const int a2 = sclip2[112 + ((a + 3) >> 3)]; | 502 const int a2 = sclip2[112 + ((a + 3) >> 3)]; |
| 503 const int a3 = (a1 + 1) >> 1; | 503 const int a3 = (a1 + 1) >> 1; |
| 504 p[-2*step] = clip1[255 + p1 + a3]; | 504 p[-2*step] = clip1[255 + p1 + a3]; |
| 505 p[- step] = clip1[255 + p0 + a2]; | 505 p[- step] = clip1[255 + p0 + a2]; |
| 506 p[ 0] = clip1[255 + q0 - a1]; | 506 p[ 0] = clip1[255 + q0 - a1]; |
| 507 p[ step] = clip1[255 + q1 - a3]; | 507 p[ step] = clip1[255 + q1 - a3]; |
| 508 } | 508 } |
| 509 | 509 |
| 510 // 6 pixels in, 6 pixels out | 510 // 6 pixels in, 6 pixels out |
| 511 static inline void do_filter6(uint8_t* p, int step) { | 511 static WEBP_INLINE void do_filter6(uint8_t* p, int step) { |
| 512 const int p2 = p[-3*step], p1 = p[-2*step], p0 = p[-step]; | 512 const int p2 = p[-3*step], p1 = p[-2*step], p0 = p[-step]; |
| 513 const int q0 = p[0], q1 = p[step], q2 = p[2*step]; | 513 const int q0 = p[0], q1 = p[step], q2 = p[2*step]; |
| 514 const int a = sclip1[1020 + 3 * (q0 - p0) + sclip1[1020 + p1 - q1]]; | 514 const int a = sclip1[1020 + 3 * (q0 - p0) + sclip1[1020 + p1 - q1]]; |
| 515 const int a1 = (27 * a + 63) >> 7; // eq. to ((3 * a + 7) * 9) >> 7 | 515 const int a1 = (27 * a + 63) >> 7; // eq. to ((3 * a + 7) * 9) >> 7 |
| 516 const int a2 = (18 * a + 63) >> 7; // eq. to ((2 * a + 7) * 9) >> 7 | 516 const int a2 = (18 * a + 63) >> 7; // eq. to ((2 * a + 7) * 9) >> 7 |
| 517 const int a3 = (9 * a + 63) >> 7; // eq. to ((1 * a + 7) * 9) >> 7 | 517 const int a3 = (9 * a + 63) >> 7; // eq. to ((1 * a + 7) * 9) >> 7 |
| 518 p[-3*step] = clip1[255 + p2 + a3]; | 518 p[-3*step] = clip1[255 + p2 + a3]; |
| 519 p[-2*step] = clip1[255 + p1 + a2]; | 519 p[-2*step] = clip1[255 + p1 + a2]; |
| 520 p[- step] = clip1[255 + p0 + a1]; | 520 p[- step] = clip1[255 + p0 + a1]; |
| 521 p[ 0] = clip1[255 + q0 - a1]; | 521 p[ 0] = clip1[255 + q0 - a1]; |
| 522 p[ step] = clip1[255 + q1 - a2]; | 522 p[ step] = clip1[255 + q1 - a2]; |
| 523 p[ 2*step] = clip1[255 + q2 - a3]; | 523 p[ 2*step] = clip1[255 + q2 - a3]; |
| 524 } | 524 } |
| 525 | 525 |
| 526 static inline int hev(const uint8_t* p, int step, int thresh) { | 526 static WEBP_INLINE int hev(const uint8_t* p, int step, int thresh) { |
| 527 const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step]; | 527 const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step]; |
| 528 return (abs0[255 + p1 - p0] > thresh) || (abs0[255 + q1 - q0] > thresh); | 528 return (abs0[255 + p1 - p0] > thresh) || (abs0[255 + q1 - q0] > thresh); |
| 529 } | 529 } |
| 530 | 530 |
| 531 static inline int needs_filter(const uint8_t* p, int step, int thresh) { | 531 static WEBP_INLINE int needs_filter(const uint8_t* p, int step, int thresh) { |
| 532 const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step]; | 532 const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step]; |
| 533 return (2 * abs0[255 + p0 - q0] + abs1[255 + p1 - q1]) <= thresh; | 533 return (2 * abs0[255 + p0 - q0] + abs1[255 + p1 - q1]) <= thresh; |
| 534 } | 534 } |
| 535 | 535 |
| 536 static inline int needs_filter2(const uint8_t* p, int step, int t, int it) { | 536 static WEBP_INLINE int needs_filter2(const uint8_t* p, |
| 537 int step, int t, int it) { |
| 537 const int p3 = p[-4*step], p2 = p[-3*step], p1 = p[-2*step], p0 = p[-step]; | 538 const int p3 = p[-4*step], p2 = p[-3*step], p1 = p[-2*step], p0 = p[-step]; |
| 538 const int q0 = p[0], q1 = p[step], q2 = p[2*step], q3 = p[3*step]; | 539 const int q0 = p[0], q1 = p[step], q2 = p[2*step], q3 = p[3*step]; |
| 539 if ((2 * abs0[255 + p0 - q0] + abs1[255 + p1 - q1]) > t) | 540 if ((2 * abs0[255 + p0 - q0] + abs1[255 + p1 - q1]) > t) |
| 540 return 0; | 541 return 0; |
| 541 return abs0[255 + p3 - p2] <= it && abs0[255 + p2 - p1] <= it && | 542 return abs0[255 + p3 - p2] <= it && abs0[255 + p2 - p1] <= it && |
| 542 abs0[255 + p1 - p0] <= it && abs0[255 + q3 - q2] <= it && | 543 abs0[255 + p1 - p0] <= it && abs0[255 + q3 - q2] <= it && |
| 543 abs0[255 + q2 - q1] <= it && abs0[255 + q1 - q0] <= it; | 544 abs0[255 + q2 - q1] <= it && abs0[255 + q1 - q0] <= it; |
| 544 } | 545 } |
| 545 | 546 |
| 546 //------------------------------------------------------------------------------ | 547 //------------------------------------------------------------------------------ |
| (...skipping 29 matching lines...) Expand all Loading... |
| 576 int k; | 577 int k; |
| 577 for (k = 3; k > 0; --k) { | 578 for (k = 3; k > 0; --k) { |
| 578 p += 4; | 579 p += 4; |
| 579 SimpleHFilter16(p, stride, thresh); | 580 SimpleHFilter16(p, stride, thresh); |
| 580 } | 581 } |
| 581 } | 582 } |
| 582 | 583 |
| 583 //------------------------------------------------------------------------------ | 584 //------------------------------------------------------------------------------ |
| 584 // Complex In-loop filtering (Paragraph 15.3) | 585 // Complex In-loop filtering (Paragraph 15.3) |
| 585 | 586 |
| 586 static inline void FilterLoop26(uint8_t* p, int hstride, int vstride, int size, | 587 static WEBP_INLINE void FilterLoop26(uint8_t* p, |
| 587 int thresh, int ithresh, int hev_thresh) { | 588 int hstride, int vstride, int size, |
| 589 int thresh, int ithresh, int hev_thresh) { |
| 588 while (size-- > 0) { | 590 while (size-- > 0) { |
| 589 if (needs_filter2(p, hstride, thresh, ithresh)) { | 591 if (needs_filter2(p, hstride, thresh, ithresh)) { |
| 590 if (hev(p, hstride, hev_thresh)) { | 592 if (hev(p, hstride, hev_thresh)) { |
| 591 do_filter2(p, hstride); | 593 do_filter2(p, hstride); |
| 592 } else { | 594 } else { |
| 593 do_filter6(p, hstride); | 595 do_filter6(p, hstride); |
| 594 } | 596 } |
| 595 } | 597 } |
| 596 p += vstride; | 598 p += vstride; |
| 597 } | 599 } |
| 598 } | 600 } |
| 599 | 601 |
| 600 static inline void FilterLoop24(uint8_t* p, int hstride, int vstride, int size, | 602 static WEBP_INLINE void FilterLoop24(uint8_t* p, |
| 601 int thresh, int ithresh, int hev_thresh) { | 603 int hstride, int vstride, int size, |
| 604 int thresh, int ithresh, int hev_thresh) { |
| 602 while (size-- > 0) { | 605 while (size-- > 0) { |
| 603 if (needs_filter2(p, hstride, thresh, ithresh)) { | 606 if (needs_filter2(p, hstride, thresh, ithresh)) { |
| 604 if (hev(p, hstride, hev_thresh)) { | 607 if (hev(p, hstride, hev_thresh)) { |
| 605 do_filter2(p, hstride); | 608 do_filter2(p, hstride); |
| 606 } else { | 609 } else { |
| 607 do_filter4(p, hstride); | 610 do_filter4(p, hstride); |
| 608 } | 611 } |
| 609 } | 612 } |
| 610 p += vstride; | 613 p += vstride; |
| 611 } | 614 } |
| (...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 705 VP8HFilter16i = HFilter16i; | 708 VP8HFilter16i = HFilter16i; |
| 706 VP8VFilter8i = VFilter8i; | 709 VP8VFilter8i = VFilter8i; |
| 707 VP8HFilter8i = HFilter8i; | 710 VP8HFilter8i = HFilter8i; |
| 708 VP8SimpleVFilter16 = SimpleVFilter16; | 711 VP8SimpleVFilter16 = SimpleVFilter16; |
| 709 VP8SimpleHFilter16 = SimpleHFilter16; | 712 VP8SimpleHFilter16 = SimpleHFilter16; |
| 710 VP8SimpleVFilter16i = SimpleVFilter16i; | 713 VP8SimpleVFilter16i = SimpleVFilter16i; |
| 711 VP8SimpleHFilter16i = SimpleHFilter16i; | 714 VP8SimpleHFilter16i = SimpleHFilter16i; |
| 712 | 715 |
| 713 // If defined, use CPUInfo() to overwrite some pointers with faster versions. | 716 // If defined, use CPUInfo() to overwrite some pointers with faster versions. |
| 714 if (VP8GetCPUInfo) { | 717 if (VP8GetCPUInfo) { |
| 715 #if defined(__SSE2__) || defined(_MSC_VER) | 718 #if defined(WEBP_USE_SSE2) |
| 716 if (VP8GetCPUInfo(kSSE2)) { | 719 if (VP8GetCPUInfo(kSSE2)) { |
| 717 VP8DspInitSSE2(); | 720 VP8DspInitSSE2(); |
| 718 } | 721 } |
| 719 #elif defined(__GNUC__) && defined(__ARM_NEON__) | 722 #elif defined(WEBP_USE_NEON) |
| 720 if (VP8GetCPUInfo(kNEON)) { | 723 if (VP8GetCPUInfo(kNEON)) { |
| 721 VP8DspInitNEON(); | 724 VP8DspInitNEON(); |
| 722 } | 725 } |
| 723 #endif | 726 #endif |
| 724 } | 727 } |
| 725 } | 728 } |
| 726 | 729 |
| 727 #if defined(__cplusplus) || defined(c_plusplus) | 730 #if defined(__cplusplus) || defined(c_plusplus) |
| 728 } // extern "C" | 731 } // extern "C" |
| 729 #endif | 732 #endif |
| OLD | NEW |