Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(263)

Side by Side Diff: third_party/libwebp/dsp/enc.c

Issue 12942006: libwebp: update snapshot to v0.3.0-rc6 (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: local webkit layout expectations Created 7 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLD | NEW
1 // Copyright 2011 Google Inc. All Rights Reserved. 1 // Copyright 2011 Google Inc. All Rights Reserved.
2 // 2 //
3 // This code is licensed under the same terms as WebM: 3 // This code is licensed under the same terms as WebM:
4 // Software License Agreement: http://www.webmproject.org/license/software/ 4 // Software License Agreement: http://www.webmproject.org/license/software/
5 // Additional IP Rights Grant: http://www.webmproject.org/license/additional/ 5 // Additional IP Rights Grant: http://www.webmproject.org/license/additional/
6 // ----------------------------------------------------------------------------- 6 // -----------------------------------------------------------------------------
7 // 7 //
8 // Speed-critical encoding functions. 8 // Speed-critical encoding functions.
9 // 9 //
10 // Author: Skal (pascal.massimino@gmail.com) 10 // Author: Skal (pascal.massimino@gmail.com)
11 11
12 #include <stdlib.h> // for abs() 12 #include <stdlib.h> // for abs()
13 #include "./dsp.h" 13 #include "./dsp.h"
14 #include "../enc/vp8enci.h" 14 #include "../enc/vp8enci.h"
15 15
16 #if defined(__cplusplus) || defined(c_plusplus) 16 #if defined(__cplusplus) || defined(c_plusplus)
17 extern "C" { 17 extern "C" {
18 #endif 18 #endif
19 19
20 static WEBP_INLINE uint8_t clip_8b(int v) {
21 return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255;
22 }
23
24 static WEBP_INLINE int clip_max(int v, int max) {
25 return (v > max) ? max : v;
26 }
27
20 //------------------------------------------------------------------------------ 28 //------------------------------------------------------------------------------
21 // Compute susceptibility based on DCT-coeff histograms: 29 // Compute susceptibility based on DCT-coeff histograms:
22 // the higher, the "easier" the macroblock is to compress. 30 // the higher, the "easier" the macroblock is to compress.
23 31
24 static int ClipAlpha(int alpha) {
25 return alpha < 0 ? 0 : alpha > 255 ? 255 : alpha;
26 }
27
28 int VP8GetAlpha(const int histo[MAX_COEFF_THRESH + 1]) {
29 int num = 0, den = 0, val = 0;
30 int k;
31 int alpha;
32 // note: changing this loop to avoid the numerous "k + 1" slows things down.
33 for (k = 0; k < MAX_COEFF_THRESH; ++k) {
34 if (histo[k + 1]) {
35 val += histo[k + 1];
36 num += val * (k + 1);
37 den += (k + 1) * (k + 1);
38 }
39 }
40 // we scale the value to a usable [0..255] range
41 alpha = den ? 10 * num / den - 5 : 0;
42 return ClipAlpha(alpha);
43 }
44
45 const int VP8DspScan[16 + 4 + 4] = { 32 const int VP8DspScan[16 + 4 + 4] = {
46 // Luma 33 // Luma
47 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS, 34 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS,
48 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS, 35 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS,
49 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS, 36 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS,
50 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS, 37 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS,
51 38
52 0 + 0 * BPS, 4 + 0 * BPS, 0 + 4 * BPS, 4 + 4 * BPS, // U 39 0 + 0 * BPS, 4 + 0 * BPS, 0 + 4 * BPS, 4 + 4 * BPS, // U
53 8 + 0 * BPS, 12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS // V 40 8 + 0 * BPS, 12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS // V
54 }; 41 };
55 42
56 static int CollectHistogram(const uint8_t* ref, const uint8_t* pred, 43 static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
57 int start_block, int end_block) { 44 int start_block, int end_block,
58 int histo[MAX_COEFF_THRESH + 1] = { 0 }; 45 VP8Histogram* const histo) {
59 int16_t out[16]; 46 int j;
60 int j, k;
61 for (j = start_block; j < end_block; ++j) { 47 for (j = start_block; j < end_block; ++j) {
48 int k;
49 int16_t out[16];
50
62 VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out); 51 VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
63 52
64 // Convert coefficients to bin (within out[]). 53 // Convert coefficients to bin.
65 for (k = 0; k < 16; ++k) { 54 for (k = 0; k < 16; ++k) {
66 const int v = abs(out[k]) >> 2; 55 const int v = abs(out[k]) >> 3; // TODO(skal): add rounding?
67 out[k] = (v > MAX_COEFF_THRESH) ? MAX_COEFF_THRESH : v; 56 const int clipped_value = clip_max(v, MAX_COEFF_THRESH);
68 } 57 histo->distribution[clipped_value]++;
69
70 // Use bin to update histogram.
71 for (k = 0; k < 16; ++k) {
72 histo[out[k]]++;
73 } 58 }
74 } 59 }
75
76 return VP8GetAlpha(histo);
77 } 60 }
78 61
79 //------------------------------------------------------------------------------ 62 //------------------------------------------------------------------------------
80 // run-time tables (~4k) 63 // run-time tables (~4k)
81 64
82 static uint8_t clip1[255 + 510 + 1]; // clips [-255,510] to [0,255] 65 static uint8_t clip1[255 + 510 + 1]; // clips [-255,510] to [0,255]
83 66
84 // We declare this variable 'volatile' to prevent instruction reordering 67 // We declare this variable 'volatile' to prevent instruction reordering
85 // and make sure it's set to true _last_ (so as to be thread-safe) 68 // and make sure it's set to true _last_ (so as to be thread-safe)
86 static volatile int tables_ok = 0; 69 static volatile int tables_ok = 0;
87 70
88 static void InitTables(void) { 71 static void InitTables(void) {
89 if (!tables_ok) { 72 if (!tables_ok) {
90 int i; 73 int i;
91 for (i = -255; i <= 255 + 255; ++i) { 74 for (i = -255; i <= 255 + 255; ++i) {
92 clip1[255 + i] = (i < 0) ? 0 : (i > 255) ? 255 : i; 75 clip1[255 + i] = clip_8b(i);
93 } 76 }
94 tables_ok = 1; 77 tables_ok = 1;
95 } 78 }
96 } 79 }
97 80
98 static WEBP_INLINE uint8_t clip_8b(int v) {
99 return (!(v & ~0xff)) ? v : v < 0 ? 0 : 255;
100 }
101 81
102 //------------------------------------------------------------------------------ 82 //------------------------------------------------------------------------------
103 // Transforms (Paragraph 14.4) 83 // Transforms (Paragraph 14.4)
104 84
105 #define STORE(x, y, v) \ 85 #define STORE(x, y, v) \
106 dst[(x) + (y) * BPS] = clip_8b(ref[(x) + (y) * BPS] + ((v) >> 3)) 86 dst[(x) + (y) * BPS] = clip_8b(ref[(x) + (y) * BPS] + ((v) >> 3))
107 87
108 static const int kC1 = 20091 + (1 << 16); 88 static const int kC1 = 20091 + (1 << 16);
109 static const int kC2 = 35468; 89 static const int kC2 = 35468;
110 #define MUL(a, b) (((a) * (b)) >> 16) 90 #define MUL(a, b) (((a) * (b)) >> 16)
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
147 ITransformOne(ref, in, dst); 127 ITransformOne(ref, in, dst);
148 if (do_two) { 128 if (do_two) {
149 ITransformOne(ref + 4, in + 16, dst + 4); 129 ITransformOne(ref + 4, in + 16, dst + 4);
150 } 130 }
151 } 131 }
152 132
153 static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) { 133 static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
154 int i; 134 int i;
155 int tmp[16]; 135 int tmp[16];
156 for (i = 0; i < 4; ++i, src += BPS, ref += BPS) { 136 for (i = 0; i < 4; ++i, src += BPS, ref += BPS) {
157 const int d0 = src[0] - ref[0]; 137 const int d0 = src[0] - ref[0]; // 9bit dynamic range ([-255,255])
158 const int d1 = src[1] - ref[1]; 138 const int d1 = src[1] - ref[1];
159 const int d2 = src[2] - ref[2]; 139 const int d2 = src[2] - ref[2];
160 const int d3 = src[3] - ref[3]; 140 const int d3 = src[3] - ref[3];
161 const int a0 = (d0 + d3) << 3; 141 const int a0 = (d0 + d3); // 10b [-510,510]
162 const int a1 = (d1 + d2) << 3; 142 const int a1 = (d1 + d2);
163 const int a2 = (d1 - d2) << 3; 143 const int a2 = (d1 - d2);
164 const int a3 = (d0 - d3) << 3; 144 const int a3 = (d0 - d3);
165 tmp[0 + i * 4] = (a0 + a1); 145 tmp[0 + i * 4] = (a0 + a1) << 3; // 14b [-8160,8160]
166 tmp[1 + i * 4] = (a2 * 2217 + a3 * 5352 + 14500) >> 12; 146 tmp[1 + i * 4] = (a2 * 2217 + a3 * 5352 + 1812) >> 9; // [-7536,7542]
167 tmp[2 + i * 4] = (a0 - a1); 147 tmp[2 + i * 4] = (a0 - a1) << 3;
168 tmp[3 + i * 4] = (a3 * 2217 - a2 * 5352 + 7500) >> 12; 148 tmp[3 + i * 4] = (a3 * 2217 - a2 * 5352 + 937) >> 9;
169 } 149 }
170 for (i = 0; i < 4; ++i) { 150 for (i = 0; i < 4; ++i) {
171 const int a0 = (tmp[0 + i] + tmp[12 + i]); 151 const int a0 = (tmp[0 + i] + tmp[12 + i]); // 15b
172 const int a1 = (tmp[4 + i] + tmp[ 8 + i]); 152 const int a1 = (tmp[4 + i] + tmp[ 8 + i]);
173 const int a2 = (tmp[4 + i] - tmp[ 8 + i]); 153 const int a2 = (tmp[4 + i] - tmp[ 8 + i]);
174 const int a3 = (tmp[0 + i] - tmp[12 + i]); 154 const int a3 = (tmp[0 + i] - tmp[12 + i]);
175 out[0 + i] = (a0 + a1 + 7) >> 4; 155 out[0 + i] = (a0 + a1 + 7) >> 4; // 12b
176 out[4 + i] = ((a2 * 2217 + a3 * 5352 + 12000) >> 16) + (a3 != 0); 156 out[4 + i] = ((a2 * 2217 + a3 * 5352 + 12000) >> 16) + (a3 != 0);
177 out[8 + i] = (a0 - a1 + 7) >> 4; 157 out[8 + i] = (a0 - a1 + 7) >> 4;
178 out[12+ i] = ((a3 * 2217 - a2 * 5352 + 51000) >> 16); 158 out[12+ i] = ((a3 * 2217 - a2 * 5352 + 51000) >> 16);
179 } 159 }
180 } 160 }
181 161
182 static void ITransformWHT(const int16_t* in, int16_t* out) { 162 static void ITransformWHT(const int16_t* in, int16_t* out) {
183 int tmp[16]; 163 int tmp[16];
184 int i; 164 int i;
185 for (i = 0; i < 4; ++i) { 165 for (i = 0; i < 4; ++i) {
(...skipping 396 matching lines...) Expand 10 before | Expand all | Expand 10 after
582 // reconstructed samples. 562 // reconstructed samples.
583 563
584 // Hadamard transform 564 // Hadamard transform
585 // Returns the weighted sum of the absolute value of transformed coefficients. 565 // Returns the weighted sum of the absolute value of transformed coefficients.
586 static int TTransform(const uint8_t* in, const uint16_t* w) { 566 static int TTransform(const uint8_t* in, const uint16_t* w) {
587 int sum = 0; 567 int sum = 0;
588 int tmp[16]; 568 int tmp[16];
589 int i; 569 int i;
590 // horizontal pass 570 // horizontal pass
591 for (i = 0; i < 4; ++i, in += BPS) { 571 for (i = 0; i < 4; ++i, in += BPS) {
592 const int a0 = (in[0] + in[2]) << 2; 572 const int a0 = in[0] + in[2];
593 const int a1 = (in[1] + in[3]) << 2; 573 const int a1 = in[1] + in[3];
594 const int a2 = (in[1] - in[3]) << 2; 574 const int a2 = in[1] - in[3];
595 const int a3 = (in[0] - in[2]) << 2; 575 const int a3 = in[0] - in[2];
596 tmp[0 + i * 4] = a0 + a1 + (a0 != 0); 576 tmp[0 + i * 4] = a0 + a1;
597 tmp[1 + i * 4] = a3 + a2; 577 tmp[1 + i * 4] = a3 + a2;
598 tmp[2 + i * 4] = a3 - a2; 578 tmp[2 + i * 4] = a3 - a2;
599 tmp[3 + i * 4] = a0 - a1; 579 tmp[3 + i * 4] = a0 - a1;
600 } 580 }
601 // vertical pass 581 // vertical pass
602 for (i = 0; i < 4; ++i, ++w) { 582 for (i = 0; i < 4; ++i, ++w) {
603 const int a0 = (tmp[0 + i] + tmp[8 + i]); 583 const int a0 = tmp[0 + i] + tmp[8 + i];
604 const int a1 = (tmp[4 + i] + tmp[12+ i]); 584 const int a1 = tmp[4 + i] + tmp[12+ i];
605 const int a2 = (tmp[4 + i] - tmp[12+ i]); 585 const int a2 = tmp[4 + i] - tmp[12+ i];
606 const int a3 = (tmp[0 + i] - tmp[8 + i]); 586 const int a3 = tmp[0 + i] - tmp[8 + i];
607 const int b0 = a0 + a1; 587 const int b0 = a0 + a1;
608 const int b1 = a3 + a2; 588 const int b1 = a3 + a2;
609 const int b2 = a3 - a2; 589 const int b2 = a3 - a2;
610 const int b3 = a0 - a1; 590 const int b3 = a0 - a1;
611 // abs((b + (b<0) + 3) >> 3) = (abs(b) + 3) >> 3 591
612 sum += w[ 0] * ((abs(b0) + 3) >> 3); 592 sum += w[ 0] * abs(b0);
613 sum += w[ 4] * ((abs(b1) + 3) >> 3); 593 sum += w[ 4] * abs(b1);
614 sum += w[ 8] * ((abs(b2) + 3) >> 3); 594 sum += w[ 8] * abs(b2);
615 sum += w[12] * ((abs(b3) + 3) >> 3); 595 sum += w[12] * abs(b3);
616 } 596 }
617 return sum; 597 return sum;
618 } 598 }
619 599
620 static int Disto4x4(const uint8_t* const a, const uint8_t* const b, 600 static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
621 const uint16_t* const w) { 601 const uint16_t* const w) {
622 const int sum1 = TTransform(a, w); 602 const int sum1 = TTransform(a, w);
623 const int sum2 = TTransform(b, w); 603 const int sum2 = TTransform(b, w);
624 return (abs(sum2 - sum1) + 8) >> 4; 604 return abs(sum2 - sum1) >> 5;
625 } 605 }
626 606
627 static int Disto16x16(const uint8_t* const a, const uint8_t* const b, 607 static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
628 const uint16_t* const w) { 608 const uint16_t* const w) {
629 int D = 0; 609 int D = 0;
630 int x, y; 610 int x, y;
631 for (y = 0; y < 16 * BPS; y += 4 * BPS) { 611 for (y = 0; y < 16 * BPS; y += 4 * BPS) {
632 for (x = 0; x < 16; x += 4) { 612 for (x = 0; x < 16; x += 4) {
633 D += Disto4x4(a + x + y, b + x + y, w); 613 D += Disto4x4(a + x + y, b + x + y, w);
634 } 614 }
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after
699 VP8Metric VP8SSE16x16; 679 VP8Metric VP8SSE16x16;
700 VP8Metric VP8SSE8x8; 680 VP8Metric VP8SSE8x8;
701 VP8Metric VP8SSE16x8; 681 VP8Metric VP8SSE16x8;
702 VP8Metric VP8SSE4x4; 682 VP8Metric VP8SSE4x4;
703 VP8WMetric VP8TDisto4x4; 683 VP8WMetric VP8TDisto4x4;
704 VP8WMetric VP8TDisto16x16; 684 VP8WMetric VP8TDisto16x16;
705 VP8QuantizeBlock VP8EncQuantizeBlock; 685 VP8QuantizeBlock VP8EncQuantizeBlock;
706 VP8BlockCopy VP8Copy4x4; 686 VP8BlockCopy VP8Copy4x4;
707 687
708 extern void VP8EncDspInitSSE2(void); 688 extern void VP8EncDspInitSSE2(void);
689 extern void VP8EncDspInitNEON(void);
709 690
710 void VP8EncDspInit(void) { 691 void VP8EncDspInit(void) {
711 InitTables(); 692 InitTables();
712 693
713 // default C implementations 694 // default C implementations
714 VP8CollectHistogram = CollectHistogram; 695 VP8CollectHistogram = CollectHistogram;
715 VP8ITransform = ITransform; 696 VP8ITransform = ITransform;
716 VP8FTransform = FTransform; 697 VP8FTransform = FTransform;
717 VP8ITransformWHT = ITransformWHT; 698 VP8ITransformWHT = ITransformWHT;
718 VP8FTransformWHT = FTransformWHT; 699 VP8FTransformWHT = FTransformWHT;
719 VP8EncPredLuma4 = Intra4Preds; 700 VP8EncPredLuma4 = Intra4Preds;
720 VP8EncPredLuma16 = Intra16Preds; 701 VP8EncPredLuma16 = Intra16Preds;
721 VP8EncPredChroma8 = IntraChromaPreds; 702 VP8EncPredChroma8 = IntraChromaPreds;
722 VP8SSE16x16 = SSE16x16; 703 VP8SSE16x16 = SSE16x16;
723 VP8SSE8x8 = SSE8x8; 704 VP8SSE8x8 = SSE8x8;
724 VP8SSE16x8 = SSE16x8; 705 VP8SSE16x8 = SSE16x8;
725 VP8SSE4x4 = SSE4x4; 706 VP8SSE4x4 = SSE4x4;
726 VP8TDisto4x4 = Disto4x4; 707 VP8TDisto4x4 = Disto4x4;
727 VP8TDisto16x16 = Disto16x16; 708 VP8TDisto16x16 = Disto16x16;
728 VP8EncQuantizeBlock = QuantizeBlock; 709 VP8EncQuantizeBlock = QuantizeBlock;
729 VP8Copy4x4 = Copy4x4; 710 VP8Copy4x4 = Copy4x4;
730 711
731 // If defined, use CPUInfo() to overwrite some pointers with faster versions. 712 // If defined, use CPUInfo() to overwrite some pointers with faster versions.
732 if (VP8GetCPUInfo) { 713 if (VP8GetCPUInfo) {
733 #if defined(WEBP_USE_SSE2) 714 #if defined(WEBP_USE_SSE2)
734 if (VP8GetCPUInfo(kSSE2)) { 715 if (VP8GetCPUInfo(kSSE2)) {
735 VP8EncDspInitSSE2(); 716 VP8EncDspInitSSE2();
736 } 717 }
718 #elif defined(WEBP_USE_NEON)
719 if (VP8GetCPUInfo(kNEON)) {
720 VP8EncDspInitNEON();
721 }
737 #endif 722 #endif
738 } 723 }
739 } 724 }
740 725
741 #if defined(__cplusplus) || defined(c_plusplus) 726 #if defined(__cplusplus) || defined(c_plusplus)
742 } // extern "C" 727 } // extern "C"
743 #endif 728 #endif
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698