third_party/libwebp/dsp/enc.c - Issue 12942006: libwebp: update snapshot to v0.3.0-rc6

Side by Side Diff: third_party/libwebp/dsp/enc.c

Issue 12942006: libwebp: update snapshot to v0.3.0-rc6 (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: local webkit layout expectations Created 7 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2011 Google Inc. All Rights Reserved.	1 // Copyright 2011 Google Inc. All Rights Reserved.

2 //	2 //

3 // This code is licensed under the same terms as WebM:	3 // This code is licensed under the same terms as WebM:

4 // Software License Agreement: http://www.webmproject.org/license/software/	4 // Software License Agreement: http://www.webmproject.org/license/software/

5 // Additional IP Rights Grant: http://www.webmproject.org/license/additional/	5 // Additional IP Rights Grant: http://www.webmproject.org/license/additional/

6 // -----------------------------------------------------------------------------	6 // -----------------------------------------------------------------------------

7 //	7 //

8 // Speed-critical encoding functions.	8 // Speed-critical encoding functions.

9 //	9 //

10 // Author: Skal (pascal.massimino@gmail.com)	10 // Author: Skal (pascal.massimino@gmail.com)

11	11

12 #include <stdlib.h> // for abs()	12 #include <stdlib.h> // for abs()

13 #include "./dsp.h"	13 #include "./dsp.h"

14 #include "../enc/vp8enci.h"	14 #include "../enc/vp8enci.h"

15	15

16 #if defined(__cplusplus) || defined(c_plusplus)	16 #if defined(__cplusplus) || defined(c_plusplus)

17 extern "C" {	17 extern "C" {

18 #endif	18 #endif

19	19

	20 static WEBP_INLINE uint8_t clip_8b(int v) {

	21 return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255;

	22 }

	23

	24 static WEBP_INLINE int clip_max(int v, int max) {

	25 return (v > max) ? max : v;

	26 }

	27

20 //------------------------------------------------------------------------------	28 //------------------------------------------------------------------------------

21 // Compute susceptibility based on DCT-coeff histograms:	29 // Compute susceptibility based on DCT-coeff histograms:

22 // the higher, the "easier" the macroblock is to compress.	30 // the higher, the "easier" the macroblock is to compress.

23	31

24 static int ClipAlpha(int alpha) {

25 return alpha < 0 ? 0 : alpha > 255 ? 255 : alpha;

26 }

27

28 int VP8GetAlpha(const int histo[MAX_COEFF_THRESH + 1]) {

29 int num = 0, den = 0, val = 0;

30 int k;

31 int alpha;

32 // note: changing this loop to avoid the numerous "k + 1" slows things down.

33 for (k = 0; k < MAX_COEFF_THRESH; ++k) {

34 if (histo[k + 1]) {

35 val += histo[k + 1];

36 num += val * (k + 1);

37 den += (k + 1) * (k + 1);

38 }

39 }

40 // we scale the value to a usable [0..255] range

41 alpha = den ? 10 * num / den - 5 : 0;

42 return ClipAlpha(alpha);

43 }

44

45 const int VP8DspScan[16 + 4 + 4] = {	32 const int VP8DspScan[16 + 4 + 4] = {

46 // Luma	33 // Luma

47 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS,	34 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS,

48 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS,	35 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS,

49 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS,	36 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS,

50 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS,	37 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS,

51	38

52 0 + 0 * BPS, 4 + 0 * BPS, 0 + 4 * BPS, 4 + 4 * BPS, // U	39 0 + 0 * BPS, 4 + 0 * BPS, 0 + 4 * BPS, 4 + 4 * BPS, // U

53 8 + 0 * BPS, 12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS // V	40 8 + 0 * BPS, 12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS // V

54 };	41 };

55	42

56 static int CollectHistogram(const uint8_t* ref, const uint8_t* pred,	43 static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,

57 int start_block, int end_block) {	44 int start_block, int end_block,

58 int histo[MAX_COEFF_THRESH + 1] = { 0 };	45 VP8Histogram* const histo) {

59 int16_t out[16];	46 int j;

60 int j, k;

61 for (j = start_block; j < end_block; ++j) {	47 for (j = start_block; j < end_block; ++j) {

	48 int k;

	49 int16_t out[16];

	50

62 VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);	51 VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);

63	52

64 // Convert coefficients to bin (within out[]).	53 // Convert coefficients to bin.

65 for (k = 0; k < 16; ++k) {	54 for (k = 0; k < 16; ++k) {

66 const int v = abs(out[k]) >> 2;	55 const int v = abs(out[k]) >> 3; // TODO(skal): add rounding?

67 out[k] = (v > MAX_COEFF_THRESH) ? MAX_COEFF_THRESH : v;	56 const int clipped_value = clip_max(v, MAX_COEFF_THRESH);

68 }	57 histo->distribution[clipped_value]++;

69

70 // Use bin to update histogram.

71 for (k = 0; k < 16; ++k) {

72 histo[out[k]]++;

73 }	58 }

74 }	59 }

75

76 return VP8GetAlpha(histo);

77 }	60 }

78	61

79 //------------------------------------------------------------------------------	62 //------------------------------------------------------------------------------

80 // run-time tables (~4k)	63 // run-time tables (~4k)

81	64

82 static uint8_t clip1[255 + 510 + 1]; // clips [-255,510] to [0,255]	65 static uint8_t clip1[255 + 510 + 1]; // clips [-255,510] to [0,255]

83	66

84 // We declare this variable 'volatile' to prevent instruction reordering	67 // We declare this variable 'volatile' to prevent instruction reordering

85 // and make sure it's set to true _last_ (so as to be thread-safe)	68 // and make sure it's set to true _last_ (so as to be thread-safe)

86 static volatile int tables_ok = 0;	69 static volatile int tables_ok = 0;

87	70

88 static void InitTables(void) {	71 static void InitTables(void) {

89 if (!tables_ok) {	72 if (!tables_ok) {

90 int i;	73 int i;

91 for (i = -255; i <= 255 + 255; ++i) {	74 for (i = -255; i <= 255 + 255; ++i) {

92 clip1[255 + i] = (i < 0) ? 0 : (i > 255) ? 255 : i;	75 clip1[255 + i] = clip_8b(i);

93 }	76 }

94 tables_ok = 1;	77 tables_ok = 1;

95 }	78 }

96 }	79 }

97	80

98 static WEBP_INLINE uint8_t clip_8b(int v) {

99 return (!(v & ~0xff)) ? v : v < 0 ? 0 : 255;

100 }

101	81

102 //------------------------------------------------------------------------------	82 //------------------------------------------------------------------------------

103 // Transforms (Paragraph 14.4)	83 // Transforms (Paragraph 14.4)

104	84

105 #define STORE(x, y, v) \	85 #define STORE(x, y, v) \

106 dst[(x) + (y) * BPS] = clip_8b(ref[(x) + (y) * BPS] + ((v) >> 3))	86 dst[(x) + (y) * BPS] = clip_8b(ref[(x) + (y) * BPS] + ((v) >> 3))

107	87

108 static const int kC1 = 20091 + (1 << 16);	88 static const int kC1 = 20091 + (1 << 16);

109 static const int kC2 = 35468;	89 static const int kC2 = 35468;

110 #define MUL(a, b) (((a) * (b)) >> 16)	90 #define MUL(a, b) (((a) * (b)) >> 16)

(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
147 ITransformOne(ref, in, dst);	127 ITransformOne(ref, in, dst);

148 if (do_two) {	128 if (do_two) {

149 ITransformOne(ref + 4, in + 16, dst + 4);	129 ITransformOne(ref + 4, in + 16, dst + 4);

150 }	130 }

151 }	131 }

152	132

153 static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {	133 static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {

154 int i;	134 int i;

155 int tmp[16];	135 int tmp[16];

156 for (i = 0; i < 4; ++i, src += BPS, ref += BPS) {	136 for (i = 0; i < 4; ++i, src += BPS, ref += BPS) {

157 const int d0 = src[0] - ref[0];	137 const int d0 = src[0] - ref[0]; // 9bit dynamic range ([-255,255])

158 const int d1 = src[1] - ref[1];	138 const int d1 = src[1] - ref[1];

159 const int d2 = src[2] - ref[2];	139 const int d2 = src[2] - ref[2];

160 const int d3 = src[3] - ref[3];	140 const int d3 = src[3] - ref[3];

161 const int a0 = (d0 + d3) << 3;	141 const int a0 = (d0 + d3); // 10b [-510,510]

162 const int a1 = (d1 + d2) << 3;	142 const int a1 = (d1 + d2);

163 const int a2 = (d1 - d2) << 3;	143 const int a2 = (d1 - d2);

164 const int a3 = (d0 - d3) << 3;	144 const int a3 = (d0 - d3);

165 tmp[0 + i * 4] = (a0 + a1);	145 tmp[0 + i * 4] = (a0 + a1) << 3; // 14b [-8160,8160]

166 tmp[1 + i * 4] = (a2 * 2217 + a3 * 5352 + 14500) >> 12;	146 tmp[1 + i * 4] = (a2 * 2217 + a3 * 5352 + 1812) >> 9; // [-7536,7542]

167 tmp[2 + i * 4] = (a0 - a1);	147 tmp[2 + i * 4] = (a0 - a1) << 3;

168 tmp[3 + i * 4] = (a3 * 2217 - a2 * 5352 + 7500) >> 12;	148 tmp[3 + i * 4] = (a3 * 2217 - a2 * 5352 + 937) >> 9;

169 }	149 }

170 for (i = 0; i < 4; ++i) {	150 for (i = 0; i < 4; ++i) {

171 const int a0 = (tmp[0 + i] + tmp[12 + i]);	151 const int a0 = (tmp[0 + i] + tmp[12 + i]); // 15b

172 const int a1 = (tmp[4 + i] + tmp[ 8 + i]);	152 const int a1 = (tmp[4 + i] + tmp[ 8 + i]);

173 const int a2 = (tmp[4 + i] - tmp[ 8 + i]);	153 const int a2 = (tmp[4 + i] - tmp[ 8 + i]);

174 const int a3 = (tmp[0 + i] - tmp[12 + i]);	154 const int a3 = (tmp[0 + i] - tmp[12 + i]);

175 out[0 + i] = (a0 + a1 + 7) >> 4;	155 out[0 + i] = (a0 + a1 + 7) >> 4; // 12b

176 out[4 + i] = ((a2 * 2217 + a3 * 5352 + 12000) >> 16) + (a3 != 0);	156 out[4 + i] = ((a2 * 2217 + a3 * 5352 + 12000) >> 16) + (a3 != 0);

177 out[8 + i] = (a0 - a1 + 7) >> 4;	157 out[8 + i] = (a0 - a1 + 7) >> 4;

178 out[12+ i] = ((a3 * 2217 - a2 * 5352 + 51000) >> 16);	158 out[12+ i] = ((a3 * 2217 - a2 * 5352 + 51000) >> 16);

179 }	159 }

180 }	160 }

181	161

182 static void ITransformWHT(const int16_t* in, int16_t* out) {	162 static void ITransformWHT(const int16_t* in, int16_t* out) {

183 int tmp[16];	163 int tmp[16];

184 int i;	164 int i;

185 for (i = 0; i < 4; ++i) {	165 for (i = 0; i < 4; ++i) {

(...skipping 396 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
582 // reconstructed samples.	562 // reconstructed samples.

583	563

584 // Hadamard transform	564 // Hadamard transform

585 // Returns the weighted sum of the absolute value of transformed coefficients.	565 // Returns the weighted sum of the absolute value of transformed coefficients.

586 static int TTransform(const uint8_t* in, const uint16_t* w) {	566 static int TTransform(const uint8_t* in, const uint16_t* w) {

587 int sum = 0;	567 int sum = 0;

588 int tmp[16];	568 int tmp[16];

589 int i;	569 int i;

590 // horizontal pass	570 // horizontal pass

591 for (i = 0; i < 4; ++i, in += BPS) {	571 for (i = 0; i < 4; ++i, in += BPS) {

592 const int a0 = (in[0] + in[2]) << 2;	572 const int a0 = in[0] + in[2];

593 const int a1 = (in[1] + in[3]) << 2;	573 const int a1 = in[1] + in[3];

594 const int a2 = (in[1] - in[3]) << 2;	574 const int a2 = in[1] - in[3];

595 const int a3 = (in[0] - in[2]) << 2;	575 const int a3 = in[0] - in[2];

596 tmp[0 + i * 4] = a0 + a1 + (a0 != 0);	576 tmp[0 + i * 4] = a0 + a1;

597 tmp[1 + i * 4] = a3 + a2;	577 tmp[1 + i * 4] = a3 + a2;

598 tmp[2 + i * 4] = a3 - a2;	578 tmp[2 + i * 4] = a3 - a2;

599 tmp[3 + i * 4] = a0 - a1;	579 tmp[3 + i * 4] = a0 - a1;

600 }	580 }

601 // vertical pass	581 // vertical pass

602 for (i = 0; i < 4; ++i, ++w) {	582 for (i = 0; i < 4; ++i, ++w) {

603 const int a0 = (tmp[0 + i] + tmp[8 + i]);	583 const int a0 = tmp[0 + i] + tmp[8 + i];

604 const int a1 = (tmp[4 + i] + tmp[12+ i]);	584 const int a1 = tmp[4 + i] + tmp[12+ i];

605 const int a2 = (tmp[4 + i] - tmp[12+ i]);	585 const int a2 = tmp[4 + i] - tmp[12+ i];

606 const int a3 = (tmp[0 + i] - tmp[8 + i]);	586 const int a3 = tmp[0 + i] - tmp[8 + i];

607 const int b0 = a0 + a1;	587 const int b0 = a0 + a1;

608 const int b1 = a3 + a2;	588 const int b1 = a3 + a2;

609 const int b2 = a3 - a2;	589 const int b2 = a3 - a2;

610 const int b3 = a0 - a1;	590 const int b3 = a0 - a1;

611 // abs((b + (b<0) + 3) >> 3) = (abs(b) + 3) >> 3	591

612 sum += w[ 0] * ((abs(b0) + 3) >> 3);	592 sum += w[ 0] * abs(b0);

613 sum += w[ 4] * ((abs(b1) + 3) >> 3);	593 sum += w[ 4] * abs(b1);

614 sum += w[ 8] * ((abs(b2) + 3) >> 3);	594 sum += w[ 8] * abs(b2);

615 sum += w[12] * ((abs(b3) + 3) >> 3);	595 sum += w[12] * abs(b3);

616 }	596 }

617 return sum;	597 return sum;

618 }	598 }

619	599

620 static int Disto4x4(const uint8_t* const a, const uint8_t* const b,	600 static int Disto4x4(const uint8_t* const a, const uint8_t* const b,

621 const uint16_t* const w) {	601 const uint16_t* const w) {

622 const int sum1 = TTransform(a, w);	602 const int sum1 = TTransform(a, w);

623 const int sum2 = TTransform(b, w);	603 const int sum2 = TTransform(b, w);

624 return (abs(sum2 - sum1) + 8) >> 4;	604 return abs(sum2 - sum1) >> 5;

625 }	605 }

626	606

627 static int Disto16x16(const uint8_t* const a, const uint8_t* const b,	607 static int Disto16x16(const uint8_t* const a, const uint8_t* const b,

628 const uint16_t* const w) {	608 const uint16_t* const w) {

629 int D = 0;	609 int D = 0;

630 int x, y;	610 int x, y;

631 for (y = 0; y < 16 * BPS; y += 4 * BPS) {	611 for (y = 0; y < 16 * BPS; y += 4 * BPS) {

632 for (x = 0; x < 16; x += 4) {	612 for (x = 0; x < 16; x += 4) {

633 D += Disto4x4(a + x + y, b + x + y, w);	613 D += Disto4x4(a + x + y, b + x + y, w);

634 }	614 }

(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
699 VP8Metric VP8SSE16x16;	679 VP8Metric VP8SSE16x16;

700 VP8Metric VP8SSE8x8;	680 VP8Metric VP8SSE8x8;

701 VP8Metric VP8SSE16x8;	681 VP8Metric VP8SSE16x8;

702 VP8Metric VP8SSE4x4;	682 VP8Metric VP8SSE4x4;

703 VP8WMetric VP8TDisto4x4;	683 VP8WMetric VP8TDisto4x4;

704 VP8WMetric VP8TDisto16x16;	684 VP8WMetric VP8TDisto16x16;

705 VP8QuantizeBlock VP8EncQuantizeBlock;	685 VP8QuantizeBlock VP8EncQuantizeBlock;

706 VP8BlockCopy VP8Copy4x4;	686 VP8BlockCopy VP8Copy4x4;

707	687

708 extern void VP8EncDspInitSSE2(void);	688 extern void VP8EncDspInitSSE2(void);

	689 extern void VP8EncDspInitNEON(void);

709	690

710 void VP8EncDspInit(void) {	691 void VP8EncDspInit(void) {

711 InitTables();	692 InitTables();

712	693

713 // default C implementations	694 // default C implementations

714 VP8CollectHistogram = CollectHistogram;	695 VP8CollectHistogram = CollectHistogram;

715 VP8ITransform = ITransform;	696 VP8ITransform = ITransform;

716 VP8FTransform = FTransform;	697 VP8FTransform = FTransform;

717 VP8ITransformWHT = ITransformWHT;	698 VP8ITransformWHT = ITransformWHT;

718 VP8FTransformWHT = FTransformWHT;	699 VP8FTransformWHT = FTransformWHT;

719 VP8EncPredLuma4 = Intra4Preds;	700 VP8EncPredLuma4 = Intra4Preds;

720 VP8EncPredLuma16 = Intra16Preds;	701 VP8EncPredLuma16 = Intra16Preds;

721 VP8EncPredChroma8 = IntraChromaPreds;	702 VP8EncPredChroma8 = IntraChromaPreds;

722 VP8SSE16x16 = SSE16x16;	703 VP8SSE16x16 = SSE16x16;

723 VP8SSE8x8 = SSE8x8;	704 VP8SSE8x8 = SSE8x8;

724 VP8SSE16x8 = SSE16x8;	705 VP8SSE16x8 = SSE16x8;

725 VP8SSE4x4 = SSE4x4;	706 VP8SSE4x4 = SSE4x4;

726 VP8TDisto4x4 = Disto4x4;	707 VP8TDisto4x4 = Disto4x4;

727 VP8TDisto16x16 = Disto16x16;	708 VP8TDisto16x16 = Disto16x16;

728 VP8EncQuantizeBlock = QuantizeBlock;	709 VP8EncQuantizeBlock = QuantizeBlock;

729 VP8Copy4x4 = Copy4x4;	710 VP8Copy4x4 = Copy4x4;

730	711

731 // If defined, use CPUInfo() to overwrite some pointers with faster versions.	712 // If defined, use CPUInfo() to overwrite some pointers with faster versions.

732 if (VP8GetCPUInfo) {	713 if (VP8GetCPUInfo) {

733 #if defined(WEBP_USE_SSE2)	714 #if defined(WEBP_USE_SSE2)

734 if (VP8GetCPUInfo(kSSE2)) {	715 if (VP8GetCPUInfo(kSSE2)) {

735 VP8EncDspInitSSE2();	716 VP8EncDspInitSSE2();

736 }	717 }

	718 #elif defined(WEBP_USE_NEON)

	719 if (VP8GetCPUInfo(kNEON)) {

	720 VP8EncDspInitNEON();

	721 }

737 #endif	722 #endif

738 }	723 }

739 }	724 }

740	725

741 #if defined(__cplusplus) || defined(c_plusplus)	726 #if defined(__cplusplus) || defined(c_plusplus)

742 } // extern "C"	727 } // extern "C"

743 #endif	728 #endif

OLD	NEW

« third_party/libwebp/dsp/dec_neon.c ('K') | « third_party/libwebp/dsp/dsp.h ('k') | third_party/libwebp/dsp/enc_neon.c » ('j') | no next file with comments »