source/libvpx/vp9/encoder/vp9_dct.c - Issue 54923004: libvpx: Pull from upstream

Unified Diff: source/libvpx/vp9/encoder/vp9_dct.c

Issue 54923004: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/

Patch Set: Created 7 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: source/libvpx/vp9/encoder/vp9_dct.c

===================================================================

--- source/libvpx/vp9/encoder/vp9_dct.c (revision 232232)

+++ source/libvpx/vp9/encoder/vp9_dct.c (working copy)

@@ -8,16 +8,19 @@

* be found in the AUTHORS file in the root of the source tree.

#include <assert.h>

#include <math.h>

#include "./vpx_config.h"

-#include "vp9/common/vp9_systemdependent.h"

+#include "./vp9_rtcd.h"

#include "vp9/common/vp9_blockd.h"

#include "vp9/common/vp9_idct.h"

+#include "vp9/common/vp9_systemdependent.h"

-static void fdct4_1d(int16_t *input, int16_t *output) {

+#include "vp9/encoder/vp9_dct.h"

+static void fdct4(const int16_t *input, int16_t *output) {

int16_t step[4];

int temp1, temp2;

@@ -36,18 +39,17 @@

output[3] = dct_const_round_shift(temp2);

}

-void vp9_short_fdct4x4_c(int16_t *input, int16_t *output, int pitch) {

+void vp9_fdct4x4_c(const int16_t *input, int16_t *output, int stride) {

// The 2D transform is done with two passes which are actually pretty

// similar. In the first one, we transform the columns and transpose

// the results. In the second one, we transform the rows. To achieve that,

// as the first pass results are transposed, we transpose the columns (that

// is the transposed rows) and transpose the results (so that it goes back

// in normal/row positions).

- const int stride = pitch >> 1;

int pass;

// We need an intermediate buffer between passes.

int16_t intermediate[4 * 4];

- int16_t *in = input;

+ const int16_t *in = input;

int16_t *out = intermediate;

// Do the two transform/transpose passes

for (pass = 0; pass < 2; ++pass) {

@@ -58,10 +60,10 @@

for (i = 0; i < 4; ++i) {

// Load inputs.

if (0 == pass) {

- input[0] = in[0 * stride] << 4;

- input[1] = in[1 * stride] << 4;

- input[2] = in[2 * stride] << 4;

- input[3] = in[3 * stride] << 4;

+ input[0] = in[0 * stride] * 16;

+ input[1] = in[1 * stride] * 16;

+ input[2] = in[2 * stride] * 16;

+ input[3] = in[3 * stride] * 16;

if (i == 0 && input[0]) {

input[0] += 1;

}

@@ -102,7 +104,7 @@

}

-static void fadst4_1d(int16_t *input, int16_t *output) {

+static void fadst4(const int16_t *input, int16_t *output) {

int x0, x1, x2, x3;

int s0, s1, s2, s3, s4, s5, s6, s7;

@@ -143,14 +145,14 @@

}

static const transform_2d FHT_4[] = {

- { fdct4_1d, fdct4_1d }, // DCT_DCT = 0

- { fadst4_1d, fdct4_1d }, // ADST_DCT = 1

- { fdct4_1d, fadst4_1d }, // DCT_ADST = 2

- { fadst4_1d, fadst4_1d } // ADST_ADST = 3

+ { fdct4, fdct4 }, // DCT_DCT = 0

+ { fadst4, fdct4 }, // ADST_DCT = 1

+ { fdct4, fadst4 }, // DCT_ADST = 2

+ { fadst4, fadst4 } // ADST_ADST = 3

};

-void vp9_short_fht4x4_c(int16_t *input, int16_t *output,

- int pitch, TX_TYPE tx_type) {

+void vp9_short_fht4x4_c(const int16_t *input, int16_t *output,

+ int stride, int tx_type) {

int16_t out[4 * 4];

int16_t *outptr = &out[0];

int i, j;

@@ -160,7 +162,7 @@

// Columns

for (i = 0; i < 4; ++i) {

for (j = 0; j < 4; ++j)

- temp_in[j] = input[j * pitch + i] << 4;

+ temp_in[j] = input[j * stride + i] * 16;

if (i == 0 && temp_in[0])

temp_in[0] += 1;

ht.cols(temp_in, temp_out);

@@ -178,12 +180,7 @@

}

-void vp9_short_fdct8x4_c(int16_t *input, int16_t *output, int pitch) {

- vp9_short_fdct4x4_c(input, output, pitch);

- vp9_short_fdct4x4_c(input + 4, output + 16, pitch);

-static void fdct8_1d(int16_t *input, int16_t *output) {

+static void fdct8(const int16_t *input, int16_t *output) {

/*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7;

/*needs32*/ int t0, t1, t2, t3;

/*canbe16*/ int x0, x1, x2, x3;

@@ -198,7 +195,7 @@

s6 = input[1] - input[6];

s7 = input[0] - input[7];

- // fdct4_1d(step, step);

+ // fdct4(step, step);

x0 = s0 + s3;

x1 = s1 + s2;

x2 = s1 - s2;

@@ -235,8 +232,7 @@

output[7] = dct_const_round_shift(t3);

}

-void vp9_short_fdct8x8_c(int16_t *input, int16_t *final_output, int pitch) {

- const int stride = pitch >> 1;

+void vp9_fdct8x8_c(const int16_t *input, int16_t *final_output, int stride) {

int i, j;

int16_t intermediate[64];

@@ -250,16 +246,16 @@

int i;

for (i = 0; i < 8; i++) {

// stage 1

- s0 = (input[0 * stride] + input[7 * stride]) << 2;

- s1 = (input[1 * stride] + input[6 * stride]) << 2;

- s2 = (input[2 * stride] + input[5 * stride]) << 2;

- s3 = (input[3 * stride] + input[4 * stride]) << 2;

- s4 = (input[3 * stride] - input[4 * stride]) << 2;

- s5 = (input[2 * stride] - input[5 * stride]) << 2;

- s6 = (input[1 * stride] - input[6 * stride]) << 2;

- s7 = (input[0 * stride] - input[7 * stride]) << 2;

+ s0 = (input[0 * stride] + input[7 * stride]) * 4;

+ s1 = (input[1 * stride] + input[6 * stride]) * 4;

+ s2 = (input[2 * stride] + input[5 * stride]) * 4;

+ s3 = (input[3 * stride] + input[4 * stride]) * 4;

+ s4 = (input[3 * stride] - input[4 * stride]) * 4;

+ s5 = (input[2 * stride] - input[5 * stride]) * 4;

+ s6 = (input[1 * stride] - input[6 * stride]) * 4;

+ s7 = (input[0 * stride] - input[7 * stride]) * 4;

- // fdct4_1d(step, step);

+ // fdct4(step, step);

x0 = s0 + s3;

x1 = s1 + s2;

x2 = s1 - s2;

@@ -301,24 +297,23 @@

// Rows

for (i = 0; i < 8; ++i) {

- fdct8_1d(&intermediate[i * 8], &final_output[i * 8]);

+ fdct8(&intermediate[i * 8], &final_output[i * 8]);

for (j = 0; j < 8; ++j)

final_output[j + i * 8] /= 2;

}

-void vp9_short_fdct16x16_c(int16_t *input, int16_t *output, int pitch) {

+void vp9_fdct16x16_c(const int16_t *input, int16_t *output, int stride) {

// The 2D transform is done with two passes which are actually pretty

// similar. In the first one, we transform the columns and transpose

// the results. In the second one, we transform the rows. To achieve that,

// as the first pass results are transposed, we transpose the columns (that

// is the transposed rows) and transpose the results (so that it goes back

// in normal/row positions).

- const int stride = pitch >> 1;

int pass;

// We need an intermediate buffer between passes.

int16_t intermediate[256];

- int16_t *in = input;

+ const int16_t *in = input;

int16_t *out = intermediate;

// Do the two transform/transpose passes

for (pass = 0; pass < 2; ++pass) {

@@ -331,23 +326,23 @@

for (i = 0; i < 16; i++) {

if (0 == pass) {

// Calculate input for the first 8 results.

- input[0] = (in[0 * stride] + in[15 * stride]) << 2;

- input[1] = (in[1 * stride] + in[14 * stride]) << 2;

- input[2] = (in[2 * stride] + in[13 * stride]) << 2;

- input[3] = (in[3 * stride] + in[12 * stride]) << 2;

- input[4] = (in[4 * stride] + in[11 * stride]) << 2;

- input[5] = (in[5 * stride] + in[10 * stride]) << 2;

- input[6] = (in[6 * stride] + in[ 9 * stride]) << 2;

- input[7] = (in[7 * stride] + in[ 8 * stride]) << 2;

+ input[0] = (in[0 * stride] + in[15 * stride]) * 4;

+ input[1] = (in[1 * stride] + in[14 * stride]) * 4;

+ input[2] = (in[2 * stride] + in[13 * stride]) * 4;

+ input[3] = (in[3 * stride] + in[12 * stride]) * 4;

+ input[4] = (in[4 * stride] + in[11 * stride]) * 4;

+ input[5] = (in[5 * stride] + in[10 * stride]) * 4;

+ input[6] = (in[6 * stride] + in[ 9 * stride]) * 4;

+ input[7] = (in[7 * stride] + in[ 8 * stride]) * 4;

// Calculate input for the next 8 results.

- step1[0] = (in[7 * stride] - in[ 8 * stride]) << 2;

- step1[1] = (in[6 * stride] - in[ 9 * stride]) << 2;

- step1[2] = (in[5 * stride] - in[10 * stride]) << 2;

- step1[3] = (in[4 * stride] - in[11 * stride]) << 2;

- step1[4] = (in[3 * stride] - in[12 * stride]) << 2;

- step1[5] = (in[2 * stride] - in[13 * stride]) << 2;

- step1[6] = (in[1 * stride] - in[14 * stride]) << 2;

- step1[7] = (in[0 * stride] - in[15 * stride]) << 2;

+ step1[0] = (in[7 * stride] - in[ 8 * stride]) * 4;

+ step1[1] = (in[6 * stride] - in[ 9 * stride]) * 4;

+ step1[2] = (in[5 * stride] - in[10 * stride]) * 4;

+ step1[3] = (in[4 * stride] - in[11 * stride]) * 4;

+ step1[4] = (in[3 * stride] - in[12 * stride]) * 4;

+ step1[5] = (in[2 * stride] - in[13 * stride]) * 4;

+ step1[6] = (in[1 * stride] - in[14 * stride]) * 4;

+ step1[7] = (in[0 * stride] - in[15 * stride]) * 4;

} else {

// Calculate input for the first 8 results.

input[0] = ((in[0 * 16] + 1) >> 2) + ((in[15 * 16] + 1) >> 2);

@@ -368,7 +363,7 @@

step1[6] = ((in[1 * 16] + 1) >> 2) - ((in[14 * 16] + 1) >> 2);

step1[7] = ((in[0 * 16] + 1) >> 2) - ((in[15 * 16] + 1) >> 2);

}

- // Work on the first eight values; fdct8_1d(input, even_results);

+ // Work on the first eight values; fdct8(input, even_results);

{

/*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7;

/*needs32*/ int t0, t1, t2, t3;

@@ -384,7 +379,7 @@

s6 = input[1] - input[6];

s7 = input[0] - input[7];

- // fdct4_1d(step, step);

+ // fdct4(step, step);

x0 = s0 + s3;

x1 = s1 + s2;

x2 = s1 - s2;

@@ -486,7 +481,7 @@

}

-static void fadst8_1d(int16_t *input, int16_t *output) {

+static void fadst8(const int16_t *input, int16_t *output) {

int s0, s1, s2, s3, s4, s5, s6, s7;

int x0 = input[7];

@@ -558,14 +553,14 @@

}

static const transform_2d FHT_8[] = {

- { fdct8_1d, fdct8_1d }, // DCT_DCT = 0

- { fadst8_1d, fdct8_1d }, // ADST_DCT = 1

- { fdct8_1d, fadst8_1d }, // DCT_ADST = 2

- { fadst8_1d, fadst8_1d } // ADST_ADST = 3

+ { fdct8, fdct8 }, // DCT_DCT = 0

+ { fadst8, fdct8 }, // ADST_DCT = 1

+ { fdct8, fadst8 }, // DCT_ADST = 2

+ { fadst8, fadst8 } // ADST_ADST = 3

};

-void vp9_short_fht8x8_c(int16_t *input, int16_t *output,

- int pitch, TX_TYPE tx_type) {

+void vp9_short_fht8x8_c(const int16_t *input, int16_t *output,

+ int stride, int tx_type) {

int16_t out[64];

int16_t *outptr = &out[0];

int i, j;

@@ -575,7 +570,7 @@

// Columns

for (i = 0; i < 8; ++i) {

for (j = 0; j < 8; ++j)

- temp_in[j] = input[j * pitch + i] << 2;

+ temp_in[j] = input[j * stride + i] * 4;

ht.cols(temp_in, temp_out);

for (j = 0; j < 8; ++j)

outptr[j * 8 + i] = temp_out[j];

@@ -593,18 +588,17 @@

/* 4-point reversible, orthonormal Walsh-Hadamard in 3.5 adds, 0.5 shifts per

pixel. */

-void vp9_short_walsh4x4_c(short *input, short *output, int pitch) {

+void vp9_fwht4x4_c(const int16_t *input, int16_t *output, int stride) {

int i;

int a1, b1, c1, d1, e1;

- short *ip = input;

- short *op = output;

- int pitch_short = pitch >> 1;

+ const int16_t *ip = input;

+ int16_t *op = output;

for (i = 0; i < 4; i++) {

- a1 = ip[0 * pitch_short];

- b1 = ip[1 * pitch_short];

- c1 = ip[2 * pitch_short];

- d1 = ip[3 * pitch_short];

+ a1 = ip[0 * stride];

+ b1 = ip[1 * stride];

+ c1 = ip[2 * stride];

+ d1 = ip[3 * stride];

a1 += b1;

d1 = d1 - c1;

@@ -637,24 +631,18 @@

c1 = e1 - c1;

a1 -= c1;

d1 += b1;

- op[0] = a1 << WHT_UPSCALE_FACTOR;

- op[1] = c1 << WHT_UPSCALE_FACTOR;

- op[2] = d1 << WHT_UPSCALE_FACTOR;

- op[3] = b1 << WHT_UPSCALE_FACTOR;

+ op[0] = a1 * UNIT_QUANT_FACTOR;

+ op[1] = c1 * UNIT_QUANT_FACTOR;

+ op[2] = d1 * UNIT_QUANT_FACTOR;

+ op[3] = b1 * UNIT_QUANT_FACTOR;

ip += 4;

op += 4;

}

-void vp9_short_walsh8x4_c(short *input, short *output, int pitch) {

- vp9_short_walsh4x4_c(input, output, pitch);

- vp9_short_walsh4x4_c(input + 4, output + 16, pitch);

// Rewrote to use same algorithm as others.

-static void fdct16_1d(int16_t in[16], int16_t out[16]) {

+static void fdct16(const int16_t in[16], int16_t out[16]) {

/*canbe16*/ int step1[8];

/*canbe16*/ int step2[8];

/*canbe16*/ int step3[8];

@@ -680,7 +668,7 @@

step1[6] = in[1] - in[14];

step1[7] = in[0] - in[15];

- // fdct8_1d(step, step);

+ // fdct8(step, step);

{

/*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7;

/*needs32*/ int t0, t1, t2, t3;

@@ -696,7 +684,7 @@

s6 = input[1] - input[6];

s7 = input[0] - input[7];

- // fdct4_1d(step, step);

+ // fdct4(step, step);

x0 = s0 + s3;

x1 = s1 + s2;

x2 = s1 - s2;

@@ -795,7 +783,7 @@

out[15] = dct_const_round_shift(temp2);

}

-void fadst16_1d(int16_t *input, int16_t *output) {

+static void fadst16(const int16_t *input, int16_t *output) {

int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15;

int x0 = input[15];

@@ -958,14 +946,14 @@

}

static const transform_2d FHT_16[] = {

- { fdct16_1d, fdct16_1d }, // DCT_DCT = 0

- { fadst16_1d, fdct16_1d }, // ADST_DCT = 1

- { fdct16_1d, fadst16_1d }, // DCT_ADST = 2

- { fadst16_1d, fadst16_1d } // ADST_ADST = 3

+ { fdct16, fdct16 }, // DCT_DCT = 0

+ { fadst16, fdct16 }, // ADST_DCT = 1

+ { fdct16, fadst16 }, // DCT_ADST = 2

+ { fadst16, fadst16 } // ADST_ADST = 3

};

-void vp9_short_fht16x16_c(int16_t *input, int16_t *output,

- int pitch, TX_TYPE tx_type) {

+void vp9_short_fht16x16_c(const int16_t *input, int16_t *output,

+ int stride, int tx_type) {

int16_t out[256];

int16_t *outptr = &out[0];

int i, j;

@@ -975,7 +963,7 @@

// Columns

for (i = 0; i < 16; ++i) {

for (j = 0; j < 16; ++j)

- temp_in[j] = input[j * pitch + i] << 2;

+ temp_in[j] = input[j * stride + i] * 4;

ht.cols(temp_in, temp_out);

for (j = 0; j < 16; ++j)

outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;

@@ -1003,7 +991,7 @@

return rv;

}

-static void dct32_1d(int *input, int *output, int round) {

+static void dct32_1d(const int *input, int *output, int round) {

int step[32];

// Stage 1

step[0] = input[0] + input[(32 - 1)];

@@ -1326,8 +1314,7 @@

output[31] = dct_32_round(step[31] * cospi_31_64 + step[16] * -cospi_1_64);

}

-void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int pitch) {

- int shortpitch = pitch >> 1;

+void vp9_fdct32x32_c(const int16_t *input, int16_t *out, int stride) {

int i, j;

int output[32 * 32];

@@ -1335,7 +1322,7 @@

for (i = 0; i < 32; ++i) {

int temp_in[32], temp_out[32];

for (j = 0; j < 32; ++j)

- temp_in[j] = input[j * shortpitch + i] << 2;

+ temp_in[j] = input[j * stride + i] * 4;

dct32_1d(temp_in, temp_out, 0);

for (j = 0; j < 32; ++j)

output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;

@@ -1355,8 +1342,7 @@

// Note that although we use dct_32_round in dct32_1d computation flow,

// this 2d fdct32x32 for rate-distortion optimization loop is operating

// within 16 bits precision.

-void vp9_short_fdct32x32_rd_c(int16_t *input, int16_t *out, int pitch) {

- int shortpitch = pitch >> 1;

+void vp9_fdct32x32_rd_c(const int16_t *input, int16_t *out, int stride) {

int i, j;

int output[32 * 32];

@@ -1364,7 +1350,7 @@

for (i = 0; i < 32; ++i) {

int temp_in[32], temp_out[32];

for (j = 0; j < 32; ++j)

- temp_in[j] = input[j * shortpitch + i] << 2;

+ temp_in[j] = input[j * stride + i] * 4;

dct32_1d(temp_in, temp_out, 0);

for (j = 0; j < 32; ++j)

// TODO(cd): see quality impact of only doing

@@ -1383,3 +1369,27 @@

out[j + i * 32] = temp_out[j];

}

+void vp9_fht4x4(TX_TYPE tx_type, const int16_t *input, int16_t *output,

+ int stride) {

+ if (tx_type == DCT_DCT)

+ vp9_fdct4x4(input, output, stride);

+ else

+ vp9_short_fht4x4(input, output, stride, tx_type);

+void vp9_fht8x8(TX_TYPE tx_type, const int16_t *input, int16_t *output,

+ int stride) {

+ if (tx_type == DCT_DCT)

+ vp9_fdct8x8(input, output, stride);

+ else

+ vp9_short_fht8x8(input, output, stride, tx_type);

+void vp9_fht16x16(TX_TYPE tx_type, const int16_t *input, int16_t *output,

+ int stride) {

+ if (tx_type == DCT_DCT)

+ vp9_fdct16x16(input, output, stride);

+ else

+ vp9_short_fht16x16(input, output, stride, tx_type);

« no previous file with comments | « source/libvpx/vp9/encoder/vp9_dct.h ('k') | source/libvpx/vp9/encoder/vp9_encodeframe.h » ('j') | no next file with comments »