source/libvpx/vp9/common/arm/neon/vp9_idct16x16_neon.c - Issue 54923004: libvpx: Pull from upstream

Unified Diff: source/libvpx/vp9/common/arm/neon/vp9_idct16x16_neon.c

Issue 54923004: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/

Patch Set: Created 7 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: source/libvpx/vp9/common/arm/neon/vp9_idct16x16_neon.c

===================================================================

--- source/libvpx/vp9/common/arm/neon/vp9_idct16x16_neon.c (revision 232232)

+++ source/libvpx/vp9/common/arm/neon/vp9_idct16x16_neon.c (working copy)

@@ -11,45 +11,47 @@

#include "./vp9_rtcd.h"

#include "vp9/common/vp9_common.h"

-extern void vp9_short_idct16x16_add_neon_pass1(int16_t *input,

- int16_t *output,

- int output_stride);

-extern void vp9_short_idct16x16_add_neon_pass2(int16_t *src,

- int16_t *output,

- int16_t *pass1Output,

- int16_t skip_adding,

- uint8_t *dest,

- int dest_stride);

-extern void vp9_short_idct10_16x16_add_neon_pass1(int16_t *input,

- int16_t *output,

- int output_stride);

-extern void vp9_short_idct10_16x16_add_neon_pass2(int16_t *src,

- int16_t *output,

- int16_t *pass1Output,

- int16_t skip_adding,

- uint8_t *dest,

- int dest_stride);

-extern void save_neon_registers();

-extern void restore_neon_registers();

+void vp9_idct16x16_256_add_neon_pass1(const int16_t *input,

+ int16_t *output,

+ int output_stride);

+void vp9_idct16x16_256_add_neon_pass2(const int16_t *src,

+ int16_t *output,

+ int16_t *pass1Output,

+ int16_t skip_adding,

+ uint8_t *dest,

+ int dest_stride);

+void vp9_idct16x16_10_add_neon_pass1(const int16_t *input,

+ int16_t *output,

+ int output_stride);

+void vp9_idct16x16_10_add_neon_pass2(const int16_t *src,

+ int16_t *output,

+ int16_t *pass1Output,

+ int16_t skip_adding,

+ uint8_t *dest,

+ int dest_stride);

+/* On ARM NEON, d8-d15 are callee-saved registers, so they are saved on entry and restored on exit. */

+extern void vp9_push_neon(int64_t *store);

+extern void vp9_pop_neon(int64_t *store);

-void vp9_short_idct16x16_add_neon(int16_t *input,

- uint8_t *dest, int dest_stride) {

+void vp9_idct16x16_256_add_neon(const int16_t *input,

+ uint8_t *dest, int dest_stride) {

+ int64_t store_reg[8];

int16_t pass1_output[16*16] = {0};

int16_t row_idct_output[16*16] = {0};

// save d8-d15 register values.

- save_neon_registers();

+ vp9_push_neon(store_reg);

/* Parallel idct on the upper 8 rows */

// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the

// stage 6 result in pass1_output.

- vp9_short_idct16x16_add_neon_pass1(input, pass1_output, 8);

+ vp9_idct16x16_256_add_neon_pass1(input, pass1_output, 8);

// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines

// with result in pass1(pass1_output) to calculate final result in stage 7

// which will be saved into row_idct_output.

- vp9_short_idct16x16_add_neon_pass2(input+1,

+ vp9_idct16x16_256_add_neon_pass2(input+1,

row_idct_output,

pass1_output,

@@ -59,12 +61,12 @@

/* Parallel idct on the lower 8 rows */

// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the

// stage 6 result in pass1_output.

- vp9_short_idct16x16_add_neon_pass1(input+8*16, pass1_output, 8);

+ vp9_idct16x16_256_add_neon_pass1(input+8*16, pass1_output, 8);

// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines

// with result in pass1(pass1_output) to calculate final result in stage 7

// which will be saved into row_idct_output.

- vp9_short_idct16x16_add_neon_pass2(input+8*16+1,

+ vp9_idct16x16_256_add_neon_pass2(input+8*16+1,

row_idct_output+8,

pass1_output,

@@ -74,12 +76,12 @@

/* Parallel idct on the left 8 columns */

// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the

// stage 6 result in pass1_output.

- vp9_short_idct16x16_add_neon_pass1(row_idct_output, pass1_output, 8);

+ vp9_idct16x16_256_add_neon_pass1(row_idct_output, pass1_output, 8);

// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines

// with result in pass1(pass1_output) to calculate final result in stage 7.

// Then add the result to the destination data.

- vp9_short_idct16x16_add_neon_pass2(row_idct_output+1,

+ vp9_idct16x16_256_add_neon_pass2(row_idct_output+1,

row_idct_output,

pass1_output,

@@ -89,12 +91,12 @@

/* Parallel idct on the right 8 columns */

// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the

// stage 6 result in pass1_output.

- vp9_short_idct16x16_add_neon_pass1(row_idct_output+8*16, pass1_output, 8);

+ vp9_idct16x16_256_add_neon_pass1(row_idct_output+8*16, pass1_output, 8);

// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines

// with result in pass1(pass1_output) to calculate final result in stage 7.

// Then add the result to the destination data.

- vp9_short_idct16x16_add_neon_pass2(row_idct_output+8*16+1,

+ vp9_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1,

row_idct_output+8,

pass1_output,

@@ -102,28 +104,29 @@

dest_stride);

// restore d8-d15 register values.

- restore_neon_registers();

+ vp9_pop_neon(store_reg);

return;

}

-void vp9_short_idct10_16x16_add_neon(int16_t *input,

- uint8_t *dest, int dest_stride) {

+void vp9_idct16x16_10_add_neon(const int16_t *input,

+ uint8_t *dest, int dest_stride) {

+ int64_t store_reg[8];

int16_t pass1_output[16*16] = {0};

int16_t row_idct_output[16*16] = {0};

// save d8-d15 register values.

- save_neon_registers();

+ vp9_push_neon(store_reg);

/* Parallel idct on the upper 8 rows */

// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the

// stage 6 result in pass1_output.

- vp9_short_idct10_16x16_add_neon_pass1(input, pass1_output, 8);

+ vp9_idct16x16_10_add_neon_pass1(input, pass1_output, 8);

// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines

// with result in pass1(pass1_output) to calculate final result in stage 7

// which will be saved into row_idct_output.

- vp9_short_idct10_16x16_add_neon_pass2(input+1,

+ vp9_idct16x16_10_add_neon_pass2(input+1,

row_idct_output,

pass1_output,

@@ -135,12 +138,12 @@

/* Parallel idct on the left 8 columns */

// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the

// stage 6 result in pass1_output.

- vp9_short_idct16x16_add_neon_pass1(row_idct_output, pass1_output, 8);

+ vp9_idct16x16_256_add_neon_pass1(row_idct_output, pass1_output, 8);

// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines

// with result in pass1(pass1_output) to calculate final result in stage 7.

// Then add the result to the destination data.

- vp9_short_idct16x16_add_neon_pass2(row_idct_output+1,

+ vp9_idct16x16_256_add_neon_pass2(row_idct_output+1,

row_idct_output,

pass1_output,

@@ -150,12 +153,12 @@

/* Parallel idct on the right 8 columns */

// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the

// stage 6 result in pass1_output.

- vp9_short_idct16x16_add_neon_pass1(row_idct_output+8*16, pass1_output, 8);

+ vp9_idct16x16_256_add_neon_pass1(row_idct_output+8*16, pass1_output, 8);

// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines

// with result in pass1(pass1_output) to calculate final result in stage 7.

// Then add the result to the destination data.

- vp9_short_idct16x16_add_neon_pass2(row_idct_output+8*16+1,

+ vp9_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1,

row_idct_output+8,

pass1_output,

@@ -163,7 +166,7 @@

dest_stride);

// restore d8-d15 register values.

- restore_neon_registers();

+ vp9_pop_neon(store_reg);

return;

}

« no previous file with comments | « source/libvpx/build/make/thumb.pm ('k') | source/libvpx/vp9/common/arm/neon/vp9_idct32x32_neon.c » ('j') | no next file with comments »