source/libvpx/vpx_dsp/arm/vpx_convolve_neon.c - Issue 1302353004: libvpx: Pull from upstream

Side by Side Diff: source/libvpx/vpx_dsp/arm/vpx_convolve_neon.c

Issue 1302353004: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master

Patch Set: Created 5 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved.	2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

	11 #include <assert.h>

	12

11 #include "./vpx_dsp_rtcd.h"	13 #include "./vpx_dsp_rtcd.h"

12 #include "vpx_dsp/vpx_dsp_common.h"	14 #include "vpx_dsp/vpx_dsp_common.h"

13 #include "vpx_ports/mem.h"	15 #include "vpx_ports/mem.h"

14	16

15 void vpx_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride,	17 void vpx_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride,

16 uint8_t *dst, ptrdiff_t dst_stride,	18 uint8_t *dst, ptrdiff_t dst_stride,

17 const int16_t *filter_x, int x_step_q4,	19 const int16_t *filter_x, int x_step_q4,

18 const int16_t *filter_y, int y_step_q4,	20 const int16_t *filter_y, int y_step_q4,

19 int w, int h) {	21 int w, int h) {

20 /* Given our constraints: w <= 64, h <= 64, taps == 8 we can reduce the	22 /* Given our constraints: w <= 64, h <= 64, taps == 8 we can reduce the

21 * maximum buffer size to 64 * 64 + 7 (+ 1 to make it divisible by 4).	23 * maximum buffer size to 64 * 64 + 7 (+ 1 to make it divisible by 4).

22 */	24 */

23 DECLARE_ALIGNED(8, uint8_t, temp[64 * 72]);	25 DECLARE_ALIGNED(8, uint8_t, temp[64 * 72]);

24	26

25 // Account for the vertical phase needing 3 lines prior and 4 lines post	27 // Account for the vertical phase needing 3 lines prior and 4 lines post

26 int intermediate_height = h + 7;	28 int intermediate_height = h + 7;

27	29

28 if (x_step_q4 != 16 || y_step_q4 != 16) {	30 assert(y_step_q4 == 16);

29 vpx_convolve8_c(src, src_stride,	31 assert(x_step_q4 == 16);

30 dst, dst_stride,

31 filter_x, x_step_q4,

32 filter_y, y_step_q4,

33 w, h);

34 return;

35 }

36	32

37 /* Filter starting 3 lines back. The neon implementation will ignore the	33 /* Filter starting 3 lines back. The neon implementation will ignore the

38 * given height and filter a multiple of 4 lines. Since this goes in to	34 * given height and filter a multiple of 4 lines. Since this goes in to

39 * the temp buffer which has lots of extra room and is subsequently discarded	35 * the temp buffer which has lots of extra room and is subsequently discarded

40 * this is safe if somewhat less than ideal.	36 * this is safe if somewhat less than ideal.

41 */	37 */

42 vpx_convolve8_horiz_neon(src - src_stride * 3, src_stride,	38 vpx_convolve8_horiz_neon(src - src_stride * 3, src_stride,

43 temp, 64,	39 temp, 64,

44 filter_x, x_step_q4, filter_y, y_step_q4,	40 filter_x, x_step_q4, filter_y, y_step_q4,

45 w, intermediate_height);	41 w, intermediate_height);

46	42

47 /* Step into the temp buffer 3 lines to get the actual frame data */	43 /* Step into the temp buffer 3 lines to get the actual frame data */

48 vpx_convolve8_vert_neon(temp + 64 * 3, 64,	44 vpx_convolve8_vert_neon(temp + 64 * 3, 64,

49 dst, dst_stride,	45 dst, dst_stride,

50 filter_x, x_step_q4, filter_y, y_step_q4,	46 filter_x, x_step_q4, filter_y, y_step_q4,

51 w, h);	47 w, h);

52 }	48 }

53	49

54 void vpx_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride,	50 void vpx_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride,

55 uint8_t *dst, ptrdiff_t dst_stride,	51 uint8_t *dst, ptrdiff_t dst_stride,

56 const int16_t *filter_x, int x_step_q4,	52 const int16_t *filter_x, int x_step_q4,

57 const int16_t *filter_y, int y_step_q4,	53 const int16_t *filter_y, int y_step_q4,

58 int w, int h) {	54 int w, int h) {

59 DECLARE_ALIGNED(8, uint8_t, temp[64 * 72]);	55 DECLARE_ALIGNED(8, uint8_t, temp[64 * 72]);

60 int intermediate_height = h + 7;	56 int intermediate_height = h + 7;

61	57

62 if (x_step_q4 != 16 || y_step_q4 != 16) {	58 assert(y_step_q4 == 16);

63 vpx_convolve8_avg_c(src, src_stride,	59 assert(x_step_q4 == 16);

64 dst, dst_stride,

65 filter_x, x_step_q4,

66 filter_y, y_step_q4,

67 w, h);

68 return;

69 }

70	60

71 /* This implementation has the same issues as above. In addition, we only want	61 /* This implementation has the same issues as above. In addition, we only want

72 * to average the values after both passes.	62 * to average the values after both passes.

73 */	63 */

74 vpx_convolve8_horiz_neon(src - src_stride * 3, src_stride,	64 vpx_convolve8_horiz_neon(src - src_stride * 3, src_stride,

75 temp, 64,	65 temp, 64,

76 filter_x, x_step_q4, filter_y, y_step_q4,	66 filter_x, x_step_q4, filter_y, y_step_q4,

77 w, intermediate_height);	67 w, intermediate_height);

78 vpx_convolve8_avg_vert_neon(temp + 64 * 3,	68 vpx_convolve8_avg_vert_neon(temp + 64 * 3,

79 64, dst, dst_stride,	69 64, dst, dst_stride,

80 filter_x, x_step_q4, filter_y, y_step_q4,	70 filter_x, x_step_q4, filter_y, y_step_q4,

81 w, h);	71 w, h);

82 }	72 }

OLD	NEW

« no previous file with comments | « source/libvpx/vpx_dsp/arm/vpx_convolve8_neon_asm.asm ('k') | source/libvpx/vpx_dsp/bitreader.c » ('j') | no next file with comments »