source/libvpx/vp9/common/mips/dspr2/vp9_convolve2_avg_dspr2.c - Issue 54923004: libvpx: Pull from upstream

Side by Side Diff: source/libvpx/vp9/common/mips/dspr2/vp9_convolve2_avg_dspr2.c

Issue 54923004: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/

Patch Set: Created 7 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« no previous file with comments | « source/libvpx/vp9/common/mips/dspr2/vp9_common_dspr2.h ('k') | source/libvpx/vp9/common/mips/dspr2/vp9_convolve2_avg_horiz_dspr2.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
(Empty)
	1 /*

	2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved.

	3 *

	4 * Use of this source code is governed by a BSD-style license

	5 * that can be found in the LICENSE file in the root of the source

	6 * tree. An additional intellectual property rights grant can be found

	7 * in the file PATENTS. All contributing project authors may

	8 * be found in the AUTHORS file in the root of the source tree.

	9 */

	10

#include <assert.h>
#include <stdio.h>
#include <string.h>

#include "./vpx_config.h"
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_common.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"
#include "vp9/common/vp9_convolve.h"
#include "vp9/common/mips/dspr2/vp9_common_dspr2.h"

	21

	22 #if HAVE_DSPR2

	23 static void convolve_bi_avg_vert_4_dspr2(const uint8_t *src,

	24 int32_t src_stride,

	25 uint8_t *dst,

	26 int32_t dst_stride,

	27 const int16_t *filter_y,

	28 int32_t w,

	29 int32_t h) {

	30 int32_t x, y;

	31 const uint8_t *src_ptr;

	32 uint8_t *dst_ptr;

	33 uint8_t *cm = vp9_ff_cropTbl;

	34 uint32_t vector4a = 64;

	35 uint32_t load1, load2;

	36 uint32_t p1, p2;

	37 uint32_t scratch1, scratch2;

	38 uint32_t store1, store2;

	39 int32_t Temp1, Temp2;

	40 const int16_t *filter = &filter_y[3];

	41 uint32_t filter45;

	42

	43 filter45 = ((const int32_t *)filter)[0];

	44

	45 for (y = h; y--;) {

	46 /* prefetch data to cache memory */

	47 vp9_prefetch_store(dst + dst_stride);

	48

	49 for (x = 0; x < w; x += 4) {

	50 src_ptr = src + x;

	51 dst_ptr = dst + x;

	52

	53 __asm__ __volatile__ (

	54 "ulw %[load1], 0(%[src_ptr]) \n\t"

	55 "add %[src_ptr], %[src_ptr], %[src_stride] \n\t"

	56 "ulw %[load2], 0(%[src_ptr]) \n\t"

	57

	58 "mtlo %[vector4a], $ac0 \n\t"

	59 "mtlo %[vector4a], $ac1 \n\t"

	60 "mtlo %[vector4a], $ac2 \n\t"

	61 "mtlo %[vector4a], $ac3 \n\t"

	62 "mthi $zero, $ac0 \n\t"

	63 "mthi $zero, $ac1 \n\t"

	64 "mthi $zero, $ac2 \n\t"

	65 "mthi $zero, $ac3 \n\t"

	66

	67 "preceu.ph.qbr %[scratch1], %[load1] \n\t"

	68 "preceu.ph.qbr %[p1], %[load2] \n\t"

	69 "precrq.ph.w %[p2], %[p1], %[scratch1] \n\t" /* pixel 2 */

	70 "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */

	71

	72 "dpa.w.ph $ac0, %[p1], %[filter45] \n\t"

	73 "dpa.w.ph $ac1, %[p2], %[filter45] \n\t"

	74

	75 "preceu.ph.qbl %[scratch1], %[load1] \n\t"

	76 "preceu.ph.qbl %[p1], %[load2] \n\t"

	77 "precrq.ph.w %[p2], %[p1], %[scratch1] \n\t" /* pixel 2 */

	78 "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */

	79

	80 "dpa.w.ph $ac2, %[p1], %[filter45] \n\t"

	81 "dpa.w.ph $ac3, %[p2], %[filter45] \n\t"

	82

	83 "extp %[Temp1], $ac0, 31 \n\t"

	84 "extp %[Temp2], $ac1, 31 \n\t"

	85

	86 "lbu %[scratch1], 0(%[dst_ptr]) \n\t"

	87 "lbu %[scratch2], 1(%[dst_ptr]) \n\t"

	88

	89 "lbux %[store1], %[Temp1](%[cm]) \n\t"

	90 "addqh_r.w %[store1], %[store1], %[scratch1] \n\t" /* pixel 1 */

	91 "extp %[Temp1], $ac2, 31 \n\t"

	92

	93 "lbux %[store2], %[Temp2](%[cm]) \n\t"

	94 "addqh_r.w %[store2], %[store2], %[scratch2] \n\t" /* pixel 2 */

	95 "extp %[Temp2], $ac3, 31 \n\t"

	96 "lbu %[scratch1], 2(%[dst_ptr]) \n\t"

	97

	98 "sb %[store1], 0(%[dst_ptr]) \n\t"

	99 "sb %[store2], 1(%[dst_ptr]) \n\t"

	100 "lbu %[scratch2], 3(%[dst_ptr]) \n\t"

	101

	102 "lbux %[store1], %[Temp1](%[cm]) \n\t"

	103 "lbux %[store2], %[Temp2](%[cm]) \n\t"

	104 "addqh_r.w %[store1], %[store1], %[scratch1] \n\t" /* pixel 3 */

	105 "addqh_r.w %[store2], %[store2], %[scratch2] \n\t" /* pixel 4 */

	106

	107 "sb %[store1], 2(%[dst_ptr]) \n\t"

	108 "sb %[store2], 3(%[dst_ptr]) \n\t"

	109

	110 : [load1] "=&r" (load1), [load2] "=&r" (load2),

	111 [p1] "=&r" (p1), [p2] "=&r" (p2),

	112 [scratch1] "=&r" (scratch1), [scratch2] "=&r" (scratch2),

	113 [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2),

	114 [store1] "=&r" (store1), [store2] "=&r" (store2),

	115 [src_ptr] "+r" (src_ptr)

	116 : [filter45] "r" (filter45), [vector4a] "r" (vector4a),

	117 [src_stride] "r" (src_stride), [cm] "r" (cm),

	118 [dst_ptr] "r" (dst_ptr)

	119 );

	120 }

	121

	122 /* Next row... */

	123 src += src_stride;

	124 dst += dst_stride;

	125 }

	126 }

	127

	128 static void convolve_bi_avg_vert_64_dspr2(const uint8_t *src,

	129 int32_t src_stride,

	130 uint8_t *dst,

	131 int32_t dst_stride,

	132 const int16_t *filter_y,

	133 int32_t h) {

	134 int32_t x, y;

	135 const uint8_t *src_ptr;

	136 uint8_t *dst_ptr;

	137 uint8_t *cm = vp9_ff_cropTbl;

	138 uint32_t vector4a = 64;

	139 uint32_t load1, load2;

	140 uint32_t p1, p2;

	141 uint32_t scratch1, scratch2;

	142 uint32_t store1, store2;

	143 int32_t Temp1, Temp2;

	144 const int16_t *filter = &filter_y[3];

	145 uint32_t filter45;;

	146

	147 filter45 = ((const int32_t *)filter)[0];

	148

	149 for (y = h; y--;) {

	150 /* prefetch data to cache memory */

	151 vp9_prefetch_store(dst + dst_stride);

	152 vp9_prefetch_store(dst + dst_stride + 32);

	153

	154 for (x = 0; x < 64; x += 4) {

	155 src_ptr = src + x;

	156 dst_ptr = dst + x;

	157

	158 __asm__ __volatile__ (

	159 "ulw %[load1], 0(%[src_ptr]) \n\t"

	160 "add %[src_ptr], %[src_ptr], %[src_stride] \n\t"

	161 "ulw %[load2], 0(%[src_ptr]) \n\t"

	162

	163 "mtlo %[vector4a], $ac0 \n\t"

	164 "mtlo %[vector4a], $ac1 \n\t"

	165 "mtlo %[vector4a], $ac2 \n\t"

	166 "mtlo %[vector4a], $ac3 \n\t"

	167 "mthi $zero, $ac0 \n\t"

	168 "mthi $zero, $ac1 \n\t"

	169 "mthi $zero, $ac2 \n\t"

	170 "mthi $zero, $ac3 \n\t"

	171

	172 "preceu.ph.qbr %[scratch1], %[load1] \n\t"

	173 "preceu.ph.qbr %[p1], %[load2] \n\t"

	174 "precrq.ph.w %[p2], %[p1], %[scratch1] \n\t" /* pixel 2 */

	175 "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */

	176

	177 "dpa.w.ph $ac0, %[p1], %[filter45] \n\t"

	178 "dpa.w.ph $ac1, %[p2], %[filter45] \n\t"

	179

	180 "preceu.ph.qbl %[scratch1], %[load1] \n\t"

	181 "preceu.ph.qbl %[p1], %[load2] \n\t"

	182 "precrq.ph.w %[p2], %[p1], %[scratch1] \n\t" /* pixel 2 */

	183 "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */

	184

	185 "dpa.w.ph $ac2, %[p1], %[filter45] \n\t"

	186 "dpa.w.ph $ac3, %[p2], %[filter45] \n\t"

	187

	188 "extp %[Temp1], $ac0, 31 \n\t"

	189 "extp %[Temp2], $ac1, 31 \n\t"

	190

	191 "lbu %[scratch1], 0(%[dst_ptr]) \n\t"

	192 "lbu %[scratch2], 1(%[dst_ptr]) \n\t"

	193

	194 "lbux %[store1], %[Temp1](%[cm]) \n\t"

	195 "addqh_r.w %[store1], %[store1], %[scratch1] \n\t" /* pixel 1 */

	196 "extp %[Temp1], $ac2, 31 \n\t"

	197

	198 "lbux %[store2], %[Temp2](%[cm]) \n\t"

	199 "addqh_r.w %[store2], %[store2], %[scratch2] \n\t" /* pixel 2 */

	200 "extp %[Temp2], $ac3, 31 \n\t"

	201 "lbu %[scratch1], 2(%[dst_ptr]) \n\t"

	202

	203 "sb %[store1], 0(%[dst_ptr]) \n\t"

	204 "sb %[store2], 1(%[dst_ptr]) \n\t"

	205 "lbu %[scratch2], 3(%[dst_ptr]) \n\t"

	206

	207 "lbux %[store1], %[Temp1](%[cm]) \n\t"

	208 "lbux %[store2], %[Temp2](%[cm]) \n\t"

	209 "addqh_r.w %[store1], %[store1], %[scratch1] \n\t" /* pixel 3 */

	210 "addqh_r.w %[store2], %[store2], %[scratch2] \n\t" /* pixel 4 */

	211

	212 "sb %[store1], 2(%[dst_ptr]) \n\t"

	213 "sb %[store2], 3(%[dst_ptr]) \n\t"

	214

	215 : [load1] "=&r" (load1), [load2] "=&r" (load2),

	216 [p1] "=&r" (p1), [p2] "=&r" (p2),

	217 [scratch1] "=&r" (scratch1), [scratch2] "=&r" (scratch2),

	218 [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2),

	219 [store1] "=&r" (store1), [store2] "=&r" (store2),

	220 [src_ptr] "+r" (src_ptr)

	221 : [filter45] "r" (filter45), [vector4a] "r" (vector4a),

	222 [src_stride] "r" (src_stride), [cm] "r" (cm),

	223 [dst_ptr] "r" (dst_ptr)

	224 );

	225 }

	226

	227 /* Next row... */

	228 src += src_stride;

	229 dst += dst_stride;

	230 }

	231 }

	232

	233 void vp9_convolve2_avg_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,

	234 uint8_t *dst, ptrdiff_t dst_stride,

	235 const int16_t *filter_x, int x_step_q4,

	236 const int16_t *filter_y, int y_step_q4,

	237 int w, int h) {

	238 if (16 == y_step_q4) {

	239 uint32_t pos = 38;

	240

	241 /* bit positon for extract from acc */

	242 __asm__ __volatile__ (

	243 "wrdsp %[pos], 1 \n\t"

	244 :

	245 : [pos] "r" (pos)

	246 );

	247

	248 vp9_prefetch_store(dst);

	249

	250 switch (w) {

	251 case 4:

	252 case 8:

	253 case 16:

	254 case 32:

	255 convolve_bi_avg_vert_4_dspr2(src, src_stride,

	256 dst, dst_stride,

	257 filter_y, w, h);

	258 break;

	259 case 64:

	260 vp9_prefetch_store(dst + 32);

	261 convolve_bi_avg_vert_64_dspr2(src, src_stride,

	262 dst, dst_stride,

	263 filter_y, h);

	264 break;

	265 default:

	266 vp9_convolve8_avg_vert_c(src, src_stride,

	267 dst, dst_stride,

	268 filter_x, x_step_q4,

	269 filter_y, y_step_q4,

	270 w, h);

	271 break;

	272 }

	273 } else {

	274 vp9_convolve8_avg_vert_c(src, src_stride,

	275 dst, dst_stride,

	276 filter_x, x_step_q4,

	277 filter_y, y_step_q4,

	278 w, h);

	279 }

	280 }

	281 #endif

OLD	NEW