Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(99)

Side by Side Diff: source/scale_msa.cc

Issue 2527983002: Add MSA optimized ARGB scaling functions (Closed)
Patch Set: Changes as per review comments Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/scale_argb.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /*
2 * Copyright 2016 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "libyuv/scale_row.h"
12
13 // This module is for GCC MSA
14 #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
15 #include "libyuv/macros_msa.h"
16
17 #ifdef __cplusplus
18 namespace libyuv {
19 extern "C" {
20 #endif
21
22 void ScaleARGBRowDown2_MSA(const uint8_t* src_argb,
23 ptrdiff_t src_stride,
24 uint8_t* dst_argb,
25 int dst_width) {
26 int x;
27 v16u8 src0, src1, dst0;
28
29 for (x = 0; x < dst_width; x += 4) {
30 src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0);
31 src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16);
32 dst0 = (v16u8)__msa_pckod_w((v4i32)src1, (v4i32)src0);
33 ST_UB(dst0, dst_argb);
34 src_argb += 32;
35 dst_argb += 16;
36 }
37 }
38
39 void ScaleARGBRowDown2Linear_MSA(const uint8_t* src_argb,
40 ptrdiff_t src_stride,
41 uint8_t* dst_argb,
42 int dst_width) {
43 int x;
44 v16u8 src0, src1, vec0, vec1, dst0;
45
46 for (x = 0; x < dst_width; x += 4) {
47 src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0);
48 src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16);
49 vec0 = (v16u8)__msa_pckev_w((v4i32)src1, (v4i32)src0);
50 vec1 = (v16u8)__msa_pckod_w((v4i32)src1, (v4i32)src0);
51 dst0 = (v16u8)__msa_aver_u_b((v16u8)vec0, (v16u8)vec1);
52 ST_UB(dst0, dst_argb);
53 src_argb += 32;
54 dst_argb += 16;
55 }
56 }
57
58 void ScaleARGBRowDown2Box_MSA(const uint8_t* src_argb,
59 ptrdiff_t src_stride,
60 uint8_t* dst_argb,
61 int dst_width) {
62 int x;
63 const uint8_t* s = src_argb;
64 const uint8_t* t = src_argb + src_stride;
65 v16u8 src0, src1, src2, src3, vec0, vec1, vec2, vec3, dst0;
66 v8u16 reg0, reg1, reg2, reg3;
67 v16i8 shuffler = {0, 4, 1, 5, 2, 6, 3, 7, 8, 12, 9, 13, 10, 14, 11, 15};
68
69 for (x = 0; x < dst_width; x += 4) {
70 src0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
71 src1 = (v16u8)__msa_ld_b((v16i8*)s, 16);
72 src2 = (v16u8)__msa_ld_b((v16i8*)t, 0);
73 src3 = (v16u8)__msa_ld_b((v16i8*)t, 16);
74 vec0 = (v16u8)__msa_vshf_b(shuffler, (v16i8)src0, (v16i8)src0);
75 vec1 = (v16u8)__msa_vshf_b(shuffler, (v16i8)src1, (v16i8)src1);
76 vec2 = (v16u8)__msa_vshf_b(shuffler, (v16i8)src2, (v16i8)src2);
77 vec3 = (v16u8)__msa_vshf_b(shuffler, (v16i8)src3, (v16i8)src3);
78 reg0 = __msa_hadd_u_h(vec0, vec0);
79 reg1 = __msa_hadd_u_h(vec1, vec1);
80 reg2 = __msa_hadd_u_h(vec2, vec2);
81 reg3 = __msa_hadd_u_h(vec3, vec3);
82 reg0 += reg2;
83 reg1 += reg3;
84 reg0 = (v8u16)__msa_srari_h((v8i16)reg0, 2);
85 reg1 = (v8u16)__msa_srari_h((v8i16)reg1, 2);
86 dst0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0);
87 ST_UB(dst0, dst_argb);
88 s += 32;
89 t += 32;
90 dst_argb += 16;
91 }
92 }
93
94 void ScaleARGBRowDownEven_MSA(const uint8_t* src_argb,
95 ptrdiff_t src_stride,
96 int32_t src_stepx,
97 uint8_t* dst_argb,
98 int dst_width) {
99 int x;
100 int32_t stepx = src_stepx * 4;
101 int32_t data0, data1, data2, data3;
102
103 for (x = 0; x < dst_width; x += 4) {
104 data0 = LW(src_argb);
105 data1 = LW(src_argb + stepx);
106 data2 = LW(src_argb + stepx * 2);
107 data3 = LW(src_argb + stepx * 3);
108 SW(data0, dst_argb);
109 SW(data1, dst_argb + 4);
110 SW(data2, dst_argb + 8);
111 SW(data3, dst_argb + 12);
112 src_argb += stepx * 4;
113 dst_argb += 16;
114 }
115 }
116
117 void ScaleARGBRowDownEvenBox_MSA(const uint8* src_argb,
118 ptrdiff_t src_stride,
119 int src_stepx,
120 uint8* dst_argb,
121 int dst_width) {
122 int x;
123 const uint8* nxt_argb = src_argb + src_stride;
124 int32_t stepx = src_stepx * 4;
125 int64_t data0, data1, data2, data3;
126 v16u8 src0 = {0}, src1 = {0}, src2 = {0}, src3 = {0};
127 v16u8 vec0, vec1, vec2, vec3;
128 v8u16 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7;
129 v16u8 dst0;
130
131 for (x = 0; x < dst_width; x += 4) {
132 data0 = LD(src_argb);
133 data1 = LD(src_argb + stepx);
134 data2 = LD(src_argb + stepx * 2);
135 data3 = LD(src_argb + stepx * 3);
136 src0 = (v16u8)__msa_insert_d((v2i64)src0, 0, data0);
137 src0 = (v16u8)__msa_insert_d((v2i64)src0, 1, data1);
138 src1 = (v16u8)__msa_insert_d((v2i64)src1, 0, data2);
139 src1 = (v16u8)__msa_insert_d((v2i64)src1, 1, data3);
140 data0 = LD(nxt_argb);
141 data1 = LD(nxt_argb + stepx);
142 data2 = LD(nxt_argb + stepx * 2);
143 data3 = LD(nxt_argb + stepx * 3);
144 src2 = (v16u8)__msa_insert_d((v2i64)src2, 0, data0);
145 src2 = (v16u8)__msa_insert_d((v2i64)src2, 1, data1);
146 src3 = (v16u8)__msa_insert_d((v2i64)src3, 0, data2);
147 src3 = (v16u8)__msa_insert_d((v2i64)src3, 1, data3);
148 vec0 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src0);
149 vec1 = (v16u8)__msa_ilvr_b((v16i8)src3, (v16i8)src1);
150 vec2 = (v16u8)__msa_ilvl_b((v16i8)src2, (v16i8)src0);
151 vec3 = (v16u8)__msa_ilvl_b((v16i8)src3, (v16i8)src1);
152 reg0 = __msa_hadd_u_h(vec0, vec0);
153 reg1 = __msa_hadd_u_h(vec1, vec1);
154 reg2 = __msa_hadd_u_h(vec2, vec2);
155 reg3 = __msa_hadd_u_h(vec3, vec3);
156 reg4 = (v8u16)__msa_pckev_d((v2i64)reg2, (v2i64)reg0);
157 reg5 = (v8u16)__msa_pckev_d((v2i64)reg3, (v2i64)reg1);
158 reg6 = (v8u16)__msa_pckod_d((v2i64)reg2, (v2i64)reg0);
159 reg7 = (v8u16)__msa_pckod_d((v2i64)reg3, (v2i64)reg1);
160 reg4 += reg6;
161 reg5 += reg7;
162 reg4 = (v8u16)__msa_srari_h((v8i16)reg4, 2);
163 reg5 = (v8u16)__msa_srari_h((v8i16)reg5, 2);
164 dst0 = (v16u8)__msa_pckev_b((v16i8)reg5, (v16i8)reg4);
165 ST_UB(dst0, dst_argb);
166 src_argb += stepx * 4;
167 nxt_argb += stepx * 4;
168 dst_argb += 16;
169 }
170 }
171
172 #ifdef __cplusplus
173 } // extern "C"
174 } // namespace libyuv
175 #endif
176
177 #endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
OLDNEW
« no previous file with comments | « source/scale_argb.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698