Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(409)

Side by Side Diff: third_party/WebKit/Source/platform/graphics/cpu/mips/WebGLImageConversionMSA.h

Issue 2304183002: Add MSA (MIPS SIMD Arch) optimized WebGL image conversion functions (Closed)
Patch Set: Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2016 The Chromium Authors. All rights reserved. 1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef WebGLImageConversionMSA_h 5 #ifndef WebGLImageConversionMSA_h
6 #define WebGLImageConversionMSA_h 6 #define WebGLImageConversionMSA_h
7 7
8 #if HAVE(MIPS_MSA_INTRINSICS) 8 #if HAVE(MIPS_MSA_INTRINSICS)
9 9
10 #include "platform/cpu/mips/CommonMacrosMSA.h" 10 #include "platform/cpu/mips/CommonMacrosMSA.h"
11 11
12 namespace blink { 12 namespace blink {
13 13
14 namespace SIMD { 14 namespace SIMD {
15 15
// Splits eight packed RGBA5551 pixels ("in", v8u16) into per-channel v8u16
// lanes: R = bits [15:11], G = [10:6], B = [5:1], A = bit 0.
// Each 5-bit color channel c is widened to 8 bits as (c << 3) | (c & 7);
// NOTE(review): this presumably mirrors the scalar unpack path's widening
// formula — confirm against WebGLImageConversion.cpp.
// Alpha is expanded with a compare-equal: each halfword becomes all-ones
// (0xFFFF) when the alpha bit is set, 0 otherwise, so the low byte reads as
// 255 or 0 once repacked to bytes.
// NOTE: clobbers caller-declared v8u16 temporaries cnst31, cnst7 and cnst1.
#define SEPERATE_RGBA_FRM_16BIT_5551INPUT(in, out_r, out_g, out_b, out_a) \
    cnst31 = (v8u16)__msa_ldi_h(0x1F); \
    cnst7 = (v8u16)__msa_ldi_h(0x7); \
    cnst1 = (v8u16)__msa_ldi_h(0x1); \
    out_r = (v8u16)SRLI_H(in, 11); \
    out_g = ((v8u16)SRLI_H(in, 6)) & cnst31; \
    out_b = ((v8u16)SRLI_H(in, 1)) & cnst31; \
    out_a = in & cnst1; \
    out_r = ((v8u16)SLLI_H(out_r, 3)) | (out_r & cnst7); \
    out_g = ((v8u16)SLLI_H(out_g, 3)) | (out_g & cnst7); \
    out_b = ((v8u16)SLLI_H(out_b, 3)) | (out_b & cnst7); \
    out_a = (v8u16)CEQI_H((v8i16)out_a, 1); \

// Splits sixteen bytes of packed RGBA4444 pixels into two nibble-plane byte
// vectors: out_rb receives the high nibble of every byte and out_ga the low
// nibble (for little-endian RGBA4444 these carry the R/B and G/A channels
// respectively — NOTE(review): confirm channel assignment against callers).
// Each 4-bit value n is widened to 8 bits by nibble replication: (n << 4) | n.
#define SEPERATE_RGBA_FRM_16BIT_4444INPUT(in, out_rb, out_ga) \
    out_rb = (v16u8)SRLI_B((v16u8)in, 4); \
    out_ga = ANDI_B((v16u8)in, 15); \
    out_rb = ((v16u8)SLLI_B(out_rb, 4)) | out_rb; \
    out_ga = ((v16u8)SLLI_B(out_ga, 4)) | out_ga; \

29 ALWAYS_INLINE void unpackOneRowOfRGBA5551ToRGBA8MSA(const uint16_t*& source, uin t8_t*& destination, unsigned& pixelsPerRow) 35 ALWAYS_INLINE void unpackOneRowOfRGBA5551ToRGBA8MSA(const uint16_t*& source, uin t8_t*& destination, unsigned& pixelsPerRow)
30 { 36 {
31 unsigned i; 37 unsigned i;
32 v8u16 src0, src1, src2, src3; 38 v8u16 src0, src1, src2, src3;
33 v8u16 src0r, src0g, src0b, src0a, src1r, src1g, src1b, src1a; 39 v8u16 src0r, src0g, src0b, src0a, src1r, src1g, src1b, src1a;
34 v8u16 src2r, src2g, src2b, src2a, src3r, src3g, src3b, src3a; 40 v8u16 src2r, src2g, src2b, src2a, src3r, src3g, src3b, src3a;
35 v8u16 cnst31, cnst7, cnst1; 41 v8u16 cnst31, cnst7, cnst1;
36 v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; 42 v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
37 v16u8 dst8, dst9, dst10, dst11, dst12, dst13, dst14, dst15; 43 v16u8 dst8, dst9, dst10, dst11, dst12, dst13, dst14, dst15;
38 v16u8 out0, out1, out2, out3, out4, out5, out6, out7; 44 v16u8 out0, out1, out2, out3, out4, out5, out6, out7;
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after
92 ILVRL_B2_UB(src0g, src0r, dst0, dst1); 98 ILVRL_B2_UB(src0g, src0r, dst0, dst1);
93 ILVRL_B2_UB(src0a, src0b, dst2, dst3); 99 ILVRL_B2_UB(src0a, src0b, dst2, dst3);
94 ILVEV_H2_UB(dst0, dst2, dst1, dst3, out0, out1); 100 ILVEV_H2_UB(dst0, dst2, dst1, dst3, out0, out1);
95 ST_UB2(out0, out1, destination, 16); 101 ST_UB2(out0, out1, destination, 16);
96 } 102 }
97 } 103 }
98 104
99 pixelsPerRow &= 7; 105 pixelsPerRow &= 7;
100 } 106 }
101 107
// Converts one row of little-endian BGRA8 pixels to RGBA8 by swapping the B
// and R bytes of every 32-bit pixel with MSA byte shuffles.
//
// The load/store/shuffle macros advance source and destination as they go;
// on return pixelsPerRow holds the remainder (< 4 pixels) left for the
// caller's scalar fallback. The stride argument 4 is in uint32_t elements,
// i.e. 16 bytes (one vector) per step.
ALWAYS_INLINE void unpackOneRowOfBGRA8LittleToRGBA8MSA(const uint32_t*& source, uint32_t*& destination, unsigned& pixelsPerRow)
{
    unsigned i;
    v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
    v16u8 src8, src9, src10, src11, src12, src13, src14, src15;

    // Main loop: 64 pixels (16 vectors of 4 pixels) per iteration.
    for (i = (pixelsPerRow >> 6); i--;) {
        LD_UB8(source, 4, src0, src1, src2, src3, src4, src5, src6, src7);
        LD_UB8(source, 4, src8, src9, src10, src11, src12, src13, src14, src15);
        // shf.b control 198 (0xC6) maps bytes {0,1,2,3} -> {2,1,0,3} within
        // each word: BGRA -> RGBA.
        SHF_B4_UB(src0, src1, src2, src3, 198);
        SHF_B4_UB(src4, src5, src6, src7, 198);
        SHF_B4_UB(src8, src9, src10, src11, 198);
        SHF_B4_UB(src12, src13, src14, src15, 198);
        ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, destination, 4);
        ST_UB8(src8, src9, src10, src11, src12, src13, src14, src15, destination, 4);
    }

    // Vector tail: dispatch on the 32/16/8 bits of the remaining pixel count
    // so each combination is handled with one straight-line sequence.
    if (pixelsPerRow & 63) {
        if (pixelsPerRow & 32) {
            if ((pixelsPerRow & 16) && (pixelsPerRow & 8)) {
                // 56 remaining pixels (14 vectors).
                LD_UB8(source, 4, src0, src1, src2, src3, src4, src5, src6, src7);
                LD_UB6(source, 4, src8, src9, src10, src11, src12, src13);
                SHF_B4_UB(src0, src1, src2, src3, 198);
                SHF_B4_UB(src4, src5, src6, src7, 198);
                SHF_B4_UB(src8, src9, src10, src11, 198);
                SHF_B2_UB(src12, src13, 198);
                ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, destination, 4);
                ST_UB6(src8, src9, src10, src11, src12, src13, destination, 4);
            } else if (pixelsPerRow & 16) {
                // 48 remaining pixels (12 vectors).
                LD_UB8(source, 4, src0, src1, src2, src3, src4, src5, src6, src7);
                LD_UB4(source, 4, src8, src9, src10, src11);
                SHF_B4_UB(src0, src1, src2, src3, 198);
                SHF_B4_UB(src4, src5, src6, src7, 198);
                SHF_B4_UB(src8, src9, src10, src11, 198);
                ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, destination, 4);
                ST_UB4(src8, src9, src10, src11, destination, 4);
            } else if (pixelsPerRow & 8) {
                // 40 remaining pixels (10 vectors).
                LD_UB8(source, 4, src0, src1, src2, src3, src4, src5, src6, src7);
                LD_UB2(source, 4, src8, src9);
                SHF_B4_UB(src0, src1, src2, src3, 198);
                SHF_B4_UB(src4, src5, src6, src7, 198);
                SHF_B2_UB(src8, src9, 198);
                ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, destination, 4);
                ST_UB2(src8, src9, destination, 4);
            } else {
                // 32 remaining pixels (8 vectors).
                LD_UB8(source, 4, src0, src1, src2, src3, src4, src5, src6, src7);
                SHF_B4_UB(src0, src1, src2, src3, 198);
                SHF_B4_UB(src4, src5, src6, src7, 198);
                ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, destination, 4);
            }
        } else if ((pixelsPerRow & 16) && (pixelsPerRow & 8)) {
            // 24 remaining pixels (6 vectors).
            LD_UB6(source, 4, src0, src1, src2, src3, src4, src5);
            SHF_B4_UB(src0, src1, src2, src3, 198);
            SHF_B2_UB(src4, src5, 198);
            ST_UB6(src0, src1, src2, src3, src4, src5, destination, 4);
        } else if (pixelsPerRow & 16) {
            // 16 remaining pixels (4 vectors).
            LD_UB4(source, 4, src0, src1, src2, src3);
            SHF_B4_UB(src0, src1, src2, src3, 198);
            ST_UB4(src0, src1, src2, src3, destination, 4);
        } else if (pixelsPerRow & 8) {
            // 8 remaining pixels (2 vectors).
            LD_UB2(source, 4, src0, src1);
            SHF_B2_UB(src0, src1, 198);
            ST_UB2(src0, src1, destination, 4);
        }

        if (pixelsPerRow & 4) {
            // One final vector of 4 pixels; pointers advance by 4 uint32_t.
            src0 = LD_UB(source);
            source += 4;
            src0 = (v16u8)__msa_shf_b((v16i8)src0, 198);
            ST_UB(src0, destination);
            destination += 4;
        }
    }

    pixelsPerRow &= 3;
}
184
// Expands one row of packed RGBA4444 pixels (16 bits/pixel) to RGBA8
// (32 bits/pixel). Each source vector of 8 pixels is split into its
// high-nibble and low-nibble planes by SEPERATE_RGBA_FRM_16BIT_4444INPUT
// (which also widens each nibble to a byte by replication), then the planes
// are interleaved back into RGBA byte order with ILVODEV/ILVRL.
//
// The load/store macros advance source and destination; on return
// pixelsPerRow holds the remainder (< 8 pixels) for the caller's scalar
// fallback. The load stride 8 is in uint16_t elements (16 bytes per vector);
// the store stride 16 is in uint8_t.
ALWAYS_INLINE void unpackOneRowOfRGBA4444ToRGBA8MSA(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow)
{
    unsigned i;
    v8u16 src0, src1, src2, src3;
    v16u8 src0rb, src0ga, src1rb, src1ga, src2rb, src2ga, src3rb, src3ga;
    v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
    v16u8 out0, out1, out2, out3, out4, out5, out6, out7;

    // Main loop: 32 pixels (4 input vectors of 8 pixels) per iteration.
    for (i = (pixelsPerRow >> 5); i--;) {
        LD_UH4(source, 8, src0, src1, src2, src3);
        SEPERATE_RGBA_FRM_16BIT_4444INPUT(src0, src0rb, src0ga);
        SEPERATE_RGBA_FRM_16BIT_4444INPUT(src1, src1rb, src1ga);
        SEPERATE_RGBA_FRM_16BIT_4444INPUT(src2, src2rb, src2ga);
        SEPERATE_RGBA_FRM_16BIT_4444INPUT(src3, src3rb, src3ga);
        ILVODEV_B2_UB(src0ga, src0rb, dst0, dst1);
        ILVODEV_B2_UB(src1ga, src1rb, dst2, dst3);
        ILVODEV_B2_UB(src2ga, src2rb, dst4, dst5);
        ILVODEV_B2_UB(src3ga, src3rb, dst6, dst7);
        ILVRL_H2_UB(dst1, dst0, out0, out1);
        ILVRL_H2_UB(dst3, dst2, out2, out3);
        ILVRL_H2_UB(dst5, dst4, out4, out5);
        ILVRL_H2_UB(dst7, dst6, out6, out7);
        ST_UB8(out0, out1, out2, out3, out4, out5, out6, out7, destination, 16);
    }

    // Vector tail: 24, 16 or 8 remaining pixels, selected by bit tests.
    if (pixelsPerRow & 31) {
        if ((pixelsPerRow & 16) && (pixelsPerRow & 8)) {
            // 24 remaining pixels (3 input vectors).
            LD_UH3(source, 8, src0, src1, src2);
            SEPERATE_RGBA_FRM_16BIT_4444INPUT(src0, src0rb, src0ga);
            SEPERATE_RGBA_FRM_16BIT_4444INPUT(src1, src1rb, src1ga);
            SEPERATE_RGBA_FRM_16BIT_4444INPUT(src2, src2rb, src2ga);
            ILVODEV_B2_UB(src0ga, src0rb, dst0, dst1);
            ILVODEV_B2_UB(src1ga, src1rb, dst2, dst3);
            ILVODEV_B2_UB(src2ga, src2rb, dst4, dst5);
            ILVRL_H2_UB(dst1, dst0, out0, out1);
            ILVRL_H2_UB(dst3, dst2, out2, out3);
            ILVRL_H2_UB(dst5, dst4, out4, out5);
            ST_UB6(out0, out1, out2, out3, out4, out5, destination, 16);
        } else if (pixelsPerRow & 16) {
            // 16 remaining pixels (2 input vectors).
            LD_UH2(source, 8, src0, src1);
            SEPERATE_RGBA_FRM_16BIT_4444INPUT(src0, src0rb, src0ga);
            SEPERATE_RGBA_FRM_16BIT_4444INPUT(src1, src1rb, src1ga);
            ILVODEV_B2_UB(src0ga, src0rb, dst0, dst1);
            ILVODEV_B2_UB(src1ga, src1rb, dst2, dst3);
            ILVRL_H2_UB(dst1, dst0, out0, out1);
            ILVRL_H2_UB(dst3, dst2, out2, out3);
            ST_UB4(out0, out1, out2, out3, destination, 16);
        } else if (pixelsPerRow & 8) {
            // 8 remaining pixels (1 input vector); advance by 8 uint16_t.
            src0 = LD_UH(source);
            source += 8;
            SEPERATE_RGBA_FRM_16BIT_4444INPUT(src0, src0rb, src0ga);
            ILVODEV_B2_UB(src0ga, src0rb, dst0, dst1);
            ILVRL_H2_UB(dst1, dst0, out0, out1);
            ST_UB2(out0, out1, destination, 16);
        }
    }

    pixelsPerRow &= 7;
}
244
// Packs one row of RGBA8 pixels to RGBA8 while scaling each color channel by
// 255/alpha in single-precision float (i.e. an alpha un-multiply; presumably
// the MSA counterpart of the scalar path's `alpha ? 255/alpha : 1` rule —
// NOTE(review): confirm against WebGLImageConversion.cpp). Alpha bytes equal
// to 0 are substituted with 255 before the divide, which yields a scale
// factor of 1.0 and also avoids a division by zero. The alpha channel itself
// is copied through unchanged (dst*A is re-extracted from the original src).
//
// The load/store macros advance source and destination; on return
// pixelsPerRow holds the remainder (< 4 pixels) for the scalar fallback.
ALWAYS_INLINE void packOneRowOfRGBA8LittleToRGBA8MSA(const uint8_t*& source, uint8_t*& destination, unsigned& pixelsPerRow)
{
    unsigned i;
    v16u8 src0, src1, src2, src3, out0, out1, out2, out3;
    v16u8 src0R, src1R, src2R, src3R, src0G, src1G, src2G, src3G;
    v16u8 src0B, src1B, src2B, src3B, src0A, src1A, src2A, src3A;
    v16u8 dst0R, dst1R, dst2R, dst3R, dst0G, dst1G, dst2G, dst3G;
    v16u8 dst0B, dst1B, dst2B, dst3B, dst0A, dst1A, dst2A, dst3A;
    v16u8 dst0RG, dst1RG, dst2RG, dst3RG, dst0BA, dst1BA, dst2BA, dst3BA;
    v4f32 fsrc0R, fsrc1R, fsrc2R, fsrc3R, fsrc0G, fsrc1G, fsrc2G, fsrc3G;
    v4f32 fsrc0B, fsrc1B, fsrc2B, fsrc3B, fsrc0A, fsrc1A, fsrc2A, fsrc3A;
    v4u32 vCnst255 = (v4u32) __msa_ldi_w(255);
    // Selects the alpha byte (offset 3) of each little-endian RGBA word.
    v16u8 alphaMask = {0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255};
    v4f32 vfCnst255 = __msa_ffint_u_w(vCnst255);

    // Main loop: 16 pixels (4 vectors of 4 pixels) per iteration.
    for (i = (pixelsPerRow >> 4); i--;) {
        LD_UB4(source, 16, src0, src1, src2, src3);
        // Build the divisor: zero bytes are overlaid with alphaMask (so a
        // zero alpha becomes 255), then everything but the alpha byte is
        // masked off.
        CEQI_B4_UB(src0, src1, src2, src3, 0, src0A, src1A, src2A, src3A);
        src0A = __msa_bmnz_v(src0, alphaMask, src0A);
        src1A = __msa_bmnz_v(src1, alphaMask, src1A);
        src2A = __msa_bmnz_v(src2, alphaMask, src2A);
        src3A = __msa_bmnz_v(src3, alphaMask, src3A);
        AND_V4_UB(src0A, src1A, src2A, src3A, alphaMask, src0A, src1A, src2A, src3A);
        // Shift the alpha byte down into the low byte of each 32-bit lane,
        // convert to float and compute the per-pixel factor 255/alpha.
        src0A = SLDI_UB(src0A, src0A, 3);
        src1A = SLDI_UB(src1A, src1A, 3);
        src2A = SLDI_UB(src2A, src2A, 3);
        src3A = SLDI_UB(src3A, src3A, 3);
        FFINTU_W4_SP(src0A, src1A, src2A, src3A, fsrc0A, fsrc1A, fsrc2A, fsrc3A);
        DIV4(vfCnst255, fsrc0A, vfCnst255, fsrc1A, vfCnst255, fsrc2A, vfCnst255, fsrc3A, fsrc0A, fsrc1A, fsrc2A, fsrc3A);
        // R channel: low byte of each lane; scale by the alpha factor.
        AND_V4_UB(src0, src1, src2, src3, vCnst255, src0R, src1R, src2R, src3R);
        FFINTU_W4_SP(src0R, src1R, src2R, src3R, fsrc0R, fsrc1R, fsrc2R, fsrc3R);
        MUL4(fsrc0R, fsrc0A, fsrc1R, fsrc1A, fsrc2R, fsrc2A, fsrc3R, fsrc3A, fsrc0R, fsrc1R, fsrc2R, fsrc3R);
        // G channel: byte 1 of each lane, shifted down then masked.
        src0G = SLDI_UB(src0, src0, 1);
        src1G = SLDI_UB(src1, src1, 1);
        src2G = SLDI_UB(src2, src2, 1);
        src3G = SLDI_UB(src3, src3, 1);
        AND_V4_UB(src0G, src1G, src2G, src3G, vCnst255, src0G, src1G, src2G, src3G);
        FFINTU_W4_SP(src0G, src1G, src2G, src3G, fsrc0G, fsrc1G, fsrc2G, fsrc3G);
        MUL4(fsrc0G, fsrc0A, fsrc1G, fsrc1A, fsrc2G, fsrc2A, fsrc3G, fsrc3A, fsrc0G, fsrc1G, fsrc2G, fsrc3G);
        // B channel: byte 2 of each lane.
        src0B = SLDI_UB(src0, src0, 2);
        src1B = SLDI_UB(src1, src1, 2);
        src2B = SLDI_UB(src2, src2, 2);
        src3B = SLDI_UB(src3, src3, 2);
        AND_V4_UB(src0B, src1B, src2B, src3B, vCnst255, src0B, src1B, src2B, src3B);
        FFINTU_W4_SP(src0B, src1B, src2B, src3B, fsrc0B, fsrc1B, fsrc2B, fsrc3B);
        MUL4(fsrc0B, fsrc0A, fsrc1B, fsrc1A, fsrc2B, fsrc2A, fsrc3B, fsrc3A, fsrc0B, fsrc1B, fsrc2B, fsrc3B);
        // Back to integers (truncating), and re-extract the original alpha.
        FTRUNCU_W4_UB(fsrc0R, fsrc1R, fsrc2R, fsrc3R, dst0R, dst1R, dst2R, dst3R);
        FTRUNCU_W4_UB(fsrc0G, fsrc1G, fsrc2G, fsrc3G, dst0G, dst1G, dst2G, dst3G);
        FTRUNCU_W4_UB(fsrc0B, fsrc1B, fsrc2B, fsrc3B, dst0B, dst1B, dst2B, dst3B);
        dst0A = SLDI_UB(src0, src0, 3);
        dst1A = SLDI_UB(src1, src1, 3);
        dst2A = SLDI_UB(src2, src2, 3);
        dst3A = SLDI_UB(src3, src3, 3);
        // Interleave R,G,B,A bytes back into packed RGBA words.
        ILVEV_B2_UB(dst0R, dst0G, dst1R, dst1G, dst0RG, dst1RG);
        ILVEV_B2_UB(dst2R, dst2G, dst3R, dst3G, dst2RG, dst3RG);
        ILVEV_B2_UB(dst0B, dst0A, dst1B, dst1A, dst0BA, dst1BA);
        ILVEV_B2_UB(dst2B, dst2A, dst3B, dst3A, dst2BA, dst3BA);
        ILVEV_H2_UB(dst0RG, dst0BA, dst1RG, dst1BA, out0, out1);
        ILVEV_H2_UB(dst2RG, dst2BA, dst3RG, dst3BA, out2, out3);
        ST_UB4(out0, out1, out2, out3, destination, 16);
    }

    // Vector tail: 8 then 4 remaining pixels; same pipeline, fewer vectors.
    if (pixelsPerRow & 15) {
        if (pixelsPerRow & 8) {
            LD_UB2(source, 16, src0, src1);
            CEQI_B2_UB(src0, src1, 0, src0A, src1A);
            src0A = __msa_bmnz_v(src0, alphaMask, src0A);
            src1A = __msa_bmnz_v(src1, alphaMask, src1A);
            AND_V2_UB(src0A, src1A, alphaMask, src0A, src1A);
            src0A = SLDI_UB(src0A, src0A, 3);
            src1A = SLDI_UB(src1A, src1A, 3);
            FFINTU_W2_SP(src0A, src1A, fsrc0A, fsrc1A);
            DIV2(vfCnst255, fsrc0A, vfCnst255, fsrc1A, fsrc0A, fsrc1A);
            AND_V2_UB(src0, src1, vCnst255, src0R, src1R);
            FFINTU_W2_SP(src0R, src1R, fsrc0R, fsrc1R);
            MUL2(fsrc0R, fsrc0A, fsrc1R, fsrc1A, fsrc0R, fsrc1R);
            src0G = SLDI_UB(src0, src0, 1);
            src1G = SLDI_UB(src1, src1, 1);
            AND_V2_UB(src0G, src1G, vCnst255, src0G, src1G);
            FFINTU_W2_SP(src0G, src1G, fsrc0G, fsrc1G);
            MUL2(fsrc0G, fsrc0A, fsrc1G, fsrc1A, fsrc0G, fsrc1G);
            src0B = SLDI_UB(src0, src0, 2);
            src1B = SLDI_UB(src1, src1, 2);
            AND_V2_UB(src0B, src1B, vCnst255, src0B, src1B);
            FFINTU_W2_SP(src0B, src1B, fsrc0B, fsrc1B);
            MUL2(fsrc0B, fsrc0A, fsrc1B, fsrc1A, fsrc0B, fsrc1B);
            FTRUNCU_W2_UB(fsrc0R, fsrc1R, dst0R, dst1R);
            FTRUNCU_W2_UB(fsrc0G, fsrc1G, dst0G, dst1G);
            FTRUNCU_W2_UB(fsrc0B, fsrc1B, dst0B, dst1B);
            dst0A = SLDI_UB(src0, src0, 3);
            dst1A = SLDI_UB(src1, src1, 3);
            ILVEV_B2_UB(dst0R, dst0G, dst1R, dst1G, dst0RG, dst1RG);
            ILVEV_B2_UB(dst0B, dst0A, dst1B, dst1A, dst0BA, dst1BA);
            ILVEV_H2_UB(dst0RG, dst0BA, dst1RG, dst1BA, out0, out1);
            ST_UB2(out0, out1, destination, 16);
        }

        if (pixelsPerRow & 4) {
            // Single-vector (4 pixel) tail, written with scalar intrinsics.
            src0 = LD_UB(source);
            source += 16;
            src0A = CEQI_B(src0, 0);
            src0A = __msa_bmnz_v(src0, alphaMask, src0A);
            src0A = src0A & alphaMask;
            src0A = SLDI_UB(src0A, src0A, 3);
            fsrc0A = __msa_ffint_u_w((v4u32)src0A);
            fsrc0A = vfCnst255 / fsrc0A;
            src0R = src0 & (v16u8)vCnst255;
            fsrc0R = __msa_ffint_u_w((v4u32)src0R);
            fsrc0R *= fsrc0A;
            src0G = SLDI_UB(src0, src0, 1);
            src0G &= (v16u8)vCnst255;
            fsrc0G = __msa_ffint_u_w((v4u32)src0G);
            fsrc0G *= fsrc0A;
            src0B = SLDI_UB(src0, src0, 2);
            src0B &= (v16u8)vCnst255;
            fsrc0B = __msa_ffint_u_w((v4u32)src0B);
            fsrc0B *= fsrc0A;
            dst0R = (v16u8)__msa_ftrunc_u_w(fsrc0R);
            dst0G = (v16u8)__msa_ftrunc_u_w(fsrc0G);
            dst0B = (v16u8)__msa_ftrunc_u_w(fsrc0B);
            dst0A = SLDI_UB(src0, src0, 3);
            dst0RG = (v16u8)__msa_ilvev_b((v16i8)dst0G, (v16i8)dst0R);
            dst0BA = (v16u8)__msa_ilvev_b((v16i8)dst0A, (v16i8)dst0B);
            out0 = (v16u8)__msa_ilvev_h((v8i16)dst0BA, (v8i16)dst0RG);
            ST_UB(out0, destination);
            destination += 16;
        }
    }

    pixelsPerRow &= 3;
}
376
// Packs one row of RGBA8 pixels into GL_UNSIGNED_SHORT_5_5_5_1: each 16-bit
// output is R[15:11] G[10:6] B[5:1] A[0].
//
// Per vector of 4 pixels: pckev_h/pckod_h split the stream into an (R,G)
// byte plane and a (B,A) byte plane; sldi isolates G and A; then shift and
// bit-insert (binsri) ops assemble the high output byte (R top-5 + G top-3)
// and low output byte (G next-2 + B top-5 + A top bit), which ilvev_b
// interleaves into the 16-bit results.
//
// The g/a accumulators are zero-initialized because the SLDI macros read
// their first operand. The load/store macros advance source/destination;
// on return pixelsPerRow holds the remainder (< 8 pixels) for the scalar
// fallback.
ALWAYS_INLINE void packOneRowOfRGBA8ToUnsignedShort5551MSA(const uint8_t*& source, uint16_t*& destination, unsigned& pixelsPerRow)
{
    unsigned i;
    v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
    v16u8 src0r, src0b, src1r, src1b, src2r, src2b, src3r, src3b;
    v16u8 src0g = { 0 }, src0a = { 0 }, src1g = { 0 }, src1a = { 0 };
    v16u8 src2g = { 0 }, src2a = { 0 }, src3g = { 0 }, src3a = { 0 };
    v16u8 src0gt, src1gt, src2gt, src3gt;
    v8u16 dst0, dst1, dst2, dst3;

    // Main loop: 32 pixels (8 input vectors of 4 pixels) per iteration.
    for (i = (pixelsPerRow >> 5); i--;) {
        LD_UB8(source, 16, src0, src1, src2, src3, src4, src5, src6, src7);
        PCKEV_H4_UB(src1, src0, src3, src2, src5, src4, src7, src6, src0r, src1r, src2r, src3r);
        PCKOD_H4_UB(src1, src0, src3, src2, src5, src4, src7, src6, src0b, src1b, src2b, src3b);
        SLDI_B2_UB(src0g, src1g, src0r, src1r, src0g, src1g, 1);
        SLDI_B2_UB(src2g, src3g, src2r, src3r, src2g, src3g, 1);
        SLDI_B2_UB(src0a, src1a, src0b, src1b, src0a, src1a, 1);
        SLDI_B2_UB(src2a, src3a, src2b, src3b, src2a, src3a, 1);
        // g << 3 positions G's bits [4:3] at the top of the low output byte.
        src0gt = (v16u8)SLLI_B(src0g, 3);
        src1gt = (v16u8)SLLI_B(src1g, 3);
        src2gt = (v16u8)SLLI_B(src2g, 3);
        src3gt = (v16u8)SLLI_B(src3g, 3);
        SRLI_B4_UB(src0g, src1g, src2g, src3g, 5);
        SRLI_B4_UB(src0b, src1b, src2b, src3b, 2);
        SRLI_B4_UB(src0a, src1a, src2a, src3a, 7);
        // High byte: insert G's top 3 bits into R's low 3 bits.
        BINSRI_B2_UB(src0r, src0g, src1r, src1g, src0r, src1r, 2);
        BINSRI_B2_UB(src2r, src2g, src3r, src3g, src2r, src3r, 2);
        // Low byte: B's top bits into the shifted G, then the A bit.
        BINSRI_B2_UB(src0gt, src0b, src1gt, src1b, src0b, src1b, 5);
        BINSRI_B2_UB(src2gt, src2b, src3gt, src3b, src2b, src3b, 5);
        BINSRI_B2_UB(src0b, src0a, src1b, src1a, src0b, src1b, 0);
        BINSRI_B2_UB(src2b, src2a, src3b, src3a, src2b, src3b, 0);
        ILVEV_B2_UH(src0b, src0r, src1b, src1r, dst0, dst1);
        ILVEV_B2_UH(src2b, src2r, src3b, src3r, dst2, dst3);
        ST_UH4(dst0, dst1, dst2, dst3, destination, 8);
    }

    // Vector tail: 24, 16 or 8 remaining pixels.
    if (pixelsPerRow & 31) {
        if ((pixelsPerRow & 16) && (pixelsPerRow & 8)) {
            // 24 remaining pixels (6 input vectors).
            LD_UB6(source, 16, src0, src1, src2, src3, src4, src5);
            PCKEV_H3_UB(src1, src0, src3, src2, src5, src4, src0r, src1r, src2r);
            PCKOD_H3_UB(src1, src0, src3, src2, src5, src4, src0b, src1b, src2b);
            SLDI_B2_UB(src0g, src1g, src0r, src1r, src0g, src1g, 1);
            SLDI_B2_UB(src2g, src0a, src2r, src0b, src2g, src0a, 1);
            SLDI_B2_UB(src1a, src2a, src1b, src2b, src1a, src2a, 1);
            src0gt = (v16u8)SLLI_B(src0g, 3);
            src1gt = (v16u8)SLLI_B(src1g, 3);
            src2gt = (v16u8)SLLI_B(src2g, 3);
            SRLI_B3_UB(src0g, src1g, src2g, 5);
            SRLI_B3_UB(src0b, src1b, src2b, 2);
            SRLI_B3_UB(src0a, src1a, src2a, 7);
            BINSRI_B3_UB(src0r, src0g, src1r, src1g, src2r, src2g, src0r, src1r, src2r, 2);
            BINSRI_B3_UB(src0gt, src0b, src1gt, src1b, src2gt, src2b, src0b, src1b, src2b, 5);
            BINSRI_B3_UB(src0b, src0a, src1b, src1a, src2b, src2a, src0b, src1b, src2b, 0);
            ILVEV_B3_UH(src0b, src0r, src1b, src1r, src2b, src2r, dst0, dst1, dst2);
            ST_UH3(dst0, dst1, dst2, destination, 8);
        } else if (pixelsPerRow & 16) {
            // 16 remaining pixels (4 input vectors).
            LD_UB4(source, 16, src0, src1, src2, src3);
            PCKEV_H2_UB(src1, src0, src3, src2, src0r, src1r);
            PCKOD_H2_UB(src1, src0, src3, src2, src0b, src1b);
            SLDI_B2_UB(src0g, src1g, src0r, src1r, src0g, src1g, 1);
            SLDI_B2_UB(src0a, src1a, src0b, src1b, src0a, src1a, 1);
            src0gt = (v16u8)SLLI_B(src0g, 3);
            src1gt = (v16u8)SLLI_B(src1g, 3);
            SRLI_B2_UB(src0g, src1g, 5);
            SRLI_B2_UB(src0b, src1b, 2);
            SRLI_B2_UB(src0a, src1a, 7);
            BINSRI_B2_UB(src0r, src0g, src1r, src1g, src0r, src1r, 2);
            BINSRI_B2_UB(src0gt, src0b, src1gt, src1b, src0b, src1b, 5);
            BINSRI_B2_UB(src0b, src0a, src1b, src1a, src0b, src1b, 0);
            ILVEV_B2_UH(src0b, src0r, src1b, src1r, dst0, dst1);
            ST_UH2(dst0, dst1, destination, 8);
        } else if (pixelsPerRow & 8) {
            // 8 remaining pixels (2 input vectors), via scalar intrinsics.
            LD_UB2(source, 16, src0, src1);
            src0r = (v16u8)__msa_pckev_h((v8i16)src1, (v8i16)src0);
            src0b = (v16u8)__msa_pckod_h((v8i16)src1, (v8i16)src0);
            SLDI_B2_UB(src0g, src0a, src0r, src0b, src0g, src0a, 1);
            src0gt = (v16u8)SLLI_B(src0g, 3);
            src0g = (v16u8)SRLI_B(src0g, 5);
            src0b = (v16u8)SRLI_B(src0b, 2);
            src0a = (v16u8)SRLI_B(src0a, 7);
            src0r = (v16u8)__msa_binsri_b((v16u8)src0r, (v16u8)src0g, 2);
            src0b = (v16u8)__msa_binsri_b((v16u8)src0gt, (v16u8)src0b, 5);
            src0b = (v16u8)__msa_binsri_b((v16u8)src0b, (v16u8)src0a, 0);
            dst0 = (v8u16)__msa_ilvev_b((v16i8)src0r, (v16i8)src0b);
            ST_UH(dst0, destination);
            destination += 8;
        }
    }

    pixelsPerRow &= 7;
}
468
// Packs one row of RGBA8 pixels into GL_UNSIGNED_SHORT_5_6_5: each 16-bit
// output is R[15:11] G[10:5] B[4:0]; alpha is dropped.
//
// The assembly scheme matches the 5551 variant: pckev_h/pckod_h split the
// stream into (R,G) and (B,A) byte planes, sldi isolates G, then shift and
// bit-insert (binsri) ops build the high output byte (R top-5 + G top-3)
// and low output byte (G next-3 + B top-5), interleaved by ilvev_b.
//
// The main loop is software-pipelined: loads for the second 32-pixel half
// are issued between the interleave and store of the first half.
// g accumulators are zero-initialized because the SLDI macros read their
// first operand. Pointers advance via the macros; on return pixelsPerRow
// holds the remainder (< 8 pixels) for the scalar fallback.
ALWAYS_INLINE void packOneRowOfRGBA8ToUnsignedShort565MSA(const uint8_t*& source, uint16_t*& destination, unsigned& pixelsPerRow)
{
    unsigned i;
    v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
    v16u8 src0r, src0b, src1r, src1b, src2r, src2b, src3r, src3b;
    v16u8 src0g = { 0 }, src1g = { 0 }, src2g = { 0 }, src3g = { 0 };
    v16u8 src0gt, src1gt, src2gt, src3gt;
    v8u16 dst0, dst1, dst2, dst3;

    // Main loop: 64 pixels (two 32-pixel halves) per iteration.
    for (i = (pixelsPerRow >> 6); i--;) {
        LD_UB8(source, 16, src0, src1, src2, src3, src4, src5, src6, src7);
        PCKEV_H4_UB(src1, src0, src3, src2, src5, src4, src7, src6, src0r, src1r, src2r, src3r);
        PCKOD_H4_UB(src1, src0, src3, src2, src5, src4, src7, src6, src0b, src1b, src2b, src3b);
        SLDI_B2_UB(src0g, src1g, src0r, src1r, src0g, src1g, 1);
        SLDI_B2_UB(src2g, src3g, src2r, src3r, src2g, src3g, 1);
        // g << 3 positions G's bits [4:2] at the top of the low output byte.
        src0gt = (v16u8)SLLI_B(src0g, 3);
        src1gt = (v16u8)SLLI_B(src1g, 3);
        src2gt = (v16u8)SLLI_B(src2g, 3);
        src3gt = (v16u8)SLLI_B(src3g, 3);
        SRLI_B4_UB(src0g, src1g, src2g, src3g, 5);
        SRLI_B4_UB(src0b, src1b, src2b, src3b, 3);
        BINSRI_B2_UB(src0r, src0g, src1r, src1g, src0r, src1r, 2);
        BINSRI_B2_UB(src2r, src2g, src3r, src3g, src2r, src3r, 2);
        BINSRI_B2_UB(src0gt, src0b, src1gt, src1b, src0b, src1b, 4);
        BINSRI_B2_UB(src2gt, src2b, src3gt, src3b, src2b, src3b, 4);
        ILVEV_B2_UH(src0b, src0r, src1b, src1r, dst0, dst1);
        ILVEV_B2_UH(src2b, src2r, src3b, src3r, dst2, dst3);
        // Prefetch-style pipelining: start loading the next 32 pixels
        // before storing the current results.
        LD_UB4(source, 16, src0, src1, src2, src3);
        ST_UH4(dst0, dst1, dst2, dst3, destination, 8);
        LD_UB4(source, 16, src4, src5, src6, src7);
        PCKEV_H4_UB(src1, src0, src3, src2, src5, src4, src7, src6, src0r, src1r, src2r, src3r);
        PCKOD_H4_UB(src1, src0, src3, src2, src5, src4, src7, src6, src0b, src1b, src2b, src3b);
        SLDI_B2_UB(src0g, src1g, src0r, src1r, src0g, src1g, 1);
        SLDI_B2_UB(src2g, src3g, src2r, src3r, src2g, src3g, 1);
        src0gt = (v16u8)SLLI_B(src0g, 3);
        src1gt = (v16u8)SLLI_B(src1g, 3);
        src2gt = (v16u8)SLLI_B(src2g, 3);
        src3gt = (v16u8)SLLI_B(src3g, 3);
        SRLI_B4_UB(src0g, src1g, src2g, src3g, 5);
        SRLI_B4_UB(src0b, src1b, src2b, src3b, 3);
        BINSRI_B2_UB(src0r, src0g, src1r, src1g, src0r, src1r, 2);
        BINSRI_B2_UB(src2r, src2g, src3r, src3g, src2r, src3r, 2);
        BINSRI_B2_UB(src0gt, src0b, src1gt, src1b, src0b, src1b, 4);
        BINSRI_B2_UB(src2gt, src2b, src3gt, src3b, src2b, src3b, 4);
        ILVEV_B2_UH(src0b, src0r, src1b, src1r, dst0, dst1);
        ILVEV_B2_UH(src2b, src2r, src3b, src3r, dst2, dst3);
        ST_UH4(dst0, dst1, dst2, dst3, destination, 8);
    }

    // Vector tail: dispatch on the 32/16/8 bits of the remaining count.
    if (pixelsPerRow & 63) {
        if (pixelsPerRow & 32) {
            if ((pixelsPerRow & 16) && (pixelsPerRow & 8)) {
                // 56 remaining pixels: a 32-pixel pass then a 24-pixel pass.
                LD_UB8(source, 16, src0, src1, src2, src3, src4, src5, src6, src7);
                PCKEV_H4_UB(src1, src0, src3, src2, src5, src4, src7, src6, src0r, src1r, src2r, src3r);
                PCKOD_H4_UB(src1, src0, src3, src2, src5, src4, src7, src6, src0b, src1b, src2b, src3b);
                SLDI_B2_UB(src0g, src1g, src0r, src1r, src0g, src1g, 1);
                SLDI_B2_UB(src2g, src3g, src2r, src3r, src2g, src3g, 1);
                src0gt = (v16u8)SLLI_B(src0g, 3);
                src1gt = (v16u8)SLLI_B(src1g, 3);
                src2gt = (v16u8)SLLI_B(src2g, 3);
                src3gt = (v16u8)SLLI_B(src3g, 3);
                SRLI_B4_UB(src0g, src1g, src2g, src3g, 5);
                SRLI_B4_UB(src0b, src1b, src2b, src3b, 3);
                BINSRI_B2_UB(src0r, src0g, src1r, src1g, src0r, src1r, 2);
                BINSRI_B2_UB(src2r, src2g, src3r, src3g, src2r, src3r, 2);
                BINSRI_B2_UB(src0gt, src0b, src1gt, src1b, src0b, src1b, 4);
                BINSRI_B2_UB(src2gt, src2b, src3gt, src3b, src2b, src3b, 4);
                ILVEV_B2_UH(src0b, src0r, src1b, src1r, dst0, dst1);
                ILVEV_B2_UH(src2b, src2r, src3b, src3r, dst2, dst3);
                LD_UB6(source, 16, src0, src1, src2, src3, src4, src5);
                ST_UH4(dst0, dst1, dst2, dst3, destination, 8);
                PCKEV_H3_UB(src1, src0, src3, src2, src5, src4, src0r, src1r, src2r);
                PCKOD_H3_UB(src1, src0, src3, src2, src5, src4, src0b, src1b, src2b);
                src0g = SLDI_UB(src0g, src0r, 1);
                src1g = SLDI_UB(src1g, src1r, 1);
                src2g = SLDI_UB(src2g, src2r, 1);
                src0gt = (v16u8)SLLI_B(src0g, 3);
                src1gt = (v16u8)SLLI_B(src1g, 3);
                src2gt = (v16u8)SLLI_B(src2g, 3);
                SRLI_B3_UB(src0g, src1g, src2g, 5);
                SRLI_B3_UB(src0b, src1b, src2b, 3);
                BINSRI_B3_UB(src0r, src0g, src1r, src1g, src2r, src2g, src0r, src1r, src2r, 2);
                BINSRI_B3_UB(src0gt, src0b, src1gt, src1b, src2gt, src2b, src0b, src1b, src2b, 4);
                ILVEV_B3_UH(src0b, src0r, src1b, src1r, src2b, src2r, dst0, dst1, dst2);
                ST_UH3(dst0, dst1, dst2, destination, 8);
            } else if (pixelsPerRow & 16) {
                // 48 remaining pixels: a 32-pixel pass then a 16-pixel pass.
                LD_UB8(source, 16, src0, src1, src2, src3, src4, src5, src6, src7);
                PCKEV_H4_UB(src1, src0, src3, src2, src5, src4, src7, src6, src0r, src1r, src2r, src3r);
                PCKOD_H4_UB(src1, src0, src3, src2, src5, src4, src7, src6, src0b, src1b, src2b, src3b);
                SLDI_B2_UB(src0g, src1g, src0r, src1r, src0g, src1g, 1);
                SLDI_B2_UB(src2g, src3g, src2r, src3r, src2g, src3g, 1);
                src0gt = (v16u8)SLLI_B(src0g, 3);
                src1gt = (v16u8)SLLI_B(src1g, 3);
                src2gt = (v16u8)SLLI_B(src2g, 3);
                src3gt = (v16u8)SLLI_B(src3g, 3);
                SRLI_B4_UB(src0g, src1g, src2g, src3g, 5);
                SRLI_B4_UB(src0b, src1b, src2b, src3b, 3);
                BINSRI_B2_UB(src0r, src0g, src1r, src1g, src0r, src1r, 2);
                BINSRI_B2_UB(src2r, src2g, src3r, src3g, src2r, src3r, 2);
                BINSRI_B2_UB(src0gt, src0b, src1gt, src1b, src0b, src1b, 4);
                BINSRI_B2_UB(src2gt, src2b, src3gt, src3b, src2b, src3b, 4);
                ILVEV_B2_UH(src0b, src0r, src1b, src1r, dst0, dst1);
                ILVEV_B2_UH(src2b, src2r, src3b, src3r, dst2, dst3);
                LD_UB4(source, 16, src0, src1, src2, src3);
                ST_UH4(dst0, dst1, dst2, dst3, destination, 8);
                PCKEV_H2_UB(src1, src0, src3, src2, src0r, src1r);
                PCKOD_H2_UB(src1, src0, src3, src2, src0b, src1b);
                SLDI_B2_UB(src0g, src1g, src0r, src1r, src0g, src1g, 1);
                src0gt = (v16u8)SLLI_B(src0g, 3);
                src1gt = (v16u8)SLLI_B(src1g, 3);
                SRLI_B2_UB(src0g, src1g, 5);
                SRLI_B2_UB(src0b, src1b, 3);
                BINSRI_B2_UB(src0r, src0g, src1r, src1g, src0r, src1r, 2);
                BINSRI_B2_UB(src0gt, src0b, src1gt, src1b, src0b, src1b, 4);
                ILVEV_B2_UH(src0b, src0r, src1b, src1r, dst0, dst1);
                ST_UH2(dst0, dst1, destination, 8);
            } else if (pixelsPerRow & 8) {
                // 40 remaining pixels: a 32-pixel pass then an 8-pixel pass.
                LD_UB8(source, 16, src0, src1, src2, src3, src4, src5, src6, src7);
                PCKEV_H4_UB(src1, src0, src3, src2, src5, src4, src7, src6, src0r, src1r, src2r, src3r);
                PCKOD_H4_UB(src1, src0, src3, src2, src5, src4, src7, src6, src0b, src1b, src2b, src3b);
                SLDI_B2_UB(src0g, src1g, src0r, src1r, src0g, src1g, 1);
                SLDI_B2_UB(src2g, src3g, src2r, src3r, src2g, src3g, 1);
                src0gt = (v16u8)SLLI_B(src0g, 3);
                src1gt = (v16u8)SLLI_B(src1g, 3);
                src2gt = (v16u8)SLLI_B(src2g, 3);
                src3gt = (v16u8)SLLI_B(src3g, 3);
                SRLI_B4_UB(src0g, src1g, src2g, src3g, 5);
                SRLI_B4_UB(src0b, src1b, src2b, src3b, 3);
                BINSRI_B2_UB(src0r, src0g, src1r, src1g, src0r, src1r, 2);
                BINSRI_B2_UB(src2r, src2g, src3r, src3g, src2r, src3r, 2);
                BINSRI_B2_UB(src0gt, src0b, src1gt, src1b, src0b, src1b, 4);
                BINSRI_B2_UB(src2gt, src2b, src3gt, src3b, src2b, src3b, 4);
                ILVEV_B2_UH(src0b, src0r, src1b, src1r, dst0, dst1);
                ILVEV_B2_UH(src2b, src2r, src3b, src3r, dst2, dst3);
                LD_UB2(source, 16, src0, src1);
                ST_UH4(dst0, dst1, dst2, dst3, destination, 8);
                src0r = (v16u8)__msa_pckev_h((v8i16)src1, (v8i16)src0);
                src0b = (v16u8)__msa_pckod_h((v8i16)src1, (v8i16)src0);
                src0g = SLDI_UB(src0g, src0r, 1);
                src0gt = (v16u8)SLLI_B(src0g, 3);
                src0g = (v16u8)SRLI_B(src0g, 5);
                src0b = (v16u8)SRLI_B(src0b, 3);
                src0r = (v16u8)__msa_binsri_b((v16u8)src0r, (v16u8)src0g, 2);
                src0b = (v16u8)__msa_binsri_b((v16u8)src0gt, (v16u8)src0b, 4);
                dst0 = (v8u16)__msa_ilvev_b((v16i8)src0r, (v16i8)src0b);
                ST_UH(dst0, destination);
                destination += 8;
            } else {
                // Exactly 32 remaining pixels.
                LD_UB8(source, 16, src0, src1, src2, src3, src4, src5, src6, src7);
                PCKEV_H4_UB(src1, src0, src3, src2, src5, src4, src7, src6, src0r, src1r, src2r, src3r);
                PCKOD_H4_UB(src1, src0, src3, src2, src5, src4, src7, src6, src0b, src1b, src2b, src3b);
                SLDI_B2_UB(src0g, src1g, src0r, src1r, src0g, src1g, 1);
                SLDI_B2_UB(src2g, src3g, src2r, src3r, src2g, src3g, 1);
                src0gt = (v16u8)SLLI_B(src0g, 3);
                src1gt = (v16u8)SLLI_B(src1g, 3);
                src2gt = (v16u8)SLLI_B(src2g, 3);
                src3gt = (v16u8)SLLI_B(src3g, 3);
                SRLI_B4_UB(src0g, src1g, src2g, src3g, 5);
                SRLI_B4_UB(src0b, src1b, src2b, src3b, 3);
                BINSRI_B2_UB(src0r, src0g, src1r, src1g, src0r, src1r, 2);
                BINSRI_B2_UB(src2r, src2g, src3r, src3g, src2r, src3r, 2);
                BINSRI_B2_UB(src0gt, src0b, src1gt, src1b, src0b, src1b, 4);
                BINSRI_B2_UB(src2gt, src2b, src3gt, src3b, src2b, src3b, 4);
                ILVEV_B2_UH(src0b, src0r, src1b, src1r, dst0, dst1);
                ILVEV_B2_UH(src2b, src2r, src3b, src3r, dst2, dst3);
                ST_UH4(dst0, dst1, dst2, dst3, destination, 8);
            }
        } else if ((pixelsPerRow & 16) && (pixelsPerRow & 8)) {
            // 24 remaining pixels (6 input vectors).
            LD_UB6(source, 16, src0, src1, src2, src3, src4, src5);
            PCKEV_H3_UB(src1, src0, src3, src2, src5, src4, src0r, src1r, src2r);
            PCKOD_H3_UB(src1, src0, src3, src2, src5, src4, src0b, src1b, src2b);
            src0g = SLDI_UB(src0g, src0r, 1);
            src1g = SLDI_UB(src1g, src1r, 1);
            src2g = SLDI_UB(src2g, src2r, 1);
            src0gt = (v16u8)SLLI_B(src0g, 3);
            src1gt = (v16u8)SLLI_B(src1g, 3);
            src2gt = (v16u8)SLLI_B(src2g, 3);
            SRLI_B3_UB(src0g, src1g, src2g, 5);
            SRLI_B3_UB(src0b, src1b, src2b, 3);
            BINSRI_B3_UB(src0r, src0g, src1r, src1g, src2r, src2g, src0r, src1r, src2r, 2);
            BINSRI_B3_UB(src0gt, src0b, src1gt, src1b, src2gt, src2b, src0b, src1b, src2b, 4);
            ILVEV_B3_UH(src0b, src0r, src1b, src1r, src2b, src2r, dst0, dst1, dst2);
            ST_UH3(dst0, dst1, dst2, destination, 8);
        } else if (pixelsPerRow & 16) {
            // 16 remaining pixels (4 input vectors).
            LD_UB4(source, 16, src0, src1, src2, src3);
            PCKEV_H2_UB(src1, src0, src3, src2, src0r, src1r);
            PCKOD_H2_UB(src1, src0, src3, src2, src0b, src1b);
            SLDI_B2_UB(src0g, src1g, src0r, src1r, src0g, src1g, 1);
            src0gt = (v16u8)SLLI_B(src0g, 3);
            src1gt = (v16u8)SLLI_B(src1g, 3);
            SRLI_B2_UB(src0g, src1g, 5);
            SRLI_B2_UB(src0b, src1b, 3);
            BINSRI_B2_UB(src0r, src0g, src1r, src1g, src0r, src1r, 2);
            BINSRI_B2_UB(src0gt, src0b, src1gt, src1b, src0b, src1b, 4);
            ILVEV_B2_UH(src0b, src0r, src1b, src1r, dst0, dst1);
            ST_UH2(dst0, dst1, destination, 8);
        } else if (pixelsPerRow & 8) {
            // 8 remaining pixels (2 input vectors), via scalar intrinsics.
            LD_UB2(source, 16, src0, src1);
            src0r = (v16u8)__msa_pckev_h((v8i16)src1, (v8i16)src0);
            src0b = (v16u8)__msa_pckod_h((v8i16)src1, (v8i16)src0);
            src0g = SLDI_UB(src0g, src0r, 1);
            src0gt = (v16u8)SLLI_B(src0g, 3);
            src0g = (v16u8)SRLI_B(src0g, 5);
            src0b = (v16u8)SRLI_B(src0b, 3);
            src0r = (v16u8)__msa_binsri_b((v16u8)src0r, (v16u8)src0g, 2);
            src0b = (v16u8)__msa_binsri_b((v16u8)src0gt, (v16u8)src0b, 4);
            dst0 = (v8u16)__msa_ilvev_b((v16i8)src0r, (v16i8)src0b);
            ST_UH(dst0, destination);
            destination += 8;
        }
    }

    pixelsPerRow &= 7;
}
102 } // namespace SIMD 683 } // namespace SIMD
103 684
104 } // namespace blink 685 } // namespace blink
105 686
106 #endif // HAVE(MIPS_MSA_INTRINSICS) 687 #endif // HAVE(MIPS_MSA_INTRINSICS)
107 688
108 #endif // WebGLImageConversionMSA_h 689 #endif // WebGLImageConversionMSA_h
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698