OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 203 matching lines...)
214 dst0 = (v16u8) __msa_pckev_b((v16i8) src1, (v16i8) src0); | 214 dst0 = (v16u8) __msa_pckev_b((v16i8) src1, (v16i8) src0); |
215 dst1 = (v16u8) __msa_pckod_b((v16i8) src1, (v16i8) src0); | 215 dst1 = (v16u8) __msa_pckod_b((v16i8) src1, (v16i8) src0); |
216 ST_UB(dst0, dst_u); | 216 ST_UB(dst0, dst_u); |
217 ST_UB(dst1, dst_v); | 217 ST_UB(dst1, dst_v); |
218 src_uyvy += 64; | 218 src_uyvy += 64; |
219 dst_u += 16; | 219 dst_u += 16; |
220 dst_v += 16; | 220 dst_v += 16; |
221 } | 221 } |
222 } | 222 } |
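
The pckev_b/pckod_b pair in the tail above splits interleaved chroma bytes into separate U and V planes. Only the end of this function is visible in the collapsed diff, so whether it is the row-averaging UV variant or the plain UV422 variant is not shown; as a readability aid, here is a scalar sketch of the simpler 4:2:2 UYVY chroma extraction (illustrative only, not the code under review):

```c
#include <stdint.h>

/* Scalar sketch of 4:2:2 UYVY chroma extraction (illustrative only).
 * UYVY packs pixel pairs as U0 Y0 V0 Y1, so U and V sit at byte
 * offsets 0 and 2 of every 4-byte group. */
static void UYVYToUV422Sketch(const uint8_t* src_uyvy,
                              uint8_t* dst_u, uint8_t* dst_v, int width) {
  int x;
  for (x = 0; x < width; x += 2) {
    *dst_u++ = src_uyvy[0]; /* shared U for the pixel pair */
    *dst_v++ = src_uyvy[2]; /* shared V for the pixel pair */
    src_uyvy += 4;
  }
}
```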
223 | 223 |
224 void ARGB4444ToYRow_MSA(const uint8* src_argb4444, uint8* dst_y, int width) { | |
225 int x; | |
226 const uint16* src_argb4444_u16 = (uint16*) src_argb4444; | |
227 v8u16 src0, src1; | |
228 v8u16 vec0, vec1, vec2, vec3, vec4, vec5; | |
229 v16u8 dst0; | |
230 v8u16 const_0x19 = (v8u16) __msa_ldi_h(0x19); | |
231 v8u16 const_0x81 = (v8u16) __msa_ldi_h(0x81); | |
232 v8u16 const_0x42 = (v8u16) __msa_ldi_h(0x42); | |
233 v8u16 const_0x1080 = (v8u16) __msa_fill_h(0x1080); | |
234 v8u16 const_0x0F = (v8u16) __msa_ldi_h(0x0F); | |
235 | |
236 for (x = 0; x < width; x += 16) { | |
237 LD_UH2(src_argb4444_u16, 8, src0, src1); | |
238 vec0 = src0 & const_0x0F; | |
239 vec1 = src1 & const_0x0F; | |
240 src0 = (v8u16) __msa_srai_h((v8i16) src0, 4); | |
241 src1 = (v8u16) __msa_srai_h((v8i16) src1, 4); | |
242 vec2 = src0 & const_0x0F; | |
243 vec3 = src1 & const_0x0F; | |
244 src0 = (v8u16) __msa_srai_h((v8i16) src0, 4); | |
245 src1 = (v8u16) __msa_srai_h((v8i16) src1, 4); | |
246 vec4 = src0 & const_0x0F; | |
247 vec5 = src1 & const_0x0F; | |
248 vec0 |= (v8u16) __msa_slli_h((v8i16) vec0, 4); | |
249 vec1 |= (v8u16) __msa_slli_h((v8i16) vec1, 4); | |
250 vec2 |= (v8u16) __msa_slli_h((v8i16) vec2, 4); | |
251 vec3 |= (v8u16) __msa_slli_h((v8i16) vec3, 4); | |
252 vec4 |= (v8u16) __msa_slli_h((v8i16) vec4, 4); | |
253 vec5 |= (v8u16) __msa_slli_h((v8i16) vec5, 4); | |
254 vec0 *= const_0x19; | |
fbarchard1
2016/10/14 21:35:16
FYI The YUV to RGB functions now take constants as
manojkumar.bhosale
2016/10/19 11:56:27
OK. Will fix them when the change happens.
| |
255 vec1 *= const_0x19; | |
256 vec2 *= const_0x81; | |
257 vec3 *= const_0x81; | |
258 vec4 *= const_0x42; | |
259 vec5 *= const_0x42; | |
260 vec0 += vec2; | |
261 vec1 += vec3; | |
262 vec0 += vec4; | |
263 vec1 += vec5; | |
264 vec0 += const_0x1080; | |
265 vec1 += const_0x1080; | |
266 vec0 = (v8u16) __msa_srai_h((v8i16) vec0, 8); | |
267 vec1 = (v8u16) __msa_srai_h((v8i16) vec1, 8); | |
268 dst0 = (v16u8) __msa_pckev_b((v16i8) vec1, (v16i8) vec0); | |
269 ST_UB(dst0, dst_y); | |
270 src_argb4444_u16 += 16; | |
271 dst_y += 16; | |
272 } | |
273 } | |
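
The halfword constants in ARGB4444ToYRow_MSA are the BT.601 studio-range luma coefficients used throughout libyuv (0x42 = 66 for R, 0x81 = 129 for G, 0x19 = 25 for B, with 0x1080 supplying rounding plus the +16 luma offset). A scalar sketch of what each lane computes, assuming the usual little-endian ARGB4444 layout with B in the low nibble (an illustrative reference, not the vector code itself):

```c
#include <stdint.h>

/* Scalar sketch: one ARGB4444 pixel -> one Y sample (illustrative only).
 * Assumes little-endian ARGB4444: bits 3:0 = B, 7:4 = G, 11:8 = R, 15:12 = A. */
static uint8_t ARGB4444PixelToY(uint16_t pix) {
  uint8_t b4 = pix & 0x0F;
  uint8_t g4 = (pix >> 4) & 0x0F;
  uint8_t r4 = (pix >> 8) & 0x0F;
  /* Expand 4-bit channels to 8 bits by nibble replication, mirroring the
   * vec |= vec << 4 step in the vector code. */
  int b = (b4 << 4) | b4;
  int g = (g4 << 4) | g4;
  int r = (r4 << 4) | r4;
  /* BT.601 studio range: Y = (66*R + 129*G + 25*B + 0x1080) >> 8. */
  return (uint8_t)((66 * r + 129 * g + 25 * b + 0x1080) >> 8);
}
```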
274 | |
275 void ARGB4444ToUVRow_MSA(const uint8* src_argb4444, | |
276 int src_stride_argb4444, | |
277 uint8* dst_u, uint8* dst_v, int width) { | |
278 int x; | |
279 const uint8* src_argb4444_next = src_argb4444 + src_stride_argb4444; | |
280 v16u8 src0, src1, src2, src3, src4, src5, src6, src7; | |
281 v16u8 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, reg9; | |
282 v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9; | |
283 v16u8 dst0, dst1; | |
284 v8u16 const_0x70 = (v8u16) __msa_ldi_h(0x70); | |
285 v8u16 const_0x4A = (v8u16) __msa_ldi_h(0x4A); | |
286 v8u16 const_0x26 = (v8u16) __msa_ldi_h(0x26); | |
287 v8u16 const_0x5E = (v8u16) __msa_ldi_h(0x5E); | |
288 v8u16 const_0x12 = (v8u16) __msa_ldi_h(0x12); | |
289 v8u16 const_0x8080 = (v8u16) __msa_fill_h(0x8080); | |
290 | |
291 for (x = 0; x < width; x += 32) { | |
292 LD_UB4(src_argb4444, 16, src0, src1, src2, src3); | |
293 LD_UB4(src_argb4444_next, 16, src4, src5, src6, src7); | |
294 reg0 = __msa_andi_b(src0, 0x0F); | |
295 reg1 = __msa_andi_b(src1, 0x0F); | |
296 reg2 = __msa_andi_b(src2, 0x0F); | |
297 reg3 = __msa_andi_b(src3, 0x0F); | |
298 reg0 += __msa_andi_b(src4, 0x0F); | |
299 reg1 += __msa_andi_b(src5, 0x0F); | |
300 reg2 += __msa_andi_b(src6, 0x0F); | |
301 reg3 += __msa_andi_b(src7, 0x0F); | |
302 src0 = __msa_andi_b(src0, 0xF0); | |
303 src1 = __msa_andi_b(src1, 0xF0); | |
304 src2 = __msa_andi_b(src2, 0xF0); | |
305 src3 = __msa_andi_b(src3, 0xF0); | |
306 src4 = __msa_andi_b(src4, 0xF0); | |
307 src5 = __msa_andi_b(src5, 0xF0); | |
308 src6 = __msa_andi_b(src6, 0xF0); | |
309 src7 = __msa_andi_b(src7, 0xF0); | |
310 reg4 = (v16u8) __msa_srli_b((v16i8) src0, 4); | |
311 reg5 = (v16u8) __msa_srli_b((v16i8) src1, 4); | |
312 reg6 = (v16u8) __msa_srli_b((v16i8) src2, 4); | |
313 reg7 = (v16u8) __msa_srli_b((v16i8) src3, 4); | |
314 reg4 += (v16u8) __msa_srli_b((v16i8) src4, 4); | |
315 reg5 += (v16u8) __msa_srli_b((v16i8) src5, 4); | |
316 reg6 += (v16u8) __msa_srli_b((v16i8) src6, 4); | |
317 reg7 += (v16u8) __msa_srli_b((v16i8) src7, 4); | |
318 reg8 = (v16u8) __msa_pckod_b((v16i8) reg1, (v16i8) reg0); | |
319 reg9 = (v16u8) __msa_pckod_b((v16i8) reg3, (v16i8) reg2); | |
320 reg0 = (v16u8) __msa_pckev_b((v16i8) reg1, (v16i8) reg0); | |
321 reg1 = (v16u8) __msa_pckev_b((v16i8) reg3, (v16i8) reg2); | |
322 reg2 = (v16u8) __msa_pckev_b((v16i8) reg5, (v16i8) reg4); | |
323 reg3 = (v16u8) __msa_pckev_b((v16i8) reg7, (v16i8) reg6); | |
324 vec0 = __msa_hadd_u_h(reg0, reg0); | |
325 vec1 = __msa_hadd_u_h(reg1, reg1); | |
326 vec2 = __msa_hadd_u_h(reg2, reg2); | |
327 vec3 = __msa_hadd_u_h(reg3, reg3); | |
328 vec4 = __msa_hadd_u_h(reg8, reg8); | |
329 vec5 = __msa_hadd_u_h(reg9, reg9); | |
330 vec0 = (v8u16) __msa_slli_h((v8i16) vec0, 2); | |
331 vec1 = (v8u16) __msa_slli_h((v8i16) vec1, 2); | |
332 vec2 = (v8u16) __msa_slli_h((v8i16) vec2, 2); | |
333 vec3 = (v8u16) __msa_slli_h((v8i16) vec3, 2); | |
334 vec4 = (v8u16) __msa_slli_h((v8i16) vec4, 2); | |
335 vec5 = (v8u16) __msa_slli_h((v8i16) vec5, 2); | |
336 vec0 |= (v8u16) __msa_srai_h((v8i16) vec0, 6); | |
337 vec1 |= (v8u16) __msa_srai_h((v8i16) vec1, 6); | |
338 vec2 |= (v8u16) __msa_srai_h((v8i16) vec2, 6); | |
339 vec3 |= (v8u16) __msa_srai_h((v8i16) vec3, 6); | |
340 vec4 |= (v8u16) __msa_srai_h((v8i16) vec4, 6); | |
341 vec5 |= (v8u16) __msa_srai_h((v8i16) vec5, 6); | |
fbarchard1
2016/10/14 21:35:16
I'm concerned that this is a lot of code for a for
manojkumar.bhosale
2016/10/19 11:56:27
Done.
| |
342 vec6 = vec0 * const_0x70; | |
343 vec7 = vec1 * const_0x70; | |
344 vec8 = vec2 * const_0x4A; | |
345 vec9 = vec3 * const_0x4A; | |
346 vec0 *= const_0x12; | |
347 vec1 *= const_0x12; | |
348 vec2 *= const_0x5E; | |
349 vec3 *= const_0x5E; | |
350 vec6 += const_0x8080; | |
351 vec7 += const_0x8080; | |
352 vec8 += vec4 * const_0x26; | |
353 vec9 += vec5 * const_0x26; | |
354 vec4 *= const_0x70; | |
355 vec5 *= const_0x70; | |
356 vec2 += vec0; | |
357 vec3 += vec1; | |
358 vec4 += const_0x8080; | |
359 vec5 += const_0x8080; | |
360 vec0 = vec6 - vec8; | |
361 vec1 = vec7 - vec9; | |
362 vec2 = vec4 - vec2; | |
363 vec3 = vec5 - vec3; | |
364 vec0 = (v8u16) __msa_srli_h((v8i16) vec0, 8); | |
365 vec1 = (v8u16) __msa_srli_h((v8i16) vec1, 8); | |
366 vec2 = (v8u16) __msa_srli_h((v8i16) vec2, 8); | |
367 vec3 = (v8u16) __msa_srli_h((v8i16) vec3, 8); | |
368 dst0 = (v16u8) __msa_pckev_b((v16i8) vec1, (v16i8) vec0); | |
369 dst1 = (v16u8) __msa_pckev_b((v16i8) vec3, (v16i8) vec2); | |
370 ST_UB(dst0, dst_u); | |
371 ST_UB(dst1, dst_v); | |
372 src_argb4444 += 64; | |
373 src_argb4444_next += 64; | |
fbarchard1
2016/10/14 21:35:16
on other platforms I'd typically unroll less than
| |
374 dst_u += 16; | |
375 dst_v += 16; | |
376 } | |
377 } | |
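
ARGB4444ToUVRow_MSA uses the matching BT.601 chroma coefficients (112, 74, 38 for U and 112, 94, 18 for V, with a 0x8080 bias) applied to 2x2-subsampled pixels taken from the two input rows. The vector code folds the nibble-to-byte expansion into the slli/srai sequence after the horizontal adds; a plain scalar sketch of the per-block math, under the same layout assumption as above and with ordinary rounding (the vector path's rounding may differ slightly), looks like this:

```c
#include <stdint.h>

/* Scalar sketch: one 2x2 block of ARGB4444 -> one U and one V sample
 * (illustrative only; assumes B in the low nibble, then G, then R). */
static void ARGB4444BlockToUV(const uint16_t* row0, const uint16_t* row1,
                              uint8_t* u, uint8_t* v) {
  const uint16_t px[4] = {row0[0], row0[1], row1[0], row1[1]};
  int b = 0, g = 0, r = 0;
  int i;
  for (i = 0; i < 4; ++i) {
    int b4 = px[i] & 0x0F;
    int g4 = (px[i] >> 4) & 0x0F;
    int r4 = (px[i] >> 8) & 0x0F;
    b += (b4 << 4) | b4; /* expand to 8 bits, then accumulate */
    g += (g4 << 4) | g4;
    r += (r4 << 4) | r4;
  }
  b = (b + 2) >> 2; /* average over the 2x2 block */
  g = (g + 2) >> 2;
  r = (r + 2) >> 2;
  /* BT.601: U = (112*B - 74*G - 38*R + 0x8080) >> 8,
   *         V = (112*R - 94*G - 18*B + 0x8080) >> 8. */
  *u = (uint8_t)((112 * b - 74 * g - 38 * r + 0x8080) >> 8);
  *v = (uint8_t)((112 * r - 94 * g - 18 * b + 0x8080) >> 8);
}
```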
378 | |
379 void ARGB4444ToARGBRow_MSA(const uint8* src_argb4444, uint8* dst_argb, | |
380 int width) { | |
381 int x; | |
382 v16u8 src0, src1; | |
383 v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; | |
384 v16u8 dst0, dst1, dst2, dst3; | |
385 | |
386 for (x = 0; x < width; x += 16) { | |
387 LD_UB2(src_argb4444, 16, src0, src1); | |
388 vec0 = (v8u16) __msa_andi_b(src0, 0x0F); | |
389 vec1 = (v8u16) __msa_andi_b(src1, 0x0F); | |
390 vec2 = (v8u16) __msa_andi_b(src0, 0xF0); | |
391 vec3 = (v8u16) __msa_andi_b(src1, 0xF0); | |
392 vec4 = (v8u16) __msa_slli_b((v16i8) vec0, 4); | |
393 vec5 = (v8u16) __msa_slli_b((v16i8) vec1, 4); | |
394 vec6 = (v8u16) __msa_srli_b((v16i8) vec2, 4); | |
395 vec7 = (v8u16) __msa_srli_b((v16i8) vec3, 4); | |
396 vec0 |= vec4; | |
397 vec1 |= vec5; | |
398 vec2 |= vec6; | |
399 vec3 |= vec7; | |
400 dst0 = (v16u8) __msa_ilvr_b((v16i8) vec2, (v16i8) vec0); | |
401 dst1 = (v16u8) __msa_ilvl_b((v16i8) vec2, (v16i8) vec0); | |
402 dst2 = (v16u8) __msa_ilvr_b((v16i8) vec3, (v16i8) vec1); | |
403 dst3 = (v16u8) __msa_ilvl_b((v16i8) vec3, (v16i8) vec1); | |
404 ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16); | |
405 src_argb4444 += 32; | |
406 dst_argb += 64; | |
407 } | |
408 } | |
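
ARGB4444ToARGBRow_MSA is pure bit manipulation: each 4-bit channel is widened to 8 bits by replicating the nibble (0xF becomes 0xFF, 0x8 becomes 0x88), and the interleaves put the bytes back in B, G, R, A order. A per-pixel scalar sketch, with the same layout assumption as above (illustrative only):

```c
#include <stdint.h>

/* Scalar sketch: expand one ARGB4444 pixel to 8-bit-per-channel ARGB by
 * nibble replication (illustrative only). */
static void ARGB4444PixelToARGB(uint16_t pix, uint8_t dst[4]) {
  uint8_t b4 = pix & 0x0F;
  uint8_t g4 = (pix >> 4) & 0x0F;
  uint8_t r4 = (pix >> 8) & 0x0F;
  uint8_t a4 = (uint8_t)(pix >> 12);
  dst[0] = (uint8_t)((b4 << 4) | b4); /* B */
  dst[1] = (uint8_t)((g4 << 4) | g4); /* G */
  dst[2] = (uint8_t)((r4 << 4) | r4); /* R */
  dst[3] = (uint8_t)((a4 << 4) | a4); /* A */
}
```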
409 | |
224 #ifdef __cplusplus | 410 #ifdef __cplusplus |
225 } // extern "C" | 411 } // extern "C" |
226 } // namespace libyuv | 412 } // namespace libyuv |
227 #endif | 413 #endif |
228 | 414 |
229 #endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) | 415 #endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) |