src/opts/SkBlitRow_opts_mips_dsp.cpp - Issue 326913004: MIPS: added optimization for functions from SkBlitRow.

Side by Side Diff: src/opts/SkBlitRow_opts_mips_dsp.cpp

Issue 326913004: MIPS: added optimization for functions from SkBlitRow. (Closed) Base URL: https://skia.googlesource.com/skia.git@master

Patch Set: Created 6 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 /*

	2 * Copyright 2014 The Android Open Source Project

	3 *

	4 * Use of this source code is governed by a BSD-style license that can be

	5 * found in the LICENSE file.

	6 */

	7

	8 #include "SkBlitRow.h"

	9 #include "SkBlitMask.h"

	10 #include "SkColorPriv.h"

	11 #include "SkDither.h"

	12 #include "SkMathPriv.h"

	13

	14 static void S32_D565_Blend_mips_dsp(uint16_t* SK_RESTRICT dst,

	15 const SkPMColor* SK_RESTRICT src, int count,

	16 U8CPU alpha, int /x/, int /y/) {

	17 register uint32_t t0, t1, t2, t3, t4, t5, t6;

	18 register uint32_t s0, s1, s2, s4, s5, s6;

	19

	20 alpha += 1;

	21 if (count >= 2) {

	22 __asm__ volatile (

	23 ".set push \n\t"

	24 ".set noreorder \n\t"

	25 "sll %[s4], %[alpha], 8 \n\t"

	26 "or %[s4], %[s4], %[alpha] \n\t"

	27 "repl.ph %[s5], 0x1f \n\t"

	28 "repl.ph %[s6], 0x3f \n\t"

	29 "1: \n\t"

	30 "lw %[s2], 0(%[src]) \n\t"

	31 "lw %[s1], 4(%[src]) \n\t"

	32 "lwr %[s0], 0(%[dst]) \n\t"

	33 "lwl %[s0], 3(%[dst]) \n\t"

	34 "and %[t1], %[s0], %[s5] \n\t"

	35 "shra.ph %[t0], %[s0], 5 \n\t"

	36 "and %[t2], %[t0], %[s6] \n\t"

	37 #ifdef __MIPS_HAVE_DSPR2

	38 "shrl.ph %[t3], %[s0], 11 \n\t"

	39 #else

	40 "shra.ph %[t0], %[s0], 11 \n\t"

	41 "and %[t3], %[t0], %[s5] \n\t"

	42 #endif

	43 "precrq.ph.w %[t0], %[s1], %[s2] \n\t"

	44 "shrl.qb %[t5], %[t0], 3 \n\t"

	45 "and %[t4], %[t5], %[s5] \n\t"

	46 "ins %[s2], %[s1], 16, 16 \n\t"

	47 "preceu.ph.qbra %[t0], %[s2] \n\t"

	48 "shrl.qb %[t6], %[t0], 3 \n\t"

	49 #ifdef __MIPS_HAVE_DSPR2

	50 "shrl.ph %[t5], %[s2], 10 \n\t"

	51 #else

	52 "shra.ph %[t0], %[s2], 10 \n\t"

	53 "and %[t5], %[t0], %[s6] \n\t"

	54 #endif

	55 "subu.qb %[t4], %[t4], %[t1] \n\t"

	56 "subu.qb %[t5], %[t5], %[t2] \n\t"

	57 "subu.qb %[t6], %[t6], %[t3] \n\t"

	58 "muleu_s.ph.qbr %[t4], %[s4], %[t4] \n\t"

	59 "muleu_s.ph.qbr %[t5], %[s4], %[t5] \n\t"

	60 "muleu_s.ph.qbr %[t6], %[s4], %[t6] \n\t"

	61 "addiu %[count], %[count], -2 \n\t"

	62 "addiu %[src], %[src], 8 \n\t"

	63 "shra.ph %[t4], %[t4], 8 \n\t"

	64 "shra.ph %[t5], %[t5], 8 \n\t"

	65 "shra.ph %[t6], %[t6], 8 \n\t"

	66 "addu.qb %[t4], %[t4], %[t1] \n\t"

	67 "addu.qb %[t5], %[t5], %[t2] \n\t"

	68 "addu.qb %[t6], %[t6], %[t3] \n\t"

	69 "andi %[s0], %[t4], 0xffff \n\t"

	70 "andi %[t0], %[t5], 0xffff \n\t"

	71 "sll %[t0], %[t0], 0x5 \n\t"

	72 "or %[s0], %[s0], %[t0] \n\t"

	73 "sll %[t0], %[t6], 0xb \n\t"

	74 "or %[t0], %[t0], %[s0] \n\t"

	75 "sh %[t0], 0(%[dst]) \n\t"

	76 "srl %[s1], %[t4], 16 \n\t"

	77 "srl %[t0], %[t5], 16 \n\t"

	78 "sll %[t5], %[t0], 5 \n\t"

	79 "or %[t0], %[t5], %[s1] \n\t"

	80 "srl %[s0], %[t6], 16 \n\t"

	81 "sll %[s2], %[s0], 0xb \n\t"

	82 "or %[s1], %[s2], %[t0] \n\t"

	83 "sh %[s1], 2(%[dst]) \n\t"

	84 "bge %[count], 2, 1b \n\t"

	85 " addiu %[dst], %[dst], 4 \n\t"

	86 ".set pop \n\t"

	87 : [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),

	88 [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [s0]"=&r"(s0),

	89 [s1]"=&r"(s1), [s2]"=&r"(s2), [s4]"=&r"(s4), [s5]"=&r"(s5),

	90 [s6]"=&r"(s6), [count]"+r"(count), [dst]"+r"(dst),

	91 [src]"+r"(src)

	92 : [alpha]"r"(alpha)

	93 : "memory", "hi", "lo"

	94 );

	95 }

	96

	97 if (count == 1) {

	98 SkPMColor c = *src++;

	99 SkPMColorAssert(c);

	100 SkASSERT(SkGetPackedA32(c) == 255);

	101 uint16_t d = *dst;

	102 *dst++ = SkPackRGB16(SkAlphaBlend(SkPacked32ToR16(c), SkGetPackedR16(d), alpha),

	103 SkAlphaBlend(SkPacked32ToG16(c), SkGetPackedG16(d), alpha),

	104 SkAlphaBlend(SkPacked32ToB16(c), SkGetPackedB16(d), alpha));

	105 }

	106 }

	107

	108 static void S32A_D565_Opaque_Dither_mips_dsp(uint16_t* __restrict__ dst,

	109 const SkPMColor* __restrict__ src,

	110 int count, U8CPU alpha, int x, int y) {

	111 __asm__ volatile (

	112 "pref 0, 0(%[src]) \n\t"

	113 "pref 1, 0(%[dst]) \n\t"

	114 "pref 0, 32(%[src]) \n\t"

	115 "pref 1, 32(%[dst]) \n\t"

	116 :

	117 : [src]"r"(src), [dst]"r"(dst)

	118 : "memory"

	119 );

	120

	121 register int32_t t0, t1, t2, t3, t4, t5, t6;

	122 register int32_t t7, t8, t9, s0, s1, s2, s3;

	123 const uint16_t dither_scan = gDitherMatrix_3Bit_16[(y) & 3];

	124

	125 if (count >= 2) {

	126 __asm__ volatile (

	127 ".set push \n\t"

	128 ".set noreorder \n\t"

	129 "li %[s1], 0x01010101 \n\t"

	130 "li %[s2], -2017 \n\t"

	131 "1: \n\t"

	132 "bnez %[s3], 4f \n\t"

	133 " li %[s3], 2 \n\t"

	134 "pref 0, 64(%[src]) \n\t"

	135 "pref 1, 64(%[dst]) \n\t"

	136 "4: \n\t"

	137 "addiu %[s3], %[s3], -1 \n\t"

	138 "lw %[t1], 0(%[src]) \n\t"

	139 "andi %[t3], %[x], 0x3 \n\t"

	140 "addiu %[x], %[x], 1 \n\t"

	141 "sll %[t4], %[t3], 2 \n\t"

	142 "srav %[t5], %[dither_scan], %[t4] \n\t"

	143 "andi %[t3], %[t5], 0xf \n\t"

	144 "lw %[t2], 4(%[src]) \n\t"

	145 "andi %[t4], %[x], 0x3 \n\t"

	146 "sll %[t5], %[t4], 2 \n\t"

	147 "srav %[t6], %[dither_scan], %[t5] \n\t"

	148 "addiu %[x], %[x], 1 \n\t"

	149 "ins %[t3], %[t6], 8, 4 \n\t"

	150 "srl %[t4], %[t1], 24 \n\t"

	151 "addiu %[t0], %[t4], 1 \n\t"

	152 "srl %[t4], %[t2], 24 \n\t"

	153 "addiu %[t5], %[t4], 1 \n\t"

	154 "ins %[t0], %[t5], 16, 16 \n\t"

	155 "muleu_s.ph.qbr %[t4], %[t3], %[t0] \n\t"

	156 "preceu.ph.qbla %[t3], %[t4] \n\t"

	157 "andi %[t4], %[t1], 0xff \n\t"

	158 "ins %[t4], %[t2], 16, 8 \n\t"

	159 "shrl.qb %[t5], %[t4], 5 \n\t"

	160 "subu.qb %[t6], %[t3], %[t5] \n\t"

	161 "addq.ph %[t5], %[t6], %[t4] \n\t"

	162 "ext %[t4], %[t1], 8, 8 \n\t"

	163 "srl %[t6], %[t2], 8 \n\t"

	164 "ins %[t4], %[t6], 16, 8 \n\t"

	165 "shrl.qb %[t6], %[t4], 6 \n\t"

	166 "shrl.qb %[t7], %[t3], 1 \n\t"

	167 "subu.qb %[t8], %[t7], %[t6] \n\t"

	168 "addq.ph %[t6], %[t8], %[t4] \n\t"

	169 "ext %[t4], %[t1], 16, 8 \n\t"

	170 "srl %[t7], %[t2], 16 \n\t"

	171 "ins %[t4], %[t7], 16, 8 \n\t"

	172 "shrl.qb %[t7], %[t4], 5 \n\t"

	173 "subu.qb %[t8], %[t3], %[t7] \n\t"

	174 "addq.ph %[t7], %[t8], %[t4] \n\t"

	175 "shll.ph %[t4], %[t7], 2 \n\t"

	176 "andi %[t9], %[t4], 0xffff \n\t"

	177 "srl %[s0], %[t4], 16 \n\t"

	178 "andi %[t3], %[t6], 0xffff \n\t"

	179 "srl %[t4], %[t6], 16 \n\t"

	180 "andi %[t6], %[t5], 0xffff \n\t"

	181 "srl %[t7], %[t5], 16 \n\t"

	182 "subq.ph %[t5], %[s1], %[t0] \n\t"

	183 "srl %[t0], %[t5], 3 \n\t"

	184 "beqz %[t1], 3f \n\t"

	185 " lhu %[t5], 0(%[dst]) \n\t"

	186 "sll %[t1], %[t6], 13 \n\t"

	187 "or %[t8], %[t9], %[t1] \n\t"

	188 "sll %[t1], %[t3], 24 \n\t"

	189 "or %[t9], %[t1], %[t8] \n\t"

	190 "andi %[t3], %[t5], 0x7e0 \n\t"

	191 "sll %[t6], %[t3], 0x10 \n\t"

	192 "and %[t8], %[s2], %[t5] \n\t"

	193 "or %[t5], %[t6], %[t8] \n\t"

	194 "andi %[t6], %[t0], 0xff \n\t"

	195 "mul %[t1], %[t6], %[t5] \n\t"

	196 "addu %[t5], %[t1], %[t9] \n\t"

	197 "srl %[t6], %[t5], 5 \n\t"

	198 "and %[t5], %[s2], %[t6] \n\t"

	199 "srl %[t8], %[t6], 16 \n\t"

	200 "andi %[t6], %[t8], 0x7e0 \n\t"

	201 "or %[t1], %[t5], %[t6] \n\t"

	202 "sh %[t1], 0(%[dst]) \n\t"

	203 "3: \n\t"

	204 "beqz %[t2], 2f \n\t"

	205 " lhu %[t5], 2(%[dst]) \n\t"

	206 "sll %[t1], %[t7], 13 \n\t"

	207 "or %[t8], %[s0], %[t1] \n\t"

	208 "sll %[t1], %[t4], 24 \n\t"

	209 "or %[t9], %[t1], %[t8] \n\t"

	210 "andi %[t3], %[t5], 0x7e0 \n\t"

	211 "sll %[t6], %[t3], 0x10 \n\t"

	212 "and %[t8], %[s2], %[t5] \n\t"

	213 "or %[t5], %[t6], %[t8] \n\t"

	214 "srl %[t6], %[t0], 16 \n\t"

	215 "mul %[t1], %[t6], %[t5] \n\t"

	216 "addu %[t5], %[t1], %[t9] \n\t"

	217 "srl %[t6], %[t5], 5 \n\t"

	218 "and %[t5], %[s2], %[t6] \n\t"

	219 "srl %[t8], %[t6], 16 \n\t"

	220 "andi %[t6], %[t8], 0x7e0 \n\t"

	221 "or %[t1], %[t5], %[t6] \n\t"

	222 "sh %[t1], 2(%[dst]) \n\t"

	223 "2: \n\t"

	224 "addiu %[count], %[count], -2 \n\t"

	225 "addiu %[src], %[src], 8 \n\t"

	226 "addiu %[t1], %[count], -1 \n\t"

	227 "bgtz %[t1], 1b \n\t"

	228 " addiu %[dst], %[dst], 4 \n\t"

	229 ".set pop \n\t"

	230 : [src]"+r"(src), [count]"+r"(count), [dst]"+r"(dst), [x]"+r"(x),

	231 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),

	232 [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7),

	233 [t8]"=&r"(t8), [t9]"=&r"(t9), [s0]"=&r"(s0), [s1]"=&r"(s1),

	234 [s2]"=&r"(s2), [s3]"=&r"(s3)

	235 : [dither_scan]"r"(dither_scan)

	236 : "memory", "hi", "lo"

	237 );

	238 }

	239

	240 if (count == 1) {

	241 SkPMColor c = *src++;

	242 SkPMColorAssert(c);

	243 if (c) {

	244 unsigned a = SkGetPackedA32(c);

	245 int d = SkAlphaMul(DITHER_VALUE(x), SkAlpha255To256(a));

	246

	247 unsigned sr = SkGetPackedR32(c);

	248 unsigned sg = SkGetPackedG32(c);

	249 unsigned sb = SkGetPackedB32(c);

	250 sr = SkDITHER_R32_FOR_565(sr, d);

	251 sg = SkDITHER_G32_FOR_565(sg, d);

	252 sb = SkDITHER_B32_FOR_565(sb, d);

	253

	254 uint32_t src_expanded = (sg << 24) \| (sr << 13) \| (sb << 2);

	255 uint32_t dst_expanded = SkExpand_rgb_16(*dst);

	256 dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3);

	257 // now src and dst expanded are in g:11 r:10 x:1 b:10

	258 *dst = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5);

	259 }

	260 dst += 1;

	261 DITHER_INC_X(x);

	262 }

	263 }

	264

	265 static void S32_D565_Opaque_Dither_mips_dsp(uint16_t* __restrict__ dst,

	266 const SkPMColor* __restrict__ src,

	267 int count, U8CPU alpha, int x, int y ) {

	268 uint16_t dither_scan = gDitherMatrix_3Bit_16[(y) & 3];

	269 register uint32_t t0, t1, t2, t3, t4, t5;

	270 register uint32_t t6, t7, t8, t9, s0;

	271 int dither[4];

	272 int i;

	273

	274 for (i = 0; i < 4; i++, x++) {

	275 dither[i] = (dither_scan >> ((x & 3) << 2)) & 0xF;

	276 }

	277

	278 __asm__ volatile (

	279 ".set push \n\t"

	280 ".set noreorder \n\t"

	281 "li %[s0], 1 \n\t"

	282 "2: \n\t"

	283 "beqz %[count], 1f \n\t"

	284 " nop \n\t"

	285 "addiu %[t0], %[count], -1 \n\t"

	286 "beqz %[t0], 1f \n\t"

	287 " nop \n\t"

	288 "beqz %[s0], 3f \n\t"

	289 " nop \n\t"

	290 "lw %[t0], 0(%[dither]) \n\t"

	291 "lw %[t1], 4(%[dither]) \n\t"

	292 "li %[s0], 0 \n\t"

	293 "b 4f \n\t"

	294 " nop \n\t"

	295 "3: \n\t"

	296 "lw %[t0], 8(%[dither]) \n\t"

	297 "lw %[t1], 12(%[dither]) \n\t"

	298 "li %[s0], 1 \n\t"

	299 "4: \n\t"

	300 "sll %[t2], %[t0], 16 \n\t"

	301 "or %[t1], %[t2], %[t1] \n\t"

	302 "lw %[t0], 0(%[src]) \n\t"

	303 "lw %[t2], 4(%[src]) \n\t"

	304 "precrq.ph.w %[t3], %[t0], %[t2] \n\t"

	305 "preceu.ph.qbra %[t9], %[t3] \n\t"

	306 #ifdef __MIPS_HAVE_DSPR2

	307 "append %[t0], %[t2], 16 \n\t"

	308 "preceu.ph.qbra %[t4], %[t0] \n\t"

	309 "preceu.ph.qbla %[t5], %[t0] \n\t"

	310 #else

	311 "sll %[t6], %[t0], 16 \n\t"

	312 "sll %[t7], %[t2], 16 \n\t"

	313 "precrq.ph.w %[t8], %[t6], %[t7] \n\t"

	314 "preceu.ph.qbra %[t4], %[t8] \n\t"

	315 "preceu.ph.qbla %[t5], %[t8] \n\t"

	316 #endif

	317 "addu.qb %[t0], %[t4], %[t1] \n\t"

	318 "shra.ph %[t2], %[t4], 5 \n\t"

	319 "subu.qb %[t3], %[t0], %[t2] \n\t"

	320 "shra.ph %[t6], %[t3], 3 \n\t"

	321 "addu.qb %[t0], %[t9], %[t1] \n\t"

	322 "shra.ph %[t2], %[t9], 5 \n\t"

	323 "subu.qb %[t3], %[t0], %[t2] \n\t"

	324 "shra.ph %[t7], %[t3], 3 \n\t"

	325 "shra.ph %[t0], %[t1], 1 \n\t"

	326 "shra.ph %[t2], %[t5], 6 \n\t"

	327 "addu.qb %[t3], %[t5], %[t0] \n\t"

	328 "subu.qb %[t4], %[t3], %[t2] \n\t"

	329 "shra.ph %[t8], %[t4], 2 \n\t"

	330 "precrq.ph.w %[t0], %[t6], %[t7] \n\t"

	331 #ifdef __MIPS_HAVE_DSPR2

	332 "append %[t6], %[t7], 16 \n\t"

	333 #else

	334 "sll %[t6], %[t6], 16 \n\t"

	335 "sll %[t2], %[t7], 16 \n\t"

	336 "precrq.ph.w %[t6], %[t6], %[t2] \n\t"

	337 #endif

	338 "sra %[t4], %[t8], 16 \n\t"

	339 "andi %[t5], %[t8], 0xFF \n\t"

	340 "sll %[t7], %[t4], 5 \n\t"

	341 "sra %[t8], %[t0], 5 \n\t"

	342 "or %[t9], %[t7], %[t8] \n\t"

	343 "or %[t3], %[t9], %[t0] \n\t"

	344 "andi %[t4], %[t3], 0xFFFF \n\t"

	345 "sll %[t7], %[t5], 5 \n\t"

	346 "sra %[t8], %[t6], 5 \n\t"

	347 "or %[t9], %[t7], %[t8] \n\t"

	348 "or %[t3], %[t9], %[t6] \n\t"

	349 "and %[t7], %[t3], 0xFFFF \n\t"

	350 "sh %[t4], 0(%[dst]) \n\t"

	351 "sh %[t7], 2(%[dst]) \n\t"

	352 "addiu %[count], %[count], -2 \n\t"

	353 "addiu %[src], %[src], 8 \n\t"

	354 "b 2b \n\t"

	355 " addiu %[dst], %[dst], 4 \n\t"

	356 "1: \n\t"

	357 ".set pop \n\t"

	358 : [dst]"+r"(dst), [src]"+r"(src), [count]"+r"(count),

	359 [x]"+r"(x), [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2),

	360 [t3]"=&r"(t3), [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6),

	361 [t7]"=&r"(t7), [t8]"=&r"(t8), [t9]"=&r"(t9), [s0]"=&r"(s0)

	362 : [dither] "r" (dither)

	363 : "memory"

	364 );

	365

	366 if (count == 1) {

	367 SkPMColor c = *src++;

	368 SkPMColorAssert(c); // only if DEBUG is turned on

	369 SkASSERT(SkGetPackedA32(c) == 255);

	370 unsigned dither = DITHER_VALUE(x);

	371 *dst++ = SkDitherRGB32To565(c, dither);

	372 }

	373 }

	374

	375 static void S32_D565_Blend_Dither_mips_dsp(uint16_t* dst,

	376 const SkPMColor* src,

	377 int count, U8CPU alpha, int x, int y) {

	378 register int32_t t0, t1, t2, t3, t4, t5, t6;

	379 register int32_t s0, s1, s2, s3;

	380 register int x1 = 0;

	381 register uint32_t sc_mul;

	382 register uint32_t sc_add;

	383 #ifdef ENABLE_DITHER_MATRIX_4X4

	384 const uint8_t* dither_scan = gDitherMatrix_3Bit_4X4[(y) & 3];

	385 #else // ENABLE_DITHER_MATRIX_4X4

	386 const uint16_t dither_scan = gDitherMatrix_3Bit_16[(y) & 3];

	387 #endif // ENABLE_DITHER_MATRIX_4X4

	388 int dither[4];

	389

	390 for (int i = 0; i < 4; i++) {

	391 dither[i] = (dither_scan >> ((x & 3) << 2)) & 0xF;

	392 x += 1;

	393 }

	394 alpha += 1;

	395 __asm__ volatile (

	396 ".set push \n\t"

	397 ".set noreorder \n\t"

	398 "li %[t0], 0x100 \n\t"

	399 "subu %[t0], %[t0], %[alpha] \n\t"

	400 "replv.ph %[sc_mul], %[alpha] \n\t"

	401 "beqz %[alpha], 1f \n\t"

	402 " nop \n\t"

	403 "replv.qb %[sc_add], %[t0] \n\t"

	404 "b 2f \n\t"

	405 " nop \n\t"

	406 "1: \n\t"

	407 "replv.qb %[sc_add], %[alpha] \n\t"

	408 "2: \n\t"

	409 "addiu %[t2], %[count], -1 \n\t"

	410 "blez %[t2], 3f \n\t"

	411 " nop \n\t"

	412 "lw %[s0], 0(%[src]) \n\t"

	413 "lw %[s1], 4(%[src]) \n\t"

	414 "bnez %[x1], 4f \n\t"

	415 " nop \n\t"

	416 "lw %[t0], 0(%[dither]) \n\t"

	417 "lw %[t1], 4(%[dither]) \n\t"

	418 "li %[x1], 1 \n\t"

	419 "b 5f \n\t"

	420 " nop \n\t"

	421 "4: \n\t"

	422 "lw %[t0], 8(%[dither]) \n\t"

	423 "lw %[t1], 12(%[dither]) \n\t"

	424 "li %[x1], 0 \n\t"

	425 "5: \n\t"

	426 "sll %[t3], %[t0], 7 \n\t"

	427 "sll %[t4], %[t1], 7 \n\t"

	428 #ifdef __MIPS_HAVE_DSPR2

	429 "append %[t0], %[t1], 16 \n\t"

	430 #else

	431 "sll %[t0], %[t0], 8 \n\t"

	432 "sll %[t2], %[t1], 8 \n\t"

	433 "precrq.qb.ph %[t0], %[t0], %[t2] \n\t"

	434 #endif

	435 "precrq.qb.ph %[t1], %[t3], %[t4] \n\t"

	436 "sll %[t5], %[s0], 8 \n\t"

	437 "sll %[t6], %[s1], 8 \n\t"

	438 "precrq.qb.ph %[t4], %[t5], %[t6] \n\t"

	439 "precrq.qb.ph %[t6], %[s0], %[s1] \n\t"

	440 "preceu.ph.qbla %[t5], %[t4] \n\t"

	441 "preceu.ph.qbra %[t4], %[t4] \n\t"

	442 "preceu.ph.qbra %[t6], %[t6] \n\t"

	443 "lh %[t2], 0(%[dst]) \n\t"

	444 "lh %[s1], 2(%[dst]) \n\t"

	445 #ifdef __MIPS_HAVE_DSPR2

	446 "append %[t2], %[s1], 16 \n\t"

	447 #else

	448 "sll %[s1], %[s1], 16 \n\t"

	449 "packrl.ph %[t2], %[t2], %[s1] \n\t"

	450 #endif

	451 "shra.ph %[s1], %[t2], 11 \n\t"

	452 "and %[s1], %[s1], 0x1F001F \n\t"

	453 "shra.ph %[s2], %[t2], 5 \n\t"

	454 "and %[s2], %[s2], 0x3F003F \n\t"

	455 "and %[s3], %[t2], 0x1F001F \n\t"

	456 "shrl.qb %[t3], %[t4], 5 \n\t"

	457 "addu.qb %[t4], %[t4], %[t0] \n\t"

	458 "subu.qb %[t4], %[t4], %[t3] \n\t"

	459 "shrl.qb %[t4], %[t4], 3 \n\t"

	460 "shrl.qb %[t3], %[t5], 5 \n\t"

	461 "addu.qb %[t5], %[t5], %[t0] \n\t"

	462 "subu.qb %[t5], %[t5], %[t3] \n\t"

	463 "shrl.qb %[t5], %[t5], 3 \n\t"

	464 "shrl.qb %[t3], %[t6], 6 \n\t"

	465 "addu.qb %[t6], %[t6], %[t1] \n\t"

	466 "subu.qb %[t6], %[t6], %[t3] \n\t"

	467 "shrl.qb %[t6], %[t6], 2 \n\t"

	468 "cmpu.lt.qb %[t4], %[s1] \n\t"

	469 "pick.qb %[s0], %[sc_add], $0 \n\t"

	470 "addu.qb %[s0], %[s0], %[s1] \n\t"

	471 "subu.qb %[t4], %[t4], %[s1] \n\t"

	472 "muleu_s.ph.qbl %[t0], %[t4], %[sc_mul] \n\t"

	473 "muleu_s.ph.qbr %[t1], %[t4], %[sc_mul] \n\t"

	474 "precrq.qb.ph %[t4], %[t0], %[t1] \n\t"

	475 "addu.qb %[t4], %[t4], %[s0] \n\t"

	476 "cmpu.lt.qb %[t5], %[s3] \n\t"

	477 "pick.qb %[s0], %[sc_add], $0 \n\t"

	478 "addu.qb %[s0], %[s0], %[s3] \n\t"

	479 "subu.qb %[t5], %[t5], %[s3] \n\t"

	480 "muleu_s.ph.qbl %[t0], %[t5], %[sc_mul] \n\t"

	481 "muleu_s.ph.qbr %[t1], %[t5], %[sc_mul] \n\t"

	482 "precrq.qb.ph %[t5], %[t0], %[t1] \n\t"

	483 "addu.qb %[t5], %[t5], %[s0] \n\t"

	484 "cmpu.lt.qb %[t6], %[s2] \n\t"

	485 "pick.qb %[s0], %[sc_add], $0 \n\t"

	486 "addu.qb %[s0], %[s0], %[s2] \n\t"

	487 "subu.qb %[t6], %[t6], %[s2] \n\t"

	488 "muleu_s.ph.qbl %[t0], %[t6], %[sc_mul] \n\t"

	489 "muleu_s.ph.qbr %[t1], %[t6], %[sc_mul] \n\t"

	490 "precrq.qb.ph %[t6], %[t0], %[t1] \n\t"

	491 "addu.qb %[t6], %[t6], %[s0] \n\t"

	492 "shll.ph %[s1], %[t4], 11 \n\t"

	493 "shll.ph %[t0], %[t6], 5 \n\t"

	494 "or %[s0], %[s1], %[t0] \n\t"

	495 "or %[s1], %[s0], %[t5] \n\t"

	496 "srl %[t2], %[s1], 16 \n\t"

	497 "and %[t3], %[s1], 0xFFFF \n\t"

	498 "sh %[t2], 0(%[dst]) \n\t"

	499 "sh %[t3], 2(%[dst]) \n\t"

	500 "addiu %[src], %[src], 8 \n\t"

	501 "addi %[count], %[count], -2 \n\t"

	502 "b 2b \n\t"

	503 " addu %[dst], %[dst], 4 \n\t"

	504 "3: \n\t"

	505 ".set pop \n\t"

	506 : [src]"+r"(src), [dst]"+r"(dst), [count]"+r"(count),

	507 [x1]"+r"(x1), [sc_mul]"=&r"(sc_mul), [sc_add]"=&r"(sc_add),

	508 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),

	509 [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [s0]"=&r"(s0),

	510 [s1]"=&r"(s1), [s2]"=&r"(s2), [s3]"=&r"(s3)

	511 : [dither]"r"(dither), [alpha]"r"(alpha)

	512 : "memory", "hi", "lo"

	513 );

	514

	515 if(count == 1) {

	516 SkPMColor c = *src++;

	517 SkPMColorAssert(c);

	518 SkASSERT(SkGetPackedA32(c) == 255);

	519 DITHER_565_SCAN(y);

	520 int dither = DITHER_VALUE(x);

	521 int sr = SkGetPackedR32(c);

	522 int sg = SkGetPackedG32(c);

	523 int sb = SkGetPackedB32(c);

	524 sr = SkDITHER_R32To565(sr, dither);

	525 sg = SkDITHER_G32To565(sg, dither);

	526 sb = SkDITHER_B32To565(sb, dither);

	527

	528 uint16_t d = *dst;

	529 *dst++ = SkPackRGB16(SkAlphaBlend(sr, SkGetPackedR16(d), alpha),

	530 SkAlphaBlend(sg, SkGetPackedG16(d), alpha),

	531 SkAlphaBlend(sb, SkGetPackedB16(d), alpha));

	532 DITHER_INC_X(x);

	533 }

	534 }

	535

	536 static void S32A_D565_Opaque_mips_dsp(uint16_t* __restrict__ dst,

	537 const SkPMColor* __restrict__ src,

	538 int count, U8CPU alpha, int x, int y) {

	539

	540 __asm__ volatile (

	541 "pref 0, 0(%[src]) \n\t"

	542 "pref 1, 0(%[dst]) \n\t"

	543 "pref 0, 32(%[src]) \n\t"

	544 "pref 1, 32(%[dst]) \n\t"

	545 :

	546 : [src]"r"(src), [dst]"r"(dst)

	547 : "memory"

	548 );

	549

	550 register uint32_t t0, t1, t2, t3, t4, t5, t6, t7, t8;

	551 register uint32_t t16;

	552 register uint32_t add_x10 = 0x100010;

	553 register uint32_t add_x20 = 0x200020;

	554 register uint32_t sa = 0xff00ff;

	555

	556 __asm__ volatile (

	557 ".set push \n\t"

	558 ".set noreorder \n\t"

	559 "blez %[count], 1f \n\t"

	560 " nop \n\t"

	561 "2: \n\t"

	562 "beqz %[count], 1f \n\t"

	563 " nop \n\t"

	564 "addiu %[t0], %[count], -1 \n\t"

	565 "beqz %[t0], 1f \n\t"

	566 " nop \n\t"

	567 "bnez %[t16], 3f \n\t"

	568 " nop \n\t"

	569 "li %[t16], 2 \n\t"

	570 "pref 0, 64(%[src]) \n\t"

	571 "pref 1, 64(%[dst]) \n\t"

	572 "3: \n\t"

	573 "addiu %[t16], %[t16], -1 \n\t"

	574 "lw %[t0], 0(%[src]) \n\t"

	575 "lw %[t1], 4(%[src]) \n\t"

	576 "precrq.ph.w %[t2], %[t0], %[t1] \n\t"

	577 "preceu.ph.qbra %[t8], %[t2] \n\t"

	578 #ifdef __MIPS_HAVE_DSPR2

	579 "append %[t0], %[t1], 16 \n\t"

	580 #else

	581 "sll %[t0], %[t0], 16 \n\t"

	582 "sll %[t6], %[t1], 16 \n\t"

	583 "precrq.ph.w %[t0], %[t0], %[t6] \n\t"

	584 #endif

	585 "preceu.ph.qbra %[t3], %[t0] \n\t"

	586 "preceu.ph.qbla %[t4], %[t0] \n\t"

	587 "preceu.ph.qbla %[t0], %[t2] \n\t"

	588 "subq.ph %[t1], %[sa], %[t0] \n\t"

	589 "sra %[t2], %[t1], 8 \n\t"

	590 "or %[t5], %[t2], %[t1] \n\t"

	591 "replv.ph %[t2], %[t5] \n\t"

	592 "lh %[t0], 0(%[dst]) \n\t"

	593 "lh %[t1], 2(%[dst]) \n\t"

	594 "and %[t1], %[t1], 0xffff \n\t"

	595 #ifdef __MIPS_HAVE_DSPR2

	596 "append %[t0], %[t1], 16 \n\t"

	597 #else

	598 "sll %[t5], %[t0], 16 \n\t"

	599 "or %[t0], %[t5], %[t1] \n\t"

	600 #endif

	601 "and %[t1], %[t0], 0x1f001f \n\t"

	602 "shra.ph %[t6], %[t0], 11 \n\t"

	603 "and %[t6], %[t6], 0x1f001f \n\t"

	604 "and %[t7], %[t0], 0x7e007e0 \n\t"

	605 "shra.ph %[t5], %[t7], 5 \n\t"

	606 "muleu_s.ph.qbl %[t0], %[t2], %[t6] \n\t"

	607 "addq.ph %[t7], %[t0], %[add_x10] \n\t"

	608 "shra.ph %[t6], %[t7], 5 \n\t"

	609 "addq.ph %[t6], %[t7], %[t6] \n\t"

	610 "shra.ph %[t0], %[t6], 5 \n\t"

	611 "addq.ph %[t7], %[t0], %[t3] \n\t"

	612 "shra.ph %[t6], %[t7], 3 \n\t"

	613 "muleu_s.ph.qbl %[t0], %[t2], %[t1] \n\t"

	614 "addq.ph %[t7], %[t0], %[add_x10] \n\t"

	615 "shra.ph %[t0], %[t7], 5 \n\t"

	616 "addq.ph %[t7], %[t7], %[t0] \n\t"

	617 "shra.ph %[t0], %[t7], 5 \n\t"

	618 "addq.ph %[t7], %[t0], %[t8] \n\t"

	619 "shra.ph %[t3], %[t7], 3 \n\t"

	620 "muleu_s.ph.qbl %[t0], %[t2], %[t5] \n\t"

	621 "addq.ph %[t7], %[t0], %[add_x20] \n\t"

	622 "shra.ph %[t0], %[t7], 6 \n\t"

	623 "addq.ph %[t8], %[t7], %[t0] \n\t"

	624 "shra.ph %[t0], %[t8], 6 \n\t"

	625 "addq.ph %[t7], %[t0], %[t4] \n\t"

	626 "shra.ph %[t8], %[t7], 2 \n\t"

	627 "shll.ph %[t0], %[t8], 5 \n\t"

	628 "shll.ph %[t1], %[t6], 11 \n\t"

	629 "or %[t2], %[t0], %[t1] \n\t"

	630 "or %[t3], %[t2], %[t3] \n\t"

	631 "sra %[t4], %[t3], 16 \n\t"

	632 "sh %[t4], 0(%[dst]) \n\t"

	633 "sh %[t3], 2(%[dst]) \n\t"

	634 "addiu %[count], %[count], -2 \n\t"

	635 "addiu %[src], %[src], 8 \n\t"

	636 "b 2b \n\t"

	637 " addiu %[dst], %[dst], 4 \n\t"

	638 "1: \n\t"

	639 ".set pop \n\t"

	640 : [dst]"+r"(dst), [src]"+r"(src), [count]"+r"(count),

	641 [t16]"=&r"(t16), [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2),

	642 [t3]"=&r"(t3), [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6),

	643 [t7]"=&r"(t7), [t8]"=&r"(t8)

	644 : [add_x10]"r"(add_x10), [add_x20]"r"(add_x20), [sa]"r"(sa)

	645 : "memory", "hi", "lo"

	646 );

	647

	648 if (count == 1) {

	649 SkPMColor c = *src++;

	650 SkPMColorAssert(c);

	651 if (c) {

	652 dst = SkSrcOver32To16(c, dst);

	653 }

	654 dst += 1;

	655 }

	656 }

	657

	658 static void S32A_D565_Blend_mips_dsp(uint16_t* SK_RESTRICT dst,

	659 const SkPMColor* SK_RESTRICT src, int count ,

	660 U8CPU alpha, int /x/, int /y/) {

	661 register uint32_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9;

	662 register uint32_t s0, s1, s2, s3;

	663 register unsigned dst_scale = 0;

	664

	665 __asm__ volatile (

	666 ".set push \n\t"

	667 ".set noreorder \n\t"

	668 "replv.qb %[t0], %[alpha] \n\t"

	669 "repl.ph %[t6], 0x80 \n\t"

	670 "repl.ph %[t7], 0xFF \n\t"

	671 "1: \n\t"

	672 "addiu %[t8], %[count], -1 \n\t"

	673 "blez %[t8], 2f \n\t"

	674 " nop \n\t"

	675 "lw %[t8], 0(%[src]) \n\t"

	676 "lw %[t9], 4(%[src]) \n\t"

	677 "lh %[t4], 0(%[dst]) \n\t"

	678 "lh %[t5], 2(%[dst]) \n\t"

	679 "sll %[t5], %[t5], 16 \n\t"

	680 "sll %[t2], %[t8], 8 \n\t"

	681 "sll %[t3], %[t9], 8 \n\t"

	682 "precrq.qb.ph %[t1], %[t2], %[t3] \n\t"

	683 "precrq.qb.ph %[t3], %[t8], %[t9] \n\t"

	684 "preceu.ph.qbla %[t8], %[t3] \n\t"

	685 "muleu_s.ph.qbr %[s3], %[t0], %[t8] \n\t"

	686 "preceu.ph.qbla %[t2], %[t1] \n\t"

	687 "preceu.ph.qbra %[t1], %[t1] \n\t"

	688 "preceu.ph.qbra %[t3], %[t3] \n\t"

	689 "packrl.ph %[t9], %[t4], %[t5] \n\t"

	690 "shra.ph %[s0], %[t9], 11 \n\t"

	691 "and %[s0], %[s0], 0x1F001F \n\t"

	692 "shra.ph %[s1], %[t9], 5 \n\t"

	693 "and %[s1], %[s1], 0x3F003F \n\t"

	694 "and %[s2], %[t9], 0x1F001F \n\t"

	695 "addq.ph %[s3], %[s3], %[t6] \n\t"

	696 "shra.ph %[t5], %[s3], 8 \n\t"

	697 "and %[t5], %[t5], 0xFF00FF \n\t"

	698 "addq.ph %[dst_scale], %[s3], %[t5] \n\t"

	699 "shra.ph %[dst_scale], %[dst_scale], 8 \n\t"

	700 "subq_s.ph %[dst_scale], %[t7], %[dst_scale] \n\t"

	701 "sll %[dst_scale], %[dst_scale], 8 \n\t"

	702 "precrq.qb.ph %[dst_scale], %[dst_scale], %[dst_scale] \n\t"

	703 "shrl.qb %[t1], %[t1], 3 \n\t"

	704 "shrl.qb %[t2], %[t2], 3 \n\t"

	705 "shrl.qb %[t3], %[t3], 2 \n\t"

	706 "muleu_s.ph.qbl %[t1], %[t0], %[t1] \n\t"

	707 "muleu_s.ph.qbl %[t2], %[t0], %[t2] \n\t"

	708 "muleu_s.ph.qbl %[t3], %[t0], %[t3] \n\t"

	709 "muleu_s.ph.qbl %[t8], %[dst_scale], %[s0] \n\t"

	710 "muleu_s.ph.qbl %[t9], %[dst_scale], %[s2] \n\t"

	711 "muleu_s.ph.qbl %[t4], %[dst_scale], %[s1] \n\t"

	712 "addq.ph %[t1], %[t1], %[t8] \n\t"

	713 "addq.ph %[t2], %[t2], %[t9] \n\t"

	714 "addq.ph %[t3], %[t3], %[t4] \n\t"

	715 "addq.ph %[t8], %[t1], %[t6] \n\t"

	716 "addq.ph %[t9], %[t2], %[t6] \n\t"

	717 "addq.ph %[t4], %[t3], %[t6] \n\t"

	718 "shra.ph %[t1], %[t8], 8 \n\t"

	719 "addq.ph %[t1], %[t1], %[t8] \n\t"

	720 "preceu.ph.qbla %[t1], %[t1] \n\t"

	721 "shra.ph %[t2], %[t9], 8 \n\t"

	722 "addq.ph %[t2], %[t2], %[t9] \n\t"

	723 "preceu.ph.qbla %[t2], %[t2] \n\t"

	724 "shra.ph %[t3], %[t4], 8 \n\t"

	725 "addq.ph %[t3], %[t3], %[t4] \n\t"

	726 "preceu.ph.qbla %[t3], %[t3] \n\t"

	727 "shll.ph %[t8], %[t1], 11 \n\t"

	728 "shll.ph %[t9], %[t3], 5 \n\t"

	729 "or %[t8], %[t8], %[t9] \n\t"

	730 "or %[s0], %[t8], %[t2] \n\t"

	731 "srl %[t8], %[s0], 16 \n\t"

	732 "and %[t9], %[s0], 0xFFFF \n\t"

	733 "sh %[t8], 0(%[dst]) \n\t"

	734 "sh %[t9], 2(%[dst]) \n\t"

	735 "addiu %[src], %[src], 8 \n\t"

	736 "addiu %[count], %[count], -2 \n\t"

	737 "b 1b \n\t"

	738 " addiu %[dst], %[dst], 4 \n\t"

	739 "2: \n\t"

	740 ".set pop \n\t"

	741 : [src]"+r"(src), [dst]"+r"(dst), [count]"+r"(count),

	742 [dst_scale]"+r"(dst_scale), [s0]"=&r"(s0), [s1]"=&r"(s1),

	743 [s2]"=&r"(s2), [s3]"=&r"(s3), [t0]"=&r"(t0), [t1]"=&r"(t1),

	744 [t2]"=&r"(t2), [t3]"=&r"(t3), [t4]"=&r"(t4), [t5]"=&r"(t5),

	745 [t6]"=&r"(t6), [t7]"=&r"(t7), [t8]"=&r"(t8), [t9]"=&r"(t9)

	746 : [alpha]"r"(alpha)

	747 : "memory", "hi", "lo"

	748 );

	749

	750 if (count == 1) {

	751 SkPMColor sc = *src++;

	752 SkPMColorAssert(sc);

	753 if (sc) {

	754 uint16_t dc = *dst;

	755 unsigned dst_scale = 255 - SkMulDiv255Round(SkGetPackedA32(sc), alph a);

	756 unsigned dr = SkMulS16(SkPacked32ToR16(sc), alpha) +

	757 SkMulS16(SkGetPackedR16(dc), dst_scale);

	758 unsigned dg = SkMulS16(SkPacked32ToG16(sc), alpha) +

	759 SkMulS16(SkGetPackedG16(dc), dst_scale);

	760 unsigned db = SkMulS16(SkPacked32ToB16(sc), alpha) +

	761 SkMulS16(SkGetPackedB16(dc), dst_scale);

	762 *dst = SkPackRGB16(SkDiv255Round(dr), SkDiv255Round(dg), SkDiv255Rou nd(db));

	763 }

	764 dst += 1;

	765 }

	766 }

	767

	768 static void S32_Blend_BlitRow32_mips_dsp(SkPMColor* SK_RESTRICT dst,

	769 const SkPMColor* SK_RESTRICT src,

	770 int count, U8CPU alpha) {

	771 register int32_t t0, t1, t2, t3, t4, t5, t6, t7;

	772

	773 __asm__ volatile (

	774 ".set push \n\t"

	775 ".set noreorder \n\t"

	776 "li %[t2], 0x100 \n\t"

	777 "addiu %[t0], %[alpha], 1 \n\t"

	778 "subu %[t1], %[t2], %[t0] \n\t"

	779 "replv.qb %[t7], %[t0] \n\t"

	780 "replv.qb %[t6], %[t1] \n\t"

	781 "1: \n\t"

	782 "blez %[count], 2f \n\t"

	783 "lw %[t0], 0(%[src]) \n\t"

	784 "lw %[t1], 0(%[dst]) \n\t"

	785 "preceu.ph.qbr %[t2], %[t0] \n\t"

	786 "preceu.ph.qbl %[t3], %[t0] \n\t"

	787 "preceu.ph.qbr %[t4], %[t1] \n\t"

	788 "preceu.ph.qbl %[t5], %[t1] \n\t"

	789 "muleu_s.ph.qbr %[t2], %[t7], %[t2] \n\t"

	790 "muleu_s.ph.qbr %[t3], %[t7], %[t3] \n\t"

	791 "muleu_s.ph.qbr %[t4], %[t6], %[t4] \n\t"

	792 "muleu_s.ph.qbr %[t5], %[t6], %[t5] \n\t"

	793 "addiu %[src], %[src], 4 \n\t"

	794 "addiu %[count], %[count], -1 \n\t"

	795 "precrq.qb.ph %[t0], %[t3], %[t2] \n\t"

	796 "precrq.qb.ph %[t2], %[t5], %[t4] \n\t"

	797 "addu %[t1], %[t0], %[t2] \n\t"

	798 "sw %[t1], 0(%[dst]) \n\t"

	799 "b 1b \n\t"

	800 " addi %[dst], %[dst], 4 \n\t"

	801 "2: \n\t"

	802 ".set pop \n\t"

	803 : [src]"+r"(src), [dst]"+r"(dst), [count]"+r"(count),

	804 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),

	805 [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)

	806 : [alpha]"r"(alpha)

	807 : "memory", "hi", "lo"

	808 );

	809 }

	810

	811 ///////////////////////////////////////////////////////////////////////////////////////////////////

	812

	813 const SkBlitRow::Proc platform_565_procs_mips_dsp[] = {

	814 // no dither

	815 NULL,

	816 S32_D565_Blend_mips_dsp,

	817 S32A_D565_Opaque_mips_dsp,

	818 S32A_D565_Blend_mips_dsp,

	819

	820 // dither

	821 S32_D565_Opaque_Dither_mips_dsp,

	822 S32_D565_Blend_Dither_mips_dsp,

	823 S32A_D565_Opaque_Dither_mips_dsp,

	824 NULL,

	825 };

	826

	827 static const SkBlitRow::Proc32 platform_32_procs_mips_dsp[] = {

	828 NULL, // S32_Opaque,

	829 S32_Blend_BlitRow32_mips_dsp, // S32_Blend,

	830 NULL, // S32A_Opaque,

	831 NULL, // S32A_Blend,

	832 };

	833

	834 SkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) {

	835 return platform_565_procs_mips_dsp[flags];

	836 }

	837

	838 SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) {

	839 return platform_32_procs_mips_dsp[flags];

	840 }

	841

	842 SkBlitRow::ColorRectProc PlatformColorRectProcFactory() {

	843 return NULL;

	844 }

	845

	846 SkBlitRow::ColorProc SkBlitRow::PlatformColorProc() {

	847 return NULL;

	848 }

OLD	NEW

« no previous file with comments | « gyp/opts.gyp ('k') | no next file » | no next file with comments »