| Index: src/opts/SkBlitRow_opts_SSE2.cpp
|
| diff --git a/src/opts/SkBlitRow_opts_SSE2.cpp b/src/opts/SkBlitRow_opts_SSE2.cpp
|
| index d1474f4a67c45bb37965d4a362c1cb5f79fcb496..4aa08e81657a63388535b8d1a454bf129e260b0f 100644
|
| --- a/src/opts/SkBlitRow_opts_SSE2.cpp
|
| +++ b/src/opts/SkBlitRow_opts_SSE2.cpp
|
| @@ -914,7 +914,7 @@
|
| __m128i b = _mm_packs_epi32(b1, b2);
|
|
|
| // Store 8 16-bit colors in dst.
|
| - __m128i d_pixel = SkPackRGB16_SSE2(r, g, b);
|
| + __m128i d_pixel = SkPackRGB16_SSE(r, g, b);
|
| _mm_store_si128(d++, d_pixel);
|
| count -= 8;
|
| }
|
| @@ -983,54 +983,54 @@
|
| __m128i dst_pixel = _mm_load_si128(d);
|
|
|
| // Extract A from src.
|
| - __m128i sa1 = _mm_slli_epi32(src_pixel1, (24 - SK_A32_SHIFT));
|
| + __m128i sa1 = _mm_slli_epi32(src_pixel1,(24 - SK_A32_SHIFT));
|
| sa1 = _mm_srli_epi32(sa1, 24);
|
| - __m128i sa2 = _mm_slli_epi32(src_pixel2, (24 - SK_A32_SHIFT));
|
| + __m128i sa2 = _mm_slli_epi32(src_pixel2,(24 - SK_A32_SHIFT));
|
| sa2 = _mm_srli_epi32(sa2, 24);
|
| __m128i sa = _mm_packs_epi32(sa1, sa2);
|
|
|
| // Extract R from src.
|
| - __m128i sr1 = _mm_slli_epi32(src_pixel1, (24 - SK_R32_SHIFT));
|
| + __m128i sr1 = _mm_slli_epi32(src_pixel1,(24 - SK_R32_SHIFT));
|
| sr1 = _mm_srli_epi32(sr1, 24);
|
| - __m128i sr2 = _mm_slli_epi32(src_pixel2, (24 - SK_R32_SHIFT));
|
| + __m128i sr2 = _mm_slli_epi32(src_pixel2,(24 - SK_R32_SHIFT));
|
| sr2 = _mm_srli_epi32(sr2, 24);
|
| __m128i sr = _mm_packs_epi32(sr1, sr2);
|
|
|
| // Extract G from src.
|
| - __m128i sg1 = _mm_slli_epi32(src_pixel1, (24 - SK_G32_SHIFT));
|
| + __m128i sg1 = _mm_slli_epi32(src_pixel1,(24 - SK_G32_SHIFT));
|
| sg1 = _mm_srli_epi32(sg1, 24);
|
| - __m128i sg2 = _mm_slli_epi32(src_pixel2, (24 - SK_G32_SHIFT));
|
| + __m128i sg2 = _mm_slli_epi32(src_pixel2,(24 - SK_G32_SHIFT));
|
| sg2 = _mm_srli_epi32(sg2, 24);
|
| __m128i sg = _mm_packs_epi32(sg1, sg2);
|
|
|
| // Extract B from src.
|
| - __m128i sb1 = _mm_slli_epi32(src_pixel1, (24 - SK_B32_SHIFT));
|
| + __m128i sb1 = _mm_slli_epi32(src_pixel1,(24 - SK_B32_SHIFT));
|
| sb1 = _mm_srli_epi32(sb1, 24);
|
| - __m128i sb2 = _mm_slli_epi32(src_pixel2, (24 - SK_B32_SHIFT));
|
| + __m128i sb2 = _mm_slli_epi32(src_pixel2,(24 - SK_B32_SHIFT));
|
| sb2 = _mm_srli_epi32(sb2, 24);
|
| __m128i sb = _mm_packs_epi32(sb1, sb2);
|
|
|
| // Extract R G B from dst.
|
| - __m128i dr = _mm_srli_epi16(dst_pixel, SK_R16_SHIFT);
|
| + __m128i dr = _mm_srli_epi16(dst_pixel,SK_R16_SHIFT);
|
| dr = _mm_and_si128(dr, r16_mask);
|
| - __m128i dg = _mm_srli_epi16(dst_pixel, SK_G16_SHIFT);
|
| + __m128i dg = _mm_srli_epi16(dst_pixel,SK_G16_SHIFT);
|
| dg = _mm_and_si128(dg, g16_mask);
|
| - __m128i db = _mm_srli_epi16(dst_pixel, SK_B16_SHIFT);
|
| + __m128i db = _mm_srli_epi16(dst_pixel,SK_B16_SHIFT);
|
| db = _mm_and_si128(db, b16_mask);
|
|
|
| __m128i isa = _mm_sub_epi16(var255, sa); // 255 -sa
|
|
|
| // Calculate R G B of result.
|
| // Original algorithm is in SkSrcOver32To16().
|
| - dr = _mm_add_epi16(sr, SkMul16ShiftRound_SSE2(dr, isa, SK_R16_BITS));
|
| + dr = _mm_add_epi16(sr, SkMul16ShiftRound_SSE(dr, isa, SK_R16_BITS));
|
| dr = _mm_srli_epi16(dr, 8 - SK_R16_BITS);
|
| - dg = _mm_add_epi16(sg, SkMul16ShiftRound_SSE2(dg, isa, SK_G16_BITS));
|
| + dg = _mm_add_epi16(sg, SkMul16ShiftRound_SSE(dg, isa, SK_G16_BITS));
|
| dg = _mm_srli_epi16(dg, 8 - SK_G16_BITS);
|
| - db = _mm_add_epi16(sb, SkMul16ShiftRound_SSE2(db, isa, SK_B16_BITS));
|
| + db = _mm_add_epi16(sb, SkMul16ShiftRound_SSE(db, isa, SK_B16_BITS));
|
| db = _mm_srli_epi16(db, 8 - SK_B16_BITS);
|
|
|
| // Pack R G B into 16-bit color.
|
| - __m128i d_pixel = SkPackRGB16_SSE2(dr, dg, db);
|
| + __m128i d_pixel = SkPackRGB16_SSE(dr, dg, db);
|
|
|
| // Store 8 16-bit colors in dst.
|
| _mm_store_si128(d++, d_pixel);
|
| @@ -1143,7 +1143,7 @@
|
| sb = _mm_srli_epi16(sb, SK_B32_BITS - SK_B16_BITS);
|
|
|
| // Pack and store 16-bit dst pixel.
|
| - __m128i d_pixel = SkPackRGB16_SSE2(sr, sg, sb);
|
| + __m128i d_pixel = SkPackRGB16_SSE(sr, sg, sb);
|
| _mm_store_si128(d++, d_pixel);
|
|
|
| count -= 8;
|
| @@ -1242,9 +1242,9 @@
|
| __m128i dst_pixel = _mm_load_si128(d);
|
|
|
| // Extract A from src.
|
| - __m128i sa1 = _mm_slli_epi32(src_pixel1, (24 - SK_A32_SHIFT));
|
| + __m128i sa1 = _mm_slli_epi32(src_pixel1,(24 - SK_A32_SHIFT));
|
| sa1 = _mm_srli_epi32(sa1, 24);
|
| - __m128i sa2 = _mm_slli_epi32(src_pixel2, (24 - SK_A32_SHIFT));
|
| + __m128i sa2 = _mm_slli_epi32(src_pixel2,(24 - SK_A32_SHIFT));
|
| sa2 = _mm_srli_epi32(sa2, 24);
|
| __m128i sa = _mm_packs_epi32(sa1, sa2);
|
|
|
| @@ -1323,7 +1323,7 @@
|
| db = _mm_srli_epi16(db, 5);
|
|
|
| // Package and store dst pixel.
|
| - __m128i d_pixel = SkPackRGB16_SSE2(dr, dg, db);
|
| + __m128i d_pixel = SkPackRGB16_SSE(dr, dg, db);
|
| _mm_store_si128(d++, d_pixel);
|
|
|
| count -= 8;
|
|
|