| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2012 The Android Open Source Project | 2 * Copyright 2012 The Android Open Source Project |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 | 8 |
| 9 #include "SkBlitRow_opts_SSE2.h" | 9 #include "SkBlitRow_opts_SSE2.h" |
| 10 #include "SkBitmapProcState_opts_SSE2.h" | 10 #include "SkBitmapProcState_opts_SSE2.h" |
| (...skipping 895 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 906 // Calculate result b. | 906 // Calculate result b. |
| 907 __m128i b1 = _mm_srli_epi32(src_pixel1, | 907 __m128i b1 = _mm_srli_epi32(src_pixel1, |
| 908 SK_B32_SHIFT + (8 - SK_B16_BITS)); | 908 SK_B32_SHIFT + (8 - SK_B16_BITS)); |
| 909 b1 = _mm_and_si128(b1, b16_mask); | 909 b1 = _mm_and_si128(b1, b16_mask); |
| 910 __m128i b2 = _mm_srli_epi32(src_pixel2, | 910 __m128i b2 = _mm_srli_epi32(src_pixel2, |
| 911 SK_B32_SHIFT + (8 - SK_B16_BITS)); | 911 SK_B32_SHIFT + (8 - SK_B16_BITS)); |
| 912 b2 = _mm_and_si128(b2, b16_mask); | 912 b2 = _mm_and_si128(b2, b16_mask); |
| 913 __m128i b = _mm_packs_epi32(b1, b2); | 913 __m128i b = _mm_packs_epi32(b1, b2); |
| 914 | 914 |
| 915 // Store 8 16-bit colors in dst. | 915 // Store 8 16-bit colors in dst. |
| 916 __m128i d_pixel = SkPackRGB16_SSE(r, g, b); | 916 __m128i d_pixel = SkPackRGB16_SSE2(r, g, b); |
| 917 _mm_store_si128(d++, d_pixel); | 917 _mm_store_si128(d++, d_pixel); |
| 918 count -= 8; | 918 count -= 8; |
| 919 } | 919 } |
| 920 src = reinterpret_cast<const SkPMColor*>(s); | 920 src = reinterpret_cast<const SkPMColor*>(s); |
| 921 dst = reinterpret_cast<uint16_t*>(d); | 921 dst = reinterpret_cast<uint16_t*>(d); |
| 922 } | 922 } |
| 923 | 923 |
| 924 if (count > 0) { | 924 if (count > 0) { |
| 925 do { | 925 do { |
| 926 SkPMColor c = *src++; | 926 SkPMColor c = *src++; |
| (...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 975 if (src_cmp1 == 0xFFFF && src_cmp2 == 0xFFFF) { | 975 if (src_cmp1 == 0xFFFF && src_cmp2 == 0xFFFF) { |
| 976 d++; | 976 d++; |
| 977 count -= 8; | 977 count -= 8; |
| 978 continue; | 978 continue; |
| 979 } | 979 } |
| 980 | 980 |
| 981 // Load 8 pixels of dst. | 981 // Load 8 pixels of dst. |
| 982 __m128i dst_pixel = _mm_load_si128(d); | 982 __m128i dst_pixel = _mm_load_si128(d); |
| 983 | 983 |
| 984 // Extract A from src. | 984 // Extract A from src. |
| 985 __m128i sa1 = _mm_slli_epi32(src_pixel1,(24 - SK_A32_SHIFT)); | 985 __m128i sa1 = _mm_slli_epi32(src_pixel1, (24 - SK_A32_SHIFT)); |
| 986 sa1 = _mm_srli_epi32(sa1, 24); | 986 sa1 = _mm_srli_epi32(sa1, 24); |
| 987 __m128i sa2 = _mm_slli_epi32(src_pixel2,(24 - SK_A32_SHIFT)); | 987 __m128i sa2 = _mm_slli_epi32(src_pixel2, (24 - SK_A32_SHIFT)); |
| 988 sa2 = _mm_srli_epi32(sa2, 24); | 988 sa2 = _mm_srli_epi32(sa2, 24); |
| 989 __m128i sa = _mm_packs_epi32(sa1, sa2); | 989 __m128i sa = _mm_packs_epi32(sa1, sa2); |
| 990 | 990 |
| 991 // Extract R from src. | 991 // Extract R from src. |
| 992 __m128i sr1 = _mm_slli_epi32(src_pixel1,(24 - SK_R32_SHIFT)); | 992 __m128i sr1 = _mm_slli_epi32(src_pixel1, (24 - SK_R32_SHIFT)); |
| 993 sr1 = _mm_srli_epi32(sr1, 24); | 993 sr1 = _mm_srli_epi32(sr1, 24); |
| 994 __m128i sr2 = _mm_slli_epi32(src_pixel2,(24 - SK_R32_SHIFT)); | 994 __m128i sr2 = _mm_slli_epi32(src_pixel2, (24 - SK_R32_SHIFT)); |
| 995 sr2 = _mm_srli_epi32(sr2, 24); | 995 sr2 = _mm_srli_epi32(sr2, 24); |
| 996 __m128i sr = _mm_packs_epi32(sr1, sr2); | 996 __m128i sr = _mm_packs_epi32(sr1, sr2); |
| 997 | 997 |
| 998 // Extract G from src. | 998 // Extract G from src. |
| 999 __m128i sg1 = _mm_slli_epi32(src_pixel1,(24 - SK_G32_SHIFT)); | 999 __m128i sg1 = _mm_slli_epi32(src_pixel1, (24 - SK_G32_SHIFT)); |
| 1000 sg1 = _mm_srli_epi32(sg1, 24); | 1000 sg1 = _mm_srli_epi32(sg1, 24); |
| 1001 __m128i sg2 = _mm_slli_epi32(src_pixel2,(24 - SK_G32_SHIFT)); | 1001 __m128i sg2 = _mm_slli_epi32(src_pixel2, (24 - SK_G32_SHIFT)); |
| 1002 sg2 = _mm_srli_epi32(sg2, 24); | 1002 sg2 = _mm_srli_epi32(sg2, 24); |
| 1003 __m128i sg = _mm_packs_epi32(sg1, sg2); | 1003 __m128i sg = _mm_packs_epi32(sg1, sg2); |
| 1004 | 1004 |
| 1005 // Extract B from src. | 1005 // Extract B from src. |
| 1006 __m128i sb1 = _mm_slli_epi32(src_pixel1,(24 - SK_B32_SHIFT)); | 1006 __m128i sb1 = _mm_slli_epi32(src_pixel1, (24 - SK_B32_SHIFT)); |
| 1007 sb1 = _mm_srli_epi32(sb1, 24); | 1007 sb1 = _mm_srli_epi32(sb1, 24); |
| 1008 __m128i sb2 = _mm_slli_epi32(src_pixel2,(24 - SK_B32_SHIFT)); | 1008 __m128i sb2 = _mm_slli_epi32(src_pixel2, (24 - SK_B32_SHIFT)); |
| 1009 sb2 = _mm_srli_epi32(sb2, 24); | 1009 sb2 = _mm_srli_epi32(sb2, 24); |
| 1010 __m128i sb = _mm_packs_epi32(sb1, sb2); | 1010 __m128i sb = _mm_packs_epi32(sb1, sb2); |
| 1011 | 1011 |
| 1012 // Extract R G B from dst. | 1012 // Extract R G B from dst. |
| 1013 __m128i dr = _mm_srli_epi16(dst_pixel,SK_R16_SHIFT); | 1013 __m128i dr = _mm_srli_epi16(dst_pixel, SK_R16_SHIFT); |
| 1014 dr = _mm_and_si128(dr, r16_mask); | 1014 dr = _mm_and_si128(dr, r16_mask); |
| 1015 __m128i dg = _mm_srli_epi16(dst_pixel,SK_G16_SHIFT); | 1015 __m128i dg = _mm_srli_epi16(dst_pixel, SK_G16_SHIFT); |
| 1016 dg = _mm_and_si128(dg, g16_mask); | 1016 dg = _mm_and_si128(dg, g16_mask); |
| 1017 __m128i db = _mm_srli_epi16(dst_pixel,SK_B16_SHIFT); | 1017 __m128i db = _mm_srli_epi16(dst_pixel, SK_B16_SHIFT); |
| 1018 db = _mm_and_si128(db, b16_mask); | 1018 db = _mm_and_si128(db, b16_mask); |
| 1019 | 1019 |
| 1020 __m128i isa = _mm_sub_epi16(var255, sa); // 255 -sa | 1020 __m128i isa = _mm_sub_epi16(var255, sa); // 255 -sa |
| 1021 | 1021 |
| 1022 // Calculate R G B of result. | 1022 // Calculate R G B of result. |
| 1023 // Original algorithm is in SkSrcOver32To16(). | 1023 // Original algorithm is in SkSrcOver32To16(). |
| 1024 dr = _mm_add_epi16(sr, SkMul16ShiftRound_SSE(dr, isa, SK_R16_BITS)); | 1024 dr = _mm_add_epi16(sr, SkMul16ShiftRound_SSE2(dr, isa, SK_R16_BITS)); |
| 1025 dr = _mm_srli_epi16(dr, 8 - SK_R16_BITS); | 1025 dr = _mm_srli_epi16(dr, 8 - SK_R16_BITS); |
| 1026 dg = _mm_add_epi16(sg, SkMul16ShiftRound_SSE(dg, isa, SK_G16_BITS)); | 1026 dg = _mm_add_epi16(sg, SkMul16ShiftRound_SSE2(dg, isa, SK_G16_BITS)); |
| 1027 dg = _mm_srli_epi16(dg, 8 - SK_G16_BITS); | 1027 dg = _mm_srli_epi16(dg, 8 - SK_G16_BITS); |
| 1028 db = _mm_add_epi16(sb, SkMul16ShiftRound_SSE(db, isa, SK_B16_BITS)); | 1028 db = _mm_add_epi16(sb, SkMul16ShiftRound_SSE2(db, isa, SK_B16_BITS)); |
| 1029 db = _mm_srli_epi16(db, 8 - SK_B16_BITS); | 1029 db = _mm_srli_epi16(db, 8 - SK_B16_BITS); |
| 1030 | 1030 |
| 1031 // Pack R G B into 16-bit color. | 1031 // Pack R G B into 16-bit color. |
| 1032 __m128i d_pixel = SkPackRGB16_SSE(dr, dg, db); | 1032 __m128i d_pixel = SkPackRGB16_SSE2(dr, dg, db); |
| 1033 | 1033 |
| 1034 // Store 8 16-bit colors in dst. | 1034 // Store 8 16-bit colors in dst. |
| 1035 _mm_store_si128(d++, d_pixel); | 1035 _mm_store_si128(d++, d_pixel); |
| 1036 count -= 8; | 1036 count -= 8; |
| 1037 } | 1037 } |
| 1038 | 1038 |
| 1039 src = reinterpret_cast<const SkPMColor*>(s); | 1039 src = reinterpret_cast<const SkPMColor*>(s); |
| 1040 dst = reinterpret_cast<uint16_t*>(d); | 1040 dst = reinterpret_cast<uint16_t*>(d); |
| 1041 } | 1041 } |
| 1042 | 1042 |
| 1043 if (count > 0) { | 1043 if (count > 0) { |
| 1044 do { | 1044 do { |
| 1045 SkPMColor c = *src++; | 1045 SkPMColor c = *src++; |
| 1046 SkPMColorAssert(c); | 1046 SkPMColorAssert(c); |
| 1047 if (c) { | 1047 if (c) { |
| 1048 *dst = SkSrcOver32To16(c, *dst); | 1048 *dst = SkSrcOver32To16(c, *dst); |
| 1049 } | 1049 } |
| 1050 dst += 1; | 1050 dst += 1; |
| 1051 } while (--count != 0); | 1051 } while (--count != 0); |
| 1052 } | 1052 } |
| 1053 } | 1053 } |
| OLD | NEW |