OLD | NEW |
1 | 1 |
2 /* | 2 /* |
3 * Copyright 2009 The Android Open Source Project | 3 * Copyright 2009 The Android Open Source Project |
4 * | 4 * |
5 * Use of this source code is governed by a BSD-style license that can be | 5 * Use of this source code is governed by a BSD-style license that can be |
6 * found in the LICENSE file. | 6 * found in the LICENSE file. |
7 */ | 7 */ |
8 | 8 |
9 | 9 |
10 #include <emmintrin.h> | 10 #include <emmintrin.h> |
11 #include "SkBitmapProcState_opts_SSE2.h" | 11 #include "SkBitmapProcState_opts_SSE2.h" |
| 12 #include "SkColorPriv.h" |
12 #include "SkPaint.h" | 13 #include "SkPaint.h" |
13 #include "SkUtils.h" | 14 #include "SkUtils.h" |
14 | 15 |
15 void S32_opaque_D32_filter_DX_SSE2(const SkBitmapProcState& s, | 16 void S32_opaque_D32_filter_DX_SSE2(const SkBitmapProcState& s, |
16 const uint32_t* xy, | 17 const uint32_t* xy, |
17 int count, uint32_t* colors) { | 18 int count, uint32_t* colors) { |
18 SkASSERT(count > 0 && colors != NULL); | 19 SkASSERT(count > 0 && colors != NULL); |
19 SkASSERT(s.fFilterLevel != SkPaint::kNone_FilterLevel); | 20 SkASSERT(s.fFilterLevel != SkPaint::kNone_FilterLevel); |
20 SkASSERT(s.fBitmap->config() == SkBitmap::kARGB_8888_Config); | 21 SkASSERT(s.fBitmap->config() == SkBitmap::kARGB_8888_Config); |
21 SkASSERT(s.fAlphaScale == 256); | 22 SkASSERT(s.fAlphaScale == 256); |
(...skipping 610 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
632 fx += dx; | 633 fx += dx; |
633 fy += dy; | 634 fy += dy; |
634 } | 635 } |
635 } | 636 } |
636 | 637 |
637 /* SSE version of S32_D16_filter_DX_SSE2 | 638 /* SSE version of S32_D16_filter_DX_SSE2 |
638 * Definition is in section of "D16 functions for SRC == 8888" in SkBitmapProcState.cpp | 639 * Definition is in section of "D16 functions for SRC == 8888" in SkBitmapProcState.cpp |
639 * It combines S32_opaque_D32_filter_DX_SSE2 and SkPixel32ToPixel16 | 640 * It combines S32_opaque_D32_filter_DX_SSE2 and SkPixel32ToPixel16 |
640 */ | 641 */ |
641 void S32_D16_filter_DX_SSE2(const SkBitmapProcState& s, | 642 void S32_D16_filter_DX_SSE2(const SkBitmapProcState& s, |
642 const uint32_t* xy, | 643 const uint32_t* xy, |
643 int count, uint16_t* colors) { | 644 int count, uint16_t* colors) { |
644 SkASSERT(count > 0 && colors != NULL); | 645 SkASSERT(count > 0 && colors != NULL); |
645 SkASSERT(s.fFilterLevel != SkPaint::kNone_FilterLevel); | 646 SkASSERT(s.fFilterLevel != SkPaint::kNone_FilterLevel); |
646 SkASSERT(s.fBitmap->config() == SkBitmap::kARGB_8888_Config); | 647 SkASSERT(s.fBitmap->config() == SkBitmap::kARGB_8888_Config); |
647 SkASSERT(s.fBitmap->isOpaque()); | 648 SkASSERT(s.fBitmap->isOpaque()); |
648 | 649 |
649 SkPMColor dstColor; | 650 SkPMColor dstColor; |
650 const char* srcAddr = static_cast<const char*>(s.fBitmap->getPixels()); | 651 const char* srcAddr = static_cast<const char*>(s.fBitmap->getPixels()); |
651 size_t rb = s.fBitmap->rowBytes(); | 652 size_t rb = s.fBitmap->rowBytes(); |
652 uint32_t XY = *xy++; | 653 uint32_t XY = *xy++; |
653 unsigned y0 = XY >> 14; | 654 unsigned y0 = XY >> 14; |
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
737 | 738 |
738 // Divide each 16 bit component by 256. | 739 // Divide each 16 bit component by 256. |
739 sum = _mm_srli_epi16(sum, 8); | 740 sum = _mm_srli_epi16(sum, 8); |
740 | 741 |
741 // Pack lower 4 16 bit values of sum into lower 4 bytes. | 742 // Pack lower 4 16 bit values of sum into lower 4 bytes. |
742 sum = _mm_packus_epi16(sum, zero); | 743 sum = _mm_packus_epi16(sum, zero); |
743 | 744 |
744 // Extract low int and store. | 745 // Extract low int and store. |
745 dstColor = _mm_cvtsi128_si32(sum); | 746 dstColor = _mm_cvtsi128_si32(sum); |
746 | 747 |
747 //*colors++ = SkPixel32ToPixel16(dstColor); | 748 *colors++ = SkPixel32ToPixel16(dstColor); |
748 // below is much faster than the above. It's tested for Android benchmark--Softweg | |
749 __m128i _m_temp1 = _mm_set1_epi32(dstColor); | |
750 __m128i _m_temp2 = _mm_srli_epi32(_m_temp1, 3); | |
751 | |
752 unsigned int r32 = _mm_cvtsi128_si32(_m_temp2); | |
753 unsigned r = (r32 & ((1<<5) -1)) << 11; | |
754 | |
755 _m_temp2 = _mm_srli_epi32(_m_temp2, 7); | |
756 unsigned int g32 = _mm_cvtsi128_si32(_m_temp2); | |
757 unsigned g = (g32 & ((1<<6) -1)) << 5; | |
758 | |
759 _m_temp2 = _mm_srli_epi32(_m_temp2, 9); | |
760 unsigned int b32 = _mm_cvtsi128_si32(_m_temp2); | |
761 unsigned b = (b32 & ((1<<5) -1)); | |
762 | |
763 *colors++ = r | g | b; | |
764 | |
765 } while (--count > 0); | 749 } while (--count > 0); |
766 } | 750 } |
OLD | NEW |