| OLD | NEW |
| 1 | 1 |
| 2 /* | 2 /* |
| 3 * Copyright 2009 The Android Open Source Project | 3 * Copyright 2009 The Android Open Source Project |
| 4 * | 4 * |
| 5 * Use of this source code is governed by a BSD-style license that can be | 5 * Use of this source code is governed by a BSD-style license that can be |
| 6 * found in the LICENSE file. | 6 * found in the LICENSE file. |
| 7 */ | 7 */ |
| 8 | 8 |
| 9 | 9 |
| 10 #include <emmintrin.h> | 10 #include <emmintrin.h> |
| 11 #include "SkBitmapProcState_opts_SSE2.h" | 11 #include "SkBitmapProcState_opts_SSE2.h" |
| 12 #include "SkColorPriv.h" |
| 12 #include "SkPaint.h" | 13 #include "SkPaint.h" |
| 13 #include "SkUtils.h" | 14 #include "SkUtils.h" |
| 14 | 15 |
| 15 void S32_opaque_D32_filter_DX_SSE2(const SkBitmapProcState& s, | 16 void S32_opaque_D32_filter_DX_SSE2(const SkBitmapProcState& s, |
| 16 const uint32_t* xy, | 17 const uint32_t* xy, |
| 17 int count, uint32_t* colors) { | 18 int count, uint32_t* colors) { |
| 18 SkASSERT(count > 0 && colors != NULL); | 19 SkASSERT(count > 0 && colors != NULL); |
| 19 SkASSERT(s.fFilterLevel != SkPaint::kNone_FilterLevel); | 20 SkASSERT(s.fFilterLevel != SkPaint::kNone_FilterLevel); |
| 20 SkASSERT(s.fBitmap->config() == SkBitmap::kARGB_8888_Config); | 21 SkASSERT(s.fBitmap->config() == SkBitmap::kARGB_8888_Config); |
| 21 SkASSERT(s.fAlphaScale == 256); | 22 SkASSERT(s.fAlphaScale == 256); |
| (...skipping 610 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 632 fx += dx; | 633 fx += dx; |
| 633 fy += dy; | 634 fy += dy; |
| 634 } | 635 } |
| 635 } | 636 } |
| 636 | 637 |
| 637 /* SSE version of S32_D16_filter_DX_SSE2 | 638 /* SSE version of S32_D16_filter_DX_SSE2 |
| 638 * Definition is in section of "D16 functions for SRC == 8888" in SkBitmapProcState.cpp | 639 * Definition is in section of "D16 functions for SRC == 8888" in SkBitmapProcState.cpp |
| 639 * It combines S32_opaque_D32_filter_DX_SSE2 and SkPixel32ToPixel16 | 640 * It combines S32_opaque_D32_filter_DX_SSE2 and SkPixel32ToPixel16 |
| 640 */ | 641 */ |
| 641 void S32_D16_filter_DX_SSE2(const SkBitmapProcState& s, | 642 void S32_D16_filter_DX_SSE2(const SkBitmapProcState& s, |
| 642 const uint32_t* xy, | 643 const uint32_t* xy, |
| 643 int count, uint16_t* colors) { | 644 int count, uint16_t* colors) { |
| 644 SkASSERT(count > 0 && colors != NULL); | 645 SkASSERT(count > 0 && colors != NULL); |
| 645 SkASSERT(s.fFilterLevel != SkPaint::kNone_FilterLevel); | 646 SkASSERT(s.fFilterLevel != SkPaint::kNone_FilterLevel); |
| 646 SkASSERT(s.fBitmap->config() == SkBitmap::kARGB_8888_Config); | 647 SkASSERT(s.fBitmap->config() == SkBitmap::kARGB_8888_Config); |
| 647 SkASSERT(s.fBitmap->isOpaque()); | 648 SkASSERT(s.fBitmap->isOpaque()); |
| 648 | 649 |
| 649 SkPMColor dstColor; | 650 SkPMColor dstColor; |
| 650 const char* srcAddr = static_cast<const char*>(s.fBitmap->getPixels()); | 651 const char* srcAddr = static_cast<const char*>(s.fBitmap->getPixels()); |
| 651 size_t rb = s.fBitmap->rowBytes(); | 652 size_t rb = s.fBitmap->rowBytes(); |
| 652 uint32_t XY = *xy++; | 653 uint32_t XY = *xy++; |
| 653 unsigned y0 = XY >> 14; | 654 unsigned y0 = XY >> 14; |
| (...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 737 | 738 |
| 738 // Divide each 16 bit component by 256. | 739 // Divide each 16 bit component by 256. |
| 739 sum = _mm_srli_epi16(sum, 8); | 740 sum = _mm_srli_epi16(sum, 8); |
| 740 | 741 |
| 741 // Pack lower 4 16 bit values of sum into lower 4 bytes. | 742 // Pack lower 4 16 bit values of sum into lower 4 bytes. |
| 742 sum = _mm_packus_epi16(sum, zero); | 743 sum = _mm_packus_epi16(sum, zero); |
| 743 | 744 |
| 744 // Extract low int and store. | 745 // Extract low int and store. |
| 745 dstColor = _mm_cvtsi128_si32(sum); | 746 dstColor = _mm_cvtsi128_si32(sum); |
| 746 | 747 |
| 747 //*colors++ = SkPixel32ToPixel16(dstColor); | 748 *colors++ = SkPixel32ToPixel16(dstColor); |
| 748 // below is much faster than the above. It's tested for Android benchmark--Softweg | |
| 749 __m128i _m_temp1 = _mm_set1_epi32(dstColor); | |
| 750 __m128i _m_temp2 = _mm_srli_epi32(_m_temp1, 3); | |
| 751 | |
| 752 unsigned int r32 = _mm_cvtsi128_si32(_m_temp2); | |
| 753 unsigned r = (r32 & ((1<<5) -1)) << 11; | |
| 754 | |
| 755 _m_temp2 = _mm_srli_epi32(_m_temp2, 7); | |
| 756 unsigned int g32 = _mm_cvtsi128_si32(_m_temp2); | |
| 757 unsigned g = (g32 & ((1<<6) -1)) << 5; | |
| 758 | |
| 759 _m_temp2 = _mm_srli_epi32(_m_temp2, 9); | |
| 760 unsigned int b32 = _mm_cvtsi128_si32(_m_temp2); | |
| 761 unsigned b = (b32 & ((1<<5) -1)); | |
| 762 | |
| 763 *colors++ = r | g | b; | |
| 764 | |
| 765 } while (--count > 0); | 749 } while (--count > 0); |
| 766 } | 750 } |
| OLD | NEW |