Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2)

Side by Side Diff: src/opts/SkBitmapProcState_opts_SSE2.cpp

Issue 239453010: Properly enable S32_D16_filter_DX_SSE2 optimization. (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Removed obsolete code Created 6 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | src/opts/opts_check_SSE2.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 1
2 /* 2 /*
3 * Copyright 2009 The Android Open Source Project 3 * Copyright 2009 The Android Open Source Project
4 * 4 *
5 * Use of this source code is governed by a BSD-style license that can be 5 * Use of this source code is governed by a BSD-style license that can be
6 * found in the LICENSE file. 6 * found in the LICENSE file.
7 */ 7 */
8 8
9 9
10 #include <emmintrin.h> 10 #include <emmintrin.h>
11 #include "SkBitmapProcState_opts_SSE2.h" 11 #include "SkBitmapProcState_opts_SSE2.h"
12 #include "SkColorPriv.h"
12 #include "SkPaint.h" 13 #include "SkPaint.h"
13 #include "SkUtils.h" 14 #include "SkUtils.h"
14 15
15 void S32_opaque_D32_filter_DX_SSE2(const SkBitmapProcState& s, 16 void S32_opaque_D32_filter_DX_SSE2(const SkBitmapProcState& s,
16 const uint32_t* xy, 17 const uint32_t* xy,
17 int count, uint32_t* colors) { 18 int count, uint32_t* colors) {
18 SkASSERT(count > 0 && colors != NULL); 19 SkASSERT(count > 0 && colors != NULL);
19 SkASSERT(s.fFilterLevel != SkPaint::kNone_FilterLevel); 20 SkASSERT(s.fFilterLevel != SkPaint::kNone_FilterLevel);
20 SkASSERT(s.fBitmap->config() == SkBitmap::kARGB_8888_Config); 21 SkASSERT(s.fBitmap->config() == SkBitmap::kARGB_8888_Config);
21 SkASSERT(s.fAlphaScale == 256); 22 SkASSERT(s.fAlphaScale == 256);
(...skipping 610 matching lines...) Expand 10 before | Expand all | Expand 10 after
632 fx += dx; 633 fx += dx;
633 fy += dy; 634 fy += dy;
634 } 635 }
635 } 636 }
636 637
637 /* SSE version of S32_D16_filter_DX_SSE2 638 /* SSE version of S32_D16_filter_DX_SSE2
638 * Definition is in section of "D16 functions for SRC == 8888" in SkBitmapProcState.cpp 639 * Definition is in section of "D16 functions for SRC == 8888" in SkBitmapProcState.cpp
639 * It combines S32_opaque_D32_filter_DX_SSE2 and SkPixel32ToPixel16 640 * It combines S32_opaque_D32_filter_DX_SSE2 and SkPixel32ToPixel16
640 */ 641 */
641 void S32_D16_filter_DX_SSE2(const SkBitmapProcState& s, 642 void S32_D16_filter_DX_SSE2(const SkBitmapProcState& s,
642 const uint32_t* xy, 643 const uint32_t* xy,
643 int count, uint16_t* colors) { 644 int count, uint16_t* colors) {
644 SkASSERT(count > 0 && colors != NULL); 645 SkASSERT(count > 0 && colors != NULL);
645 SkASSERT(s.fFilterLevel != SkPaint::kNone_FilterLevel); 646 SkASSERT(s.fFilterLevel != SkPaint::kNone_FilterLevel);
646 SkASSERT(s.fBitmap->config() == SkBitmap::kARGB_8888_Config); 647 SkASSERT(s.fBitmap->config() == SkBitmap::kARGB_8888_Config);
647 SkASSERT(s.fBitmap->isOpaque()); 648 SkASSERT(s.fBitmap->isOpaque());
648 649
649 SkPMColor dstColor; 650 SkPMColor dstColor;
650 const char* srcAddr = static_cast<const char*>(s.fBitmap->getPixels()); 651 const char* srcAddr = static_cast<const char*>(s.fBitmap->getPixels());
651 size_t rb = s.fBitmap->rowBytes(); 652 size_t rb = s.fBitmap->rowBytes();
652 uint32_t XY = *xy++; 653 uint32_t XY = *xy++;
653 unsigned y0 = XY >> 14; 654 unsigned y0 = XY >> 14;
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after
737 738
738 // Divide each 16 bit component by 256. 739 // Divide each 16 bit component by 256.
739 sum = _mm_srli_epi16(sum, 8); 740 sum = _mm_srli_epi16(sum, 8);
740 741
741 // Pack lower 4 16 bit values of sum into lower 4 bytes. 742 // Pack lower 4 16 bit values of sum into lower 4 bytes.
742 sum = _mm_packus_epi16(sum, zero); 743 sum = _mm_packus_epi16(sum, zero);
743 744
744 // Extract low int and store. 745 // Extract low int and store.
745 dstColor = _mm_cvtsi128_si32(sum); 746 dstColor = _mm_cvtsi128_si32(sum);
746 747
747 //*colors++ = SkPixel32ToPixel16(dstColor); 748 *colors++ = SkPixel32ToPixel16(dstColor);
748 // below is much faster than the above. It's tested for Android benchmark--Softweg
749 __m128i _m_temp1 = _mm_set1_epi32(dstColor);
750 __m128i _m_temp2 = _mm_srli_epi32(_m_temp1, 3);
751
752 unsigned int r32 = _mm_cvtsi128_si32(_m_temp2);
753 unsigned r = (r32 & ((1<<5) -1)) << 11;
754
755 _m_temp2 = _mm_srli_epi32(_m_temp2, 7);
756 unsigned int g32 = _mm_cvtsi128_si32(_m_temp2);
757 unsigned g = (g32 & ((1<<6) -1)) << 5;
758
759 _m_temp2 = _mm_srli_epi32(_m_temp2, 9);
760 unsigned int b32 = _mm_cvtsi128_si32(_m_temp2);
761 unsigned b = (b32 & ((1<<5) -1));
762
763 *colors++ = r | g | b;
764
765 } while (--count > 0); 749 } while (--count > 0);
766 } 750 }
OLDNEW
« no previous file with comments | « no previous file | src/opts/opts_check_SSE2.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698