Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(451)

Side by Side Diff: src/opts/SkBlurImage_opts_SSE2.cpp

Issue 281963002: Add SSE4 check to BlurImage optimization. (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Checks for SSE 4.1 Created 6 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD NEW
1 /* 1 /*
2 * Copyright 2013 The Android Open Source Project 2 * Copyright 2013 The Android Open Source Project
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #include <emmintrin.h> 8 #include <emmintrin.h>
9 #include "SkBitmap.h" 9 #include "SkBitmap.h"
10 #include "SkBlurImage_opts_SSE2.h" 10 #include "SkBlurImage_opts_SSE2.h"
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
48 __m128i sum = zero; 48 __m128i sum = zero;
49 const SkPMColor* p = src; 49 const SkPMColor* p = src;
50 for (int i = 0; i < rightBorder; ++i) { 50 for (int i = 0; i < rightBorder; ++i) {
51 sum = _mm_add_epi32(sum, expand(*p)); 51 sum = _mm_add_epi32(sum, expand(*p));
52 p += srcStrideX; 52 p += srcStrideX;
53 } 53 }
54 54
55 const SkPMColor* sptr = src; 55 const SkPMColor* sptr = src;
56 SkColor* dptr = dst; 56 SkColor* dptr = dst;
57 for (int x = 0; x < width; ++x) { 57 for (int x = 0; x < width; ++x) {
58 #if 0 58 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41
59 // In SSE4.1, this would be 59 // In SSE4.1, this would be
60 __m128i result = _mm_mullo_epi32(sum, scale); 60 __m128i result = _mm_mullo_epi32(sum, scale);
61 #else 61 #else
62 // But SSE2 has no PMULLUD, so we must do AG and RB separately. 62 // But SSE2 has no PMULLUD, so we must do AG and RB separately.
63 __m128i tmp1 = _mm_mul_epu32(sum, scale); 63 __m128i tmp1 = _mm_mul_epu32(sum, scale);
64 __m128i tmp2 = _mm_mul_epu32(_mm_srli_si128(sum, 4), 64 __m128i tmp2 = _mm_mul_epu32(_mm_srli_si128(sum, 4),
65 _mm_srli_si128(scale, 4)); 65 _mm_srli_si128(scale, 4));
66 __m128i result = _mm_unpacklo_epi32(_mm_shuffle_epi32(tmp1, _MM_SHUFFLE(0,0,2,0)), 66 __m128i result = _mm_unpacklo_epi32(_mm_shuffle_epi32(tmp1, _MM_SHUFFLE(0,0,2,0)),
67 _mm_shuffle_epi32(tmp2, _MM_SHUFFLE(0,0,2,0))); 67 _mm_shuffle_epi32(tmp2, _MM_SHUFFLE(0,0,2,0)));
68 #endif 68 #endif
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
103 bool SkBoxBlurGetPlatformProcs_SSE2(SkBoxBlurProc* boxBlurX, 103 bool SkBoxBlurGetPlatformProcs_SSE2(SkBoxBlurProc* boxBlurX,
104 SkBoxBlurProc* boxBlurY, 104 SkBoxBlurProc* boxBlurY,
105 SkBoxBlurProc* boxBlurXY, 105 SkBoxBlurProc* boxBlurXY,
106 SkBoxBlurProc* boxBlurYX) { 106 SkBoxBlurProc* boxBlurYX) {
107 *boxBlurX = SkBoxBlur_SSE2<kX, kX>; 107 *boxBlurX = SkBoxBlur_SSE2<kX, kX>;
108 *boxBlurY = SkBoxBlur_SSE2<kY, kY>; 108 *boxBlurY = SkBoxBlur_SSE2<kY, kY>;
109 *boxBlurXY = SkBoxBlur_SSE2<kX, kY>; 109 *boxBlurXY = SkBoxBlur_SSE2<kX, kY>;
110 *boxBlurYX = SkBoxBlur_SSE2<kY, kX>; 110 *boxBlurYX = SkBoxBlur_SSE2<kY, kX>;
111 return true; 111 return true;
112 } 112 }
OLD NEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698