Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1664)

Side by Side Diff: src/opts/SkBlurImage_opts_SSE2.cpp

Issue 366593004: Add SSE4 version of BlurImage optimizations. (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: I have to stop optimizing gyp files... Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « gyp/opts.gyp ('k') | src/opts/SkBlurImage_opts_SSE4.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2013 The Android Open Source Project 2 * Copyright 2013 The Android Open Source Project
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #include <emmintrin.h> 8 #include <emmintrin.h>
9 #include "SkBitmap.h" 9 #include "SkBitmap.h"
10 #include "SkBlurImage_opts_SSE2.h" 10 #include "SkBlurImage_opts_SSE2.h"
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
48 __m128i sum = zero; 48 __m128i sum = zero;
49 const SkPMColor* p = src; 49 const SkPMColor* p = src;
50 for (int i = 0; i < rightBorder; ++i) { 50 for (int i = 0; i < rightBorder; ++i) {
51 sum = _mm_add_epi32(sum, expand(*p)); 51 sum = _mm_add_epi32(sum, expand(*p));
52 p += srcStrideX; 52 p += srcStrideX;
53 } 53 }
54 54
55 const SkPMColor* sptr = src; 55 const SkPMColor* sptr = src;
56 SkColor* dptr = dst; 56 SkColor* dptr = dst;
57 for (int x = 0; x < width; ++x) { 57 for (int x = 0; x < width; ++x) {
58 #if 0 58 // SSE2 has no PMULLD, so we must do AG and RB separately.
59 // In SSE4.1, this would be
60 __m128i result = _mm_mullo_epi32(sum, scale);
61 #else
62 // But SSE2 has no PMULLD, so we must do AG and RB separately.
63 __m128i tmp1 = _mm_mul_epu32(sum, scale); 59 __m128i tmp1 = _mm_mul_epu32(sum, scale);
64 __m128i tmp2 = _mm_mul_epu32(_mm_srli_si128(sum, 4), 60 __m128i tmp2 = _mm_mul_epu32(_mm_srli_si128(sum, 4),
65 _mm_srli_si128(scale, 4)); 61 _mm_srli_si128(scale, 4));
66 __m128i result = _mm_unpacklo_epi32(_mm_shuffle_epi32(tmp1, _MM_SHUFFLE(0,0,2,0)), 62 __m128i result = _mm_unpacklo_epi32(_mm_shuffle_epi32(tmp1, _MM_SHUFFLE(0,0,2,0)),
67 _mm_shuffle_epi32(tmp2, _MM_SHUFFLE(0,0,2,0))); 63 _mm_shuffle_epi32(tmp2, _MM_SHUFFLE(0,0,2,0)));
68 #endif 64
69 // sumA*scale+.5 sumR*scale+.5 sumG*scale+.5 sumB*scale+.5 65 // sumA*scale+.5 sumR*scale+.5 sumG*scale+.5 sumB*scale+.5
70 result = _mm_add_epi32(result, half); 66 result = _mm_add_epi32(result, half);
71 67
72 // 0 0 0 A 0 0 0 R 0 0 0 G 0 0 0 B 68 // 0 0 0 A 0 0 0 R 0 0 0 G 0 0 0 B
73 result = _mm_srli_epi32(result, 24); 69 result = _mm_srli_epi32(result, 24);
74 70
75 // 0 0 0 0 0 0 0 0 0 A 0 R 0 G 0 B 71 // 0 0 0 0 0 0 0 0 0 A 0 R 0 G 0 B
76 result = _mm_packs_epi32(result, zero); 72 result = _mm_packs_epi32(result, zero);
77 73
78 // 0 0 0 0 0 0 0 0 0 0 0 0 A R G B 74 // 0 0 0 0 0 0 0 0 0 0 0 0 A R G B
(...skipping 24 matching lines...) Expand all
103 bool SkBoxBlurGetPlatformProcs_SSE2(SkBoxBlurProc* boxBlurX, 99 bool SkBoxBlurGetPlatformProcs_SSE2(SkBoxBlurProc* boxBlurX,
104 SkBoxBlurProc* boxBlurY, 100 SkBoxBlurProc* boxBlurY,
105 SkBoxBlurProc* boxBlurXY, 101 SkBoxBlurProc* boxBlurXY,
106 SkBoxBlurProc* boxBlurYX) { 102 SkBoxBlurProc* boxBlurYX) {
107 *boxBlurX = SkBoxBlur_SSE2<kX, kX>; 103 *boxBlurX = SkBoxBlur_SSE2<kX, kX>;
108 *boxBlurY = SkBoxBlur_SSE2<kY, kY>; 104 *boxBlurY = SkBoxBlur_SSE2<kY, kY>;
109 *boxBlurXY = SkBoxBlur_SSE2<kX, kY>; 105 *boxBlurXY = SkBoxBlur_SSE2<kX, kY>;
110 *boxBlurYX = SkBoxBlur_SSE2<kY, kX>; 106 *boxBlurYX = SkBoxBlur_SSE2<kY, kX>;
111 return true; 107 return true;
112 } 108 }
OLDNEW
« no previous file with comments | « gyp/opts.gyp ('k') | src/opts/SkBlurImage_opts_SSE4.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698