src/opts/SkBlurImage_opts_SSE2.cpp - Issue 292563005: Undo troublesome SSE 4.1 change for now to unblock Skia -> Chrome roll.

Side by Side Diff: src/opts/SkBlurImage_opts_SSE2.cpp

Issue 292563005: Undo troublesome SSE 4.1 change for now to unblock Skia -> Chrome roll. (Closed) Base URL: https://skia.googlesource.com/skia.git@master

Patch Set: Created 6 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright 2013 The Android Open Source Project	2 * Copyright 2013 The Android Open Source Project

3 *	3 *

4 * Use of this source code is governed by a BSD-style license that can be	4 * Use of this source code is governed by a BSD-style license that can be

5 * found in the LICENSE file.	5 * found in the LICENSE file.

6 */	6 */

7	7

8 #include <emmintrin.h>	8 #include <emmintrin.h>

9 #include "SkBitmap.h"	9 #include "SkBitmap.h"

10 #include "SkBlurImage_opts_SSE2.h"	10 #include "SkBlurImage_opts_SSE2.h"

11 #include "SkColorPriv.h"	11 #include "SkColorPriv.h"

12 #include "SkRect.h"	12 #include "SkRect.h"

13 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41

14 #include <smmintrin.h>

15 #endif

16	13

17 namespace {	14 namespace {

18 enum BlurDirection {	15 enum BlurDirection {

19 kX, kY	16 kX, kY

20 };	17 };

21	18

22 /* Helper function to spread the components of a 32-bit integer into the	19 /* Helper function to spread the components of a 32-bit integer into the

23 * lower 8 bits of each 32-bit element of an SSE register.	20 * lower 8 bits of each 32-bit element of an SSE register.

24 */	21 */

25 inline __m128i expand(int a) {	22 inline __m128i expand(int a) {

(...skipping 25 matching lines...) Expand all Loading...
51 __m128i sum = zero;	48 __m128i sum = zero;

52 const SkPMColor* p = src;	49 const SkPMColor* p = src;

53 for (int i = 0; i < rightBorder; ++i) {	50 for (int i = 0; i < rightBorder; ++i) {

54 sum = _mm_add_epi32(sum, expand(*p));	51 sum = _mm_add_epi32(sum, expand(*p));

55 p += srcStrideX;	52 p += srcStrideX;

56 }	53 }

57	54

58 const SkPMColor* sptr = src;	55 const SkPMColor* sptr = src;

59 SkColor* dptr = dst;	56 SkColor* dptr = dst;

60 for (int x = 0; x < width; ++x) {	57 for (int x = 0; x < width; ++x) {

61 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41	58 #if 0

62 // In SSE4.1, this would be	59 // In SSE4.1, this would be

63 __m128i result = _mm_mullo_epi32(sum, scale);	60 __m128i result = _mm_mullo_epi32(sum, scale);

64 #else	61 #else

65 // But SSE2 has no PMULLUD, so we must do AG and RB separately.	62 // But SSE2 has no PMULLUD, so we must do AG and RB separately.

66 __m128i tmp1 = _mm_mul_epu32(sum, scale);	63 __m128i tmp1 = _mm_mul_epu32(sum, scale);

67 __m128i tmp2 = _mm_mul_epu32(_mm_srli_si128(sum, 4),	64 __m128i tmp2 = _mm_mul_epu32(_mm_srli_si128(sum, 4),

68 _mm_srli_si128(scale, 4));	65 _mm_srli_si128(scale, 4));

69 __m128i result = _mm_unpacklo_epi32(_mm_shuffle_epi32(tmp1, _MM_SHUFFLE(0,0,2,0)),	66 __m128i result = _mm_unpacklo_epi32(_mm_shuffle_epi32(tmp1, _MM_SHUFFLE(0,0,2,0)),

70 _mm_shuffle_epi32(tmp2, _MM_SHUFFLE(0,0,2,0)));	67 _mm_shuffle_epi32(tmp2, _MM_SHUFFLE(0,0,2,0)));

71 #endif	68 #endif

(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
106 bool SkBoxBlurGetPlatformProcs_SSE2(SkBoxBlurProc* boxBlurX,	103 bool SkBoxBlurGetPlatformProcs_SSE2(SkBoxBlurProc* boxBlurX,

107 SkBoxBlurProc* boxBlurY,	104 SkBoxBlurProc* boxBlurY,

108 SkBoxBlurProc* boxBlurXY,	105 SkBoxBlurProc* boxBlurXY,

109 SkBoxBlurProc* boxBlurYX) {	106 SkBoxBlurProc* boxBlurYX) {

110 *boxBlurX = SkBoxBlur_SSE2<kX, kX>;	107 *boxBlurX = SkBoxBlur_SSE2<kX, kX>;

111 *boxBlurY = SkBoxBlur_SSE2<kY, kY>;	108 *boxBlurY = SkBoxBlur_SSE2<kY, kY>;

112 *boxBlurXY = SkBoxBlur_SSE2<kX, kY>;	109 *boxBlurXY = SkBoxBlur_SSE2<kX, kY>;

113 *boxBlurYX = SkBoxBlur_SSE2<kY, kX>;	110 *boxBlurYX = SkBoxBlur_SSE2<kY, kX>;

114 return true;	111 return true;

115 }	112 }

OLD	NEW

« no previous file with comments | « no previous file | no next file » | no next file with comments »