Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(44)

Side by Side Diff: src/opts/SkBitmapFilter_opts_SSE2.cpp

Issue 525283002: Enable highQualityFilter_SSE2 (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: fix mac gm test Created 6 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | src/opts/opts_check_x86.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2013 Google Inc. 2 * Copyright 2013 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #include <emmintrin.h> 8 #include <emmintrin.h>
9 #include "SkBitmap.h" 9 #include "SkBitmap.h"
10 #include "SkBitmapFilter_opts_SSE2.h" 10 #include "SkBitmapFilter_opts_SSE2.h"
(...skipping 28 matching lines...) Expand all
39 printf("%3.4f %3.4f %3.4f %3.4f\n", f[0], f[1], f[2], f[3]); 39 printf("%3.4f %3.4f %3.4f %3.4f\n", f[0], f[1], f[2], f[3]);
40 } 40 }
41 #endif 41 #endif
42 42
43 // because the border is handled specially, this is guaranteed to have all 16 pixels 43 // because the border is handled specially, this is guaranteed to have all 16 pixels
44 // available to it without running off the bitmap's edge. 44 // available to it without running off the bitmap's edge.
45 45
46 void highQualityFilter_SSE2(const SkBitmapProcState& s, int x, int y, 46 void highQualityFilter_SSE2(const SkBitmapProcState& s, int x, int y,
47 SkPMColor* SK_RESTRICT colors, int count) { 47 SkPMColor* SK_RESTRICT colors, int count) {
48 48
49 const int maxX = s.fBitmap->width() - 1; 49 const int maxX = s.fBitmap->width();
50 const int maxY = s.fBitmap->height() - 1; 50 const int maxY = s.fBitmap->height();
51 SkAutoTMalloc<SkScalar> xWeights(maxX);
51 52
52 while (count-- > 0) { 53 while (count-- > 0) {
53 SkPoint srcPt; 54 SkPoint srcPt;
54 s.fInvProc(s.fInvMatrix, SkIntToScalar(x), 55 s.fInvProc(s.fInvMatrix, x + 0.5f, y + 0.5f, &srcPt);
55 SkIntToScalar(y), &srcPt);
56 srcPt.fX -= SK_ScalarHalf; 56 srcPt.fX -= SK_ScalarHalf;
57 srcPt.fY -= SK_ScalarHalf; 57 srcPt.fY -= SK_ScalarHalf;
58 58
59 int sx = SkScalarFloorToInt(srcPt.fX);
60 int sy = SkScalarFloorToInt(srcPt.fY);
61
62 __m128 weight = _mm_setzero_ps(); 59 __m128 weight = _mm_setzero_ps();
63 __m128 accum = _mm_setzero_ps(); 60 __m128 accum = _mm_setzero_ps();
64 61
65 int y0 = SkTMax(0, int(ceil(sy-s.getBitmapFilter()->width() + 0.5f))); 62 int y0 = SkClampMax(SkScalarCeilToInt(srcPt.fY-s.getBitmapFilter()->width()), maxY);
66 int y1 = SkTMin(maxY, int(floor(sy+s.getBitmapFilter()->width() + 0.5f))); 63 int y1 = SkClampMax(SkScalarFloorToInt(srcPt.fY+s.getBitmapFilter()->width()+1), maxY);
67 int x0 = SkTMax(0, int(ceil(sx-s.getBitmapFilter()->width() + 0.5f))); 64 int x0 = SkClampMax(SkScalarCeilToInt(srcPt.fX-s.getBitmapFilter()->width()), maxX);
68 int x1 = SkTMin(maxX, int(floor(sx+s.getBitmapFilter()->width() + 0.5f))); 65 int x1 = SkClampMax(SkScalarFloorToInt(srcPt.fX+s.getBitmapFilter()->width())+1, maxX);
69 66
70 for (int src_y = y0; src_y <= y1; src_y++) { 67 for (int srcX = x0; srcX < x1 ; srcX++) {
71 float yweight = SkScalarToFloat(s.getBitmapFilter()->lookupScalar(srcPt.fY - src_y)); 68 // Looking these up once instead of each loop is a ~15% speedup.
69 xWeights[srcX - x0] = s.getBitmapFilter()->lookupScalar((srcPt.fX - srcX));
70 }
72 71
73 for (int src_x = x0; src_x <= x1 ; src_x++) { 72 for (int srcY = y0; srcY < y1; srcY++) {
74 float xweight = SkScalarToFloat(s.getBitmapFilter()->lookupScalar(srcPt.fX - src_x)); 73 SkScalar yWeight = s.getBitmapFilter()->lookupScalar((srcPt.fY - srcY));
75 74
76 float combined_weight = xweight * yweight; 75 for (int srcX = x0; srcX < x1 ; srcX++) {
76 SkScalar xWeight = xWeights[srcX - x0];
77 77
78 SkPMColor color = *s.fBitmap->getAddr32(src_x, src_y); 78 SkScalar combined_weight = SkScalarMul(xWeight, yWeight);
79 79
80 __m128i c = _mm_cvtsi32_si128( color ); 80 SkPMColor color = *s.fBitmap->getAddr32(srcX, srcY);
81
82 __m128i c = _mm_cvtsi32_si128(color);
81 c = _mm_unpacklo_epi8(c, _mm_setzero_si128()); 83 c = _mm_unpacklo_epi8(c, _mm_setzero_si128());
82 c = _mm_unpacklo_epi16(c, _mm_setzero_si128()); 84 c = _mm_unpacklo_epi16(c, _mm_setzero_si128());
83 85 __m128 cfloat = _mm_cvtepi32_ps(c);
84 __m128 cfloat = _mm_cvtepi32_ps( c );
85 86
86 __m128 weightVector = _mm_set1_ps(combined_weight); 87 __m128 weightVector = _mm_set1_ps(combined_weight);
87
88 accum = _mm_add_ps(accum, _mm_mul_ps(cfloat, weightVector)); 88 accum = _mm_add_ps(accum, _mm_mul_ps(cfloat, weightVector));
89 weight = _mm_add_ps( weight, weightVector ); 89 weight = _mm_add_ps( weight, weightVector );
90 } 90 }
91 } 91 }
92 92
93 accum = _mm_div_ps(accum, weight); 93 accum = _mm_div_ps(accum, weight);
94 accum = _mm_add_ps(accum, _mm_set1_ps(0.5f)); 94 accum = _mm_add_ps(accum, _mm_set1_ps(0.5f));
95 __m128i accumInt = _mm_cvttps_epi32(accum);
96 accumInt = _mm_packs_epi32(accumInt, _mm_setzero_si128());
97 accumInt = _mm_packus_epi16(accumInt, _mm_setzero_si128());
98 SkPMColor c = _mm_cvtsi128_si32(accumInt);
95 99
96 __m128i accumInt = _mm_cvtps_epi32( accum ); 100 int a = SkClampMax(SkGetPackedA32(c), 255);
97 101 int r = SkClampMax(SkGetPackedR32(c), a);
98 int localResult[4]; 102 int g = SkClampMax(SkGetPackedG32(c), a);
99 _mm_storeu_si128((__m128i *) (localResult), accumInt); 103 int b = SkClampMax(SkGetPackedB32(c), a);
100 int a = SkClampMax(localResult[0], 255);
101 int r = SkClampMax(localResult[1], a);
102 int g = SkClampMax(localResult[2], a);
103 int b = SkClampMax(localResult[3], a);
104 104
105 *colors++ = SkPackARGB32(a, r, g, b); 105 *colors++ = SkPackARGB32(a, r, g, b);
106 106
107 x++; 107 x++;
108 } 108 }
109 } 109 }
110 110
111 // Convolves horizontally along a single row. The row data is given in 111 // Convolves horizontally along a single row. The row data is given in
112 // |src_data| and continues for the num_values() of the filter. 112 // |src_data| and continues for the num_values() of the filter.
113 void convolveHorizontally_SSE2(const unsigned char* src_data, 113 void convolveHorizontally_SSE2(const unsigned char* src_data,
(...skipping 439 matching lines...) Expand 10 before | Expand all | Expand 10 after
553 void applySIMDPadding_SSE2(SkConvolutionFilter1D *filter) { 553 void applySIMDPadding_SSE2(SkConvolutionFilter1D *filter) {
554 // Padding |paddingCount| of more dummy coefficients after the coefficients 554 // Padding |paddingCount| of more dummy coefficients after the coefficients
555 // of last filter to prevent SIMD instructions which load 8 or 16 bytes 555 // of last filter to prevent SIMD instructions which load 8 or 16 bytes
556 // together to access invalid memory areas. We are not trying to align the 556 // together to access invalid memory areas. We are not trying to align the
557 // coefficients right now due to the opaqueness of <vector> implementation. 557 // coefficients right now due to the opaqueness of <vector> implementation.
558 // This has to be done after all |AddFilter| calls. 558 // This has to be done after all |AddFilter| calls.
559 for (int i = 0; i < 8; ++i) { 559 for (int i = 0; i < 8; ++i) {
560 filter->addFilterValue(static_cast<SkConvolutionFilter1D::ConvolutionFixed>(0)); 560 filter->addFilterValue(static_cast<SkConvolutionFilter1D::ConvolutionFixed>(0));
561 } 561 }
562 } 562 }
OLDNEW
« no previous file with comments | « no previous file | src/opts/opts_check_x86.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698