Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(43)

Side by Side Diff: src/opts/Sk4px_SSE2.h

Issue 1230663005: 3-15% speedup to HardLight / Overlay xfermodes. (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Add a test that widenLo() | widenHi() == widenLoHi() Created 5 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/opts/Sk4px_NEON.h ('k') | src/opts/Sk4px_none.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2015 Google Inc. 2 * Copyright 2015 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 namespace { // See Sk4px.h 8 namespace { // See Sk4px.h
9 9
10 inline Sk4px Sk4px::DupPMColor(SkPMColor px) { return Sk16b(_mm_set1_epi32(px)); } 10 inline Sk4px Sk4px::DupPMColor(SkPMColor px) { return Sk16b(_mm_set1_epi32(px)); }
(...skipping 13 matching lines...) Expand all
24 inline Sk4px::Wide Sk4px::widenLo() const { 24 inline Sk4px::Wide Sk4px::widenLo() const {
25 return Sk16h(_mm_unpacklo_epi8(this->fVec, _mm_setzero_si128()), 25 return Sk16h(_mm_unpacklo_epi8(this->fVec, _mm_setzero_si128()),
26 _mm_unpackhi_epi8(this->fVec, _mm_setzero_si128())); 26 _mm_unpackhi_epi8(this->fVec, _mm_setzero_si128()));
27 } 27 }
28 28
29 inline Sk4px::Wide Sk4px::widenHi() const { 29 inline Sk4px::Wide Sk4px::widenHi() const {
30 return Sk16h(_mm_unpacklo_epi8(_mm_setzero_si128(), this->fVec), 30 return Sk16h(_mm_unpacklo_epi8(_mm_setzero_si128(), this->fVec),
31 _mm_unpackhi_epi8(_mm_setzero_si128(), this->fVec)); 31 _mm_unpackhi_epi8(_mm_setzero_si128(), this->fVec));
32 } 32 }
33 33
34 inline Sk4px::Wide Sk4px::widenLoHi() const {
35 return Sk16h(_mm_unpacklo_epi8(this->fVec, this->fVec),
36 _mm_unpackhi_epi8(this->fVec, this->fVec));
37 }
38
34 inline Sk4px::Wide Sk4px::mulWiden(const Sk16b& other) const { 39 inline Sk4px::Wide Sk4px::mulWiden(const Sk16b& other) const {
35 return this->widenLo() * Sk4px(other).widenLo(); 40 return this->widenLo() * Sk4px(other).widenLo();
36 } 41 }
37 42
38 inline Sk4px Sk4px::Wide::addNarrowHi(const Sk16h& other) const { 43 inline Sk4px Sk4px::Wide::addNarrowHi(const Sk16h& other) const {
39 Sk4px::Wide r = (*this + other) >> 8; 44 Sk4px::Wide r = (*this + other) >> 8;
40 return Sk4px(_mm_packus_epi16(r.fLo.fVec, r.fHi.fVec)); 45 return Sk4px(_mm_packus_epi16(r.fLo.fVec, r.fHi.fVec));
41 } 46 }
42 47
43 // Load4Alphas and Load2Alphas use possibly-unaligned loads (SkAlpha[] -> uint16_t or uint32_t). 48 // Load4Alphas and Load2Alphas use possibly-unaligned loads (SkAlpha[] -> uint16_t or uint32_t).
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
82 inline Sk4px Sk4px::zeroColors() const { 87 inline Sk4px Sk4px::zeroColors() const {
83 return Sk16b(_mm_and_si128(_mm_set1_epi32(0xFF << SK_A32_SHIFT), this->fVec)); 88 return Sk16b(_mm_and_si128(_mm_set1_epi32(0xFF << SK_A32_SHIFT), this->fVec));
84 } 89 }
85 90
86 inline Sk4px Sk4px::zeroAlphas() const { 91 inline Sk4px Sk4px::zeroAlphas() const {
87 // andnot(a,b) == ~a & b 92 // andnot(a,b) == ~a & b
88 return Sk16b(_mm_andnot_si128(_mm_set1_epi32(0xFF << SK_A32_SHIFT), this->fVec)); 93 return Sk16b(_mm_andnot_si128(_mm_set1_epi32(0xFF << SK_A32_SHIFT), this->fVec));
89 } 94 }
90 95
91 } // namespace 96 } // namespace
OLDNEW
« no previous file with comments | « src/opts/Sk4px_NEON.h ('k') | src/opts/Sk4px_none.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698