Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(75)

Side by Side Diff: src/opts/SkBitmapFilter_opts_SSE2.cpp

Issue 1187173005: Plumb through out_row byte length so we can assert we stay underneath it. (Closed) Base URL: https://skia.googlesource.com/skia@master
Patch Set: Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/opts/SkBitmapFilter_opts_SSE2.h ('k') | src/opts/SkBitmapProcState_arm_neon.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 /* 1 /*
2 * Copyright 2013 Google Inc. 2 * Copyright 2013 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #include <emmintrin.h> 8 #include <emmintrin.h>
9 #include "SkBitmap.h" 9 #include "SkBitmap.h"
10 #include "SkBitmapFilter_opts_SSE2.h" 10 #include "SkBitmapFilter_opts_SSE2.h"
(...skipping 156 matching lines...) Expand 10 before | Expand all | Expand 10 after
167 out_row += 4; 167 out_row += 4;
168 } 168 }
169 } 169 }
170 170
171 // Convolves horizontally along four rows. The row data is given in 171 // Convolves horizontally along four rows. The row data is given in
172 // |src_data| and continues for the num_values() of the filter. 172 // |src_data| and continues for the num_values() of the filter.
173 // The algorithm is almost same as |ConvolveHorizontally_SSE2|. Please 173 // The algorithm is almost same as |ConvolveHorizontally_SSE2|. Please
174 // refer to that function for detailed comments. 174 // refer to that function for detailed comments.
175 void convolve4RowsHorizontally_SSE2(const unsigned char* src_data[4], 175 void convolve4RowsHorizontally_SSE2(const unsigned char* src_data[4],
176 const SkConvolutionFilter1D& filter, 176 const SkConvolutionFilter1D& filter,
177 unsigned char* out_row[4]) { 177 unsigned char* out_row[4],
178 size_t outRowBytes) {
179 SkDEBUGCODE(const unsigned char* out_row_0_start = out_row[0];)
180
178 int num_values = filter.numValues(); 181 int num_values = filter.numValues();
179 182
180 int filter_offset, filter_length; 183 int filter_offset, filter_length;
181 __m128i zero = _mm_setzero_si128(); 184 __m128i zero = _mm_setzero_si128();
182 __m128i mask[4]; 185 __m128i mask[4];
183 // |mask| will be used to decimate all extra filter coefficients that are 186 // |mask| will be used to decimate all extra filter coefficients that are
184 // loaded by SIMD when |filter_length| is not divisible by 4. 187 // loaded by SIMD when |filter_length| is not divisible by 4.
185 // mask[0] is not used in following algorithm. 188 // mask[0] is not used in following algorithm.
186 mask[1] = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, -1); 189 mask[1] = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, -1);
187 mask[2] = _mm_set_epi16(0, 0, 0, 0, 0, 0, -1, -1); 190 mask[2] = _mm_set_epi16(0, 0, 0, 0, 0, 0, -1, -1);
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after
268 accum1 = _mm_srai_epi32(accum1, SkConvolutionFilter1D::kShiftBits); 271 accum1 = _mm_srai_epi32(accum1, SkConvolutionFilter1D::kShiftBits);
269 accum1 = _mm_packs_epi32(accum1, zero); 272 accum1 = _mm_packs_epi32(accum1, zero);
270 accum1 = _mm_packus_epi16(accum1, zero); 273 accum1 = _mm_packus_epi16(accum1, zero);
271 accum2 = _mm_srai_epi32(accum2, SkConvolutionFilter1D::kShiftBits); 274 accum2 = _mm_srai_epi32(accum2, SkConvolutionFilter1D::kShiftBits);
272 accum2 = _mm_packs_epi32(accum2, zero); 275 accum2 = _mm_packs_epi32(accum2, zero);
273 accum2 = _mm_packus_epi16(accum2, zero); 276 accum2 = _mm_packus_epi16(accum2, zero);
274 accum3 = _mm_srai_epi32(accum3, SkConvolutionFilter1D::kShiftBits); 277 accum3 = _mm_srai_epi32(accum3, SkConvolutionFilter1D::kShiftBits);
275 accum3 = _mm_packs_epi32(accum3, zero); 278 accum3 = _mm_packs_epi32(accum3, zero);
276 accum3 = _mm_packus_epi16(accum3, zero); 279 accum3 = _mm_packus_epi16(accum3, zero);
277 280
281 // We seem to be running off the edge here (chromium:491660).
282 SkASSERT(((size_t)out_row[0] - (size_t)out_row_0_start) < outRowBytes);
283
278 *(reinterpret_cast<int*>(out_row[0])) = _mm_cvtsi128_si32(accum0); 284 *(reinterpret_cast<int*>(out_row[0])) = _mm_cvtsi128_si32(accum0);
279 *(reinterpret_cast<int*>(out_row[1])) = _mm_cvtsi128_si32(accum1); 285 *(reinterpret_cast<int*>(out_row[1])) = _mm_cvtsi128_si32(accum1);
280 *(reinterpret_cast<int*>(out_row[2])) = _mm_cvtsi128_si32(accum2); 286 *(reinterpret_cast<int*>(out_row[2])) = _mm_cvtsi128_si32(accum2);
281 *(reinterpret_cast<int*>(out_row[3])) = _mm_cvtsi128_si32(accum3); 287 *(reinterpret_cast<int*>(out_row[3])) = _mm_cvtsi128_si32(accum3);
282 288
283 out_row[0] += 4; 289 out_row[0] += 4;
284 out_row[1] += 4; 290 out_row[1] += 4;
285 out_row[2] += 4; 291 out_row[2] += 4;
286 out_row[3] += 4; 292 out_row[3] += 4;
287 } 293 }
(...skipping 197 matching lines...) Expand 10 before | Expand all | Expand 10 after
485 void applySIMDPadding_SSE2(SkConvolutionFilter1D *filter) { 491 void applySIMDPadding_SSE2(SkConvolutionFilter1D *filter) {
486 // Padding |paddingCount| of more dummy coefficients after the coefficients 492 // Padding |paddingCount| of more dummy coefficients after the coefficients
487 // of last filter to prevent SIMD instructions which load 8 or 16 bytes 493 // of last filter to prevent SIMD instructions which load 8 or 16 bytes
488 // together to access invalid memory areas. We are not trying to align the 494 // together to access invalid memory areas. We are not trying to align the
489 // coefficients right now due to the opaqueness of <vector> implementation. 495 // coefficients right now due to the opaqueness of <vector> implementation.
490 // This has to be done after all |AddFilter| calls. 496 // This has to be done after all |AddFilter| calls.
491 for (int i = 0; i < 8; ++i) { 497 for (int i = 0; i < 8; ++i) {
492 filter->addFilterValue(static_cast<SkConvolutionFilter1D::ConvolutionFixed>(0)); 498 filter->addFilterValue(static_cast<SkConvolutionFilter1D::ConvolutionFixed>(0));
493 } 499 }
494 } 500 }
OLD | NEW
« no previous file with comments | « src/opts/SkBitmapFilter_opts_SSE2.h ('k') | src/opts/SkBitmapProcState_arm_neon.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698