Index: src/opts/SkBlitRect_opts_SSE2.cpp |
diff --git a/src/opts/SkBlitRect_opts_SSE2.cpp b/src/opts/SkBlitRect_opts_SSE2.cpp |
deleted file mode 100644 |
index d65a313dadf5b997c64172e272dff70ac2f16ba0..0000000000000000000000000000000000000000 |
--- a/src/opts/SkBlitRect_opts_SSE2.cpp |
+++ /dev/null |
@@ -1,132 +0,0 @@ |
-/* |
- * Copyright 2011 Google Inc. |
- * |
- * Use of this source code is governed by a BSD-style license that can be |
- * found in the LICENSE file. |
- */ |
- |
-#include <emmintrin.h> |
-#include "SkBlitRect_opts_SSE2.h" |
-#include "SkBlitRow.h" |
-#include "SkColorPriv.h" |
- |
-/* Simple blitting of opaque rectangles less than 31 pixels wide: |
- * inlines and merges sections of Color32_SSE2 and sk_memset32_SSE2. |
- */ |
-static void BlitRect32_OpaqueNarrow_SSE2(SkPMColor* SK_RESTRICT destination, |
- int width, int height, |
- size_t rowBytes, uint32_t color) { |
- SkASSERT(255 == SkGetPackedA32(color)); |
- SkASSERT(width > 0); |
- SkASSERT(width < 31); |
- |
- while (--height >= 0) { |
- SkPMColor* dst = destination; |
- int count = width; |
- |
- while (count > 4) { |
- *dst++ = color; |
- *dst++ = color; |
- *dst++ = color; |
- *dst++ = color; |
- count -= 4; |
- } |
- |
- while (count > 0) { |
- *dst++ = color; |
- --count; |
- } |
- |
- destination = (uint32_t*)((char*)destination + rowBytes); |
- } |
-} |
- |
-/* |
- * Fast blitting of opaque rectangles at least 31 pixels wide: |
- * inlines and merges sections of Color32_SSE2 and sk_memset32_SSE2. |
- * A 31 pixel rectangle is guaranteed to have at least one |
- * 16-pixel aligned span that can take advantage of mm_store. |
- */ |
-static void BlitRect32_OpaqueWide_SSE2(SkPMColor* SK_RESTRICT destination, |
- int width, int height, |
- size_t rowBytes, uint32_t color) { |
- SkASSERT(255 == SkGetPackedA32(color)); |
- SkASSERT(width >= 31); |
- |
- __m128i color_wide = _mm_set1_epi32(color); |
- while (--height >= 0) { |
- // Prefetching one row ahead to L1 cache can equal hardware |
- // performance for large/tall rects, but never *beats* |
- // hardware performance. |
- SkPMColor* dst = destination; |
- int count = width; |
- |
- while (((size_t)dst) & 0x0F) { |
- *dst++ = color; |
- --count; |
- } |
- __m128i *d = reinterpret_cast<__m128i*>(dst); |
- |
- // Googling suggests _mm_stream is only going to beat _mm_store |
- // for things that wouldn't fit in L2 cache anyway, typically |
- // >500kB, and precisely fill cache lines. For us, with |
- // arrays > 100k elements _mm_stream is still 100%+ slower than |
- // mm_store. |
- |
- // Unrolling to count >= 64 is a break-even for most |
- // input patterns; we seem to be saturating the bus and having |
- // low enough overhead at 32. |
- |
- while (count >= 32) { |
- _mm_store_si128(d++, color_wide); |
- _mm_store_si128(d++, color_wide); |
- _mm_store_si128(d++, color_wide); |
- _mm_store_si128(d++, color_wide); |
- _mm_store_si128(d++, color_wide); |
- _mm_store_si128(d++, color_wide); |
- _mm_store_si128(d++, color_wide); |
- _mm_store_si128(d++, color_wide); |
- count -= 32; |
- } |
- if (count >= 16) { |
- _mm_store_si128(d++, color_wide); |
- _mm_store_si128(d++, color_wide); |
- _mm_store_si128(d++, color_wide); |
- _mm_store_si128(d++, color_wide); |
- count -= 16; |
- } |
- dst = reinterpret_cast<uint32_t*>(d); |
- |
- // Unrolling the loop in the Narrow code is a significant performance |
- // gain, but unrolling this loop appears to make no difference in |
- // benchmarks with either mm_store_si128 or individual sets. |
- |
- while (count > 0) { |
- *dst++ = color; |
- --count; |
- } |
- |
- destination = (uint32_t*)((char*)destination + rowBytes); |
- } |
-} |
- |
-void ColorRect32_SSE2(SkPMColor* destination, |
- int width, int height, |
- size_t rowBytes, uint32_t color) { |
- if (0 == height || 0 == width || 0 == color) { |
- return; |
- } |
- unsigned colorA = SkGetPackedA32(color); |
- colorA = 0; // skip below if () for now...(has been disabled since this was added in r3423). |
- if (255 == colorA) { |
- if (width < 31) { |
- BlitRect32_OpaqueNarrow_SSE2(destination, width, height, |
- rowBytes, color); |
- } else { |
- BlitRect32_OpaqueWide_SSE2(destination, width, height, |
- rowBytes, color); |
- } |
- } else { |
- SkBlitRow::ColorRect32(destination, width, height, rowBytes, color); |
- } |
-} |