| Index: src/opts/SkBlitRect_opts_SSE2.cpp
|
| diff --git a/src/opts/SkBlitRect_opts_SSE2.cpp b/src/opts/SkBlitRect_opts_SSE2.cpp
|
| deleted file mode 100644
|
| index d65a313dadf5b997c64172e272dff70ac2f16ba0..0000000000000000000000000000000000000000
|
| --- a/src/opts/SkBlitRect_opts_SSE2.cpp
|
| +++ /dev/null
|
| @@ -1,132 +0,0 @@
|
| -/*
|
| - * Copyright 2011 Google Inc.
|
| - *
|
| - * Use of this source code is governed by a BSD-style license that can be
|
| - * found in the LICENSE file.
|
| - */
|
| -
|
| -#include <emmintrin.h>
|
| -#include "SkBlitRect_opts_SSE2.h"
|
| -#include "SkBlitRow.h"
|
| -#include "SkColorPriv.h"
|
| -
|
| -/* Simple blitting of opaque rectangles less than 31 pixels wide:
|
| - * inlines and merges sections of Color32_SSE2 and sk_memset32_SSE2.
|
| - */
|
| -static void BlitRect32_OpaqueNarrow_SSE2(SkPMColor* SK_RESTRICT destination,
|
| - int width, int height,
|
| - size_t rowBytes, uint32_t color) {
|
| - SkASSERT(255 == SkGetPackedA32(color));
|
| - SkASSERT(width > 0);
|
| - SkASSERT(width < 31);
|
| -
|
| - while (--height >= 0) {
|
| - SkPMColor* dst = destination;
|
| - int count = width;
|
| -
|
| - while (count > 4) {
|
| - *dst++ = color;
|
| - *dst++ = color;
|
| - *dst++ = color;
|
| - *dst++ = color;
|
| - count -= 4;
|
| - }
|
| -
|
| - while (count > 0) {
|
| - *dst++ = color;
|
| - --count;
|
| - }
|
| -
|
| - destination = (uint32_t*)((char*)destination + rowBytes);
|
| - }
|
| -}
|
| -
|
| -/*
|
| - * Fast blitting of opaque rectangles at least 31 pixels wide:
|
| - * inlines and merges sections of Color32_SSE2 and sk_memset32_SSE2.
|
| - * A 31 pixel rectangle is guaranteed to have at least one
|
| - * 16-pixel aligned span that can take advantage of mm_store.
|
| - */
|
| -static void BlitRect32_OpaqueWide_SSE2(SkPMColor* SK_RESTRICT destination,
|
| - int width, int height,
|
| - size_t rowBytes, uint32_t color) {
|
| - SkASSERT(255 == SkGetPackedA32(color));
|
| - SkASSERT(width >= 31);
|
| -
|
| - __m128i color_wide = _mm_set1_epi32(color);
|
| - while (--height >= 0) {
|
| - // Prefetching one row ahead to L1 cache can equal hardware
|
| - // performance for large/tall rects, but never *beats*
|
| - // hardware performance.
|
| - SkPMColor* dst = destination;
|
| - int count = width;
|
| -
|
| - while (((size_t)dst) & 0x0F) {
|
| - *dst++ = color;
|
| - --count;
|
| - }
|
| - __m128i *d = reinterpret_cast<__m128i*>(dst);
|
| -
|
| - // Googling suggests _mm_stream is only going to beat _mm_store
|
| - // for things that wouldn't fit in L2 cache anyway, typically
|
| - // >500kB, and precisely fill cache lines. For us, with
|
| - // arrays > 100k elements _mm_stream is still 100%+ slower than
|
| - // mm_store.
|
| -
|
| - // Unrolling to count >= 64 is a break-even for most
|
| - // input patterns; we seem to be saturating the bus and having
|
| - // low enough overhead at 32.
|
| -
|
| - while (count >= 32) {
|
| - _mm_store_si128(d++, color_wide);
|
| - _mm_store_si128(d++, color_wide);
|
| - _mm_store_si128(d++, color_wide);
|
| - _mm_store_si128(d++, color_wide);
|
| - _mm_store_si128(d++, color_wide);
|
| - _mm_store_si128(d++, color_wide);
|
| - _mm_store_si128(d++, color_wide);
|
| - _mm_store_si128(d++, color_wide);
|
| - count -= 32;
|
| - }
|
| - if (count >= 16) {
|
| - _mm_store_si128(d++, color_wide);
|
| - _mm_store_si128(d++, color_wide);
|
| - _mm_store_si128(d++, color_wide);
|
| - _mm_store_si128(d++, color_wide);
|
| - count -= 16;
|
| - }
|
| - dst = reinterpret_cast<uint32_t*>(d);
|
| -
|
| - // Unrolling the loop in the Narrow code is a significant performance
|
| - // gain, but unrolling this loop appears to make no difference in
|
| - // benchmarks with either mm_store_si128 or individual sets.
|
| -
|
| - while (count > 0) {
|
| - *dst++ = color;
|
| - --count;
|
| - }
|
| -
|
| - destination = (uint32_t*)((char*)destination + rowBytes);
|
| - }
|
| -}
|
| -
|
| -void ColorRect32_SSE2(SkPMColor* destination,
|
| - int width, int height,
|
| - size_t rowBytes, uint32_t color) {
|
| - if (0 == height || 0 == width || 0 == color) {
|
| - return;
|
| - }
|
| - unsigned colorA = SkGetPackedA32(color);
|
| - colorA = 0; // skip below if () for now...(has been disabled since this was added in r3423).
|
| - if (255 == colorA) {
|
| - if (width < 31) {
|
| - BlitRect32_OpaqueNarrow_SSE2(destination, width, height,
|
| - rowBytes, color);
|
| - } else {
|
| - BlitRect32_OpaqueWide_SSE2(destination, width, height,
|
| - rowBytes, color);
|
| - }
|
| - } else {
|
| - SkBlitRow::ColorRect32(destination, width, height, rowBytes, color);
|
| - }
|
| -}
|
|
|