OLD vs. NEW: the NEW side of this diff is empty; the OLD contents of the header are reproduced below.
```cpp
/*
 * Copyright 2015 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#ifndef SkUtils_opts_DEFINED
#define SkUtils_opts_DEFINED

namespace SK_OPTS_NS {

#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2

static void memset16(uint16_t* dst, uint16_t val, int n) {
    auto dst8 = (__m128i*)dst;
    auto val8 = _mm_set1_epi16(val);
    for ( ; n >= 8; n -= 8) {
        _mm_storeu_si128(dst8++, val8);
    }
    dst = (uint16_t*)dst8;
    if (n & 4) {
        _mm_storel_epi64((__m128i*)dst, val8);
        dst += 4;
    }
    if (n & 2) {
        *(uint32_t*)dst = _mm_cvtsi128_si32(val8);
        dst += 2;
    }
    if (n & 1) {
        *dst = val;
    }
}

static void memset32(uint32_t* dst, uint32_t val, int n) {
    auto dst4 = (__m128i*)dst;
    auto val4 = _mm_set1_epi32(val);
    for ( ; n >= 4; n -= 4) {
        _mm_storeu_si128(dst4++, val4);
    }
    dst = (uint32_t*)dst4;
    if (n & 2) {
        _mm_storel_epi64((__m128i*)dst, val4);
        dst += 2;
    }
    if (n & 1) {
        *dst = val;
    }
}

#elif defined(SK_ARM_HAS_NEON)

static void memset16(uint16_t* dst, uint16_t value, int n) {
    uint16x8_t   v8  = vdupq_n_u16(value);
    uint16x8x4_t v32 = {{ v8, v8, v8, v8 }};

    while (n >= 32) {
        vst4q_u16(dst, v32);  // This swizzles, but we don't care: all lanes are the same, value.
        dst += 32;
        n   -= 32;
    }
    switch (n / 8) {
        case 3: vst1q_u16(dst, v8); dst += 8;
        case 2: vst1q_u16(dst, v8); dst += 8;
        case 1: vst1q_u16(dst, v8); dst += 8;
    }
    if (n & 4) {
        vst1_u16(dst, vget_low_u16(v8));
        dst += 4;
    }
    switch (n & 3) {
        case 3: *dst++ = value;
        case 2: *dst++ = value;
        case 1: *dst   = value;
    }
}

static void memset32(uint32_t* dst, uint32_t value, int n) {
    uint32x4_t   v4  = vdupq_n_u32(value);
    uint32x4x4_t v16 = {{ v4, v4, v4, v4 }};

    while (n >= 16) {
        vst4q_u32(dst, v16);  // This swizzles, but we don't care: all lanes are the same, value.
        dst += 16;
        n   -= 16;
    }
    switch (n / 4) {
        case 3: vst1q_u32(dst, v4); dst += 4;
        case 2: vst1q_u32(dst, v4); dst += 4;
        case 1: vst1q_u32(dst, v4); dst += 4;
    }
    if (n & 2) {
        vst1_u32(dst, vget_low_u32(v4));
        dst += 2;
    }
    if (n & 1) {
        *dst = value;
    }
}

#else // Neither NEON nor SSE2.

static void memset16(uint16_t* dst, uint16_t val, int n) { while (n --> 0) { *dst++ = val; } }
static void memset32(uint32_t* dst, uint32_t val, int n) { while (n --> 0) { *dst++ = val; } }

#endif

}  // namespace SK_OPTS_NS

#endif//SkUtils_opts_DEFINED
```
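The header itself pulls in no includes and is wrapped in `SK_OPTS_NS`, so it is evidently meant to be included from a translation unit that already defines `SK_OPTS_NS` and provides the integer types, the `SK_CPU_SSE_LEVEL*` / `SK_ARM_HAS_NEON` macros, and the relevant intrinsics. A minimal sketch of such an including file follows; the stub macro values and the file name `SkUtils_opts.h` are assumptions for illustration (in a real Skia build these come from the build configuration), and the stubs deliberately select the portable fallback path so the sketch compiles standalone.

```cpp
// Hypothetical including translation unit, not part of the diff above.
// Assumed stubs: in Skia, SK_OPTS_NS and the CPU-feature macros are set by
// the build; here they are defined so the portable (non-SIMD) path is taken.
#include <stdint.h>
#include <vector>

#define SK_OPTS_NS portable        // assumed; real builds use sse2, neon, etc.
#define SK_CPU_SSE_LEVEL 0         // assumed stub: below SSE2, so SSE path is skipped
#define SK_CPU_SSE_LEVEL_SSE2 20   // assumed stub
#include "SkUtils_opts.h"          // assumed file name for the header shown above

int main() {
    std::vector<uint16_t> rgb565(75);
    std::vector<uint32_t> argb(100);

    // Fill a 565 buffer with pure green and a 32-bit buffer with opaque green.
    SK_OPTS_NS::memset16(rgb565.data(), 0x07E0, (int)rgb565.size());
    SK_OPTS_NS::memset32(argb.data(), 0xFF00FF00u, (int)argb.size());
    return 0;
}
```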