Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(861)

Side by Side Diff: src/core/SkHalf.h

Issue 1688233002: new version of SkHalfToFloat_01 (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: swap cast order, neon on the brain Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | tests/Float16Test.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2014 Google Inc. 2 * Copyright 2014 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #ifndef SkHalf_DEFINED 8 #ifndef SkHalf_DEFINED
9 #define SkHalf_DEFINED 9 #define SkHalf_DEFINED
10 10
(...skipping 19 matching lines...) Expand all
30 static inline uint64_t SkFloatToHalf_01(const Sk4f&); 30 static inline uint64_t SkFloatToHalf_01(const Sk4f&);
31 31
32 // ~~~~~~~~~~~ impl ~~~~~~~~~~~~~~ // 32 // ~~~~~~~~~~~ impl ~~~~~~~~~~~~~~ //
33 33
34 // Like the serial versions in SkHalf.cpp, these are based on 34 // Like the serial versions in SkHalf.cpp, these are based on
35 // https://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/ 35 // https://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/
36 36
37 // TODO: NEON versions 37 // TODO: NEON versions
38 static inline Sk4f SkHalfToFloat_01(uint64_t hs) { 38 static inline Sk4f SkHalfToFloat_01(uint64_t hs) {
39 #if !defined(SKNX_NO_SIMD) && SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 39 #if !defined(SKNX_NO_SIMD) && SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
40 // Load our 16-bit floats into the bottom 16 bits of each 32-bit lane, with zeroes on top. 40 // If our input is a normal 16-bit float, things are pretty easy:
41 // - shift left by 13 to put the mantissa in the right place;
42 // - the exponent is wrong, but it just needs to be rebiased;
43 // - re-bias the exponent from 15-bias to 127-bias by adding (127-15).
44
45 // If our input is denormalized, we're going to do the same steps, plus a few more fix ups:
46 // - the input is h = K*2^-14, for some 10-bit fixed point K in [0,1);
47 // - by shifting left 13 and adding (127-15) to the exponent, we constructed the float value
48 // 2^-15*(1+K);
49 // - we'd need to subtract 2^-15 and multiply by 2 to get back to K*2^-14, or equivalently
50 // multiply by 2 then subtract 2^-14.
51 //
52 // - We'll work that multiply by 2 into the rebias, by adding 1 more to the exponent.
53 // - Conveniently, this leaves that rebias constant 2^-14, exactly what we want to subtract.
54
41 __m128i h = _mm_unpacklo_epi16(_mm_loadl_epi64((const __m128i*)&hs), _mm_setzero_si128()); 55 __m128i h = _mm_unpacklo_epi16(_mm_loadl_epi64((const __m128i*)&hs), _mm_setzero_si128());
56 const __m128i is_denorm = _mm_cmplt_epi32(h, _mm_set1_epi32(1<<10));
42 57
43 // Fork into two paths, depending on whether the 16-bit float is denormalized. 58 __m128i rebias = _mm_set1_epi32((127-15) << 23);
44 __m128 is_denorm = _mm_castsi128_ps(_mm_cmplt_epi32(h, _mm_set1_epi32(0x0400 ))); 59 rebias = _mm_add_epi32(rebias, _mm_and_si128(is_denorm, _mm_set1_epi32(1<<23 )));
45 60
46 // TODO: figure out, explain 61 __m128i f = _mm_add_epi32(_mm_slli_epi32(h, 13), rebias);
47 const __m128 half = _mm_set1_ps(0.5f); 62 return _mm_sub_ps(_mm_castsi128_ps(f),
48 __m128 denorm = _mm_sub_ps(_mm_or_ps(_mm_castsi128_ps(h), half), half); 63 _mm_castsi128_ps(_mm_and_si128(is_denorm, rebias)));
49
50 // If we're normalized, just shift ourselves so the exponent/mantissa dividing line
51 // is correct, then re-bias the exponent from 15 to 127.
52 __m128 norm = _mm_castsi128_ps(_mm_add_epi32(_mm_slli_epi32(h, 13),
53 _mm_set1_epi32((127-15) << 23)) );
54
55 return _mm_or_ps(_mm_and_ps (is_denorm, denorm),
56 _mm_andnot_ps(is_denorm, norm));
57 #else 64 #else
58 float fs[4]; 65 float fs[4];
59 for (int i = 0; i < 4; i++) { 66 for (int i = 0; i < 4; i++) {
60 fs[i] = SkHalfToFloat(hs >> (i*16)); 67 fs[i] = SkHalfToFloat(hs >> (i*16));
61 } 68 }
62 return Sk4f::Load(fs); 69 return Sk4f::Load(fs);
63 #endif 70 #endif
64 } 71 }
65 72
66 static inline uint64_t SkFloatToHalf_01(const Sk4f& fs) { 73 static inline uint64_t SkFloatToHalf_01(const Sk4f& fs) {
67 #if !defined(SKNX_NO_SIMD) && SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 74 #if !defined(SKNX_NO_SIMD) && SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
68 // Scale our floats down by a tiny power of 2 to pull up our mantissa bits, 75 // Scale our floats down by a tiny power of 2 to pull up our mantissa bits,
69 // then shift back down to 16-bit float layout. This doesn't round, so can be 1 bit small. 76 // then shift back down to 16-bit float layout. This doesn't round, so can be 1 bit small.
70 // TODO: understand better. Why this scale factor? 77 // TODO: understand better. Why this scale factor?
71 const __m128 scale = _mm_castsi128_ps(_mm_set1_epi32(15 << 23)); 78 const __m128 rebias = _mm_castsi128_ps(_mm_set1_epi32((127 - (127 - 15)) << 23));
72 __m128i h = _mm_srli_epi32(_mm_castps_si128(_mm_mul_ps(fs.fVec, scale)), 13) ; 79 __m128i h = _mm_srli_epi32(_mm_castps_si128(_mm_mul_ps(fs.fVec, rebias)), 13 );
73 80
74 uint64_t r; 81 uint64_t r;
75 _mm_storel_epi64((__m128i*)&r, _mm_packs_epi32(h,h)); 82 _mm_storel_epi64((__m128i*)&r, _mm_packs_epi32(h,h));
76 return r; 83 return r;
77 #else 84 #else
78 SkHalf hs[4]; 85 SkHalf hs[4];
79 for (int i = 0; i < 4; i++) { 86 for (int i = 0; i < 4; i++) {
80 hs[i] = SkFloatToHalf(fs[i]); 87 hs[i] = SkFloatToHalf(fs[i]);
81 } 88 }
82 return (uint64_t)hs[3] << 48 89 return (uint64_t)hs[3] << 48
83 | (uint64_t)hs[2] << 32 90 | (uint64_t)hs[2] << 32
84 | (uint64_t)hs[1] << 16 91 | (uint64_t)hs[1] << 16
85 | (uint64_t)hs[0] << 0; 92 | (uint64_t)hs[0] << 0;
86 #endif 93 #endif
87 } 94 }
88 95
89 #endif 96 #endif
OLDNEW
« no previous file with comments | « no previous file | tests/Float16Test.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698