Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(738)

Unified Diff: src/opts/SkUtils_opts_arm_neon.cpp

Issue 1075003002: Replace NEON assembly memset16 and memset32 with intrinsic versions. (Closed) Base URL: https://skia.googlesource.com/skia@master
Patch Set: Created 5 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/opts/SkUtils_opts_arm.cpp ('k') | src/opts/memset16_neon.S » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/opts/SkUtils_opts_arm_neon.cpp
diff --git a/src/opts/SkUtils_opts_arm_neon.cpp b/src/opts/SkUtils_opts_arm_neon.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b7d05046178bfb2d77157d91cfb69adab0235a8d
--- /dev/null
+++ b/src/opts/SkUtils_opts_arm_neon.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright 2015 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "SkTypes.h"
+#include <arm_neon.h>
+
+void sk_memset32_neon(uint32_t dst[], uint32_t value, int count) {
+ uint32x4_t v4 = vdupq_n_u32(value);
+ uint32x4x4_t v16 = { v4, v4, v4, v4 };
msarett 2015/04/10 12:23:31 Does this operation involve copying the contents o
mtklein 2015/04/10 12:45:55 Yes.
msarett 2015/04/10 12:55:10 Cool good to know!
+
+ while (count >= 16) {
+ vst4q_u32(dst, v16); // This swizzles, but we don't care: all lanes are the same, value.
msarett 2015/04/10 12:23:31 I just read a little more about ARM deinterleaving
mtklein 2015/04/10 12:45:55 Yep. You'll find them used quite a bit under src/
msarett 2015/04/10 12:55:10 That makes sense. +1 for sticking with intrinsics
+ dst += 16;
+ count -= 16;
+ }
+ SkASSERT(count < 16);
+ switch (count / 4) {
+ case 3: vst1q_u32(dst, v4); dst += 4; count -= 4;
+ case 2: vst1q_u32(dst, v4); dst += 4; count -= 4;
+ case 1: vst1q_u32(dst, v4); dst += 4; count -= 4;
+ }
+ SkASSERT(count < 4);
+ if (count >= 2) {
+ vst1_u32(dst, vget_low_u32(v4));
msarett 2015/04/10 12:23:31 vget_low_u32 is a free call, right?
mtklein 2015/04/10 12:45:56 Yep. It just translates from whatever q-register
+ dst += 2;
+ count -= 2;
+ }
+ SkASSERT(count < 2);
+ if (count > 0) {
+ *dst = value;
+ }
+}
+
+void sk_memset16_neon(uint16_t dst[], uint16_t value, int count) {
+ uint16x8_t v8 = vdupq_n_u16(value);
+ uint16x8x4_t v32 = { v8, v8, v8, v8 };
+
+ while (count >= 32) {
+ vst4q_u16(dst, v32); // This swizzles, but we don't care: all lanes are the same, value.
+ dst += 32;
+ count -= 32;
+ }
+ SkASSERT(count < 32);
+ switch (count / 8) {
+ case 3: vst1q_u16(dst, v8); dst += 8; count -= 8;
+ case 2: vst1q_u16(dst, v8); dst += 8; count -= 8;
+ case 1: vst1q_u16(dst, v8); dst += 8; count -= 8;
+ }
+ SkASSERT(count < 8);
+ if (count >= 4) {
+ vst1_u16(dst, vget_low_u16(v8));
+ dst += 4;
+ count -= 4;
+ }
+ SkASSERT(count < 4);
+ switch (count) {
+ case 3: *dst++ = value;
+ case 2: *dst++ = value;
+ case 1: *dst = value;
+ }
+}
+
« no previous file with comments | « src/opts/SkUtils_opts_arm.cpp ('k') | src/opts/memset16_neon.S » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698