Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 107 OS::ProtectCode(buffer, actual_size); | 107 OS::ProtectCode(buffer, actual_size); |
| 108 | 108 |
| 109 #if !defined(USE_SIMULATOR) | 109 #if !defined(USE_SIMULATOR) |
| 110 return FUNCTION_CAST<UnaryMathFunction>(buffer); | 110 return FUNCTION_CAST<UnaryMathFunction>(buffer); |
| 111 #else | 111 #else |
| 112 fast_exp_arm_machine_code = buffer; | 112 fast_exp_arm_machine_code = buffer; |
| 113 return &fast_exp_simulator; | 113 return &fast_exp_simulator; |
| 114 #endif | 114 #endif |
| 115 } | 115 } |
| 116 | 116 |
// Plain libc fallback, used when a specialized copy stub cannot be
// generated (no executable memory, or NEON unavailable).
static void MemCopyWrapper(void* dest, const void* src, size_t size) {
  memcpy(dest, src, size);
}
| 120 | |
// Based on Bionic's memcpy.s.
// Generates an executable ARM/NEON memcpy stub and returns it as an
// OS::MemCopyFunction.  Falls back to the plain libc wrapper
// (MemCopyWrapper) when executable memory cannot be allocated or when
// NEON is not supported on this CPU.
OS::MemCopyFunction CreateMemCopyFunction() {
  size_t actual_size;
  static const int kCacheLineSize = 64;
  // How far ahead of the current copy position we prefetch in the main loop.
  static const int kPrefetchDistance = kCacheLineSize * 4;
  // Allocate buffer in executable space.
  byte* buffer = static_cast<byte*>(OS::Allocate(1 * KB,
                                                 &actual_size,
                                                 true));
  if (buffer == NULL) return &MemCopyWrapper;
  if (!CpuFeatures::IsSupported(NEON)) return &MemCopyWrapper;

  MacroAssembler masm(NULL, buffer, static_cast<int>(actual_size));

  CpuFeatureScope use_neon(&masm, NEON);
  Label less16, aligned16, aligned8, skip_copy8, skip_copy4,
      fix_remainder, main_loop, has32, less32;

  // ----------- S t a t e -------------
  // -- r0 : dest
  // -- r1 : src
  // -- r2 : count
  // -----------------------------------

  __ push(lr);

  // Start preloading as early as possible.
  __ pld(r1, kCacheLineSize * 0);
  __ pld(r1, kCacheLineSize * 1);

  // Do we have at least 16 bytes to copy (needed for the alignment below)?
  __ cmp(r2, Operand(16));
  __ b(&less16, lo);

  // Align destination to half cache-line for the write-buffer:
  // r3 = (-dest) & 0xf is the byte count needed to reach 16-byte alignment.
  __ rsb(r3, r0, Operand(0));
  __ and_(r3, r3, Operand(0xf), SetCC);
  __ b(&aligned16, eq);

  // Copy up to 15 bytes (count in r3).
  __ sub(r2, r2, r3);
  // LSL #31 with SetCC moves bit 0 of r3 into N and bit 1 into C, so the
  // predicated loads/stores below copy 1, 2 or 3 bytes without branching.
  __ mov(ip, Operand(r3, LSL, 31), SetCC);
  __ ldrb(lr, MemOperand(r1, 1, PostIndex), mi);
  __ strb(lr, MemOperand(r0, 1, PostIndex), mi);
  __ ldrb(ip, MemOperand(r1, 1, PostIndex), cs);
  __ ldrb(lr, MemOperand(r1, 1, PostIndex), cs);
  __ strb(ip, MemOperand(r0, 1, PostIndex), cs);
  __ strb(lr, MemOperand(r0, 1, PostIndex), cs);
  // Same trick for bits 2 and 3 of the remaining alignment count.
  // NOTE(review): relies on the flag results of a flag-setting shift driving
  // the ge/cc conditions below — do not reorder these instructions.
  __ mov(ip, Operand(r3, LSL, 29), SetCC);
  __ b(&aligned8, ge);
  // Copies 4 bytes, destination 32-bits aligned.
  __ vld4(8, r1, d0, element_0, Writeback);
  __ vst4(8, r0, d0, element_0, Writeback, 32 / 8);
  __ bind(&aligned8);
  __ b(&aligned16, cc);
  // Copies 8 bytes, destination 64-bits aligned.
  __ vld1(8, r1, d0, d0, Writeback);
  __ vst1(8, r0, d0, d0, Writeback, 64 / 8);

  __ bind(&aligned16);
  // Preload immediately the next cache line, which we may need.
  __ pld(r1, kCacheLineSize * 0);
  __ pld(r1, kCacheLineSize * 1);

  // Make sure we have at least 64 bytes to copy.
  __ sub(r2, r2, Operand(64), SetCC);
  __ b(&fix_remainder, lo);

  // Preload all the cache lines we need.
  // NOTE: the number of pld below depends on kPrefetchDistance; ideally we
  // would increase the distance in the main loop to avoid the goofy code
  // below.  In practice this doesn't seem to make a big difference.
  __ pld(r1, kCacheLineSize * 2);
  __ pld(r1, kCacheLineSize * 3);
  __ pld(r1, kPrefetchDistance);

  // The main loop copies 64 bytes at a time (d0-d7 = 8 doubleword regs).
  __ bind(&main_loop);
  __ vld1(8, r1, d0, d3, Writeback);
  __ vld1(8, r1, d4, d7, Writeback);
  __ pld(r1, kPrefetchDistance);
  __ sub(r2, r2, Operand(64), SetCC);
  __ vst1(8, r0, d0, d3, Writeback, 128 / 8);
  __ vst1(8, r0, d4, d7, Writeback, 128 / 8);
  __ b(&main_loop, hs);

  // Fix up the remaining count (it went negative above) and make sure we
  // have >= 32 bytes left.
  __ bind(&fix_remainder);
  __ add(r2, r2, Operand(64));
  __ sub(r2, r2, Operand(32), SetCC);
  __ b(&less32, lo);

  // 32 bytes at a time.  These cache lines were already preloaded.
  __ bind(&has32);
  __ vld1(8, r1, d0, d3, Writeback);
  __ sub(r2, r2, Operand(32), SetCC);
  __ vst1(8, r0, d0, d3, Writeback, 128 / 8);
  __ b(&has32, hs);

  // Less than 32 left.
  __ bind(&less32);
  __ add(r2, r2, Operand(32));
  __ tst(r2, Operand(0x10));
  __ b(&less16, eq);
  // Copies 16 bytes, 128-bits aligned.
  __ vld1(8, r1, d0, d1, Writeback);
  __ vst1(8, r0, d0, d1, Writeback, 128 / 8);

  // Copy up to 15 bytes (count in r2), using the same shift-into-flags
  // trick as the alignment prologue above: bits 3/2 first, then bits 1/0.
  __ bind(&less16);
  __ mov(ip, Operand(r2, LSL, 29), SetCC);
  __ b(&skip_copy8, cc);
  __ vld1(8, r1, d0, d0, Writeback);
  __ vst1(8, r0, d0, d0, Writeback);
  __ bind(&skip_copy8);
  __ b(&skip_copy4, ge);
  __ vld4(8, r1, d0, element_0, Writeback);
  __ vst4(8, r0, d0, element_0, Writeback);
  __ bind(&skip_copy4);
  __ mov(ip, Operand(r2, LSL, 31), SetCC);
  __ ldrb(r3, MemOperand(r1, 1, PostIndex), mi);
  __ ldrb(ip, MemOperand(r1, 1, PostIndex), cs);
  __ ldrb(lr, MemOperand(r1, 1, PostIndex), cs);
  __ strb(r3, MemOperand(r0, 1, PostIndex), mi);
  __ strb(ip, MemOperand(r0, 1, PostIndex), cs);
  __ strb(lr, MemOperand(r0, 1, PostIndex), cs);

  __ pop(lr);
  __ bx(lr);

  CodeDesc desc;
  masm.GetCode(&desc);
  // The stub must be position-independent: it is returned as a raw
  // function pointer and never relocated.
  ASSERT(!RelocInfo::RequiresRelocation(desc));

  // Flush the instruction cache before making the buffer executable-only.
  CPU::FlushICache(buffer, actual_size);
  OS::ProtectCode(buffer, actual_size);
  return FUNCTION_CAST<OS::MemCopyFunction>(buffer);
}
| 117 | 260 |
| 118 #undef __ | 261 #undef __ |
| 119 | 262 |
| 120 | |
|
danno
2013/03/27 08:40:07
Please remove this extraneous whitespace change.
| |
// No specialized square-root stub is generated on this platform; callers
// get the C library sqrt directly.
// NOTE(review): assumes UnaryMathFunction matches sqrt's signature
// (double (*)(double)) — declared elsewhere in the project; confirm.
UnaryMathFunction CreateSqrtFunction() {
  return &sqrt;
}
| 124 | 266 |
| 125 // ------------------------------------------------------------------------- | 267 // ------------------------------------------------------------------------- |
| 126 // Platform-specific RuntimeCallHelper functions. | 268 // Platform-specific RuntimeCallHelper functions. |
| 127 | 269 |
| 128 void StubRuntimeCallHelper::BeforeCall(MacroAssembler* masm) const { | 270 void StubRuntimeCallHelper::BeforeCall(MacroAssembler* masm) const { |
| 129 masm->EnterFrame(StackFrame::INTERNAL); | 271 masm->EnterFrame(StackFrame::INTERNAL); |
| 130 ASSERT(!masm->has_frame()); | 272 ASSERT(!masm->has_frame()); |
| (...skipping 568 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 699 patcher.masm()->add(r0, pc, Operand(-8)); | 841 patcher.masm()->add(r0, pc, Operand(-8)); |
| 700 patcher.masm()->ldr(pc, MemOperand(pc, -4)); | 842 patcher.masm()->ldr(pc, MemOperand(pc, -4)); |
| 701 patcher.masm()->dd(reinterpret_cast<uint32_t>(stub->instruction_start())); | 843 patcher.masm()->dd(reinterpret_cast<uint32_t>(stub->instruction_start())); |
| 702 } | 844 } |
| 703 } | 845 } |
| 704 | 846 |
| 705 | 847 |
| 706 } } // namespace v8::internal | 848 } } // namespace v8::internal |
| 707 | 849 |
| 708 #endif // V8_TARGET_ARCH_ARM | 850 #endif // V8_TARGET_ARCH_ARM |
| OLD | NEW |