Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(371)

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 1266673003: Subzero. Implements x86-64 lowerCall. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: git pull && addresses comments && git format Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | src/IceTargetLoweringX8664.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show/Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
11 /// This file implements the TargetLoweringX8632 class, which 11 /// This file implements the TargetLoweringX8632 class, which
12 /// consists almost entirely of the lowering sequence for each 12 /// consists almost entirely of the lowering sequence for each
13 /// high-level instruction. 13 /// high-level instruction.
14 /// 14 ///
15 //===----------------------------------------------------------------------===// 15 //===----------------------------------------------------------------------===//
16 16
17 #include "IceTargetLoweringX8632.h" 17 #include "IceTargetLoweringX8632.h"
18 18
19 #include "IceTargetLoweringX8632Traits.h" 19 #include "IceTargetLoweringX8632Traits.h"
20 #include "IceTargetLoweringX86Base.h" 20 #include "IceTargetLoweringX86Base.h"
21 21
22 namespace Ice { 22 namespace Ice {
23 23
24 //------------------------------------------------------------------------------
25 // ______ ______ ______ __ ______ ______
26 // /\__ _\ /\ == \ /\ __ \ /\ \ /\__ _\ /\ ___\
27 // \/_/\ \/ \ \ __< \ \ __ \ \ \ \ \/_/\ \/ \ \___ \
28 // \ \_\ \ \_\ \_\ \ \_\ \_\ \ \_\ \ \_\ \/\_____\
29 // \/_/ \/_/ /_/ \/_/\/_/ \/_/ \/_/ \/_____/
30 //
31 //------------------------------------------------------------------------------
24 namespace X86Internal { 32 namespace X86Internal {
25 const MachineTraits<TargetX8632>::TableFcmpType 33 const MachineTraits<TargetX8632>::TableFcmpType
26 MachineTraits<TargetX8632>::TableFcmp[] = { 34 MachineTraits<TargetX8632>::TableFcmp[] = {
27 #define X(val, dflt, swapS, C1, C2, swapV, pred) \ 35 #define X(val, dflt, swapS, C1, C2, swapV, pred) \
28 { \ 36 { \
29 dflt, swapS, X8632::Traits::Cond::C1, X8632::Traits::Cond::C2, swapV, \ 37 dflt, swapS, X8632::Traits::Cond::C1, X8632::Traits::Cond::C2, swapV, \
30 X8632::Traits::Cond::pred \ 38 X8632::Traits::Cond::pred \
31 } \ 39 } \
32 , 40 ,
33 FCMPX8632_TABLE 41 FCMPX8632_TABLE
(...skipping 358 matching lines...) Expand 10 before | Expand all | Expand 10 after
392 // Repeat the static asserts with respect to the high-level table 400 // Repeat the static asserts with respect to the high-level table
393 // entries in case the high-level table has extra entries. 401 // entries in case the high-level table has extra entries.
394 #define X(tag, sizeLog2, align, elts, elty, str) \ 402 #define X(tag, sizeLog2, align, elts, elty, str) \
395 static_assert(_table1_##tag == _table2_##tag, \ 403 static_assert(_table1_##tag == _table2_##tag, \
396 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE"); 404 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
397 ICETYPE_TABLE 405 ICETYPE_TABLE
398 #undef X 406 #undef X
399 } // end of namespace dummy3 407 } // end of namespace dummy3
400 } // end of anonymous namespace 408 } // end of anonymous namespace
401 409
410 //------------------------------------------------------------------------------
411 // __ ______ __ __ ______ ______ __ __ __ ______
412 // /\ \ /\ __ \/\ \ _ \ \/\ ___\/\ == \/\ \/\ "-.\ \/\ ___\
413 // \ \ \___\ \ \/\ \ \ \/ ".\ \ \ __\\ \ __<\ \ \ \ \-. \ \ \__ \
414 // \ \_____\ \_____\ \__/".~\_\ \_____\ \_\ \_\ \_\ \_\\"\_\ \_____\
415 // \/_____/\/_____/\/_/ \/_/\/_____/\/_/ /_/\/_/\/_/ \/_/\/_____/
416 //
417 //------------------------------------------------------------------------------
void TargetX8632::lowerCall(const InstCall *Instr) {
  // Lowering sequence: classify the arguments (XMM registers vs. stack
  // slots), pre-adjust esp to make room for the parameter area, store the
  // stack arguments and load the register arguments, emit the (possibly
  // sandboxed) call, then move the return value into Dest.
  //
  // x86-32 calling convention:
  //
  // * At the point before the call, the stack must be aligned to 16
  // bytes.
  //
  // * The first four arguments of vector type, regardless of their
  // position relative to the other arguments in the argument list, are
  // placed in registers xmm0 - xmm3.
  //
  // * Other arguments are pushed onto the stack in right-to-left order,
  // such that the left-most argument ends up on the top of the stack at
  // the lowest memory address.
  //
  // * Stack arguments of vector type are aligned to start at the next
  // highest multiple of 16 bytes. Other stack arguments are aligned to
  // 4 bytes.
  //
  // This intends to match the section "IA-32 Function Calling
  // Convention" of the document "OS X ABI Function Call Guide" by
  // Apple.
  //
  // Record that this function contains a call, so the prologue must
  // establish the 16-byte stack alignment assumed above.
  NeedsStackAlignment = true;

  typedef std::vector<Operand *> OperandList;
  OperandList XmmArgs;
  // StackArgs[i] will be stored to StackArgLocations[i]; the two vectors
  // are built in lockstep by the classification loop below.
  OperandList StackArgs, StackArgLocations;
  uint32_t ParameterAreaSizeBytes = 0;

  // Classify each argument operand according to the location where the
  // argument is passed.
  for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
    Operand *Arg = Instr->getArg(i);
    Type Ty = Arg->getType();
    // The PNaCl ABI requires the width of arguments to be at least 32 bits.
    assert(typeWidthInBytes(Ty) >= 4);
    if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
      XmmArgs.push_back(Arg);
    } else {
      StackArgs.push_back(Arg);
      if (isVectorType(Arg->getType())) {
        // Vector stack arguments start at the next stack-aligned
        // (16-byte) offset within the parameter area.
        ParameterAreaSizeBytes =
            Traits::applyStackAlignment(ParameterAreaSizeBytes);
      }
      // The slot is addressed as [esp + offset]; these offsets are only
      // valid after the pre-call esp adjustment below, which happens
      // before the stores into these locations.
      Variable *esp =
          Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
      Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
      StackArgLocations.push_back(
          Traits::X86OperandMem::create(Func, Ty, esp, Loc));
      ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
    }
  }

  // Adjust the parameter area so that the stack is aligned. It is
  // assumed that the stack is already aligned at the start of the
  // calling sequence.
  ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);

  // Subtract the appropriate amount for the argument area. This also
  // takes care of setting the stack adjustment during emission.
  //
  // TODO: If for some reason the call instruction gets dead-code
  // eliminated after lowering, we would need to ensure that the
  // pre-call and the post-call esp adjustment get eliminated as well.
  if (ParameterAreaSizeBytes) {
    _adjust_stack(ParameterAreaSizeBytes);
  }

  // Copy arguments that are passed on the stack to the appropriate
  // stack locations.
  for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
    lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
  }

  // Copy arguments to be passed in registers to the appropriate
  // registers.
  // TODO: Investigate the impact of lowering arguments passed in
  // registers after lowering stack arguments as opposed to the other
  // way around. Lowering register arguments after stack arguments may
  // reduce register pressure. On the other hand, lowering register
  // arguments first (before stack arguments) may result in more compact
  // code, as the memory operand displacements may end up being smaller
  // before any stack adjustment is done.
  for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
    // The i-th XMM argument goes in xmm<i> (register numbers are assumed
    // consecutive starting at Reg_xmm0).
    Variable *Reg =
        legalizeToReg(XmmArgs[i], Traits::RegisterSet::Reg_xmm0 + i);
    // Generate a FakeUse of register arguments so that they do not get
    // dead code eliminated as a result of the FakeKill of scratch
    // registers after the call.
    Context.insert(InstFakeUse::create(Func, Reg));
  }
  // Generate the call instruction. Assign its result to a temporary
  // with high register allocation weight.
  Variable *Dest = Instr->getDest();
  // ReturnReg doubles as ReturnRegLo as necessary.
  Variable *ReturnReg = nullptr;
  Variable *ReturnRegHi = nullptr;
  if (Dest) {
    switch (Dest->getType()) {
    case IceType_NUM:
      llvm_unreachable("Invalid Call dest type");
      break;
    case IceType_void:
      break;
    case IceType_i1:
    case IceType_i8:
    case IceType_i16:
    case IceType_i32:
      // Scalar integer results come back in eax.
      ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax);
      break;
    case IceType_i64:
      // i64 results are split across the edx:eax register pair.
      ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
      ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
      break;
    case IceType_f32:
    case IceType_f64:
      // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with
      // the fstp instruction.
      break;
    case IceType_v4i1:
    case IceType_v8i1:
    case IceType_v16i1:
    case IceType_v16i8:
    case IceType_v8i16:
    case IceType_v4i32:
    case IceType_v4f32:
      // Vector results come back in xmm0.
      ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0);
      break;
    }
  }
  Operand *CallTarget = legalize(Instr->getCallTarget());
  const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
  if (NeedSandboxing) {
    if (llvm::isa<Constant>(CallTarget)) {
      // Direct call: only ensure the call instruction ends exactly at a
      // bundle boundary, so the return address is bundle-aligned.
      _bundle_lock(InstBundleLock::Opt_AlignToEnd);
    } else {
      // Indirect call: mask the target address down to a bundle boundary
      // (clearing the low log2(BundleSize) bits) within the same locked
      // bundle as the call itself, so the sandboxed jump cannot land
      // mid-bundle.
      Variable *CallTargetVar = nullptr;
      _mov(CallTargetVar, CallTarget);
      _bundle_lock(InstBundleLock::Opt_AlignToEnd);
      const SizeT BundleSize =
          1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
      _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
      CallTarget = CallTargetVar;
    }
  }
  Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget);
  Context.insert(NewCall);
  if (NeedSandboxing)
    _bundle_unlock();
  // NewCall models only the eax half of an i64 return; a FakeDef makes
  // the edx half visible as defined by the call for liveness purposes.
  if (ReturnRegHi)
    Context.insert(InstFakeDef::create(Func, ReturnRegHi));

  // Add the appropriate offset to esp. The call instruction takes care
  // of resetting the stack offset during emission.
  if (ParameterAreaSizeBytes) {
    Variable *esp =
        Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
    _add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes));
  }

  // Insert a register-kill pseudo instruction.
  Context.insert(InstFakeKill::create(Func, NewCall));

  // Generate a FakeUse to keep the call live if necessary.
  if (Instr->hasSideEffects() && ReturnReg) {
    Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
    Context.insert(FakeUse);
  }

  if (!Dest)
    return;

  // Assign the result of the call to Dest.
  if (ReturnReg) {
    if (ReturnRegHi) {
      // Reassemble the i64 result from the edx:eax pair.
      assert(Dest->getType() == IceType_i64);
      split64(Dest);
      Variable *DestLo = Dest->getLo();
      Variable *DestHi = Dest->getHi();
      _mov(DestLo, ReturnReg);
      _mov(DestHi, ReturnRegHi);
    } else {
      assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
             Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
             isVectorType(Dest->getType()));
      if (isVectorType(Dest->getType())) {
        _movp(Dest, ReturnReg);
      } else {
        _mov(Dest, ReturnReg);
      }
    }
  } else if (isScalarFloatingType(Dest->getType())) {
    // Special treatment for an FP function which returns its result in
    // st(0).
    // If Dest ends up being a physical xmm register, the fstp emit code
    // will route st(0) through a temporary stack slot.
    _fstp(Dest);
    // Create a fake use of Dest in case it actually isn't used,
    // because st(0) still needs to be popped.
    Context.insert(InstFakeUse::create(Func, Dest));
  }
}
619
402 } // end of namespace Ice 620 } // end of namespace Ice
OLDNEW
« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | src/IceTargetLoweringX8664.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698