Chromium Code Reviews

| Index: src/compiler/memory-optimizer.cc |
| diff --git a/src/compiler/memory-optimizer.cc b/src/compiler/memory-optimizer.cc |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..fb1d0ab6735fcbebfb06cf7110f9008a91e37f4c |
| --- /dev/null |
| +++ b/src/compiler/memory-optimizer.cc |
| @@ -0,0 +1,494 @@ |
| +// Copyright 2016 the V8 project authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +#include "src/compiler/memory-optimizer.h" |
| + |
| +#include "src/compiler/js-graph.h" |
| +#include "src/compiler/linkage.h" |
| +#include "src/compiler/node-matchers.h" |
| +#include "src/compiler/node-properties.h" |
| +#include "src/compiler/node.h" |
| +#include "src/compiler/simplified-operator.h" |
| + |
| +namespace v8 { |
| +namespace internal { |
| +namespace compiler { |
| + |
| +MemoryOptimizer::MemoryOptimizer(JSGraph* jsgraph, Zone* zone) |
| + : jsgraph_(jsgraph), |
| + empty_state_(new (zone) AllocationState()), |
| + pending_(zone), |
| + tokens_(zone), |
| + zone_(zone) {} |
| + |
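| +// Drives the analysis: starting from the graph's start node, walk along the |
| +// effect chains with a worklist, threading an AllocationState through every |
| +// effectful node that is visited. |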
| +void MemoryOptimizer::Optimize() { |
| + EnqueueUses(graph()->start(), empty_state()); |
| + while (!tokens_.empty()) { |
| + Token const token = tokens_.front(); |
| + tokens_.pop(); |
| + VisitNode(token.node, token.state); |
| + } |
| + DCHECK(pending_.empty()); |
| + DCHECK(tokens_.empty()); |
| +} |
| + |
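| +// An AllocationGroup collects the allocations that have been folded into a |
| +// single reservation, together with their common pretenuring decision and, |
| +// for foldable groups, the mutable Int32Constant holding the reserved size. |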
| +MemoryOptimizer::AllocationGroup::AllocationGroup(Node* node, |
| + PretenureFlag pretenure, |
| + Zone* zone) |
| + : node_ids_(zone), pretenure_(pretenure), size_(nullptr) { |
| + node_ids_.insert(node->id()); |
| +} |
| + |
| +MemoryOptimizer::AllocationGroup::AllocationGroup(Node* node, |
| + PretenureFlag pretenure, |
| + Node* size, Zone* zone) |
| + : node_ids_(zone), pretenure_(pretenure), size_(size) { |
| + node_ids_.insert(node->id()); |
| +} |
| + |
| +void MemoryOptimizer::AllocationGroup::Add(Node* node) { |
| + node_ids_.insert(node->id()); |
| +} |
| + |
| +bool MemoryOptimizer::AllocationGroup::Contains(Node* node) const { |
| + return node_ids_.find(node->id()) != node_ids_.end(); |
| +} |
| + |
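| +// An AllocationState describes the allocation situation along the current |
| +// effect chain: the active allocation group (if any), an upper bound on the |
| +// size reserved in that group so far, and the node holding the current |
| +// allocation top. |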
| +MemoryOptimizer::AllocationState::AllocationState() |
| + : group_(nullptr), size_(std::numeric_limits<int>::max()), top_(nullptr) {} |
| + |
| +MemoryOptimizer::AllocationState::AllocationState(AllocationGroup* group) |
| + : group_(group), size_(std::numeric_limits<int>::max()), top_(nullptr) {} |
| + |
| +MemoryOptimizer::AllocationState::AllocationState(AllocationGroup* group, |
| + int size, Node* top) |
| + : group_(group), size_(size), top_(top) {} |
| + |
| +bool MemoryOptimizer::AllocationState::IsNewSpaceAllocation() const { |
| + return group() && group()->IsNewSpaceAllocation(); |
| +} |
| + |
| +void MemoryOptimizer::VisitNode(Node* node, AllocationState const* state) { |
| + DCHECK(!node->IsDead()); |
| + DCHECK_LT(0, node->op()->EffectInputCount()); |
| + switch (node->opcode()) { |
| + case IrOpcode::kAllocate: |
| + return VisitAllocate(node, state); |
| + case IrOpcode::kCall: |
| + return VisitCall(node, state); |
| + case IrOpcode::kLoadElement: |
| + return VisitLoadElement(node, state); |
| + case IrOpcode::kLoadField: |
| + return VisitLoadField(node, state); |
| + case IrOpcode::kStoreElement: |
| + return VisitStoreElement(node, state); |
| + case IrOpcode::kStoreField: |
| + return VisitStoreField(node, state); |
| + case IrOpcode::kCheckedLoad: |
| + case IrOpcode::kCheckedStore: |
| + case IrOpcode::kIfException: |
| + case IrOpcode::kLoad: |
| + case IrOpcode::kStore: |
| + return VisitOtherEffect(node, state); |
| + default: |
| + break; |
| + } |
| + DCHECK_EQ(0, node->op()->EffectOutputCount()); |
| +} |
| + |
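| +// Lowers an Allocate node to inline bump-pointer allocation with a stub call |
| +// as fallback. Small constant-size allocations are folded into the current |
| +// allocation group by growing its reservation instead of emitting another |
| +// top/limit check. |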
| +void MemoryOptimizer::VisitAllocate(Node* node, AllocationState const* state) { |
| + DCHECK_EQ(IrOpcode::kAllocate, node->opcode()); |
| + Node* value; |
| + Node* size = node->InputAt(0); |
| + Node* effect = node->InputAt(1); |
| + Node* control = node->InputAt(2); |
| + PretenureFlag pretenure = OpParameter<PretenureFlag>(node->op()); |
| + |
| + // Determine the top/limit addresses. |
| + Node* top_address = jsgraph()->ExternalConstant( |
| + pretenure == NOT_TENURED |
| + ? ExternalReference::new_space_allocation_top_address(isolate()) |
| + : ExternalReference::old_space_allocation_top_address(isolate())); |
| + Node* limit_address = jsgraph()->ExternalConstant( |
| + pretenure == NOT_TENURED |
| + ? ExternalReference::new_space_allocation_limit_address(isolate()) |
| + : ExternalReference::old_space_allocation_limit_address(isolate())); |
| + |
| + // Check if we can fold this allocation into a previous allocation represented |
| + // by the incoming {state}. |
| + Int32Matcher m(size); |
| + if (m.HasValue() && m.Value() < Page::kMaxRegularHeapObjectSize) { |
| + int32_t const object_size = m.Value(); |
| + if (state->size() <= Page::kMaxRegularHeapObjectSize - object_size && |
| + state->group()->pretenure() == pretenure) { |
| + // We can fold this Allocate {node} into the allocation {group} |
| + // represented by the given {state}. Compute the upper bound for |
| + // the new {state}. |
| + int32_t const state_size = state->size() + object_size; |

Hannes Payer (out of office), 2016/05/10 07:49:33:
Hoist this computation and use state_size already.

Benedikt Meurer, 2016/05/10 08:01:45:
Cannot do because state->size() + object_size can overflow.
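To make the overflow concern above concrete: the empty AllocationState reports std::numeric_limits<int>::max() as its size, so hoisting the addition above the range check would overflow. A minimal standalone sketch of the safe formulation used here (editorial illustration only; kMaxRegularSize is a placeholder, not V8's Page::kMaxRegularHeapObjectSize):

    #include <cassert>
    #include <limits>

    constexpr int kMaxRegularSize = 512 * 1024;  // placeholder bound, not V8's constant

    // The form used in the patch: object_size is known to be below the bound,
    // so the subtraction cannot underflow even when state_size is INT_MAX.
    bool CanFold(int state_size, int object_size) {
      return state_size <= kMaxRegularSize - object_size;
    }

    int main() {
      int const empty_state_size = std::numeric_limits<int>::max();
      // Computing empty_state_size + 64 up front would be signed overflow
      // (undefined behavior); the subtraction form stays well defined.
      assert(!CanFold(empty_state_size, 64));
      assert(CanFold(32, 64));
    }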
| + |
| + // Update the reservation check to the actual maximum upper bound. |
| + AllocationGroup* const group = state->group(); |
| + if (OpParameter<int32_t>(group->size()) < state_size) { |
| + NodeProperties::ChangeOp(group->size(), |
| + common()->Int32Constant(state_size)); |
| + } |
| + |
| + // Update the allocation top with the new object allocation. |
| + // TODO(bmeurer): Defer writing back top as much as possible. |
| + Node* top = graph()->NewNode(machine()->IntAdd(), state->top(), |
| + jsgraph()->IntPtrConstant(object_size)); |
| + effect = graph()->NewNode( |
| + machine()->Store(StoreRepresentation( |
| + MachineType::PointerRepresentation(), kNoWriteBarrier)), |
| + top_address, jsgraph()->IntPtrConstant(0), top, effect, control); |
| + |
| + // Compute the effective inner allocated address. |
| + value = graph()->NewNode( |
| + machine()->BitcastWordToTagged(), |
| + graph()->NewNode(machine()->IntAdd(), state->top(), |
| + jsgraph()->IntPtrConstant(kHeapObjectTag))); |
| + |
| + // Extend the allocation {group}. |
| + group->Add(value); |
| + state = new (zone()) AllocationState(group, state_size, top); |
| + } else { |
| + // Set up a mutable reservation size node; it will be patched as we fold |
| + // additional allocations into this new group. |
| + Node* size = graph()->NewNode(common()->Int32Constant(object_size)); |
| + |
| + // Load allocation top and limit. |
| + Node* top = effect = |
| + graph()->NewNode(machine()->Load(MachineType::Pointer()), top_address, |
| + jsgraph()->IntPtrConstant(0), effect, control); |
| + Node* limit = effect = graph()->NewNode( |
| + machine()->Load(MachineType::Pointer()), limit_address, |
| + jsgraph()->IntPtrConstant(0), effect, control); |
| + |
| + // Check if we need to collect garbage before we can start bump pointer |
| + // allocation (always done for folded allocations). |
| + Node* check = graph()->NewNode( |
| + machine()->UintLessThan(), |
| + graph()->NewNode( |
| + machine()->IntAdd(), top, |
| + machine()->Is64() |
| + ? graph()->NewNode(machine()->ChangeInt32ToInt64(), size) |
| + : size), |
| + limit); |
| + Node* branch = |
| + graph()->NewNode(common()->Branch(BranchHint::kTrue), check, control); |
| + |
| + Node* if_true = graph()->NewNode(common()->IfTrue(), branch); |
| + Node* etrue = effect; |
| + Node* vtrue = top; |
| + |
| + Node* if_false = graph()->NewNode(common()->IfFalse(), branch); |
| + Node* efalse = effect; |
| + Node* vfalse; |
| + { |
| + Node* target = pretenure == NOT_TENURED |
| + ? jsgraph()->AllocateInNewSpaceStubConstant() |
| + : jsgraph()->AllocateInOldSpaceStubConstant(); |
| + if (!allocate_operator_.is_set()) { |
| + CallDescriptor* descriptor = |
| + Linkage::GetAllocateCallDescriptor(graph()->zone()); |
| + allocate_operator_.set(common()->Call(descriptor)); |
| + } |
| + vfalse = efalse = graph()->NewNode(allocate_operator_.get(), target, |
| + size, efalse, if_false); |
| + vfalse = graph()->NewNode(machine()->IntSub(), vfalse, |
| + jsgraph()->IntPtrConstant(kHeapObjectTag)); |
| + } |
| + |
| + control = graph()->NewNode(common()->Merge(2), if_true, if_false); |
| + effect = graph()->NewNode(common()->EffectPhi(2), etrue, efalse, control); |
| + value = graph()->NewNode( |
| + common()->Phi(MachineType::PointerRepresentation(), 2), vtrue, vfalse, |
| + control); |
| + |
| + // Compute the new top and write it back. |
| + top = graph()->NewNode(machine()->IntAdd(), value, |
| + jsgraph()->IntPtrConstant(object_size)); |
| + effect = graph()->NewNode( |
| + machine()->Store(StoreRepresentation( |
| + MachineType::PointerRepresentation(), kNoWriteBarrier)), |
| + top_address, jsgraph()->IntPtrConstant(0), top, effect, control); |
| + |
| + // Compute the initial object address. |
| + value = graph()->NewNode( |
| + machine()->BitcastWordToTagged(), |
| + graph()->NewNode(machine()->IntAdd(), value, |
| + jsgraph()->IntPtrConstant(kHeapObjectTag))); |
| + |
| + // Start a new allocation group. |
| + AllocationGroup* group = |
| + new (zone()) AllocationGroup(value, pretenure, size, zone()); |
| + state = new (zone()) AllocationState(group, object_size, top); |
| + } |
| + } else { |
| + // Load allocation top and limit. |

Hannes Payer (out of office), 2016/05/10 07:49:33:
This code is similar to the one in the else case above.

Benedikt Meurer, 2016/05/10 08:01:45:
It's just almost the same, but not 100%. For now I kept it this way.
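For orientation, the graph built on this path behaves at runtime like a bump-pointer fast path with an out-of-line stub call, roughly as in the following standalone sketch (editorial illustration only; SlowPathAllocate is a hypothetical stand-in for the AllocateInNewSpace/AllocateInOldSpace stubs, and the tag value is hard-coded rather than taken from V8 headers):

    #include <cstdint>

    constexpr uintptr_t kTag = 1;  // stands in for kHeapObjectTag

    // Hypothetical stand-in for the out-of-line allocation stub; the real one
    // may trigger a garbage collection before retrying the allocation.
    uintptr_t SlowPathAllocate(uintptr_t /*size*/) { return 0; }

    // Mirrors the lowered Allocate node: load top and limit, try to bump the
    // top pointer, and fall back to the stub when the space is exhausted.
    uintptr_t BumpPointerAllocate(uintptr_t* top_address,
                                  uintptr_t* limit_address, uintptr_t size) {
      uintptr_t const top = *top_address;
      uintptr_t const new_top = top + size;
      if (new_top < *limit_address) {  // the BranchHint::kTrue fast path
        *top_address = new_top;        // write back the bumped top
        return top + kTag;             // tagged pointer to the new object
      }
      return SlowPathAllocate(size);
    }

    int main() {
      alignas(8) static uint8_t space[256];
      uintptr_t top = reinterpret_cast<uintptr_t>(space);
      uintptr_t limit = top + sizeof(space);
      // Fast path: plenty of room left, so no stub call is needed.
      return BumpPointerAllocate(&top, &limit, 32) == 0 ? 1 : 0;
    }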
| + Node* top = effect = |
| + graph()->NewNode(machine()->Load(MachineType::Pointer()), top_address, |
| + jsgraph()->IntPtrConstant(0), effect, control); |
| + Node* limit = effect = |
| + graph()->NewNode(machine()->Load(MachineType::Pointer()), limit_address, |
| + jsgraph()->IntPtrConstant(0), effect, control); |
| + |
| + // Compute the new top. |
| + Node* new_top = graph()->NewNode( |
| + machine()->IntAdd(), top, |
| + machine()->Is64() |
| + ? graph()->NewNode(machine()->ChangeInt32ToInt64(), size) |
| + : size); |
| + |
| + // Check if we can do bump pointer allocation here. |
| + Node* check = graph()->NewNode(machine()->UintLessThan(), new_top, limit); |
| + Node* branch = |
| + graph()->NewNode(common()->Branch(BranchHint::kTrue), check, control); |
| + |
| + Node* if_true = graph()->NewNode(common()->IfTrue(), branch); |
| + Node* etrue = effect; |
| + Node* vtrue; |
| + { |
| + etrue = graph()->NewNode( |
| + machine()->Store(StoreRepresentation( |
| + MachineType::PointerRepresentation(), kNoWriteBarrier)), |
| + top_address, jsgraph()->IntPtrConstant(0), new_top, etrue, if_true); |
| + vtrue = graph()->NewNode( |
| + machine()->BitcastWordToTagged(), |
| + graph()->NewNode(machine()->IntAdd(), top, |
| + jsgraph()->IntPtrConstant(kHeapObjectTag))); |
| + } |
| + |
| + Node* if_false = graph()->NewNode(common()->IfFalse(), branch); |
| + Node* efalse = effect; |
| + Node* vfalse; |
| + { |
| + Node* target = pretenure == NOT_TENURED |
| + ? jsgraph()->AllocateInNewSpaceStubConstant() |
| + : jsgraph()->AllocateInOldSpaceStubConstant(); |
| + if (!allocate_operator_.is_set()) { |
| + CallDescriptor* descriptor = |
| + Linkage::GetAllocateCallDescriptor(graph()->zone()); |
| + allocate_operator_.set(common()->Call(descriptor)); |
| + } |
| + vfalse = efalse = graph()->NewNode(allocate_operator_.get(), target, size, |
| + efalse, if_false); |
| + } |
| + |
| + control = graph()->NewNode(common()->Merge(2), if_true, if_false); |
| + effect = graph()->NewNode(common()->EffectPhi(2), etrue, efalse, control); |
| + value = graph()->NewNode(common()->Phi(MachineRepresentation::kTagged, 2), |
| + vtrue, vfalse, control); |
| + |
| + // Create an unfoldable allocation group. |
| + AllocationGroup* group = |
| + new (zone()) AllocationGroup(value, pretenure, zone()); |
| + state = new (zone()) AllocationState(group); |
| + } |
| + |
| + // Replace all effect uses of {node} with the {effect}, enqueue the |
| + // effect uses for further processing, and replace all value uses of |
| + // {node} with the {value}. |
| + for (Edge edge : node->use_edges()) { |
| + if (NodeProperties::IsEffectEdge(edge)) { |
| + EnqueueUse(edge.from(), edge.index(), state); |
| + edge.UpdateTo(effect); |
| + } else { |
| + DCHECK(NodeProperties::IsValueEdge(edge)); |
| + edge.UpdateTo(value); |
| + } |
| + } |
| + |
| + // Kill the {node} to make sure we don't leave dangling dead uses. |
| + node->Kill(); |
| +} |
| + |
| +void MemoryOptimizer::VisitCall(Node* node, AllocationState const* state) { |
| + DCHECK_EQ(IrOpcode::kCall, node->opcode()); |
| + // If the call can allocate, we start with a fresh state. |
| + if (!(CallDescriptorOf(node->op())->flags() & CallDescriptor::kNoAllocate)) { |
| + state = empty_state(); |
| + } |
| + EnqueueUses(node, state); |
| +} |
| + |
| +void MemoryOptimizer::VisitLoadElement(Node* node, |
| + AllocationState const* state) { |
| + DCHECK_EQ(IrOpcode::kLoadElement, node->opcode()); |
| + ElementAccess const& access = ElementAccessOf(node->op()); |
| + Node* index = node->InputAt(1); |
| + node->ReplaceInput(1, ComputeIndex(access, index)); |
| + NodeProperties::ChangeOp(node, machine()->Load(access.machine_type)); |
| + EnqueueUses(node, state); |
| +} |
| + |
| +void MemoryOptimizer::VisitLoadField(Node* node, AllocationState const* state) { |
| + DCHECK_EQ(IrOpcode::kLoadField, node->opcode()); |
| + FieldAccess const& access = FieldAccessOf(node->op()); |
| + Node* offset = jsgraph()->IntPtrConstant(access.offset - access.tag()); |
| + node->InsertInput(graph()->zone(), 1, offset); |
| + NodeProperties::ChangeOp(node, machine()->Load(access.machine_type)); |
| + EnqueueUses(node, state); |
| +} |
| + |
| +void MemoryOptimizer::VisitStoreElement(Node* node, |
| + AllocationState const* state) { |
| + DCHECK_EQ(IrOpcode::kStoreElement, node->opcode()); |
| + ElementAccess const& access = ElementAccessOf(node->op()); |
| + Node* object = node->InputAt(0); |
| + Node* index = node->InputAt(1); |
| + WriteBarrierKind write_barrier_kind = |
| + ComputeWriteBarrierKind(object, state, access.write_barrier_kind); |
| + node->ReplaceInput(1, ComputeIndex(access, index)); |
| + NodeProperties::ChangeOp( |
| + node, machine()->Store(StoreRepresentation( |
| + access.machine_type.representation(), write_barrier_kind))); |
| + EnqueueUses(node, state); |
| +} |
| + |
| +void MemoryOptimizer::VisitStoreField(Node* node, |
| + AllocationState const* state) { |
| + DCHECK_EQ(IrOpcode::kStoreField, node->opcode()); |
| + FieldAccess const& access = FieldAccessOf(node->op()); |
| + Node* object = node->InputAt(0); |
| + WriteBarrierKind write_barrier_kind = |
| + ComputeWriteBarrierKind(object, state, access.write_barrier_kind); |
| + Node* offset = jsgraph()->IntPtrConstant(access.offset - access.tag()); |
| + node->InsertInput(graph()->zone(), 1, offset); |
| + NodeProperties::ChangeOp( |
| + node, machine()->Store(StoreRepresentation( |
| + access.machine_type.representation(), write_barrier_kind))); |
| + EnqueueUses(node, state); |
| +} |
| + |
| +void MemoryOptimizer::VisitOtherEffect(Node* node, |
| + AllocationState const* state) { |
| + EnqueueUses(node, state); |
| +} |
| + |
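| +// Turns an element index into a byte offset: scale by the element size and |
| +// add the header size, adjusted for the heap object tag. |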
| +Node* MemoryOptimizer::ComputeIndex(ElementAccess const& access, Node* key) { |
| + Node* index = key; |
| + int element_size_shift = |
| + ElementSizeLog2Of(access.machine_type.representation()); |
| + if (element_size_shift) { |
| + index = graph()->NewNode(machine()->Word32Shl(), index, |
| + jsgraph()->Int32Constant(element_size_shift)); |
| + } |
| + const int fixed_offset = access.header_size - access.tag(); |
| + if (fixed_offset) { |
| + index = graph()->NewNode(machine()->Int32Add(), index, |
| + jsgraph()->Int32Constant(fixed_offset)); |
| + } |
| + if (machine()->Is64()) { |
| + // TODO(turbofan): This is probably only correct for typed arrays, and only |
| + // if the typed arrays are at most 2GiB in size, which happens to match |
| + // exactly our current situation. |
| + index = graph()->NewNode(machine()->ChangeUint32ToUint64(), index); |
| + } |
| + return index; |
| +} |
| + |
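| +// A store needs no write barrier if its target object was allocated in the |
| +// current new-space allocation group: no allocating call or GC can have |
| +// intervened on this effect chain, so the object is still in new space. |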
| +WriteBarrierKind MemoryOptimizer::ComputeWriteBarrierKind( |
| + Node* object, AllocationState const* state, |
| + WriteBarrierKind write_barrier_kind) { |
| + if (state->IsNewSpaceAllocation() && state->group()->Contains(object)) { |
| + write_barrier_kind = kNoWriteBarrier; |
| + } |
| + return write_barrier_kind; |
| +} |
| + |
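| +// Merges the allocation states arriving at an EffectPhi: if all inputs carry |
| +// the same state it is kept; if they merely share an allocation group, write |
| +// barrier elimination remains possible but further folding does not; |
| +// otherwise the result is the empty state. |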
| +MemoryOptimizer::AllocationState const* MemoryOptimizer::MergeStates( |
| + AllocationStates const& states) { |
| + // Check if all states are the same; or at least if all allocation |
| + // states belong to the same allocation group. |
| + AllocationState const* state = states.front(); |
| + AllocationGroup* group = state->group(); |
| + for (size_t i = 1; i < states.size(); ++i) { |
| + if (states[i] != state) state = nullptr; |
| + if (states[i]->group() != group) group = nullptr; |
| + } |
| + if (state == nullptr) { |
| + if (group != nullptr) { |
| + // We cannot fold any more allocations into this group, but we can still |
| + // eliminate write barriers on stores to this group. |
| + // TODO(bmeurer): We could potentially just create a Phi here to merge |
| + // the various tops; but we need to pay special attention not to create |
| + // an unschedulable graph. |
| + state = new (zone()) AllocationState(group); |
| + } else { |
| + // The states are from different allocation groups. |
| + state = empty_state(); |
| + } |
| + } |
| + return state; |
| +} |
| + |
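| +// Handles the EffectPhi input at {index}. For loops, processing happens once |
| +// with the empty state when the initial (index 0) input is reached; for |
| +// ordinary merges the incoming states are buffered until all inputs have |
| +// been seen and can be merged. |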
| +void MemoryOptimizer::EnqueueMerge(Node* node, int index, |
| + AllocationState const* state) { |
| + DCHECK_EQ(IrOpcode::kEffectPhi, node->opcode()); |
| + int const input_count = node->InputCount() - 1; |
| + DCHECK_LT(0, input_count); |
| + Node* const control = node->InputAt(input_count); |
| + if (control->opcode() == IrOpcode::kLoop) { |
| + // For loops we always start with an empty state at the beginning. |
| + if (index == 0) EnqueueUses(node, empty_state()); |
| + } else { |
| + DCHECK_EQ(IrOpcode::kMerge, control->opcode()); |
| + // Check if we already know about this pending merge. |
| + NodeId const id = node->id(); |
| + auto it = pending_.find(id); |
| + if (it == pending_.end()) { |
| + // Insert a new pending merge. |
| + it = pending_.insert(std::make_pair(id, AllocationStates(zone()))).first; |
| + } |
| + // Add the next input state. |
| + it->second.push_back(state); |
| + // Check if states for all inputs are available by now. |
| + if (it->second.size() == static_cast<size_t>(input_count)) { |
| + // All inputs to this effect merge are done, merge the states given all |
| + // input constraints, drop the pending merge and enqueue uses of the |
| + // EffectPhi {node}. |
| + state = MergeStates(it->second); |
| + EnqueueUses(node, state); |
| + pending_.erase(it); |
| + } |
| + } |
| +} |
| + |
| +void MemoryOptimizer::EnqueueUses(Node* node, AllocationState const* state) { |
| + for (Edge const edge : node->use_edges()) { |
| + if (NodeProperties::IsEffectEdge(edge)) { |
| + EnqueueUse(edge.from(), edge.index(), state); |
| + } |
| + } |
| +} |
| + |
| +void MemoryOptimizer::EnqueueUse(Node* node, int index, |
| + AllocationState const* state) { |
| + if (node->opcode() == IrOpcode::kEffectPhi) { |
| + // An EffectPhi represents a merge of different effect chains, which |
| + // needs special handling depending on whether the merge is part of a |
| + // loop or just a normal control join. |
| + EnqueueMerge(node, index, state); |
| + } else { |
| + Token token = {node, state}; |
| + tokens_.push(token); |
| + } |
| +} |
| + |
| +Graph* MemoryOptimizer::graph() const { return jsgraph()->graph(); } |
| + |
| +Isolate* MemoryOptimizer::isolate() const { return jsgraph()->isolate(); } |
| + |
| +CommonOperatorBuilder* MemoryOptimizer::common() const { |
| + return jsgraph()->common(); |
| +} |
| + |
| +MachineOperatorBuilder* MemoryOptimizer::machine() const { |
| + return jsgraph()->machine(); |
| +} |
| + |
| +} // namespace compiler |
| +} // namespace internal |
| +} // namespace v8 |