1 // Copyright 2016 the V8 project authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "src/compiler/memory-optimizer.h" | |
6 | |
7 #include "src/compiler/js-graph.h" | |
8 #include "src/compiler/linkage.h" | |
9 #include "src/compiler/node-matchers.h" | |
10 #include "src/compiler/node-properties.h" | |
11 #include "src/compiler/node.h" | |
12 #include "src/compiler/simplified-operator.h" | |
13 | |
14 namespace v8 { | |
15 namespace internal { | |
16 namespace compiler { | |
17 | |
18 MemoryOptimizer::MemoryOptimizer(JSGraph* jsgraph, Zone* zone) | |
19 : jsgraph_(jsgraph), | |
20 empty_state_(new (zone) AllocationState()), | |
21 pending_(zone), | |
22 tokens_(zone), | |
23 zone_(zone) {} | |
24 | |
25 void MemoryOptimizer::Optimize() { | |
26 EnqueueUses(graph()->start(), empty_state()); | |
27 while (!tokens_.empty()) { | |
28 Token const token = tokens_.front(); | |
29 tokens_.pop(); | |
30 VisitNode(token.node, token.state); | |
31 } | |
32 DCHECK(pending_.empty()); | |
33 DCHECK(tokens_.empty()); | |
34 } | |
35 | |
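The pass above is a plain worklist algorithm over the effect chain: it seeds the queue with the effect uses of Start paired with the empty allocation state, then pops (node, state) tokens until the queue drains. A minimal standalone sketch of that traversal pattern, using invented Node/State/visit types rather than V8's compiler classes:

    #include <queue>
    #include <vector>

    struct State;                                     // allocation state known before a node
    struct Node { std::vector<Node*> effect_uses; };
    struct Token { Node* node; const State* state; };

    // Walk every node reachable along effect edges, threading a state through.
    // `visit` may push the visited node's own effect uses with an updated state.
    void RunWorklist(Node* start, const State* empty_state,
                     void (*visit)(Node*, const State*, std::queue<Token>*)) {
      std::queue<Token> tokens;
      for (Node* use : start->effect_uses) tokens.push({use, empty_state});
      while (!tokens.empty()) {
        Token token = tokens.front();
        tokens.pop();
        visit(token.node, token.state, &tokens);
      }
    }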
36 MemoryOptimizer::AllocationGroup::AllocationGroup(Node* node, | |
37 PretenureFlag pretenure, | |
38 Zone* zone) | |
39 : node_ids_(zone), pretenure_(pretenure), size_(nullptr) { | |
40 node_ids_.insert(node->id()); | |
41 } | |
42 | |
43 MemoryOptimizer::AllocationGroup::AllocationGroup(Node* node, | |
44 PretenureFlag pretenure, | |
45 Node* size, Zone* zone) | |
46 : node_ids_(zone), pretenure_(pretenure), size_(size) { | |
47 node_ids_.insert(node->id()); | |
48 } | |
49 | |
50 void MemoryOptimizer::AllocationGroup::Add(Node* node) { | |
51 node_ids_.insert(node->id()); | |
52 } | |
53 | |
54 bool MemoryOptimizer::AllocationGroup::Contains(Node* node) const { | |
55 return node_ids_.find(node->id()) != node_ids_.end(); | |
56 } | |
57 | |
58 MemoryOptimizer::AllocationState::AllocationState() | |
59 : group_(nullptr), size_(std::numeric_limits<int>::max()), top_(nullptr) {} | |
60 | |
61 MemoryOptimizer::AllocationState::AllocationState(AllocationGroup* group) | |
62 : group_(group), size_(std::numeric_limits<int>::max()), top_(nullptr) {} | |
63 | |
64 MemoryOptimizer::AllocationState::AllocationState(AllocationGroup* group, | |
65 int size, Node* top) | |
66 : group_(group), size_(size), top_(top) {} | |
67 | |
68 bool MemoryOptimizer::AllocationState::IsNewSpaceAllocation() const { | |
69 return group() && group()->IsNewSpaceAllocation(); | |
70 } | |
71 | |
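The three AllocationState constructors encode three levels of knowledge: no dominating allocation at all (the empty state), a known allocation group whose top and reservation size are unknown (for example after a merge), and an open group with a concrete size and top node into which further allocations may be folded. The std::numeric_limits<int>::max() size acts as a sentinel that makes the folding guard in VisitAllocate fail. A small model of that encoding in plain C++ (Group, CanFold and max_object_size are illustrative names, and the pretenure check is omitted):

    #include <limits>

    struct Group;  // stands in for AllocationGroup

    struct State {
      const Group* group = nullptr;                // nullptr: no dominating allocation
      int size = std::numeric_limits<int>::max();  // sentinel: folding never possible
      void* top = nullptr;                         // tracked bump pointer, if any

      // Mirrors the guard in VisitAllocate: fold only while the combined
      // reservation still fits a regular-sized heap object. The sentinel size
      // always fails this test, so closed states are never folded into.
      bool CanFold(int object_size, int max_object_size) const {
        return group != nullptr && size <= max_object_size - object_size;
      }
    };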
72 void MemoryOptimizer::VisitNode(Node* node, AllocationState const* state) { | |
73 DCHECK(!node->IsDead()); | |
74 DCHECK_LT(0, node->op()->EffectInputCount()); | |
75 switch (node->opcode()) { | |
76 case IrOpcode::kAllocate: | |
77 return VisitAllocate(node, state); | |
78 case IrOpcode::kCall: | |
79 return VisitCall(node, state); | |
80 case IrOpcode::kLoadElement: | |
81 return VisitLoadElement(node, state); | |
82 case IrOpcode::kLoadField: | |
83 return VisitLoadField(node, state); | |
84 case IrOpcode::kStoreElement: | |
85 return VisitStoreElement(node, state); | |
86 case IrOpcode::kStoreField: | |
87 return VisitStoreField(node, state); | |
88 case IrOpcode::kCheckedLoad: | |
89 case IrOpcode::kCheckedStore: | |
90 case IrOpcode::kIfException: | |
91 case IrOpcode::kLoad: | |
92 case IrOpcode::kStore: | |
93 return VisitOtherEffect(node, state); | |
94 default: | |
95 break; | |
96 } | |
97 DCHECK_EQ(0, node->op()->EffectOutputCount()); | |
98 } | |
99 | |
100 void MemoryOptimizer::VisitAllocate(Node* node, AllocationState const* state) { | |
101 DCHECK_EQ(IrOpcode::kAllocate, node->opcode()); | |
102 Node* value; | |
103 Node* size = node->InputAt(0); | |
104 Node* effect = node->InputAt(1); | |
105 Node* control = node->InputAt(2); | |
106 PretenureFlag pretenure = OpParameter<PretenureFlag>(node->op()); | |
107 | |
108 // Determine the top/limit addresses. | |
109 Node* top_address = jsgraph()->ExternalConstant( | |
110 pretenure == NOT_TENURED | |
111 ? ExternalReference::new_space_allocation_top_address(isolate()) | |
112 : ExternalReference::old_space_allocation_top_address(isolate())); | |
113 Node* limit_address = jsgraph()->ExternalConstant( | |
114 pretenure == NOT_TENURED | |
115 ? ExternalReference::new_space_allocation_limit_address(isolate()) | |
116 : ExternalReference::old_space_allocation_limit_address(isolate())); | |
117 | |
118 // Check if we can fold this allocation into a previous allocation represented | |
119 // by the incoming {state}. | |
120 Int32Matcher m(size); | |
121 if (m.HasValue() && m.Value() < Page::kMaxRegularHeapObjectSize) { | |
122 int32_t const object_size = m.Value(); | |
123 if (state->size() <= Page::kMaxRegularHeapObjectSize - object_size && | |
124 state->group()->pretenure() == pretenure) { | |
125 // We can fold this Allocate {node} into the allocation {group} | |
126 // represented by the given {state}. Compute the upper bound for | |
127 // the new {state}. | |
128             int32_t const state_size = state->size() + object_size; | |
    Hannes Payer (out of office), 2016/05/10 07:49:33:
        Hoist this computation and use state_size already
    Benedikt Meurer, 2016/05/10 08:01:45:
        Cannot do because state->size() + object_size can
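    (Reading of this exchange, inferred from the surrounding code: the guard on
    state->size() above is what keeps this sum in range. The empty
    AllocationState uses std::numeric_limits<int>::max() as its size, so
    computing state->size() + object_size before the check could overflow int32,
    which is presumably the point of the reply. As a worked example of the
    folding itself: folding a 24-byte allocation into a group whose state size
    is 16 gives state_size = 40, and the group's reservation constant is then
    patched from 16 up to 40 by the ChangeOp below.)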
129 | |
130 // Update the reservation check to the actual maximum upper bound. | |
131 AllocationGroup* const group = state->group(); | |
132 if (OpParameter<int32_t>(group->size()) < state_size) { | |
133 NodeProperties::ChangeOp(group->size(), | |
134 common()->Int32Constant(state_size)); | |
135 } | |
136 | |
137 // Update the allocation top with the new object allocation. | |
138 // TODO(bmeurer): Defer writing back top as much as possible. | |
139 Node* top = graph()->NewNode(machine()->IntAdd(), state->top(), | |
140 jsgraph()->IntPtrConstant(object_size)); | |
141 effect = graph()->NewNode( | |
142 machine()->Store(StoreRepresentation( | |
143 MachineType::PointerRepresentation(), kNoWriteBarrier)), | |
144 top_address, jsgraph()->IntPtrConstant(0), top, effect, control); | |
145 | |
146 // Compute the effective inner allocated address. | |
147 value = graph()->NewNode( | |
148 machine()->BitcastWordToTagged(), | |
149 graph()->NewNode(machine()->IntAdd(), state->top(), | |
150 jsgraph()->IntPtrConstant(kHeapObjectTag))); | |
151 | |
152 // Extend the allocation {group}. | |
153 group->Add(value); | |
154 state = new (zone()) AllocationState(group, state_size, top); | |
155 } else { | |
156 // Setup a mutable reservation size node; will be patched as we fold | |
157 // additional allocations into this new group. | |
158 Node* size = graph()->NewNode(common()->Int32Constant(object_size)); | |
159 | |
160 // Load allocation top and limit. | |
161 Node* top = effect = | |
162 graph()->NewNode(machine()->Load(MachineType::Pointer()), top_address, | |
163 jsgraph()->IntPtrConstant(0), effect, control); | |
164 Node* limit = effect = graph()->NewNode( | |
165 machine()->Load(MachineType::Pointer()), limit_address, | |
166 jsgraph()->IntPtrConstant(0), effect, control); | |
167 | |
168 // Check if we need to collect garbage before we can start bump pointer | |
169 // allocation (always done for folded allocations). | |
170 Node* check = graph()->NewNode( | |
171 machine()->UintLessThan(), | |
172 graph()->NewNode( | |
173 machine()->IntAdd(), top, | |
174 machine()->Is64() | |
175 ? graph()->NewNode(machine()->ChangeInt32ToInt64(), size) | |
176 : size), | |
177 limit); | |
178 Node* branch = | |
179 graph()->NewNode(common()->Branch(BranchHint::kTrue), check, control); | |
180 | |
181 Node* if_true = graph()->NewNode(common()->IfTrue(), branch); | |
182 Node* etrue = effect; | |
183 Node* vtrue = top; | |
184 | |
185 Node* if_false = graph()->NewNode(common()->IfFalse(), branch); | |
186 Node* efalse = effect; | |
187 Node* vfalse; | |
188 { | |
189 Node* target = pretenure == NOT_TENURED | |
190 ? jsgraph()->AllocateInNewSpaceStubConstant() | |
191 : jsgraph()->AllocateInOldSpaceStubConstant(); | |
192 if (!allocate_operator_.is_set()) { | |
193 CallDescriptor* descriptor = | |
194 Linkage::GetAllocateCallDescriptor(graph()->zone()); | |
195 allocate_operator_.set(common()->Call(descriptor)); | |
196 } | |
197 vfalse = efalse = graph()->NewNode(allocate_operator_.get(), target, | |
198 size, efalse, if_false); | |
199 vfalse = graph()->NewNode(machine()->IntSub(), vfalse, | |
200 jsgraph()->IntPtrConstant(kHeapObjectTag)); | |
201 } | |
202 | |
203 control = graph()->NewNode(common()->Merge(2), if_true, if_false); | |
204 effect = graph()->NewNode(common()->EffectPhi(2), etrue, efalse, control); | |
205 value = graph()->NewNode( | |
206 common()->Phi(MachineType::PointerRepresentation(), 2), vtrue, vfalse, | |
207 control); | |
208 | |
209 // Compute the new top and write it back. | |
210 top = graph()->NewNode(machine()->IntAdd(), value, | |
211 jsgraph()->IntPtrConstant(object_size)); | |
212 effect = graph()->NewNode( | |
213 machine()->Store(StoreRepresentation( | |
214 MachineType::PointerRepresentation(), kNoWriteBarrier)), | |
215 top_address, jsgraph()->IntPtrConstant(0), top, effect, control); | |
216 | |
217 // Compute the initial object address. | |
218 value = graph()->NewNode( | |
219 machine()->BitcastWordToTagged(), | |
220 graph()->NewNode(machine()->IntAdd(), value, | |
221 jsgraph()->IntPtrConstant(kHeapObjectTag))); | |
222 | |
223 // Start a new allocation group. | |
224 AllocationGroup* group = | |
225 new (zone()) AllocationGroup(value, pretenure, size, zone()); | |
226 state = new (zone()) AllocationState(group, object_size, top); | |
227 } | |
228 } else { | |
229   // Load allocation top and limit. | |
    Hannes Payer (out of office), 2016/05/10 07:49:33:
        This code is similar to the one in the else case a
    Benedikt Meurer, 2016/05/10 08:01:45:
        It's just almost the same, but not 100%. For now I
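    (The load sequence here does mirror the one in the folding branch above, but
    the surrounding code is not quite the same: this path computes new_top from
    a dynamic size, writes the top pointer back only on the fast path, produces
    a value Phi with MachineRepresentation::kTagged instead of the pointer
    representation, and starts a group without a mutable size node, so later
    allocations cannot be folded into it.)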
230 Node* top = effect = | |
231 graph()->NewNode(machine()->Load(MachineType::Pointer()), top_address, | |
232 jsgraph()->IntPtrConstant(0), effect, control); | |
233 Node* limit = effect = | |
234 graph()->NewNode(machine()->Load(MachineType::Pointer()), limit_address, | |
235 jsgraph()->IntPtrConstant(0), effect, control); | |
236 | |
237 // Compute the new top. | |
238 Node* new_top = graph()->NewNode( | |
239 machine()->IntAdd(), top, | |
240 machine()->Is64() | |
241 ? graph()->NewNode(machine()->ChangeInt32ToInt64(), size) | |
242 : size); | |
243 | |
244 // Check if we can do bump pointer allocation here. | |
245 Node* check = graph()->NewNode(machine()->UintLessThan(), new_top, limit); | |
246 Node* branch = | |
247 graph()->NewNode(common()->Branch(BranchHint::kTrue), check, control); | |
248 | |
249 Node* if_true = graph()->NewNode(common()->IfTrue(), branch); | |
250 Node* etrue = effect; | |
251 Node* vtrue; | |
252 { | |
253 etrue = graph()->NewNode( | |
254 machine()->Store(StoreRepresentation( | |
255 MachineType::PointerRepresentation(), kNoWriteBarrier)), | |
256 top_address, jsgraph()->IntPtrConstant(0), new_top, etrue, if_true); | |
257 vtrue = graph()->NewNode( | |
258 machine()->BitcastWordToTagged(), | |
259 graph()->NewNode(machine()->IntAdd(), top, | |
260 jsgraph()->IntPtrConstant(kHeapObjectTag))); | |
261 } | |
262 | |
263 Node* if_false = graph()->NewNode(common()->IfFalse(), branch); | |
264 Node* efalse = effect; | |
265 Node* vfalse; | |
266 { | |
267 Node* target = pretenure == NOT_TENURED | |
268 ? jsgraph()->AllocateInNewSpaceStubConstant() | |
269 : jsgraph()->AllocateInOldSpaceStubConstant(); | |
270 if (!allocate_operator_.is_set()) { | |
271 CallDescriptor* descriptor = | |
272 Linkage::GetAllocateCallDescriptor(graph()->zone()); | |
273 allocate_operator_.set(common()->Call(descriptor)); | |
274 } | |
275 vfalse = efalse = graph()->NewNode(allocate_operator_.get(), target, size, | |
276 efalse, if_false); | |
277 } | |
278 | |
279 control = graph()->NewNode(common()->Merge(2), if_true, if_false); | |
280 effect = graph()->NewNode(common()->EffectPhi(2), etrue, efalse, control); | |
281 value = graph()->NewNode(common()->Phi(MachineRepresentation::kTagged, 2), | |
282 vtrue, vfalse, control); | |
283 | |
284 // Create an unfoldable allocation group. | |
285 AllocationGroup* group = | |
286 new (zone()) AllocationGroup(value, pretenure, zone()); | |
287 state = new (zone()) AllocationState(group); | |
288 } | |
289 | |
290 // Replace all effect uses of {node} with the {effect}, enqueue the | |
291 // effect uses for further processing, and replace all value uses of | |
292 // {node} with the {value}. | |
293 for (Edge edge : node->use_edges()) { | |
294 if (NodeProperties::IsEffectEdge(edge)) { | |
295 EnqueueUse(edge.from(), edge.index(), state); | |
296 edge.UpdateTo(effect); | |
297 } else { | |
298 DCHECK(NodeProperties::IsValueEdge(edge)); | |
299 edge.UpdateTo(value); | |
300 } | |
301 } | |
302 | |
303 // Kill the {node} to make sure we don't leave dangling dead uses. | |
304 node->Kill(); | |
305 } | |
306 | |
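Both branches of VisitAllocate emit the same basic shape: load the space's top and limit, take the fast path when the bumped top stays below the limit, otherwise call the allocation stub (which may trigger a GC), and finally tag the resulting address. A standalone sketch of that shape in plain C++; the slow_path callback stands in for the AllocateInNewSpace/AllocateInOldSpace stubs and is assumed to update the top pointer itself, whereas V8 of course emits graph nodes for this rather than running such a helper:

    #include <cstddef>
    #include <cstdint>

    constexpr uintptr_t kHeapObjectTag = 1;  // low-bit tag carried by heap pointers

    // top/limit point at the space's bump pointer and allocation limit.
    // slow_path allocates `size` bytes (possibly after a GC), updates *top, and
    // returns a tagged pointer, like the allocation stubs called above.
    uintptr_t Allocate(uintptr_t* top, uintptr_t* limit, size_t size,
                       uintptr_t (*slow_path)(size_t)) {
      uintptr_t untagged;
      if (*top + size < *limit) {   // mirrors the UintLessThan check emitted above
        untagged = *top;            // fast path: bump-pointer allocation
        *top += size;
      } else {
        untagged = slow_path(size) - kHeapObjectTag;  // strip the stub's tag
      }
      return untagged + kHeapObjectTag;  // BitcastWordToTagged equivalent
    }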
307 void MemoryOptimizer::VisitCall(Node* node, AllocationState const* state) { | |
308 DCHECK_EQ(IrOpcode::kCall, node->opcode()); | |
309 // If the call can allocate, we start with a fresh state. | |
310 if (!(CallDescriptorOf(node->op())->flags() & CallDescriptor::kNoAllocate)) { | |
311 state = empty_state(); | |
312 } | |
313 EnqueueUses(node, state); | |
314 } | |
315 | |
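Resetting to the empty state here is the conservative choice: a call that is not marked kNoAllocate may allocate and therefore trigger a garbage collection, after which the cached allocation top (and the assumption that objects of the current group are still freshly allocated in new space) would be stale, so neither allocation folding nor write-barrier elimination may be carried across such a call.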
316 void MemoryOptimizer::VisitLoadElement(Node* node, | |
317 AllocationState const* state) { | |
318 DCHECK_EQ(IrOpcode::kLoadElement, node->opcode()); | |
319 ElementAccess const& access = ElementAccessOf(node->op()); | |
320 Node* index = node->InputAt(1); | |
321 node->ReplaceInput(1, ComputeIndex(access, index)); | |
322 NodeProperties::ChangeOp(node, machine()->Load(access.machine_type)); | |
323 EnqueueUses(node, state); | |
324 } | |
325 | |
326 void MemoryOptimizer::VisitLoadField(Node* node, AllocationState const* state) { | |
327 DCHECK_EQ(IrOpcode::kLoadField, node->opcode()); | |
328 FieldAccess const& access = FieldAccessOf(node->op()); | |
329 Node* offset = jsgraph()->IntPtrConstant(access.offset - access.tag()); | |
330 node->InsertInput(graph()->zone(), 1, offset); | |
331 NodeProperties::ChangeOp(node, machine()->Load(access.machine_type)); | |
332 EnqueueUses(node, state); | |
333 } | |
334 | |
335 void MemoryOptimizer::VisitStoreElement(Node* node, | |
336 AllocationState const* state) { | |
337 DCHECK_EQ(IrOpcode::kStoreElement, node->opcode()); | |
338 ElementAccess const& access = ElementAccessOf(node->op()); | |
339 Node* object = node->InputAt(0); | |
340 Node* index = node->InputAt(1); | |
341 WriteBarrierKind write_barrier_kind = | |
342 ComputeWriteBarrierKind(object, state, access.write_barrier_kind); | |
343 node->ReplaceInput(1, ComputeIndex(access, index)); | |
344 NodeProperties::ChangeOp( | |
345 node, machine()->Store(StoreRepresentation( | |
346 access.machine_type.representation(), write_barrier_kind))); | |
347 EnqueueUses(node, state); | |
348 } | |
349 | |
350 void MemoryOptimizer::VisitStoreField(Node* node, | |
351 AllocationState const* state) { | |
352 DCHECK_EQ(IrOpcode::kStoreField, node->opcode()); | |
353 FieldAccess const& access = FieldAccessOf(node->op()); | |
354 Node* object = node->InputAt(0); | |
355 WriteBarrierKind write_barrier_kind = | |
356 ComputeWriteBarrierKind(object, state, access.write_barrier_kind); | |
357 Node* offset = jsgraph()->IntPtrConstant(access.offset - access.tag()); | |
358 node->InsertInput(graph()->zone(), 1, offset); | |
359 NodeProperties::ChangeOp( | |
360 node, machine()->Store(StoreRepresentation( | |
361 access.machine_type.representation(), write_barrier_kind))); | |
362 EnqueueUses(node, state); | |
363 } | |
364 | |
365 void MemoryOptimizer::VisitOtherEffect(Node* node, | |
366 AllocationState const* state) { | |
367 EnqueueUses(node, state); | |
368 } | |
369 | |
370 Node* MemoryOptimizer::ComputeIndex(ElementAccess const& access, Node* key) { | |
371 Node* index = key; | |
372 int element_size_shift = | |
373 ElementSizeLog2Of(access.machine_type.representation()); | |
374 if (element_size_shift) { | |
375 index = graph()->NewNode(machine()->Word32Shl(), index, | |
376 jsgraph()->Int32Constant(element_size_shift)); | |
377 } | |
378 const int fixed_offset = access.header_size - access.tag(); | |
379 if (fixed_offset) { | |
380 index = graph()->NewNode(machine()->Int32Add(), index, | |
381 jsgraph()->Int32Constant(fixed_offset)); | |
382 } | |
383 if (machine()->Is64()) { | |
384 // TODO(turbofan): This is probably only correct for typed arrays, and only | |
385 // if the typed arrays are at most 2GiB in size, which happens to match | |
386 // exactly our current situation. | |
387 index = graph()->NewNode(machine()->ChangeUint32ToUint64(), index); | |
388 } | |
389 return index; | |
390 } | |
391 | |
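ComputeIndex turns an element key into the byte offset used by the untagged machine Load/Store: shift the key by log2 of the element size, add the header size minus the heap-object tag, and zero-extend to 64 bits where needed. A small sketch of the same arithmetic with illustrative constants (the 16-byte header and 8-byte elements below are example values, not taken from a real ElementAccess):

    #include <cstdint>

    // Byte offset of element `key` relative to the tagged base pointer.
    // element_size_log2: log2 of the element width in bytes.
    // header_size: bytes of object header before the first element.
    // tag: the heap-object tag, subtracted because the base pointer is tagged.
    uint64_t ElementOffset(uint32_t key, int element_size_log2, int header_size,
                           int tag) {
      uint32_t index = key << element_size_log2;          // key * element size
      index += static_cast<uint32_t>(header_size - tag);  // skip header, untag
      return static_cast<uint64_t>(index);                // ChangeUint32ToUint64
    }

    // Example: 8-byte elements (log2 = 3), a 16-byte header and tag 1 give
    // ElementOffset(2, 3, 16, 1) == 2 * 8 + 15 == 31.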
392 WriteBarrierKind MemoryOptimizer::ComputeWriteBarrierKind( | |
393 Node* object, AllocationState const* state, | |
394 WriteBarrierKind write_barrier_kind) { | |
395 if (state->IsNewSpaceAllocation() && state->group()->Contains(object)) { | |
396 write_barrier_kind = kNoWriteBarrier; | |
397 } | |
398 return write_barrier_kind; | |
399 } | |
400 | |
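The write-barrier elimination rests on one observation: a store needs the generational barrier only if the receiving object might be outside new space, but an object that still belongs to the dominating new-space allocation group was allocated on this very effect path with no intervening GC, so it is certainly still in new space and the scavenger will scan it anyway. A sketch of that decision with plain types (Group, Barrier and the node-id set are illustrative stand-ins):

    #include <set>

    enum class Barrier { kNone, kFull };

    struct Group {
      bool is_new_space;
      std::set<int> node_ids;  // ids of nodes known to refer into this group
      bool Contains(int id) const { return node_ids.count(id) != 0; }
    };

    struct State { const Group* group = nullptr; };

    // Keep the requested barrier unless the stored-into object provably belongs
    // to the dominating new-space allocation group.
    Barrier ComputeBarrier(int object_id, const State& state, Barrier requested) {
      if (state.group != nullptr && state.group->is_new_space &&
          state.group->Contains(object_id)) {
        return Barrier::kNone;
      }
      return requested;
    }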
401 MemoryOptimizer::AllocationState const* MemoryOptimizer::MergeStates( | |
402 AllocationStates const& states) { | |
403 // Check if all states are the same; or at least if all allocation | |
404 // states belong to the same allocation group. | |
405 AllocationState const* state = states.front(); | |
406 AllocationGroup* group = state->group(); | |
407 for (size_t i = 1; i < states.size(); ++i) { | |
408 if (states[i] != state) state = nullptr; | |
409 if (states[i]->group() != group) group = nullptr; | |
410 } | |
411 if (state == nullptr) { | |
412 if (group != nullptr) { | |
413 // We cannot fold any more allocations into this group, but we can still | |
414 // eliminate write barriers on stores to this group. | |
415 // TODO(bmeurer): We could potentially just create a Phi here to merge | |
416 // the various tops; but we need to pay special attention not to create | |
417 // an unschedulable graph. | |
418 state = new (zone()) AllocationState(group); | |
419 } else { | |
420 // The states are from different allocation groups. | |
421 state = empty_state(); | |
422 } | |
423 } | |
424 return state; | |
425 } | |
426 | |
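MergeStates is a conservative join: if every input is literally the same state it is kept; if the inputs at least agree on the allocation group, the group survives (so write barriers on it can still be dropped) but the exact top and size are forgotten; otherwise everything collapses to the empty state. A compact model of that join in plain C++ (State and Group are simplified stand-ins; `foldable` models whether top and size are known):

    #include <vector>

    struct Group;

    struct State {
      const Group* group = nullptr;  // nullptr in the empty state
      bool foldable = false;         // true only when top and size are known exactly
    };

    State Merge(const std::vector<const State*>& states) {
      const State* same = states.front();
      const Group* group = same->group;
      for (const State* s : states) {
        if (s != same) same = nullptr;           // inputs are not all identical
        if (s->group != group) group = nullptr;  // inputs do not even share a group
      }
      if (same != nullptr) return *same;                 // keep the common state
      if (group != nullptr) return State{group, false};  // group-only knowledge
      return State{};                                    // fall back to empty
    }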
427 void MemoryOptimizer::EnqueueMerge(Node* node, int index, | |
428 AllocationState const* state) { | |
429 DCHECK_EQ(IrOpcode::kEffectPhi, node->opcode()); | |
430 int const input_count = node->InputCount() - 1; | |
431 DCHECK_LT(0, input_count); | |
432 Node* const control = node->InputAt(input_count); | |
433 if (control->opcode() == IrOpcode::kLoop) { | |
434 // For loops we always start with an empty state at the beginning. | |
435 if (index == 0) EnqueueUses(node, empty_state()); | |
436 } else { | |
437 DCHECK_EQ(IrOpcode::kMerge, control->opcode()); | |
438 // Check if we already know about this pending merge. | |
439 NodeId const id = node->id(); | |
440 auto it = pending_.find(id); | |
441 if (it == pending_.end()) { | |
442 // Insert a new pending merge. | |
443 it = pending_.insert(std::make_pair(id, AllocationStates(zone()))).first; | |
444 } | |
445 // Add the next input state. | |
446 it->second.push_back(state); | |
447 // Check if states for all inputs are available by now. | |
448 if (it->second.size() == static_cast<size_t>(input_count)) { | |
449 // All inputs to this effect merge are done, merge the states given all | |
450 // input constraints, drop the pending merge and enqueue uses of the | |
451 // EffectPhi {node}. | |
452 state = MergeStates(it->second); | |
453 EnqueueUses(node, state); | |
454 pending_.erase(it); | |
455 } | |
456 } | |
457 } | |
458 | |
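EffectPhis are processed lazily: a loop header is entered immediately with the empty state (the back edge is never waited for), while an ordinary merge parks each incoming state in pending_ until one state per effect input has arrived, and only then joins them and enqueues the phi's uses. A sketch of that bookkeeping with plain containers (names are invented):

    #include <map>
    #include <utility>
    #include <vector>

    using NodeId = int;
    struct State;

    // Record one incoming state for a merge; returns true and hands back all of
    // them once the last effect input has reported in.
    bool AddMergeInput(std::map<NodeId, std::vector<const State*>>* pending,
                       NodeId merge, int input_count, const State* state,
                       std::vector<const State*>* all) {
      std::vector<const State*>& inputs = (*pending)[merge];
      inputs.push_back(state);
      if (static_cast<int>(inputs.size()) < input_count) return false;
      *all = std::move(inputs);
      pending->erase(merge);
      return true;
    }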
459 void MemoryOptimizer::EnqueueUses(Node* node, AllocationState const* state) { | |
460 for (Edge const edge : node->use_edges()) { | |
461 if (NodeProperties::IsEffectEdge(edge)) { | |
462 EnqueueUse(edge.from(), edge.index(), state); | |
463 } | |
464 } | |
465 } | |
466 | |
467 void MemoryOptimizer::EnqueueUse(Node* node, int index, | |
468 AllocationState const* state) { | |
469 if (node->opcode() == IrOpcode::kEffectPhi) { | |
470 // An EffectPhi represents a merge of different effect chains, which | |
471 // needs special handling depending on whether the merge is part of a | |
472 // loop or just a normal control join. | |
473 EnqueueMerge(node, index, state); | |
474 } else { | |
475 Token token = {node, state}; | |
476 tokens_.push(token); | |
477 } | |
478 } | |
479 | |
480 Graph* MemoryOptimizer::graph() const { return jsgraph()->graph(); } | |
481 | |
482 Isolate* MemoryOptimizer::isolate() const { return jsgraph()->isolate(); } | |
483 | |
484 CommonOperatorBuilder* MemoryOptimizer::common() const { | |
485 return jsgraph()->common(); | |
486 } | |
487 | |
488 MachineOperatorBuilder* MemoryOptimizer::machine() const { | |
489 return jsgraph()->machine(); | |
490 } | |
491 | |
492 } // namespace compiler | |
493 } // namespace internal | |
494 } // namespace v8 | |