Chromium Code Reviews

Side by Side Diff: runtime/vm/flow_graph_optimizer.cc

Issue 1679853002: VM: Move redundancy elimination phases into a separate file. (Closed) Base URL: git@github.com:dart-lang/sdk.git@master
Patch Set: fixed indentation Created 4 years, 10 months ago
1 1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file
2 2 // for details. All rights reserved. Use of this source code is governed by a
3 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 5 #include "vm/flow_graph_optimizer.h"
6 6
7 7 #include "vm/bit_vector.h"
8 8 #include "vm/branch_optimizer.h"
9 9 #include "vm/cha.h"
10 10 #include "vm/compiler.h"
(...skipping 14 matching lines...)
25 25 #include "vm/stack_frame.h"
26 26 #include "vm/symbols.h"
27 27
28 28 namespace dart {
29 29
30 30 DEFINE_FLAG(int, getter_setter_ratio, 13,
31 31 "Ratio of getter/setter usage used for double field unboxing heuristics");
32 32 DEFINE_FLAG(bool, guess_icdata_cid, true,
33 33 "Artificially create type feedback for arithmetic etc. operations"
34 34 " by guessing the other unknown argument cid");
35 DEFINE_FLAG(bool, load_cse, true, "Use redundant load elimination.");
36 DEFINE_FLAG(bool, dead_store_elimination, true, "Eliminate dead stores");
37 35 DEFINE_FLAG(int, max_polymorphic_checks, 4,
38 36 "Maximum number of polymorphic check, otherwise it is megamorphic.");
39 37 DEFINE_FLAG(int, max_equality_polymorphic_checks, 32,
40 38 "Maximum number of polymorphic checks in equality operator,"
41 39 " otherwise use megamorphic dispatch.");
42 40 DEFINE_FLAG(bool, merge_sin_cos, false, "Merge sin/cos into sincos");
43 DEFINE_FLAG(bool, trace_load_optimization, false,
44 "Print live sets for load optimization pass.");
45 41 DEFINE_FLAG(bool, trace_optimization, false, "Print optimization details.");
46 42 DEFINE_FLAG(bool, truncating_left_shift, true,
47 43 "Optimize left shift to truncate if possible");
48 44 DEFINE_FLAG(bool, use_cha_deopt, true,
49 45 "Use class hierarchy analysis even if it can cause deoptimization.");
50 46 #if defined(TARGET_ARCH_ARM) || defined(TARGET_ARCH_IA32)
51 47 DEFINE_FLAG(bool, trace_smi_widening, false, "Trace Smi->Int32 widening pass.");
52 48 #endif
53 49
54 50 DECLARE_FLAG(bool, precompilation);
55 51 DECLARE_FLAG(bool, polymorphic_with_deopt);
56 52 DECLARE_FLAG(bool, source_lines);
57 53 DECLARE_FLAG(bool, trace_cha);
58 54 DECLARE_FLAG(bool, trace_field_guards);
59 55 DECLARE_FLAG(bool, trace_type_check_elimination);
60 56 DECLARE_FLAG(bool, warn_on_javascript_compatibility);
61 DECLARE_FLAG(bool, fields_may_be_reset);
62 57
63 58 // Quick access to the current isolate and zone.
64 59 #define I (isolate())
65 60 #define Z (zone())
66 61
67 62 static bool ShouldInlineSimd() {
68 63 return FlowGraphCompiler::SupportsUnboxedSimd128();
69 64 }
70 65
71 66
(...skipping 563 matching lines...)
635 630 }
636 631 }
637 632 }
638 633 TryMergeTruncDivMod(&div_mod_merge);
639 634 TryMergeMathUnary(&sin_cos_merge);
640 635 current_iterator_ = NULL;
641 636 }
642 637 }
643 638
644 639
645 static void EnsureSSATempIndex(FlowGraph* graph,
646 Definition* defn,
647 Definition* replacement) {
648 if ((replacement->ssa_temp_index() == -1) &&
649 (defn->ssa_temp_index() != -1)) {
650 graph->AllocateSSAIndexes(replacement);
651 }
652 }
653
654
655 static void ReplaceCurrentInstruction(ForwardInstructionIterator* iterator,
656 Instruction* current,
657 Instruction* replacement,
658 FlowGraph* graph) {
659 Definition* current_defn = current->AsDefinition();
660 if ((replacement != NULL) && (current_defn != NULL)) {
661 Definition* replacement_defn = replacement->AsDefinition();
662 ASSERT(replacement_defn != NULL);
663 current_defn->ReplaceUsesWith(replacement_defn);
664 EnsureSSATempIndex(graph, current_defn, replacement_defn);
665
666 if (FLAG_trace_optimization) {
667 THR_Print("Replacing v%" Pd " with v%" Pd "\n",
668 current_defn->ssa_temp_index(),
669 replacement_defn->ssa_temp_index());
670 }
671 } else if (FLAG_trace_optimization) {
672 if (current_defn == NULL) {
673 THR_Print("Removing %s\n", current->DebugName());
674 } else {
675 ASSERT(!current_defn->HasUses());
676 THR_Print("Removing v%" Pd ".\n", current_defn->ssa_temp_index());
677 }
678 }
679 iterator->RemoveCurrentFromGraph();
680 }
681
682
683 640 bool FlowGraphOptimizer::Canonicalize() {
684 641 bool changed = false;
685 642 for (intptr_t i = 0; i < block_order_.length(); ++i) {
686 643 BlockEntryInstr* entry = block_order_[i];
687 644 for (ForwardInstructionIterator it(entry); !it.Done(); it.Advance()) {
688 645 Instruction* current = it.Current();
689 646 if (current->HasUnmatchedInputRepresentations()) {
690 647 // Can't canonicalize this instruction until all conversions for its
691 648 // inputs are inserted.
692 649 continue;
693 650 }
694 651
695 652 Instruction* replacement = current->Canonicalize(flow_graph());
696 653
697 654 if (replacement != current) {
698 655 // For non-definitions Canonicalize should return either NULL or
699 656 // this.
700 657 ASSERT((replacement == NULL) || current->IsDefinition());
701 ReplaceCurrentInstruction(&it, current, replacement, flow_graph_);
658 flow_graph_->ReplaceCurrentInstruction(&it, current, replacement);
702 659 changed = true;
703 660 }
704 661 }
705 662 }
706 663 return changed;
707 664 }
708 665
709 666
710 667 static bool IsUnboxedInteger(Representation rep) {
711 668 return (rep == kUnboxedInt32) ||
(...skipping 4354 matching lines...)
5066 5023 // save enough range information in the ICData to drive this decision.
5067 5024 }
5068 5025 #endif
5069 5026
5070 5027 void FlowGraphOptimizer::InferIntRanges() {
5071 5028 RangeAnalysis range_analysis(flow_graph_);
5072 5029 range_analysis.Analyze();
5073 5030 }
5074 5031
5075 5032
5076 void TryCatchAnalyzer::Optimize(FlowGraph* flow_graph) {
5077 // For every catch-block: Iterate over all call instructions inside the
5078 // corresponding try-block and figure out for each environment value if it
5079 // is the same constant at all calls. If yes, replace the initial definition
5080 // at the catch-entry with this constant.
5081 const GrowableArray<CatchBlockEntryInstr*>& catch_entries =
5082 flow_graph->graph_entry()->catch_entries();
5083 intptr_t base = kFirstLocalSlotFromFp + flow_graph->num_non_copied_params();
5084 for (intptr_t catch_idx = 0;
5085 catch_idx < catch_entries.length();
5086 ++catch_idx) {
5087 CatchBlockEntryInstr* catch_entry = catch_entries[catch_idx];
5088
5089 // Initialize cdefs with the original initial definitions (ParameterInstr).
5090 // The following representation is used:
5091 // ParameterInstr => unknown
5092 // ConstantInstr => known constant
5093 // NULL => non-constant
5094 GrowableArray<Definition*>* idefs = catch_entry->initial_definitions();
5095 GrowableArray<Definition*> cdefs(idefs->length());
5096 cdefs.AddArray(*idefs);
5097
5098 // exception_var and stacktrace_var are never constant.
5099 intptr_t ex_idx = base - catch_entry->exception_var().index();
5100 intptr_t st_idx = base - catch_entry->stacktrace_var().index();
5101 cdefs[ex_idx] = cdefs[st_idx] = NULL;
5102
5103 for (BlockIterator block_it = flow_graph->reverse_postorder_iterator();
5104 !block_it.Done();
5105 block_it.Advance()) {
5106 BlockEntryInstr* block = block_it.Current();
5107 if (block->try_index() == catch_entry->catch_try_index()) {
5108 for (ForwardInstructionIterator instr_it(block);
5109 !instr_it.Done();
5110 instr_it.Advance()) {
5111 Instruction* current = instr_it.Current();
5112 if (current->MayThrow()) {
5113 Environment* env = current->env()->Outermost();
5114 ASSERT(env != NULL);
5115 for (intptr_t env_idx = 0; env_idx < cdefs.length(); ++env_idx) {
5116 if (cdefs[env_idx] != NULL &&
5117 env->ValueAt(env_idx)->BindsToConstant()) {
5118 cdefs[env_idx] = env->ValueAt(env_idx)->definition();
5119 }
5120 if (cdefs[env_idx] != env->ValueAt(env_idx)->definition()) {
5121 cdefs[env_idx] = NULL;
5122 }
5123 }
5124 }
5125 }
5126 }
5127 }
5128 for (intptr_t j = 0; j < idefs->length(); ++j) {
5129 if (cdefs[j] != NULL && cdefs[j]->IsConstant()) {
5130 // TODO(fschneider): Use constants from the constant pool.
5131 Definition* old = (*idefs)[j];
5132 ConstantInstr* orig = cdefs[j]->AsConstant();
5133 ConstantInstr* copy =
5134 new(flow_graph->zone()) ConstantInstr(orig->value());
5135 copy->set_ssa_temp_index(flow_graph->alloc_ssa_temp_index());
5136 old->ReplaceUsesWith(copy);
5137 (*idefs)[j] = copy;
5138 }
5139 }
5140 }
5141 }
5142
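A minimal standalone sketch (illustrative only, not VM code; all names here are invented) of the per-slot merge the loop above performs. Each environment slot starts as "unknown" (the initial ParameterInstr), the first constant observed at a call refines it, and any disagreement collapses the slot to "non-constant" (NULL in cdefs); the real code compares constant definitions rather than raw values.

  #include <cstdint>

  struct SlotState {
    enum Kind { kUnknown, kConstant, kNonConstant } kind;
    std::int64_t value;  // Only meaningful when kind == kConstant.
  };

  // Merge the constant observed at one MayThrow() call into the running state.
  static SlotState Merge(SlotState current, std::int64_t observed) {
    if (current.kind == SlotState::kNonConstant) return current;
    if (current.kind == SlotState::kUnknown) return {SlotState::kConstant, observed};
    if (current.value == observed) return current;
    return {SlotState::kNonConstant, 0};
  }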
5143
5144 LICM::LICM(FlowGraph* flow_graph) : flow_graph_(flow_graph) {
5145 ASSERT(flow_graph->is_licm_allowed());
5146 }
5147
5148
5149 void LICM::Hoist(ForwardInstructionIterator* it,
5150 BlockEntryInstr* pre_header,
5151 Instruction* current) {
5152 if (current->IsCheckClass()) {
5153 current->AsCheckClass()->set_licm_hoisted(true);
5154 } else if (current->IsCheckSmi()) {
5155 current->AsCheckSmi()->set_licm_hoisted(true);
5156 } else if (current->IsCheckEitherNonSmi()) {
5157 current->AsCheckEitherNonSmi()->set_licm_hoisted(true);
5158 } else if (current->IsCheckArrayBound()) {
5159 current->AsCheckArrayBound()->set_licm_hoisted(true);
5160 }
5161 if (FLAG_trace_optimization) {
5162 THR_Print("Hoisting instruction %s:%" Pd " from B%" Pd " to B%" Pd "\n",
5163 current->DebugName(),
5164 current->GetDeoptId(),
5165 current->GetBlock()->block_id(),
5166 pre_header->block_id());
5167 }
5168 // Move the instruction out of the loop.
5169 current->RemoveEnvironment();
5170 if (it != NULL) {
5171 it->RemoveCurrentFromGraph();
5172 } else {
5173 current->RemoveFromGraph();
5174 }
5175 GotoInstr* last = pre_header->last_instruction()->AsGoto();
5176 // Using kind kEffect will not assign a fresh ssa temporary index.
5177 flow_graph()->InsertBefore(last, current, last->env(), FlowGraph::kEffect);
5178 current->CopyDeoptIdFrom(*last);
5179 }
5180
5181
5182 void LICM::TrySpecializeSmiPhi(PhiInstr* phi,
5183 BlockEntryInstr* header,
5184 BlockEntryInstr* pre_header) {
5185 if (phi->Type()->ToCid() == kSmiCid) {
5186 return;
5187 }
5188
5189 // Check if there is only a single kDynamicCid input to the phi that
5190 // comes from the pre-header.
5191 const intptr_t kNotFound = -1;
5192 intptr_t non_smi_input = kNotFound;
5193 for (intptr_t i = 0; i < phi->InputCount(); ++i) {
5194 Value* input = phi->InputAt(i);
5195 if (input->Type()->ToCid() != kSmiCid) {
5196 if ((non_smi_input != kNotFound) ||
5197 (input->Type()->ToCid() != kDynamicCid)) {
5198 // There are multiple kDynamicCid inputs or there is an input that is
5199 // known to be non-smi.
5200 return;
5201 } else {
5202 non_smi_input = i;
5203 }
5204 }
5205 }
5206
5207 if ((non_smi_input == kNotFound) ||
5208 (phi->block()->PredecessorAt(non_smi_input) != pre_header)) {
5209 return;
5210 }
5211
5212 CheckSmiInstr* check = NULL;
5213 for (Value* use = phi->input_use_list();
5214 (use != NULL) && (check == NULL);
5215 use = use->next_use()) {
5216 check = use->instruction()->AsCheckSmi();
5217 }
5218
5219 if (check == NULL) {
5220 return;
5221 }
5222
5223 // Hoist the CheckSmi instruction and make this phi a smi one.
5224 Hoist(NULL, pre_header, check);
5225
5226 // Replace value we are checking with phi's input.
5227 check->value()->BindTo(phi->InputAt(non_smi_input)->definition());
5228
5229 phi->UpdateType(CompileType::FromCid(kSmiCid));
5230 }
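An illustration (hypothetical snippet, not part of this CL) of the shape this targets: in a counting loop such as

  i = n;                       // 'n' has unknown cid at the pre-header
  while (i < 10) {
    use(i);
    i = i + 1;                 // the back-edge input is already a smi
  }

the loop phi for 'i' has exactly one kDynamicCid input, the one flowing in from the pre-header. Hoisting an existing CheckSmi onto that pre-header input lets the phi itself be retyped as kSmiCid for the whole loop body.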
5231
5232
5233 // Load instructions handled by load elimination.
5234 static bool IsLoadEliminationCandidate(Instruction* instr) {
5235 return instr->IsLoadField()
5236 || instr->IsLoadIndexed()
5237 || instr->IsLoadStaticField();
5238 }
5239
5240
5241 static bool IsLoopInvariantLoad(ZoneGrowableArray<BitVector*>* sets,
5242 intptr_t loop_header_index,
5243 Instruction* instr) {
5244 return IsLoadEliminationCandidate(instr) &&
5245 (sets != NULL) &&
5246 instr->HasPlaceId() &&
5247 ((*sets)[loop_header_index] != NULL) &&
5248 (*sets)[loop_header_index]->Contains(instr->place_id());
5249 }
5250
5251
5252 void LICM::OptimisticallySpecializeSmiPhis() {
5253 if (!flow_graph()->function().allows_hoisting_check_class() ||
5254 FLAG_precompilation) {
5255 // Do not hoist anything: we either deoptimized on a hoisted check before,
5256 // or we are compiling precompiled code where we can't do optimistic
5257 // hoisting of checks.
5258 return;
5259 }
5260
5261 const ZoneGrowableArray<BlockEntryInstr*>& loop_headers =
5262 flow_graph()->LoopHeaders();
5263
5264 for (intptr_t i = 0; i < loop_headers.length(); ++i) {
5265 JoinEntryInstr* header = loop_headers[i]->AsJoinEntry();
5266 // Skip loops that don't have a pre-header block.
5267 BlockEntryInstr* pre_header = header->ImmediateDominator();
5268 if (pre_header == NULL) continue;
5269
5270 for (PhiIterator it(header); !it.Done(); it.Advance()) {
5271 TrySpecializeSmiPhi(it.Current(), header, pre_header);
5272 }
5273 }
5274 }
5275
5276
5277 void LICM::Optimize() {
5278 if (!flow_graph()->function().allows_hoisting_check_class()) {
5279 // Do not hoist anything.
5280 return;
5281 }
5282
5283 const ZoneGrowableArray<BlockEntryInstr*>& loop_headers =
5284 flow_graph()->LoopHeaders();
5285
5286 ZoneGrowableArray<BitVector*>* loop_invariant_loads =
5287 flow_graph()->loop_invariant_loads();
5288
5289 BlockEffects* block_effects = flow_graph()->block_effects();
5290
5291 for (intptr_t i = 0; i < loop_headers.length(); ++i) {
5292 BlockEntryInstr* header = loop_headers[i];
5293 // Skip loops that don't have a pre-header block.
5294 BlockEntryInstr* pre_header = header->ImmediateDominator();
5295 if (pre_header == NULL) continue;
5296
5297 for (BitVector::Iterator loop_it(header->loop_info());
5298 !loop_it.Done();
5299 loop_it.Advance()) {
5300 BlockEntryInstr* block = flow_graph()->preorder()[loop_it.Current()];
5301 for (ForwardInstructionIterator it(block);
5302 !it.Done();
5303 it.Advance()) {
5304 Instruction* current = it.Current();
5305 if ((current->AllowsCSE() &&
5306 block_effects->CanBeMovedTo(current, pre_header)) ||
5307 IsLoopInvariantLoad(loop_invariant_loads, i, current)) {
5308 bool inputs_loop_invariant = true;
5309 for (int i = 0; i < current->InputCount(); ++i) {
5310 Definition* input_def = current->InputAt(i)->definition();
5311 if (!input_def->GetBlock()->Dominates(pre_header)) {
5312 inputs_loop_invariant = false;
5313 break;
5314 }
5315 }
5316 if (inputs_loop_invariant &&
5317 !current->IsAssertAssignable() &&
5318 !current->IsAssertBoolean()) {
5319 // TODO(fschneider): Enable hoisting of Assert-instructions
5320 // if it is safe to do so.
5321 Hoist(&it, pre_header, current);
5322 }
5323 }
5324 }
5325 }
5326 }
5327 }
5328
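For intuition, a hypothetical loop (not from this CL) where the conditions above fire:

  for (i = 0; i < n; i++) {
    s = s + a.length * i;      // 'a' is defined before the loop
  }

Assuming nothing inside the loop can store into 'a', the length load allows CSE, its only input ('a') dominates the pre-header, and it is not an Assert instruction, so Hoist() moves it into the pre-header. The multiplication stays in the loop because its input 'i' is a loop phi and does not dominate the pre-header.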
5329
5330 // Place describes an abstract location (e.g. field) that IR can load
5331 // from or store to.
5332 //
5333 // Places are also used to describe wild-card locations also known as aliases,
5334 // that essentially represent sets of places that alias each other. Places A
5335 // and B are said to alias each other if a store into A can affect a load from B.
5336 //
5337 // We distinguish the following aliases:
5338 //
5339 // - for fields
5340 // - *.f, *.@offs - field inside some object;
5341 // - X.f, X.@offs - field inside an allocated object X;
5342 // - for indexed accesses
5343 // - *[*] - non-constant index inside some object;
5344 // - *[C] - constant index inside some object;
5345 // - X[*] - non-constant index inside an allocated object X;
5346 // - X[C] - constant index inside an allocated object X.
5347 //
5348 // Constant indexed places are divided into two subcategories:
5349 //
5350 // - Access to homogeneous array-like objects: Array, ImmutableArray,
5351 // OneByteString, TwoByteString. These objects can only be accessed
5352 // on an element-by-element basis with all elements having the same size.
5353 // This means X[C] aliases X[K] if and only if C === K.
5354 // - TypedData accesses. TypedData allows reading one of the primitive
5355 // data types at the given byte offset. When TypedData is accessed through
5356 // index operator on a typed array or a typed array view it is guaranteed
5357 // that the byte offset is always aligned by the element size. We write
5358 // these accesses as X[C|S], where C is constant byte offset and S is size
5359 // of the data type. Obviously X[C|S] and X[K|U] alias if and only if either
5360 // C = RoundDown(K, S) or K = RoundDown(C, U).
5361 // Note that not all accesses to typed data are aligned: e.g. ByteData
5362 // allows unaligned access through its get*/set* methods.
5363 // A check in Place::SetIndex ensures that we never create a place X[C|S]
5364 // such that C is not aligned by S.
5365 //
5366 // Separating allocations from other objects improves precision of the
5367 // load forwarding pass because of the following two properties:
5368 //
5369 // - if X can be proven to have no aliases itself (i.e. there is no other SSA
5370 // variable that points to X) then no place inside X can be aliased with any
5371 // wildcard dependent place (*.f, *.@offs, *[*], *[C]);
5372 // - given allocations X and Y no place inside X can be aliased with any place
5373 // inside Y even if any of them or both escape.
5374 //
5375 // It is important to realize that a single place can belong to multiple aliases.
5376 // For example, place X.f with an aliased allocation X belongs to both the X.f and *.f
5377 // aliases. Likewise, X[C] with a non-aliased allocation X belongs to the X[C] and X[*]
5378 // aliases.
5379 //
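A standalone check (illustrative only, not part of this file) of the byte-offset rule described above: X[C|S] and X[K|U] alias if and only if C == RoundDown(K, S) or K == RoundDown(C, U), with element sizes that are powers of two.

  #include <cassert>

  // 'size' is a power-of-two element size in bytes.
  static int RoundDown(int offset, int size) { return offset & ~(size - 1); }

  static bool TypedDataAlias(int c, int s, int k, int u) {
    return (c == RoundDown(k, s)) || (k == RoundDown(c, u));
  }

  int main() {
    assert(TypedDataAlias(0, 4, 0, 2));   // X[0|4] overlaps X[0|2]
    assert(TypedDataAlias(0, 4, 2, 2));   // X[0|4] overlaps X[2|2]
    assert(!TypedDataAlias(0, 2, 2, 2));  // adjacent 2-byte elements do not alias
    return 0;
  }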
5380 class Place : public ValueObject {
5381 public:
5382 enum Kind {
5383 kNone,
5384
5385 // Field location. For instance fields is represented as a pair of a Field
5386 // object and an instance (SSA definition) that is being accessed.
5387 // For static fields instance is NULL.
5388 kField,
5389
5390 // VMField location. Represented as a pair of an instance (SSA definition)
5391 // being accessed and offset to the field.
5392 kVMField,
5393
5394 // Indexed location with a non-constant index.
5395 kIndexed,
5396
5397 // Indexed location with a constant index.
5398 kConstantIndexed,
5399 };
5400
5401 // Size of the element accessed by constant index. Size is only important
5402 // for TypedData because those accesses can alias even when constant indexes
5403 // are not the same: X[0|4] aliases X[0|2] and X[2|2].
5404 enum ElementSize {
5405 // If indexed access is not a TypedData access then element size is not
5406 // important because there is only a single possible access size depending
5407 // on the receiver - X[C] aliases X[K] if and only if C == K.
5408 // This is the size set for Array, ImmutableArray, OneByteString and
5409 // TwoByteString accesses.
5410 kNoSize,
5411
5412 // 1 byte (Int8List, Uint8List, Uint8ClampedList).
5413 kInt8,
5414
5415 // 2 bytes (Int16List, Uint16List).
5416 kInt16,
5417
5418 // 4 bytes (Int32List, Uint32List, Float32List).
5419 kInt32,
5420
5421 // 8 bytes (Int64List, Uint64List, Float64List).
5422 kInt64,
5423
5424 // 16 bytes (Int32x4List, Float32x4List, Float64x2List).
5425 kInt128,
5426
5427 kLargestElementSize = kInt128,
5428 };
5429
5430 Place(const Place& other)
5431 : ValueObject(),
5432 flags_(other.flags_),
5433 instance_(other.instance_),
5434 raw_selector_(other.raw_selector_),
5435 id_(other.id_) {
5436 }
5437
5438 // Construct a place from instruction if instruction accesses any place.
5439 // Otherwise constructs kNone place.
5440 Place(Instruction* instr, bool* is_load, bool* is_store)
5441 : flags_(0),
5442 instance_(NULL),
5443 raw_selector_(0),
5444 id_(0) {
5445 switch (instr->tag()) {
5446 case Instruction::kLoadField: {
5447 LoadFieldInstr* load_field = instr->AsLoadField();
5448 set_representation(load_field->representation());
5449 instance_ = load_field->instance()->definition()->OriginalDefinition();
5450 if (load_field->field() != NULL) {
5451 set_kind(kField);
5452 field_ = load_field->field();
5453 } else {
5454 set_kind(kVMField);
5455 offset_in_bytes_ = load_field->offset_in_bytes();
5456 }
5457 *is_load = true;
5458 break;
5459 }
5460
5461 case Instruction::kStoreInstanceField: {
5462 StoreInstanceFieldInstr* store =
5463 instr->AsStoreInstanceField();
5464 set_representation(store->RequiredInputRepresentation(
5465 StoreInstanceFieldInstr::kValuePos));
5466 instance_ = store->instance()->definition()->OriginalDefinition();
5467 if (!store->field().IsNull()) {
5468 set_kind(kField);
5469 field_ = &store->field();
5470 } else {
5471 set_kind(kVMField);
5472 offset_in_bytes_ = store->offset_in_bytes();
5473 }
5474 *is_store = true;
5475 break;
5476 }
5477
5478 case Instruction::kLoadStaticField:
5479 set_kind(kField);
5480 set_representation(instr->AsLoadStaticField()->representation());
5481 field_ = &instr->AsLoadStaticField()->StaticField();
5482 *is_load = true;
5483 break;
5484
5485 case Instruction::kStoreStaticField:
5486 set_kind(kField);
5487 set_representation(instr->AsStoreStaticField()->
5488 RequiredInputRepresentation(StoreStaticFieldInstr::kValuePos));
5489 field_ = &instr->AsStoreStaticField()->field();
5490 *is_store = true;
5491 break;
5492
5493 case Instruction::kLoadIndexed: {
5494 LoadIndexedInstr* load_indexed = instr->AsLoadIndexed();
5495 set_representation(load_indexed->representation());
5496 instance_ = load_indexed->array()->definition()->OriginalDefinition();
5497 SetIndex(load_indexed->index()->definition(),
5498 load_indexed->index_scale(),
5499 load_indexed->class_id());
5500 *is_load = true;
5501 break;
5502 }
5503
5504 case Instruction::kStoreIndexed: {
5505 StoreIndexedInstr* store_indexed = instr->AsStoreIndexed();
5506 set_representation(store_indexed->
5507 RequiredInputRepresentation(StoreIndexedInstr::kValuePos));
5508 instance_ = store_indexed->array()->definition()->OriginalDefinition();
5509 SetIndex(store_indexed->index()->definition(),
5510 store_indexed->index_scale(),
5511 store_indexed->class_id());
5512 *is_store = true;
5513 break;
5514 }
5515
5516 default:
5517 break;
5518 }
5519 }
5520
5521 // Create object representing *[*] alias.
5522 static Place* CreateAnyInstanceAnyIndexAlias(Zone* zone,
5523 intptr_t id) {
5524 return Wrap(zone, Place(
5525 EncodeFlags(kIndexed, kNoRepresentation, kNoSize),
5526 NULL,
5527 0), id);
5528 }
5529
5530 // Return the least generic alias for this place. Given that aliases are
5531 // essentially sets of places, we define the least generic alias as the smallest
5532 // alias that contains this place.
5533 //
5534 // We obtain such alias by a simple transformation:
5535 //
5536 // - for places that depend on an instance X.f, X.@offs, X[i], X[C]
5537 // we drop X if X is not an allocation because in this case X does not
5538 // possess an identity, obtaining aliases *.f, *.@offs, *[i] and *[C]
5539 // respectively;
5540 // - for non-constant indexed places X[i] we drop information about the
5541 // index obtaining alias X[*].
5542 // - we drop information about representation, but keep element size
5543 // if any.
5544 //
5545 Place ToAlias() const {
5546 return Place(
5547 RepresentationBits::update(kNoRepresentation, flags_),
5548 (DependsOnInstance() && IsAllocation(instance())) ? instance() : NULL,
5549 (kind() == kIndexed) ? 0 : raw_selector_);
5550 }
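Concretely, ToAlias performs mappings like the following (illustrative, derived from the rules above):

  //   X.f   where X is an AllocateObject  ->  X.f   (instance kept)
  //   X.f   where X is, say, a parameter  ->  *.f   (instance dropped)
  //   X[i]  with a non-constant index     ->  X[*]  (index dropped)
  //   X[8|kInt32]                         ->  X[8|kInt32] with representation cleared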
5551
5552 bool DependsOnInstance() const {
5553 switch (kind()) {
5554 case kField:
5555 case kVMField:
5556 case kIndexed:
5557 case kConstantIndexed:
5558 return true;
5559
5560 case kNone:
5561 return false;
5562 }
5563
5564 UNREACHABLE();
5565 return false;
5566 }
5567
5568 // Given instance dependent alias X.f, X.@offs, X[C], X[*] return
5569 // wild-card dependent alias *.f, *.@offs, *[C] or *[*] respectively.
5570 Place CopyWithoutInstance() const {
5571 ASSERT(DependsOnInstance());
5572 return Place(flags_, NULL, raw_selector_);
5573 }
5574
5575 // Given alias X[C] or *[C] return X[*] and *[*] respectively.
5576 Place CopyWithoutIndex() const {
5577 ASSERT(kind() == kConstantIndexed);
5578 return Place(EncodeFlags(kIndexed, kNoRepresentation, kNoSize),
5579 instance_,
5580 0);
5581 }
5582
5583 // Given alias X[ByteOffs|S] and a larger element size S', return
5584 // alias X[RoundDown(ByteOffs, S')|S'] - this is the byte offset of a larger
5585 // typed array element that contains this typed array element.
5586 // In other words this method computes the only possible place with the given
5587 // size that can alias this place (due to alignment restrictions).
5588 // For example for X[9|kInt8] and target size kInt32 we would return
5589 // X[8|kInt32].
5590 Place ToLargerElement(ElementSize to) const {
5591 ASSERT(kind() == kConstantIndexed);
5592 ASSERT(element_size() != kNoSize);
5593 ASSERT(element_size() < to);
5594 return Place(ElementSizeBits::update(to, flags_),
5595 instance_,
5596 RoundByteOffset(to, index_constant_));
5597 }
5598
5599
5600 intptr_t id() const { return id_; }
5601
5602 Kind kind() const { return KindBits::decode(flags_); }
5603
5604 Representation representation() const {
5605 return RepresentationBits::decode(flags_);
5606 }
5607
5608 Definition* instance() const {
5609 ASSERT(DependsOnInstance());
5610 return instance_;
5611 }
5612
5613 void set_instance(Definition* def) {
5614 ASSERT(DependsOnInstance());
5615 instance_ = def->OriginalDefinition();
5616 }
5617
5618 const Field& field() const {
5619 ASSERT(kind() == kField);
5620 return *field_;
5621 }
5622
5623 intptr_t offset_in_bytes() const {
5624 ASSERT(kind() == kVMField);
5625 return offset_in_bytes_;
5626 }
5627
5628 Definition* index() const {
5629 ASSERT(kind() == kIndexed);
5630 return index_;
5631 }
5632
5633 ElementSize element_size() const {
5634 return ElementSizeBits::decode(flags_);
5635 }
5636
5637 intptr_t index_constant() const {
5638 ASSERT(kind() == kConstantIndexed);
5639 return index_constant_;
5640 }
5641
5642 static const char* DefinitionName(Definition* def) {
5643 if (def == NULL) {
5644 return "*";
5645 } else {
5646 return Thread::Current()->zone()->PrintToString(
5647 "v%" Pd, def->ssa_temp_index());
5648 }
5649 }
5650
5651 const char* ToCString() const {
5652 switch (kind()) {
5653 case kNone:
5654 return "<none>";
5655
5656 case kField: {
5657 const char* field_name = String::Handle(field().name()).ToCString();
5658 if (field().is_static()) {
5659 return Thread::Current()->zone()->PrintToString(
5660 "<%s>", field_name);
5661 } else {
5662 return Thread::Current()->zone()->PrintToString(
5663 "<%s.%s>", DefinitionName(instance()), field_name);
5664 }
5665 }
5666
5667 case kVMField:
5668 return Thread::Current()->zone()->PrintToString(
5669 "<%s.@%" Pd ">",
5670 DefinitionName(instance()),
5671 offset_in_bytes());
5672
5673 case kIndexed:
5674 return Thread::Current()->zone()->PrintToString(
5675 "<%s[%s]>",
5676 DefinitionName(instance()),
5677 DefinitionName(index()));
5678
5679 case kConstantIndexed:
5680 if (element_size() == kNoSize) {
5681 return Thread::Current()->zone()->PrintToString(
5682 "<%s[%" Pd "]>",
5683 DefinitionName(instance()),
5684 index_constant());
5685 } else {
5686 return Thread::Current()->zone()->PrintToString(
5687 "<%s[%" Pd "|%" Pd "]>",
5688 DefinitionName(instance()),
5689 index_constant(),
5690 ElementSizeMultiplier(element_size()));
5691 }
5692 }
5693 UNREACHABLE();
5694 return "<?>";
5695 }
5696
5697 // Fields that are considered immutable by load optimization.
5698 // Handle static finals as non-final with precompilation because
5699 // they may be reset to uninitialized after compilation.
5700 bool IsImmutableField() const {
5701 return (kind() == kField)
5702 && field().is_final()
5703 && (!field().is_static() || !FLAG_fields_may_be_reset);
5704 }
5705
5706 intptr_t Hashcode() const {
5707 return (flags_ * 63 + reinterpret_cast<intptr_t>(instance_)) * 31 +
5708 FieldHashcode();
5709 }
5710
5711 bool Equals(const Place* other) const {
5712 return (flags_ == other->flags_) &&
5713 (instance_ == other->instance_) &&
5714 SameField(other);
5715 }
5716
5717 // Create a zone allocated copy of this place and assign given id to it.
5718 static Place* Wrap(Zone* zone, const Place& place, intptr_t id);
5719
5720 static bool IsAllocation(Definition* defn) {
5721 return (defn != NULL) &&
5722 (defn->IsAllocateObject() ||
5723 defn->IsCreateArray() ||
5724 defn->IsAllocateUninitializedContext() ||
5725 (defn->IsStaticCall() &&
5726 defn->AsStaticCall()->IsRecognizedFactory()));
5727 }
5728
5729 private:
5730 Place(uword flags, Definition* instance, intptr_t selector)
5731 : flags_(flags),
5732 instance_(instance),
5733 raw_selector_(selector),
5734 id_(0) {
5735 }
5736
5737 bool SameField(const Place* other) const {
5738 return (kind() == kField) ? (field().raw() == other->field().raw())
5739 : (offset_in_bytes_ == other->offset_in_bytes_);
5740 }
5741
5742 intptr_t FieldHashcode() const {
5743 return (kind() == kField) ? reinterpret_cast<intptr_t>(field().raw())
5744 : offset_in_bytes_;
5745 }
5746
5747 void set_representation(Representation rep) {
5748 flags_ = RepresentationBits::update(rep, flags_);
5749 }
5750
5751 void set_kind(Kind kind) {
5752 flags_ = KindBits::update(kind, flags_);
5753 }
5754
5755 void set_element_size(ElementSize scale) {
5756 flags_ = ElementSizeBits::update(scale, flags_);
5757 }
5758
5759 void SetIndex(Definition* index, intptr_t scale, intptr_t class_id) {
5760 ConstantInstr* index_constant = index->AsConstant();
5761 if ((index_constant != NULL) && index_constant->value().IsSmi()) {
5762 const intptr_t index_value = Smi::Cast(index_constant->value()).Value();
5763 const ElementSize size = ElementSizeFor(class_id);
5764 const bool is_typed_data = (size != kNoSize);
5765
5766 // If we are writing into typed data, scale the index to
5767 // get the byte offset. Otherwise ignore the scale.
5768 if (!is_typed_data) {
5769 scale = 1;
5770 }
5771
5772 // Guard against potential multiplication overflow and negative indices.
5773 if ((0 <= index_value) && (index_value < (kMaxInt32 / scale))) {
5774 const intptr_t scaled_index = index_value * scale;
5775
5776 // Guard against unaligned byte offsets.
5777 if (!is_typed_data ||
5778 Utils::IsAligned(scaled_index, ElementSizeMultiplier(size))) {
5779 set_kind(kConstantIndexed);
5780 set_element_size(size);
5781 index_constant_ = scaled_index;
5782 return;
5783 }
5784 }
5785
5786 // Fallthrough: create generic _[*] place.
5787 }
5788
5789 set_kind(kIndexed);
5790 index_ = index;
5791 }
5792
5793 static uword EncodeFlags(Kind kind, Representation rep, ElementSize scale) {
5794 ASSERT((kind == kConstantIndexed) || (scale == kNoSize));
5795 return KindBits::encode(kind) |
5796 RepresentationBits::encode(rep) |
5797 ElementSizeBits::encode(scale);
5798 }
5799
5800 static ElementSize ElementSizeFor(intptr_t class_id) {
5801 switch (class_id) {
5802 case kArrayCid:
5803 case kImmutableArrayCid:
5804 case kOneByteStringCid:
5805 case kTwoByteStringCid:
5806 // Object arrays and strings do not allow accessing them through
5807 // different types. No need to attach scale.
5808 return kNoSize;
5809
5810 case kTypedDataInt8ArrayCid:
5811 case kTypedDataUint8ArrayCid:
5812 case kTypedDataUint8ClampedArrayCid:
5813 case kExternalTypedDataUint8ArrayCid:
5814 case kExternalTypedDataUint8ClampedArrayCid:
5815 return kInt8;
5816
5817 case kTypedDataInt16ArrayCid:
5818 case kTypedDataUint16ArrayCid:
5819 return kInt16;
5820
5821 case kTypedDataInt32ArrayCid:
5822 case kTypedDataUint32ArrayCid:
5823 case kTypedDataFloat32ArrayCid:
5824 return kInt32;
5825
5826 case kTypedDataInt64ArrayCid:
5827 case kTypedDataUint64ArrayCid:
5828 case kTypedDataFloat64ArrayCid:
5829 return kInt64;
5830
5831 case kTypedDataInt32x4ArrayCid:
5832 case kTypedDataFloat32x4ArrayCid:
5833 case kTypedDataFloat64x2ArrayCid:
5834 return kInt128;
5835
5836 default:
5837 UNREACHABLE();
5838 return kNoSize;
5839 }
5840 }
5841
5842 static intptr_t ElementSizeMultiplier(ElementSize size) {
5843 return 1 << (static_cast<intptr_t>(size) - static_cast<intptr_t>(kInt8));
5844 }
5845
5846 static intptr_t RoundByteOffset(ElementSize size, intptr_t offset) {
5847 return offset & ~(ElementSizeMultiplier(size) - 1);
5848 }
5849
5850 class KindBits : public BitField<uword, Kind, 0, 3> {};
5851 class RepresentationBits :
5852 public BitField<uword, Representation, KindBits::kNextBit, 11> {};
5853 class ElementSizeBits :
5854 public BitField<uword, ElementSize, RepresentationBits::kNextBit, 3> {};
5855
5856 uword flags_;
5857 Definition* instance_;
5858 union {
5859 intptr_t raw_selector_;
5860 const Field* field_;
5861 intptr_t offset_in_bytes_;
5862 intptr_t index_constant_;
5863 Definition* index_;
5864 };
5865
5866 intptr_t id_;
5867 };
5868
5869
5870 class ZonePlace : public ZoneAllocated {
5871 public:
5872 explicit ZonePlace(const Place& place) : place_(place) { }
5873
5874 Place* place() { return &place_; }
5875
5876 private:
5877 Place place_;
5878 };
5879
5880
5881 Place* Place::Wrap(Zone* zone, const Place& place, intptr_t id) {
5882 Place* wrapped = (new(zone) ZonePlace(place))->place();
5883 wrapped->id_ = id;
5884 return wrapped;
5885 }
5886
5887
5888 // Correspondence between places connected through outgoing phi moves on the
5889 // edge that targets join.
5890 class PhiPlaceMoves : public ZoneAllocated {
5891 public:
5892 // Record a move from the place with id |from| to the place with id |to| at
5893 // the given block.
5894 void CreateOutgoingMove(Zone* zone,
5895 BlockEntryInstr* block, intptr_t from, intptr_t to) {
5896 const intptr_t block_num = block->preorder_number();
5897 while (moves_.length() <= block_num) {
5898 moves_.Add(NULL);
5899 }
5900
5901 if (moves_[block_num] == NULL) {
5902 moves_[block_num] = new(zone) ZoneGrowableArray<Move>(5);
5903 }
5904
5905 moves_[block_num]->Add(Move(from, to));
5906 }
5907
5908 class Move {
5909 public:
5910 Move(intptr_t from, intptr_t to) : from_(from), to_(to) { }
5911
5912 intptr_t from() const { return from_; }
5913 intptr_t to() const { return to_; }
5914
5915 private:
5916 intptr_t from_;
5917 intptr_t to_;
5918 };
5919
5920 typedef const ZoneGrowableArray<Move>* MovesList;
5921
5922 MovesList GetOutgoingMoves(BlockEntryInstr* block) const {
5923 const intptr_t block_num = block->preorder_number();
5924 return (block_num < moves_.length()) ?
5925 moves_[block_num] : NULL;
5926 }
5927
5928 private:
5929 GrowableArray<ZoneGrowableArray<Move>* > moves_;
5930 };
5931
5932
5933 // A map from aliases to a set of places sharing the alias. Additionally
5934 // carries a set of places that can be aliased by side-effects, essentially
5935 // those that are affected by calls.
5936 class AliasedSet : public ZoneAllocated {
5937 public:
5938 AliasedSet(Zone* zone,
5939 DirectChainedHashMap<PointerKeyValueTrait<Place> >* places_map,
5940 ZoneGrowableArray<Place*>* places,
5941 PhiPlaceMoves* phi_moves)
5942 : zone_(zone),
5943 places_map_(places_map),
5944 places_(*places),
5945 phi_moves_(phi_moves),
5946 aliases_(5),
5947 aliases_map_(),
5948 typed_data_access_sizes_(),
5949 representatives_(),
5950 killed_(),
5951 aliased_by_effects_(new(zone) BitVector(zone, places->length())) {
5952 InsertAlias(Place::CreateAnyInstanceAnyIndexAlias(zone_,
5953 kAnyInstanceAnyIndexAlias));
5954 for (intptr_t i = 0; i < places_.length(); i++) {
5955 AddRepresentative(places_[i]);
5956 }
5957 ComputeKillSets();
5958 }
5959
5960 intptr_t LookupAliasId(const Place& alias) {
5961 const Place* result = aliases_map_.Lookup(&alias);
5962 return (result != NULL) ? result->id() : static_cast<intptr_t>(kNoAlias);
5963 }
5964
5965 BitVector* GetKilledSet(intptr_t alias) {
5966 return (alias < killed_.length()) ? killed_[alias] : NULL;
5967 }
5968
5969 intptr_t max_place_id() const { return places().length(); }
5970 bool IsEmpty() const { return max_place_id() == 0; }
5971
5972 BitVector* aliased_by_effects() const { return aliased_by_effects_; }
5973
5974 const ZoneGrowableArray<Place*>& places() const {
5975 return places_;
5976 }
5977
5978 Place* LookupCanonical(Place* place) const {
5979 return places_map_->Lookup(place);
5980 }
5981
5982 void PrintSet(BitVector* set) {
5983 bool comma = false;
5984 for (BitVector::Iterator it(set);
5985 !it.Done();
5986 it.Advance()) {
5987 if (comma) {
5988 THR_Print(", ");
5989 }
5990 THR_Print("%s", places_[it.Current()]->ToCString());
5991 comma = true;
5992 }
5993 }
5994
5995 const PhiPlaceMoves* phi_moves() const { return phi_moves_; }
5996
5997 void RollbackAliasedIdentites() {
5998 for (intptr_t i = 0; i < identity_rollback_.length(); ++i) {
5999 identity_rollback_[i]->SetIdentity(AliasIdentity::Unknown());
6000 }
6001 }
6002
6003 // Returns false if the result of an allocation instruction can't be aliased
6004 // by another SSA variable and true otherwise.
6005 bool CanBeAliased(Definition* alloc) {
6006 if (!Place::IsAllocation(alloc)) {
6007 return true;
6008 }
6009
6010 if (alloc->Identity().IsUnknown()) {
6011 ComputeAliasing(alloc);
6012 }
6013
6014 return !alloc->Identity().IsNotAliased();
6015 }
6016
6017 enum {
6018 kNoAlias = 0
6019 };
6020
6021 private:
6022 enum {
6023 // Artificial alias that is used to collect all representatives of the
6024 // *[C], X[C] aliases for arbitrary C.
6025 kAnyConstantIndexedAlias = 1,
6026
6027 // Artificial alias that is used to collect all representatives of
6028 // *[C] alias for arbitrary C.
6029 kUnknownInstanceConstantIndexedAlias = 2,
6030
6031 // Artificial alias that is used to collect all representatives of
6032 // X[*] alias for all X.
6033 kAnyAllocationIndexedAlias = 3,
6034
6035 // *[*] alias.
6036 kAnyInstanceAnyIndexAlias = 4
6037 };
6038
6039 // Compute least generic alias for the place and assign alias id to it.
6040 void AddRepresentative(Place* place) {
6041 if (!place->IsImmutableField()) {
6042 const Place* alias = CanonicalizeAlias(place->ToAlias());
6043 EnsureSet(&representatives_, alias->id())->Add(place->id());
6044
6045 // Update cumulative representative sets that are used during
6046 // killed sets computation.
6047 if (alias->kind() == Place::kConstantIndexed) {
6048 if (CanBeAliased(alias->instance())) {
6049 EnsureSet(&representatives_, kAnyConstantIndexedAlias)->
6050 Add(place->id());
6051 }
6052
6053 if (alias->instance() == NULL) {
6054 EnsureSet(&representatives_, kUnknownInstanceConstantIndexedAlias)->
6055 Add(place->id());
6056 }
6057
6058 // Collect all element sizes used to access TypedData arrays in
6059 // the function. This is used to skip sizes without representatives
6060 // when computing kill sets.
6061 if (alias->element_size() != Place::kNoSize) {
6062 typed_data_access_sizes_.Add(alias->element_size());
6063 }
6064 } else if ((alias->kind() == Place::kIndexed) &&
6065 CanBeAliased(place->instance())) {
6066 EnsureSet(&representatives_, kAnyAllocationIndexedAlias)->
6067 Add(place->id());
6068 }
6069
6070 if (!IsIndependentFromEffects(place)) {
6071 aliased_by_effects_->Add(place->id());
6072 }
6073 }
6074 }
6075
6076 void ComputeKillSets() {
6077 for (intptr_t i = 0; i < aliases_.length(); ++i) {
6078 const Place* alias = aliases_[i];
6079 // Add all representatives to the kill set.
6080 AddAllRepresentatives(alias->id(), alias->id());
6081 ComputeKillSet(alias);
6082 }
6083
6084 if (FLAG_trace_load_optimization) {
6085 THR_Print("Aliases KILL sets:\n");
6086 for (intptr_t i = 0; i < aliases_.length(); ++i) {
6087 const Place* alias = aliases_[i];
6088 BitVector* kill = GetKilledSet(alias->id());
6089
6090 THR_Print("%s: ", alias->ToCString());
6091 if (kill != NULL) {
6092 PrintSet(kill);
6093 }
6094 THR_Print("\n");
6095 }
6096 }
6097 }
6098
6099 void InsertAlias(const Place* alias) {
6100 aliases_map_.Insert(alias);
6101 aliases_.Add(alias);
6102 }
6103
6104 const Place* CanonicalizeAlias(const Place& alias) {
6105 const Place* canonical = aliases_map_.Lookup(&alias);
6106 if (canonical == NULL) {
6107 canonical = Place::Wrap(zone_,
6108 alias,
6109 kAnyInstanceAnyIndexAlias + aliases_.length());
6110 InsertAlias(canonical);
6111 }
6112 ASSERT(aliases_map_.Lookup(&alias) == canonical);
6113 return canonical;
6114 }
6115
6116 BitVector* GetRepresentativesSet(intptr_t alias) {
6117 return (alias < representatives_.length()) ? representatives_[alias] : NULL;
6118 }
6119
6120 BitVector* EnsureSet(GrowableArray<BitVector*>* sets,
6121 intptr_t alias) {
6122 while (sets->length() <= alias) {
6123 sets->Add(NULL);
6124 }
6125
6126 BitVector* set = (*sets)[alias];
6127 if (set == NULL) {
6128 (*sets)[alias] = set = new(zone_) BitVector(zone_, max_place_id());
6129 }
6130 return set;
6131 }
6132
6133 void AddAllRepresentatives(const Place* to, intptr_t from) {
6134 AddAllRepresentatives(to->id(), from);
6135 }
6136
6137 void AddAllRepresentatives(intptr_t to, intptr_t from) {
6138 BitVector* from_set = GetRepresentativesSet(from);
6139 if (from_set != NULL) {
6140 EnsureSet(&killed_, to)->AddAll(from_set);
6141 }
6142 }
6143
6144 void CrossAlias(const Place* to, const Place& from) {
6145 const intptr_t from_id = LookupAliasId(from);
6146 if (from_id == kNoAlias) {
6147 return;
6148 }
6149 CrossAlias(to, from_id);
6150 }
6151
6152 void CrossAlias(const Place* to, intptr_t from) {
6153 AddAllRepresentatives(to->id(), from);
6154 AddAllRepresentatives(from, to->id());
6155 }
6156
6157 // When computing kill sets we let a less generic alias insert its
6158 // representatives into the more generic alias's kill set. For example,
6159 // when visiting alias X[*] instead of searching for all aliases X[C]
6160 // and inserting their representatives into kill set for X[*] we update
6161 // kill set for X[*] each time we visit new X[C] for some C.
6162 // There is an exception however: if both aliases are parametric like *[C]
6163 // and X[*] which cross alias when X is an aliased allocation then we use
6164 // artificial aliases that contain all possible representatives for the given
6165 // alias for any value of the parameter to compute resulting kill set.
6166 void ComputeKillSet(const Place* alias) {
6167 switch (alias->kind()) {
6168 case Place::kIndexed: // Either *[*] or X[*] alias.
6169 if (alias->instance() == NULL) {
6170 // *[*] aliases with X[*], X[C], *[C].
6171 AddAllRepresentatives(alias, kAnyConstantIndexedAlias);
6172 AddAllRepresentatives(alias, kAnyAllocationIndexedAlias);
6173 } else if (CanBeAliased(alias->instance())) {
6174 // X[*] aliases with X[C].
6175 // If X can be aliased then X[*] also aliases with *[C], *[*].
6176 CrossAlias(alias, kAnyInstanceAnyIndexAlias);
6177 AddAllRepresentatives(alias, kUnknownInstanceConstantIndexedAlias);
6178 }
6179 break;
6180
6181 case Place::kConstantIndexed: // Either X[C] or *[C] alias.
6182 if (alias->element_size() != Place::kNoSize) {
6183 const bool has_aliased_instance =
6184 (alias->instance() != NULL) && CanBeAliased(alias->instance());
6185
6186 // If this is a TypedData access then X[C|S] aliases larger elements
6187 // covering this one X[RoundDown(C, S')|S'] for all S' > S and
6188 // all smaller elements being covered by this one X[C'|S'] for
6189 // some S' < S and all C' such that C = RoundDown(C', S).
6190 // In the loop below it's enough to only propagate aliasing to
6191 // larger aliases because propagation is symmetric: smaller aliases
6192 // (if there are any) would update kill set for this alias when they
6193 // are visited.
6194 for (intptr_t i = static_cast<intptr_t>(alias->element_size()) + 1;
6195 i <= Place::kLargestElementSize;
6196 i++) {
6197 // Skip element sizes that are guaranteed to have no representatives.
6198 if (!typed_data_access_sizes_.Contains(alias->element_size())) {
6199 continue;
6200 }
6201
6202 // X[C|S] aliases with X[RoundDown(C, S')|S'] and likewise
6203 // *[C|S] aliases with *[RoundDown(C, S')|S'].
6204 const Place larger_alias =
6205 alias->ToLargerElement(static_cast<Place::ElementSize>(i));
6206 CrossAlias(alias, larger_alias);
6207 if (has_aliased_instance) {
6208 // If X is an aliased instance then X[C|S] aliases
6209 // with *[RoundDown(C, S')|S'].
6210 CrossAlias(alias, larger_alias.CopyWithoutInstance());
6211 }
6212 }
6213 }
6214
6215 if (alias->instance() == NULL) {
6216 // *[C] aliases with X[C], X[*], *[*].
6217 AddAllRepresentatives(alias, kAnyAllocationIndexedAlias);
6218 CrossAlias(alias, kAnyInstanceAnyIndexAlias);
6219 } else {
6220 // X[C] aliases with X[*].
6221 // If X can be aliased then X[C] also aliases with *[C], *[*].
6222 CrossAlias(alias, alias->CopyWithoutIndex());
6223 if (CanBeAliased(alias->instance())) {
6224 CrossAlias(alias, alias->CopyWithoutInstance());
6225 CrossAlias(alias, kAnyInstanceAnyIndexAlias);
6226 }
6227 }
6228 break;
6229
6230 case Place::kField:
6231 case Place::kVMField:
6232 if (CanBeAliased(alias->instance())) {
6233 // X.f or X.@offs alias with *.f and *.@offs respectively.
6234 CrossAlias(alias, alias->CopyWithoutInstance());
6235 }
6236 break;
6237
6238 case Place::kNone:
6239 UNREACHABLE();
6240 }
6241 }
6242
6243 // Returns true if the given load is unaffected by external side-effects.
6244 // This essentially means that no stores to the same location can
6245 // occur in other functions.
6246 bool IsIndependentFromEffects(Place* place) {
6247 if (place->IsImmutableField()) {
6248 // Note that we can't use LoadField's is_immutable attribute here because
6249 // some VM-fields (those that have no corresponding Field object and
6250 // accessed through offset alone) can share offset but have different
6251 // immutability properties.
6252 // One example is the length property of growable and fixed-size lists. If
6253 // loads of these two properties occur in the same function for the same
6254 // receiver then they will get the same expression number. However,
6255 // immutability of a fixed-size list's length does not mean that the
6256 // growable list's length is also immutable. Thus we will make a
6257 // conservative assumption for the VM-properties.
6258 // TODO(vegorov): disambiguate immutable and non-immutable VM-fields with
6259 // the same offset e.g. through recognized kind.
6260 return true;
6261 }
6262
6263 return ((place->kind() == Place::kField) ||
6264 (place->kind() == Place::kVMField)) &&
6265 !CanBeAliased(place->instance());
6266 }
6267
6268 // Returns true if there are direct loads from the given place.
6269 bool HasLoadsFromPlace(Definition* defn, const Place* place) {
6270 ASSERT((place->kind() == Place::kField) ||
6271 (place->kind() == Place::kVMField));
6272
6273 for (Value* use = defn->input_use_list();
6274 use != NULL;
6275 use = use->next_use()) {
6276 Instruction* instr = use->instruction();
6277 if ((instr->IsRedefinition() ||
6278 instr->IsAssertAssignable()) &&
6279 HasLoadsFromPlace(instr->AsDefinition(), place)) {
6280 return true;
6281 }
6282 bool is_load = false, is_store;
6283 Place load_place(instr, &is_load, &is_store);
6284
6285 if (is_load && load_place.Equals(place)) {
6286 return true;
6287 }
6288 }
6289
6290 return false;
6291 }
6292
6293 // Check if any use of the definition can create an alias.
6294 // Can add more objects into aliasing_worklist_.
6295 bool AnyUseCreatesAlias(Definition* defn) {
6296 for (Value* use = defn->input_use_list();
6297 use != NULL;
6298 use = use->next_use()) {
6299 Instruction* instr = use->instruction();
6300 if (instr->IsPushArgument() ||
6301 (instr->IsStoreIndexed()
6302 && (use->use_index() == StoreIndexedInstr::kValuePos)) ||
6303 instr->IsStoreStaticField() ||
6304 instr->IsPhi()) {
6305 return true;
6306 } else if ((instr->IsAssertAssignable() || instr->IsRedefinition()) &&
6307 AnyUseCreatesAlias(instr->AsDefinition())) {
6308 return true;
6309 } else if ((instr->IsStoreInstanceField()
6310 && (use->use_index() != StoreInstanceFieldInstr::kInstancePos))) {
6311 ASSERT(use->use_index() == StoreInstanceFieldInstr::kValuePos);
6312 // If we store this value into an object that is not aliased itself
6313 // and we never load again then the store does not create an alias.
6314 StoreInstanceFieldInstr* store = instr->AsStoreInstanceField();
6315 Definition* instance =
6316 store->instance()->definition()->OriginalDefinition();
6317 if (Place::IsAllocation(instance) &&
6318 !instance->Identity().IsAliased()) {
6319 bool is_load, is_store;
6320 Place store_place(instr, &is_load, &is_store);
6321
6322 if (!HasLoadsFromPlace(instance, &store_place)) {
6323 // No loads found that match this store. If it is yet unknown if
6324 // the object is not aliased then optimistically assume this but
6325 // add it to the worklist to check its uses transitively.
6326 if (instance->Identity().IsUnknown()) {
6327 instance->SetIdentity(AliasIdentity::NotAliased());
6328 aliasing_worklist_.Add(instance);
6329 }
6330 continue;
6331 }
6332 }
6333 return true;
6334 }
6335 }
6336 return false;
6337 }
6338
6339 // Mark any value stored into the given object as potentially aliased.
6340 void MarkStoredValuesEscaping(Definition* defn) {
6341 // Find all stores into this object.
6342 for (Value* use = defn->input_use_list();
6343 use != NULL;
6344 use = use->next_use()) {
6345 if (use->instruction()->IsRedefinition() ||
6346 use->instruction()->IsAssertAssignable()) {
6347 MarkStoredValuesEscaping(use->instruction()->AsDefinition());
6348 continue;
6349 }
6350 if ((use->use_index() == StoreInstanceFieldInstr::kInstancePos) &&
6351 use->instruction()->IsStoreInstanceField()) {
6352 StoreInstanceFieldInstr* store =
6353 use->instruction()->AsStoreInstanceField();
6354 Definition* value = store->value()->definition()->OriginalDefinition();
6355 if (value->Identity().IsNotAliased()) {
6356 value->SetIdentity(AliasIdentity::Aliased());
6357 identity_rollback_.Add(value);
6358
6359 // Add to worklist to propagate the mark transitively.
6360 aliasing_worklist_.Add(value);
6361 }
6362 }
6363 }
6364 }
6365
6366 // Determine if the given definition can't be aliased.
6367 void ComputeAliasing(Definition* alloc) {
6368 ASSERT(Place::IsAllocation(alloc));
6369 ASSERT(alloc->Identity().IsUnknown());
6370 ASSERT(aliasing_worklist_.is_empty());
6371
6372 alloc->SetIdentity(AliasIdentity::NotAliased());
6373 aliasing_worklist_.Add(alloc);
6374
6375 while (!aliasing_worklist_.is_empty()) {
6376 Definition* defn = aliasing_worklist_.RemoveLast();
6377 ASSERT(Place::IsAllocation(defn));
6378 // If the definition in the worklist was optimistically marked as
6379 // not-aliased check that optimistic assumption still holds: check if
6380 // any of its uses can create an alias.
6381 if (!defn->Identity().IsAliased() && AnyUseCreatesAlias(defn)) {
6382 defn->SetIdentity(AliasIdentity::Aliased());
6383 identity_rollback_.Add(defn);
6384 }
6385
6386 // If the allocation site is marked as aliased conservatively mark
6387 // any values stored into the object aliased too.
6388 if (defn->Identity().IsAliased()) {
6389 MarkStoredValuesEscaping(defn);
6390 }
6391 }
6392 }
6393
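A compressed, standalone sketch (invented types and names, not VM code) of the optimistic worklist pattern used here: mark the allocation not-aliased up front, then revisit it, and everything stored into it, whenever that optimism is contradicted by one of its uses.

  #include <vector>

  enum class Identity { kUnknown, kNotAliased, kAliased };

  struct Node {
    Identity identity = Identity::kUnknown;
    std::vector<Node*> stored_values;  // values stored into this allocation
    bool has_escaping_use = false;     // e.g. pushed as a call argument
  };

  void ComputeAliasingSketch(Node* alloc) {
    alloc->identity = Identity::kNotAliased;
    std::vector<Node*> worklist{alloc};
    while (!worklist.empty()) {
      Node* defn = worklist.back();
      worklist.pop_back();
      // Re-check the optimistic assumption against this node's uses.
      if (defn->identity != Identity::kAliased && defn->has_escaping_use) {
        defn->identity = Identity::kAliased;
      }
      // If it turned out to be aliased, conservatively mark stored values too.
      if (defn->identity == Identity::kAliased) {
        for (Node* value : defn->stored_values) {
          if (value->identity == Identity::kNotAliased) {
            value->identity = Identity::kAliased;
            worklist.push_back(value);
          }
        }
      }
    }
  }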
6394 Zone* zone_;
6395
6396 DirectChainedHashMap<PointerKeyValueTrait<Place> >* places_map_;
6397
6398 const ZoneGrowableArray<Place*>& places_;
6399
6400 const PhiPlaceMoves* phi_moves_;
6401
6402 // A list of all seen aliases and a map that allows looking up canonical
6403 // alias object.
6404 GrowableArray<const Place*> aliases_;
6405 DirectChainedHashMap<PointerKeyValueTrait<const Place> > aliases_map_;
6406
6407 SmallSet<Place::ElementSize> typed_data_access_sizes_;
6408
6409 // Maps alias id to set of ids of places representing the alias.
6410 // A place represents an alias if this alias is the least generic alias for
6411 // the place.
6412 // (see ToAlias for the definition of least generic alias).
6413 GrowableArray<BitVector*> representatives_;
6414
6415 // Maps alias id to set of ids of places aliased.
6416 GrowableArray<BitVector*> killed_;
6417
6418 // Set of ids of places that can be affected by side-effects other than
6419 // explicit stores (i.e. through calls).
6420 BitVector* aliased_by_effects_;
6421
6422 // Worklist used during alias analysis.
6423 GrowableArray<Definition*> aliasing_worklist_;
6424
6425 // List of definitions that had their identity set to Aliased. At the end
6426 // of load optimization their identity will be rolled back to Unknown to
6427 // avoid treating them as Aliased at later stages without checking first
6428 // as optimizations can potentially eliminate instructions leading to
6429 // aliasing.
6430 GrowableArray<Definition*> identity_rollback_;
6431 };
6432
6433
6434 static Definition* GetStoredValue(Instruction* instr) {
6435 if (instr->IsStoreIndexed()) {
6436 return instr->AsStoreIndexed()->value()->definition();
6437 }
6438
6439 StoreInstanceFieldInstr* store_instance_field = instr->AsStoreInstanceField();
6440 if (store_instance_field != NULL) {
6441 return store_instance_field->value()->definition();
6442 }
6443
6444 StoreStaticFieldInstr* store_static_field = instr->AsStoreStaticField();
6445 if (store_static_field != NULL) {
6446 return store_static_field->value()->definition();
6447 }
6448
6449 UNREACHABLE(); // Should only be called for supported store instructions.
6450 return NULL;
6451 }
6452
6453
6454 static bool IsPhiDependentPlace(Place* place) {
6455 return ((place->kind() == Place::kField) ||
6456 (place->kind() == Place::kVMField)) &&
6457 (place->instance() != NULL) &&
6458 place->instance()->IsPhi();
6459 }
6460
6461
6462 // For each place that depends on a phi, ensure that the equivalent places
6463 // corresponding to the phi inputs are numbered, and record outgoing phi
6464 // moves for each block. These moves establish the correspondence between
6465 // a phi dependent place and the phi input's place that flows in.
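// Illustrative sketch (hypothetical, not part of this change): given
//
//   B3: v5 <- phi(v3 from B1, v4 from B2)
//       ... <- v5.f                ;; numbered place v5.f
//
// this pass also numbers the places v3.f and v4.f and records the outgoing
// moves v3.f -> v5.f on B1 and v4.f -> v5.f on B2.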
6466 static PhiPlaceMoves* ComputePhiMoves(
6467 DirectChainedHashMap<PointerKeyValueTrait<Place> >* map,
6468 ZoneGrowableArray<Place*>* places) {
6469 Thread* thread = Thread::Current();
6470 Zone* zone = thread->zone();
6471 PhiPlaceMoves* phi_moves = new(zone) PhiPlaceMoves();
6472
6473 for (intptr_t i = 0; i < places->length(); i++) {
6474 Place* place = (*places)[i];
6475
6476 if (IsPhiDependentPlace(place)) {
6477 PhiInstr* phi = place->instance()->AsPhi();
6478 BlockEntryInstr* block = phi->GetBlock();
6479
6480 if (FLAG_trace_optimization) {
6481 THR_Print("phi dependent place %s\n", place->ToCString());
6482 }
6483
6484 Place input_place(*place);
6485 for (intptr_t j = 0; j < phi->InputCount(); j++) {
6486 input_place.set_instance(phi->InputAt(j)->definition());
6487
6488 Place* result = map->Lookup(&input_place);
6489 if (result == NULL) {
6490 result = Place::Wrap(zone, input_place, places->length());
6491 map->Insert(result);
6492 places->Add(result);
6493 if (FLAG_trace_optimization) {
6494 THR_Print(" adding place %s as %" Pd "\n",
6495 result->ToCString(),
6496 result->id());
6497 }
6498 }
6499 phi_moves->CreateOutgoingMove(zone,
6500 block->PredecessorAt(j),
6501 result->id(),
6502 place->id());
6503 }
6504 }
6505 }
6506
6507 return phi_moves;
6508 }
6509
6510
6511 enum CSEMode {
6512 kOptimizeLoads,
6513 kOptimizeStores
6514 };
6515
6516
6517 static AliasedSet* NumberPlaces(
6518 FlowGraph* graph,
6519 DirectChainedHashMap<PointerKeyValueTrait<Place> >* map,
6520 CSEMode mode) {
6521   // Loads representing different expression ids will be collected and
6522   // used to build per-offset kill sets.
6523 Zone* zone = graph->zone();
6524 ZoneGrowableArray<Place*>* places =
6525 new(zone) ZoneGrowableArray<Place*>(10);
6526
6527 bool has_loads = false;
6528 bool has_stores = false;
6529 for (BlockIterator it = graph->reverse_postorder_iterator();
6530 !it.Done();
6531 it.Advance()) {
6532 BlockEntryInstr* block = it.Current();
6533
6534 for (ForwardInstructionIterator instr_it(block);
6535 !instr_it.Done();
6536 instr_it.Advance()) {
6537 Instruction* instr = instr_it.Current();
6538 Place place(instr, &has_loads, &has_stores);
6539 if (place.kind() == Place::kNone) {
6540 continue;
6541 }
6542
6543 Place* result = map->Lookup(&place);
6544 if (result == NULL) {
6545 result = Place::Wrap(zone, place, places->length());
6546 map->Insert(result);
6547 places->Add(result);
6548
6549 if (FLAG_trace_optimization) {
6550 THR_Print("numbering %s as %" Pd "\n",
6551 result->ToCString(),
6552 result->id());
6553 }
6554 }
6555
6556 instr->set_place_id(result->id());
6557 }
6558 }
6559
6560 if ((mode == kOptimizeLoads) && !has_loads) {
6561 return NULL;
6562 }
6563 if ((mode == kOptimizeStores) && !has_stores) {
6564 return NULL;
6565 }
6566
6567 PhiPlaceMoves* phi_moves = ComputePhiMoves(map, places);
6568
6569 // Build aliasing sets mapping aliases to loads.
6570 return new(zone) AliasedSet(zone, map, places, phi_moves);
6571 }
6572
6573
6574 class LoadOptimizer : public ValueObject {
6575 public:
6576 LoadOptimizer(FlowGraph* graph, AliasedSet* aliased_set)
6577 : graph_(graph),
6578 aliased_set_(aliased_set),
6579 in_(graph_->preorder().length()),
6580 out_(graph_->preorder().length()),
6581 gen_(graph_->preorder().length()),
6582 kill_(graph_->preorder().length()),
6583 exposed_values_(graph_->preorder().length()),
6584 out_values_(graph_->preorder().length()),
6585 phis_(5),
6586 worklist_(5),
6587 congruency_worklist_(6),
6588 in_worklist_(NULL),
6589 forwarded_(false) {
6590 const intptr_t num_blocks = graph_->preorder().length();
6591 for (intptr_t i = 0; i < num_blocks; i++) {
6592 out_.Add(NULL);
6593 gen_.Add(new(Z) BitVector(Z, aliased_set_->max_place_id()));
6594 kill_.Add(new(Z) BitVector(Z, aliased_set_->max_place_id()));
6595 in_.Add(new(Z) BitVector(Z, aliased_set_->max_place_id()));
6596
6597 exposed_values_.Add(NULL);
6598 out_values_.Add(NULL);
6599 }
6600 }
6601
6602 ~LoadOptimizer() {
6603 aliased_set_->RollbackAliasedIdentites();
6604 }
6605
6606 Isolate* isolate() const { return graph_->isolate(); }
6607 Zone* zone() const { return graph_->zone(); }
6608
6609 static bool OptimizeGraph(FlowGraph* graph) {
6610 ASSERT(FLAG_load_cse);
6611 if (FLAG_trace_load_optimization) {
6612 FlowGraphPrinter::PrintGraph("Before LoadOptimizer", graph);
6613 }
6614
6615 DirectChainedHashMap<PointerKeyValueTrait<Place> > map;
6616 AliasedSet* aliased_set = NumberPlaces(graph, &map, kOptimizeLoads);
6617 if ((aliased_set != NULL) && !aliased_set->IsEmpty()) {
6618       // If any loads were forwarded, return true from Optimize to run load
6619       // forwarding again. This allows forwarding chains of loads.
6620       // This is especially important for context variables as they are
6621       // built as loads from a loaded context.
6622 // TODO(vegorov): renumber newly discovered congruences during the
6623 // forwarding to forward chains without running whole pass twice.
6624 LoadOptimizer load_optimizer(graph, aliased_set);
6625 return load_optimizer.Optimize();
6626 }
6627 return false;
6628 }
6629
6630 private:
6631 bool Optimize() {
6632 ComputeInitialSets();
6633 ComputeOutSets();
6634 ComputeOutValues();
6635 if (graph_->is_licm_allowed()) {
6636 MarkLoopInvariantLoads();
6637 }
6638 ForwardLoads();
6639 EmitPhis();
6640
6641 if (FLAG_trace_load_optimization) {
6642 FlowGraphPrinter::PrintGraph("After LoadOptimizer", graph_);
6643 }
6644
6645 return forwarded_;
6646 }
6647
6648 // Compute sets of loads generated and killed by each block.
6649 // Additionally compute upwards exposed and generated loads for each block.
6650 // Exposed loads are those that can be replaced if a corresponding
6651   // reaching load is found.
6652 // Loads that are locally redundant will be replaced as we go through
6653 // instructions.
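  // Illustrative sketch (hypothetical, not part of this change):
  //
  //   v1 <- a.f      ;; upward exposed load; GEN += place id of a.f
  //   call           ;; has effects: KILL += aliased_by_effects,
  //                  ;; GEN -= aliased_by_effects
  //   v2 <- a.f      ;; not locally redundant because a.f was killed;
  //                  ;; re-enters GEN as this block's out value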
6654 void ComputeInitialSets() {
6655 for (BlockIterator block_it = graph_->reverse_postorder_iterator();
6656 !block_it.Done();
6657 block_it.Advance()) {
6658 BlockEntryInstr* block = block_it.Current();
6659 const intptr_t preorder_number = block->preorder_number();
6660
6661 BitVector* kill = kill_[preorder_number];
6662 BitVector* gen = gen_[preorder_number];
6663
6664 ZoneGrowableArray<Definition*>* exposed_values = NULL;
6665 ZoneGrowableArray<Definition*>* out_values = NULL;
6666
6667 for (ForwardInstructionIterator instr_it(block);
6668 !instr_it.Done();
6669 instr_it.Advance()) {
6670 Instruction* instr = instr_it.Current();
6671
6672 bool is_load = false, is_store = false;
6673 Place place(instr, &is_load, &is_store);
6674
6675 BitVector* killed = NULL;
6676 if (is_store) {
6677 const intptr_t alias_id =
6678 aliased_set_->LookupAliasId(place.ToAlias());
6679 if (alias_id != AliasedSet::kNoAlias) {
6680 killed = aliased_set_->GetKilledSet(alias_id);
6681 } else if (!place.IsImmutableField()) {
6682           // We encountered an unknown alias: this means intrablock load
6683           // forwarding refined a parameter of this store, for example
6684 //
6685 // o <- alloc()
6686 // a.f <- o
6687 // u <- a.f
6688 // u.x <- null ;; this store alias is *.x
6689 //
6690 // after intrablock load forwarding
6691 //
6692 // o <- alloc()
6693 // a.f <- o
6694 // o.x <- null ;; this store alias is o.x
6695 //
6696           // In this case we fall back to using the place id recorded in
6697           // the instruction, which still points to the old place with a
6698           // more generic alias.
6699 const intptr_t old_alias_id = aliased_set_->LookupAliasId(
6700 aliased_set_->places()[instr->place_id()]->ToAlias());
6701 killed = aliased_set_->GetKilledSet(old_alias_id);
6702 }
6703
6704 if (killed != NULL) {
6705 kill->AddAll(killed);
6706 // There is no need to clear out_values when clearing GEN set
6707 // because only those values that are in the GEN set
6708 // will ever be used.
6709 gen->RemoveAll(killed);
6710 }
6711
6712 // Only forward stores to normal arrays, float64, and simd arrays
6713 // to loads because other array stores (intXX/uintXX/float32)
6714 // may implicitly convert the value stored.
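          // For example (sketch, not from this change): storing the Smi 256
          // into a Uint8List element is observed as 0 by a later load of
          // that element, so forwarding the untruncated stored value would
          // be incorrect.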
6715 StoreIndexedInstr* array_store = instr->AsStoreIndexed();
6716 if ((array_store == NULL) ||
6717 (array_store->class_id() == kArrayCid) ||
6718 (array_store->class_id() == kTypedDataFloat64ArrayCid) ||
6719 (array_store->class_id() == kTypedDataFloat32ArrayCid) ||
6720 (array_store->class_id() == kTypedDataFloat32x4ArrayCid)) {
6721 Place* canonical_place = aliased_set_->LookupCanonical(&place);
6722 if (canonical_place != NULL) {
6723 // Store has a corresponding numbered place that might have a
6724 // load. Try forwarding stored value to it.
6725 gen->Add(canonical_place->id());
6726 if (out_values == NULL) out_values = CreateBlockOutValues();
6727 (*out_values)[canonical_place->id()] = GetStoredValue(instr);
6728 }
6729 }
6730
6731 ASSERT(!instr->IsDefinition() ||
6732 !IsLoadEliminationCandidate(instr->AsDefinition()));
6733 continue;
6734 } else if (is_load) {
6735 // Check if this load needs renumbering because of the intrablock
6736 // load forwarding.
6737 const Place* canonical = aliased_set_->LookupCanonical(&place);
6738 if ((canonical != NULL) &&
6739 (canonical->id() != instr->AsDefinition()->place_id())) {
6740 instr->AsDefinition()->set_place_id(canonical->id());
6741 }
6742 }
6743
6744 // If instruction has effects then kill all loads affected.
6745 if (!instr->Effects().IsNone()) {
6746 kill->AddAll(aliased_set_->aliased_by_effects());
6747 // There is no need to clear out_values when removing values from GEN
6748 // set because only those values that are in the GEN set
6749 // will ever be used.
6750 gen->RemoveAll(aliased_set_->aliased_by_effects());
6751 continue;
6752 }
6753
6754 Definition* defn = instr->AsDefinition();
6755 if (defn == NULL) {
6756 continue;
6757 }
6758
6759         // For an object allocation forward the initial values of its
6760         // fields to subsequent loads, but skip final fields of escaping
6761         // objects. Final fields are initialized in a constructor that might
6762         // not be inlined into the function that we are currently optimizing,
6763         // yet at the same time we assume that values of final fields can be
6764         // forwarded across side-effects. If we added 'null' as the known
6765         // value of such a field here we would incorrectly propagate this
6766         // null across the constructor invocation.
6767 AllocateObjectInstr* alloc = instr->AsAllocateObject();
6768 if ((alloc != NULL)) {
6769 for (Value* use = alloc->input_use_list();
6770 use != NULL;
6771 use = use->next_use()) {
6772 // Look for all immediate loads from this object.
6773 if (use->use_index() != 0) {
6774 continue;
6775 }
6776
6777 LoadFieldInstr* load = use->instruction()->AsLoadField();
6778 if (load != NULL) {
6779             // Found a load. Initialize the current value of the field to
6780             // null for normal fields, or to the type arguments.
6781 
6782             // Forward values for all fields of non-escaping objects, but
6783             // only non-final fields and type arguments for escaping ones.
6784 if (aliased_set_->CanBeAliased(alloc) &&
6785 (load->field() != NULL) &&
6786 load->field()->is_final()) {
6787 continue;
6788 }
6789
6790 Definition* forward_def = graph_->constant_null();
6791 if (alloc->ArgumentCount() > 0) {
6792 ASSERT(alloc->ArgumentCount() == 1);
6793 intptr_t type_args_offset =
6794 alloc->cls().type_arguments_field_offset();
6795 if (load->offset_in_bytes() == type_args_offset) {
6796 forward_def = alloc->PushArgumentAt(0)->value()->definition();
6797 }
6798 }
6799 gen->Add(load->place_id());
6800 if (out_values == NULL) out_values = CreateBlockOutValues();
6801 (*out_values)[load->place_id()] = forward_def;
6802 }
6803 }
6804 continue;
6805 }
6806
6807 if (!IsLoadEliminationCandidate(defn)) {
6808 continue;
6809 }
6810
6811 const intptr_t place_id = defn->place_id();
6812 if (gen->Contains(place_id)) {
6813 // This is a locally redundant load.
6814 ASSERT((out_values != NULL) && ((*out_values)[place_id] != NULL));
6815
6816 Definition* replacement = (*out_values)[place_id];
6817 EnsureSSATempIndex(graph_, defn, replacement);
6818 if (FLAG_trace_optimization) {
6819 THR_Print("Replacing load v%" Pd " with v%" Pd "\n",
6820 defn->ssa_temp_index(),
6821 replacement->ssa_temp_index());
6822 }
6823
6824 defn->ReplaceUsesWith(replacement);
6825 instr_it.RemoveCurrentFromGraph();
6826 forwarded_ = true;
6827 continue;
6828 } else if (!kill->Contains(place_id)) {
6829 // This is an exposed load: it is the first representative of a
6830 // given expression id and it is not killed on the path from
6831 // the block entry.
6832 if (exposed_values == NULL) {
6833 static const intptr_t kMaxExposedValuesInitialSize = 5;
6834 exposed_values = new(Z) ZoneGrowableArray<Definition*>(
6835 Utils::Minimum(kMaxExposedValuesInitialSize,
6836 aliased_set_->max_place_id()));
6837 }
6838
6839 exposed_values->Add(defn);
6840 }
6841
6842 gen->Add(place_id);
6843
6844 if (out_values == NULL) out_values = CreateBlockOutValues();
6845 (*out_values)[place_id] = defn;
6846 }
6847
6848 exposed_values_[preorder_number] = exposed_values;
6849 out_values_[preorder_number] = out_values;
6850 }
6851 }
6852
6853 static void PerformPhiMoves(PhiPlaceMoves::MovesList phi_moves,
6854 BitVector* out,
6855 BitVector* forwarded_loads) {
6856 forwarded_loads->Clear();
6857
6858 for (intptr_t i = 0; i < phi_moves->length(); i++) {
6859 const intptr_t from = (*phi_moves)[i].from();
6860 const intptr_t to = (*phi_moves)[i].to();
6861 if (from == to) continue;
6862
6863 if (out->Contains(from)) {
6864 forwarded_loads->Add(to);
6865 }
6866 }
6867
6868 for (intptr_t i = 0; i < phi_moves->length(); i++) {
6869 const intptr_t from = (*phi_moves)[i].from();
6870 const intptr_t to = (*phi_moves)[i].to();
6871 if (from == to) continue;
6872
6873 out->Remove(to);
6874 }
6875
6876 out->AddAll(forwarded_loads);
6877 }
6878
6879   // Compute OUT sets by propagating them iteratively until a fixed point
6880 // is reached.
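  // The per-block transfer function is the standard forward dataflow form
  // (a sketch in terms of the sets computed by ComputeInitialSets, with the
  // phi moves applied to predecessor OUT sets):
  //
  //   IN(B)  = intersection over predecessors P of OUT(P)
  //   OUT(B) = (IN(B) - KILL(B)) + GEN(B)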
6881 void ComputeOutSets() {
6882 BitVector* temp = new(Z) BitVector(Z, aliased_set_->max_place_id());
6883 BitVector* forwarded_loads =
6884 new(Z) BitVector(Z, aliased_set_->max_place_id());
6885 BitVector* temp_out = new(Z) BitVector(Z, aliased_set_->max_place_id());
6886
6887 bool changed = true;
6888 while (changed) {
6889 changed = false;
6890
6891 for (BlockIterator block_it = graph_->reverse_postorder_iterator();
6892 !block_it.Done();
6893 block_it.Advance()) {
6894 BlockEntryInstr* block = block_it.Current();
6895
6896 const intptr_t preorder_number = block->preorder_number();
6897
6898 BitVector* block_in = in_[preorder_number];
6899 BitVector* block_out = out_[preorder_number];
6900 BitVector* block_kill = kill_[preorder_number];
6901 BitVector* block_gen = gen_[preorder_number];
6902
6903 // Compute block_in as the intersection of all out(p) where p
6904 // is a predecessor of the current block.
6905 if (block->IsGraphEntry()) {
6906 temp->Clear();
6907 } else {
6908 temp->SetAll();
6909 ASSERT(block->PredecessorCount() > 0);
6910 for (intptr_t i = 0; i < block->PredecessorCount(); i++) {
6911 BlockEntryInstr* pred = block->PredecessorAt(i);
6912 BitVector* pred_out = out_[pred->preorder_number()];
6913 if (pred_out == NULL) continue;
6914 PhiPlaceMoves::MovesList phi_moves =
6915 aliased_set_->phi_moves()->GetOutgoingMoves(pred);
6916 if (phi_moves != NULL) {
6917 // If there are phi moves, perform intersection with
6918 // a copy of pred_out where the phi moves are applied.
6919 temp_out->CopyFrom(pred_out);
6920 PerformPhiMoves(phi_moves, temp_out, forwarded_loads);
6921 pred_out = temp_out;
6922 }
6923 temp->Intersect(pred_out);
6924 }
6925 }
6926
6927 if (!temp->Equals(*block_in) || (block_out == NULL)) {
6928 // If IN set has changed propagate the change to OUT set.
6929 block_in->CopyFrom(temp);
6930
6931 temp->RemoveAll(block_kill);
6932 temp->AddAll(block_gen);
6933
6934 if ((block_out == NULL) || !block_out->Equals(*temp)) {
6935 if (block_out == NULL) {
6936 block_out = out_[preorder_number] =
6937 new(Z) BitVector(Z, aliased_set_->max_place_id());
6938 }
6939 block_out->CopyFrom(temp);
6940 changed = true;
6941 }
6942 }
6943 }
6944 }
6945 }
6946
6947   // Compute out_values mappings by propagating them in reverse postorder
6948   // once through the graph. Generate phis on back edges where an eager
6949   // merge is impossible.
6950   // No replacement is done at this point and thus any out_value[place_id]
6951   // is changed at most once: from NULL to an actual value.
6952   // When merging incoming loads we might need to create a phi.
6953   // These phis are not inserted into the graph immediately because some of
6954   // them might become redundant after load forwarding is done.
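  // Illustrative sketch (hypothetical, not part of this change):
  //
  //   B1: v1 <- a.f                     ;; OUT(B1)[a.f] = v1
  //   B2: loop header, preds B1 and B3  ;; back edge: eager merge is
  //                                     ;; impossible, so a pending phi is
  //                                     ;; created for a.f
  //   B3: a.f <- v2, back edge to B2    ;; later supplies v2 for the
  //                                     ;; back-edge input of the phi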
6955 void ComputeOutValues() {
6956 GrowableArray<PhiInstr*> pending_phis(5);
6957 ZoneGrowableArray<Definition*>* temp_forwarded_values = NULL;
6958
6959 for (BlockIterator block_it = graph_->reverse_postorder_iterator();
6960 !block_it.Done();
6961 block_it.Advance()) {
6962 BlockEntryInstr* block = block_it.Current();
6963
6964 const bool can_merge_eagerly = CanMergeEagerly(block);
6965
6966 const intptr_t preorder_number = block->preorder_number();
6967
6968 ZoneGrowableArray<Definition*>* block_out_values =
6969 out_values_[preorder_number];
6970
6971
6972 // If OUT set has changed then we have new values available out of
6973 // the block. Compute these values creating phi where necessary.
6974 for (BitVector::Iterator it(out_[preorder_number]);
6975 !it.Done();
6976 it.Advance()) {
6977 const intptr_t place_id = it.Current();
6978
6979 if (block_out_values == NULL) {
6980 out_values_[preorder_number] = block_out_values =
6981 CreateBlockOutValues();
6982 }
6983
6984 if ((*block_out_values)[place_id] == NULL) {
6985 ASSERT(block->PredecessorCount() > 0);
6986 Definition* in_value = can_merge_eagerly ?
6987 MergeIncomingValues(block, place_id) : NULL;
6988 if ((in_value == NULL) &&
6989 (in_[preorder_number]->Contains(place_id))) {
6990 PhiInstr* phi = new(Z) PhiInstr(block->AsJoinEntry(),
6991 block->PredecessorCount());
6992 phi->set_place_id(place_id);
6993 pending_phis.Add(phi);
6994 in_value = phi;
6995 }
6996 (*block_out_values)[place_id] = in_value;
6997 }
6998 }
6999
7000 // If the block has outgoing phi moves perform them. Use temporary list
7001 // of values to ensure that cyclic moves are performed correctly.
7002 PhiPlaceMoves::MovesList phi_moves =
7003 aliased_set_->phi_moves()->GetOutgoingMoves(block);
7004 if ((phi_moves != NULL) && (block_out_values != NULL)) {
7005 if (temp_forwarded_values == NULL) {
7006 temp_forwarded_values = CreateBlockOutValues();
7007 }
7008
7009 for (intptr_t i = 0; i < phi_moves->length(); i++) {
7010 const intptr_t from = (*phi_moves)[i].from();
7011 const intptr_t to = (*phi_moves)[i].to();
7012 if (from == to) continue;
7013
7014 (*temp_forwarded_values)[to] = (*block_out_values)[from];
7015 }
7016
7017 for (intptr_t i = 0; i < phi_moves->length(); i++) {
7018 const intptr_t from = (*phi_moves)[i].from();
7019 const intptr_t to = (*phi_moves)[i].to();
7020 if (from == to) continue;
7021
7022 (*block_out_values)[to] = (*temp_forwarded_values)[to];
7023 }
7024 }
7025
7026 if (FLAG_trace_load_optimization) {
7027 THR_Print("B%" Pd "\n", block->block_id());
7028 THR_Print(" IN: ");
7029 aliased_set_->PrintSet(in_[preorder_number]);
7030 THR_Print("\n");
7031
7032 THR_Print(" KILL: ");
7033 aliased_set_->PrintSet(kill_[preorder_number]);
7034 THR_Print("\n");
7035
7036 THR_Print(" OUT: ");
7037 aliased_set_->PrintSet(out_[preorder_number]);
7038 THR_Print("\n");
7039 }
7040 }
7041
7042 // All blocks were visited. Fill pending phis with inputs
7043 // that flow on back edges.
7044 for (intptr_t i = 0; i < pending_phis.length(); i++) {
7045 FillPhiInputs(pending_phis[i]);
7046 }
7047 }
7048
7049 bool CanMergeEagerly(BlockEntryInstr* block) {
7050 for (intptr_t i = 0; i < block->PredecessorCount(); i++) {
7051 BlockEntryInstr* pred = block->PredecessorAt(i);
7052 if (pred->postorder_number() < block->postorder_number()) {
7053 return false;
7054 }
7055 }
7056 return true;
7057 }
7058
7059 void MarkLoopInvariantLoads() {
7060 const ZoneGrowableArray<BlockEntryInstr*>& loop_headers =
7061 graph_->LoopHeaders();
7062
7063 ZoneGrowableArray<BitVector*>* invariant_loads =
7064 new(Z) ZoneGrowableArray<BitVector*>(loop_headers.length());
7065
7066 for (intptr_t i = 0; i < loop_headers.length(); i++) {
7067 BlockEntryInstr* header = loop_headers[i];
7068 BlockEntryInstr* pre_header = header->ImmediateDominator();
7069 if (pre_header == NULL) {
7070 invariant_loads->Add(NULL);
7071 continue;
7072 }
7073
7074 BitVector* loop_gen = new(Z) BitVector(Z, aliased_set_->max_place_id());
7075 for (BitVector::Iterator loop_it(header->loop_info());
7076 !loop_it.Done();
7077 loop_it.Advance()) {
7078 const intptr_t preorder_number = loop_it.Current();
7079 loop_gen->AddAll(gen_[preorder_number]);
7080 }
7081
7082 for (BitVector::Iterator loop_it(header->loop_info());
7083 !loop_it.Done();
7084 loop_it.Advance()) {
7085 const intptr_t preorder_number = loop_it.Current();
7086 loop_gen->RemoveAll(kill_[preorder_number]);
7087 }
7088
7089 if (FLAG_trace_optimization) {
7090 for (BitVector::Iterator it(loop_gen); !it.Done(); it.Advance()) {
7091 THR_Print("place %s is loop invariant for B%" Pd "\n",
7092 aliased_set_->places()[it.Current()]->ToCString(),
7093 header->block_id());
7094 }
7095 }
7096
7097 invariant_loads->Add(loop_gen);
7098 }
7099
7100 graph_->set_loop_invariant_loads(invariant_loads);
7101 }
7102
7103   // Compute the incoming value for the given expression id.
7104   // Creates a phi if different values are incoming from multiple
7105   // predecessors.
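  // Sketch: if every predecessor's out value for the place is v5 the merge
  // simply returns v5; if one predecessor supplies v5 and another v7 a new
  // phi(v5, v7) is created and its inputs are filled in by FillPhiInputs.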
7106 Definition* MergeIncomingValues(BlockEntryInstr* block, intptr_t place_id) {
7107 // First check if the same value is coming in from all predecessors.
7108 static Definition* const kDifferentValuesMarker =
7109 reinterpret_cast<Definition*>(-1);
7110 Definition* incoming = NULL;
7111 for (intptr_t i = 0; i < block->PredecessorCount(); i++) {
7112 BlockEntryInstr* pred = block->PredecessorAt(i);
7113 ZoneGrowableArray<Definition*>* pred_out_values =
7114 out_values_[pred->preorder_number()];
7115 if ((pred_out_values == NULL) || ((*pred_out_values)[place_id] == NULL)) {
7116 return NULL;
7117 } else if (incoming == NULL) {
7118 incoming = (*pred_out_values)[place_id];
7119 } else if (incoming != (*pred_out_values)[place_id]) {
7120 incoming = kDifferentValuesMarker;
7121 }
7122 }
7123
7124 if (incoming != kDifferentValuesMarker) {
7125 ASSERT(incoming != NULL);
7126 return incoming;
7127 }
7128
7129 // Incoming values are different. Phi is required to merge.
7130 PhiInstr* phi = new(Z) PhiInstr(
7131 block->AsJoinEntry(), block->PredecessorCount());
7132 phi->set_place_id(place_id);
7133 FillPhiInputs(phi);
7134 return phi;
7135 }
7136
7137 void FillPhiInputs(PhiInstr* phi) {
7138 BlockEntryInstr* block = phi->GetBlock();
7139 const intptr_t place_id = phi->place_id();
7140
7141 for (intptr_t i = 0; i < block->PredecessorCount(); i++) {
7142 BlockEntryInstr* pred = block->PredecessorAt(i);
7143 ZoneGrowableArray<Definition*>* pred_out_values =
7144 out_values_[pred->preorder_number()];
7145 ASSERT((*pred_out_values)[place_id] != NULL);
7146
7147 // Sets of outgoing values are not linked into use lists so
7148 // they might contain values that were replaced and removed
7149 // from the graph by this iteration.
7150 // To prevent using them we additionally mark definitions themselves
7151 // as replaced and store a pointer to the replacement.
7152 Definition* replacement = (*pred_out_values)[place_id]->Replacement();
7153 Value* input = new(Z) Value(replacement);
7154 phi->SetInputAt(i, input);
7155 replacement->AddInputUse(input);
7156 }
7157
7158 graph_->AllocateSSAIndexes(phi);
7159 phis_.Add(phi); // Postpone phi insertion until after load forwarding.
7160
7161 if (FLAG_trace_load_optimization) {
7162 THR_Print("created pending phi %s for %s at B%" Pd "\n",
7163 phi->ToCString(),
7164 aliased_set_->places()[place_id]->ToCString(),
7165 block->block_id());
7166 }
7167 }
7168
7169 // Iterate over basic blocks and replace exposed loads with incoming
7170 // values.
7171 void ForwardLoads() {
7172 for (BlockIterator block_it = graph_->reverse_postorder_iterator();
7173 !block_it.Done();
7174 block_it.Advance()) {
7175 BlockEntryInstr* block = block_it.Current();
7176
7177 ZoneGrowableArray<Definition*>* loads =
7178 exposed_values_[block->preorder_number()];
7179 if (loads == NULL) continue; // No exposed loads.
7180
7181 BitVector* in = in_[block->preorder_number()];
7182
7183 for (intptr_t i = 0; i < loads->length(); i++) {
7184 Definition* load = (*loads)[i];
7185 if (!in->Contains(load->place_id())) continue; // No incoming value.
7186
7187 Definition* replacement = MergeIncomingValues(block, load->place_id());
7188 ASSERT(replacement != NULL);
7189
7190 // Sets of outgoing values are not linked into use lists so
7191       // they might contain values that were replaced and removed
7192 // from the graph by this iteration.
7193 // To prevent using them we additionally mark definitions themselves
7194 // as replaced and store a pointer to the replacement.
7195 replacement = replacement->Replacement();
7196
7197 if (load != replacement) {
7198 EnsureSSATempIndex(graph_, load, replacement);
7199
7200 if (FLAG_trace_optimization) {
7201 THR_Print("Replacing load v%" Pd " with v%" Pd "\n",
7202 load->ssa_temp_index(),
7203 replacement->ssa_temp_index());
7204 }
7205
7206 load->ReplaceUsesWith(replacement);
7207 load->RemoveFromGraph();
7208 load->SetReplacement(replacement);
7209 forwarded_ = true;
7210 }
7211 }
7212 }
7213 }
7214
7215   // Check if the given phi takes the same value on all code paths.
7216   // Eliminate it as redundant if this is the case.
7217   // When analyzing phi operands this assumes that only phis generated
7218   // during this load forwarding phase can be redundant. They can be
7219   // distinguished because they are not marked alive.
7220 // TODO(vegorov): move this into a separate phase over all phis.
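  // Sketch: a chain such as p1 = phi(v1, p2), p2 = phi(v1, p1), where p2 was
  // produced by this pass (and is therefore not marked alive), collapses to
  // v1; a phi with two distinct non-phi inputs is kept as it is.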
7221 bool EliminateRedundantPhi(PhiInstr* phi) {
7222 Definition* value = NULL; // Possible value of this phi.
7223
7224 worklist_.Clear();
7225 if (in_worklist_ == NULL) {
7226 in_worklist_ = new(Z) BitVector(Z, graph_->current_ssa_temp_index());
7227 } else {
7228 in_worklist_->Clear();
7229 }
7230
7231 worklist_.Add(phi);
7232 in_worklist_->Add(phi->ssa_temp_index());
7233
7234 for (intptr_t i = 0; i < worklist_.length(); i++) {
7235 PhiInstr* phi = worklist_[i];
7236
7237 for (intptr_t i = 0; i < phi->InputCount(); i++) {
7238 Definition* input = phi->InputAt(i)->definition();
7239 if (input == phi) continue;
7240
7241 PhiInstr* phi_input = input->AsPhi();
7242 if ((phi_input != NULL) && !phi_input->is_alive()) {
7243 if (!in_worklist_->Contains(phi_input->ssa_temp_index())) {
7244 worklist_.Add(phi_input);
7245 in_worklist_->Add(phi_input->ssa_temp_index());
7246 }
7247 continue;
7248 }
7249
7250 if (value == NULL) {
7251 value = input;
7252 } else if (value != input) {
7253 return false; // This phi is not redundant.
7254 }
7255 }
7256 }
7257
7258 // All phis in the worklist are redundant and have the same computed
7259 // value on all code paths.
7260 ASSERT(value != NULL);
7261 for (intptr_t i = 0; i < worklist_.length(); i++) {
7262 worklist_[i]->ReplaceUsesWith(value);
7263 }
7264
7265 return true;
7266 }
7267
7268 // Returns true if definitions are congruent assuming their inputs
7269 // are congruent.
7270 bool CanBeCongruent(Definition* a, Definition* b) {
7271 return (a->tag() == b->tag()) &&
7272 ((a->IsPhi() && (a->GetBlock() == b->GetBlock())) ||
7273 (a->AllowsCSE() && a->Dependencies().IsNone() &&
7274 a->AttributesEqual(b)));
7275 }
7276
7277   // Given two definitions check if they are congruent under the assumption
7278   // that their inputs will be proven congruent. If they are, add them to
7279   // the worklist to check their inputs' congruency.
7280   // Returns true if the pair was added to the worklist or is already in
7281   // the worklist, and false if a and b are not congruent.
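  // Illustrative sketch (hypothetical): for two phis in the same block,
  //   pa = phi(v1, v2)  and  pb = phi(v1, v3),
  // the pair (pa, pb) goes onto the worklist; they are congruent only if the
  // pair (v2, v3) can in turn be proven congruent.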
7282 bool AddPairToCongruencyWorklist(Definition* a, Definition* b) {
7283 if (!CanBeCongruent(a, b)) {
7284 return false;
7285 }
7286
7287 // If a is already in the worklist check if it is being compared to b.
7288 // Give up if it is not.
7289 if (in_worklist_->Contains(a->ssa_temp_index())) {
7290 for (intptr_t i = 0; i < congruency_worklist_.length(); i += 2) {
7291 if (a == congruency_worklist_[i]) {
7292 return (b == congruency_worklist_[i + 1]);
7293 }
7294 }
7295 UNREACHABLE();
7296 } else if (in_worklist_->Contains(b->ssa_temp_index())) {
7297 return AddPairToCongruencyWorklist(b, a);
7298 }
7299
7300 congruency_worklist_.Add(a);
7301 congruency_worklist_.Add(b);
7302 in_worklist_->Add(a->ssa_temp_index());
7303 return true;
7304 }
7305
7306 bool AreInputsCongruent(Definition* a, Definition* b) {
7307 ASSERT(a->tag() == b->tag());
7308 ASSERT(a->InputCount() == b->InputCount());
7309 for (intptr_t j = 0; j < a->InputCount(); j++) {
7310 Definition* inputA = a->InputAt(j)->definition();
7311 Definition* inputB = b->InputAt(j)->definition();
7312
7313 if (inputA != inputB) {
7314 if (!AddPairToCongruencyWorklist(inputA, inputB)) {
7315 return false;
7316 }
7317 }
7318 }
7319 return true;
7320 }
7321
7322 // Returns true if instruction dom dominates instruction other.
7323 static bool Dominates(Instruction* dom, Instruction* other) {
7324 BlockEntryInstr* dom_block = dom->GetBlock();
7325 BlockEntryInstr* other_block = other->GetBlock();
7326
7327 if (dom_block == other_block) {
7328 for (Instruction* current = dom->next();
7329 current != NULL;
7330 current = current->next()) {
7331 if (current == other) {
7332 return true;
7333 }
7334 }
7335 return false;
7336 }
7337
7338 return dom_block->Dominates(other_block);
7339 }
7340
7341 // Replace the given phi with another if they are congruent.
7342   // Returns true if it succeeds.
7343 bool ReplacePhiWith(PhiInstr* phi, PhiInstr* replacement) {
7344 ASSERT(phi->InputCount() == replacement->InputCount());
7345 ASSERT(phi->block() == replacement->block());
7346
7347 congruency_worklist_.Clear();
7348 if (in_worklist_ == NULL) {
7349 in_worklist_ = new(Z) BitVector(Z, graph_->current_ssa_temp_index());
7350 } else {
7351 in_worklist_->Clear();
7352 }
7353
7354     // During the comparison the worklist contains pairs of definitions to
7355     // be compared.
7356 if (!AddPairToCongruencyWorklist(phi, replacement)) {
7357 return false;
7358 }
7359
7360 // Process the worklist. It might grow during each comparison step.
7361 for (intptr_t i = 0; i < congruency_worklist_.length(); i += 2) {
7362 if (!AreInputsCongruent(congruency_worklist_[i],
7363 congruency_worklist_[i + 1])) {
7364 return false;
7365 }
7366 }
7367
7368     // At this point the worklist contains pairs of congruent definitions.
7369     // Replace one member of each pair with the other, maintaining a proper
7370     // domination relation between definitions and uses.
7371 for (intptr_t i = 0; i < congruency_worklist_.length(); i += 2) {
7372 Definition* a = congruency_worklist_[i];
7373 Definition* b = congruency_worklist_[i + 1];
7374
7375       // If these definitions are not phis then we need to pick the one that
7376       // dominates the other as the replacement: if a dominates b, swap them.
7377 // Note: both a and b are used as a phi input at the same block B which
7378 // means a dominates B and b dominates B, which guarantees that either
7379 // a dominates b or b dominates a.
7380 if (!a->IsPhi()) {
7381 if (Dominates(a, b)) {
7382 Definition* t = a;
7383 a = b;
7384 b = t;
7385 }
7386 ASSERT(Dominates(b, a));
7387 }
7388
7389 if (FLAG_trace_load_optimization) {
7390 THR_Print("Replacing %s with congruent %s\n",
7391 a->ToCString(),
7392 b->ToCString());
7393 }
7394
7395 a->ReplaceUsesWith(b);
7396 if (a->IsPhi()) {
7397 // We might be replacing a phi introduced by the load forwarding
7398 // that is not inserted in the graph yet.
7399 ASSERT(b->IsPhi());
7400 PhiInstr* phi_a = a->AsPhi();
7401 if (phi_a->is_alive()) {
7402 phi_a->mark_dead();
7403 phi_a->block()->RemovePhi(phi_a);
7404 phi_a->UnuseAllInputs();
7405 }
7406 } else {
7407 a->RemoveFromGraph();
7408 }
7409 }
7410
7411 return true;
7412 }
7413
7414 // Insert the given phi into the graph. Attempt to find an equal one in the
7415 // target block first.
7416 // Returns true if the phi was inserted and false if it was replaced.
7417 bool EmitPhi(PhiInstr* phi) {
7418 for (PhiIterator it(phi->block()); !it.Done(); it.Advance()) {
7419 if (ReplacePhiWith(phi, it.Current())) {
7420 return false;
7421 }
7422 }
7423
7424 phi->mark_alive();
7425 phi->block()->InsertPhi(phi);
7426 return true;
7427 }
7428
7429 // Phis have not yet been inserted into the graph but they have uses of
7430 // their inputs. Insert the non-redundant ones and clear the input uses
7431 // of the redundant ones.
7432 void EmitPhis() {
7433 // First eliminate all redundant phis.
7434 for (intptr_t i = 0; i < phis_.length(); i++) {
7435 PhiInstr* phi = phis_[i];
7436 if (!phi->HasUses() || EliminateRedundantPhi(phi)) {
7437 phi->UnuseAllInputs();
7438 phis_[i] = NULL;
7439 }
7440 }
7441
7442 // Now emit phis or replace them with equal phis already present in the
7443 // graph.
7444 for (intptr_t i = 0; i < phis_.length(); i++) {
7445 PhiInstr* phi = phis_[i];
7446 if ((phi != NULL) && (!phi->HasUses() || !EmitPhi(phi))) {
7447 phi->UnuseAllInputs();
7448 }
7449 }
7450 }
7451
7452 ZoneGrowableArray<Definition*>* CreateBlockOutValues() {
7453 ZoneGrowableArray<Definition*>* out =
7454 new(Z) ZoneGrowableArray<Definition*>(aliased_set_->max_place_id());
7455 for (intptr_t i = 0; i < aliased_set_->max_place_id(); i++) {
7456 out->Add(NULL);
7457 }
7458 return out;
7459 }
7460
7461 FlowGraph* graph_;
7462 DirectChainedHashMap<PointerKeyValueTrait<Place> >* map_;
7463
7464 // Mapping between field offsets in words and expression ids of loads from
7465 // that offset.
7466 AliasedSet* aliased_set_;
7467
7468 // Per block sets of expression ids for loads that are: incoming (available
7469 // on the entry), outgoing (available on the exit), generated and killed.
7470 GrowableArray<BitVector*> in_;
7471 GrowableArray<BitVector*> out_;
7472 GrowableArray<BitVector*> gen_;
7473 GrowableArray<BitVector*> kill_;
7474
7475 // Per block list of upwards exposed loads.
7476 GrowableArray<ZoneGrowableArray<Definition*>*> exposed_values_;
7477
7478 // Per block mappings between expression ids and outgoing definitions that
7479 // represent those ids.
7480 GrowableArray<ZoneGrowableArray<Definition*>*> out_values_;
7481
7482 // List of phis generated during ComputeOutValues and ForwardLoads.
7483 // Some of these phis might be redundant and thus a separate pass is
7484 // needed to emit only non-redundant ones.
7485 GrowableArray<PhiInstr*> phis_;
7486
7487 // Auxiliary worklist used by redundant phi elimination.
7488 GrowableArray<PhiInstr*> worklist_;
7489 GrowableArray<Definition*> congruency_worklist_;
7490 BitVector* in_worklist_;
7491
7492
7493 // True if any load was eliminated.
7494 bool forwarded_;
7495
7496 DISALLOW_COPY_AND_ASSIGN(LoadOptimizer);
7497 };
7498
7499
7500 class StoreOptimizer : public LivenessAnalysis {
7501 public:
7502 StoreOptimizer(FlowGraph* graph,
7503 AliasedSet* aliased_set,
7504 DirectChainedHashMap<PointerKeyValueTrait<Place> >* map)
7505 : LivenessAnalysis(aliased_set->max_place_id(), graph->postorder()),
7506 graph_(graph),
7507 map_(map),
7508 aliased_set_(aliased_set),
7509 exposed_stores_(graph_->postorder().length()) {
7510 const intptr_t num_blocks = graph_->postorder().length();
7511 for (intptr_t i = 0; i < num_blocks; i++) {
7512 exposed_stores_.Add(NULL);
7513 }
7514 }
7515
7516 static void OptimizeGraph(FlowGraph* graph) {
7517 ASSERT(FLAG_load_cse);
7518 if (FLAG_trace_load_optimization) {
7519 FlowGraphPrinter::PrintGraph("Before StoreOptimizer", graph);
7520 }
7521
7522 DirectChainedHashMap<PointerKeyValueTrait<Place> > map;
7523 AliasedSet* aliased_set = NumberPlaces(graph, &map, kOptimizeStores);
7524 if ((aliased_set != NULL) && !aliased_set->IsEmpty()) {
7525 StoreOptimizer store_optimizer(graph, aliased_set, &map);
7526 store_optimizer.Optimize();
7527 }
7528 }
7529
7530 private:
7531 void Optimize() {
7532 Analyze();
7533 if (FLAG_trace_load_optimization) {
7534 Dump();
7535 }
7536 EliminateDeadStores();
7537 if (FLAG_trace_load_optimization) {
7538 FlowGraphPrinter::PrintGraph("After StoreOptimizer", graph_);
7539 }
7540 }
7541
7542 bool CanEliminateStore(Instruction* instr) {
7543 switch (instr->tag()) {
7544 case Instruction::kStoreInstanceField: {
7545 StoreInstanceFieldInstr* store_instance = instr->AsStoreInstanceField();
7546 // Can't eliminate stores that initialize fields.
7547 return !(store_instance->is_potential_unboxed_initialization() ||
7548 store_instance->is_object_reference_initialization());
7549 }
7550 case Instruction::kStoreIndexed:
7551 case Instruction::kStoreStaticField:
7552 return true;
7553 default:
7554 UNREACHABLE();
7555 return false;
7556 }
7557 }
7558
7559 virtual void ComputeInitialSets() {
7560 Zone* zone = graph_->zone();
7561 BitVector* all_places = new(zone) BitVector(zone,
7562 aliased_set_->max_place_id());
7563 all_places->SetAll();
7564 for (BlockIterator block_it = graph_->postorder_iterator();
7565 !block_it.Done();
7566 block_it.Advance()) {
7567 BlockEntryInstr* block = block_it.Current();
7568 const intptr_t postorder_number = block->postorder_number();
7569
7570 BitVector* kill = kill_[postorder_number];
7571 BitVector* live_in = live_in_[postorder_number];
7572 BitVector* live_out = live_out_[postorder_number];
7573
7574 ZoneGrowableArray<Instruction*>* exposed_stores = NULL;
7575
7576 // Iterate backwards starting at the last instruction.
7577 for (BackwardInstructionIterator instr_it(block);
7578 !instr_it.Done();
7579 instr_it.Advance()) {
7580 Instruction* instr = instr_it.Current();
7581
7582 bool is_load = false;
7583 bool is_store = false;
7584 Place place(instr, &is_load, &is_store);
7585 if (place.IsImmutableField()) {
7586 // Loads/stores of final fields do not participate.
7587 continue;
7588 }
7589
7590 // Handle stores.
7591 if (is_store) {
7592 if (kill->Contains(instr->place_id())) {
7593 if (!live_in->Contains(instr->place_id()) &&
7594 CanEliminateStore(instr)) {
7595 if (FLAG_trace_optimization) {
7596 THR_Print(
7597 "Removing dead store to place %" Pd " in block B%" Pd "\n",
7598 instr->place_id(), block->block_id());
7599 }
7600 instr_it.RemoveCurrentFromGraph();
7601 }
7602 } else if (!live_in->Contains(instr->place_id())) {
7603           // Mark this store as downward exposed: such stores are the only
7604           // candidates for the global store elimination.
7605 if (exposed_stores == NULL) {
7606 const intptr_t kMaxExposedStoresInitialSize = 5;
7607 exposed_stores = new(zone) ZoneGrowableArray<Instruction*>(
7608 Utils::Minimum(kMaxExposedStoresInitialSize,
7609 aliased_set_->max_place_id()));
7610 }
7611 exposed_stores->Add(instr);
7612 }
7613 // Interfering stores kill only loads from the same place.
7614 kill->Add(instr->place_id());
7615 live_in->Remove(instr->place_id());
7616 continue;
7617 }
7618
7619 // Handle side effects, deoptimization and function return.
7620 if (!instr->Effects().IsNone() ||
7621 instr->CanDeoptimize() ||
7622 instr->IsThrow() ||
7623 instr->IsReThrow() ||
7624 instr->IsReturn()) {
7625 // Instructions that return from the function, instructions with side
7626 // effects and instructions that can deoptimize are considered as
7627 // loads from all places.
7628 live_in->CopyFrom(all_places);
7629 if (instr->IsThrow() || instr->IsReThrow() || instr->IsReturn()) {
7630 // Initialize live-out for exit blocks since it won't be computed
7631 // otherwise during the fixed point iteration.
7632 live_out->CopyFrom(all_places);
7633 }
7634 continue;
7635 }
7636
7637 // Handle loads.
7638 Definition* defn = instr->AsDefinition();
7639 if ((defn != NULL) && IsLoadEliminationCandidate(defn)) {
7640 const intptr_t alias = aliased_set_->LookupAliasId(place.ToAlias());
7641 live_in->AddAll(aliased_set_->GetKilledSet(alias));
7642 continue;
7643 }
7644 }
7645 exposed_stores_[postorder_number] = exposed_stores;
7646 }
7647 if (FLAG_trace_load_optimization) {
7648 Dump();
7649 THR_Print("---\n");
7650 }
7651 }
7652
7653 void EliminateDeadStores() {
7654 // Iteration order does not matter here.
7655 for (BlockIterator block_it = graph_->postorder_iterator();
7656 !block_it.Done();
7657 block_it.Advance()) {
7658 BlockEntryInstr* block = block_it.Current();
7659 const intptr_t postorder_number = block->postorder_number();
7660
7661 BitVector* live_out = live_out_[postorder_number];
7662
7663 ZoneGrowableArray<Instruction*>* exposed_stores =
7664 exposed_stores_[postorder_number];
7665 if (exposed_stores == NULL) continue; // No exposed stores.
7666
7667 // Iterate over candidate stores.
7668 for (intptr_t i = 0; i < exposed_stores->length(); ++i) {
7669 Instruction* instr = (*exposed_stores)[i];
7670 bool is_load = false;
7671 bool is_store = false;
7672 Place place(instr, &is_load, &is_store);
7673 ASSERT(!is_load && is_store);
7674 if (place.IsImmutableField()) {
7675           // Final fields do not participate in dead store elimination.
7676 continue;
7677 }
7678 // Eliminate a downward exposed store if the corresponding place is not
7679 // in live-out.
7680 if (!live_out->Contains(instr->place_id()) &&
7681 CanEliminateStore(instr)) {
7682 if (FLAG_trace_optimization) {
7683 THR_Print("Removing dead store to place %" Pd " block B%" Pd "\n",
7684 instr->place_id(), block->block_id());
7685 }
7686 instr->RemoveFromGraph(/* ignored */ false);
7687 }
7688 }
7689 }
7690 }
7691
7692 FlowGraph* graph_;
7693 DirectChainedHashMap<PointerKeyValueTrait<Place> >* map_;
7694
7695 // Mapping between field offsets in words and expression ids of loads from
7696 // that offset.
7697 AliasedSet* aliased_set_;
7698
7699 // Per block list of downward exposed stores.
7700 GrowableArray<ZoneGrowableArray<Instruction*>*> exposed_stores_;
7701
7702 DISALLOW_COPY_AND_ASSIGN(StoreOptimizer);
7703 };
7704
7705
7706 void DeadStoreElimination::Optimize(FlowGraph* graph) {
7707 if (FLAG_dead_store_elimination) {
7708 StoreOptimizer::OptimizeGraph(graph);
7709 }
7710 }
7711
7712
7713 // Returns true iff this definition is used in a non-phi instruction.
7714 static bool HasRealUse(Definition* def) {
7715 // Environment uses are real (non-phi) uses.
7716 if (def->env_use_list() != NULL) return true;
7717
7718 for (Value::Iterator it(def->input_use_list());
7719 !it.Done();
7720 it.Advance()) {
7721 if (!it.Current()->instruction()->IsPhi()) return true;
7722 }
7723 return false;
7724 }
7725
7726
7727 void DeadCodeElimination::EliminateDeadPhis(FlowGraph* flow_graph) {
7728 GrowableArray<PhiInstr*> live_phis;
7729 for (BlockIterator b = flow_graph->postorder_iterator();
7730 !b.Done();
7731 b.Advance()) {
7732 JoinEntryInstr* join = b.Current()->AsJoinEntry();
7733 if (join != NULL) {
7734 for (PhiIterator it(join); !it.Done(); it.Advance()) {
7735 PhiInstr* phi = it.Current();
7736 // Phis that have uses and phis inside try blocks are
7737 // marked as live.
7738 if (HasRealUse(phi) || join->InsideTryBlock()) {
7739 live_phis.Add(phi);
7740 phi->mark_alive();
7741 } else {
7742 phi->mark_dead();
7743 }
7744 }
7745 }
7746 }
7747
7748 while (!live_phis.is_empty()) {
7749 PhiInstr* phi = live_phis.RemoveLast();
7750 for (intptr_t i = 0; i < phi->InputCount(); i++) {
7751 Value* val = phi->InputAt(i);
7752 PhiInstr* used_phi = val->definition()->AsPhi();
7753 if ((used_phi != NULL) && !used_phi->is_alive()) {
7754 used_phi->mark_alive();
7755 live_phis.Add(used_phi);
7756 }
7757 }
7758 }
7759
7760 for (BlockIterator it(flow_graph->postorder_iterator());
7761 !it.Done();
7762 it.Advance()) {
7763 JoinEntryInstr* join = it.Current()->AsJoinEntry();
7764 if (join != NULL) {
7765 if (join->phis_ == NULL) continue;
7766
7767 // Eliminate dead phis and compact the phis_ array of the block.
7768 intptr_t to_index = 0;
7769 for (intptr_t i = 0; i < join->phis_->length(); ++i) {
7770 PhiInstr* phi = (*join->phis_)[i];
7771 if (phi != NULL) {
7772 if (!phi->is_alive()) {
7773 phi->ReplaceUsesWith(flow_graph->constant_null());
7774 phi->UnuseAllInputs();
7775 (*join->phis_)[i] = NULL;
7776 if (FLAG_trace_optimization) {
7777 THR_Print("Removing dead phi v%" Pd "\n", phi->ssa_temp_index());
7778 }
7779 } else if (phi->IsRedundant()) {
7780 phi->ReplaceUsesWith(phi->InputAt(0)->definition());
7781 phi->UnuseAllInputs();
7782 (*join->phis_)[i] = NULL;
7783 if (FLAG_trace_optimization) {
7784 THR_Print("Removing redundant phi v%" Pd "\n",
7785 phi->ssa_temp_index());
7786 }
7787 } else {
7788 (*join->phis_)[to_index++] = phi;
7789 }
7790 }
7791 }
7792 if (to_index == 0) {
7793 join->phis_ = NULL;
7794 } else {
7795 join->phis_->TruncateTo(to_index);
7796 }
7797 }
7798 }
7799 }
7800
7801
7802 class CSEInstructionMap : public ValueObject {
7803 public:
7804 // Right now CSE and LICM track a single effect: possible externalization of
7805 // strings.
7806 // Other effects like modifications of fields are tracked in a separate load
7807 // forwarding pass via Alias structure.
7808 COMPILE_ASSERT(EffectSet::kLastEffect == 1);
7809
7810 CSEInstructionMap() : independent_(), dependent_() { }
7811 explicit CSEInstructionMap(const CSEInstructionMap& other)
7812 : ValueObject(),
7813 independent_(other.independent_),
7814 dependent_(other.dependent_) {
7815 }
7816
7817 void RemoveAffected(EffectSet effects) {
7818 if (!effects.IsNone()) {
7819 dependent_.Clear();
7820 }
7821 }
7822
7823 Instruction* Lookup(Instruction* other) const {
7824 return GetMapFor(other)->Lookup(other);
7825 }
7826
7827 void Insert(Instruction* instr) {
7828 return GetMapFor(instr)->Insert(instr);
7829 }
7830
7831 private:
7832 typedef DirectChainedHashMap<PointerKeyValueTrait<Instruction> > Map;
7833
7834 Map* GetMapFor(Instruction* instr) {
7835 return instr->Dependencies().IsNone() ? &independent_ : &dependent_;
7836 }
7837
7838 const Map* GetMapFor(Instruction* instr) const {
7839 return instr->Dependencies().IsNone() ? &independent_ : &dependent_;
7840 }
7841
7842 // All computations that are not affected by any side-effect.
7843 // Majority of computations are not affected by anything and will be in
7844 // this map.
7845 Map independent_;
7846
7847 // All computations that are affected by side effect.
7848 Map dependent_;
7849 };
7850
7851
7852 bool DominatorBasedCSE::Optimize(FlowGraph* graph) {
7853 bool changed = false;
7854 if (FLAG_load_cse) {
7855 changed = LoadOptimizer::OptimizeGraph(graph) || changed;
7856 }
7857
7858 CSEInstructionMap map;
7859 changed = OptimizeRecursive(graph, graph->graph_entry(), &map) || changed;
7860
7861 return changed;
7862 }
7863
7864
7865 bool DominatorBasedCSE::OptimizeRecursive(
7866 FlowGraph* graph,
7867 BlockEntryInstr* block,
7868 CSEInstructionMap* map) {
7869 bool changed = false;
7870 for (ForwardInstructionIterator it(block); !it.Done(); it.Advance()) {
7871 Instruction* current = it.Current();
7872 if (current->AllowsCSE()) {
7873 Instruction* replacement = map->Lookup(current);
7874 if ((replacement != NULL) &&
7875 graph->block_effects()->IsAvailableAt(replacement, block)) {
7876 // Replace current with lookup result.
7877 ReplaceCurrentInstruction(&it, current, replacement, graph);
7878 changed = true;
7879 continue;
7880 }
7881
7882       // For simplicity we assume that an instruction either does not
7883       // depend on anything or does not affect anything. If this is not the
7884       // case then we should first remove affected instructions from the map
7885       // and then add the instruction to the map so it does not kill itself.
7886 ASSERT(current->Effects().IsNone() || current->Dependencies().IsNone());
7887 map->Insert(current);
7888 }
7889
7890 map->RemoveAffected(current->Effects());
7891 }
7892
7893 // Process children in the dominator tree recursively.
7894 intptr_t num_children = block->dominated_blocks().length();
7895 for (intptr_t i = 0; i < num_children; ++i) {
7896 BlockEntryInstr* child = block->dominated_blocks()[i];
7897 if (i < num_children - 1) {
7898 // Copy map.
7899 CSEInstructionMap child_map(*map);
7900 changed = OptimizeRecursive(graph, child, &child_map) || changed;
7901 } else {
7902 // Reuse map for the last child.
7903 changed = OptimizeRecursive(graph, child, map) || changed;
7904 }
7905 }
7906 return changed;
7907 }
7908
7909
7910 void FlowGraphOptimizer::EliminateEnvironments() {
7911   // After this pass we can no longer perform LICM and hoist instructions
7912   // that can deoptimize.
7913 
7914   flow_graph_->disallow_licm();
7915   for (intptr_t i = 0; i < block_order_.length(); ++i) {
7916     BlockEntryInstr* block = block_order_[i];
7917     block->RemoveEnvironment();
7918     for (ForwardInstructionIterator it(block); !it.Done(); it.Advance()) {
7919       Instruction* current = it.Current();
7920       if (!current->CanDeoptimize()) {
7921         // TODO(srdjan): --source-lines needs deopt environments to get at
7922         // the code for this instruction, however, leaving the environment
7923         // changes code.
7924         current->RemoveEnvironment();
7925       }
7926     }
7927   }
7928 }
7929 
7930 
7931 enum SafeUseCheck { kOptimisticCheck, kStrictCheck };
7932
7933 // Check if the use is safe for allocation sinking. Allocation sinking
7934 // candidates can only be used at store instructions:
7935 //
7936 // - any store into the allocation candidate itself is unconditionally safe
7937 // as it just changes the rematerialization state of this candidate;
7938 // - store into another object is only safe if another object is allocation
7939 // candidate.
7940 //
7941 // We use a simple fix-point algorithm to discover the set of valid
7942 // candidates (see the CollectCandidates method), which is why IsSafeUse can
7943 // operate in two modes:
7944 //
7945 // - optimistic, when every allocation is assumed to be an allocation
7946 // sinking candidate;
7947 // - strict, when only marked allocations are assumed to be allocation
7948 // sinking candidates.
7949 //
7950 // The fix-point algorithm in CollectCandidates first collects a set of
7951 // allocations optimistically, then checks each collected candidate strictly
7952 // and transitively unmarks invalid candidates until only valid ones remain.
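// Illustrative sketch (hypothetical, not part of this change):
//
//   v1 <- AllocateObject(C)
//   v1.f <- v2              ;; safe: a store into the candidate itself
//   v3.g <- v1              ;; safe only if v3 is itself a candidate
//   call(v1)                ;; unsafe: v1 escapes through the call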
7953 static bool IsSafeUse(Value* use, SafeUseCheck check_type) {
7954 if (use->instruction()->IsMaterializeObject()) {
7955 return true;
7956 }
7957
7958 StoreInstanceFieldInstr* store = use->instruction()->AsStoreInstanceField();
7959 if (store != NULL) {
7960 if (use == store->value()) {
7961 Definition* instance = store->instance()->definition();
7962 return instance->IsAllocateObject() &&
7963 ((check_type == kOptimisticCheck) ||
7964 instance->Identity().IsAllocationSinkingCandidate());
7965 }
7966 return true;
7967 }
7968
7969 return false;
7970 }
7971
7972
7973 // Right now we are attempting to sink allocations only into deoptimization
7974 // exits. So a candidate should only be used in StoreInstanceField
7975 // instructions that write into fields of the allocated object.
7976 // We do not support materialization of objects that have type arguments.
7977 static bool IsAllocationSinkingCandidate(Definition* alloc,
7978 SafeUseCheck check_type) {
7979 for (Value* use = alloc->input_use_list();
7980 use != NULL;
7981 use = use->next_use()) {
7982 if (!IsSafeUse(use, check_type)) {
7983 if (FLAG_trace_optimization) {
7984 THR_Print("use of %s at %s is unsafe for allocation sinking\n",
7985 alloc->ToCString(),
7986 use->instruction()->ToCString());
7987 }
7988 return false;
7989 }
7990 }
7991
7992 return true;
7993 }
7994
7995
7996 // If the given use is a store into an object then return the object we are
7997 // storing into.
7998 static Definition* StoreInto(Value* use) {
7999 StoreInstanceFieldInstr* store = use->instruction()->AsStoreInstanceField();
8000 if (store != NULL) {
8001 return store->instance()->definition();
8002 }
8003
8004 return NULL;
8005 }
8006
8007
8008 // Remove the given allocation from the graph. It is not observable.
8009 // If deoptimization occurs the object will be materialized.
8010 void AllocationSinking::EliminateAllocation(Definition* alloc) {
8011 ASSERT(IsAllocationSinkingCandidate(alloc, kStrictCheck));
8012
8013 if (FLAG_trace_optimization) {
8014 THR_Print("removing allocation from the graph: v%" Pd "\n",
8015 alloc->ssa_temp_index());
8016 }
8017
8018 // As an allocation sinking candidate it is only used in stores to its own
8019 // fields. Remove these stores.
8020 for (Value* use = alloc->input_use_list();
8021 use != NULL;
8022 use = alloc->input_use_list()) {
8023 use->instruction()->RemoveFromGraph();
8024 }
8025
8026 // There should be no environment uses. The pass replaced them with
8027 // MaterializeObject instructions.
8028 #ifdef DEBUG
8029 for (Value* use = alloc->env_use_list();
8030 use != NULL;
8031 use = use->next_use()) {
8032 ASSERT(use->instruction()->IsMaterializeObject());
8033 }
8034 #endif
8035 ASSERT(alloc->input_use_list() == NULL);
8036 alloc->RemoveFromGraph();
8037 if (alloc->ArgumentCount() > 0) {
8038 ASSERT(alloc->ArgumentCount() == 1);
8039 for (intptr_t i = 0; i < alloc->ArgumentCount(); ++i) {
8040 alloc->PushArgumentAt(i)->RemoveFromGraph();
8041 }
8042 }
8043 }
8044
8045
8046 // Find allocation instructions that can potentially be eliminated and
8047 // rematerialized at deoptimization exits if needed. See IsSafeUse
8048 // for a description of the algorithm used below.
8049 void AllocationSinking::CollectCandidates() {
8050 // Optimistically collect all potential candidates.
8051 for (BlockIterator block_it = flow_graph_->reverse_postorder_iterator();
8052 !block_it.Done();
8053 block_it.Advance()) {
8054 BlockEntryInstr* block = block_it.Current();
8055 for (ForwardInstructionIterator it(block); !it.Done(); it.Advance()) {
8056 { AllocateObjectInstr* alloc = it.Current()->AsAllocateObject();
8057 if ((alloc != NULL) &&
8058 IsAllocationSinkingCandidate(alloc, kOptimisticCheck)) {
8059 alloc->SetIdentity(AliasIdentity::AllocationSinkingCandidate());
8060 candidates_.Add(alloc);
8061 }
8062 }
8063 { AllocateUninitializedContextInstr* alloc =
8064 it.Current()->AsAllocateUninitializedContext();
8065 if ((alloc != NULL) &&
8066 IsAllocationSinkingCandidate(alloc, kOptimisticCheck)) {
8067 alloc->SetIdentity(AliasIdentity::AllocationSinkingCandidate());
8068 candidates_.Add(alloc);
8069 }
8070 }
8071 }
8072 }
8073
8074 // Transitively unmark all candidates that are not strictly valid.
8075 bool changed;
8076 do {
8077 changed = false;
8078 for (intptr_t i = 0; i < candidates_.length(); i++) {
8079 Definition* alloc = candidates_[i];
8080 if (alloc->Identity().IsAllocationSinkingCandidate()) {
8081 if (!IsAllocationSinkingCandidate(alloc, kStrictCheck)) {
8082 alloc->SetIdentity(AliasIdentity::Unknown());
8083 changed = true;
8084 }
8085 }
8086 }
8087 } while (changed);
8088
8089 // Shrink the list of candidates removing all unmarked ones.
8090 intptr_t j = 0;
8091 for (intptr_t i = 0; i < candidates_.length(); i++) {
8092 Definition* alloc = candidates_[i];
8093 if (alloc->Identity().IsAllocationSinkingCandidate()) {
8094 if (FLAG_trace_optimization) {
8095 THR_Print("discovered allocation sinking candidate: v%" Pd "\n",
8096 alloc->ssa_temp_index());
8097 }
8098
8099 if (j != i) {
8100 candidates_[j] = alloc;
8101 }
8102 j++;
8103 }
8104 }
8105 candidates_.TruncateTo(j);
8106 }
8107
8108
8109 // If a materialization references an allocation sinking candidate, replace
8110 // this reference with the materialization that should have been computed
8111 // for this side-exit. CollectAllExits should have collected this exit.
8112 void AllocationSinking::NormalizeMaterializations() {
8113 for (intptr_t i = 0; i < candidates_.length(); i++) {
8114 Definition* alloc = candidates_[i];
8115
8116 Value* next_use;
8117 for (Value* use = alloc->input_use_list();
8118 use != NULL;
8119 use = next_use) {
8120 next_use = use->next_use();
8121 if (use->instruction()->IsMaterializeObject()) {
8122 use->BindTo(MaterializationFor(alloc, use->instruction()));
8123 }
8124 }
8125 }
8126 }
8127
8128
8129 // We transitively insert materializations at each deoptimization exit that
8130 // might see the given allocation (see ExitsCollector). Some of these
8131 // materializations are not actually used and some fail to compute because
8132 // they are inserted in a block that is not dominated by the allocation.
8133 // Remove such unused materializations from the graph.
8134 void AllocationSinking::RemoveUnusedMaterializations() {
8135 intptr_t j = 0;
8136 for (intptr_t i = 0; i < materializations_.length(); i++) {
8137 MaterializeObjectInstr* mat = materializations_[i];
8138 if ((mat->input_use_list() == NULL) && (mat->env_use_list() == NULL)) {
8139 // Check if this materialization failed to compute and remove any
8140 // unforwarded loads. There were no loads from any allocation sinking
8141 // candidate in the beginning, so it is safe to assume that any encountered
8142 // load was inserted by CreateMaterializationAt.
8143 for (intptr_t i = 0; i < mat->InputCount(); i++) {
8144 LoadFieldInstr* load = mat->InputAt(i)->definition()->AsLoadField();
8145 if ((load != NULL) &&
8146 (load->instance()->definition() == mat->allocation())) {
8147 load->ReplaceUsesWith(flow_graph_->constant_null());
8148 load->RemoveFromGraph();
8149 }
8150 }
8151 mat->RemoveFromGraph();
8152 } else {
8153 if (j != i) {
8154 materializations_[j] = mat;
8155 }
8156 j++;
8157 }
8158 }
8159 materializations_.TruncateTo(j);
8160 }
8161
8162
8163 // Some candidates might stop being eligible for allocation sinking after
8164 // load forwarding because they flow into phis that load forwarding
8165 // inserts. Discover such allocations and remove them from the list
8166 // of allocation sinking candidates, undoing all changes that we made
8167 // in preparation for sinking these allocations.
8168 void AllocationSinking::DiscoverFailedCandidates() {
8169 // Transitively unmark all candidates that are not strictly valid.
8170 bool changed;
8171 do {
8172 changed = false;
8173 for (intptr_t i = 0; i < candidates_.length(); i++) {
8174 Definition* alloc = candidates_[i];
8175 if (alloc->Identity().IsAllocationSinkingCandidate()) {
8176 if (!IsAllocationSinkingCandidate(alloc, kStrictCheck)) {
8177 alloc->SetIdentity(AliasIdentity::Unknown());
8178 changed = true;
8179 }
8180 }
8181 }
8182 } while (changed);
8183
8184 // Remove all failed candidates from the candidates list.
8185 intptr_t j = 0;
8186 for (intptr_t i = 0; i < candidates_.length(); i++) {
8187 Definition* alloc = candidates_[i];
8188 if (!alloc->Identity().IsAllocationSinkingCandidate()) {
8189 if (FLAG_trace_optimization) {
8190 THR_Print("allocation v%" Pd " can't be eliminated\n",
8191 alloc->ssa_temp_index());
8192 }
8193
8194 #ifdef DEBUG
8195 for (Value* use = alloc->env_use_list();
8196 use != NULL;
8197 use = use->next_use()) {
8198 ASSERT(use->instruction()->IsMaterializeObject());
8199 }
8200 #endif
8201
8202 // All materializations will be removed from the graph. Remove inserted
8203 // loads first and detach materializations from the allocation's environment
8204 // use list: we will reconstruct it when we start removing
8205 // materializations.
8206 alloc->set_env_use_list(NULL);
8207 for (Value* use = alloc->input_use_list();
8208 use != NULL;
8209 use = use->next_use()) {
8210 if (use->instruction()->IsLoadField()) {
8211 LoadFieldInstr* load = use->instruction()->AsLoadField();
8212 load->ReplaceUsesWith(flow_graph_->constant_null());
8213 load->RemoveFromGraph();
8214 } else {
8215 ASSERT(use->instruction()->IsMaterializeObject() ||
8216 use->instruction()->IsPhi() ||
8217 use->instruction()->IsStoreInstanceField());
8218 }
8219 }
8220 } else {
8221 if (j != i) {
8222 candidates_[j] = alloc;
8223 }
8224 j++;
8225 }
8226 }
8227
8228 if (j != candidates_.length()) { // Something was removed from candidates.
8229 intptr_t k = 0;
8230 for (intptr_t i = 0; i < materializations_.length(); i++) {
8231 MaterializeObjectInstr* mat = materializations_[i];
8232 if (!mat->allocation()->Identity().IsAllocationSinkingCandidate()) {
8233 // Restore environment uses of the allocation that were replaced
8234 // by this materialization and drop materialization.
8235 mat->ReplaceUsesWith(mat->allocation());
8236 mat->RemoveFromGraph();
8237 } else {
8238 if (k != i) {
8239 materializations_[k] = mat;
8240 }
8241 k++;
8242 }
8243 }
8244 materializations_.TruncateTo(k);
8245 }
8246
8247 candidates_.TruncateTo(j);
8248 }
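
The undo path above hinges on mat->ReplaceUsesWith(mat->allocation()): every deopt environment that referred to the dropped materialization is pointed back at the original allocation. A toy def-use sketch of that rewiring, with hypothetical Defn and Use types instead of the VM's Definition and Value classes:

#include <vector>

struct Use;

struct Defn {
  std::vector<Use*> uses;  // all uses that currently reference this definition
};

struct Use {
  Defn* definition;
};

// Redirect every use of `from` to `to`, mirroring ReplaceUsesWith above.
static void ReplaceUsesWith(Defn* from, Defn* to) {
  for (Use* use : from->uses) {
    use->definition = to;
    to->uses.push_back(use);
  }
  from->uses.clear();
}
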
8249
8250
8251 void AllocationSinking::Optimize() {
8252 CollectCandidates();
8253
8254 // Insert MaterializeObject instructions that will describe the state of the
8255 // object at all deoptimization points. Each inserted materialization looks
8256 // like this (where v_0 is the allocation that we are going to eliminate):
8257 // v_1 <- LoadField(v_0, field_1)
8258 // ...
8259 // v_N <- LoadField(v_0, field_N)
8260 // v_{N+1} <- MaterializeObject(field_1 = v_1, ..., field_N = v_{N})
8261 for (intptr_t i = 0; i < candidates_.length(); i++) {
8262 InsertMaterializations(candidates_[i]);
8263 }
8264
8265 // Run load forwarding to eliminate LoadField instructions inserted above.
8266 // All loads will be successfully eliminated because:
8267 // a) they use fields (not offsets) and thus provide precise aliasing
8268 // information
8269 //   b) the candidate does not escape and thus its fields are not affected
8270 //      by external effects from calls.
8271 LoadOptimizer::OptimizeGraph(flow_graph_);
8272
8273 NormalizeMaterializations();
8274
8275 RemoveUnusedMaterializations();
8276
8277 // If any candidates are no longer eligible for allocation sinking abort
8278 // the optimization for them and undo any changes we did in preparation.
8279 DiscoverFailedCandidates();
8280
8281 // At this point we have computed the state of the object at each
8282 // deoptimization point and we can eliminate it. Loads inserted above were
8283 // forwarded, so the allocation has no uses, just as at the start of the pass.
8284 for (intptr_t i = 0; i < candidates_.length(); i++) {
8285 EliminateAllocation(candidates_[i]);
8286 }
8287
8288 // Process materializations and unbox their arguments: materializations
8289 // are part of the environment and can materialize boxes for double/mint/simd
8290 // values when needed.
8291 // TODO(vegorov): handle all box types here.
8292 for (intptr_t i = 0; i < materializations_.length(); i++) {
8293 MaterializeObjectInstr* mat = materializations_[i];
8294 for (intptr_t j = 0; j < mat->InputCount(); j++) {
8295 Definition* defn = mat->InputAt(j)->definition();
8296 if (defn->IsBox()) {
8297 mat->InputAt(j)->BindTo(defn->InputAt(0)->definition());
8298 }
8299 }
8300 }
8301 }
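
The final loop of Optimize() strips Box instructions out of materializations so that the deoptimizer reads the raw value and boxes it on demand. A simplified stand-in for that rewrite, using toy Defn and Materialization types rather than the VM's instruction classes:

#include <vector>

struct Defn {
  bool is_box = false;
  Defn* box_operand = nullptr;  // operand of the Box, set when is_box is true
};

struct Materialization {
  std::vector<Defn*> inputs;
};

static void UnboxMaterializationInputs(std::vector<Materialization*>* mats) {
  for (Materialization* mat : *mats) {
    for (Defn*& input : mat->inputs) {
      if (input->is_box) {
        // Read the raw value directly; the deoptimizer can box it when needed.
        input = input->box_operand;
      }
    }
  }
}
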
8302
8303
8304 // Remove materializations from the graph. The register allocator will treat
8305 // them as part of the environment, not as real instructions.
8306 void AllocationSinking::DetachMaterializations() {
8307 for (intptr_t i = 0; i < materializations_.length(); i++) {
8308 materializations_[i]->previous()->LinkTo(materializations_[i]->next());
8309 }
8310 }
8311
8312
8313 // Add a field/offset to the list of slots if it is not yet present there.
8314 static bool AddSlot(ZoneGrowableArray<const Object*>* slots,
8315 const Object& slot) {
8316 for (intptr_t i = 0; i < slots->length(); i++) {
8317 if ((*slots)[i]->raw() == slot.raw()) {
8318 return false;
8319 }
8320 }
8321 slots->Add(&slot);
8322 return true;
8323 }
8324
8325
8326 // Find the deoptimization exit for the given materialization, assuming that
8327 // all materializations are emitted right before the instruction that is a
8328 // deoptimization exit.
8329 static Instruction* ExitForMaterialization(MaterializeObjectInstr* mat) {
8330 while (mat->next()->IsMaterializeObject()) {
8331 mat = mat->next()->AsMaterializeObject();
8332 }
8333 return mat->next();
8334 }
8335
8336
8337 // Given a deoptimization exit, find the first materialization that was
8338 // inserted before it.
8339 static Instruction* FirstMaterializationAt(Instruction* exit) {
8340 while (exit->previous()->IsMaterializeObject()) {
8341 exit = exit->previous();
8342 }
8343 return exit;
8344 }
8345
8346
8347 // Given an allocation and a deoptimization exit, try to find the
8348 // MaterializeObject instruction corresponding to this allocation at this exit.
8349 MaterializeObjectInstr* AllocationSinking::MaterializationFor(
8350 Definition* alloc, Instruction* exit) {
8351 if (exit->IsMaterializeObject()) {
8352 exit = ExitForMaterialization(exit->AsMaterializeObject());
8353 }
8354
8355 for (MaterializeObjectInstr* mat = exit->previous()->AsMaterializeObject();
8356 mat != NULL;
8357 mat = mat->previous()->AsMaterializeObject()) {
8358 if (mat->allocation() == alloc) {
8359 return mat;
8360 }
8361 }
8362
8363 return NULL;
8364 }
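
These three helpers all rely on the invariant that the materializations for a deopt exit form one contiguous run immediately before that exit. The toy doubly-linked instruction list below (a hypothetical Instr type, not the VM's Instruction class) makes that walking explicit:

struct Instr {
  bool is_materialization = false;
  const void* allocation = nullptr;  // set only on materializations
  Instr* prev = nullptr;
  Instr* next = nullptr;
};

// Walk forward over the run of materializations to the deopt exit itself.
static Instr* ExitFor(Instr* mat) {
  while (mat->next != nullptr && mat->next->is_materialization) {
    mat = mat->next;
  }
  return mat->next;
}

// Walk backward to the first materialization emitted before the exit.
static Instr* FirstMaterializationBefore(Instr* exit) {
  while (exit->prev != nullptr && exit->prev->is_materialization) {
    exit = exit->prev;
  }
  return exit;
}

// Scan the run backwards for the materialization of a particular allocation.
static Instr* MaterializationOf(const void* alloc, Instr* exit) {
  for (Instr* m = exit->prev; m != nullptr && m->is_materialization;
       m = m->prev) {
    if (m->allocation == alloc) return m;
  }
  return nullptr;
}
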
8365
8366
8367 // Insert a MaterializeObject instruction for the given allocation before
8368 // the given instruction that can deoptimize.
8369 void AllocationSinking::CreateMaterializationAt(
8370 Instruction* exit,
8371 Definition* alloc,
8372 const ZoneGrowableArray<const Object*>& slots) {
8373 ZoneGrowableArray<Value*>* values =
8374 new(Z) ZoneGrowableArray<Value*>(slots.length());
8375
8376 // All loads should be inserted before the first materialization so that
8377 // IR follows the following pattern: loads, materializations, deoptimizing
8378 // instruction.
8379 Instruction* load_point = FirstMaterializationAt(exit);
8380
8381 // Insert load instruction for every field.
8382 for (intptr_t i = 0; i < slots.length(); i++) {
8383 LoadFieldInstr* load = slots[i]->IsField()
8384 ? new(Z) LoadFieldInstr(
8385 new(Z) Value(alloc),
8386 &Field::Cast(*slots[i]),
8387 AbstractType::ZoneHandle(Z),
8388 alloc->token_pos())
8389 : new(Z) LoadFieldInstr(
8390 new(Z) Value(alloc),
8391 Smi::Cast(*slots[i]).Value(),
8392 AbstractType::ZoneHandle(Z),
8393 alloc->token_pos());
8394 flow_graph_->InsertBefore(
8395 load_point, load, NULL, FlowGraph::kValue);
8396 values->Add(new(Z) Value(load));
8397 }
8398
8399 MaterializeObjectInstr* mat = NULL;
8400 if (alloc->IsAllocateObject()) {
8401 mat = new(Z) MaterializeObjectInstr(
8402 alloc->AsAllocateObject(), slots, values);
8403 } else {
8404 ASSERT(alloc->IsAllocateUninitializedContext());
8405 mat = new(Z) MaterializeObjectInstr(
8406 alloc->AsAllocateUninitializedContext(), slots, values);
8407 }
8408
8409 flow_graph_->InsertBefore(exit, mat, NULL, FlowGraph::kValue);
8410
8411 // Replace all mentions of this allocation with a newly inserted
8412 // MaterializeObject instruction.
8413 // We must preserve the identity: all mentions are replaced by the same
8414 // materialization.
8415 for (Environment::DeepIterator env_it(exit->env());
8416 !env_it.Done();
8417 env_it.Advance()) {
8418 Value* use = env_it.CurrentValue();
8419 if (use->definition() == alloc) {
8420 use->RemoveFromUseList();
8421 use->set_definition(mat);
8422 mat->AddEnvUse(use);
8423 }
8424 }
8425
8426 // Mark MaterializeObject as an environment use of this allocation.
8427 // This will allow us to discover it when we are looking for deoptimization
8428 // exits for another allocation that potentially flows into this one.
8429 Value* val = new(Z) Value(alloc);
8430 val->set_instruction(mat);
8431 alloc->AddEnvUse(val);
8432
8433 // Record inserted materialization.
8434 materializations_.Add(mat);
8435 }
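
As a concrete picture of what CreateMaterializationAt produces, here is a sketch of the instruction order right before one deoptimizing instruction, using made-up SSA names and two hypothetical candidates v_A and v_B:

//   v_1 <- LoadField(v_A, field_x)
//   v_2 <- LoadField(v_A, field_y)
//   v_3 <- LoadField(v_B, field_z)
//   m_A <- MaterializeObject(field_x = v_1, field_y = v_2)
//   m_B <- MaterializeObject(field_z = v_3)
//   <deoptimizing instruction whose environment now refers to m_A and m_B>

Loads are placed before the first materialization (FirstMaterializationAt), and every reference to v_A or v_B in the exit's environment is rebound to the corresponding materialization.
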
8436
8437
8438 // Add the given instruction to the list of instructions if it is not yet
8439 // present there.
8440 template<typename T>
8441 void AddInstruction(GrowableArray<T*>* list, T* value) {
8442 ASSERT(!value->IsGraphEntry());
8443 for (intptr_t i = 0; i < list->length(); i++) {
8444 if ((*list)[i] == value) {
8445 return;
8446 }
8447 }
8448 list->Add(value);
8449 }
8450
8451
8452 // Transitively collect all deoptimization exits that might need this allocation
8453 // rematerialized. It is not enough to collect only environment uses of this
8454 // allocation because it can flow into other objects that will be
8455 // dematerialized and that are referenced by deopt environments that
8456 // don't contain this allocation explicitly.
8457 void AllocationSinking::ExitsCollector::Collect(Definition* alloc) {
8458 for (Value* use = alloc->env_use_list();
8459 use != NULL;
8460 use = use->next_use()) {
8461 if (use->instruction()->IsMaterializeObject()) {
8462 AddInstruction(&exits_, ExitForMaterialization(
8463 use->instruction()->AsMaterializeObject()));
8464 } else {
8465 AddInstruction(&exits_, use->instruction());
8466 }
8467 }
8468
8469 // Check if this allocation is stored into any other allocation sinking
8470 // candidate and put it on the worklist so that we conservatively collect
8471 // all exits for that candidate as well, because they might potentially see
8472 // this object.
8473 for (Value* use = alloc->input_use_list();
8474 use != NULL;
8475 use = use->next_use()) {
8476 Definition* obj = StoreInto(use);
8477 if ((obj != NULL) && (obj != alloc)) {
8478 AddInstruction(&worklist_, obj);
8479 }
8480 }
8481 }
8482
8483
8484 void AllocationSinking::ExitsCollector::CollectTransitively(Definition* alloc) {
8485 exits_.TruncateTo(0);
8486 worklist_.TruncateTo(0);
8487
8488 worklist_.Add(alloc);
8489
8490 // Note: the worklist will potentially grow while we are iterating over it.
8491 // We do not remove allocations from the worklist, to avoid maintaining a
8492 // side BitVector of already processed allocations: the worklist is expected
8493 // to be very small, thus a linear search in it is just as efficient as a
8494 // bitvector.
8495 for (intptr_t i = 0; i < worklist_.length(); i++) {
8496 Collect(worklist_[i]);
8497 }
8498 }
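
Below is a standalone sketch of the same worklist scheme, with a hypothetical Obj type and an AddUnique helper standing in for AddInstruction: the worklist is indexed by position while it grows, duplicates are filtered by a linear scan, and nothing is ever popped.

#include <cstddef>
#include <vector>

struct Obj {
  std::vector<Obj*> stored_into;  // other candidates this object is stored into
};

template <typename T>
static void AddUnique(std::vector<T*>* list, T* value) {
  for (T* existing : *list) {
    if (existing == value) return;
  }
  list->push_back(value);
}

static std::vector<Obj*> CollectTransitively(Obj* root) {
  std::vector<Obj*> worklist;
  worklist.push_back(root);
  // Re-indexing each iteration keeps this loop valid while the vector grows.
  for (std::size_t i = 0; i < worklist.size(); ++i) {
    for (Obj* target : worklist[i]->stored_into) {
      AddUnique(&worklist, target);
    }
  }
  return worklist;
}
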
8499
8500
8501 void AllocationSinking::InsertMaterializations(Definition* alloc) {
8502 // Collect all fields that are written for this instance.
8503 ZoneGrowableArray<const Object*>* slots =
8504 new(Z) ZoneGrowableArray<const Object*>(5);
8505
8506 for (Value* use = alloc->input_use_list();
8507 use != NULL;
8508 use = use->next_use()) {
8509 StoreInstanceFieldInstr* store = use->instruction()->AsStoreInstanceField();
8510 if ((store != NULL) && (store->instance()->definition() == alloc)) {
8511 if (!store->field().IsNull()) {
8512 AddSlot(slots, store->field());
8513 } else {
8514 AddSlot(slots, Smi::ZoneHandle(Z, Smi::New(store->offset_in_bytes())));
8515 }
8516 }
8517 }
8518
8519 if (alloc->ArgumentCount() > 0) {
8520 AllocateObjectInstr* alloc_object = alloc->AsAllocateObject();
8521 ASSERT(alloc_object->ArgumentCount() == 1);
8522 intptr_t type_args_offset =
8523 alloc_object->cls().type_arguments_field_offset();
8524 AddSlot(slots, Smi::ZoneHandle(Z, Smi::New(type_args_offset)));
8525 }
8526
8527 // Collect all instructions that mention this object in the environment.
8528 exits_collector_.CollectTransitively(alloc);
8529
8530 // Insert materializations at environment uses.
8531 for (intptr_t i = 0; i < exits_collector_.exits().length(); i++) {
8532 CreateMaterializationAt(
8533 exits_collector_.exits()[i], alloc, *slots);
8534 }
8535 }
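
The slot list built above mixes two kinds of entries: a Field object for a named field, and a Smi-boxed byte offset otherwise (for example the type-arguments slot). The sketch below models that with a hypothetical tagged Slot struct and a dedup helper mirroring AddSlot; it is not the VM's Field/Smi handle machinery.

#include <cstdint>
#include <string>
#include <vector>

struct Slot {
  enum class Kind { kField, kOffset } kind;
  std::string field_name;            // meaningful when kind == kField
  std::int64_t offset_in_bytes = 0;  // meaningful when kind == kOffset
};

static bool SameSlot(const Slot& a, const Slot& b) {
  if (a.kind != b.kind) return false;
  return a.kind == Slot::Kind::kField ? a.field_name == b.field_name
                                      : a.offset_in_bytes == b.offset_in_bytes;
}

// Mirrors AddSlot: append only if an equal slot is not already present.
static bool AddSlotUnique(std::vector<Slot>* slots, const Slot& slot) {
  for (const Slot& existing : *slots) {
    if (SameSlot(existing, slot)) return false;
  }
  slots->push_back(slot);
  return true;
}
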
8536
8537
8538 } // namespace dart 5054 } // namespace dart