Index: src/compiler/store-store-elimination.cc
diff --git a/src/compiler/store-store-elimination.cc b/src/compiler/store-store-elimination.cc
index a469b209820ad5dab95c257504b38cbc1b2ada3b..3b7e3e053e018e1ee194cafcb02845d528857051 100644
--- a/src/compiler/store-store-elimination.cc
+++ b/src/compiler/store-store-elimination.cc
@@ -42,9 +42,9 @@ namespace compiler {
 // upwards. While walking, we maintain a map {futureStore} from offsets to
 // nodes; initially it is empty. As we walk the effect chain upwards, if
 // futureStore[off] = n, then any store to node {n} with offset {off} is
-// guaranteed to be useless because we do a full-width[1] store to that offset
-// of that object in the near future anyway. For example, for this effect
-// chain
+// guaranteed to be useless because we do a tagged-width[2] store to that
+// offset of that object in the near future anyway. For example, for this
+// effect chain
 //
 // 71: StoreField(60, 0)
 // 72: StoreField(65, 8)
@@ -58,7 +58,7 @@ namespace compiler {
 //
 // - We are at the end of a sequence of consecutive StoreFields.
 // - We start out with futureStore = empty.
-// - We then walk the effect chain upwards to find the next StoreField [2].
+// - We then walk the effect chain upwards to find the next StoreField [1].
 //
 // 1. If the offset is not a key of {futureStore} yet, we put it in.
 // 2. If the offset is a key of {futureStore}, but futureStore[offset] is a
@@ -70,17 +70,45 @@ namespace compiler {
 //    output, and as long as its opcode is StoreField, we keep traversing
 //    upwards.
 //
-// [1] This optimization is unsound if we optimize away a store to an offset
-// because we store to the same offset in the future, even though the future
-// store is narrower than the store we optimize away. Therefore, in case (1)
-// and (2) we only add/overwrite to the dictionary when the field access has
-// maximal size. For simplicity of implementation, we do not try to detect
-// case (3).
 //
-// [2] We make sure that we only traverse the linear part, that is, the part
+//
+// footnotes:
+//
+// [1] We make sure that we only traverse the linear part, that is, the part
 // where every node has exactly one incoming and one outgoing effect edge.
 // Also, we only keep walking upwards as long as we keep finding consecutive
 // StoreFields on the same node.
+//
+// [2] This optimization is sound only in certain cases. Specifically, the
+// presence of a future store to {off} by itself does not automatically mean
+// that earlier stores to {off} are superfluous: a future narrow store does
+// not obviate an earlier wide store. However, future stores of a certain
+// size do obviate stores to the same offset of lesser or equal size.
+//
+// It turns out that we are most interested in stores of "tagged" size,
+// which is 8 bytes on 64-bit archs and 4 bytes on 32-bit archs. In
+// {futureStore}, we record future writes that are of at least this size.
+// The three cases are actually a bit more subtle.
+//
+// 1. If the offset is not a key of {futureStore} and the StoreField is of
+//    "tagged" size or wider, then we put it in.
+// 2. If the offset is present in {futureStore} but the value is different,
+//    then we overwrite the value if the current StoreField is of "tagged"
+//    size or wider.
+// 3. If the offset is present and the value matches the node, and the
+//    current StoreField is AT MOST of "tagged" size, then we eliminate this
+//    StoreField.
+//
+// Examples of stores that we do not detect as superfluous: 2-byte stores
+// followed by 2-byte stores to the same offset; 16-byte stores followed by
+// 16-byte stores to the same offset. On ia32, we do not detect consecutive
+// float64 stores as superfluous, and on x64 we do not detect consecutive
+// int32 stores as superfluous.
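+//
+// For illustration (node ids and offsets invented for this example),
+// consider the following effect chain on an x64 target, where "tagged"
+// size is 8 bytes:
+//
+// 90: StoreField(30, 24)    (a 4-byte kWord32 store)
+// 91: StoreField(30, 24)    (an 8-byte kTagged store)
+//
+// Walking upwards, node 91 is of at least "tagged" size and offset 24 is
+// not yet a key of {futureStore}, so case 1 records futureStore[24] = 30.
+// Node 90 then matches that entry and is of at most "tagged" size, so
+// case 3 applies: every byte it writes is overwritten by node 91, and
+// node 90 is eliminated.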
+
+// At a late stage, we realized that this code is more complicated than it
+// needs to be: if we store a set of pairs (offset, node), the code simplifies
+// to 3 cases instead of 6. We could even store a map from nodes to sets of
+// bytes.
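+//
+// (A rough sketch of that simplification, not implemented here: with a set
+// of (offset, node) pairs, the walk would eliminate a StoreField whose pair
+// is already present if the store is of at most "tagged" size, record the
+// pair if the store is of at least "tagged" size, and do nothing otherwise.)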
 StoreStoreElimination::StoreStoreElimination(JSGraph* js_graph, Zone* temp_zone)
     : jsgraph_(js_graph), temp_zone_(temp_zone) {}
@@ -179,6 +207,14 @@ size_t rep_size_of(FieldAccess access) {
   return rep_size_of(access.machine_type.representation());
 }
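 
+// {AtMostTagged} and {AtLeastTagged} compare the width of the field access
+// to the width of a tagged value on the target architecture (8 bytes on
+// 64-bit, 4 bytes on 32-bit).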
+bool AtMostTagged(FieldAccess access) {
+  return rep_size_of(access) <= rep_size_of(MachineRepresentation::kTagged);
+}
+
+bool AtLeastTagged(FieldAccess access) {
+  return rep_size_of(access) >= rep_size_of(MachineRepresentation::kTagged);
+}
+
 }  // namespace
 
 bool StoreStoreElimination::IsEligibleNode(Node* node) {
@@ -209,44 +245,65 @@ void StoreStoreElimination::ReduceEligibleNode(Node* node) {
     Node* previous = PreviousEffectBeforeStoreField(current_node);
 
-    CHECK(rep_size_of(access) <= rep_size_of(MachineRepresentation::kTagged));
-    if (rep_size_of(access) == rep_size_of(MachineRepresentation::kTagged)) {
-      // Try to insert. If it was present, this will preserve the original
-      // value.
-      auto insert_result =
-          futureStore.insert(std::make_pair(offset, object_input));
-      if (insert_result.second) {
-        // Key was not present. This means that there is no matching
-        // StoreField to this offset in the future, so we cannot optimize
-        // current_node away. However, we will record the current StoreField
-        // in futureStore, and continue ascending up the chain.
-        TRACE("#%d[[+%d]] -- wide, key not present", current_node->id(),
-              offset);
-      } else if (insert_result.first->second != object_input) {
-        // Key was present, and the value did not equal object_input. This
-        // means
-        // that there is a StoreField to this offset in the future, but the
-        // object instance comes from a different Node. We pessimistically
-        // assume that we cannot optimize current_node away. However, we will
-        // record the current StoreField in futureStore, and continue
-        // ascending up the chain.
-        insert_result.first->second = object_input;
-        TRACE("#%d[[+%d]] -- wide, diff object", current_node->id(), offset);
-      } else {
-        // Key was present, and the value equalled object_input. This means
-        // that soon after in the effect chain, we will do a StoreField to the
-        // same object with the same offset, therefore current_node can be
-        // optimized away. We don't need to update futureStore.
-
-        Node* previous_effect = NodeProperties::GetEffectInput(current_node);
-
-        NodeProperties::ReplaceUses(current_node, nullptr, previous_effect,
-                                    nullptr, nullptr);
-        current_node->Kill();
-        TRACE("#%d[[+%d]] -- wide, eliminated", current_node->id(), offset);
-      }
+    // Find the map entry.
+    ZoneMap<Offset, Node*>::iterator find_result = futureStore.find(offset);
+
+    bool present = find_result != futureStore.end();
+    Node* value = present ? find_result->second : nullptr;
+
+    if (present && value == object_input && AtMostTagged(access)) {
+      // Key was present, and the value equalled object_input. This means
+      // that soon after in the effect chain, we will do a StoreField to the
+      // same object with the same offset, therefore current_node can be
+      // optimized away. Also, the future StoreField is at least as big as this
+      // one.
+      //
+      // We don't need to update futureStore.
+
+      Node* previous_effect = NodeProperties::GetEffectInput(current_node);
+
+      NodeProperties::ReplaceUses(current_node, nullptr, previous_effect,
+                                  nullptr, nullptr);
+      current_node->Kill();
+      TRACE("#%d[[+%d,%s]](#%d) -- at most tagged size, eliminated",
+            current_node->id(), offset,
+            MachineReprToString(access.machine_type.representation()),
+            object_input->id());
+    } else if (present && value == object_input && !AtMostTagged(access)) {
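+      // Key was present, and the value equalled object_input, but the
+      // current StoreField is wider than tagged size. The future store is
+      // not guaranteed to overwrite every byte written here, so we cannot
+      // eliminate current_node.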
+      TRACE("#%d[[+%d,%s]](#%d) -- too wide, not eliminated",
+            current_node->id(), offset,
+            MachineReprToString(access.machine_type.representation()),
+            object_input->id());
+    } else if (present && value != object_input && AtLeastTagged(access)) {
+      // Key was present, and the value did not equal object_input. This means
+      // that there is a StoreField to this offset in the future, but the
+      // object instance comes from a different Node. We pessimistically
+      // assume that we cannot optimize current_node away. However, we will
+      // guess that the current StoreField is more relevant than the future
+      // one, record the current StoreField in futureStore instead, and
+      // continue ascending up the chain.
+      find_result->second = object_input;
+      TRACE("#%d[[+%d,%s]](#%d) -- wide enough, diff object, updated in map",
+            current_node->id(), offset,
+            MachineReprToString(access.machine_type.representation()),
+            object_input->id());
+    } else if (!present && AtLeastTagged(access)) {
+      // Key was not present. This means that there is no matching
+      // StoreField to this offset in the future, so we cannot optimize
+      // current_node away. However, we will record the current StoreField
+      // in futureStore, and continue ascending up the chain.
+      futureStore.insert(std::make_pair(offset, object_input));
+      TRACE(
+          "#%d[[+%d,%s]](#%d) -- wide enough, key not present, inserted in map",
+          current_node->id(), offset,
+          MachineReprToString(access.machine_type.representation()),
+          object_input->id());
+    } else if (!AtLeastTagged(access)) {
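+      // The current StoreField is narrower than tagged size. We only record
+      // stores of at least tagged size in {futureStore}, so there is nothing
+      // to do here; this is why e.g. consecutive 2-byte stores to the same
+      // offset are not detected as superfluous.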
+      TRACE("#%d[[+%d,%s]](#%d) -- too narrow to record", current_node->id(),
+            offset, MachineReprToString(access.machine_type.representation()),
+            object_input->id());
     } else {
-      TRACE("#%d[[+%d]] -- narrow, not eliminated", current_node->id(), offset);
+      UNREACHABLE();
     }
 
     // Regardless of whether we eliminated node {current}, we want to