Chromium Code Reviews| Index: src/compiler/store-store-elimination.cc |
| diff --git a/src/compiler/store-store-elimination.cc b/src/compiler/store-store-elimination.cc |
| index a469b209820ad5dab95c257504b38cbc1b2ada3b..99e66138b8dec522dfc95dbcd65cec3a9a65ac71 100644 |
| --- a/src/compiler/store-store-elimination.cc |
| +++ b/src/compiler/store-store-elimination.cc |
| @@ -42,9 +42,9 @@ namespace compiler { |
| // upwards. While walking, we maintain a map {futureStore} from offsets to |
| // nodes; initially it is empty. As we walk the effect chain upwards, if |
| // futureStore[off] = n, then any store to node {n} with offset {off} is |
| -// guaranteed to be useless because we do a full-width[1] store to that offset |
| -// of that object in the near future anyway. For example, for this effect |
| -// chain |
| +// guaranteed to be useless because we do a tagged-width[2] store to that |
| +// offset of that object in the near future anyway. For example, for this |
| +// effect chain |
| // |
| // 71: StoreField(60, 0) |
| // 72: StoreField(65, 8) |
| @@ -58,7 +58,7 @@ namespace compiler { |
| // |
| // - We are at the end of a sequence of consecutive StoreFields. |
| // - We start out with futureStore = empty. |
| -// - We then walk the effect chain upwards to find the next StoreField [2]. |
| +// - We then walk the effect chain upwards to find the next StoreField [1]. |
| // |
| // 1. If the offset is not a key of {futureStore} yet, we put it in. |
| // 2. If the offset is a key of {futureStore}, but futureStore[offset] is a |
| @@ -70,17 +70,45 @@ namespace compiler { |
| // output, and as long as its opcode is StoreField, we keep traversing |
| // upwards. |
| // |
| -// [1] This optimization is unsound if we optimize away a store to an offset |
| -// because we store to the same offset in the future, even though the future |
| -// store is narrower than the store we optimize away. Therefore, in case (1) |
| -// and (2) we only add/overwrite to the dictionary when the field access has |
| -// maximal size. For simplicity of implementation, we do not try to detect |
| -// case (3). |
| // |
| -// [2] We make sure that we only traverse the linear part, that is, the part |
| +// |
| +// footnotes: |
| +// |
| +// [1] We make sure that we only traverse the linear part, that is, the part |
| // where every node has exactly one incoming and one outgoing effect edge. |
| // Also, we only keep walking upwards as long as we keep finding consecutive |
| // StoreFields on the same node. |
| +// |
| +// [2] This optimization is sound only in certain cases. Specifically, the |
| +// presence of a future store to {off} by itself does not automatically mean |
| +// that earlier stores to {off} are superfluous: a future narrow store does |
| +// not obviate an earlier wide store. However, future stores of a certain |
| +// size do obviate stores to the same offset of lesser or equal size. |
| +// |
| +// It turns out that we are most interested in stores of "tagged" size, |
| +// which is 8 bytes on 64-bit archs and 4 bytes on 32-bit archs. In |
| +// {futureStore}, we record future writes that are of at least this size. |
| +// The three cases are actually a bit more subtle. |
| +// |
| +// 1. If the offset is not a key of {futureStore} and the StoreField is of |
| +// "tagged" size or wider, then we put it in. |
| +// 2. If the offset is present in {futureStore} but the value is different, |
| +// then we overwrite the value if the current StoreField is of "tagged" |
| +// size or wider. |
| +// 3. If the offset is present and the value matches the node, and the |
| +// current StoreField is AT MOST of "tagged" size, then we eliminate this |
| +// StoreField. |
| +// |
| +// Examples of stores that we do not detect as superfluous: 2-byte stores |
| +// followed by 2-byte stores to the same offset; 16-byte stores followed by |
| +// 16-byte stores to the same offset. On ia32, we do not detect consecutive |
| +// float64 stores as superfluous, and on x64 we do not detect consecutive |
| +// int32 stores as superfluous. |
| + |
| +// At a late stage, we realized that this code is more complicated than it |
| +// needs to be: if we store a set of pairs (offset, node), the code simplifies |
| +// to 3 cases instead of 6. We could even store a map from nodes to sets of |
| +// bytes. |
| StoreStoreElimination::StoreStoreElimination(JSGraph* js_graph, Zone* temp_zone) |
| : jsgraph_(js_graph), temp_zone_(temp_zone) {} |
| @@ -179,6 +207,14 @@ size_t rep_size_of(FieldAccess access) { |
| return rep_size_of(access.machine_type.representation()); |
| } |
| +bool LeqTagged(FieldAccess access) { |
|
Jarin
2016/06/28 21:30:55
How about AtMostTagged and AtLeastTagged?
bgeron
2016/06/29 08:31:00
Good idea. Changed.
|
| + return rep_size_of(access) <= rep_size_of(MachineRepresentation::kTagged); |
| +} |
| + |
| +bool GeqTagged(FieldAccess access) { |
| + return rep_size_of(access) >= rep_size_of(MachineRepresentation::kTagged); |
| +} |
| + |
| } // namespace |
| bool StoreStoreElimination::IsEligibleNode(Node* node) { |
| @@ -209,44 +245,64 @@ void StoreStoreElimination::ReduceEligibleNode(Node* node) { |
| Node* previous = PreviousEffectBeforeStoreField(current_node); |
| - CHECK(rep_size_of(access) <= rep_size_of(MachineRepresentation::kTagged)); |
| - if (rep_size_of(access) == rep_size_of(MachineRepresentation::kTagged)) { |
| - // Try to insert. If it was present, this will preserve the original |
| - // value. |
| - auto insert_result = |
| - futureStore.insert(std::make_pair(offset, object_input)); |
| - if (insert_result.second) { |
| - // Key was not present. This means that there is no matching |
| - // StoreField to this offset in the future, so we cannot optimize |
| - // current_node away. However, we will record the current StoreField |
| - // in futureStore, and continue ascending up the chain. |
| - TRACE("#%d[[+%d]] -- wide, key not present", current_node->id(), |
| - offset); |
| - } else if (insert_result.first->second != object_input) { |
| - // Key was present, and the value did not equal object_input. This |
| - // means |
| - // that there is a StoreField to this offset in the future, but the |
| - // object instance comes from a different Node. We pessimistically |
| - // assume that we cannot optimize current_node away. However, we will |
| - // record the current StoreField in futureStore, and continue |
| - // ascending up the chain. |
| - insert_result.first->second = object_input; |
| - TRACE("#%d[[+%d]] -- wide, diff object", current_node->id(), offset); |
| - } else { |
| - // Key was present, and the value equalled object_input. This means |
| - // that soon after in the effect chain, we will do a StoreField to the |
| - // same object with the same offset, therefore current_node can be |
| - // optimized away. We don't need to update futureStore. |
| - |
| - Node* previous_effect = NodeProperties::GetEffectInput(current_node); |
| - |
| - NodeProperties::ReplaceUses(current_node, nullptr, previous_effect, |
| - nullptr, nullptr); |
| - current_node->Kill(); |
| - TRACE("#%d[[+%d]] -- wide, eliminated", current_node->id(), offset); |
| - } |
| + // Find the map entry. |
| + ZoneMap<Offset, Node*>::iterator find_result = futureStore.find(offset); |
| + |
| + bool present = find_result != futureStore.end(); |
| + Node* value = present ? find_result->second : nullptr; |
| + |
| + if (present && value == object_input && LeqTagged(access)) { |
| + // Key was present, and the value equalled object_input. This means |
| + // that soon after in the effect chain, we will do a StoreField to the |
| + // same object with the same offset, therefore current_node can be |
| + // optimized away. Also, the future StoreField is at least as big as this |
| + // one. |
| + // |
| + // We don't need to update futureStore. |
| + |
| + Node* previous_effect = NodeProperties::GetEffectInput(current_node); |
| + |
| + NodeProperties::ReplaceUses(current_node, nullptr, previous_effect, |
| + nullptr, nullptr); |
| + current_node->Kill(); |
| + TRACE("#%d[[+%d,%s]](#%d) -- tagged size, eliminated", current_node->id(), |
| + offset, MachineReprToString(access.machine_type.representation()), |
| + object_input->id()); |
| + } else if (present && value == object_input && !LeqTagged(access)) { |
| + TRACE("#%d[[+%d,%s]](#%d) -- too wide, not eliminated", |
| + current_node->id(), offset, |
| + MachineReprToString(access.machine_type.representation()), |
| + object_input->id()); |
| + } else if (present && value != object_input && GeqTagged(access)) { |
| + // Key was present, and the value did not equal object_input. This means |
| + // that there is a StoreField to this offset in the future, but the |
| + // object instance comes from a different Node. We pessimistically |
| + // assume that we cannot optimize current_node away. However, we will |
| + // guess that the current StoreField is more relevant than the future |
| + // one, record the current StoreField in futureStore instead, and |
| + // continue ascending up the chain. |
| + find_result->second = object_input; |
| + TRACE("#%d[[+%d,%s]](#%d) -- wide enough, diff object, updated in map", |
| + current_node->id(), offset, |
| + MachineReprToString(access.machine_type.representation()), |
| + object_input->id()); |
| + } else if (!present && GeqTagged(access)) { |
| + // Key was not present. This means that there is no matching |
| + // StoreField to this offset in the future, so we cannot optimize |
| + // current_node away. However, we will record the current StoreField |
| + // in futureStore, and continue ascending up the chain. |
| + futureStore.insert(std::make_pair(offset, object_input)); |
| + TRACE( |
| + "#%d[[+%d,%s]](#%d) -- wide enough, key not present, inserted in map", |
| + current_node->id(), offset, |
| + MachineReprToString(access.machine_type.representation()), |
| + object_input->id()); |
| + } else if (!GeqTagged(access)) { |
| + TRACE("#%d[[+%d,%s]](#%d) -- too narrow to record", current_node->id(), |
| + offset, MachineReprToString(access.machine_type.representation()), |
| + object_input->id()); |
| } else { |
| - TRACE("#%d[[+%d]] -- narrow, not eliminated", current_node->id(), offset); |
| + UNREACHABLE(); |
| } |
| // Regardless of whether we eliminated node {current}, we want to |