OLD | NEW |
1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/compiler/store-store-elimination.h" | 5 #include "src/compiler/store-store-elimination.h" |
6 | 6 |
7 #include "src/compiler/all-nodes.h" | 7 #include "src/compiler/all-nodes.h" |
8 #include "src/compiler/js-graph.h" | 8 #include "src/compiler/js-graph.h" |
9 #include "src/compiler/node-properties.h" | 9 #include "src/compiler/node-properties.h" |
10 #include "src/compiler/simplified-operator.h" | 10 #include "src/compiler/simplified-operator.h" |
(...skipping 24 matching lines...) Expand all Loading... |
35 // same object will be stored to in the future. If off_i == off_j and xi == xj | 35 // same object will be stored to in the future. If off_i == off_j and xi == xj |
36 // and i < j, then we optimize the i'th StoreField away. | 36 // and i < j, then we optimize the i'th StoreField away. |
37 // | 37 // |
38 // This optimization should be initiated on the last StoreField in such a | 38 // This optimization should be initiated on the last StoreField in such a |
39 // sequence. | 39 // sequence. |
40 // | 40 // |
41 // The algorithm works by walking the effect chain from the last StoreField | 41 // The algorithm works by walking the effect chain from the last StoreField |
42 // upwards. While walking, we maintain a map {futureStore} from offsets to | 42 // upwards. While walking, we maintain a map {futureStore} from offsets to |
43 // nodes; initially it is empty. As we walk the effect chain upwards, if | 43 // nodes; initially it is empty. As we walk the effect chain upwards, if |
44 // futureStore[off] = n, then any store to node {n} with offset {off} is | 44 // futureStore[off] = n, then any store to node {n} with offset {off} is |
45 // guaranteed to be useless because we do a full-width[1] store to that offset | 45 // guaranteed to be useless because we do a tagged-width[2] store to that |
46 // of that object in the near future anyway. For example, for this effect | 46 // offset of that object in the near future anyway. For example, for this |
47 // chain | 47 // effect chain |
48 // | 48 // |
49 // 71: StoreField(60, 0) | 49 // 71: StoreField(60, 0) |
50 // 72: StoreField(65, 8) | 50 // 72: StoreField(65, 8) |
51 // 73: StoreField(63, 8) | 51 // 73: StoreField(63, 8) |
52 // 74: StoreField(65, 16) | 52 // 74: StoreField(65, 16) |
53 // 75: StoreField(62, 8) | 53 // 75: StoreField(62, 8) |
54 // | 54 // |
55 // just before we get to 72, we will have futureStore = {8: 63, 16: 65}. | 55 // just before we get to 72, we will have futureStore = {8: 63, 16: 65}. |
56 // | 56 // |
57 // Here is the complete process. | 57 // Here is the complete process. |
58 // | 58 // |
59 // - We are at the end of a sequence of consecutive StoreFields. | 59 // - We are at the end of a sequence of consecutive StoreFields. |
60 // - We start out with futureStore = empty. | 60 // - We start out with futureStore = empty. |
61 // - We then walk the effect chain upwards to find the next StoreField [2]. | 61 // - We then walk the effect chain upwards to find the next StoreField [1]. |
62 // | 62 // |
63 // 1. If the offset is not a key of {futureStore} yet, we put it in. | 63 // 1. If the offset is not a key of {futureStore} yet, we put it in. |
64 // 2. If the offset is a key of {futureStore}, but futureStore[offset] is a | 64 // 2. If the offset is a key of {futureStore}, but futureStore[offset] is a |
65 // different node, we overwrite futureStore[offset] with the current node. | 65 // different node, we overwrite futureStore[offset] with the current node. |
66 // 3. If the offset is a key of {futureStore} and futureStore[offset] equals | 66 // 3. If the offset is a key of {futureStore} and futureStore[offset] equals |
67 // this node, we eliminate this StoreField. | 67 // this node, we eliminate this StoreField. |
68 // | 68 // |
69 // As long as the current effect input points to a node with a single effect | 69 // As long as the current effect input points to a node with a single effect |
70 // output, and as long as its opcode is StoreField, we keep traversing | 70 // output, and as long as its opcode is StoreField, we keep traversing |
71 // upwards. | 71 // upwards. |
72 // | 72 // |
73 // [1] This optimization is unsound if we optimize away a store to an offset | |
74 // because we store to the same offset in the future, even though the future | |
75 // store is narrower than the store we optimize away. Therefore, in case (1) | |
76 // and (2) we only add/overwrite to the dictionary when the field access has | |
77 // maximal size. For simplicity of implementation, we do not try to detect | |
78 // case (3). | |
79 // | 73 // |
80 // [2] We make sure that we only traverse the linear part, that is, the part | 74 // |
| 75 // footnotes: |
| 76 // |
| 77 // [1] We make sure that we only traverse the linear part, that is, the part |
81 // where every node has exactly one incoming and one outgoing effect edge. | 78 // where every node has exactly one incoming and one outgoing effect edge. |
82 // Also, we only keep walking upwards as long as we keep finding consecutive | 79 // Also, we only keep walking upwards as long as we keep finding consecutive |
83 // StoreFields on the same node. | 80 // StoreFields on the same node. |
| 81 // |
| 82 // [2] This optimization is sound only in certain cases. Specifically, the |
| 83 // presence of a future store to {off} by itself does not automatically mean |
| 84 // that earlier stores to {off} are superfluous: a future narrow store does |
| 85 // not obviate an earlier wide store. However, future stores of a certain |
| 86 // size do obviate stores to the same offset of lesser or equal size. |
| 87 // |
| 88 // It turns out that we are most interested in stores of "tagged" size, |
| 89 // which is 8 bytes on 64-bit archs and 4 bit on 32-bit archs. In |
| 90 // {futureStore}, we record future writes that are of at least this size. |
| 91 // The three cases are actually a bit more subtle. |
| 92 // |
| 93 // 1. If the offset is not a key of {futureStore} and the StoreField is of |
| 94 // "tagged" size or wider, then we put it in. |
| 95 // 2. If the offset is present in {futureStore} but the value is different, |
| 96 // then we overwrite the value if the current StoreField is of "tagged" |
| 97 // size or wider. |
| 98 // 3. If the offset is present and the value matches the node, and the |
| 99 // current StoreField is AT MOST of "tagged" size, then we eliminate this |
| 100 // StoreField. |
| 101 // |
| 102 // Examples of stores that we do not detect as superfluous: 2-byte stores |
| 103 // followed by 2-byte stores to the same offset; 16-byte stores followed by |
| 104 // 16-byte stores to the same offset. On ia32, we do not detect consecutive |
| 105 // float64 stores as superfluous, and on x86 we do not detect consecutive |
| 106 // int32 stores as superfluous. |
| 107 |
| 108 // At a late stage, we realized that this code is more complicated than it |
| 109 // needs to be: if we store a set of pairs (offset, node), the code simplifies |
| 110 // to 3 cases instead of 6. We could even store a map from nodes to sets of |
| 111 // bytes. |
84 | 112 |
85 StoreStoreElimination::StoreStoreElimination(JSGraph* js_graph, Zone* temp_zone) | 113 StoreStoreElimination::StoreStoreElimination(JSGraph* js_graph, Zone* temp_zone) |
86 : jsgraph_(js_graph), temp_zone_(temp_zone) {} | 114 : jsgraph_(js_graph), temp_zone_(temp_zone) {} |
87 | 115 |
88 StoreStoreElimination::~StoreStoreElimination() {} | 116 StoreStoreElimination::~StoreStoreElimination() {} |
89 | 117 |
90 void StoreStoreElimination::Run() { | 118 void StoreStoreElimination::Run() { |
91 // The store-store elimination performs work on chains of certain types of | 119 // The store-store elimination performs work on chains of certain types of |
92 // nodes. The elimination must be invoked on the lowest node in such a | 120 // nodes. The elimination must be invoked on the lowest node in such a |
93 // chain; we have a helper function IsEligibleNode that returns true | 121 // chain; we have a helper function IsEligibleNode that returns true |
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
172 } | 200 } |
173 } | 201 } |
174 | 202 |
175 size_t rep_size_of(MachineRepresentation rep) { | 203 size_t rep_size_of(MachineRepresentation rep) { |
176 return ((size_t)1) << ElementSizeLog2Of(rep); | 204 return ((size_t)1) << ElementSizeLog2Of(rep); |
177 } | 205 } |
178 size_t rep_size_of(FieldAccess access) { | 206 size_t rep_size_of(FieldAccess access) { |
179 return rep_size_of(access.machine_type.representation()); | 207 return rep_size_of(access.machine_type.representation()); |
180 } | 208 } |
181 | 209 |
| 210 bool AtMostTagged(FieldAccess access) { |
| 211 return rep_size_of(access) <= rep_size_of(MachineRepresentation::kTagged); |
| 212 } |
| 213 |
| 214 bool AtLeastTagged(FieldAccess access) { |
| 215 return rep_size_of(access) >= rep_size_of(MachineRepresentation::kTagged); |
| 216 } |
| 217 |
182 } // namespace | 218 } // namespace |
183 | 219 |
184 bool StoreStoreElimination::IsEligibleNode(Node* node) { | 220 bool StoreStoreElimination::IsEligibleNode(Node* node) { |
185 return (node->op()->opcode() == IrOpcode::kStoreField) && | 221 return (node->op()->opcode() == IrOpcode::kStoreField) && |
186 IsEndOfStoreFieldChain(node); | 222 IsEndOfStoreFieldChain(node); |
187 } | 223 } |
188 | 224 |
189 void StoreStoreElimination::ReduceEligibleNode(Node* node) { | 225 void StoreStoreElimination::ReduceEligibleNode(Node* node) { |
190 DCHECK(IsEligibleNode(node)); | 226 DCHECK(IsEligibleNode(node)); |
191 | 227 |
(...skipping 10 matching lines...) Expand all Loading... |
202 | 238 |
203 Node* current_node = node; | 239 Node* current_node = node; |
204 | 240 |
205 do { | 241 do { |
206 FieldAccess access = OpParameter<FieldAccess>(current_node->op()); | 242 FieldAccess access = OpParameter<FieldAccess>(current_node->op()); |
207 Offset offset = ToOffset(access); | 243 Offset offset = ToOffset(access); |
208 Node* object_input = current_node->InputAt(0); | 244 Node* object_input = current_node->InputAt(0); |
209 | 245 |
210 Node* previous = PreviousEffectBeforeStoreField(current_node); | 246 Node* previous = PreviousEffectBeforeStoreField(current_node); |
211 | 247 |
212 CHECK(rep_size_of(access) <= rep_size_of(MachineRepresentation::kTagged)); | 248 // Find the map entry. |
213 if (rep_size_of(access) == rep_size_of(MachineRepresentation::kTagged)) { | 249 ZoneMap<Offset, Node*>::iterator find_result = futureStore.find(offset); |
214 // Try to insert. If it was present, this will preserve the original | |
215 // value. | |
216 auto insert_result = | |
217 futureStore.insert(std::make_pair(offset, object_input)); | |
218 if (insert_result.second) { | |
219 // Key was not present. This means that there is no matching | |
220 // StoreField to this offset in the future, so we cannot optimize | |
221 // current_node away. However, we will record the current StoreField | |
222 // in futureStore, and continue ascending up the chain. | |
223 TRACE("#%d[[+%d]] -- wide, key not present", current_node->id(), | |
224 offset); | |
225 } else if (insert_result.first->second != object_input) { | |
226 // Key was present, and the value did not equal object_input. This | |
227 // means | |
228 // that there is a StoreField to this offset in the future, but the | |
229 // object instance comes from a different Node. We pessimistically | |
230 // assume that we cannot optimize current_node away. However, we will | |
231 // record the current StoreField in futureStore, and continue | |
232 // ascending up the chain. | |
233 insert_result.first->second = object_input; | |
234 TRACE("#%d[[+%d]] -- wide, diff object", current_node->id(), offset); | |
235 } else { | |
236 // Key was present, and the value equalled object_input. This means | |
237 // that soon after in the effect chain, we will do a StoreField to the | |
238 // same object with the same offset, therefore current_node can be | |
239 // optimized away. We don't need to update futureStore. | |
240 | 250 |
241 Node* previous_effect = NodeProperties::GetEffectInput(current_node); | 251 bool present = find_result != futureStore.end(); |
| 252 Node* value = present ? find_result->second : nullptr; |
242 | 253 |
243 NodeProperties::ReplaceUses(current_node, nullptr, previous_effect, | 254 if (present && value == object_input && AtMostTagged(access)) { |
244 nullptr, nullptr); | 255 // Key was present, and the value equalled object_input. This means |
245 current_node->Kill(); | 256 // that soon after in the effect chain, we will do a StoreField to the |
246 TRACE("#%d[[+%d]] -- wide, eliminated", current_node->id(), offset); | 257 // same object with the same offset, therefore current_node can be |
247 } | 258 // optimized away. Also, the future StoreField is at least as big as this |
| 259 // one. |
| 260 // |
| 261 // We don't need to update futureStore. |
| 262 |
| 263 Node* previous_effect = NodeProperties::GetEffectInput(current_node); |
| 264 |
| 265 NodeProperties::ReplaceUses(current_node, nullptr, previous_effect, |
| 266 nullptr, nullptr); |
| 267 current_node->Kill(); |
| 268 TRACE("#%d[[+%d,%s]](#%d) -- at most tagged size, eliminated", |
| 269 current_node->id(), offset, |
| 270 MachineReprToString(access.machine_type.representation()), |
| 271 object_input->id()); |
| 272 } else if (present && value == object_input && !AtMostTagged(access)) { |
| 273 TRACE("#%d[[+%d,%s]](#%d) -- too wide, not eliminated", |
| 274 current_node->id(), offset, |
| 275 MachineReprToString(access.machine_type.representation()), |
| 276 object_input->id()); |
| 277 } else if (present && value != object_input && AtLeastTagged(access)) { |
| 278 // Key was present, and the value did not equal object_input. This means |
| 279 // that there is a StoreField to this offset in the future, but the |
| 280 // object instance comes from a different Node. We pessimistically |
| 281 // assume that we cannot optimize current_node away. However, we will |
| 282 // guess that the current StoreField is more relevant than the future |
| 283 // one, record the current StoreField in futureStore instead, and |
| 284 // continue ascending up the chain. |
| 285 find_result->second = object_input; |
| 286 TRACE("#%d[[+%d,%s]](#%d) -- wide enough, diff object, updated in map", |
| 287 current_node->id(), offset, |
| 288 MachineReprToString(access.machine_type.representation()), |
| 289 object_input->id()); |
| 290 } else if (!present && AtLeastTagged(access)) { |
| 291 // Key was not present. This means that there is no matching |
| 292 // StoreField to this offset in the future, so we cannot optimize |
| 293 // current_node away. However, we will record the current StoreField |
| 294 // in futureStore, and continue ascending up the chain. |
| 295 futureStore.insert(std::make_pair(offset, object_input)); |
| 296 TRACE( |
| 297 "#%d[[+%d,%s]](#%d) -- wide enough, key not present, inserted in map", |
| 298 current_node->id(), offset, |
| 299 MachineReprToString(access.machine_type.representation()), |
| 300 object_input->id()); |
| 301 } else if (!AtLeastTagged(access)) { |
| 302 TRACE("#%d[[+%d,%s]](#%d) -- too narrow to record", current_node->id(), |
| 303 offset, MachineReprToString(access.machine_type.representation()), |
| 304 object_input->id()); |
248 } else { | 305 } else { |
249 TRACE("#%d[[+%d]] -- narrow, not eliminated", current_node->id(), offset); | 306 UNREACHABLE(); |
250 } | 307 } |
251 | 308 |
252 // Regardless of whether we eliminated node {current}, we want to | 309 // Regardless of whether we eliminated node {current}, we want to |
253 // continue walking up the effect chain. | 310 // continue walking up the effect chain. |
254 | 311 |
255 current_node = previous; | 312 current_node = previous; |
256 } while (current_node != nullptr && | 313 } while (current_node != nullptr && |
257 current_node->op()->opcode() == IrOpcode::kStoreField); | 314 current_node->op()->opcode() == IrOpcode::kStoreField); |
258 | 315 |
259 TRACE("finished"); | 316 TRACE("finished"); |
260 } | 317 } |
261 | 318 |
262 } // namespace compiler | 319 } // namespace compiler |
263 } // namespace internal | 320 } // namespace internal |
264 } // namespace v8 | 321 } // namespace v8 |
OLD | NEW |