Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include <iterator> | |
| 6 | |
| 5 #include "src/compiler/store-store-elimination.h" | 7 #include "src/compiler/store-store-elimination.h" |
| 6 | 8 |
| 7 #include "src/compiler/all-nodes.h" | 9 #include "src/compiler/all-nodes.h" |
| 8 #include "src/compiler/js-graph.h" | 10 #include "src/compiler/js-graph.h" |
| 9 #include "src/compiler/node-properties.h" | 11 #include "src/compiler/node-properties.h" |
| 10 #include "src/compiler/simplified-operator.h" | 12 #include "src/compiler/simplified-operator.h" |
| 11 | 13 |
| 12 namespace v8 { | 14 namespace v8 { |
| 13 namespace internal { | 15 namespace internal { |
| 14 namespace compiler { | 16 namespace compiler { |
| 15 | 17 |
| 16 #define TRACE(fmt, ...) \ | 18 #define TRACE(fmt, ...) \ |
| 17 do { \ | 19 do { \ |
| 18 if (FLAG_trace_store_elimination) { \ | 20 if (FLAG_trace_store_elimination) { \ |
| 19 PrintF("StoreStoreElimination::ReduceEligibleNode: " fmt "\n", \ | 21 PrintF("RedundantStoreFinder: " fmt "\n", ##__VA_ARGS__); \ |
| 20 ##__VA_ARGS__); \ | 22 } \ |
| 21 } \ | |
| 22 } while (false) | 23 } while (false) |
| 23 | 24 |
| 24 // A simple store-store elimination. When the effect chain contains the | 25 // CHECK_EXTRA is like CHECK, but has two or more arguments: a boolean |
| 25 // following sequence, | 26 // expression, a format string, and any number of extra arguments. The boolean |
| 26 // | 27 // expression will be evaluated at runtime. If it evaluates to false, then an |
| 27 // - StoreField[[+off_1]](x1, y1) | 28 // error message will be shown containing the condition, as well as the extra |
| 28 // - StoreField[[+off_2]](x2, y2) | 29 // info formatted like with printf. |
| 29 // - StoreField[[+off_3]](x3, y3) | 30 #define CHECK_EXTRA(condition, fmt, ...) \ |
| 30 // ... | 31 do { \ |
| 31 // - StoreField[[+off_n]](xn, yn) | 32 if (V8_UNLIKELY(!(condition))) { \ |
| 32 // | 33 V8_Fatal(__FILE__, __LINE__, "Check failed: %s. Extra info: " fmt, \ |
| 33 // where the xes are the objects and the ys are the values to be stored, then | 34 #condition, ##__VA_ARGS__); \ |
| 34 // we are going to say that a store is superfluous if the same offset of the | 35 } \ |
| 35 // same object will be stored to in the future. If off_i == off_j and xi == xj | 36 } while (0) |
| 36 // and i < j, then we optimize the i'th StoreField away. | 37 |
| 37 // | 38 #ifdef DEBUG |
| 38 // This optimization should be initiated on the last StoreField in such a | 39 #define DCHECK_EXTRA(condition, fmt, ...) \ |
| 39 // sequence. | 40 CHECK_EXTRA(condition, fmt, ##__VA_ARGS__) |
| 40 // | 41 #else |
| 41 // The algorithm works by walking the effect chain from the last StoreField | 42 #define DCHECK_EXTRA(condition, fmt, ...) ((void)0) |
| 42 // upwards. While walking, we maintain a map {futureStore} from offsets to | 43 #endif |
| 43 // nodes; initially it is empty. As we walk the effect chain upwards, if | 44 |
| 44 // futureStore[off] = n, then any store to node {n} with offset {off} is | 45 // Store-store elimination. |
| 45 // guaranteed to be useless because we do a tagged-width[2] store to that | 46 // |
| 46 // offset of that object in the near future anyway. For example, for this | 47 // The aim of this optimization is to detect the following pattern in the |
| 47 // effect chain | 48 // effect graph: |
| 48 // | 49 // |
| 49 // 71: StoreField(60, 0) | 50 // - StoreField[+24, kRepTagged](263, ...) |
| 50 // 72: StoreField(65, 8) | 51 // |
| 51 // 73: StoreField(63, 8) | 52 // ... lots of nodes from which the field at offset 24 of the object |
| 52 // 74: StoreField(65, 16) | 53 // returned by node #263 cannot be observed ... |
| 53 // 75: StoreField(62, 8) | 54 // |
| 54 // | 55 // - StoreField[+24, kRepTagged](263, ...) |
| 55 // just before we get to 72, we will have futureStore = {8: 63, 16: 65}. | 56 // |
| 56 // | 57 // In such situations, the earlier StoreField cannot be observed, and can be |
| 57 // Here is the complete process. | 58 // eliminated. This optimization should work for any offset and input node, of |
| 58 // | 59 // course. |
| 59 // - We are at the end of a sequence of consecutive StoreFields. | 60 // |
| 60 // - We start out with futureStore = empty. | 61 // The optimization also works across splits. It currently does not work for |
| 61 // - We then walk the effect chain upwards to find the next StoreField [1]. | 62 // loops, because we tend to put a stack check in loops, and like deopts, |
| 62 // | 63 // stack checks can observe anything. |
| 63 // 1. If the offset is not a key of {futureStore} yet, we put it in. | 64 |
| 64 // 2. If the offset is a key of {futureStore}, but futureStore[offset] is a | 65 // Assumption: every byte of a JS object is only ever accessed through one |
| 65 // different node, we overwrite futureStore[offset] with the current node. | 66 // offset. For instance, byte 15 of a given object may be accessed using a |
| 66 // 3. If the offset is a key of {futureStore} and futureStore[offset] equals | 67 // two-byte read at offset 14, or a four-byte read at offset 12, but never |
| 67 // this node, we eliminate this StoreField. | 68 // both in the same program. |
| 68 // | 69 // |
| 69 // As long as the current effect input points to a node with a single effect | 70 // This implementation needs all dead nodes removed from the graph, and the |
| 70 // output, and as long as its opcode is StoreField, we keep traversing | 71 // graph should be trimmed. |
| 71 // upwards. | |
| 72 // | |
| 73 // | |
| 74 // | |
| 75 // footnotes: | |
| 76 // | |
| 77 // [1] We make sure that we only traverse the linear part, that is, the part | |
| 78 // where every node has exactly one incoming and one outgoing effect edge. | |
| 79 // Also, we only keep walking upwards as long as we keep finding consecutive | |
| 80 // StoreFields on the same node. | |
| 81 // | |
| 82 // [2] This optimization is sound only in certain cases. Specifically, the | |
| 83 // presence of a future store to {off} by itself does not automatically mean | |
| 84 // that earlier stores to {off} are superfluous: a future narrow store does | |
| 85 // not obviate an earlier wide store. However, future stores of a certain | |
| 86 // size do obviate stores to the same offset of lesser or equal size. | |
| 87 // | |
| 88 // It turns out that we are most interested in stores of "tagged" size, | |
| 89 // which is 8 bytes on 64-bit archs and 4 bytes on 32-bit archs. In | |
| 90 // {futureStore}, we record future writes that are of at least this size. | |
| 91 // The three cases are actually a bit more subtle. | |
| 92 // | |
| 93 // 1. If the offset is not a key of {futureStore} and the StoreField is of | |
| 94 // "tagged" size or wider, then we put it in. | |
| 95 // 2. If the offset is present in {futureStore} but the value is different, | |
| 96 // then we overwrite the value if the current StoreField is of "tagged" | |
| 97 // size or wider. | |
| 98 // 3. If the offset is present and the value matches the node, and the | |
| 99 // current StoreField is AT MOST of "tagged" size, then we eliminate this | |
| 100 // StoreField. | |
| 101 // | |
| 102 // Examples of stores that we do not detect as superfluous: 2-byte stores | |
| 103 // followed by 2-byte stores to the same offset; 16-byte stores followed by | |
| 104 // 16-byte stores to the same offset. On ia32, we do not detect consecutive | |
| 105 // float64 stores as superfluous, and on x64 we do not detect consecutive | |
| 106 // int32 stores as superfluous. | |
| 107 | |
| 108 // At a late stage, we realized that this code is more complicated than it | |
| 109 // needs to be: if we store a set of pairs (offset, node), the code simplifies | |
| 110 // to 3 cases instead of 6. We could even store a map from nodes to sets of | |
| 111 // bytes. | |
| 112 | |
| 113 StoreStoreElimination::StoreStoreElimination(JSGraph* js_graph, Zone* temp_zone) | |
| 114 : jsgraph_(js_graph), temp_zone_(temp_zone) {} | |
| 115 | |
| 116 StoreStoreElimination::~StoreStoreElimination() {} | |
| 117 | |
| 118 void StoreStoreElimination::Run() { | |
| 119 // The store-store elimination performs work on chains of certain types of | |
| 120 // nodes. The elimination must be invoked on the lowest node in such a | |
| 121 // chain; we have a helper function IsEligibleNode that returns true | |
| 122 // precisely on the lowest node in such a chain. | |
| 123 // | |
| 124 // Because the elimination removes nodes from the graph, including nodes | |
| 125 // that the elimination was not invoked on, we cannot use a normal | |
| 126 // AdvancedReducer; instead we manually find which nodes to invoke the | |
| 127 // elimination on. Then, in a second step, we invoke the elimination for | |
| 128 // each node that was eligible. | |
| 129 | |
| 130 NodeVector eligible(temp_zone()); // loops over all nodes | |
| 131 AllNodes all(temp_zone(), jsgraph()->graph()); | |
| 132 | |
| 133 for (Node* node : all.live) { | |
| 134 if (IsEligibleNode(node)) { | |
| 135 eligible.push_back(node); | |
| 136 } | |
| 137 } | |
| 138 | |
| 139 for (Node* node : eligible) { | |
| 140 ReduceEligibleNode(node); | |
| 141 } | |
| 142 } | |
| 143 | 72 |
| 144 namespace { | 73 namespace { |
| 145 | 74 |
| 146 // 16 bits was chosen fairly arbitrarily; it seems enough now. 8 bits is too | 75 // 16 bits was chosen fairly arbitrarily; it seems enough now. 8 bits is too |
| 147 // few. | 76 // few. |
| 148 typedef uint16_t Offset; | 77 typedef uint16_t StoreOffset; |
| 78 | |
| 79 struct UnobservableStore { | |
| 80 NodeId id_; | |
| 81 StoreOffset offset_; | |
| 82 | |
| 83 bool operator==(const UnobservableStore) const; | |
| 84 bool operator!=(const UnobservableStore) const; | |
| 85 bool operator<(const UnobservableStore) const; | |
| 86 }; | |
| 87 | |
| 88 } // namespace | |
| 89 | |
| 90 namespace { | |
| 91 | |
| 92 // Instances of UnobservablesSet are immutable. They represent either a set of | |
| 93 // UnobservableStores, or the "unvisited empty set". | |
| 94 // | |
| 95 // We apply some sharing to save memory. The class UnobservablesSet is only a | |
| 96 // pointer wide, and a copy does not use any heap (or temp_zone) memory. Most | |
| 97 // changes to an UnobservablesSet might allocate in the temp_zone. | |
| 98 // | |
| 99 // The size of an instance should be the size of a pointer, plus additional | |
| 100 // space in the zone in the case of non-unvisited UnobservablesSets. Copying | |
| 101 // an UnobservablesSet allocates no memory. | |
| 102 class UnobservablesSet final { | |
| 103 public: | |
| 104 static UnobservablesSet Unvisited(); | |
| 105 static UnobservablesSet VisitedEmpty(Zone* zone); | |
| 106 UnobservablesSet(); // unvisited | |
| 107 UnobservablesSet(const UnobservablesSet& other) : set_(other.set_) {} | |
| 108 | |
| 109 UnobservablesSet Intersect(UnobservablesSet other, Zone* zone) const; | |
| 110 UnobservablesSet Add(UnobservableStore obs, Zone* zone) const; | |
| 111 UnobservablesSet RemoveSameOffset(StoreOffset off, Zone* zone) const; | |
| 112 | |
| 113 const ZoneSet<UnobservableStore>* set() const { return set_; } | |
| 114 | |
| 115 bool IsUnvisited() const { return set_ == nullptr; } | |
| 116 bool IsEmpty() const { return set_ == nullptr || set_->empty(); } | |
| 117 bool Contains(UnobservableStore obs) const { | |
| 118 return set_ != nullptr && (set_->find(obs) != set_->end()); | |
| 119 } | |
| 120 | |
| 121 bool operator==(const UnobservablesSet&) const; | |
| 122 bool operator!=(const UnobservablesSet&) const; | |
| 123 | |
| 124 private: | |
| 125 explicit UnobservablesSet(const ZoneSet<UnobservableStore>* set) | |
| 126 : set_(set) {} | |
| 127 const ZoneSet<UnobservableStore>* set_; | |
| 128 }; | |
| 129 | |
| 130 } // namespace | |
| 131 | |
| 132 namespace { | |
| 133 | |
| 134 class RedundantStoreFinder final { | |
| 135 public: | |
| 136 RedundantStoreFinder(JSGraph* js_graph, Zone* temp_zone); | |
| 137 | |
| 138 void Find(); | |
| 139 | |
| 140 const ZoneSet<Node*>& to_remove_const() { return to_remove_; } | |
| 141 | |
| 142 virtual void Visit(Node* node); | |
| 143 | |
| 144 private: | |
| 145 static bool IsEffectful(Node* node); | |
| 146 void VisitEffectfulNode(Node* node); | |
| 147 UnobservablesSet RecomputeUseIntersection(Node* node); | |
| 148 UnobservablesSet RecomputeSet(Node* node, UnobservablesSet uses); | |
| 149 static bool CannotObserveStoreField(Node* node); | |
| 150 static bool CanObserveAnything(Node* node); | |
| 151 | |
| 152 void MarkForRevisit(Node* node); | |
| 153 bool HasBeenVisited(Node* node); | |
| 154 | |
| 155 JSGraph* jsgraph() const { return jsgraph_; } | |
| 156 Zone* temp_zone() const { return temp_zone_; } | |
| 157 ZoneVector<UnobservablesSet>& unobservable() { return unobservable_; } | |
| 158 UnobservablesSet& unobservable_for_id(NodeId id) { | |
| 159 DCHECK_LT(id, unobservable().size()); | |
| 160 return unobservable()[id]; | |
| 161 } | |
| 162 ZoneSet<Node*>& to_remove() { return to_remove_; } | |
| 163 | |
| 164 JSGraph* const jsgraph_; | |
| 165 Zone* const temp_zone_; | |
| 166 | |
| 167 ZoneStack<Node*> revisit_; | |
| 168 ZoneVector<bool> in_revisit_; | |
| 169 // Maps node IDs to UnobservablesSets. | |
| 170 ZoneVector<UnobservablesSet> unobservable_; | |
| 171 ZoneSet<Node*> to_remove_; | |
| 172 }; | |
| 149 | 173 |
| 150 // To safely cast an offset from a FieldAccess, which has a wider range | 174 // To safely cast an offset from a FieldAccess, which has a wider range |
| 151 // (namely int). | 175 // (namely int). |
| 152 Offset ToOffset(int offset) { | 176 StoreOffset ToOffset(int offset) { |
| 153 CHECK(0 <= offset && offset < (1 << 8 * sizeof(Offset))); | 177 CHECK(0 <= offset && offset < (1 << 8 * sizeof(StoreOffset))); |
| 154 return (Offset)offset; | 178 return (StoreOffset)offset; |
| 155 } | 179 } |
| 156 | 180 |
| 157 Offset ToOffset(const FieldAccess& access) { return ToOffset(access.offset); } | 181 StoreOffset ToOffset(const FieldAccess& access) { |
| 158 | 182 return ToOffset(access.offset); |
| 159 // If node has a single effect use, return that node. If node has no or | 183 } |
| 160 // multiple effect uses, return nullptr. | 184 |
| 161 Node* SingleEffectUse(Node* node) { | 185 size_t RepSizeOf(MachineRepresentation rep) { |
| 162 Node* last_use = nullptr; | 186 return ((size_t)1) << ElementSizeLog2Of(rep); |
|
> **Jarin** 2016/08/01 08:25:43: `((size_t)1)` -> `1u`
>
> **bgeron** 2016/08/04 12:31:02: Hm, I think that would be an unsigned int, which i…
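
As an aside on the `1u` question above, here is a minimal standalone check (not part of the CL) of the types involved: the result of a shift has the promoted type of its left operand, which is what makes `((size_t)1)` and `1u` differ.

```cpp
// Sketch: 1u << n has type unsigned int, while ((size_t)1) << n has type
// size_t. Both are wide enough for the element sizes that occur here (at
// most 1 << 4), so the review question is about type hygiene, not overflow.
#include <cstddef>
#include <type_traits>

int main() {
  static_assert(std::is_same<decltype(1u << 3), unsigned int>::value,
                "1u stays an unsigned int after promotion");
  static_assert(std::is_same<decltype(static_cast<std::size_t>(1) << 3),
                             std::size_t>::value,
                "a size_t left operand makes the shift a size_t");
  return 0;
}
```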
| |
| 187 } | |
| 188 size_t RepSizeOf(FieldAccess access) { | |
| 189 return RepSizeOf(access.machine_type.representation()); | |
| 190 } | |
| 191 | |
| 192 bool AtMostTagged(FieldAccess access) { | |
| 193 return RepSizeOf(access) <= RepSizeOf(MachineRepresentation::kTagged); | |
| 194 } | |
| 195 | |
| 196 bool AtLeastTagged(FieldAccess access) { | |
| 197 return RepSizeOf(access) >= RepSizeOf(MachineRepresentation::kTagged); | |
| 198 } | |
| 199 | |
| 200 } // namespace | |
| 201 | |
| 202 void RedundantStoreFinder::Find() { | |
| 203 Visit(jsgraph()->graph()->end()); | |
| 204 | |
| 205 while (!revisit_.empty()) { | |
| 206 Node* next = revisit_.top(); | |
| 207 revisit_.pop(); | |
| 208 DCHECK_LT(next->id(), in_revisit_.size()); | |
| 209 in_revisit_[next->id()] = false; | |
| 210 Visit(next); | |
| 211 } | |
| 212 | |
| 213 #ifdef DEBUG | |
| 214 // Check that we visited all the StoreFields | |
| 215 AllNodes all(temp_zone(), jsgraph()->graph()); | |
| 216 for (Node* node : all.live) { | |
| 217 if (node->op()->opcode() == IrOpcode::kStoreField) { | |
| 218 DCHECK_EXTRA(HasBeenVisited(node), "#%d:%s", node->id(), | |
| 219 node->op()->mnemonic()); | |
| 220 } | |
| 221 } | |
| 222 #endif | |
| 223 } | |
| 224 | |
| 225 void RedundantStoreFinder::MarkForRevisit(Node* node) { | |
| 226 DCHECK_LT(node->id(), in_revisit_.size()); | |
| 227 if (!in_revisit_[node->id()]) { | |
| 228 revisit_.push(node); | |
| 229 in_revisit_[node->id()] = true; | |
| 230 } | |
| 231 } | |
| 232 | |
| 233 bool RedundantStoreFinder::HasBeenVisited(Node* node) { | |
| 234 return !unobservable_for_id(node->id()).IsUnvisited(); | |
| 235 } | |
| 236 | |
| 237 void StoreStoreElimination::Run(JSGraph* js_graph, Zone* temp_zone) { | |
| 238 // Find superfluous nodes | |
| 239 RedundantStoreFinder finder(js_graph, temp_zone); | |
| 240 finder.Find(); | |
| 241 | |
| 242 // Remove superfluous nodes | |
| 243 | |
| 244 for (Node* node : finder.to_remove_const()) { | |
| 245 if (FLAG_trace_store_elimination) { | |
| 246 PrintF("StoreStoreElimination::Run: Eliminating node #%d:%s\n", | |
| 247 node->id(), node->op()->mnemonic()); | |
| 248 } | |
| 249 Node* previous_effect = NodeProperties::GetEffectInput(node); | |
| 250 NodeProperties::ReplaceUses(node, nullptr, previous_effect, nullptr, | |
| 251 nullptr); | |
| 252 node->Kill(); | |
| 253 } | |
| 254 } | |
| 255 | |
| 256 bool RedundantStoreFinder::IsEffectful(Node* node) { | |
| 257 return (node->op()->EffectInputCount() >= 1); | |
| 258 } | |
| 259 | |
| 260 // Recompute unobservables-set for a node. Will also mark superfluous nodes | |
| 261 // as to be removed. | |
| 262 | |
| 263 UnobservablesSet RedundantStoreFinder::RecomputeSet(Node* node, | |
| 264 UnobservablesSet uses) { | |
| 265 switch (node->op()->opcode()) { | |
| 266 case IrOpcode::kStoreField: { | |
| 267 Node* stored_to = node->InputAt(0); | |
| 268 FieldAccess access = OpParameter<FieldAccess>(node->op()); | |
| 269 StoreOffset offset = ToOffset(access); | |
| 270 | |
| 271 UnobservableStore observation = {stored_to->id(), offset}; | |
| 272 bool isNotObservable = uses.Contains(observation); | |
| 273 | |
| 274 if (isNotObservable && AtMostTagged(access)) { | |
| 275 TRACE(" #%d is StoreField[+%d,%s](#%d), unobservable", node->id(), | |
| 276 offset, MachineReprToString(access.machine_type.representation()), | |
| 277 stored_to->id()); | |
| 278 to_remove().insert(node); | |
| 279 return uses; | |
| 280 } else if (isNotObservable && !AtMostTagged(access)) { | |
| 281 TRACE( | |
| 282 " #%d is StoreField[+%d,%s](#%d), repeated in future but too " | |
| 283 "big to optimize away", | |
| 284 node->id(), offset, | |
| 285 MachineReprToString(access.machine_type.representation()), | |
| 286 stored_to->id()); | |
| 287 return uses; | |
| 288 } else if (!isNotObservable && AtLeastTagged(access)) { | |
| 289 TRACE(" #%d is StoreField[+%d,%s](#%d), observable, recording in set", | |
| 290 node->id(), offset, | |
| 291 MachineReprToString(access.machine_type.representation()), | |
| 292 stored_to->id()); | |
| 293 return uses.Add(observation, temp_zone()); | |
| 294 } else if (!isNotObservable && !AtLeastTagged(access)) { | |
| 295 TRACE( | |
| 296 " #%d is StoreField[+%d,%s](#%d), observable but too small to " | |
| 297 "record", | |
| 298 node->id(), offset, | |
| 299 MachineReprToString(access.machine_type.representation()), | |
| 300 stored_to->id()); | |
| 301 return uses; | |
| 302 } else { | |
| 303 UNREACHABLE(); | |
| 304 } | |
| 305 break; | |
| 306 } | |
| 307 case IrOpcode::kLoadField: { | |
| 308 Node* loaded_from = node->InputAt(0); | |
| 309 FieldAccess access = OpParameter<FieldAccess>(node->op()); | |
| 310 StoreOffset offset = ToOffset(access); | |
| 311 | |
| 312 TRACE( | |
| 313 " #%d is LoadField[+%d,%s](#%d), removing all offsets [+%d] from " | |
| 314 "set", | |
| 315 node->id(), offset, | |
| 316 MachineReprToString(access.machine_type.representation()), | |
| 317 loaded_from->id(), offset); | |
| 318 | |
| 319 return uses.RemoveSameOffset(offset, temp_zone()); | |
| 320 break; | |
| 321 } | |
| 322 default: | |
| 323 if (CannotObserveStoreField(node)) { | |
| 324 TRACE(" #%d:%s can observe nothing, set stays unchanged", node->id(), | |
| 325 node->op()->mnemonic()); | |
| 326 return uses; | |
| 327 } else if (CanObserveAnything(node)) { | |
| 328 TRACE(" #%d:%s can observe everything, recording empty set", | |
| 329 node->id(), node->op()->mnemonic()); | |
| 330 return UnobservablesSet::VisitedEmpty(temp_zone()); | |
| 331 } else { | |
| 332 // It is safe to turn this check off in the future, but it is better | |
| 333 // to list opcodes in CannotObserveStoreField, in CanObserveAnything, | |
| 334 // or if you don't know, to add another case inside this DCHECK_EXTRA. | |
| 335 DCHECK_EXTRA(node->op()->opcode() == IrOpcode::kCall, "%s", | |
| 336 node->op()->mnemonic()); | |
| 337 TRACE( | |
| 338 " cannot determine unobservables-set for #%d:%s; " | |
| 339 "conservatively recording empty set", | |
| 340 node->id(), node->op()->mnemonic()); | |
| 341 return UnobservablesSet::VisitedEmpty(temp_zone()); | |
| 342 } | |
| 343 } | |
| 344 UNREACHABLE(); | |
| 345 return UnobservablesSet::Unvisited(); | |
| 346 } | |
| 347 | |
| 348 bool RedundantStoreFinder::CannotObserveStoreField(Node* node) { | |
| 349 Operator::Properties mask = | |
| 350 Operator::kNoRead | Operator::kNoDeopt | Operator::kNoThrow; | |
| 351 | |
| 352 return (node->op()->properties() & mask) == mask || | |
| 353 node->opcode() == IrOpcode::kAllocate || | |
| 354 node->opcode() == IrOpcode::kCheckedLoad || | |
| 355 node->opcode() == IrOpcode::kLoadElement || | |
| 356 node->opcode() == IrOpcode::kLoad; | |
| 357 } | |
| 358 | |
| 359 bool RedundantStoreFinder::CanObserveAnything(Node* node) { | |
| 360 return !node->op()->HasProperty(Operator::kNoThrow) || | |
| 361 !node->op()->HasProperty(Operator::kNoDeopt); | |
| 362 } | |
| 363 | |
| 364 // Initialize unobservable_ with js_graph->graph()->NodeCount() empty sets. | |
| 365 RedundantStoreFinder::RedundantStoreFinder(JSGraph* js_graph, Zone* temp_zone) | |
| 366 : jsgraph_(js_graph), | |
| 367 temp_zone_(temp_zone), | |
| 368 revisit_(temp_zone), | |
| 369 in_revisit_(js_graph->graph()->NodeCount(), temp_zone), | |
| 370 unobservable_(js_graph->graph()->NodeCount(), | |
| 371 UnobservablesSet::Unvisited(), temp_zone), | |
| 372 to_remove_(temp_zone) {} | |
| 373 | |
| 374 void RedundantStoreFinder::Visit(Node* node) { | |
| 375 // All effectful nodes should be reachable from End via a sequence of | |
| 376 // control, then a sequence of effect edges. In VisitEffectfulNode we mark | |
| 377 // all effect inputs for revisiting (if they might have stale state); here | |
| 378 // we mark all control inputs at least once. | |
| 379 | |
| 380 if (!HasBeenVisited(node)) { | |
| 381 for (Edge edge : node->input_edges()) { | |
|
> **Jarin** 2016/08/01 08:25:43: You can also say: `for (int i = 0; i < node->op()-`…
>
> **bgeron** 2016/08/04 12:31:02: Is the speed the reason that that fragment might b…
>
> **Jarin** 2016/08/05 13:49:49: Yes. Also consistency with the rest of the codebas…
>
> **bgeron** 2016/08/09 18:52:52: Changed to your suggestion; we can refactor all th…
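
Jarin's truncated suggestion above presumably refers to iterating the control inputs by index instead of filtering every input edge. Since bgeron replies that he changed to that suggestion, the revised loop plausibly looks like the sketch below; this is a hedged reconstruction using the standard `Operator::ControlInputCount()` and `NodeProperties::GetControlInput()` helpers, not a quote of the final patch.

```cpp
// Hypothetical reconstruction of the suggested loop: walk only the control
// inputs of {node}, rather than testing IsControlEdge on every input edge.
for (int i = 0; i < node->op()->ControlInputCount(); i++) {
  Node* control_input = NodeProperties::GetControlInput(node, i);
  if (!HasBeenVisited(control_input)) {
    MarkForRevisit(control_input);
  }
}
```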
| |
| 382 if (NodeProperties::IsControlEdge(edge) && !HasBeenVisited(edge.to())) { | |
| 383 MarkForRevisit(edge.to()); | |
| 384 } | |
| 385 } | |
| 386 } | |
| 387 | |
| 388 bool isEffectful = (node->op()->EffectInputCount() >= 1); | |
| 389 if (isEffectful) { | |
| 390 VisitEffectfulNode(node); | |
| 391 DCHECK(HasBeenVisited(node)); | |
| 392 } | |
| 393 | |
| 394 if (!HasBeenVisited(node)) { | |
| 395 // Mark as visited. | |
| 396 unobservable_for_id(node->id()) = | |
| 397 UnobservablesSet::VisitedEmpty(temp_zone()); | |
|
> **Jarin** 2016/08/01 08:25:43: Could this be moved to the control node case? (Aft…
>
> **bgeron** 2016/08/04 12:31:02: Not that I know of. In some cases, VisitEffectfulN…
>
> **Jarin** 2016/08/05 13:49:49: Acknowledged.
| |
| 398 } | |
| 399 } | |
| 400 | |
| 401 void RedundantStoreFinder::VisitEffectfulNode(Node* node) { | |
| 402 if (HasBeenVisited(node)) { | |
| 403 TRACE("- Revisiting: #%d:%s", node->id(), node->op()->mnemonic()); | |
| 404 } | |
| 405 UnobservablesSet after_set = RecomputeUseIntersection(node); | |
| 406 UnobservablesSet before_set = RecomputeSet(node, after_set); | |
| 407 DCHECK(!before_set.IsUnvisited()); | |
| 408 | |
| 409 UnobservablesSet stored_for_node = unobservable_for_id(node->id()); | |
| 410 bool cur_set_changed = | |
| 411 (stored_for_node.IsUnvisited() || stored_for_node != before_set); | |
| 412 if (!cur_set_changed) { | |
| 413 // We will not be able to update the part of this chain above any more. | |
| 414 // Exit. | |
| 415 TRACE("+ No change: stabilized. Not visiting effect inputs."); | |
| 416 } else { | |
| 417 unobservable_for_id(node->id()) = before_set; | |
| 418 | |
| 419 // Mark effect inputs for visiting. | |
| 420 for (int i = 0; i < node->op()->EffectInputCount(); i++) { | |
| 421 Node* input = NodeProperties::GetEffectInput(node, i); | |
| 422 if (!CanObserveAnything(input) || !HasBeenVisited(input)) { | |
|
> **Jarin** 2016/08/01 08:25:43: Is this condition necessary? If the node can obser…
>
> **bgeron** 2016/08/04 12:31:02: No. :) Well, it's not strictly necessary because t…
>
> **Jarin** 2016/08/05 13:49:49: Yeah, I was confused. Thanks for the explanation.
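
The machinery under discussion is a backward dataflow fixed point: recompute a node's set, and re-queue its effect inputs only when the set changed. A toy standalone analogue (all names invented, plain ints standing in for UnobservablesSets) of that revisit pattern:

```cpp
// Toy fixed-point worklist: node i's "effect input" is node i - 1. A value
// is recomputed from its successor; predecessors are re-queued only when
// the value changed (or has never been visited), mirroring MarkForRevisit.
#include <algorithm>
#include <cstdio>
#include <stack>
#include <vector>

int main() {
  std::vector<int> value = {9, 9, 9, 2};  // 9 plays the role of "unvisited"
  std::stack<int> revisit;
  revisit.push(3);  // start at the end node, like Visit(graph->end())

  while (!revisit.empty()) {
    int n = revisit.top();
    revisit.pop();
    // Recompute: cap this node's value by its successor's value.
    int recomputed = (n + 1 < 4) ? std::min(value[n], value[n + 1]) : value[n];
    if (recomputed != value[n]) {
      value[n] = recomputed;                // changed: propagate upwards
      if (n - 1 >= 0) revisit.push(n - 1);  // mark effect input for revisit
    } else if (n - 1 >= 0 && value[n - 1] == 9) {
      revisit.push(n - 1);                  // unvisited: visit at least once
    }
  }
  std::printf("%d %d %d %d\n", value[0], value[1], value[2], value[3]);
}
```

Because values only ever shrink, the worklist is guaranteed to drain, which is the same termination argument as for the shrinking UnobservablesSets in the CL.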
| |
| 423 TRACE(" marking #%d:%s for revisit", input->id(), | |
| 424 input->op()->mnemonic()); | |
| 425 MarkForRevisit(input); | |
| 426 } | |
| 427 } | |
| 428 } | |
| 429 } | |
| 430 | |
| 431 // Compute the intersection of the UnobservablesSets of all effect uses and | |
| 432 // return it. This function only works if {node} has an effect use. | |
| 433 // | |
| 434 // The result UnobservablesSet will always be visited. | |
| 435 UnobservablesSet RedundantStoreFinder::RecomputeUseIntersection(Node* node) { | |
| 436 // {first} == true indicates that we haven't looked at any elements yet. | |
| 437 // {first} == false indicates that cur_set is the intersection of at least one | |
| 438 // thing. | |
| 439 | |
| 440 bool first = true; | |
| 441 UnobservablesSet cur_set = UnobservablesSet::Unvisited(); // irrelevant | |
| 442 | |
| 163 for (Edge edge : node->use_edges()) { | 443 for (Edge edge : node->use_edges()) { |
| 444 // Skip non-effect edges | |
| 164 if (!NodeProperties::IsEffectEdge(edge)) { | 445 if (!NodeProperties::IsEffectEdge(edge)) { |
| 165 continue; | 446 continue; |
| 166 } | 447 } |
| 167 if (last_use != nullptr) { | 448 |
| 168 // more than one | 449 Node* use = edge.from(); |
| 169 return nullptr; | 450 UnobservablesSet new_set = unobservable_for_id(use->id()); |
| 170 } | 451 // Include new_set in the intersection. |
| 171 last_use = edge.from(); | 452 if (first) { |
| 172 DCHECK_NOT_NULL(last_use); | 453 // Intersection of a one-element set is that one element |
| 173 } | 454 first = false; |
| 174 return last_use; | 455 cur_set = new_set; |
| 175 } | 456 } else { |
| 176 | 457 // Take the intersection of cur_set and new_set. |
| 177 // Return true if node is the last consecutive StoreField node in a linear | 458 cur_set = cur_set.Intersect(new_set, temp_zone()); |
| 178 // part of the effect chain. | 459 } |
| 179 bool IsEndOfStoreFieldChain(Node* node) { | 460 } |
| 180 Node* next_on_chain = SingleEffectUse(node); | 461 |
| 181 return (next_on_chain == nullptr || | 462 if (first) { |
| 182 next_on_chain->op()->opcode() != IrOpcode::kStoreField); | 463 // There were no effect uses. |
| 183 } | 464 auto opcode = node->op()->opcode(); |
| 184 | 465 // List of opcodes that may end this effect chain. The opcodes are not |
| 185 // The argument must be a StoreField node. If there is a node before it in the | 466 // important to the soundness of this optimization; this serves as a |
| 186 // effect chain, and if this part of the effect chain is linear (no other | 467 // general sanity check. Add opcodes to this list as it suits you. |
| 187 // effect uses of that previous node), then return that previous node. | 468 // |
| 188 // Otherwise, return nullptr. | 469 // Everything is observable after these opcodes; return the empty set. |
| 189 // | 470 DCHECK_EXTRA( |
| 190 // The returned node need not be a StoreField. | 471 opcode == IrOpcode::kReturn || opcode == IrOpcode::kTerminate || |
| 191 Node* PreviousEffectBeforeStoreField(Node* node) { | 472 opcode == IrOpcode::kDeoptimize || opcode == IrOpcode::kThrow, |
| 192 DCHECK_EQ(node->op()->opcode(), IrOpcode::kStoreField); | 473 "for #%d:%s", node->id(), node->op()->mnemonic()); |
| 193 DCHECK_EQ(node->op()->EffectInputCount(), 1); | 474 USE(opcode); // silence warning about unused variable in release mode |
| 194 | 475 |
| 195 Node* previous = NodeProperties::GetEffectInput(node); | 476 return UnobservablesSet::VisitedEmpty(temp_zone()); |
| 196 if (previous != nullptr && node == SingleEffectUse(previous)) { | |
| 197 return previous; | |
| 198 } else { | 477 } else { |
| 199 return nullptr; | 478 if (cur_set.IsUnvisited()) { |
| 200 } | 479 cur_set = UnobservablesSet::VisitedEmpty(temp_zone()); |
| 201 } | 480 } |
| 202 | 481 |
| 203 size_t rep_size_of(MachineRepresentation rep) { | 482 return cur_set; |
| 204 return ((size_t)1) << ElementSizeLog2Of(rep); | 483 } |
| 205 } | 484 } |
| 206 size_t rep_size_of(FieldAccess access) { | 485 |
| 207 return rep_size_of(access.machine_type.representation()); | 486 UnobservablesSet UnobservablesSet::Unvisited() { return UnobservablesSet(); } |
| 208 } | 487 |
| 209 | 488 UnobservablesSet::UnobservablesSet() : set_(nullptr) {} |
| 210 bool AtMostTagged(FieldAccess access) { | 489 |
| 211 return rep_size_of(access) <= rep_size_of(MachineRepresentation::kTagged); | 490 UnobservablesSet UnobservablesSet::VisitedEmpty(Zone* zone) { |
| 212 } | 491 // Create a new empty UnobservablesSet. This allocates in the zone, and |
| 213 | 492 // can probably be optimized to use a global singleton. |
|
> **Jarin** 2016/08/01 08:25:43: Global singletons are forbidden, but you might wan…
>
> **bgeron** 2016/08/04 12:31:02: I know, it's silly right? But if we memoize a Visi…
>
> **Jarin** 2016/08/05 13:49:49: `graph()->zone()`
>
> **bgeron** 2016/08/09 18:52:52: Done, as discussed.
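
On the memoization question above: a standalone sketch (invented names; a function-local static stands in for an allocation in `graph()->zone()`, since true global singletons are forbidden in V8) of handing out one shared immutable empty set instead of allocating per call:

```cpp
// Sketch: VisitedEmpty() returns the same pointer on every call, so the
// wrapper stays pointer-sized and no per-call allocation happens.
#include <set>

struct SharedEmptySet {
  const std::set<int>* set_;  // shared, immutable backing set
};

SharedEmptySet VisitedEmpty() {
  static const std::set<int> empty;  // stands in for a zone allocation
  return SharedEmptySet{&empty};
}

int main() {
  SharedEmptySet a = VisitedEmpty();
  SharedEmptySet b = VisitedEmpty();
  return (a.set_ == b.set_) ? 0 : 1;  // same backing set: memoized
}
```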
| |
| 214 bool AtLeastTagged(FieldAccess access) { | 493 ZoneSet<UnobservableStore>* empty_set = |
| 215 return rep_size_of(access) >= rep_size_of(MachineRepresentation::kTagged); | 494 new (zone->New(sizeof(ZoneSet<UnobservableStore>))) |
| 216 } | 495 ZoneSet<UnobservableStore>(zone); |
| 217 | 496 return UnobservablesSet(empty_set); |
| 218 } // namespace | 497 } |
| 219 | 498 |
| 220 bool StoreStoreElimination::IsEligibleNode(Node* node) { | 499 // Computes the intersection of two UnobservablesSets. May return |
| 221 return (node->op()->opcode() == IrOpcode::kStoreField) && | 500 // UnobservablesSet::Unvisited() instead of an empty UnobservablesSet for |
| 222 IsEndOfStoreFieldChain(node); | 501 // speed. |
| 223 } | 502 UnobservablesSet UnobservablesSet::Intersect(UnobservablesSet other, |
| 224 | 503 Zone* zone) const { |
| 225 void StoreStoreElimination::ReduceEligibleNode(Node* node) { | 504 if (set() == nullptr || other.set() == nullptr) { |
| 226 DCHECK(IsEligibleNode(node)); | 505 return Unvisited(); |
| 227 | 506 } else if (other.set() == nullptr) { |
|
> **Jarin** 2016/08/01 08:25:43: This case is unreachable, no? (It is already handl…
>
> **bgeron** 2016/08/04 12:31:02: Just checking if you were paying attention, of cou…
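
Incidentally, the `std::set_intersection` + `std::inserter` pattern in Intersect below is what the newly added `#include <iterator>` at the top of the file is for. A standalone illustration, with plain `std::set<int>` standing in for `ZoneSet<UnobservableStore>`:

```cpp
// std::set_intersection consumes two sorted ranges (std::set iterates in
// sorted order) and writes the common elements through an insert iterator.
#include <algorithm>
#include <cstdio>
#include <iterator>
#include <set>

int main() {
  std::set<int> a = {1, 2, 3, 5};
  std::set<int> b = {2, 3, 4};
  std::set<int> out;
  std::set_intersection(a.begin(), a.end(), b.begin(), b.end(),
                        std::inserter(out, out.end()));
  for (int x : out) std::printf("%d ", x);  // prints: 2 3
  std::printf("\n");
  return 0;
}
```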
| |
| 228 // if (FLAG_trace_store_elimination) { | 507 return *this; |
| 229 // PrintF("** StoreStoreElimination::ReduceEligibleNode: activated: | 508 } else { |
| 230 // #%d\n", | 509 ZoneSet<UnobservableStore>* intersection = |
| 231 // node->id()); | 510 new (zone->New(sizeof(ZoneSet<UnobservableStore>))) |
| 232 // } | 511 ZoneSet<UnobservableStore>(zone); |
| 233 | 512 // Put the intersection of set() and other.set() in intersection. |
| 234 TRACE("activated: #%d", node->id()); | 513 set_intersection(set()->begin(), set()->end(), other.set()->begin(), |
| 235 | 514 other.set()->end(), |
| 236 // Initialize empty futureStore. | 515 std::inserter(*intersection, intersection->end())); |
| 237 ZoneMap<Offset, Node*> futureStore(temp_zone()); | 516 |
| 238 | 517 return UnobservablesSet(intersection); |
| 239 Node* current_node = node; | 518 } |
| 240 | 519 } |
| 241 do { | 520 |
| 242 FieldAccess access = OpParameter<FieldAccess>(current_node->op()); | 521 UnobservablesSet UnobservablesSet::Add(UnobservableStore obs, |
| 243 Offset offset = ToOffset(access); | 522 Zone* zone) const { |
| 244 Node* object_input = current_node->InputAt(0); | 523 bool present = (set()->find(obs) != set()->end()); |
| 245 | 524 if (present) { |
| 246 Node* previous = PreviousEffectBeforeStoreField(current_node); | 525 return *this; |
| 247 | 526 } else { |
| 248 // Find the map entry. | 527 // Make a new empty set. |
| 249 ZoneMap<Offset, Node*>::iterator find_result = futureStore.find(offset); | 528 ZoneSet<UnobservableStore>* new_set = |
| 250 | 529 new (zone->New(sizeof(ZoneSet<UnobservableStore>))) |
| 251 bool present = find_result != futureStore.end(); | 530 ZoneSet<UnobservableStore>(zone); |
| 252 Node* value = present ? find_result->second : nullptr; | 531 // Copy the old elements over. |
| 253 | 532 *new_set = *set(); |
| 254 if (present && value == object_input && AtMostTagged(access)) { | 533 // Add the new element. |
| 255 // Key was present, and the value equalled object_input. This means | 534 bool inserted = new_set->insert(obs).second; |
| 256 // that soon after in the effect chain, we will do a StoreField to the | 535 DCHECK(inserted); |
| 257 // same object with the same offset, therefore current_node can be | 536 USE(inserted); // silence warning about unused variable |
| 258 // optimized away. Also, the future StoreField is at least as big as this | 537 |
| 259 // one. | 538 return UnobservablesSet(new_set); |
| 260 // | 539 } |
| 261 // We don't need to update futureStore. | 540 } |
| 262 | 541 |
| 263 Node* previous_effect = NodeProperties::GetEffectInput(current_node); | 542 UnobservablesSet UnobservablesSet::RemoveSameOffset(StoreOffset offset, |
| 264 | 543 Zone* zone) const { |
| 265 NodeProperties::ReplaceUses(current_node, nullptr, previous_effect, | 544 // Make a new empty set. |
| 266 nullptr, nullptr); | 545 ZoneSet<UnobservableStore>* new_set = |
| 267 current_node->Kill(); | 546 new (zone->New(sizeof(ZoneSet<UnobservableStore>))) |
| 268 TRACE("#%d[[+%d,%s]](#%d) -- at most tagged size, eliminated", | 547 ZoneSet<UnobservableStore>(zone); |
| 269 current_node->id(), offset, | 548 // Copy all elements over that have a different offset. |
| 270 MachineReprToString(access.machine_type.representation()), | 549 for (auto obs : *set()) { |
| 271 object_input->id()); | 550 if (obs.offset_ != offset) { |
| 272 } else if (present && value == object_input && !AtMostTagged(access)) { | 551 new_set->insert(obs); |
| 273 TRACE("#%d[[+%d,%s]](#%d) -- too wide, not eliminated", | 552 } |
| 274 current_node->id(), offset, | 553 } |
| 275 MachineReprToString(access.machine_type.representation()), | 554 |
| 276 object_input->id()); | 555 return UnobservablesSet(new_set); |
| 277 } else if (present && value != object_input && AtLeastTagged(access)) { | 556 } |
| 278 // Key was present, and the value did not equal object_input. This means | 557 |
| 279 // that there is a StoreField to this offset in the future, but the | 558 // Used for debugging. |
| 280 // object instance comes from a different Node. We pessimistically | 559 bool UnobservablesSet::operator==(const UnobservablesSet& other) const { |
| 281 // assume that we cannot optimize current_node away. However, we will | 560 if (IsUnvisited() || other.IsUnvisited()) { |
| 282 // guess that the current StoreField is more relevant than the future | 561 return IsEmpty() && other.IsEmpty(); |
| 283 // one, record the current StoreField in futureStore instead, and | 562 } else { |
| 284 // continue ascending up the chain. | 563 // Both pointers guaranteed not to be nullptrs. |
| 285 find_result->second = object_input; | 564 return *set() == *other.set(); |
| 286 TRACE("#%d[[+%d,%s]](#%d) -- wide enough, diff object, updated in map", | 565 } |
| 287 current_node->id(), offset, | 566 } |
| 288 MachineReprToString(access.machine_type.representation()), | 567 |
| 289 object_input->id()); | 568 bool UnobservablesSet::operator!=(const UnobservablesSet& other) const { |
| 290 } else if (!present && AtLeastTagged(access)) { | 569 return !(*this == other); |
| 291 // Key was not present. This means that there is no matching | 570 } |
| 292 // StoreField to this offset in the future, so we cannot optimize | 571 |
| 293 // current_node away. However, we will record the current StoreField | 572 bool UnobservableStore::operator==(const UnobservableStore other) const { |
| 294 // in futureStore, and continue ascending up the chain. | 573 return (id_ == other.id_) && (offset_ == other.offset_); |
| 295 futureStore.insert(std::make_pair(offset, object_input)); | 574 } |
| 296 TRACE( | 575 |
| 297 "#%d[[+%d,%s]](#%d) -- wide enough, key not present, inserted in map", | 576 bool UnobservableStore::operator!=(const UnobservableStore other) const { |
| 298 current_node->id(), offset, | 577 return !(*this == other); |
| 299 MachineReprToString(access.machine_type.representation()), | 578 } |
| 300 object_input->id()); | 579 |
| 301 } else if (!AtLeastTagged(access)) { | 580 bool UnobservableStore::operator<(const UnobservableStore other) const { |
| 302 TRACE("#%d[[+%d,%s]](#%d) -- too narrow to record", current_node->id(), | 581 return (id_ < other.id_) || (id_ == other.id_ && offset_ < other.offset_); |
| 303 offset, MachineReprToString(access.machine_type.representation()), | |
| 304 object_input->id()); | |
| 305 } else { | |
| 306 UNREACHABLE(); | |
| 307 } | |
| 308 | |
| 309 // Regardless of whether we eliminated node {current}, we want to | |
| 310 // continue walking up the effect chain. | |
| 311 | |
| 312 current_node = previous; | |
| 313 } while (current_node != nullptr && | |
| 314 current_node->op()->opcode() == IrOpcode::kStoreField); | |
| 315 | |
| 316 TRACE("finished"); | |
| 317 } | 582 } |
| 318 | 583 |
| 319 } // namespace compiler | 584 } // namespace compiler |
| 320 } // namespace internal | 585 } // namespace internal |
| 321 } // namespace v8 | 586 } // namespace v8 |