OLD | NEW |
---|---|
1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include <iterator> | |
6 | |
5 #include "src/compiler/store-store-elimination.h" | 7 #include "src/compiler/store-store-elimination.h" |
6 | 8 |
7 #include "src/compiler/all-nodes.h" | 9 #include "src/compiler/all-nodes.h" |
8 #include "src/compiler/js-graph.h" | 10 #include "src/compiler/js-graph.h" |
9 #include "src/compiler/node-properties.h" | 11 #include "src/compiler/node-properties.h" |
10 #include "src/compiler/simplified-operator.h" | 12 #include "src/compiler/simplified-operator.h" |
11 | 13 |
12 namespace v8 { | 14 namespace v8 { |
13 namespace internal { | 15 namespace internal { |
14 namespace compiler { | 16 namespace compiler { |
15 | 17 |
16 #define TRACE(fmt, ...) \ | 18 #define TRACE1(fmt, ...) \ |
17 do { \ | 19 do { \ |
18 if (FLAG_trace_store_elimination) { \ | 20 if (FLAG_trace_store_elimination) { \ |
19 PrintF("StoreStoreElimination::ReduceEligibleNode: " fmt "\n", \ | 21 PrintF("StoreStoreElimination::Run: " fmt "\n", ##__VA_ARGS__); \ |
20 ##__VA_ARGS__); \ | 22 } \ |
21 } \ | |
22 } while (false) | 23 } while (false) |
23 | 24 |
24 // A simple store-store elimination. When the effect chain contains the | 25 #define TRACE2(level, fmt, ...) \ |
25 // following sequence, | 26 do { \ |
26 // | 27 if (FLAG_trace_store_elimination && \ |
27 // - StoreField[[+off_1]](x1, y1) | 28 FLAG_trace_store_elimination_level >= (level)) { \ |
28 // - StoreField[[+off_2]](x2, y2) | 29 PrintF("StoreStoreFinder: [%d] " fmt "\n", level, ##__VA_ARGS__); \ |
29 // - StoreField[[+off_3]](x3, y3) | 30 } \ |
30 // ... | 31 } while (false) |
31 // - StoreField[[+off_n]](xn, yn) | 32 |
32 // | 33 // CHECK_EXTRA is like CHECK, but has two or more arguments: a boolean |
33 // where the xes are the objects and the ys are the values to be stored, then | 34 // expression, a format string, and any number of extra arguments. The boolean |
34 // we are going to say that a store is superfluous if the same offset of the | 35 // expression will be evaluated at runtime. If it evaluates to false, then an |
35 // same object will be stored to in the future. If off_i == off_j and xi == xj | 36 // error message will be shown containing the condition, as well as the extra |
36 // and i < j, then we optimize the i'th StoreField away. | 37 // info formatted like with printf. |
37 // | 38 #define CHECK_EXTRA(condition, fmt, ...) \ |
38 // This optimization should be initiated on the last StoreField in such a | 39 do { \ |
39 // sequence. | 40 if (V8_UNLIKELY(!(condition))) { \ |
40 // | 41 V8_Fatal(__FILE__, __LINE__, "Check failed: %s. Extra info: " fmt, \ |
41 // The algorithm works by walking the effect chain from the last StoreField | 42 #condition, ##__VA_ARGS__); \ |
42 // upwards. While walking, we maintain a map {futureStore} from offsets to | 43 } \ |
43 // nodes; initially it is empty. As we walk the effect chain upwards, if | 44 } while (0) |
44 // futureStore[off] = n, then any store to node {n} with offset {off} is | 45 |
45 // guaranteed to be useless because we do a tagged-width[2] store to that | 46 #ifdef DEBUG |
46 // offset of that object in the near future anyway. For example, for this | 47 #define DCHECK_EXTRA(condition, fmt, ...) \ |
47 // effect chain | 48 CHECK_EXTRA(condition, fmt, ##__VA_ARGS__) |
48 // | 49 #else |
49 // 71: StoreField(60, 0) | 50 #define DCHECK_EXTRA(condition, fmt, ...) ((void)0) |
50 // 72: StoreField(65, 8) | 51 #endif |
51 // 73: StoreField(63, 8) | 52 |
52 // 74: StoreField(65, 16) | 53 // Store-store elimination. |
53 // 75: StoreField(62, 8) | 54 // |
54 // | 55 // The aim of this optimization is to detect the following pattern in the |
55 // just before we get to 72, we will have futureStore = {8: 63, 16: 65}. | 56 // effect graph: |
56 // | 57 // |
57 // Here is the complete process. | 58 // - StoreField[+24, kRepTagged](263, ...) |
58 // | 59 // |
59 // - We are at the end of a sequence of consecutive StoreFields. | 60 // ... lots of nodes from which the field at offset 24 of the object |
60 // - We start out with futureStore = empty. | 61 // returned by node #263 cannot be observed ... |
61 // - We then walk the effect chain upwards to find the next StoreField [1]. | 62 // |
62 // | 63 // - StoreField[+24, kRepTagged](263, ...) |
63 // 1. If the offset is not a key of {futureStore} yet, we put it in. | 64 // |
64 // 2. If the offset is a key of {futureStore}, but futureStore[offset] is a | 65 // In such situations, the earlier StoreField cannot be observed, and can be |
65 // different node, we overwrite futureStore[offset] with the current node. | 66 // eliminated. This optimization should work for any offset and input node, of |
66 // 3. If the offset is a key of {futureStore} and futureStore[offset] equals | 67 // course. |
67 // this node, we eliminate this StoreField. | 68 // |
68 // | 69 // The optimization also works across splits. It currently does not work for |
69 // As long as the current effect input points to a node with a single effect | 70 // loops, because we tend to put a stack check in loops, and like deopts, |
70 // output, and as long as its opcode is StoreField, we keep traversing | 71 // stack checks can observe anything. |
71 // upwards. | 72 // |
72 // | 73 // The implementation consists of two phases. The first phase |
73 // | 74 // (StoreStoreFinder) analyzes the graph, and at every point determines what |
74 // | 75 // fields are guaranteed not to be observed in the future. The second phase |
75 // footnotes: | 76 // then eliminates all StoreField nodes that are known to be unobservable. |
76 // | 77 // |
77 // [1] We make sure that we only traverse the linear part, that is, the part | 78 // Per node, we store a data structure called the UnobservablesSet, which is a |
78 // where every node has exactly one incoming and one outgoing effect edge. | 79 // set of pairs of nodes and offsets. For instance, a Return always has an |
79 // Also, we only keep walking upwards as long as we keep finding consecutive | 80 // empty UnobservablesSet, as does a deoptimization, throw, or stack check. |
80 // StoreFields on the same node. | 81 // The UnobservablesSet of a StoreField consists of the node and the offset |
81 // | 82 // that it stores to, as well as the UnobservablesSet of the next node in the |
82 // [2] This optimization is sound only in certain cases. Specifically, the | 83 // effect chain. When there are multiple succeeding nodes in the effect graph, |
83 // presence of a future store to {off} by itself does not automatically mean | 84 // then the intersection of their UnobservablesSet is taken, and then the |
84 // that earlier stores to {off} are superfluous: a future narrow store does | 85 // offset of that StoreField is added. |
85 // not obviate an earlier wide store. However, future stores of a certain | 86 // |
86 // size do obviate stores to the same offset of lesser or equal size. | 88 // computation. Unfortunately, we compute a least fixpoint, not a greatest |
87 // | 88 // computation. Unfortunately, we compute a least fixpoint not a greatest |
88 // It turns out that we are most interested in stores of "tagged" size, | 89 // fixpoint. In practical terms this means that if we have a loop without any |
89 // which is 8 bytes on 64-bit archs and 4 bytes on 32-bit archs. In | 91 // will not detect that the StoreField inside the loop is unobservable. This |
90 // {futureStore}, we record future writes that are of at least this size. | 91 // will not detect that the StoreField inside the loop is unobservable. This |
91 // The three cases are actually a bit more subtle. | 92 // means that theoretically this implementation is slightly suboptimal. |
92 // | 93 // However, in practice we always put a stack check inside a loop, so the |
93 // 1. If the offset is not a key of {futureStore} and the StoreField is of | 94 // implementation should be optimal in practice. |
94 // "tagged" size or wider, then we put it in. | 95 // |
95 // 2. If the offset is present in {futureStore} but the value is different, | 96 // To implement this fixpoint, we use a special value |
96 // then we overwrite the value if the current StoreField is of "tagged" | 97 // UnobservablesSet::Undetermined(), which is functionally empty. When a |
97 // size or wider. | 98 // node's UnobservablesSet is undetermined, then that means that we have no |
98 // 3. If the offset is present and the value matches the node, and the | 99 // knowledge about what fields are unobservable at that point. In contrast, |
99 // current StoreField is AT MOST of "tagged" size, then we eliminate this | 100 // when a node's UnobservablesSet is determined but empty, and the node is not |
100 // StoreField. | 101 // marked for revisiting, then the node's effect input is determined and up- |
101 // | 102 // to-date. Initially, the UnobservablesSet of all nodes is set to |
102 // Examples of stores that we do not detect as superfluous: 2-byte stores | 103 // undetermined. |
103 // followed by 2-byte stores to the same offset; 16-byte stores followed by | 104 // |
104 // 16-byte stores to the same offset. On ia32, we do not detect consecutive | 105 // An ideal store-store elimination would keep for every object a set of byte |
105 // float64 stores as superfluous, and on x64 we do not detect consecutive | 109 // {taggedSize} is the size in bytes of a tagged value. We don't record that |
106 // int32 stores as superfluous. | 107 // store a set of offsets. If offset {off} is in the set, then that means that |
107 | 108 // bytes {off} up to but excluding {off+taggedSize} are unobservable, where |
108 // At a late stage, we realized that this code is more complicated than it | 109 // {taggedSize} is the size in bytes of a tagged value. We don't record that |
109 // needs to be: if we store a set of pairs (offset, node), the code simplifies | 110 // writes smaller than taggedSize are unobservable, and we don't optimize away |
110 // to 3 cases instead of 6. We could even store a map from nodes to sets of | 111 // writes larger than taggedSize. |
111 // bytes. | 112 // |
112 | 113 // The result should be that this store-store elimination is fast enough, and |
113 StoreStoreElimination::StoreStoreElimination(JSGraph* js_graph, Zone* temp_zone) | 114 // also optimizes away most superfluous stores. |
114 : jsgraph_(js_graph), temp_zone_(temp_zone) {} | 115 |
115 | 116 // Assumption: every byte of a JS object is only ever accessed through one |
116 StoreStoreElimination::~StoreStoreElimination() {} | 117 // offset. For instance, byte 15 of a given object may be accessed using a |
117 | 118 // two-byte read at offset 14, or a four-byte read at offset 12, but never |
118 void StoreStoreElimination::Run() { | 119 // both in the same program. |
119 // The store-store elimination performs work on chains of certain types of | 120 |
120 // nodes. The elimination must be invoked on the lowest node in such a | 121 // This implementation needs all dead nodes removed from the graph, and the |
121 // chain; we have a helper function IsEligibleNode that returns true | 122 // graph should be trimmed. |
122 // precisely on the lowest node in such a chain. | |
123 // | |
124 // Because the elimination removes nodes from the graph, even nodes | |
125 // that the elimination was not invoked on, we cannot use a normal | |
126 // AdvancedReducer but we manually find which nodes to invoke the | |
127 // elimination on. Then in a next step, we invoke the elimination for each | |
128 // node that was eligible. | |
129 | |
130 NodeVector eligible(temp_zone()); // loops over all nodes | |
131 AllNodes all(temp_zone(), jsgraph()->graph()); | |
132 | |
133 for (Node* node : all.live) { | |
134 if (IsEligibleNode(node)) { | |
135 eligible.push_back(node); | |
136 } | |
137 } | |
138 | |
139 for (Node* node : eligible) { | |
140 ReduceEligibleNode(node); | |
141 } | |
142 } | |
143 | 123 |
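
A minimal standalone sketch of the merge rule described above, assuming plain std::set in place of ZoneSet and bare ints in place of NodeIds (an illustrative model, not V8 code): a store is unobservable before a merge only if it is unobservable along every outgoing path, hence the intersection of the successors' sets.

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <iterator>
#include <set>
#include <utility>

using Unobservable = std::pair<int, uint16_t>;  // (node id, store offset)
using UnobsSet = std::set<Unobservable>;

// Intersection of the UnobservablesSets flowing in from two successors.
UnobsSet Intersect(const UnobsSet& a, const UnobsSet& b) {
  UnobsSet out;
  std::set_intersection(a.begin(), a.end(), b.begin(), b.end(),
                        std::inserter(out, out.end()));
  return out;
}

int main() {
  UnobsSet path1 = {{263, 24}, {263, 32}};  // path 1 re-stores at +24 and +32
  UnobsSet path2 = {{263, 24}};             // path 2 re-stores only at +24
  for (const auto& obs : Intersect(path1, path2)) {
    std::cout << "#" << obs.first << "[+" << obs.second << "]\n";  // #263[+24]
  }
  return 0;
}
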
144 namespace { | 124 namespace { |
145 | 125 |
146 // 16 bits was chosen fairly arbitrarily; it seems enough now. 8 bits is too | 126 // 16 bits was chosen fairly arbitrarily; it seems enough now. 8 bits is too |
147 // few. | 127 // few. |
148 typedef uint16_t Offset; | 128 typedef uint16_t StoreOffset; |
129 | |
130 struct UnobservableStore { | |
131 NodeId id_; | |
132 StoreOffset offset_; | |
133 | |
134 bool operator==(const UnobservableStore) const; | |
135 bool operator!=(const UnobservableStore) const; | |
136 bool operator<(const UnobservableStore) const; | |
137 }; | |
138 | |
139 } // namespace | |
140 | |
141 std::ostream& operator<<(std::ostream&, const UnobservableStore); | |
142 | |
143 namespace { | |
144 | |
145 // Instances of UnobservablesSet are immutable. They represent either a set of | |
146 // UnobservableStores, or the "undetermined empty set". | |
147 // | |
148 // We apply some sharing to save memory. The class UnobservablesSet is only a | |
149 // pointer wide, and a copy does not use any heap (or temp_zone) memory. | |
150 // Operations that change an UnobservablesSet may allocate in the temp_zone. | |
151 // | |
152 // Backing sets of determined UnobservablesSets live in the temp_zone and | |
153 // may be shared between instances; operations such as Add and Intersect | |
154 // never mutate a set in place but return a (possibly shared) result. | |
155 class UnobservablesSet final { | |
156 public: | |
157 static UnobservablesSet Undetermined(); | |
158 static UnobservablesSet DeterminedEmpty(Zone* zone); | |
159 UnobservablesSet(); // undetermined | |
160 UnobservablesSet(const UnobservablesSet& other) : set_(other.set_) {} | |
161 | |
162 UnobservablesSet Intersect(UnobservablesSet other, Zone* zone) const; | |
163 UnobservablesSet Add(UnobservableStore obs, Zone* zone) const; | |
164 UnobservablesSet RemoveSameOffset(StoreOffset off, Zone* zone) const; | |
165 | |
166 const ZoneSet<UnobservableStore>* set() const { return set_; } | |
167 | |
168 bool IsUndetermined() const { return set_ == nullptr; } | |
169 bool IsEmpty() const { return set_ == nullptr || set_->empty(); } | |
170 bool Contains(UnobservableStore obs) const { | |
171 return set_ != nullptr && (set_->find(obs) != set_->end()); | |
172 } | |
173 | |
174 bool operator==(const UnobservablesSet&) const; | |
175 bool operator!=(const UnobservablesSet&) const; | |
176 | |
177 private: | |
178 explicit UnobservablesSet(const ZoneSet<UnobservableStore>* set) | |
179 : set_(set) {} | |
180 const ZoneSet<UnobservableStore>* set_; | |
181 }; | |
182 | |
183 } // namespace | |
184 | |
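
The sharing scheme described in the comment above can be modeled without zones. A hypothetical sketch that uses std::shared_ptr for the backing set (the real class stores a raw pointer into the temp_zone; the names here are made up):

#include <memory>
#include <set>
#include <utility>

// A pointer-wide immutable set with structural sharing (illustrative only).
class SharedIntSet {
 public:
  SharedIntSet() = default;  // "undetermined": no backing set allocated

  bool Contains(int value) const {
    return set_ != nullptr && set_->count(value) != 0;
  }

  // Never mutates; returns *this if {value} is already present, otherwise a
  // fresh copy with {value} added. Existing copies stay valid and unchanged.
  SharedIntSet Add(int value) const {
    if (Contains(value)) return *this;
    auto grown = set_ != nullptr ? std::make_shared<std::set<int>>(*set_)
                                 : std::make_shared<std::set<int>>();
    grown->insert(value);
    return SharedIntSet(std::move(grown));
  }

 private:
  explicit SharedIntSet(std::shared_ptr<const std::set<int>> set)
      : set_(std::move(set)) {}
  std::shared_ptr<const std::set<int>> set_;  // copying copies one pointer
};
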
185 std::ostream& operator<<(std::ostream& os, UnobservablesSet set); | |
186 | |
187 namespace { | |
188 | |
189 class StoreStoreFinder final { | |
190 public: | |
191 StoreStoreFinder(JSGraph* js_graph, Zone* temp_zone); | |
192 | |
193 void Find(); | |
194 | |
195 const ZoneSet<Node*>& to_remove_const() { return to_remove_; } | |
196 | |
197 virtual void Visit(Node* node); | |
198 | |
199 private: | |
200 static bool IsEligibleNode(Node* node); | |
201 void VisitEligibleNode(Node* node); | |
202 UnobservablesSet RecomputeUseIntersection(Node* node); | |
203 UnobservablesSet RecomputeSet(Node* node, UnobservablesSet uses); | |
204 static bool CanObserveNothing(Node* node); | |
205 static bool CanObserveAnything(Node* node); | |
206 | |
207 void MarkForRevisit(Node* node); | |
208 | |
209 JSGraph* jsgraph() const { return jsgraph_; } | |
210 Zone* temp_zone() const { return temp_zone_; } | |
211 ZoneVector<UnobservablesSet>& unobservable() { return unobservable_; } | |
212 UnobservablesSet& unobservable_for_id(NodeId id) { | |
213 return unobservable().at(id); | |
214 } | |
215 ZoneSet<Node*>& to_remove() { return to_remove_; } | |
216 | |
217 JSGraph* const jsgraph_; | |
218 Zone* const temp_zone_; | |
219 | |
220 ZoneStack<Node*> revisit_; | |
221 ZoneVector<bool> in_revisit_; | |
222 // Maps node IDs to UnobservablesSets. | |
223 ZoneVector<UnobservablesSet> unobservable_; | |
224 ZoneSet<Node*> to_remove_; | |
225 }; | |
149 | 226 |
150 // To safely cast an offset from a FieldAccess, which has a wider range | 227 // To safely cast an offset from a FieldAccess, which has a wider range |
151 // (namely int). | 228 // (namely int). |
152 Offset ToOffset(int offset) { | 229 StoreOffset ToOffset(int offset) { |
153 CHECK(0 <= offset && offset < (1 << 8 * sizeof(Offset))); | 230 CHECK(0 <= offset && offset < (1 << 8 * sizeof(StoreOffset))); |
154 return (Offset)offset; | 231 return (StoreOffset)offset; |
155 } | 232 } |
156 | 233 |
157 Offset ToOffset(const FieldAccess& access) { return ToOffset(access.offset); } | 234 StoreOffset ToOffset(const FieldAccess& access) { |
235 return ToOffset(access.offset); | |
236 } | |
158 | 237 |
159 // If node has a single effect use, return that node. If node has no or | 238 // If node has a single effect use, return that node. If node has no or |
160 // multiple effect uses, return nullptr. | 239 // multiple effect uses, return nullptr. |
161 Node* SingleEffectUse(Node* node) { | 240 Node* SingleEffectUse(Node* node) { |
162 Node* last_use = nullptr; | 241 Node* last_use = nullptr; |
163 for (Edge edge : node->use_edges()) { | 242 for (Edge edge : node->use_edges()) { |
164 if (!NodeProperties::IsEffectEdge(edge)) { | 243 if (!NodeProperties::IsEffectEdge(edge)) { |
165 continue; | 244 continue; |
166 } | 245 } |
167 if (last_use != nullptr) { | 246 if (last_use != nullptr) { |
168 // more than one | 247 // more than one |
169 return nullptr; | 248 return nullptr; |
170 } | 249 } |
171 last_use = edge.from(); | 250 last_use = edge.from(); |
172 DCHECK_NOT_NULL(last_use); | 251 DCHECK_NOT_NULL(last_use); |
173 } | 252 } |
174 return last_use; | 253 return last_use; |
175 } | 254 } |
176 | 255 |
177 // Return true if node is the last consecutive StoreField node in a linear | 256 // If there is a node before {node} in the effect chain, and if this part of |
178 // part of the effect chain. | 257 // the effect chain is linear (no other effect uses of that previous node), |
179 bool IsEndOfStoreFieldChain(Node* node) { | 258 // then return that previous node. Otherwise, return nullptr. |
180 Node* next_on_chain = SingleEffectUse(node); | 259 Node* PreviousEffectInChain(Node* node) { |
181 return (next_on_chain == nullptr || | 260 if (node->op()->EffectInputCount() == 1) { |
182 next_on_chain->op()->opcode() != IrOpcode::kStoreField); | 261 Node* previous = NodeProperties::GetEffectInput(node); |
183 } | 262 if (previous != nullptr && node == SingleEffectUse(previous)) { |
184 | 263 return previous; |
185 // The argument must be a StoreField node. If there is a node before it in the | 264 } else { |
186 // effect chain, and if this part of the effect chain is linear (no other | 265 return nullptr; |
187 // effect uses of that previous node), then return that previous node. | 266 } |
188 // Otherwise, return nullptr. | |
189 // | |
190 // The returned node need not be a StoreField. | |
191 Node* PreviousEffectBeforeStoreField(Node* node) { | |
192 DCHECK_EQ(node->op()->opcode(), IrOpcode::kStoreField); | |
193 DCHECK_EQ(node->op()->EffectInputCount(), 1); | |
194 | |
195 Node* previous = NodeProperties::GetEffectInput(node); | |
196 if (previous != nullptr && node == SingleEffectUse(previous)) { | |
197 return previous; | |
198 } else { | 267 } else { |
199 return nullptr; | 268 return nullptr; |
200 } | 269 } |
201 } | 270 } |
202 | 271 |
203 size_t rep_size_of(MachineRepresentation rep) { | 272 size_t rep_size_of(MachineRepresentation rep) { |
204 return ((size_t)1) << ElementSizeLog2Of(rep); | 273 return ((size_t)1) << ElementSizeLog2Of(rep); |
205 } | 274 } |
206 size_t rep_size_of(FieldAccess access) { | 275 size_t rep_size_of(FieldAccess access) { |
207 return rep_size_of(access.machine_type.representation()); | 276 return rep_size_of(access.machine_type.representation()); |
208 } | 277 } |
209 | 278 |
210 bool AtMostTagged(FieldAccess access) { | 279 bool AtMostTagged(FieldAccess access) { |
211 return rep_size_of(access) <= rep_size_of(MachineRepresentation::kTagged); | 280 return rep_size_of(access) <= rep_size_of(MachineRepresentation::kTagged); |
212 } | 281 } |
213 | 282 |
214 bool AtLeastTagged(FieldAccess access) { | 283 bool AtLeastTagged(FieldAccess access) { |
215 return rep_size_of(access) >= rep_size_of(MachineRepresentation::kTagged); | 284 return rep_size_of(access) >= rep_size_of(MachineRepresentation::kTagged); |
216 } | 285 } |
217 | 286 |
287 int EffectUseCount(Node* node) { | |
288 int uses = 0; | |
289 for (const Edge edge : node->use_edges()) { | |
290 if (NodeProperties::IsEffectEdge(edge)) { | |
291 uses++; | |
292 } | |
293 } | |
294 return uses; | |
295 } | |
296 | |
218 } // namespace | 297 } // namespace |
219 | 298 |
220 bool StoreStoreElimination::IsEligibleNode(Node* node) { | 299 void StoreStoreFinder::Find() { |
221 return (node->op()->opcode() == IrOpcode::kStoreField) && | 300 Visit(jsgraph()->graph()->end()); |
222 IsEndOfStoreFieldChain(node); | 301 |
223 } | 302 while (!revisit_.empty()) { |
224 | 303 Node* next = revisit_.top(); |
225 void StoreStoreElimination::ReduceEligibleNode(Node* node) { | 304 revisit_.pop(); |
226 DCHECK(IsEligibleNode(node)); | 305 in_revisit_.at(next->id()) = false; |
227 | 306 Visit(next); |
228 // if (FLAG_trace_store_elimination) { | 307 } |
229 // PrintF("** StoreStoreElimination::ReduceEligibleNode: activated: | 308 |
230 // #%d\n", | 309 #ifdef DEBUG |
231 // node->id()); | 310 // Check that we visited all the StoreFields |
232 // } | 311 AllNodes all(temp_zone(), jsgraph()->graph()); |
233 | 312 for (Node* node : all.live) { |
234 TRACE("activated: #%d", node->id()); | 313 if (node->op()->opcode() == IrOpcode::kStoreField) { |
235 | 314 UnobservablesSet node_unobservable = unobservable_for_id(node->id()); |
236 // Initialize empty futureStore. | 315 DCHECK_EXTRA(!node_unobservable.IsUndetermined(), "#%d:%s", node->id(), |
237 ZoneMap<Offset, Node*> futureStore(temp_zone()); | 316 node->op()->mnemonic()); |
238 | 317 } |
239 Node* current_node = node; | 318 } |
319 #endif | |
320 } | |
321 | |
322 void StoreStoreFinder::MarkForRevisit(Node* node) { | |
323 if (!in_revisit_.at(node->id())) { | |
324 revisit_.push(node); | |
325 in_revisit_[node->id()] = true; | |
326 } | |
327 } | |
328 | |
329 void StoreStoreElimination::Run(JSGraph* js_graph, Zone* temp_zone) { | |
330 // Find superfluous nodes | |
331 StoreStoreFinder finder(js_graph, temp_zone); | |
332 finder.Find(); | |
333 | |
334 // Remove superfluous nodes | |
335 TRACE1("Eliminating %d nodes", (int)finder.to_remove_const().size()); | |
336 for (Node* node : finder.to_remove_const()) { | |
337 TRACE1(" Eliminating node: #%d:%s", node->id(), node->op()->mnemonic()); | |
338 Node* previous_effect = NodeProperties::GetEffectInput(node); | |
339 NodeProperties::ReplaceUses(node, nullptr, previous_effect, nullptr, | |
340 nullptr); | |
341 node->Kill(); | |
342 } | |
343 } | |
344 | |
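
The removal loop above only needs to rewire the effect edge before killing the node. As a toy model of that splice, assuming a minimal node struct in place of TurboFan's Node and NodeProperties (not V8's API):

#include <cassert>

struct ToyNode {
  ToyNode* effect_input = nullptr;  // previous node on the effect chain
};

// Point {user} past {dead}, the way ReplaceUses above redirects effect uses
// of an eliminated StoreField to that StoreField's own effect input.
void SpliceOut(ToyNode* dead, ToyNode* user) {
  assert(user->effect_input == dead);
  user->effect_input = dead->effect_input;
  dead->effect_input = nullptr;  // unlinked; analogous to node->Kill()
}

int main() {
  ToyNode store_a, store_b, ret;  // effect chain: store_a <- store_b <- ret
  store_b.effect_input = &store_a;
  ret.effect_input = &store_b;
  SpliceOut(&store_b, &ret);  // store_b was superfluous
  assert(ret.effect_input == &store_a);
  return 0;
}
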
345 bool StoreStoreFinder::IsEligibleNode(Node* node) { | |
346 DCHECK_LE(node->op()->EffectOutputCount(), 1); | |
347 | |
348 TRACE2(5, "%d e-inputs, %d e-outputs, %d e-uses for node %d:%s", | |
349 node->op()->EffectInputCount(), node->op()->EffectOutputCount(), | |
350 EffectUseCount(node), node->id(), node->op()->mnemonic()); | |
351 | |
352 bool isEffectful = (node->op()->EffectInputCount() >= 1); | |
353 bool endsEffectChain = (EffectUseCount(node) != 1) || | |
354 (SingleEffectUse(node)->op()->EffectInputCount() >= 2); | |
355 if (endsEffectChain && EffectUseCount(node) >= 1) { | |
356 DCHECK(isEffectful); | |
357 } | |
358 return isEffectful && endsEffectChain; | |
359 } | |
360 | |
361 // Recompute unobservables-set for a node. Will also mark superfluous nodes | |
362 // for removal. | |
363 | |
364 UnobservablesSet StoreStoreFinder::RecomputeSet(Node* node, | |
365 UnobservablesSet uses) { | |
366 // Usually, we decide using the operator properties that an operator | |
367 // observes everything or observes nothing (see CanObserveAnything, | |
368 // CanObserveNothing), but there are some opcodes we treat specially. | |
369 switch (node->op()->opcode()) { | |
370 case IrOpcode::kStoreField: { | |
371 Node* stored_to = node->InputAt(0); | |
372 FieldAccess access = OpParameter<FieldAccess>(node->op()); | |
373 StoreOffset offset = ToOffset(access); | |
374 | |
375 UnobservableStore observation = {stored_to->id(), offset}; | |
376 bool presentInSet = uses.Contains(observation); | |
377 | |
378 if (presentInSet && AtMostTagged(access)) { | |
379 TRACE2(1, " #%d is StoreField[+%d,%s](#%d), unobservable", node->id(), | |
380 offset, | |
381 MachineReprToString(access.machine_type.representation()), | |
382 stored_to->id()); | |
383 to_remove().insert(node); | |
384 return uses; | |
385 } else if (presentInSet && !AtMostTagged(access)) { | |
386 TRACE2(1, | |
387 " #%d is StoreField[+%d,%s](#%d), repeated in future but too " | |
388 "big to optimize away", | |
389 node->id(), offset, | |
390 MachineReprToString(access.machine_type.representation()), | |
391 stored_to->id()); | |
392 return uses; | |
393 } else if (!presentInSet && AtLeastTagged(access)) { | |
394 TRACE2(1, | |
395 " #%d is StoreField[+%d,%s](#%d), observable, recording in set", | |
396 node->id(), offset, | |
397 MachineReprToString(access.machine_type.representation()), | |
398 stored_to->id()); | |
399 return uses.Add(observation, temp_zone()); | |
400 } else if (!presentInSet && !AtLeastTagged(access)) { | |
401 TRACE2(1, | |
402 " #%d is StoreField[+%d,%s](#%d), observable but too small to " | |
403 "record", | |
404 node->id(), offset, | |
405 MachineReprToString(access.machine_type.representation()), | |
406 stored_to->id()); | |
407 return uses; | |
408 } else { | |
409 UNREACHABLE(); | |
410 } | |
411 break; | |
412 } | |
413 case IrOpcode::kLoadField: { | |
414 Node* loaded_from = node->InputAt(0); | |
415 FieldAccess access = OpParameter<FieldAccess>(node->op()); | |
416 StoreOffset offset = ToOffset(access); | |
417 | |
418 TRACE2(1, | |
419 " #%d is LoadField[+%d,%s](#%d), removing all offsets [+%d] from " | |
420 "set", | |
421 node->id(), offset, | |
422 MachineReprToString(access.machine_type.representation()), | |
423 loaded_from->id(), offset); | |
424 | |
425 return uses.RemoveSameOffset(offset, temp_zone()); | |
426 break; | |
427 } | |
428 default: | |
429 if (CanObserveNothing(node)) { | |
430 TRACE2(1, " #%d:%s can observe nothing, set stays unchanged", | |
431 node->id(), node->op()->mnemonic()); | |
432 return uses; | |
433 } else if (CanObserveAnything(node)) { | |
434 TRACE2(1, " #%d:%s can observe everything, recording empty set", | |
435 node->id(), node->op()->mnemonic()); | |
436 return UnobservablesSet::DeterminedEmpty(temp_zone()); | |
437 } else { | |
438 // It is safe to turn this check off in the future, but it is better | |
439 // to list opcodes in CanObserveNothing, in CanObserveAnything, or if | |
440 // you don't know, to add another case inside this DCHECK_EXTRA. | |
441 DCHECK_EXTRA(node->op()->opcode() == IrOpcode::kCall, "%s", | |
442 node->op()->mnemonic()); | |
443 TRACE2(1, | |
444 " cannot determine unobservables-set for #%d:%s; " | |
445 "conservatively recording empty set", | |
446 node->id(), node->op()->mnemonic()); | |
447 return UnobservablesSet::DeterminedEmpty(temp_zone()); | |
448 } | |
449 } | |
450 UNREACHABLE(); | |
451 return UnobservablesSet::Undetermined(); | |
452 } | |
453 | |
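
The four StoreField cases above combine two independent tests: whether the (node, offset) pair is already in the set, and how wide the store is relative to a tagged value. A compact standalone restatement, assuming sizes in bytes and an 8-byte tagged size, i.e. a 64-bit target (illustrative only):

#include <cstddef>

enum class StoreAction { kEliminate, kKeep, kKeepAndRecord };

StoreAction ClassifyStore(bool present_in_set, size_t store_size,
                          size_t tagged_size = 8) {
  if (present_in_set) {
    // A later tagged-width store fully covers this one only if this store
    // is no wider than a tagged value (AtMostTagged).
    return store_size <= tagged_size ? StoreAction::kEliminate
                                     : StoreAction::kKeep;
  }
  // Only stores of at least tagged width make later, narrower stores to the
  // same offset unobservable, so only those are recorded (AtLeastTagged).
  return store_size >= tagged_size ? StoreAction::kKeepAndRecord
                                   : StoreAction::kKeep;
}
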
454 bool StoreStoreFinder::CanObserveNothing(Node* node) { | |
455 Operator::Properties mask = | |
456 Operator::kNoRead | Operator::kNoDeopt | Operator::kNoThrow; | |
457 | |
458 return (node->op()->properties() & mask) == mask || | |
459 node->opcode() == IrOpcode::kAllocate || | |
460 node->opcode() == IrOpcode::kCheckedLoad || | |
461 node->opcode() == IrOpcode::kLoadElement; | |
462 } | |
463 | |
464 bool StoreStoreFinder::CanObserveAnything(Node* node) { | |
465 const Operator* op = node->op(); | |
466 auto opcode = op->opcode(); | |
467 if (opcode == IrOpcode::kLoad) { | |
468 return true; | |
469 } | |
470 return !op->HasProperty(Operator::kNoThrow) || | |
471 !op->HasProperty(Operator::kNoDeopt); | |
472 } | |
473 | |
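
CanObserveNothing above requires all three operator properties at once; that is what the (properties & mask) == mask test checks. A self-contained sketch with hypothetical flag values (the real Operator property bits differ):

#include <cstdint>

constexpr uint8_t kNoRead = 1 << 0;
constexpr uint8_t kNoDeopt = 1 << 1;
constexpr uint8_t kNoThrow = 1 << 2;

constexpr bool ObservesNothing(uint8_t properties) {
  const uint8_t mask = kNoRead | kNoDeopt | kNoThrow;
  return (properties & mask) == mask;  // all three bits must be set
}

static_assert(ObservesNothing(kNoRead | kNoDeopt | kNoThrow),
              "all three properties together imply observing nothing");
static_assert(!ObservesNothing(kNoRead | kNoThrow),
              "an operator that may deopt may observe stores");
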
474 // Initialize unobservable_ with NodeCount() many undetermined sets. | |
475 StoreStoreFinder::StoreStoreFinder(JSGraph* js_graph, Zone* temp_zone) | |
476 : jsgraph_(js_graph), | |
477 temp_zone_(temp_zone), | |
478 revisit_(temp_zone), | |
479 in_revisit_(js_graph->graph()->NodeCount(), temp_zone), | |
480 unobservable_(js_graph->graph()->NodeCount(), | |
481 UnobservablesSet::Undetermined(), temp_zone), | |
482 to_remove_(temp_zone) {} | |
483 | |
484 void StoreStoreFinder::Visit(Node* node) { | |
485 TRACE2(2, "Visiting #%d:%s", node->id(), node->op()->mnemonic()); | |
486 | |
487 // All eligible nodes should be reachable from the control graph. If this | |
488 // node was not visited (unobservable_for_id(node->id()).IsUndetermined()), | |
489 // then mark its control inputs for revisiting and mark this node as | |
490 // visited (unobservable_for_id(node->id()) = DeterminedEmpty(..)). | |
491 | |
492 bool isEffectful = (node->op()->EffectInputCount() >= 1); | |
493 | |
494 if (IsEligibleNode(node)) { | |
495 VisitEligibleNode(node); | |
496 } else if (!isEffectful) { | |
497 TRACE2(2, | |
498 " node not in effect chain, marking its control inputs for visit"); | |
499 | |
500 if (unobservable_for_id(node->id()).IsUndetermined()) { | |
501 for (Edge edge : node->input_edges()) { | |
502 if (NodeProperties::IsControlEdge(edge)) { | |
503 MarkForRevisit(edge.to()); | |
504 } | |
505 } | |
506 } | |
507 | |
508 // Mark as visited | |
509 unobservable_for_id(node->id()) = | |
510 UnobservablesSet::DeterminedEmpty(temp_zone()); | |
511 } | |
512 } | |
513 | |
514 void StoreStoreFinder::VisitEligibleNode(Node* node) { | |
515 TRACE2(1, "Found eligible node: %4d:%s", node->id(), node->op()->mnemonic()); | |
516 | |
517 Node* cur = node; | |
518 TRACE2(1, " Recomputing unobservable-use-intersection for #%d:%s", cur->id(), | |
519 cur->op()->mnemonic()); | |
520 UnobservablesSet after_set = RecomputeUseIntersection(cur); | |
521 bool cur_set_changed; | |
240 | 522 |
241 do { | 523 do { |
242 FieldAccess access = OpParameter<FieldAccess>(current_node->op()); | 524 UnobservablesSet before_set = RecomputeSet(cur, after_set); |
243 Offset offset = ToOffset(access); | 525 |
244 Node* object_input = current_node->InputAt(0); | 526 DCHECK(!before_set.IsUndetermined()); |
245 | 527 TRACE2(2, " %d UnobservableStores in new set", |
246 Node* previous = PreviousEffectBeforeStoreField(current_node); | 528 (int)before_set.set()->size()); |
247 | 529 |
248 // Find the map entry. | 530 UnobservablesSet* stored_for_node = &unobservable_for_id(cur->id()); |
249 ZoneMap<Offset, Node*>::iterator find_result = futureStore.find(offset); | 531 |
250 | 532 cur_set_changed = |
251 bool present = find_result != futureStore.end(); | 533 (stored_for_node->IsUndetermined() || *stored_for_node != before_set); |
252 Node* value = present ? find_result->second : nullptr; | 534 |
253 | 535 if (!cur_set_changed) { |
254 if (present && value == object_input && AtMostTagged(access)) { | 536 // We will not be able to update the part of this chain above any more. |
255 // Key was present, and the value equalled object_input. This means | 537 // Exit. |
256 // that soon after in the effect chain, we will do a StoreField to the | 538 TRACE2(1, "+ No change: stabilized. Stopping this chain."); |
257 // same object with the same offset, therefore current_node can be | 539 break; |
258 // optimized away. Also, the future StoreField is at least as big as this | 540 } else if (cur_set_changed && before_set.IsEmpty()) { |
259 // one. | 541 DCHECK(before_set.IsEmpty()); |
260 // | 542 TRACE2(2, " Still empty set, but marking determined. Walking up."); |
261 // We don't need to update futureStore. | 543 } else if (cur_set_changed && !before_set.IsEmpty()) { |
262 | 544 TRACE2(2, " Growing unreachable-set and walking up."); |
263 Node* previous_effect = NodeProperties::GetEffectInput(current_node); | |
264 | |
265 NodeProperties::ReplaceUses(current_node, nullptr, previous_effect, | |
266 nullptr, nullptr); | |
267 current_node->Kill(); | |
268 TRACE("#%d[[+%d,%s]](#%d) -- at most tagged size, eliminated", | |
269 current_node->id(), offset, | |
270 MachineReprToString(access.machine_type.representation()), | |
271 object_input->id()); | |
272 } else if (present && value == object_input && !AtMostTagged(access)) { | |
273 TRACE("#%d[[+%d,%s]](#%d) -- too wide, not eliminated", | |
274 current_node->id(), offset, | |
275 MachineReprToString(access.machine_type.representation()), | |
276 object_input->id()); | |
277 } else if (present && value != object_input && AtLeastTagged(access)) { | |
278 // Key was present, and the value did not equal object_input. This means | |
279 // that there is a StoreField to this offset in the future, but the | |
280 // object instance comes from a different Node. We pessimistically | |
281 // assume that we cannot optimize current_node away. However, we will | |
282 // guess that the current StoreField is more relevant than the future | |
283 // one, record the current StoreField in futureStore instead, and | |
284 // continue ascending up the chain. | |
285 find_result->second = object_input; | |
286 TRACE("#%d[[+%d,%s]](#%d) -- wide enough, diff object, updated in map", | |
287 current_node->id(), offset, | |
288 MachineReprToString(access.machine_type.representation()), | |
289 object_input->id()); | |
290 } else if (!present && AtLeastTagged(access)) { | |
291 // Key was not present. This means that there is no matching | |
292 // StoreField to this offset in the future, so we cannot optimize | |
293 // current_node away. However, we will record the current StoreField | |
294 // in futureStore, and continue ascending up the chain. | |
295 futureStore.insert(std::make_pair(offset, object_input)); | |
296 TRACE( | |
297 "#%d[[+%d,%s]](#%d) -- wide enough, key not present, inserted in map", | |
298 current_node->id(), offset, | |
299 MachineReprToString(access.machine_type.representation()), | |
300 object_input->id()); | |
301 } else if (!AtLeastTagged(access)) { | |
302 TRACE("#%d[[+%d,%s]](#%d) -- too narrow to record", current_node->id(), | |
303 offset, MachineReprToString(access.machine_type.representation()), | |
304 object_input->id()); | |
305 } else { | 545 } else { |
306 UNREACHABLE(); | 546 UNREACHABLE(); |
307 } | 547 } |
308 | 548 |
309 // Regardless of whether we eliminated node {current}, we want to | 549 // Overwrite vector in-place. |
310 // continue walking up the effect chain. | 550 *stored_for_node = before_set; |
311 | 551 |
312 current_node = previous; | 552 Node* previous = PreviousEffectInChain(cur); |
313 } while (current_node != nullptr && | 553 if (previous == nullptr && cur_set_changed) { |
314 current_node->op()->opcode() == IrOpcode::kStoreField); | 554 TRACE2(1, |
315 | 555 "- Reached top of chain; marking effect inputs for revisiting."); |
316 TRACE("finished"); | 556 for (int i = 0; i < cur->op()->EffectInputCount(); i++) { |
557 Node* input = NodeProperties::GetEffectInput(cur, i); | |
558 if (!CanObserveAnything(input) || | |
559 unobservable_for_id(input->id()).IsUndetermined()) { | |
560 MarkForRevisit(input); | |
561 } | |
562 } | |
563 | |
564 cur = nullptr; | |
565 } else if (previous == nullptr && !cur_set_changed) { | |
566 TRACE2(1, "+ Reached top of chain and stabilized."); | |
567 cur = nullptr; | |
568 } else { | |
569 // Update variables for next loop iteration | |
570 cur = previous; | |
571 DCHECK(EffectUseCount(previous) == 1); | |
572 after_set = before_set; | |
573 if (FLAG_turbo_verify_store_elimination) { | |
574 DCHECK(after_set == RecomputeUseIntersection(cur)); | |
575 } | |
576 DCHECK_NOT_NULL(cur); | |
577 } | |
578 } while (cur != nullptr); | |
579 } | |
580 | |
581 // Compute the intersection of the UnobservablesSets of all effect uses and | |
582 // return it. If {node} has no effect uses, it must end the effect chain | |
583 // (e.g. Return or Throw); in that case the result is the empty set. | |
584 // The result UnobservablesSet will always be determined. | |
585 UnobservablesSet StoreStoreFinder::RecomputeUseIntersection(Node* node) { | |
586 TRACE2(4, " recomputing use intersections of #%d:%s", node->id(), | |
587 node->op()->mnemonic()); | |
588 | |
589 // {first} == true indicates that we haven't looked at any elements yet. | |
590 // {first} == false indicates that {cur_set} is the intersection of the | |
591 // sets of all effect uses seen so far. | |
592 | |
593 bool first = true; | |
594 UnobservablesSet cur_set = UnobservablesSet::Undetermined(); // irrelevant | |
595 | |
596 for (Edge edge : node->use_edges()) { | |
597 // Skip non-effect edges | |
598 if (!NodeProperties::IsEffectEdge(edge)) { | |
599 continue; | |
600 } | |
601 | |
602 Node* use = edge.from(); | |
603 TRACE2(4, " found use %d", use->id()); | |
604 UnobservablesSet new_set = unobservable_for_id(use->id()); | |
605 // Include new_set in the intersection. | |
606 if (first) { | |
607 // Intersection of a one-element set is that one element | |
608 first = false; | |
609 cur_set = new_set; | |
610 } else { | |
611 // Take the intersection of cur_set and new_set. | |
612 cur_set = cur_set.Intersect(new_set, temp_zone()); | |
613 } | |
614 | |
615 if (FLAG_trace_store_elimination) { | |
616 // Serialise the UnobservablesSet. | |
617 std::ostringstream os; | |
618 os << "intersected with " << new_set << ", current intersection is " | |
619 << cur_set; | |
620 std::string msg = os.str(); | |
621 TRACE2(4, " %s", msg.c_str()); | |
622 } | |
623 } | |
624 | |
625 if (first) { | |
626 // There were no effect uses. | |
627 auto opcode = node->op()->opcode(); | |
628 // List of opcodes that may end this effect chain. The opcodes are not | |
629 // important to the soundness of this optimization; this serves as a | |
630 // general sanity check. Add opcodes to this list as it suits you. | |
631 // | |
632 // Everything is observable after these opcodes; return the empty set. | |
633 DCHECK_EXTRA( | |
634 opcode == IrOpcode::kReturn || opcode == IrOpcode::kTerminate || | |
635 opcode == IrOpcode::kDeoptimize || opcode == IrOpcode::kThrow, | |
636 "for #%d:%s", node->id(), node->op()->mnemonic()); | |
637 USE(opcode); // silence warning about unused variable | |
638 | |
639 TRACE2(3, " ..no effect uses, so unobservables-set = []"); | |
640 return UnobservablesSet::DeterminedEmpty(temp_zone()); | |
641 } else { | |
642 if (cur_set.IsUndetermined()) { | |
643 cur_set = UnobservablesSet::DeterminedEmpty(temp_zone()); | |
644 } | |
645 | |
646 if (FLAG_trace_store_elimination) { | |
647 // Serialise the UnobservablesSet. | |
648 std::ostringstream os; | |
649 os << cur_set; | |
650 std::string msg = os.str(); | |
651 TRACE2(2, " ..use-intersection: %s", msg.c_str()); | |
652 } | |
653 | |
654 return cur_set; | |
655 } | |
656 } | |
657 | |
658 UnobservablesSet UnobservablesSet::Undetermined() { return UnobservablesSet(); } | |
659 | |
660 UnobservablesSet::UnobservablesSet() : set_(nullptr) {} | |
661 | |
662 UnobservablesSet UnobservablesSet::DeterminedEmpty(Zone* zone) { | |
663 // Create a new empty UnobservablesSet. This allocates in the zone, and | |
664 // can probably be optimized to use a global singleton. | |
665 ZoneSet<UnobservableStore>* empty_set = | |
666 new (zone->New(sizeof(ZoneSet<UnobservableStore>))) | |
667 ZoneSet<UnobservableStore>(zone); | |
668 return UnobservablesSet(empty_set); | |
669 } | |
670 | |
671 // Computes the intersection of two UnobservablesSets. May return | |
672 // UnobservablesSet::Undetermined() instead of an empty UnobservablesSet for | |
673 // speed. | |
674 UnobservablesSet UnobservablesSet::Intersect(UnobservablesSet other, | |
675 Zone* zone) const { | |
676 // If either set is undetermined (no backing set allocated), then the | |
677 // intersection is undetermined as well. | |
678 if (set() == nullptr || other.set() == nullptr) { | |
679 return Undetermined(); | |
680 } else { | |
681 ZoneSet<UnobservableStore>* intersection = | |
682 new (zone->New(sizeof(ZoneSet<UnobservableStore>))) | |
683 ZoneSet<UnobservableStore>(zone); | |
684 // Put the intersection of set() and other.set() in intersection. | |
685 set_intersection(set()->begin(), set()->end(), other.set()->begin(), | |
686 other.set()->end(), | |
687 std::inserter(*intersection, intersection->end())); | |
688 TRACE2(3, "intersected; result:"); | |
689 for (UnobservableStore obs : *intersection) { | |
690 std::ostringstream os; | |
691 os << obs; | |
692 std::string msg = os.str(); | |
693 TRACE2(3, "- %s", msg.c_str()); | |
694 } | |
695 return UnobservablesSet(intersection); | |
696 } | |
697 } | |
698 | |
699 UnobservablesSet UnobservablesSet::Add(UnobservableStore obs, | |
700 Zone* zone) const { | |
701 bool present = (set()->find(obs) != set()->end()); | |
702 if (present) { | |
703 return *this; | |
704 } else { | |
705 // Make a new empty set. | |
706 ZoneSet<UnobservableStore>* new_set = | |
707 new (zone->New(sizeof(ZoneSet<UnobservableStore>))) | |
708 ZoneSet<UnobservableStore>(zone); | |
709 // Copy the old elements over. | |
710 *new_set = *set(); | |
711 // Add the new element. | |
712 bool inserted = new_set->insert(obs).second; | |
713 DCHECK(inserted); | |
714 USE(inserted); // silence warning about unused variable | |
715 | |
716 return UnobservablesSet(new_set); | |
717 } | |
718 } | |
719 | |
720 UnobservablesSet UnobservablesSet::RemoveSameOffset(StoreOffset offset, | |
721 Zone* zone) const { | |
722 // Make a new empty set. | |
723 ZoneSet<UnobservableStore>* new_set = | |
724 new (zone->New(sizeof(ZoneSet<UnobservableStore>))) | |
725 ZoneSet<UnobservableStore>(zone); | |
726 // Copy all elements over that have a different offset. | |
727 for (auto obs : *set()) { | |
728 if (obs.offset_ != offset) { | |
729 new_set->insert(obs); | |
730 } | |
731 } | |
732 | |
733 return UnobservablesSet(new_set); | |
734 } | |
735 | |
736 // Used for debugging. | |
737 std::ostream& operator<<(std::ostream& os, UnobservablesSet set) { | |
738 if (set.set() == nullptr) { | |
739 os << "(undetermined)"; | |
740 } else { | |
741 os << "["; | |
742 bool first = true; | |
743 for (UnobservableStore obs : *set.set()) { | |
744 if (!first) { | |
745 os << ","; | |
746 } else { | |
747 first = false; | |
748 } | |
749 os << obs; | |
750 } | |
751 os << "]"; | |
752 } | |
753 return os; | |
754 } | |
755 | |
756 bool UnobservablesSet::operator==(const UnobservablesSet& other) const { | |
757 if (IsUndetermined() || other.IsUndetermined()) { | |
758 TRACE2(4, " (either is undetermined)"); | |
759 return IsEmpty() && other.IsEmpty(); | |
760 } else { | |
761 TRACE2(4, " (neither is undetermined)"); | |
762 // Both pointers guaranteed not to be nullptrs. | |
763 return *set() == *other.set(); | |
764 } | |
765 } | |
766 | |
767 bool UnobservablesSet::operator!=(const UnobservablesSet& other) const { | |
768 return !(*this == other); | |
769 } | |
770 | |
771 bool UnobservableStore::operator==(const UnobservableStore other) const { | |
772 return (id_ == other.id_) && (offset_ == other.offset_); | |
773 } | |
774 | |
775 bool UnobservableStore::operator!=(const UnobservableStore other) const { | |
776 return !(*this == other); | |
777 } | |
778 | |
779 bool UnobservableStore::operator<(const UnobservableStore other) const { | |
780 return (id_ < other.id_) || (id_ == other.id_ && offset_ < other.offset_); | |
781 } | |
782 | |
783 std::ostream& operator<<(std::ostream& os, UnobservableStore obs) { | |
784 os << "#" << obs.id_ << "[+" << obs.offset_ << "]"; | |
785 return os; | |
317 } | 786 } |
318 | 787 |
319 } // namespace compiler | 788 } // namespace compiler |
320 } // namespace internal | 789 } // namespace internal |
321 } // namespace v8 | 790 } // namespace v8 |