OLD | NEW |
---|---|
1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/compiler/store-store-elimination.h" | 5 #include "src/compiler/store-store-elimination.h" |
6 | 6 |
7 #include "src/compiler/all-nodes.h" | 7 #include "src/compiler/all-nodes.h" |
8 #include "src/compiler/js-graph.h" | 8 #include "src/compiler/js-graph.h" |
9 #include "src/compiler/node-properties.h" | 9 #include "src/compiler/node-properties.h" |
10 #include "src/compiler/simplified-operator.h" | 10 #include "src/compiler/simplified-operator.h" |
11 | 11 |
12 namespace v8 { | 12 namespace v8 { |
13 namespace internal { | 13 namespace internal { |
14 namespace compiler { | 14 namespace compiler { |
15 | 15 |
16 #define TRACE(fmt, ...) \ | 16 #define TRACE1(fmt, ...) \ |
17 do { \ | 17 do { \ |
18 if (FLAG_trace_store_elimination) { \ | 18 if (FLAG_trace_store_elimination) { \ |
19 PrintF("StoreStoreElimination::ReduceEligibleNode: " fmt "\n", \ | 19 PrintF("StoreStoreElimination::Run: " fmt "\n", ##__VA_ARGS__); \ |
20 ##__VA_ARGS__); \ | 20 } \ |
21 } \ | |
22 } while (false) | 21 } while (false) |
23 | 22 |
24 // A simple store-store elimination. When the effect chain contains the | 23 #define TRACE2(level, fmt, ...) \ |
25 // following sequence, | 24 do { \ |
25 if (FLAG_trace_store_elimination && \ | |
26 FLAG_trace_store_elimination_level >= (level)) { \ | |
27 PrintF("StoreStoreFinder: [%d] " fmt "\n", level, ##__VA_ARGS__); \ | |
28 } \ | |
29 } while (false) | |
Jarin
2016/07/20 11:43:33
This feels a bit over-engineered - normally, you w
bgeron
2016/07/20 16:27:16
Because trace level 1 is ~37% as big as trace leve
| |
30 | |
31 // CHECK_EXTRA is like CHECK, but has two or more arguments: a boolean | |
32 // expression, a format string, and any number of extra arguments. The boolean | |
33 // expression will be evaluated at runtime. If it evaluates to false, then an | |
34 // error message will be shown containing the condition, as well as the extra | |
35 // info formatted like with printf. | |
36 #define CHECK_EXTRA(condition, fmt, ...) \ | |
37 do { \ | |
38 if (V8_UNLIKELY(!(condition))) { \ | |
39 V8_Fatal(__FILE__, __LINE__, "Check failed: %s. Extra info: " fmt, \ | |
40 #condition, ##__VA_ARGS__); \ | |
41 } \ | |
42 } while (0) | |
43 | |
44 #ifdef DEBUG | |
45 #define DCHECK_EXTRA(condition, fmt, ...) \ | |
46 CHECK_EXTRA(condition, fmt, ##__VA_ARGS__) | |
47 #else | |
48 #define DCHECK_EXTRA(condition, fmt, ...) ((void)0) | |
49 #endif | |
50 | |
51 // Store-store elimination. | |
26 // | 52 // |
27 // - StoreField[[+off_1]](x1, y1) | 53 // The aim of this optimization is to detect the following pattern in the |
28 // - StoreField[[+off_2]](x2, y2) | 54 // effect graph: |
29 // - StoreField[[+off_3]](x3, y3) | |
30 // ... | |
31 // - StoreField[[+off_n]](xn, yn) | |
32 // | 55 // |
33 // where the xes are the objects and the ys are the values to be stored, then | 56 // - StoreField[+24, kRepTagged](263, ...) |
34 // we are going to say that a store is superfluous if the same offset of the | |
35 // same object will be stored to in the future. If off_i == off_j and xi == xj | |
36 // and i < j, then we optimize the i'th StoreField away. | |
37 // | 57 // |
38 // This optimization should be initiated on the last StoreField in such a | 58 // ... lots of nodes from which the field at offset 24 of the object |
39 // sequence. | 59 // returned by node #263 cannot be observed ... |
40 // | 60 // |
41 // The algorithm works by walking the effect chain from the last StoreField | 61 // - StoreField[+24, kRepTagged](263, ...) |
42 // upwards. While walking, we maintain a map {futureStore} from offsets to | |
43 // nodes; initially it is empty. As we walk the effect chain upwards, if | |
44 // futureStore[off] = n, then any store to node {n} with offset {off} is | |
45 // guaranteed to be useless because we do a tagged-width[2] store to that | |
46 // offset of that object in the near future anyway. For example, for this | |
47 // effect chain | |
48 // | 62 // |
49 // 71: StoreField(60, 0) | 63 // In such situations, the earlier StoreField cannot be observed, and can be |
50 // 72: StoreField(65, 8) | 64 // eliminated. This optimization should work for any offset and input node, of |
51 // 73: StoreField(63, 8) | 65 // course. |
52 // 74: StoreField(65, 16) | |
53 // 75: StoreField(62, 8) | |
54 // | 66 // |
55 // just before we get to 72, we will have futureStore = {8: 63, 16: 65}. | 67 // The optimization also works across splits. It currently does not work for |
68 // loops, because we tend to put a stack check in loops, and like deopts, | |
69 // stack checks can observe anything. | |
56 // | 70 // |
57 // Here is the complete process. | 71 // The implementation consists of two phases. The first phase |
72 // (StoreStoreFinder) analyzes the graph, and at every point determines what | |
73 // fields are guaranteed not to be observed in the future. The second phase | |
74 // then eliminates all StoreField nodes that are known to be unobservable. | |
58 // | 75 // |
59 // - We are at the end of a sequence of consecutive StoreFields. | 76 // Per node, we store a data structure called the UnobservablesSet, which is a |
60 // - We start out with futureStore = empty. | 77 // set of pairs of nodes and offsets. For instance, a Return always has an |
61 // - We then walk the effect chain upwards to find the next StoreField [1]. | 78 // empty UnobservablesSet, as does a deoptimization, throw, or stack check. |
79 // The UnobservablesSet of a StoreField consists of the node and the offset | |
80 // that it stores to, as well as the UnobservablesSet of the next node in the | |
81 // effect chain. When there are multiple succeeding nodes in the effect graph, | |
82 // then the intersection of their UnobservablesSet is taken, and then the | |
83 // offset of that StoreField is added. | |
62 // | 84 // |
63 // 1. If the offset is not a key of {futureStore} yet, we put it in. | 85 // To allow for cycles in the effect graph, the implementation does a fixpoint |
64 // 2. If the offset is a key of {futureStore}, but futureStore[offset] is a | 86 // computation. Unfortunately, we compute a least fixpoint not a greatest |
65 // different node, we overwrite futureStore[offset] with the current node. | 87 // fixpoint. In practical terms this means that if we have a loop without any |
66 // 3. If the offset is a key of {futureStore} and futureStore[offset] equals | 88 // deopt or stack check, and a StoreField inside and after the loop, then we |
67 // this node, we eliminate this StoreField. | 89 // will not detect that the StoreField inside the loop is unobservable. This |
90 // means that theoretically this implementation is slightly suboptimal. | |
91 // However, in practice we always put a stack check inside a loop, so the | |
92 // implementation should be optimal in practice. | |
68 // | 93 // |
69 // As long as the current effect input points to a node with a single effect | 94 // To implement this fixpoint, we use a special value |
70 // output, and as long as its opcode is StoreField, we keep traversing | 95 // UnobservablesSet::Undetermined(), which is functionally empty. When a |
71 // upwards. | 96 // node's UnobservablesSet is undetermined, then that means that we have no |
97 // knowledge about what fields are unobservable at that point. In contrast, | |
98 // when a node's UnobservablesSet is determined but empty, and the node is not | |
99 // marked for revisiting, then the node's effect input is determined and up- | |
100 // to-date. Initially, the UnobservablesSet of all nodes is set to | |
101 // undetermined. | |
72 // | 102 // |
103 // We apply some sharing to save memory. The class UnobservablesSet is only a | |
104 // pointer wide, and a copy does not use any heap (or temp_zone) memory. Most | |
105 // changes to an UnobservablesSet allocate in the temp_zone. | |
Jarin
2016/07/20 11:43:33
This blurb should go into the UnobservablesSet cla
bgeron
2016/07/20 16:27:16
Done.
| |
106 | |
107 // An ideal store-store elimination would keep for every object a set of byte | |
108 // ranges into that object which are unobservable. We skimp on this, and only | |
109 // store a set of offsets. If offset {off} is in the set, then that means that | |
110 // bytes {off} up to but excluding {off+taggedSize} are unobservable, where | |
111 // {taggedSize} is the size in bytes of a tagged value. We don't record that | |
112 // writes smaller than taggedSize are unobservable, and we don't optimize away | |
113 // writes larger than taggedSize. | |
73 // | 114 // |
74 // | 115 // The result should be that this store-store elimination is fast enough, and |
75 // footnotes: | 116 // also optimizes away most superfluous stores. |
76 // | |
77 // [1] We make sure that we only traverse the linear part, that is, the part | |
78 // where every node has exactly one incoming and one outgoing effect edge. | |
79 // Also, we only keep walking upwards as long as we keep finding consecutive | |
80 // StoreFields on the same node. | |
81 // | |
82 // [2] This optimization is sound only in certain cases. Specifically, the | |
83 // presence of a future store to {off} by itself does not automatically mean | |
84 // that earlier stores to {off} are superfluous: a future narrow store does | |
85 // not obviate an earlier wide store. However, future stores of a certain | |
86 // size do obviate stores to the same offset of lesser or equal size. | |
87 // | |
88 // It turns out that we are most interested in stores of "tagged" size, | |
89 // which is 8 bytes on 64-bit archs and 4 bit on 32-bit archs. In | |
90 // {futureStore}, we record future writes that are of at least this size. | |
91 // The three cases are actually a bit more subtle. | |
92 // | |
93 // 1. If the offset is not a key of {futureStore} and the StoreField is of | |
94 // "tagged" size or wider, then we put it in. | |
95 // 2. If the offset is present in {futureStore} but the value is different, | |
96 // then we overwrite the value if the current StoreField is of "tagged" | |
97 // size or wider. | |
98 // 3. If the offset is present and the value matches the node, and the | |
99 // current StoreField is AT MOST of "tagged" size, then we eliminate this | |
100 // StoreField. | |
101 // | |
102 // Examples of stores that we do not detect as superfluous: 2-byte stores | |
103 // followed by 2-byte stores to the same offset; 16-byte stores followed by | |
104 // 16-byte stores to the same offset. On ia32, we do not detect consecutive | |
105 // float64 stores as superfluous, and on x86 we do not detect consecutive | |
106 // int32 stores as superfluous. | |
107 | 117 |
108 // At a late stage, we realized that this code is more complicated than it | 118 // Assumption: every byte of a JS object is only ever accessed through one |
109 // needs to be: if we store a set of pairs (offset, node), the code simplifies | 119 // offset. For instance, byte 15 of a given object may be accessed using a |
110 // to 3 cases instead of 6. We could even store a map from nodes to sets of | 120 // two-byte read at offset 14, or a four-byte read at offset 12, but never |
111 // bytes. | 121 // both in the same program. |
112 | 122 |
113 StoreStoreElimination::StoreStoreElimination(JSGraph* js_graph, Zone* temp_zone) | 123 // This implementation needs all dead nodes removed from the graph, and the |
114 : jsgraph_(js_graph), temp_zone_(temp_zone) {} | 124 // graph should be trimmed. |
115 | |
116 StoreStoreElimination::~StoreStoreElimination() {} | |
117 | |
118 void StoreStoreElimination::Run() { | |
119 // The store-store elimination performs work on chains of certain types of | |
120 // nodes. The elimination must be invoked on the lowest node in such a | |
121 // chain; we have a helper function IsEligibleNode that returns true | |
122 // precisely on the lowest node in such a chain. | |
123 // | |
124 // Because the elimination removes nodes from the graph, even remove nodes | |
125 // that the elimination was not invoked on, we cannot use a normal | |
126 // AdvancedReducer but we manually find which nodes to invoke the | |
127 // elimination on. Then in a next step, we invoke the elimination for each | |
128 // node that was eligible. | |
129 | |
130 NodeVector eligible(temp_zone()); // loops over all nodes | |
131 AllNodes all(temp_zone(), jsgraph()->graph()); | |
132 | |
133 for (Node* node : all.live) { | |
134 if (IsEligibleNode(node)) { | |
135 eligible.push_back(node); | |
136 } | |
137 } | |
138 | |
139 for (Node* node : eligible) { | |
140 ReduceEligibleNode(node); | |
141 } | |
142 } | |
143 | 125 |
144 namespace { | 126 namespace { |
145 | 127 |
146 // 16 bits was chosen fairly arbitrarily; it seems enough now. 8 bits is too | |
147 // few. | |
148 typedef uint16_t Offset; | |
149 | |
150 // To safely cast an offset from a FieldAccess, which has a wider range | 128 // To safely cast an offset from a FieldAccess, which has a wider range |
151 // (namely int). | 129 // (namely int). |
152 Offset ToOffset(int offset) { | 130 StoreOffset to_offset(int offset) { |
Jarin
2016/07/20 11:43:33
Function names should be CamelCase. (Here and belo
bgeron
2016/07/20 16:27:16
Yeah, you're right. I was thinking of the "simple
| |
153 CHECK(0 <= offset && offset < (1 << 8 * sizeof(Offset))); | 131 CHECK(0 <= offset && offset < (1 << 8 * sizeof(StoreOffset))); |
154 return (Offset)offset; | 132 return (StoreOffset)offset; |
155 } | 133 } |
156 | 134 |
157 Offset ToOffset(const FieldAccess& access) { return ToOffset(access.offset); } | 135 StoreOffset to_offset(const FieldAccess& access) { |
136 return to_offset(access.offset); | |
137 } | |
158 | 138 |
159 // If node has a single effect use, return that node. If node has no or | 139 // If node has a single effect use, return that node. If node has no or |
160 // multiple effect uses, return nullptr. | 140 // multiple effect uses, return nullptr. |
161 Node* SingleEffectUse(Node* node) { | 141 Node* single_effect_use(Node* node) { |
162 Node* last_use = nullptr; | 142 Node* last_use = nullptr; |
163 for (Edge edge : node->use_edges()) { | 143 for (Edge edge : node->use_edges()) { |
164 if (!NodeProperties::IsEffectEdge(edge)) { | 144 if (!NodeProperties::IsEffectEdge(edge)) { |
165 continue; | 145 continue; |
166 } | 146 } |
167 if (last_use != nullptr) { | 147 if (last_use != nullptr) { |
168 // more than one | 148 // more than one |
169 return nullptr; | 149 return nullptr; |
170 } | 150 } |
171 last_use = edge.from(); | 151 last_use = edge.from(); |
172 DCHECK_NOT_NULL(last_use); | 152 DCHECK_NOT_NULL(last_use); |
173 } | 153 } |
174 return last_use; | 154 return last_use; |
175 } | 155 } |
176 | 156 |
177 // Return true if node is the last consecutive StoreField node in a linear | 157 // If there is a node before {node} in the effect chain, and if this part of |
178 // part of the effect chain. | 158 // the effect chain is linear (no other effect uses of that previous node), |
179 bool IsEndOfStoreFieldChain(Node* node) { | 159 // then return that previous node. Otherwise, return nullptr. |
180 Node* next_on_chain = SingleEffectUse(node); | 160 Node* previous_effect_in_chain(Node* node) { |
181 return (next_on_chain == nullptr || | 161 if (node->op()->EffectInputCount() == 1) { |
182 next_on_chain->op()->opcode() != IrOpcode::kStoreField); | 162 Node* previous = NodeProperties::GetEffectInput(node); |
183 } | 163 if (previous != nullptr && node == single_effect_use(previous)) { |
184 | 164 return previous; |
185 // The argument must be a StoreField node. If there is a node before it in the | 165 } else { |
186 // effect chain, and if this part of the effect chain is linear (no other | 166 return nullptr; |
187 // effect uses of that previous node), then return that previous node. | 167 } |
188 // Otherwise, return nullptr. | |
189 // | |
190 // The returned node need not be a StoreField. | |
191 Node* PreviousEffectBeforeStoreField(Node* node) { | |
192 DCHECK_EQ(node->op()->opcode(), IrOpcode::kStoreField); | |
193 DCHECK_EQ(node->op()->EffectInputCount(), 1); | |
194 | |
195 Node* previous = NodeProperties::GetEffectInput(node); | |
196 if (previous != nullptr && node == SingleEffectUse(previous)) { | |
197 return previous; | |
198 } else { | 168 } else { |
199 return nullptr; | 169 return nullptr; |
200 } | 170 } |
201 } | 171 } |
202 | 172 |
203 size_t rep_size_of(MachineRepresentation rep) { | 173 size_t rep_size_of(MachineRepresentation rep) { |
204 return ((size_t)1) << ElementSizeLog2Of(rep); | 174 return ((size_t)1) << ElementSizeLog2Of(rep); |
205 } | 175 } |
206 size_t rep_size_of(FieldAccess access) { | 176 size_t rep_size_of(FieldAccess access) { |
207 return rep_size_of(access.machine_type.representation()); | 177 return rep_size_of(access.machine_type.representation()); |
208 } | 178 } |
209 | 179 |
210 bool AtMostTagged(FieldAccess access) { | 180 bool at_most_tagged(FieldAccess access) { |
211 return rep_size_of(access) <= rep_size_of(MachineRepresentation::kTagged); | 181 return rep_size_of(access) <= rep_size_of(MachineRepresentation::kTagged); |
212 } | 182 } |
213 | 183 |
214 bool AtLeastTagged(FieldAccess access) { | 184 bool at_least_tagged(FieldAccess access) { |
215 return rep_size_of(access) >= rep_size_of(MachineRepresentation::kTagged); | 185 return rep_size_of(access) >= rep_size_of(MachineRepresentation::kTagged); |
216 } | 186 } |
217 | 187 |
188 int effect_use_count(Node* node) { | |
189 int uses = 0; | |
190 for (const Edge edge : node->use_edges()) { | |
191 if (NodeProperties::IsEffectEdge(edge)) { | |
192 uses++; | |
193 } | |
194 } | |
195 return uses; | |
196 } | |
197 | |
218 } // namespace | 198 } // namespace |
219 | 199 |
220 bool StoreStoreElimination::IsEligibleNode(Node* node) { | 200 void StoreStoreElimination::Run(JSGraph* js_graph, Zone* temp_zone) { |
221 return (node->op()->opcode() == IrOpcode::kStoreField) && | 201 // Find superfluous nodes |
222 IsEndOfStoreFieldChain(node); | 202 GraphReducer graph_reducer(temp_zone, js_graph->graph(), js_graph->Dead()); |
223 } | 203 StoreStoreFinder finder(&graph_reducer, js_graph, temp_zone); |
224 | 204 graph_reducer.AddReducer(&finder); |
225 void StoreStoreElimination::ReduceEligibleNode(Node* node) { | 205 graph_reducer.ReduceGraph(); |
226 DCHECK(IsEligibleNode(node)); | 206 |
227 | 207 // Remove superfluous nodes |
228 // if (FLAG_trace_store_elimination) { | 208 TRACE1("Eliminating %d nodes", (int)finder.to_remove_const().size()); |
229 // PrintF("** StoreStoreElimination::ReduceEligibleNode: activated: | 209 for (Node* node : finder.to_remove_const()) { |
230 // #%d\n", | 210 TRACE1(" Eliminating node: #%d:%s", node->id(), node->op()->mnemonic()); |
231 // node->id()); | 211 Node* previous_effect = NodeProperties::GetEffectInput(node); |
232 // } | 212 NodeProperties::ReplaceUses(node, nullptr, previous_effect, nullptr, |
233 | 213 nullptr); |
234 TRACE("activated: #%d", node->id()); | 214 node->Kill(); |
235 | 215 } |
236 // Initialize empty futureStore. | 216 } |
237 ZoneMap<Offset, Node*> futureStore(temp_zone()); | 217 |
238 | 218 bool StoreStoreFinder::IsEligibleNode(Node* node) { |
239 Node* current_node = node; | 219 DCHECK_LE(node->op()->EffectOutputCount(), 1); |
220 | |
221 TRACE2(5, "%d e-inputs, %d e-outputs, %d e-uses for node %d:%s", | |
222 node->op()->EffectInputCount(), node->op()->EffectOutputCount(), | |
223 effect_use_count(node), node->id(), node->op()->mnemonic()); | |
224 | |
225 bool isEffectful = (node->op()->EffectInputCount() >= 1); | |
226 bool endsEffectChain = | |
227 (effect_use_count(node) == 1) | |
228 ? (single_effect_use(node)->op()->EffectInputCount() >= 2) | |
229 : true; | |
230 return isEffectful && endsEffectChain; | |
231 } | |
232 | |
233 // Recompute unobservables-set for a node. Will also mark superfluous nodes | |
234 // as to be removed. | |
235 | |
236 UnobservablesSet StoreStoreFinder::RecomputeSet(Node* node, | |
237 UnobservablesSet uses) { | |
238 // Usually, we decide using the operator properties that an operator | |
239 // observes everything or observes nothing (see CanObserveAnything, | |
240 // CanObserveNothing), but there are some opcodes we treat specially. | |
241 switch (node->op()->opcode()) { | |
242 case IrOpcode::kStoreField: { | |
243 Node* stored_to = node->InputAt(0); | |
244 FieldAccess access = OpParameter<FieldAccess>(node->op()); | |
245 StoreOffset offset = to_offset(access); | |
246 | |
247 StoreObservation observation = {stored_to->id(), offset}; | |
248 bool presentInSet = uses.Contains(observation); | |
249 | |
250 if (presentInSet && at_most_tagged(access)) { | |
251 TRACE2(1, " #%d is StoreField[+%d,%s](#%d), unobservable", node->id(), | |
252 offset, | |
253 MachineReprToString(access.machine_type.representation()), | |
254 stored_to->id()); | |
255 to_remove().insert(node); | |
256 return uses; | |
257 } else if (presentInSet && !at_most_tagged(access)) { | |
258 TRACE2(1, | |
259 " #%d is StoreField[+%d,%s](#%d), repeated in future but too " | |
260 "big to optimize away", | |
261 node->id(), offset, | |
262 MachineReprToString(access.machine_type.representation()), | |
263 stored_to->id()); | |
264 return uses; | |
265 } else if (!presentInSet && at_least_tagged(access)) { | |
266 TRACE2(1, | |
267 " #%d is StoreField[+%d,%s](#%d), observable, recording in set", | |
268 node->id(), offset, | |
269 MachineReprToString(access.machine_type.representation()), | |
270 stored_to->id()); | |
271 return uses.Add(observation, temp_zone()); | |
272 } else if (!presentInSet && !at_least_tagged(access)) { | |
273 TRACE2(1, | |
274 " #%d is StoreField[+%d,%s](#%d), observable but too small to " | |
275 "record", | |
276 node->id(), offset, | |
277 MachineReprToString(access.machine_type.representation()), | |
278 stored_to->id()); | |
279 return uses; | |
280 } else { | |
281 UNREACHABLE(); | |
282 } | |
283 break; | |
284 } | |
285 case IrOpcode::kLoadField: { | |
286 Node* loaded_from = node->InputAt(0); | |
287 FieldAccess access = OpParameter<FieldAccess>(node->op()); | |
288 StoreOffset offset = to_offset(access); | |
289 | |
290 TRACE2(1, | |
291 " #%d is LoadField[+%d,%s](#%d), removing all offsets [+%d] from " | |
292 "set", | |
293 node->id(), offset, | |
294 MachineReprToString(access.machine_type.representation()), | |
295 loaded_from->id(), offset); | |
296 | |
297 return uses.RemoveSameOffset(offset, temp_zone()); | |
298 break; | |
299 } | |
300 default: | |
301 if (CanObserveNothing(node)) { | |
302 TRACE2(1, " #%d:%s can observe nothing, set stays unchanged", | |
303 node->id(), node->op()->mnemonic()); | |
304 return uses; | |
305 } else if (CanObserveAnything(node)) { | |
306 TRACE2(1, " #%d:%s can observe everything, recording empty set", | |
307 node->id(), node->op()->mnemonic()); | |
308 return UnobservablesSet::DeterminedEmpty(temp_zone()); | |
309 } else { | |
310 // It is safe to turn this check off in the future, but it is better | |
311 // to list opcodes in CanObserveNothing, in CanObserveAnything, or if | |
312 // you don't know, to add another case inside this DCHECK_EXTRA. | |
313 DCHECK_EXTRA(node->op()->opcode() == IrOpcode::kCall, "%s", | |
314 node->op()->mnemonic()); | |
315 TRACE2(1, | |
316 " cannot determine unobservables-set for #%d:%s; " | |
317 "conservatively recording empty set", | |
318 node->id(), node->op()->mnemonic()); | |
319 return UnobservablesSet::DeterminedEmpty(temp_zone()); | |
320 } | |
321 } | |
322 UNREACHABLE(); | |
323 return UnobservablesSet::Undetermined(); | |
324 } | |
325 | |
326 bool StoreStoreFinder::CanObserveNothing(Node* node) { | |
327 Operator::Properties mask = | |
328 Operator::kNoRead | Operator::kNoDeopt | Operator::kNoThrow; | |
329 | |
330 return (node->op()->properties() & mask) == mask || | |
Jarin
2016/07/20 11:43:33
node->op()->HasProperty(Operator::kNoRead | Operat
bgeron
2016/07/20 16:27:16
That doesn't compile, because Operator::kNoRead |
| |
331 node->opcode() == IrOpcode::kAllocate || | |
332 node->opcode() == IrOpcode::kCheckedLoad || | |
333 node->opcode() == IrOpcode::kLoadElement; | |
334 } | |
335 | |
336 bool StoreStoreFinder::CanObserveAnything(Node* node) { | |
337 const Operator* op = node->op(); | |
338 auto opcode = op->opcode(); | |
339 if (opcode == IrOpcode::kLoad) { | |
340 return true; | |
341 } | |
342 return !op->HasProperty(Operator::kNoThrow) || | |
343 !op->HasProperty(Operator::kNoDeopt); | |
344 } | |
345 | |
346 // Initialize unobservable_ with js_graph->graph->NodeCount() empty sets. | |
347 StoreStoreFinder::StoreStoreFinder(Editor* editor, JSGraph* js_graph, | |
348 Zone* temp_zone) | |
349 : AdvancedReducer(editor), | |
350 jsgraph_(js_graph), | |
351 temp_zone_(temp_zone), | |
352 unobservable_(js_graph->graph()->NodeCount(), | |
353 UnobservablesSet::Undetermined(), temp_zone), | |
354 to_remove_(temp_zone) {} | |
355 | |
356 StoreStoreFinder::~StoreStoreFinder() {} | |
357 | |
358 Reduction StoreStoreFinder::Reduce(Node* node) { | |
359 if (IsEligibleNode(node)) { | |
360 return ReduceEligibleNode(node); | |
361 } else { | |
362 return NoChange(); | |
363 } | |
364 } | |
365 | |
366 Reduction StoreStoreFinder::ReduceEligibleNode(Node* node) { | |
367 TRACE2(1, "Found eligible node: %4d:%s", node->id(), node->op()->mnemonic()); | |
368 | |
369 Node* cur = node; | |
370 TRACE2(1, " Recomputing unobservable-use-intersection for #%d:%s", cur->id(), | |
371 cur->op()->mnemonic()); | |
372 UnobservablesSet after_set = RecomputeUseIntersection(cur); | |
373 bool cur_set_changed; | |
240 | 374 |
241 do { | 375 do { |
242 FieldAccess access = OpParameter<FieldAccess>(current_node->op()); | 376 UnobservablesSet before_set = RecomputeSet(cur, after_set); |
243 Offset offset = ToOffset(access); | 377 |
244 Node* object_input = current_node->InputAt(0); | 378 DCHECK_NOT_NULL(before_set.set()); |
245 | 379 TRACE2(2, " %d StoreObservations in new set", |
246 Node* previous = PreviousEffectBeforeStoreField(current_node); | 380 (int)before_set.set()->size()); |
247 | 381 |
248 // Find the map entry. | 382 UnobservablesSet* stored_for_node = &unobservable().at(cur->id()); |
249 ZoneMap<Offset, Node*>::iterator find_result = futureStore.find(offset); | 383 |
250 | 384 cur_set_changed = (*stored_for_node != before_set); |
251 bool present = find_result != futureStore.end(); | 385 |
252 Node* value = present ? find_result->second : nullptr; | 386 if (!stored_for_node->IsUndetermined() && !cur_set_changed) { |
253 | 387 // We will not be able to update the part of this chain above any more. |
254 if (present && value == object_input && AtMostTagged(access)) { | 388 // Exit. |
255 // Key was present, and the value equalled object_input. This means | 389 TRACE2(1, "+ No change: stabilized. Stopping this chain."); |
256 // that soon after in the effect chain, we will do a StoreField to the | 390 break; |
257 // same object with the same offset, therefore current_node can be | 391 } else if (stored_for_node->IsUndetermined() && !cur_set_changed) { |
258 // optimized away. Also, the future StoreField is at least as big as this | 392 DCHECK(before_set.IsEmpty()); |
259 // one. | 393 TRACE2(2, " Still empty set, but marking determined. Walking up."); |
260 // | |
261 // We don't need to update futureStore. | |
262 | |
263 Node* previous_effect = NodeProperties::GetEffectInput(current_node); | |
264 | |
265 NodeProperties::ReplaceUses(current_node, nullptr, previous_effect, | |
266 nullptr, nullptr); | |
267 current_node->Kill(); | |
268 TRACE("#%d[[+%d,%s]](#%d) -- at most tagged size, eliminated", | |
269 current_node->id(), offset, | |
270 MachineReprToString(access.machine_type.representation()), | |
271 object_input->id()); | |
272 } else if (present && value == object_input && !AtMostTagged(access)) { | |
273 TRACE("#%d[[+%d,%s]](#%d) -- too wide, not eliminated", | |
274 current_node->id(), offset, | |
275 MachineReprToString(access.machine_type.representation()), | |
276 object_input->id()); | |
277 } else if (present && value != object_input && AtLeastTagged(access)) { | |
278 // Key was present, and the value did not equal object_input. This means | |
279 // that there is a StoreField to this offset in the future, but the | |
280 // object instance comes from a different Node. We pessimistically | |
281 // assume that we cannot optimize current_node away. However, we will | |
282 // guess that the current StoreField is more relevant than the future | |
283 // one, record the current StoreField in futureStore instead, and | |
284 // continue ascending up the chain. | |
285 find_result->second = object_input; | |
286 TRACE("#%d[[+%d,%s]](#%d) -- wide enough, diff object, updated in map", | |
287 current_node->id(), offset, | |
288 MachineReprToString(access.machine_type.representation()), | |
289 object_input->id()); | |
290 } else if (!present && AtLeastTagged(access)) { | |
291 // Key was not present. This means that there is no matching | |
292 // StoreField to this offset in the future, so we cannot optimize | |
293 // current_node away. However, we will record the current StoreField | |
294 // in futureStore, and continue ascending up the chain. | |
295 futureStore.insert(std::make_pair(offset, object_input)); | |
296 TRACE( | |
297 "#%d[[+%d,%s]](#%d) -- wide enough, key not present, inserted in map", | |
298 current_node->id(), offset, | |
299 MachineReprToString(access.machine_type.representation()), | |
300 object_input->id()); | |
301 } else if (!AtLeastTagged(access)) { | |
302 TRACE("#%d[[+%d,%s]](#%d) -- too narrow to record", current_node->id(), | |
303 offset, MachineReprToString(access.machine_type.representation()), | |
304 object_input->id()); | |
305 } else { | 394 } else { |
306 UNREACHABLE(); | 395 TRACE2(2, " Growing unreachable-set and walking up."); |
307 } | 396 } |
308 | 397 |
309 // Regardless of whether we eliminated node {current}, we want to | 398 // Overwrite vector in-place. |
310 // continue walking up the effect chain. | 399 *stored_for_node = before_set; |
311 | 400 |
312 current_node = previous; | 401 Node* previous = previous_effect_in_chain(cur); |
313 } while (current_node != nullptr && | 402 if (previous == nullptr && cur_set_changed) { |
314 current_node->op()->opcode() == IrOpcode::kStoreField); | 403 TRACE2(1, |
315 | 404 "- Reached top of chain; marking effect inputs for revisiting."); |
316 TRACE("finished"); | 405 for (int i = 0; i < cur->op()->EffectInputCount(); i++) { |
406 Node* input = NodeProperties::GetEffectInput(cur, i); | |
407 if (!CanObserveAnything(input)) { | |
408 Revisit(input); | |
409 } | |
410 } | |
411 | |
412 cur = nullptr; | |
413 } else if (previous == nullptr && !cur_set_changed) { | |
414 TRACE2(1, "+ Reached top of chain and stabilized."); | |
415 cur = nullptr; | |
416 } else { | |
417 // Update variables for next loop iteration | |
418 cur = previous; | |
419 DCHECK(effect_use_count(previous) == 1); | |
420 after_set = before_set; | |
421 if (FLAG_turbo_verify_store_elimination) { | |
422 DCHECK(after_set == RecomputeUseIntersection(cur)); | |
423 } | |
424 DCHECK_NOT_NULL(cur); | |
425 } | |
426 } while (cur != nullptr); | |
427 | |
428 return NoChange(); | |
429 } | |
430 | |
431 // Compute the intersection of the UnobservablesSets of all effect uses and | |
432 // return it. This function only works if {node} has an effect use. | |
433 // | |
434 // The result UnobservablesSet will always be determined. | |
435 UnobservablesSet StoreStoreFinder::RecomputeUseIntersection(Node* node) { | |
436 TRACE2(4, " recomputing use intersections of #%d:%s", node->id(), | |
437 node->op()->mnemonic()); | |
438 | |
439 // {first} == true indicates that we haven't looked at any elements yet. | |
440 // {first} == false indicates that cur_set is the intersection of at least one | |
441 // thing. | |
442 | |
443 bool first = true; | |
444 UnobservablesSet cur_set = UnobservablesSet::Undetermined(); // irrelevant | |
445 | |
446 for (Edge edge : node->use_edges()) { | |
447 // Skip non-effect edges | |
448 if (!NodeProperties::IsEffectEdge(edge)) { | |
449 continue; | |
450 } | |
451 | |
452 Node* use = edge.from(); | |
453 TRACE2(4, " found use %d", use->id()); | |
454 UnobservablesSet new_set = unobservable().at(use->id()); | |
455 // Include new_set in the intersection. | |
456 if (first) { | |
457 // Intersection of a one-element set is that one element | |
458 first = false; | |
459 cur_set = new_set; | |
460 } else { | |
461 // Take the intersection of cur_set and new_set. | |
462 cur_set = cur_set.Intersect(new_set, temp_zone()); | |
463 } | |
464 | |
465 if (FLAG_trace_store_elimination) { | |
466 // Serialise the UnobservablesSet. | |
467 std::ostringstream os; | |
468 os << "intersected with " << new_set << ", current intersection is " | |
469 << cur_set; | |
470 std::string msg = os.str(); | |
471 TRACE2(4, " %s", msg.c_str()); | |
472 } | |
473 } | |
474 | |
475 if (first) { | |
476 // There were no effect uses. | |
477 auto opcode = node->op()->opcode(); | |
478 // List of opcodes that may end this effect chain. The opcodes are not | |
479 // important to the soundness of this optimization; this serves as a | |
480 // general sanity check. Add opcodes to this list as it suits you. | |
481 // | |
482 // Everything is observable after these opcodes; return the empty set. | |
483 DCHECK_EXTRA( | |
484 opcode == IrOpcode::kReturn || opcode == IrOpcode::kTerminate || | |
485 opcode == IrOpcode::kDeoptimize || opcode == IrOpcode::kThrow, | |
486 "for #%d:%s", node->id(), node->op()->mnemonic()); | |
487 USE(opcode); // silence warning about unused variable | |
488 | |
489 TRACE2(3, " ..no effect uses, so unobservables-set = []"); | |
490 return UnobservablesSet::DeterminedEmpty(temp_zone()); | |
491 } else { | |
492 if (cur_set.IsUndetermined()) { | |
493 cur_set = UnobservablesSet::DeterminedEmpty(temp_zone()); | |
494 } | |
495 | |
496 if (FLAG_trace_store_elimination) { | |
497 // Serialise the UnobservablesSet. | |
498 std::ostringstream os; | |
499 os << cur_set; | |
500 std::string msg = os.str(); | |
501 TRACE2(2, " ..use-intersection: %s", msg.c_str()); | |
502 } | |
503 | |
504 return cur_set; | |
505 } | |
506 } | |
507 | |
508 UnobservablesSet UnobservablesSet::Undetermined() { return UnobservablesSet(); } | |
509 | |
510 UnobservablesSet::UnobservablesSet() : set_(nullptr) {} | |
511 | |
512 UnobservablesSet UnobservablesSet::DeterminedEmpty(Zone* zone) { | |
513 // Create a new empty UnobservablesSet. This allocates in the zone, and | |
514 // can probably be optimized to use a global singleton. | |
515 ZoneSet<StoreObservation>* empty_set = | |
516 new (zone->New(sizeof(ZoneSet<StoreObservation>))) | |
517 ZoneSet<StoreObservation>(zone); | |
518 return UnobservablesSet(empty_set); | |
519 } | |
520 | |
521 // Computes the intersection of two UnobservablesSets. May return | |
522 // UnobservablesSet::Undetermined() instead of an empty UnobservablesSet for | |
523 // speed. | |
524 UnobservablesSet UnobservablesSet::Intersect(UnobservablesSet other, | |
525 Zone* zone) const { | |
526 if (set() == nullptr || other.set() == nullptr) { | |
527 return Undetermined(); | |
528 } else if (other.set() == nullptr) { | |
529 return *this; | |
530 } else { | |
531 ZoneSet<StoreObservation>* intersection = | |
532 new (zone->New(sizeof(ZoneSet<StoreObservation>))) | |
533 ZoneSet<StoreObservation>(zone); | |
534 // Put the intersection of set() and other.set() in intersection. | |
535 set_intersection(set()->begin(), set()->end(), other.set()->begin(), | |
536 other.set()->end(), | |
537 std::inserter(*intersection, intersection->end())); | |
538 TRACE2(3, "intersected; result:"); | |
539 for (StoreObservation obs : *intersection) { | |
540 std::ostringstream os; | |
541 os << obs; | |
542 std::string msg = os.str(); | |
543 TRACE2(3, "- %s", msg.c_str()); | |
544 } | |
545 return UnobservablesSet(intersection); | |
546 } | |
547 } | |
548 | |
549 UnobservablesSet UnobservablesSet::Add(StoreObservation obs, Zone* zone) const { | |
550 bool present = (set()->find(obs) != set()->end()); | |
551 if (present) { | |
552 return *this; | |
553 } else { | |
554 // Make a new empty set. | |
555 ZoneSet<StoreObservation>* new_set = | |
556 new (zone->New(sizeof(ZoneSet<StoreObservation>))) | |
557 ZoneSet<StoreObservation>(zone); | |
558 // Copy the old elements over. | |
559 *new_set = *set(); | |
560 // Add the new element. | |
561 bool inserted = new_set->insert(obs).second; | |
562 DCHECK(inserted); | |
563 USE(inserted); // silence warning about unused variable | |
564 | |
565 return UnobservablesSet(new_set); | |
566 } | |
567 } | |
568 | |
569 UnobservablesSet UnobservablesSet::RemoveSameOffset(StoreOffset offset, | |
570 Zone* zone) const { | |
571 // Make a new empty set. | |
572 ZoneSet<StoreObservation>* new_set = | |
573 new (zone->New(sizeof(ZoneSet<StoreObservation>))) | |
574 ZoneSet<StoreObservation>(zone); | |
575 // Copy all elements over that have a different offset. | |
576 for (auto obs : *set()) { | |
577 if (obs.offset_ != offset) { | |
578 new_set->insert(obs); | |
579 } | |
580 } | |
581 | |
582 return UnobservablesSet(new_set); | |
583 } | |
584 | |
585 // Used for debugging. | |
586 std::ostream& operator<<(std::ostream& os, UnobservablesSet set) { | |
587 if (set.set() == nullptr) { | |
588 os << "(undetermined)"; | |
589 } else { | |
590 os << "["; | |
591 bool first = true; | |
592 for (StoreObservation obs : *set.set()) { | |
593 if (!first) { | |
594 os << ","; | |
595 } else { | |
596 first = false; | |
597 } | |
598 os << obs; | |
599 } | |
600 os << "]"; | |
601 } | |
602 return os; | |
603 } | |
604 | |
605 bool UnobservablesSet::operator==(const UnobservablesSet& other) const { | |
606 if (IsUndetermined() || other.IsUndetermined()) { | |
607 TRACE2(4, " (either is undetermined)"); | |
608 return IsEmpty() && other.IsEmpty(); | |
609 } else { | |
610 TRACE2(4, " (neither is undetermined)"); | |
611 // Both pointers guaranteed not to be nullptrs. | |
612 return *set() == *other.set(); | |
613 } | |
614 } | |
615 | |
616 bool UnobservablesSet::operator!=(const UnobservablesSet& other) const { | |
617 return !(*this == other); | |
618 } | |
619 | |
620 bool operator==(StoreObservation a, StoreObservation b) { | |
621 return (a.id_ == b.id_) && (a.offset_ == b.offset_); | |
622 } | |
623 | |
624 bool operator<(StoreObservation a, StoreObservation b) { | |
625 return (a.id_ < b.id_) || (a.id_ == b.id_ && a.offset_ < b.offset_); | |
626 } | |
627 | |
628 std::ostream& operator<<(std::ostream& os, StoreObservation obs) { | |
629 os << "#" << obs.id_ << "[+" << obs.offset_ << "]"; | |
630 return os; | |
317 } | 631 } |
318 | 632 |
319 } // namespace compiler | 633 } // namespace compiler |
320 } // namespace internal | 634 } // namespace internal |
321 } // namespace v8 | 635 } // namespace v8 |
OLD | NEW |