Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "src/compiler/store-store-elimination.h" | 5 #include "src/compiler/store-store-elimination.h" |
| 6 | 6 |
| 7 #include "src/compiler/all-nodes.h" | 7 #include "src/compiler/all-nodes.h" |
| 8 #include "src/compiler/js-graph.h" | 8 #include "src/compiler/js-graph.h" |
| 9 #include "src/compiler/node-properties.h" | 9 #include "src/compiler/node-properties.h" |
| 10 #include "src/compiler/simplified-operator.h" | 10 #include "src/compiler/simplified-operator.h" |
| 11 | 11 |
| 12 namespace v8 { | 12 namespace v8 { |
| 13 namespace internal { | 13 namespace internal { |
| 14 namespace compiler { | 14 namespace compiler { |
| 15 | 15 |
| 16 #define TRACE(fmt, ...) \ | 16 #define TRACE1(fmt, ...) \ |
| 17 do { \ | 17 do { \ |
| 18 if (FLAG_trace_store_elimination) { \ | 18 if (FLAG_trace_store_elimination) { \ |
| 19 PrintF("StoreStoreElimination::ReduceEligibleNode: " fmt "\n", \ | 19 PrintF("StoreStoreElimination::Run: " fmt "\n", ##__VA_ARGS__); \ |
| 20 ##__VA_ARGS__); \ | 20 } \ |
| 21 } \ | |
| 22 } while (false) | 21 } while (false) |
| 23 | 22 |
| 24 // A simple store-store elimination. When the effect chain contains the | 23 #define TRACE2(level, fmt, ...) \ |
| 25 // following sequence, | 24 do { \ |
| 25 if (FLAG_trace_store_elimination && \ | |
| 26 FLAG_trace_store_elimination_level >= (level)) { \ | |
| 27 PrintF("StoreStoreFinder: [%d] " fmt "\n", level, ##__VA_ARGS__); \ | |
| 28 } \ | |
| 29 } while (false) | |
|
Jarin
2016/07/20 11:43:33
This feels a bit over-engineered - normally, you w
bgeron
2016/07/20 16:27:16
Because trace level 1 is ~37% as big as trace leve
| |
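As a point of reference for the exchange above, here is a usage sketch of the two macros ({node_count} and {size} are placeholder variables, and --trace-store-elimination-level is assumed to be the companion flag introduced by this CL):

```cpp
// TRACE1 fires whenever --trace-store-elimination is set; TRACE2 additionally
// requires --trace-store-elimination-level >= level, so level-1 output stays
// compact while higher levels add per-set detail.
TRACE1("Eliminating %d nodes", node_count);            // summary output
TRACE2(2, "  %d StoreObservations in new set", size);  // verbose detail
```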
| 30 | |
| 31 // CHECK_EXTRA is like CHECK, but has two or more arguments: a boolean | |
| 32 // expression, a format string, and any number of extra arguments. The boolean | |
| 33 // expression will be evaluated at runtime. If it evaluates to false, then an | |
| 34 // error message will be shown containing the condition, as well as the extra | |
| 35 // info formatted like with printf. | |
| 36 #define CHECK_EXTRA(condition, fmt, ...) \ | |
| 37 do { \ | |
| 38 if (V8_UNLIKELY(!(condition))) { \ | |
| 39 V8_Fatal(__FILE__, __LINE__, "Check failed: %s. Extra info: " fmt, \ | |
| 40 #condition, ##__VA_ARGS__); \ | |
| 41 } \ | |
| 42 } while (0) | |
| 43 | |
| 44 #ifdef DEBUG | |
| 45 #define DCHECK_EXTRA(condition, fmt, ...) \ | |
| 46 CHECK_EXTRA(condition, fmt, ##__VA_ARGS__) | |
| 47 #else | |
| 48 #define DCHECK_EXTRA(condition, fmt, ...) ((void)0) | |
| 49 #endif | |
| 50 | |
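A hypothetical call site for the macros above (the condition and arguments are illustrative, not from this CL):

```cpp
// On failure this prints the stringified condition plus the printf-formatted
// extra info, then aborts via V8_Fatal; DCHECK_EXTRA compiles away in release.
DCHECK_EXTRA(node->opcode() == IrOpcode::kStoreField, "unexpected #%d:%s",
             node->id(), node->op()->mnemonic());
```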
| 51 // Store-store elimination. | |
| 26 // | 52 // |
| 27 // - StoreField[[+off_1]](x1, y1) | 53 // The aim of this optimization is to detect the following pattern in the |
| 28 // - StoreField[[+off_2]](x2, y2) | 54 // effect graph: |
| 29 // - StoreField[[+off_3]](x3, y3) | |
| 30 // ... | |
| 31 // - StoreField[[+off_n]](xn, yn) | |
| 32 // | 55 // |
| 33 // where the xes are the objects and the ys are the values to be stored, then | 56 // - StoreField[+24, kRepTagged](263, ...) |
| 34 // we are going to say that a store is superfluous if the same offset of the | |
| 35 // same object will be stored to in the future. If off_i == off_j and xi == xj | |
| 36 // and i < j, then we optimize the i'th StoreField away. | |
| 37 // | 57 // |
| 38 // This optimization should be initiated on the last StoreField in such a | 58 // ... lots of nodes from which the field at offset 24 of the object |
| 39 // sequence. | 59 // returned by node #263 cannot be observed ... |
| 40 // | 60 // |
| 41 // The algorithm works by walking the effect chain from the last StoreField | 61 // - StoreField[+24, kRepTagged](263, ...) |
| 42 // upwards. While walking, we maintain a map {futureStore} from offsets to | |
| 43 // nodes; initially it is empty. As we walk the effect chain upwards, if | |
| 44 // futureStore[off] = n, then any store to node {n} with offset {off} is | |
| 45 // guaranteed to be useless because we do a tagged-width[2] store to that | |
| 46 // offset of that object in the near future anyway. For example, for this | |
| 47 // effect chain | |
| 48 // | 62 // |
| 49 // 71: StoreField(60, 0) | 63 // In such situations, the earlier StoreField cannot be observed, and can be |
| 50 // 72: StoreField(65, 8) | 64 // eliminated. This optimization should work for any offset and input node, of |
| 51 // 73: StoreField(63, 8) | 65 // course. |
| 52 // 74: StoreField(65, 16) | |
| 53 // 75: StoreField(62, 8) | |
| 54 // | 66 // |
| 55 // just before we get to 72, we will have futureStore = {8: 63, 16: 65}. | 67 // The optimization also works across splits. It currently does not work for |
| 68 // loops, because we tend to put a stack check in loops, and like deopts, | |
| 69 // stack checks can observe anything. | |
| 56 // | 70 // |
| 57 // Here is the complete process. | 71 // The implementation consists of two phases. The first phase |
| 72 // (StoreStoreFinder) analyzes the graph, and at every point determines what | |
| 73 // fields are guaranteed not to be observed in the future. The second phase | |
| 74 // then eliminates all StoreField nodes that are known to be unobservable. | |
| 58 // | 75 // |
| 59 // - We are at the end of a sequence of consecutive StoreFields. | 76 // Per node, we store a data structure called the UnobservablesSet, which is a |
| 60 // - We start out with futureStore = empty. | 77 // set of pairs of nodes and offsets. For instance, a Return always has an |
| 61 // - We then walk the effect chain upwards to find the next StoreField [1]. | 78 // empty UnobservablesSet, as does a deoptimization, throw, or stack check. |
| 79 // The UnobservablesSet of a StoreField consists of the node and the offset | |
| 80 // that it stores to, as well as the UnobservablesSet of the next node in the | |
| 81 // effect chain. When there are multiple succeeding nodes in the effect graph, | |
| 82 // then the intersection of their UnobservablesSet is taken, and then the | |
| 83 // offset of that StoreField is added. | |
| 62 // | 84 // |
| 63 // 1. If the offset is not a key of {futureStore} yet, we put it in. | 85 // To allow for cycles in the effect graph, the implementation does a fixpoint |
| 64 // 2. If the offset is a key of {futureStore}, but futureStore[offset] is a | 86 // computation. Unfortunately, we compute a least fixpoint not a greatest |
| 65 // different node, we overwrite futureStore[offset] with the current node. | 87 // fixpoint. In practical terms this means that if we have a loop without any |
| 66 // 3. If the offset is a key of {futureStore} and futureStore[offset] equals | 88 // deopt or stack check, and a StoreField inside and after the loop, then we |
| 67 // this node, we eliminate this StoreField. | 89 // will not detect that the StoreField inside the loop is unobservable. This |
| 90 // means that theoretically this implementation is slightly suboptimal. | |
| 91 // However, in practice we always put a stack check inside a loop, so the | |
| 92 // implementation should be optimal in practice. | |
| 68 // | 93 // |
| 69 // As long as the current effect input points to a node with a single effect | 94 // To implement this fixpoint, we use a special value |
| 70 // output, and as long as its opcode is StoreField, we keep traversing | 95 // UnobservablesSet::Undetermined(), which is functionally empty. When a |
| 71 // upwards. | 96 // node's UnobservablesSet is undetermined, then that means that we have no |
| 97 // knowledge about what fields are unobservable at that point. In contrast, | |
| 98 // when a node's UnobservablesSet is determined but empty, and the node is not | |
| 99 // marked for revisiting, then the node's effect input is determined and up- | |
| 100 // to-date. Initially, the UnobservablesSet of all nodes is set to | |
| 101 // undetermined. | |
| 72 // | 102 // |
| 103 // We apply some sharing to save memory. The class UnobservablesSet is only a | |
| 104 // pointer wide, and a copy does not use any heap (or temp_zone) memory. Most | |
| 105 // changes to an UnobservablesSet allocate in the temp_zone. | |
|
Jarin
2016/07/20 11:43:33
This blurb should go into the UnobservablesSet class comment.
bgeron
2016/07/20 16:27:16
Done.
| |
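To make the merge rule concrete, a minimal sketch using the CL's own Intersect method ({set_a}, {set_b}, and {temp_zone} are placeholders for the UnobservablesSets of two effect uses and the temporary zone):

```cpp
// A (node, offset) pair remains unobservable at a point in the effect chain
// only if it is unobservable from the viewpoint of every effect use, hence
// the intersection of the uses' sets.
UnobservablesSet at_merge = set_a.Intersect(set_b, temp_zone);
```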
| 106 | |
| 107 // An ideal store-store elimination would keep for every object a set of byte | |
| 108 // ranges into that object which are unobservable. We skimp on this, and only | |
| 109 // store a set of offsets. If offset {off} is in the set, then that means that | |
| 110 // bytes {off} up to but excluding {off+taggedSize} are unobservable, where | |
| 111 // {taggedSize} is the size in bytes of a tagged value. We don't record that | |
| 112 // writes smaller than taggedSize are unobservable, and we don't optimize away | |
| 113 // writes larger than taggedSize. | |
| 73 // | 114 // |
| 74 // | 115 // The result should be that this store-store elimination is fast enough, and |
| 75 // footnotes: | 116 // also optimizes away most superfluous stores. |
| 76 // | |
| 77 // [1] We make sure that we only traverse the linear part, that is, the part | |
| 78 // where every node has exactly one incoming and one outgoing effect edge. | |
| 79 // Also, we only keep walking upwards as long as we keep finding consecutive | |
| 80 // StoreFields on the same node. | |
| 81 // | |
| 82 // [2] This optimization is sound only in certain cases. Specifically, the | |
| 83 // presence of a future store to {off} by itself does not automatically mean | |
| 84 // that earlier stores to {off} are superfluous: a future narrow store does | |
| 85 // not obviate an earlier wide store. However, future stores of a certain | |
| 86 // size do obviate stores to the same offset of lesser or equal size. | |
| 87 // | |
| 88 // It turns out that we are most interested in stores of "tagged" size, | |
| 89 // which is 8 bytes on 64-bit archs and 4 bit on 32-bit archs. In | |
| 90 // {futureStore}, we record future writes that are of at least this size. | |
| 91 // The three cases are actually a bit more subtle. | |
| 92 // | |
| 93 // 1. If the offset is not a key of {futureStore} and the StoreField is of | |
| 94 // "tagged" size or wider, then we put it in. | |
| 95 // 2. If the offset is present in {futureStore} but the value is different, | |
| 96 // then we overwrite the value if the current StoreField is of "tagged" | |
| 97 // size or wider. | |
| 98 // 3. If the offset is present and the value matches the node, and the | |
| 99 // current StoreField is AT MOST of "tagged" size, then we eliminate this | |
| 100 // StoreField. | |
| 101 // | |
| 102 // Examples of stores that we do not detect as superfluous: 2-byte stores | |
| 103 // followed by 2-byte stores to the same offset; 16-byte stores followed by | |
| 104 // 16-byte stores to the same offset. On ia32, we do not detect consecutive | |
| 105 // float64 stores as superfluous, and on x86 we do not detect consecutive | |
| 106 // int32 stores as superfluous. | |
| 107 | 117 |
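The elimination decision in the old comment's case analysis reduces, in the new code, to a single predicate over the at_most_tagged/at_least_tagged helpers defined further down; a sketch (the wrapper function is illustrative, not part of the CL):

```cpp
// An earlier store is dead only if some later store covers the same
// (object, offset) slot at tagged-or-wider size (present_in_set) and the
// earlier store itself writes at most a tagged-sized region, so the later
// store fully overwrites it.
bool CanEliminateEarlierStore(FieldAccess access, bool present_in_set) {
  return present_in_set && at_most_tagged(access);
}
```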
| 108 // At a late stage, we realized that this code is more complicated than it | 118 // Assumption: every byte of a JS object is only ever accessed through one |
| 109 // needs to be: if we store a set of pairs (offset, node), the code simplifies | 119 // offset. For instance, byte 15 of a given object may be accessed using a |
| 110 // to 3 cases instead of 6. We could even store a map from nodes to sets of | 120 // two-byte read at offset 14, or a four-byte read at offset 12, but never |
| 111 // bytes. | 121 // both in the same program. |
| 112 | 122 |
| 113 StoreStoreElimination::StoreStoreElimination(JSGraph* js_graph, Zone* temp_zone) | 123 // This implementation needs all dead nodes removed from the graph, and the |
| 114 : jsgraph_(js_graph), temp_zone_(temp_zone) {} | 124 // graph should be trimmed. |
| 115 | |
| 116 StoreStoreElimination::~StoreStoreElimination() {} | |
| 117 | |
| 118 void StoreStoreElimination::Run() { | |
| 119 // The store-store elimination performs work on chains of certain types of | |
| 120 // nodes. The elimination must be invoked on the lowest node in such a | |
| 121 // chain; we have a helper function IsEligibleNode that returns true | |
| 122 // precisely on the lowest node in such a chain. | |
| 123 // | |
| 124 // Because the elimination removes nodes from the graph, even remove nodes | |
| 125 // that the elimination was not invoked on, we cannot use a normal | |
| 126 // AdvancedReducer but we manually find which nodes to invoke the | |
| 127 // elimination on. Then in a next step, we invoke the elimination for each | |
| 128 // node that was eligible. | |
| 129 | |
| 130 NodeVector eligible(temp_zone()); // loops over all nodes | |
| 131 AllNodes all(temp_zone(), jsgraph()->graph()); | |
| 132 | |
| 133 for (Node* node : all.live) { | |
| 134 if (IsEligibleNode(node)) { | |
| 135 eligible.push_back(node); | |
| 136 } | |
| 137 } | |
| 138 | |
| 139 for (Node* node : eligible) { | |
| 140 ReduceEligibleNode(node); | |
| 141 } | |
| 142 } | |
| 143 | 125 |
| 144 namespace { | 126 namespace { |
| 145 | 127 |
| 146 // 16 bits was chosen fairly arbitrarily; it seems enough now. 8 bits is too | |
| 147 // few. | |
| 148 typedef uint16_t Offset; | |
| 149 | |
| 150 // To safely cast an offset from a FieldAccess, which has a wider range | 128 // To safely cast an offset from a FieldAccess, which has a wider range |
| 151 // (namely int). | 129 // (namely int). |
| 152 Offset ToOffset(int offset) { | 130 StoreOffset to_offset(int offset) { |
|
Jarin
2016/07/20 11:43:33
Function names should be CamelCase. (Here and below.)
bgeron
2016/07/20 16:27:16
Yeah, you're right. I was thinking of the "simple
| |
| 153 CHECK(0 <= offset && offset < (1 << 8 * sizeof(Offset))); | 131 CHECK(0 <= offset && offset < (1 << 8 * sizeof(StoreOffset))); |
| 154 return (Offset)offset; | 132 return (StoreOffset)offset; |
| 155 } | 133 } |
| 156 | 134 |
| 157 Offset ToOffset(const FieldAccess& access) { return ToOffset(access.offset); } | 135 StoreOffset to_offset(const FieldAccess& access) { |
| 136 return to_offset(access.offset); | |
| 137 } | |
| 158 | 138 |
| 159 // If node has a single effect use, return that node. If node has no or | 139 // If node has a single effect use, return that node. If node has no or |
| 160 // multiple effect uses, return nullptr. | 140 // multiple effect uses, return nullptr. |
| 161 Node* SingleEffectUse(Node* node) { | 141 Node* single_effect_use(Node* node) { |
| 162 Node* last_use = nullptr; | 142 Node* last_use = nullptr; |
| 163 for (Edge edge : node->use_edges()) { | 143 for (Edge edge : node->use_edges()) { |
| 164 if (!NodeProperties::IsEffectEdge(edge)) { | 144 if (!NodeProperties::IsEffectEdge(edge)) { |
| 165 continue; | 145 continue; |
| 166 } | 146 } |
| 167 if (last_use != nullptr) { | 147 if (last_use != nullptr) { |
| 168 // more than one | 148 // more than one |
| 169 return nullptr; | 149 return nullptr; |
| 170 } | 150 } |
| 171 last_use = edge.from(); | 151 last_use = edge.from(); |
| 172 DCHECK_NOT_NULL(last_use); | 152 DCHECK_NOT_NULL(last_use); |
| 173 } | 153 } |
| 174 return last_use; | 154 return last_use; |
| 175 } | 155 } |
| 176 | 156 |
| 177 // Return true if node is the last consecutive StoreField node in a linear | 157 // If there is a node before {node} in the effect chain, and if this part of |
| 178 // part of the effect chain. | 158 // the effect chain is linear (no other effect uses of that previous node), |
| 179 bool IsEndOfStoreFieldChain(Node* node) { | 159 // then return that previous node. Otherwise, return nullptr. |
| 180 Node* next_on_chain = SingleEffectUse(node); | 160 Node* previous_effect_in_chain(Node* node) { |
| 181 return (next_on_chain == nullptr || | 161 if (node->op()->EffectInputCount() == 1) { |
| 182 next_on_chain->op()->opcode() != IrOpcode::kStoreField); | 162 Node* previous = NodeProperties::GetEffectInput(node); |
| 183 } | 163 if (previous != nullptr && node == single_effect_use(previous)) { |
| 184 | 164 return previous; |
| 185 // The argument must be a StoreField node. If there is a node before it in the | 165 } else { |
| 186 // effect chain, and if this part of the effect chain is linear (no other | 166 return nullptr; |
| 187 // effect uses of that previous node), then return that previous node. | 167 } |
| 188 // Otherwise, return nullptr. | |
| 189 // | |
| 190 // The returned node need not be a StoreField. | |
| 191 Node* PreviousEffectBeforeStoreField(Node* node) { | |
| 192 DCHECK_EQ(node->op()->opcode(), IrOpcode::kStoreField); | |
| 193 DCHECK_EQ(node->op()->EffectInputCount(), 1); | |
| 194 | |
| 195 Node* previous = NodeProperties::GetEffectInput(node); | |
| 196 if (previous != nullptr && node == SingleEffectUse(previous)) { | |
| 197 return previous; | |
| 198 } else { | 168 } else { |
| 199 return nullptr; | 169 return nullptr; |
| 200 } | 170 } |
| 201 } | 171 } |
| 202 | 172 |
| 203 size_t rep_size_of(MachineRepresentation rep) { | 173 size_t rep_size_of(MachineRepresentation rep) { |
| 204 return ((size_t)1) << ElementSizeLog2Of(rep); | 174 return ((size_t)1) << ElementSizeLog2Of(rep); |
| 205 } | 175 } |
| 206 size_t rep_size_of(FieldAccess access) { | 176 size_t rep_size_of(FieldAccess access) { |
| 207 return rep_size_of(access.machine_type.representation()); | 177 return rep_size_of(access.machine_type.representation()); |
| 208 } | 178 } |
| 209 | 179 |
| 210 bool AtMostTagged(FieldAccess access) { | 180 bool at_most_tagged(FieldAccess access) { |
| 211 return rep_size_of(access) <= rep_size_of(MachineRepresentation::kTagged); | 181 return rep_size_of(access) <= rep_size_of(MachineRepresentation::kTagged); |
| 212 } | 182 } |
| 213 | 183 |
| 214 bool AtLeastTagged(FieldAccess access) { | 184 bool at_least_tagged(FieldAccess access) { |
| 215 return rep_size_of(access) >= rep_size_of(MachineRepresentation::kTagged); | 185 return rep_size_of(access) >= rep_size_of(MachineRepresentation::kTagged); |
| 216 } | 186 } |
| 217 | 187 |
| 188 int effect_use_count(Node* node) { | |
| 189 int uses = 0; | |
| 190 for (const Edge edge : node->use_edges()) { | |
| 191 if (NodeProperties::IsEffectEdge(edge)) { | |
| 192 uses++; | |
| 193 } | |
| 194 } | |
| 195 return uses; | |
| 196 } | |
| 197 | |
| 218 } // namespace | 198 } // namespace |
| 219 | 199 |
| 220 bool StoreStoreElimination::IsEligibleNode(Node* node) { | 200 void StoreStoreElimination::Run(JSGraph* js_graph, Zone* temp_zone) { |
| 221 return (node->op()->opcode() == IrOpcode::kStoreField) && | 201 // Find superfluous nodes |
| 222 IsEndOfStoreFieldChain(node); | 202 GraphReducer graph_reducer(temp_zone, js_graph->graph(), js_graph->Dead()); |
| 223 } | 203 StoreStoreFinder finder(&graph_reducer, js_graph, temp_zone); |
| 224 | 204 graph_reducer.AddReducer(&finder); |
| 225 void StoreStoreElimination::ReduceEligibleNode(Node* node) { | 205 graph_reducer.ReduceGraph(); |
| 226 DCHECK(IsEligibleNode(node)); | 206 |
| 227 | 207 // Remove superfluous nodes |
| 228 // if (FLAG_trace_store_elimination) { | 208 TRACE1("Eliminating %d nodes", (int)finder.to_remove_const().size()); |
| 229 // PrintF("** StoreStoreElimination::ReduceEligibleNode: activated: | 209 for (Node* node : finder.to_remove_const()) { |
| 230 // #%d\n", | 210 TRACE1(" Eliminating node: #%d:%s", node->id(), node->op()->mnemonic()); |
| 231 // node->id()); | 211 Node* previous_effect = NodeProperties::GetEffectInput(node); |
| 232 // } | 212 NodeProperties::ReplaceUses(node, nullptr, previous_effect, nullptr, |
| 233 | 213 nullptr); |
| 234 TRACE("activated: #%d", node->id()); | 214 node->Kill(); |
| 235 | 215 } |
| 236 // Initialize empty futureStore. | 216 } |
| 237 ZoneMap<Offset, Node*> futureStore(temp_zone()); | 217 |
| 238 | 218 bool StoreStoreFinder::IsEligibleNode(Node* node) { |
| 239 Node* current_node = node; | 219 DCHECK_LE(node->op()->EffectOutputCount(), 1); |
| 220 | |
| 221 TRACE2(5, "%d e-inputs, %d e-outputs, %d e-uses for node %d:%s", | |
| 222 node->op()->EffectInputCount(), node->op()->EffectOutputCount(), | |
| 223 effect_use_count(node), node->id(), node->op()->mnemonic()); | |
| 224 | |
| 225 bool isEffectful = (node->op()->EffectInputCount() >= 1); | |
| 226 bool endsEffectChain = | |
| 227 (effect_use_count(node) == 1) | |
| 228 ? (single_effect_use(node)->op()->EffectInputCount() >= 2) | |
| 229 : true; | |
| 230 return isEffectful && endsEffectChain; | |
| 231 } | |
| 232 | |
| 233 // Recompute unobservables-set for a node. Will also mark superfluous nodes | |
| 234 // as to be removed. | |
| 235 | |
| 236 UnobservablesSet StoreStoreFinder::RecomputeSet(Node* node, | |
| 237 UnobservablesSet uses) { | |
| 238 // Usually, we decide using the operator properties that an operator | |
| 239 // observes everything or observes nothing (see CanObserveAnything, | |
| 240 // CanObserveNothing), but there are some opcodes we treat specially. | |
| 241 switch (node->op()->opcode()) { | |
| 242 case IrOpcode::kStoreField: { | |
| 243 Node* stored_to = node->InputAt(0); | |
| 244 FieldAccess access = OpParameter<FieldAccess>(node->op()); | |
| 245 StoreOffset offset = to_offset(access); | |
| 246 | |
| 247 StoreObservation observation = {stored_to->id(), offset}; | |
| 248 bool presentInSet = uses.Contains(observation); | |
| 249 | |
| 250 if (presentInSet && at_most_tagged(access)) { | |
| 251 TRACE2(1, " #%d is StoreField[+%d,%s](#%d), unobservable", node->id(), | |
| 252 offset, | |
| 253 MachineReprToString(access.machine_type.representation()), | |
| 254 stored_to->id()); | |
| 255 to_remove().insert(node); | |
| 256 return uses; | |
| 257 } else if (presentInSet && !at_most_tagged(access)) { | |
| 258 TRACE2(1, | |
| 259 " #%d is StoreField[+%d,%s](#%d), repeated in future but too " | |
| 260 "big to optimize away", | |
| 261 node->id(), offset, | |
| 262 MachineReprToString(access.machine_type.representation()), | |
| 263 stored_to->id()); | |
| 264 return uses; | |
| 265 } else if (!presentInSet && at_least_tagged(access)) { | |
| 266 TRACE2(1, | |
| 267 " #%d is StoreField[+%d,%s](#%d), observable, recording in set", | |
| 268 node->id(), offset, | |
| 269 MachineReprToString(access.machine_type.representation()), | |
| 270 stored_to->id()); | |
| 271 return uses.Add(observation, temp_zone()); | |
| 272 } else if (!presentInSet && !at_least_tagged(access)) { | |
| 273 TRACE2(1, | |
| 274 " #%d is StoreField[+%d,%s](#%d), observable but too small to " | |
| 275 "record", | |
| 276 node->id(), offset, | |
| 277 MachineReprToString(access.machine_type.representation()), | |
| 278 stored_to->id()); | |
| 279 return uses; | |
| 280 } else { | |
| 281 UNREACHABLE(); | |
| 282 } | |
| 283 break; | |
| 284 } | |
| 285 case IrOpcode::kLoadField: { | |
| 286 Node* loaded_from = node->InputAt(0); | |
| 287 FieldAccess access = OpParameter<FieldAccess>(node->op()); | |
| 288 StoreOffset offset = to_offset(access); | |
| 289 | |
| 290 TRACE2(1, | |
| 291 " #%d is LoadField[+%d,%s](#%d), removing all offsets [+%d] from " | |
| 292 "set", | |
| 293 node->id(), offset, | |
| 294 MachineReprToString(access.machine_type.representation()), | |
| 295 loaded_from->id(), offset); | |
| 296 | |
| 297 return uses.RemoveSameOffset(offset, temp_zone()); | |
| 298 break; | |
| 299 } | |
| 300 default: | |
| 301 if (CanObserveNothing(node)) { | |
| 302 TRACE2(1, " #%d:%s can observe nothing, set stays unchanged", | |
| 303 node->id(), node->op()->mnemonic()); | |
| 304 return uses; | |
| 305 } else if (CanObserveAnything(node)) { | |
| 306 TRACE2(1, " #%d:%s can observe everything, recording empty set", | |
| 307 node->id(), node->op()->mnemonic()); | |
| 308 return UnobservablesSet::DeterminedEmpty(temp_zone()); | |
| 309 } else { | |
| 310 // It is safe to turn this check off in the future, but it is better | |
| 311 // to list opcodes in CanObserveNothing, in CanObserveAnything, or if | |
| 312 // you don't know, to add another case inside this DCHECK_EXTRA. | |
| 313 DCHECK_EXTRA(node->op()->opcode() == IrOpcode::kCall, "%s", | |
| 314 node->op()->mnemonic()); | |
| 315 TRACE2(1, | |
| 316 " cannot determine unobservables-set for #%d:%s; " | |
| 317 "conservatively recording empty set", | |
| 318 node->id(), node->op()->mnemonic()); | |
| 319 return UnobservablesSet::DeterminedEmpty(temp_zone()); | |
| 320 } | |
| 321 } | |
| 322 UNREACHABLE(); | |
| 323 return UnobservablesSet::Undetermined(); | |
| 324 } | |
| 325 | |
| 326 bool StoreStoreFinder::CanObserveNothing(Node* node) { | |
| 327 Operator::Properties mask = | |
| 328 Operator::kNoRead | Operator::kNoDeopt | Operator::kNoThrow; | |
| 329 | |
| 330 return (node->op()->properties() & mask) == mask || | |
|
Jarin
2016/07/20 11:43:33
node->op()->HasProperty(Operator::kNoRead | Operator::kNoDeopt | Operator::kNoThrow)
bgeron
2016/07/20 16:27:16
That doesn't compile, because Operator::kNoRead | Operator::kNoDeopt | Operator::kNoThrow is an Operator::Properties value, and HasProperty() takes a single Operator::Property.
| |
| 331 node->opcode() == IrOpcode::kAllocate || | |
| 332 node->opcode() == IrOpcode::kCheckedLoad || | |
| 333 node->opcode() == IrOpcode::kLoadElement; | |
| 334 } | |
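Following up the exchange above: the explicit mask is needed because or-ing the flags yields an Operator::Properties value, while HasProperty() accepts a single Operator::Property. A sketch of the idiom as used in CanObserveNothing:

```cpp
// (kNoRead | kNoDeopt | kNoThrow) has type Operator::Properties, a flag set,
// which does not convert to the single Operator::Property that HasProperty()
// expects; the combined test is therefore spelled with an explicit mask.
Operator::Properties mask =
    Operator::kNoRead | Operator::kNoDeopt | Operator::kNoThrow;
bool observes_nothing = (node->op()->properties() & mask) == mask;
```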
| 335 | |
| 336 bool StoreStoreFinder::CanObserveAnything(Node* node) { | |
| 337 const Operator* op = node->op(); | |
| 338 auto opcode = op->opcode(); | |
| 339 if (opcode == IrOpcode::kLoad) { | |
| 340 return true; | |
| 341 } | |
| 342 return !op->HasProperty(Operator::kNoThrow) || | |
| 343 !op->HasProperty(Operator::kNoDeopt); | |
| 344 } | |
| 345 | |
| 346 // Initialize unobservable_ with js_graph->graph->NodeCount() empty sets. | |
| 347 StoreStoreFinder::StoreStoreFinder(Editor* editor, JSGraph* js_graph, | |
| 348 Zone* temp_zone) | |
| 349 : AdvancedReducer(editor), | |
| 350 jsgraph_(js_graph), | |
| 351 temp_zone_(temp_zone), | |
| 352 unobservable_(js_graph->graph()->NodeCount(), | |
| 353 UnobservablesSet::Undetermined(), temp_zone), | |
| 354 to_remove_(temp_zone) {} | |
| 355 | |
| 356 StoreStoreFinder::~StoreStoreFinder() {} | |
| 357 | |
| 358 Reduction StoreStoreFinder::Reduce(Node* node) { | |
| 359 if (IsEligibleNode(node)) { | |
| 360 return ReduceEligibleNode(node); | |
| 361 } else { | |
| 362 return NoChange(); | |
| 363 } | |
| 364 } | |
| 365 | |
| 366 Reduction StoreStoreFinder::ReduceEligibleNode(Node* node) { | |
| 367 TRACE2(1, "Found eligible node: %4d:%s", node->id(), node->op()->mnemonic()); | |
| 368 | |
| 369 Node* cur = node; | |
| 370 TRACE2(1, " Recomputing unobservable-use-intersection for #%d:%s", cur->id(), | |
| 371 cur->op()->mnemonic()); | |
| 372 UnobservablesSet after_set = RecomputeUseIntersection(cur); | |
| 373 bool cur_set_changed; | |
| 240 | 374 |
| 241 do { | 375 do { |
| 242 FieldAccess access = OpParameter<FieldAccess>(current_node->op()); | 376 UnobservablesSet before_set = RecomputeSet(cur, after_set); |
| 243 Offset offset = ToOffset(access); | 377 |
| 244 Node* object_input = current_node->InputAt(0); | 378 DCHECK_NOT_NULL(before_set.set()); |
| 245 | 379 TRACE2(2, " %d StoreObservations in new set", |
| 246 Node* previous = PreviousEffectBeforeStoreField(current_node); | 380 (int)before_set.set()->size()); |
| 247 | 381 |
| 248 // Find the map entry. | 382 UnobservablesSet* stored_for_node = &unobservable().at(cur->id()); |
| 249 ZoneMap<Offset, Node*>::iterator find_result = futureStore.find(offset); | 383 |
| 250 | 384 cur_set_changed = (*stored_for_node != before_set); |
| 251 bool present = find_result != futureStore.end(); | 385 |
| 252 Node* value = present ? find_result->second : nullptr; | 386 if (!stored_for_node->IsUndetermined() && !cur_set_changed) { |
| 253 | 387 // We will not be able to update the part of this chain above any more. |
| 254 if (present && value == object_input && AtMostTagged(access)) { | 388 // Exit. |
| 255 // Key was present, and the value equalled object_input. This means | 389 TRACE2(1, "+ No change: stabilized. Stopping this chain."); |
| 256 // that soon after in the effect chain, we will do a StoreField to the | 390 break; |
| 257 // same object with the same offset, therefore current_node can be | 391 } else if (stored_for_node->IsUndetermined() && !cur_set_changed) { |
| 258 // optimized away. Also, the future StoreField is at least as big as this | 392 DCHECK(before_set.IsEmpty()); |
| 259 // one. | 393 TRACE2(2, " Still empty set, but marking determined. Walking up."); |
| 260 // | |
| 261 // We don't need to update futureStore. | |
| 262 | |
| 263 Node* previous_effect = NodeProperties::GetEffectInput(current_node); | |
| 264 | |
| 265 NodeProperties::ReplaceUses(current_node, nullptr, previous_effect, | |
| 266 nullptr, nullptr); | |
| 267 current_node->Kill(); | |
| 268 TRACE("#%d[[+%d,%s]](#%d) -- at most tagged size, eliminated", | |
| 269 current_node->id(), offset, | |
| 270 MachineReprToString(access.machine_type.representation()), | |
| 271 object_input->id()); | |
| 272 } else if (present && value == object_input && !AtMostTagged(access)) { | |
| 273 TRACE("#%d[[+%d,%s]](#%d) -- too wide, not eliminated", | |
| 274 current_node->id(), offset, | |
| 275 MachineReprToString(access.machine_type.representation()), | |
| 276 object_input->id()); | |
| 277 } else if (present && value != object_input && AtLeastTagged(access)) { | |
| 278 // Key was present, and the value did not equal object_input. This means | |
| 279 // that there is a StoreField to this offset in the future, but the | |
| 280 // object instance comes from a different Node. We pessimistically | |
| 281 // assume that we cannot optimize current_node away. However, we will | |
| 282 // guess that the current StoreField is more relevant than the future | |
| 283 // one, record the current StoreField in futureStore instead, and | |
| 284 // continue ascending up the chain. | |
| 285 find_result->second = object_input; | |
| 286 TRACE("#%d[[+%d,%s]](#%d) -- wide enough, diff object, updated in map", | |
| 287 current_node->id(), offset, | |
| 288 MachineReprToString(access.machine_type.representation()), | |
| 289 object_input->id()); | |
| 290 } else if (!present && AtLeastTagged(access)) { | |
| 291 // Key was not present. This means that there is no matching | |
| 292 // StoreField to this offset in the future, so we cannot optimize | |
| 293 // current_node away. However, we will record the current StoreField | |
| 294 // in futureStore, and continue ascending up the chain. | |
| 295 futureStore.insert(std::make_pair(offset, object_input)); | |
| 296 TRACE( | |
| 297 "#%d[[+%d,%s]](#%d) -- wide enough, key not present, inserted in map", | |
| 298 current_node->id(), offset, | |
| 299 MachineReprToString(access.machine_type.representation()), | |
| 300 object_input->id()); | |
| 301 } else if (!AtLeastTagged(access)) { | |
| 302 TRACE("#%d[[+%d,%s]](#%d) -- too narrow to record", current_node->id(), | |
| 303 offset, MachineReprToString(access.machine_type.representation()), | |
| 304 object_input->id()); | |
| 305 } else { | 394 } else { |
| 306 UNREACHABLE(); | 395 TRACE2(2, " Growing unreachable-set and walking up."); |
| 307 } | 396 } |
| 308 | 397 |
| 309 // Regardless of whether we eliminated node {current}, we want to | 398 // Overwrite vector in-place. |
| 310 // continue walking up the effect chain. | 399 *stored_for_node = before_set; |
| 311 | 400 |
| 312 current_node = previous; | 401 Node* previous = previous_effect_in_chain(cur); |
| 313 } while (current_node != nullptr && | 402 if (previous == nullptr && cur_set_changed) { |
| 314 current_node->op()->opcode() == IrOpcode::kStoreField); | 403 TRACE2(1, |
| 315 | 404 "- Reached top of chain; marking effect inputs for revisiting."); |
| 316 TRACE("finished"); | 405 for (int i = 0; i < cur->op()->EffectInputCount(); i++) { |
| 406 Node* input = NodeProperties::GetEffectInput(cur, i); | |
| 407 if (!CanObserveAnything(input)) { | |
| 408 Revisit(input); | |
| 409 } | |
| 410 } | |
| 411 | |
| 412 cur = nullptr; | |
| 413 } else if (previous == nullptr && !cur_set_changed) { | |
| 414 TRACE2(1, "+ Reached top of chain and stabilized."); | |
| 415 cur = nullptr; | |
| 416 } else { | |
| 417 // Update variables for next loop iteration | |
| 418 cur = previous; | |
| 419 DCHECK(effect_use_count(previous) == 1); | |
| 420 after_set = before_set; | |
| 421 if (FLAG_turbo_verify_store_elimination) { | |
| 422 DCHECK(after_set == RecomputeUseIntersection(cur)); | |
| 423 } | |
| 424 DCHECK_NOT_NULL(cur); | |
| 425 } | |
| 426 } while (cur != nullptr); | |
| 427 | |
| 428 return NoChange(); | |
| 429 } | |
| 430 | |
| 431 // Compute the intersection of the UnobservablesSets of all effect uses and | |
| 432 // return it. This function only works if {node} has an effect use. | |
| 433 // | |
| 434 // The result UnobservablesSet will always be determined. | |
| 435 UnobservablesSet StoreStoreFinder::RecomputeUseIntersection(Node* node) { | |
| 436 TRACE2(4, " recomputing use intersections of #%d:%s", node->id(), | |
| 437 node->op()->mnemonic()); | |
| 438 | |
| 439 // {first} == true indicates that we haven't looked at any elements yet. | |
| 440 // {first} == false indicates that cur_set is the intersection of at least one | |
| 441 // thing. | |
| 442 | |
| 443 bool first = true; | |
| 444 UnobservablesSet cur_set = UnobservablesSet::Undetermined(); // irrelevant | |
| 445 | |
| 446 for (Edge edge : node->use_edges()) { | |
| 447 // Skip non-effect edges | |
| 448 if (!NodeProperties::IsEffectEdge(edge)) { | |
| 449 continue; | |
| 450 } | |
| 451 | |
| 452 Node* use = edge.from(); | |
| 453 TRACE2(4, " found use %d", use->id()); | |
| 454 UnobservablesSet new_set = unobservable().at(use->id()); | |
| 455 // Include new_set in the intersection. | |
| 456 if (first) { | |
| 457 // Intersection of a one-element set is that one element | |
| 458 first = false; | |
| 459 cur_set = new_set; | |
| 460 } else { | |
| 461 // Take the intersection of cur_set and new_set. | |
| 462 cur_set = cur_set.Intersect(new_set, temp_zone()); | |
| 463 } | |
| 464 | |
| 465 if (FLAG_trace_store_elimination) { | |
| 466 // Serialise the UnobservablesSet. | |
| 467 std::ostringstream os; | |
| 468 os << "intersected with " << new_set << ", current intersection is " | |
| 469 << cur_set; | |
| 470 std::string msg = os.str(); | |
| 471 TRACE2(4, " %s", msg.c_str()); | |
| 472 } | |
| 473 } | |
| 474 | |
| 475 if (first) { | |
| 476 // There were no effect uses. | |
| 477 auto opcode = node->op()->opcode(); | |
| 478 // List of opcodes that may end this effect chain. The opcodes are not | |
| 479 // important to the soundness of this optimization; this serves as a | |
| 480 // general sanity check. Add opcodes to this list as it suits you. | |
| 481 // | |
| 482 // Everything is observable after these opcodes; return the empty set. | |
| 483 DCHECK_EXTRA( | |
| 484 opcode == IrOpcode::kReturn || opcode == IrOpcode::kTerminate || | |
| 485 opcode == IrOpcode::kDeoptimize || opcode == IrOpcode::kThrow, | |
| 486 "for #%d:%s", node->id(), node->op()->mnemonic()); | |
| 487 USE(opcode); // silence warning about unused variable | |
| 488 | |
| 489 TRACE2(3, " ..no effect uses, so unobservables-set = []"); | |
| 490 return UnobservablesSet::DeterminedEmpty(temp_zone()); | |
| 491 } else { | |
| 492 if (cur_set.IsUndetermined()) { | |
| 493 cur_set = UnobservablesSet::DeterminedEmpty(temp_zone()); | |
| 494 } | |
| 495 | |
| 496 if (FLAG_trace_store_elimination) { | |
| 497 // Serialise the UnobservablesSet. | |
| 498 std::ostringstream os; | |
| 499 os << cur_set; | |
| 500 std::string msg = os.str(); | |
| 501 TRACE2(2, " ..use-intersection: %s", msg.c_str()); | |
| 502 } | |
| 503 | |
| 504 return cur_set; | |
| 505 } | |
| 506 } | |
| 507 | |
| 508 UnobservablesSet UnobservablesSet::Undetermined() { return UnobservablesSet(); } | |
| 509 | |
| 510 UnobservablesSet::UnobservablesSet() : set_(nullptr) {} | |
| 511 | |
| 512 UnobservablesSet UnobservablesSet::DeterminedEmpty(Zone* zone) { | |
| 513 // Create a new empty UnobservablesSet. This allocates in the zone, and | |
| 514 // can probably be optimized to use a global singleton. | |
| 515 ZoneSet<StoreObservation>* empty_set = | |
| 516 new (zone->New(sizeof(ZoneSet<StoreObservation>))) | |
| 517 ZoneSet<StoreObservation>(zone); | |
| 518 return UnobservablesSet(empty_set); | |
| 519 } | |
| 520 | |
| 521 // Computes the intersection of two UnobservablesSets. May return | |
| 522 // UnobservablesSet::Undetermined() instead of an empty UnobservablesSet for | |
| 523 // speed. | |
| 524 UnobservablesSet UnobservablesSet::Intersect(UnobservablesSet other, | |
| 525 Zone* zone) const { | |
| 526 if (set() == nullptr || other.set() == nullptr) { | |
| 527 return Undetermined(); | |
| 528 } else if (other.set() == nullptr) { | |
| 529 return *this; | |
| 530 } else { | |
| 531 ZoneSet<StoreObservation>* intersection = | |
| 532 new (zone->New(sizeof(ZoneSet<StoreObservation>))) | |
| 533 ZoneSet<StoreObservation>(zone); | |
| 534 // Put the intersection of set() and other.set() in intersection. | |
| 535 set_intersection(set()->begin(), set()->end(), other.set()->begin(), | |
| 536 other.set()->end(), | |
| 537 std::inserter(*intersection, intersection->end())); | |
| 538 TRACE2(3, "intersected; result:"); | |
| 539 for (StoreObservation obs : *intersection) { | |
| 540 std::ostringstream os; | |
| 541 os << obs; | |
| 542 std::string msg = os.str(); | |
| 543 TRACE2(3, "- %s", msg.c_str()); | |
| 544 } | |
| 545 return UnobservablesSet(intersection); | |
| 546 } | |
| 547 } | |
| 548 | |
| 549 UnobservablesSet UnobservablesSet::Add(StoreObservation obs, Zone* zone) const { | |
| 550 bool present = (set()->find(obs) != set()->end()); | |
| 551 if (present) { | |
| 552 return *this; | |
| 553 } else { | |
| 554 // Make a new empty set. | |
| 555 ZoneSet<StoreObservation>* new_set = | |
| 556 new (zone->New(sizeof(ZoneSet<StoreObservation>))) | |
| 557 ZoneSet<StoreObservation>(zone); | |
| 558 // Copy the old elements over. | |
| 559 *new_set = *set(); | |
| 560 // Add the new element. | |
| 561 bool inserted = new_set->insert(obs).second; | |
| 562 DCHECK(inserted); | |
| 563 USE(inserted); // silence warning about unused variable | |
| 564 | |
| 565 return UnobservablesSet(new_set); | |
| 566 } | |
| 567 } | |
| 568 | |
| 569 UnobservablesSet UnobservablesSet::RemoveSameOffset(StoreOffset offset, | |
| 570 Zone* zone) const { | |
| 571 // Make a new empty set. | |
| 572 ZoneSet<StoreObservation>* new_set = | |
| 573 new (zone->New(sizeof(ZoneSet<StoreObservation>))) | |
| 574 ZoneSet<StoreObservation>(zone); | |
| 575 // Copy all elements over that have a different offset. | |
| 576 for (auto obs : *set()) { | |
| 577 if (obs.offset_ != offset) { | |
| 578 new_set->insert(obs); | |
| 579 } | |
| 580 } | |
| 581 | |
| 582 return UnobservablesSet(new_set); | |
| 583 } | |
| 584 | |
| 585 // Used for debugging. | |
| 586 std::ostream& operator<<(std::ostream& os, UnobservablesSet set) { | |
| 587 if (set.set() == nullptr) { | |
| 588 os << "(undetermined)"; | |
| 589 } else { | |
| 590 os << "["; | |
| 591 bool first = true; | |
| 592 for (StoreObservation obs : *set.set()) { | |
| 593 if (!first) { | |
| 594 os << ","; | |
| 595 } else { | |
| 596 first = false; | |
| 597 } | |
| 598 os << obs; | |
| 599 } | |
| 600 os << "]"; | |
| 601 } | |
| 602 return os; | |
| 603 } | |
| 604 | |
| 605 bool UnobservablesSet::operator==(const UnobservablesSet& other) const { | |
| 606 if (IsUndetermined() || other.IsUndetermined()) { | |
| 607 TRACE2(4, " (either is undetermined)"); | |
| 608 return IsEmpty() && other.IsEmpty(); | |
| 609 } else { | |
| 610 TRACE2(4, " (neither is undetermined)"); | |
| 611 // Both pointers guaranteed not to be nullptrs. | |
| 612 return *set() == *other.set(); | |
| 613 } | |
| 614 } | |
| 615 | |
| 616 bool UnobservablesSet::operator!=(const UnobservablesSet& other) const { | |
| 617 return !(*this == other); | |
| 618 } | |
| 619 | |
| 620 bool operator==(StoreObservation a, StoreObservation b) { | |
| 621 return (a.id_ == b.id_) && (a.offset_ == b.offset_); | |
| 622 } | |
| 623 | |
| 624 bool operator<(StoreObservation a, StoreObservation b) { | |
| 625 return (a.id_ < b.id_) || (a.id_ == b.id_ && a.offset_ < b.offset_); | |
| 626 } | |
| 627 | |
| 628 std::ostream& operator<<(std::ostream& os, StoreObservation obs) { | |
| 629 os << "#" << obs.id_ << "[+" << obs.offset_ << "]"; | |
| 630 return os; | |
| 317 } | 631 } |
| 318 | 632 |
| 319 } // namespace compiler | 633 } // namespace compiler |
| 320 } // namespace internal | 634 } // namespace internal |
| 321 } // namespace v8 | 635 } // namespace v8 |