Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include <iterator> | |
| 6 | |
| 5 #include "src/compiler/store-store-elimination.h" | 7 #include "src/compiler/store-store-elimination.h" |
| 6 | 8 |
| 7 #include "src/compiler/all-nodes.h" | 9 #include "src/compiler/all-nodes.h" |
| 8 #include "src/compiler/js-graph.h" | 10 #include "src/compiler/js-graph.h" |
| 9 #include "src/compiler/node-properties.h" | 11 #include "src/compiler/node-properties.h" |
| 10 #include "src/compiler/simplified-operator.h" | 12 #include "src/compiler/simplified-operator.h" |
| 11 | 13 |
| 12 namespace v8 { | 14 namespace v8 { |
| 13 namespace internal { | 15 namespace internal { |
| 14 namespace compiler { | 16 namespace compiler { |
| 15 | 17 |
| 16 #define TRACE(fmt, ...) \ | 18 #define TRACE(fmt, ...) \ |
| 17 do { \ | 19 do { \ |
| 18 if (FLAG_trace_store_elimination) { \ | 20 if (FLAG_trace_store_elimination) { \ |
| 19 PrintF("StoreStoreElimination::ReduceEligibleNode: " fmt "\n", \ | 21 PrintF("RedundantStoreFinder: " fmt "\n", ##__VA_ARGS__); \ |
| 20 ##__VA_ARGS__); \ | 22 } \ |
| 21 } \ | |
| 22 } while (false) | 23 } while (false) |
| 23 | 24 |
| 24 // A simple store-store elimination. When the effect chain contains the | 25 // CHECK_EXTRA is like CHECK, but has two or more arguments: a boolean |
| 25 // following sequence, | 26 // expression, a format string, and any number of extra arguments. The boolean |
| 26 // | 27 // expression will be evaluated at runtime. If it evaluates to false, then an |
| 27 // - StoreField[[+off_1]](x1, y1) | 28 // error message will be shown containing the condition, as well as the extra |
| 28 // - StoreField[[+off_2]](x2, y2) | 29 // info formatted like with printf. |
| 29 // - StoreField[[+off_3]](x3, y3) | 30 #define CHECK_EXTRA(condition, fmt, ...) \ |
| 30 // ... | 31 do { \ |
| 31 // - StoreField[[+off_n]](xn, yn) | 32 if (V8_UNLIKELY(!(condition))) { \ |
| 32 // | 33 V8_Fatal(__FILE__, __LINE__, "Check failed: %s. Extra info: " fmt, \ |
| 33 // where the xes are the objects and the ys are the values to be stored, then | 34 #condition, ##__VA_ARGS__); \ |
| 34 // we are going to say that a store is superfluous if the same offset of the | 35 } \ |
| 35 // same object will be stored to in the future. If off_i == off_j and xi == xj | 36 } while (0) |
| 36 // and i < j, then we optimize the i'th StoreField away. | 37 |
| 37 // | 38 #ifdef DEBUG |
| 38 // This optimization should be initiated on the last StoreField in such a | 39 #define DCHECK_EXTRA(condition, fmt, ...) \ |
| 39 // sequence. | 40 CHECK_EXTRA(condition, fmt, ##__VA_ARGS__) |
| 40 // | 41 #else |
| 41 // The algorithm works by walking the effect chain from the last StoreField | 42 #define DCHECK_EXTRA(condition, fmt, ...) ((void)0) |
| 42 // upwards. While walking, we maintain a map {futureStore} from offsets to | 43 #endif |
| 43 // nodes; initially it is empty. As we walk the effect chain upwards, if | 44 |
| 44 // futureStore[off] = n, then any store to node {n} with offset {off} is | 45 // Store-store elimination. |
| 45 // guaranteed to be useless because we do a tagged-width[2] store to that | 46 // |
| 46 // offset of that object in the near future anyway. For example, for this | 47 // The aim of this optimization is to detect the following pattern in the |
| 47 // effect chain | 48 // effect graph: |
| 48 // | 49 // |
| 49 // 71: StoreField(60, 0) | 50 // - StoreField[+24, kRepTagged](263, ...) |
| 50 // 72: StoreField(65, 8) | 51 // |
| 51 // 73: StoreField(63, 8) | 52 // ... lots of nodes from which the field at offset 24 of the object |
| 52 // 74: StoreField(65, 16) | 53 // returned by node #263 cannot be observed ... |
| 53 // 75: StoreField(62, 8) | 54 // |
| 54 // | 55 // - StoreField[+24, kRepTagged](263, ...) |
| 55 // just before we get to 72, we will have futureStore = {8: 63, 16: 65}. | 56 // |
| 56 // | 57 // In such situations, the earlier StoreField cannot be observed, and can be |
| 57 // Here is the complete process. | 58 // eliminated. This optimization should work for any offset and input node, of |
| 58 // | 59 // course. |
| 59 // - We are at the end of a sequence of consecutive StoreFields. | 60 // |
| 60 // - We start out with futureStore = empty. | 61 // The optimization also works across splits. It currently does not work for |
| 61 // - We then walk the effect chain upwards to find the next StoreField [1]. | 62 // loops, because we tend to put a stack check in loops, and like deopts, |
| 62 // | 63 // stack checks can observe anything. |
| 63 // 1. If the offset is not a key of {futureStore} yet, we put it in. | 64 |
| 64 // 2. If the offset is a key of {futureStore}, but futureStore[offset] is a | 65 // Assumption: every byte of a JS object is only ever accessed through one |
| 65 // different node, we overwrite futureStore[offset] with the current node. | 66 // offset. For instance, byte 15 of a given object may be accessed using a |
| 66 // 3. If the offset is a key of {futureStore} and futureStore[offset] equals | 67 // two-byte read at offset 14, or a four-byte read at offset 12, but never |
| 67 // this node, we eliminate this StoreField. | 68 // both in the same program. |
| 68 // | 69 // |
| 69 // As long as the current effect input points to a node with a single effect | 70 // This implementation needs all dead nodes removed from the graph, and the |
| 70 // output, and as long as its opcode is StoreField, we keep traversing | 71 // graph should be trimmed. |
| 71 // upwards. | |
| 72 // | |
| 73 // | |
| 74 // | |
| 75 // footnotes: | |
| 76 // | |
| 77 // [1] We make sure that we only traverse the linear part, that is, the part | |
| 78 // where every node has exactly one incoming and one outgoing effect edge. | |
| 79 // Also, we only keep walking upwards as long as we keep finding consecutive | |
| 80 // StoreFields on the same node. | |
| 81 // | |
| 82 // [2] This optimization is sound only in certain cases. Specifically, the | |
| 83 // presence of a future store to {off} by itself does not automatically mean | |
| 84 // that earlier stores to {off} are superfluous: a future narrow store does | |
| 85 // not obviate an earlier wide store. However, future stores of a certain | |
| 86 // size do obviate stores to the same offset of lesser or equal size. | |
| 87 // | |
| 88 // It turns out that we are most interested in stores of "tagged" size, | |
| 89 // which is 8 bytes on 64-bit archs and 4 bytes on 32-bit archs. In | |
| 90 // {futureStore}, we record future writes that are of at least this size. | |
| 91 // The three cases are actually a bit more subtle. | |
| 92 // | |
| 93 // 1. If the offset is not a key of {futureStore} and the StoreField is of | |
| 94 // "tagged" size or wider, then we put it in. | |
| 95 // 2. If the offset is present in {futureStore} but the value is different, | |
| 96 // then we overwrite the value if the current StoreField is of "tagged" | |
| 97 // size or wider. | |
| 98 // 3. If the offset is present and the value matches the node, and the | |
| 99 // current StoreField is AT MOST of "tagged" size, then we eliminate this | |
| 100 // StoreField. | |
| 101 // | |
| 102 // Examples of stores that we do not detect as superfluous: 2-byte stores | |
| 103 // followed by 2-byte stores to the same offset; 16-byte stores followed by | |
| 104 // 16-byte stores to the same offset. On ia32, we do not detect consecutive | |
| 105 // float64 stores as superfluous, and on x64 we do not detect consecutive | |
| 106 // int32 stores as superfluous. | |
| 107 | |
| 108 // At a late stage, we realized that this code is more complicated than it | |
| 109 // needs to be: if we store a set of pairs (offset, node), the code simplifies | |
| 110 // to 3 cases instead of 6. We could even store a map from nodes to sets of | |
| 111 // bytes. | |
| 112 | |
| 113 StoreStoreElimination::StoreStoreElimination(JSGraph* js_graph, Zone* temp_zone) | |
| 114 : jsgraph_(js_graph), temp_zone_(temp_zone) {} | |
| 115 | |
| 116 StoreStoreElimination::~StoreStoreElimination() {} | |
| 117 | |
| 118 void StoreStoreElimination::Run() { | |
| 119 // The store-store elimination performs work on chains of certain types of | |
| 120 // nodes. The elimination must be invoked on the lowest node in such a | |
| 121 // chain; we have a helper function IsEligibleNode that returns true | |
| 122 // precisely on the lowest node in such a chain. | |
| 123 // | |
| 124 // Because the elimination removes nodes from the graph, including nodes | |
| 125 // that the elimination was not invoked on, we cannot use a normal | |
| 126 // AdvancedReducer; instead we manually find which nodes to invoke the | |
| 127 // elimination on. Then, in a second step, we invoke the elimination for | |
| 128 // each node that was eligible. | |
| 129 | |
| 130 NodeVector eligible(temp_zone()); // loops over all nodes | |
| 131 AllNodes all(temp_zone(), jsgraph()->graph()); | |
| 132 | |
| 133 for (Node* node : all.live) { | |
| 134 if (IsEligibleNode(node)) { | |
| 135 eligible.push_back(node); | |
| 136 } | |
| 137 } | |
| 138 | |
| 139 for (Node* node : eligible) { | |
| 140 ReduceEligibleNode(node); | |
| 141 } | |
| 142 } | |
| 143 | 72 |
| 144 namespace { | 73 namespace { |
| 145 | 74 |
| 146 // 16 bits was chosen fairly arbitrarily; it seems enough now. 8 bits is too | 75 // 16 bits was chosen fairly arbitrarily; it seems enough now. 8 bits is too |
| 147 // few. | 76 // few. |
| 148 typedef uint16_t Offset; | 77 typedef uint16_t StoreOffset; |
| 78 | |
| 79 struct UnobservableStore { | |
| 80 NodeId id_; | |
| 81 StoreOffset offset_; | |
| 82 | |
| 83 bool operator==(const UnobservableStore) const; | |
| 84 bool operator!=(const UnobservableStore) const; | |
| 85 bool operator<(const UnobservableStore) const; | |
| 86 }; | |
| 87 | |
| 88 } // namespace | |
| 89 | |
| 90 namespace { | |
| 91 | |
| 92 // Instances of UnobservablesSet are immutable. They represent either a set of | |
| 93 // UnobservableStores, or the "unvisited empty set". | |
| 94 // | |
| 95 // We apply some sharing to save memory. The class UnobservablesSet is only a | |
| 96 // pointer wide, and a copy does not use any heap (or temp_zone) memory. Most | |
| 97 // changes to an UnobservablesSet might allocate in the temp_zone. | |
| 98 // | |
| 99 // The size of an instance should be the size of a pointer, plus additional | |
| 100 // space in the zone in the case of non-unvisited UnobservablesSets. Copying | |
| 101 // an UnobservablesSet allocates no memory. | |
| 102 class UnobservablesSet final { | |
| 103 public: | |
| 104 static UnobservablesSet Unvisited(); | |
| 105 static UnobservablesSet VisitedEmpty(Zone* zone); | |
| 106 UnobservablesSet(); // unvisited | |
| 107 UnobservablesSet(const UnobservablesSet& other) : set_(other.set_) {} | |
| 108 | |
| 109 UnobservablesSet Intersect(UnobservablesSet other, Zone* zone) const; | |
| 110 UnobservablesSet Add(UnobservableStore obs, Zone* zone) const; | |
| 111 UnobservablesSet RemoveSameOffset(StoreOffset off, Zone* zone) const; | |
| 112 | |
| 113 const ZoneSet<UnobservableStore>* set() const { return set_; } | |
| 114 | |
| 115 bool IsUnvisited() const { return set_ == nullptr; } | |
| 116 bool IsEmpty() const { return set_ == nullptr || set_->empty(); } | |
| 117 bool Contains(UnobservableStore obs) const { | |
| 118 return set_ != nullptr && (set_->find(obs) != set_->end()); | |
| 119 } | |
| 120 | |
| 121 bool operator==(const UnobservablesSet&) const; | |
| 122 bool operator!=(const UnobservablesSet&) const; | |
| 123 | |
| 124 private: | |
| 125 explicit UnobservablesSet(const ZoneSet<UnobservableStore>* set) | |
| 126 : set_(set) {} | |
| 127 const ZoneSet<UnobservableStore>* set_; | |
| 128 }; | |
| 129 | |
| 130 } // namespace | |
| 131 | |
| 132 namespace { | |
| 133 | |
| 134 class RedundantStoreFinder final { | |
| 135 public: | |
| 136 RedundantStoreFinder(JSGraph* js_graph, Zone* temp_zone); | |
| 137 | |
| 138 void Find(); | |
| 139 | |
| 140 const ZoneSet<Node*>& to_remove_const() { return to_remove_; } | |
| 141 | |
| 142 virtual void Visit(Node* node); | |
| 143 | |
| 144 private: | |
| 145 static bool IsEffectful(Node* node); | |
| 146 void VisitEffectfulNode(Node* node); | |
| 147 UnobservablesSet RecomputeUseIntersection(Node* node); | |
| 148 UnobservablesSet RecomputeSet(Node* node, UnobservablesSet uses); | |
| 149 static bool CannotObserveStoreField(Node* node); | |
| 150 static bool CanObserveAnything(Node* node); | |
| 151 | |
| 152 void MarkForRevisit(Node* node); | |
| 153 bool HasBeenVisited(Node* node); | |
| 154 | |
| 155 JSGraph* jsgraph() const { return jsgraph_; } | |
| 156 Zone* temp_zone() const { return temp_zone_; } | |
| 157 ZoneVector<UnobservablesSet>& unobservable() { return unobservable_; } | |
| 158 UnobservablesSet& unobservable_for_id(NodeId id) { | |
| 159 DCHECK_LT(id, unobservable().size()); | |
| 160 return unobservable()[id]; | |
| 161 } | |
| 162 ZoneSet<Node*>& to_remove() { return to_remove_; } | |
| 163 | |
| 164 JSGraph* const jsgraph_; | |
| 165 Zone* const temp_zone_; | |
| 166 | |
| 167 ZoneStack<Node*> revisit_; | |
| 168 ZoneVector<bool> in_revisit_; | |
| 169 // Maps node IDs to UnobservablesSets. | |
| 170 ZoneVector<UnobservablesSet> unobservable_; | |
| 171 ZoneSet<Node*> to_remove_; | |
| 172 }; | |
| 149 | 173 |
| 150 // To safely cast an offset from a FieldAccess, which has a wider range | 174 // To safely cast an offset from a FieldAccess, which has a wider range |
| 151 // (namely int). | 175 // (namely int). |
| 152 Offset ToOffset(int offset) { | 176 StoreOffset ToOffset(int offset) { |
| 153 CHECK(0 <= offset && offset < (1 << 8 * sizeof(Offset))); | 177 CHECK(0 <= offset && offset < (1 << 8 * sizeof(StoreOffset))); |
| 154 return (Offset)offset; | 178 return (StoreOffset)offset; |
| 155 } | 179 } |
| 156 | 180 |
| 157 Offset ToOffset(const FieldAccess& access) { return ToOffset(access.offset); } | 181 StoreOffset ToOffset(const FieldAccess& access) { |
| 158 | 182 return ToOffset(access.offset); |
| 159 // If node has a single effect use, return that node. If node has no or | 183 } |
| 160 // multiple effect uses, return nullptr. | 184 |
| 161 Node* SingleEffectUse(Node* node) { | 185 size_t RepSizeOf(MachineRepresentation rep) { |
| 162 Node* last_use = nullptr; | 186 return ((size_t)1) << ElementSizeLog2Of(rep); |
|
> **Jarin** 2016/08/01 08:25:43: `((size_t)1)` -> `1u`
>
> **bgeron** 2016/08/04 12:31:02: Hm, I think that would be an unsigned int, which i…
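
As an aside on the `1u` question above, here is a minimal standalone check (not part of the CL) of the types involved: the result of a shift has the promoted type of its left operand, which is what makes `((size_t)1)` and `1u` differ.

```cpp
// Sketch: 1u << n has type unsigned int, while ((size_t)1) << n has type
// size_t. Both are wide enough for the element sizes that occur here (at
// most 1 << 4), so the review question is about type hygiene, not overflow.
#include <cstddef>
#include <type_traits>

int main() {
  static_assert(std::is_same<decltype(1u << 3), unsigned int>::value,
                "1u stays an unsigned int after promotion");
  static_assert(std::is_same<decltype(static_cast<std::size_t>(1) << 3),
                             std::size_t>::value,
                "a size_t left operand makes the shift a size_t");
  return 0;
}
```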
| |
| 187 } | |
| 188 size_t RepSizeOf(FieldAccess access) { | |
| 189 return RepSizeOf(access.machine_type.representation()); | |
| 190 } | |
| 191 | |
| 192 bool AtMostTagged(FieldAccess access) { | |
| 193 return RepSizeOf(access) <= RepSizeOf(MachineRepresentation::kTagged); | |
| 194 } | |
| 195 | |
| 196 bool AtLeastTagged(FieldAccess access) { | |
| 197 return RepSizeOf(access) >= RepSizeOf(MachineRepresentation::kTagged); | |
| 198 } | |
| 199 | |
| 200 } // namespace | |
| 201 | |
| 202 void RedundantStoreFinder::Find() { | |
| 203 Visit(jsgraph()->graph()->end()); | |
| 204 | |
| 205 while (!revisit_.empty()) { | |
| 206 Node* next = revisit_.top(); | |
| 207 revisit_.pop(); | |
| 208 DCHECK_LT(next->id(), in_revisit_.size()); | |
| 209 in_revisit_[next->id()] = false; | |
| 210 Visit(next); | |
| 211 } | |
| 212 | |
| 213 #ifdef DEBUG | |
| 214 // Check that we visited all the StoreFields | |
| 215 AllNodes all(temp_zone(), jsgraph()->graph()); | |
| 216 for (Node* node : all.live) { | |
| 217 if (node->op()->opcode() == IrOpcode::kStoreField) { | |
| 218 DCHECK_EXTRA(HasBeenVisited(node), "#%d:%s", node->id(), | |
| 219 node->op()->mnemonic()); | |
| 220 } | |
| 221 } | |
| 222 #endif | |
| 223 } | |
| 224 | |
| 225 void RedundantStoreFinder::MarkForRevisit(Node* node) { | |
| 226 DCHECK_LT(node->id(), in_revisit_.size()); | |
| 227 if (!in_revisit_[node->id()]) { | |
| 228 revisit_.push(node); | |
| 229 in_revisit_[node->id()] = true; | |
| 230 } | |
| 231 } | |
| 232 | |
| 233 bool RedundantStoreFinder::HasBeenVisited(Node* node) { | |
| 234 return !unobservable_for_id(node->id()).IsUnvisited(); | |
| 235 } | |
| 236 | |
| 237 void StoreStoreElimination::Run(JSGraph* js_graph, Zone* temp_zone) { | |
| 238 // Find superfluous nodes | |
| 239 RedundantStoreFinder finder(js_graph, temp_zone); | |
| 240 finder.Find(); | |
| 241 | |
| 242 // Remove superfluous nodes | |
| 243 | |
| 244 for (Node* node : finder.to_remove_const()) { | |
| 245 if (FLAG_trace_store_elimination) { | |
| 246 PrintF("StoreStoreElimination::Run: Eliminating node #%d:%s\n", | |
| 247 node->id(), node->op()->mnemonic()); | |
| 248 } | |
| 249 Node* previous_effect = NodeProperties::GetEffectInput(node); | |
| 250 NodeProperties::ReplaceUses(node, nullptr, previous_effect, nullptr, | |
| 251 nullptr); | |
| 252 node->Kill(); | |
| 253 } | |
| 254 } | |
| 255 | |
| 256 bool RedundantStoreFinder::IsEffectful(Node* node) { | |
| 257 return (node->op()->EffectInputCount() >= 1); | |
| 258 } | |
| 259 | |
| 260 // Recompute unobservables-set for a node. Will also mark superfluous nodes | |
| 261 // as to be removed. | |
| 262 | |
| 263 UnobservablesSet RedundantStoreFinder::RecomputeSet(Node* node, | |
| 264 UnobservablesSet uses) { | |
| 265 switch (node->op()->opcode()) { | |
| 266 case IrOpcode::kStoreField: { | |
| 267 Node* stored_to = node->InputAt(0); | |
| 268 FieldAccess access = OpParameter<FieldAccess>(node->op()); | |
| 269 StoreOffset offset = ToOffset(access); | |
| 270 | |
| 271 UnobservableStore observation = {stored_to->id(), offset}; | |
| 272 bool isNotObservable = uses.Contains(observation); | |
| 273 | |
| 274 if (isNotObservable && AtMostTagged(access)) { | |
| 275 TRACE(" #%d is StoreField[+%d,%s](#%d), unobservable", node->id(), | |
| 276 offset, MachineReprToString(access.machine_type.representation()), | |
| 277 stored_to->id()); | |
| 278 to_remove().insert(node); | |
| 279 return uses; | |
| 280 } else if (isNotObservable && !AtMostTagged(access)) { | |
| 281 TRACE( | |
| 282 " #%d is StoreField[+%d,%s](#%d), repeated in future but too " | |
| 283 "big to optimize away", | |
| 284 node->id(), offset, | |
| 285 MachineReprToString(access.machine_type.representation()), | |
| 286 stored_to->id()); | |
| 287 return uses; | |
| 288 } else if (!isNotObservable && AtLeastTagged(access)) { | |
| 289 TRACE(" #%d is StoreField[+%d,%s](#%d), observable, recording in set", | |
| 290 node->id(), offset, | |
| 291 MachineReprToString(access.machine_type.representation()), | |
| 292 stored_to->id()); | |
| 293 return uses.Add(observation, temp_zone()); | |
| 294 } else if (!isNotObservable && !AtLeastTagged(access)) { | |
| 295 TRACE( | |
| 296 " #%d is StoreField[+%d,%s](#%d), observable but too small to " | |
| 297 "record", | |
| 298 node->id(), offset, | |
| 299 MachineReprToString(access.machine_type.representation()), | |
| 300 stored_to->id()); | |
| 301 return uses; | |
| 302 } else { | |
| 303 UNREACHABLE(); | |
| 304 } | |
| 305 break; | |
| 306 } | |
| 307 case IrOpcode::kLoadField: { | |
| 308 Node* loaded_from = node->InputAt(0); | |
| 309 FieldAccess access = OpParameter<FieldAccess>(node->op()); | |
| 310 StoreOffset offset = ToOffset(access); | |
| 311 | |
| 312 TRACE( | |
| 313 " #%d is LoadField[+%d,%s](#%d), removing all offsets [+%d] from " | |
| 314 "set", | |
| 315 node->id(), offset, | |
| 316 MachineReprToString(access.machine_type.representation()), | |
| 317 loaded_from->id(), offset); | |
| 318 | |
| 319 return uses.RemoveSameOffset(offset, temp_zone()); | |
| 320 break; | |
| 321 } | |
| 322 default: | |
| 323 if (CannotObserveStoreField(node)) { | |
| 324 TRACE(" #%d:%s can observe nothing, set stays unchanged", node->id(), | |
| 325 node->op()->mnemonic()); | |
| 326 return uses; | |
| 327 } else if (CanObserveAnything(node)) { | |
| 328 TRACE(" #%d:%s can observe everything, recording empty set", | |
| 329 node->id(), node->op()->mnemonic()); | |
| 330 return UnobservablesSet::VisitedEmpty(temp_zone()); | |
| 331 } else { | |
| 332 // It is safe to turn this check off in the future, but it is better | |
| 333 // to list opcodes in CannotObserveStoreField, in CanObserveAnything, | |
| 334 // or if you don't know, to add another case inside this DCHECK_EXTRA. | |
| 335 DCHECK_EXTRA(node->op()->opcode() == IrOpcode::kCall, "%s", | |
| 336 node->op()->mnemonic()); | |
| 337 TRACE( | |
| 338 " cannot determine unobservables-set for #%d:%s; " | |
| 339 "conservatively recording empty set", | |
| 340 node->id(), node->op()->mnemonic()); | |
| 341 return UnobservablesSet::VisitedEmpty(temp_zone()); | |
| 342 } | |
| 343 } | |
| 344 UNREACHABLE(); | |
| 345 return UnobservablesSet::Unvisited(); | |
| 346 } | |
| 347 | |
| 348 bool RedundantStoreFinder::CannotObserveStoreField(Node* node) { | |
| 349 Operator::Properties mask = | |
| 350 Operator::kNoRead | Operator::kNoDeopt | Operator::kNoThrow; | |
| 351 | |
| 352 return (node->op()->properties() & mask) == mask || | |
| 353 node->opcode() == IrOpcode::kAllocate || | |
| 354 node->opcode() == IrOpcode::kCheckedLoad || | |
| 355 node->opcode() == IrOpcode::kLoadElement || | |
| 356 node->opcode() == IrOpcode::kLoad; | |
| 357 } | |
| 358 | |
| 359 bool RedundantStoreFinder::CanObserveAnything(Node* node) { | |
| 360 return !node->op()->HasProperty(Operator::kNoThrow) || | |
| 361 !node->op()->HasProperty(Operator::kNoDeopt); | |
| 362 } | |
| 363 | |
| 364 // Initialize unobservable_ with js_graph->graph()->NodeCount() empty sets. | |
| 365 RedundantStoreFinder::RedundantStoreFinder(JSGraph* js_graph, Zone* temp_zone) | |
| 366 : jsgraph_(js_graph), | |
| 367 temp_zone_(temp_zone), | |
| 368 revisit_(temp_zone), | |
| 369 in_revisit_(js_graph->graph()->NodeCount(), temp_zone), | |
| 370 unobservable_(js_graph->graph()->NodeCount(), | |
| 371 UnobservablesSet::Unvisited(), temp_zone), | |
| 372 to_remove_(temp_zone) {} | |
| 373 | |
| 374 void RedundantStoreFinder::Visit(Node* node) { | |
| 375 // All effectful nodes should be reachable from End via a sequence of | |
| 376 // control, then a sequence of effect edges. In VisitEffectfulNode we mark | |
| 377 // all effect inputs for revisiting (if they might have stale state); here | |
| 378 // we mark all control inputs at least once. | |
| 379 | |
| 380 if (!HasBeenVisited(node)) { | |
| 381 for (Edge edge : node->input_edges()) { | |
|
> **Jarin** 2016/08/01 08:25:43: You can also say: `for (int i = 0; i < node->op()-`…
>
> **bgeron** 2016/08/04 12:31:02: Is the speed the reason that that fragment might b…
>
> **Jarin** 2016/08/05 13:49:49: Yes. Also consistency with the rest of the codebas…
>
> **bgeron** 2016/08/09 18:52:52: Changed to your suggestion; we can refactor all th…
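
Jarin's truncated suggestion above presumably refers to iterating the control inputs by index instead of filtering every input edge. Since bgeron replies that he changed to that suggestion, the revised loop plausibly looks like the sketch below; this is a hedged reconstruction using the standard `Operator::ControlInputCount()` and `NodeProperties::GetControlInput()` helpers, not a quote of the final patch.

```cpp
// Hypothetical reconstruction of the suggested loop: walk only the control
// inputs of {node}, rather than testing IsControlEdge on every input edge.
for (int i = 0; i < node->op()->ControlInputCount(); i++) {
  Node* control_input = NodeProperties::GetControlInput(node, i);
  if (!HasBeenVisited(control_input)) {
    MarkForRevisit(control_input);
  }
}
```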
| |
| 382 if (NodeProperties::IsControlEdge(edge) && !HasBeenVisited(edge.to())) { | |
| 383 MarkForRevisit(edge.to()); | |
| 384 } | |
| 385 } | |
| 386 } | |
| 387 | |
| 388 bool isEffectful = (node->op()->EffectInputCount() >= 1); | |
| 389 if (isEffectful) { | |
| 390 VisitEffectfulNode(node); | |
| 391 DCHECK(HasBeenVisited(node)); | |
| 392 } | |
| 393 | |
| 394 if (!HasBeenVisited(node)) { | |
| 395 // Mark as visited. | |
| 396 unobservable_for_id(node->id()) = | |
| 397 UnobservablesSet::VisitedEmpty(temp_zone()); | |
|
> **Jarin** 2016/08/01 08:25:43: Could this be moved to the control node case? (Aft…
>
> **bgeron** 2016/08/04 12:31:02: Not that I know of. In some cases, VisitEffectfulN…
>
> **Jarin** 2016/08/05 13:49:49: Acknowledged.
| |
| 398 } | |
| 399 } | |
| 400 | |
| 401 void RedundantStoreFinder::VisitEffectfulNode(Node* node) { | |
| 402 if (HasBeenVisited(node)) { | |
| 403 TRACE("- Revisiting: #%d:%s", node->id(), node->op()->mnemonic()); | |
| 404 } | |
| 405 UnobservablesSet after_set = RecomputeUseIntersection(node); | |
| 406 UnobservablesSet before_set = RecomputeSet(node, after_set); | |
| 407 DCHECK(!before_set.IsUnvisited()); | |
| 408 | |
| 409 UnobservablesSet stored_for_node = unobservable_for_id(node->id()); | |
| 410 bool cur_set_changed = | |
| 411 (stored_for_node.IsUnvisited() || stored_for_node != before_set); | |
| 412 if (!cur_set_changed) { | |
| 413 // We will not be able to update the part of this chain above any more. | |
| 414 // Exit. | |
| 415 TRACE("+ No change: stabilized. Not visiting effect inputs."); | |
| 416 } else { | |
| 417 unobservable_for_id(node->id()) = before_set; | |
| 418 | |
| 419 // Mark effect inputs for visiting. | |
| 420 for (int i = 0; i < node->op()->EffectInputCount(); i++) { | |
| 421 Node* input = NodeProperties::GetEffectInput(node, i); | |
| 422 if (!CanObserveAnything(input) || !HasBeenVisited(input)) { | |
|
> **Jarin** 2016/08/01 08:25:43: Is this condition necessary? If the node can obser…
>
> **bgeron** 2016/08/04 12:31:02: No. :) Well, it's not strictly necessary because t…
>
> **Jarin** 2016/08/05 13:49:49: Yeah, I was confused. Thanks for the explanation.
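
The machinery under discussion is a backward dataflow fixed point: recompute a node's set, and re-queue its effect inputs only when the set changed. A toy standalone analogue (all names invented, plain ints standing in for UnobservablesSets) of that revisit pattern:

```cpp
// Toy fixed-point worklist: node i's "effect input" is node i - 1. A value
// is recomputed from its successor; predecessors are re-queued only when
// the value changed (or has never been visited), mirroring MarkForRevisit.
#include <algorithm>
#include <cstdio>
#include <stack>
#include <vector>

int main() {
  std::vector<int> value = {9, 9, 9, 2};  // 9 plays the role of "unvisited"
  std::stack<int> revisit;
  revisit.push(3);  // start at the end node, like Visit(graph->end())

  while (!revisit.empty()) {
    int n = revisit.top();
    revisit.pop();
    // Recompute: cap this node's value by its successor's value.
    int recomputed = (n + 1 < 4) ? std::min(value[n], value[n + 1]) : value[n];
    if (recomputed != value[n]) {
      value[n] = recomputed;                // changed: propagate upwards
      if (n - 1 >= 0) revisit.push(n - 1);  // mark effect input for revisit
    } else if (n - 1 >= 0 && value[n - 1] == 9) {
      revisit.push(n - 1);                  // unvisited: visit at least once
    }
  }
  std::printf("%d %d %d %d\n", value[0], value[1], value[2], value[3]);
}
```

Because values only ever shrink, the worklist is guaranteed to drain, which is the same termination argument as for the shrinking UnobservablesSets in the CL.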
| |
| 423 TRACE(" marking #%d:%s for revisit", input->id(), | |
| 424 input->op()->mnemonic()); | |
| 425 MarkForRevisit(input); | |
| 426 } | |
| 427 } | |
| 428 } | |
| 429 } | |
| 430 | |
| 431 // Compute the intersection of the UnobservablesSets of all effect uses and | |
| 432 // return it. This function only works if {node} has an effect use. | |
| 433 // | |
| 434 // The result UnobservablesSet will always be visited. | |
| 435 UnobservablesSet RedundantStoreFinder::RecomputeUseIntersection(Node* node) { | |
| 436 // {first} == true indicates that we haven't looked at any elements yet. | |
| 437 // {first} == false indicates that cur_set is the intersection of at least one | |
| 438 // thing. | |
| 439 | |
| 440 bool first = true; | |
| 441 UnobservablesSet cur_set = UnobservablesSet::Unvisited(); // irrelevant | |
| 442 | |
| 163 for (Edge edge : node->use_edges()) { | 443 for (Edge edge : node->use_edges()) { |
| 444 // Skip non-effect edges | |
| 164 if (!NodeProperties::IsEffectEdge(edge)) { | 445 if (!NodeProperties::IsEffectEdge(edge)) { |
| 165 continue; | 446 continue; |
| 166 } | 447 } |
| 167 if (last_use != nullptr) { | 448 |
| 168 // more than one | 449 Node* use = edge.from(); |
| 169 return nullptr; | 450 UnobservablesSet new_set = unobservable_for_id(use->id()); |
| 170 } | 451 // Include new_set in the intersection. |
| 171 last_use = edge.from(); | 452 if (first) { |
| 172 DCHECK_NOT_NULL(last_use); | 453 // Intersection of a one-element set is that one element |
| 173 } | 454 first = false; |
| 174 return last_use; | 455 cur_set = new_set; |
| 175 } | 456 } else { |
| 176 | 457 // Take the intersection of cur_set and new_set. |
| 177 // Return true if node is the last consecutive StoreField node in a linear | 458 cur_set = cur_set.Intersect(new_set, temp_zone()); |
| 178 // part of the effect chain. | 459 } |
| 179 bool IsEndOfStoreFieldChain(Node* node) { | 460 } |
| 180 Node* next_on_chain = SingleEffectUse(node); | 461 |
| 181 return (next_on_chain == nullptr || | 462 if (first) { |
| 182 next_on_chain->op()->opcode() != IrOpcode::kStoreField); | 463 // There were no effect uses. |
| 183 } | 464 auto opcode = node->op()->opcode(); |
| 184 | 465 // List of opcodes that may end this effect chain. The opcodes are not |
| 185 // The argument must be a StoreField node. If there is a node before it in the | 466 // important to the soundness of this optimization; this serves as a |
| 186 // effect chain, and if this part of the effect chain is linear (no other | 467 // general sanity check. Add opcodes to this list as it suits you. |
| 187 // effect uses of that previous node), then return that previous node. | 468 // |
| 188 // Otherwise, return nullptr. | 469 // Everything is observable after these opcodes; return the empty set. |
| 189 // | 470 DCHECK_EXTRA( |
| 190 // The returned node need not be a StoreField. | 471 opcode == IrOpcode::kReturn || opcode == IrOpcode::kTerminate || |
| 191 Node* PreviousEffectBeforeStoreField(Node* node) { | 472 opcode == IrOpcode::kDeoptimize || opcode == IrOpcode::kThrow, |
| 192 DCHECK_EQ(node->op()->opcode(), IrOpcode::kStoreField); | 473 "for #%d:%s", node->id(), node->op()->mnemonic()); |
| 193 DCHECK_EQ(node->op()->EffectInputCount(), 1); | 474 USE(opcode); // silence warning about unused variable in release mode |
| 194 | 475 |
| 195 Node* previous = NodeProperties::GetEffectInput(node); | 476 return UnobservablesSet::VisitedEmpty(temp_zone()); |
| 196 if (previous != nullptr && node == SingleEffectUse(previous)) { | |
| 197 return previous; | |
| 198 } else { | 477 } else { |
| 199 return nullptr; | 478 if (cur_set.IsUnvisited()) { |
| 200 } | 479 cur_set = UnobservablesSet::VisitedEmpty(temp_zone()); |
| 201 } | 480 } |
| 202 | 481 |
| 203 size_t rep_size_of(MachineRepresentation rep) { | 482 return cur_set; |
| 204 return ((size_t)1) << ElementSizeLog2Of(rep); | 483 } |
| 205 } | 484 } |
| 206 size_t rep_size_of(FieldAccess access) { | 485 |
| 207 return rep_size_of(access.machine_type.representation()); | 486 UnobservablesSet UnobservablesSet::Unvisited() { return UnobservablesSet(); } |
| 208 } | 487 |
| 209 | 488 UnobservablesSet::UnobservablesSet() : set_(nullptr) {} |
| 210 bool AtMostTagged(FieldAccess access) { | 489 |
| 211 return rep_size_of(access) <= rep_size_of(MachineRepresentation::kTagged); | 490 UnobservablesSet UnobservablesSet::VisitedEmpty(Zone* zone) { |
| 212 } | 491 // Create a new empty UnobservablesSet. This allocates in the zone, and |
| 213 | 492 // can probably be optimized to use a global singleton. |
|
> **Jarin** 2016/08/01 08:25:43: Global singletons are forbidden, but you might wan…
>
> **bgeron** 2016/08/04 12:31:02: I know, it's silly right? But if we memoize a Visi…
>
> **Jarin** 2016/08/05 13:49:49: `graph()->zone()`
>
> **bgeron** 2016/08/09 18:52:52: Done, as discussed.
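
On the memoization question above: a standalone sketch (invented names; a function-local static stands in for an allocation in `graph()->zone()`, since true global singletons are forbidden in V8) of handing out one shared immutable empty set instead of allocating per call:

```cpp
// Sketch: VisitedEmpty() returns the same pointer on every call, so the
// wrapper stays pointer-sized and no per-call allocation happens.
#include <set>

struct SharedEmptySet {
  const std::set<int>* set_;  // shared, immutable backing set
};

SharedEmptySet VisitedEmpty() {
  static const std::set<int> empty;  // stands in for a zone allocation
  return SharedEmptySet{&empty};
}

int main() {
  SharedEmptySet a = VisitedEmpty();
  SharedEmptySet b = VisitedEmpty();
  return (a.set_ == b.set_) ? 0 : 1;  // same backing set: memoized
}
```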
| |
| 214 bool AtLeastTagged(FieldAccess access) { | 493 ZoneSet<UnobservableStore>* empty_set = |
| 215 return rep_size_of(access) >= rep_size_of(MachineRepresentation::kTagged); | 494 new (zone->New(sizeof(ZoneSet<UnobservableStore>))) |
| 216 } | 495 ZoneSet<UnobservableStore>(zone); |
| 217 | 496 return UnobservablesSet(empty_set); |
| 218 } // namespace | 497 } |
| 219 | 498 |
| 220 bool StoreStoreElimination::IsEligibleNode(Node* node) { | 499 // Computes the intersection of two UnobservablesSets. May return |
| 221 return (node->op()->opcode() == IrOpcode::kStoreField) && | 500 // UnobservablesSet::Unvisited() instead of an empty UnobservablesSet for |
| 222 IsEndOfStoreFieldChain(node); | 501 // speed. |
| 223 } | 502 UnobservablesSet UnobservablesSet::Intersect(UnobservablesSet other, |
| 224 | 503 Zone* zone) const { |
| 225 void StoreStoreElimination::ReduceEligibleNode(Node* node) { | 504 if (set() == nullptr || other.set() == nullptr) { |
| 226 DCHECK(IsEligibleNode(node)); | 505 return Unvisited(); |
| 227 | 506 } else if (other.set() == nullptr) { |
|
> **Jarin** 2016/08/01 08:25:43: This case is unreachable, no? (It is already handl…
>
> **bgeron** 2016/08/04 12:31:02: Just checking if you were paying attention, of cou…
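
Incidentally, the `std::set_intersection` + `std::inserter` pattern in Intersect below is what the newly added `#include <iterator>` at the top of the file is for. A standalone illustration, with plain `std::set<int>` standing in for `ZoneSet<UnobservableStore>`:

```cpp
// std::set_intersection consumes two sorted ranges (std::set iterates in
// sorted order) and writes the common elements through an insert iterator.
#include <algorithm>
#include <cstdio>
#include <iterator>
#include <set>

int main() {
  std::set<int> a = {1, 2, 3, 5};
  std::set<int> b = {2, 3, 4};
  std::set<int> out;
  std::set_intersection(a.begin(), a.end(), b.begin(), b.end(),
                        std::inserter(out, out.end()));
  for (int x : out) std::printf("%d ", x);  // prints: 2 3
  std::printf("\n");
  return 0;
}
```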
| |
| 228 // if (FLAG_trace_store_elimination) { | 507 return *this; |
| 229 // PrintF("** StoreStoreElimination::ReduceEligibleNode: activated: | 508 } else { |
| 230 // #%d\n", | 509 ZoneSet<UnobservableStore>* intersection = |
| 231 // node->id()); | 510 new (zone->New(sizeof(ZoneSet<UnobservableStore>))) |
| 232 // } | 511 ZoneSet<UnobservableStore>(zone); |
| 233 | 512 // Put the intersection of set() and other.set() in intersection. |
| 234 TRACE("activated: #%d", node->id()); | 513 set_intersection(set()->begin(), set()->end(), other.set()->begin(), |
| 235 | 514 other.set()->end(), |
| 236 // Initialize empty futureStore. | 515 std::inserter(*intersection, intersection->end())); |
| 237 ZoneMap<Offset, Node*> futureStore(temp_zone()); | 516 |
| 238 | 517 return UnobservablesSet(intersection); |
| 239 Node* current_node = node; | 518 } |
| 240 | 519 } |
| 241 do { | 520 |
| 242 FieldAccess access = OpParameter<FieldAccess>(current_node->op()); | 521 UnobservablesSet UnobservablesSet::Add(UnobservableStore obs, |
| 243 Offset offset = ToOffset(access); | 522 Zone* zone) const { |
| 244 Node* object_input = current_node->InputAt(0); | 523 bool present = (set()->find(obs) != set()->end()); |
| 245 | 524 if (present) { |
| 246 Node* previous = PreviousEffectBeforeStoreField(current_node); | 525 return *this; |
| 247 | 526 } else { |
| 248 // Find the map entry. | 527 // Make a new empty set. |
| 249 ZoneMap<Offset, Node*>::iterator find_result = futureStore.find(offset); | 528 ZoneSet<UnobservableStore>* new_set = |
| 250 | 529 new (zone->New(sizeof(ZoneSet<UnobservableStore>))) |
| 251 bool present = find_result != futureStore.end(); | 530 ZoneSet<UnobservableStore>(zone); |
| 252 Node* value = present ? find_result->second : nullptr; | 531 // Copy the old elements over. |
| 253 | 532 *new_set = *set(); |
| 254 if (present && value == object_input && AtMostTagged(access)) { | 533 // Add the new element. |
| 255 // Key was present, and the value equalled object_input. This means | 534 bool inserted = new_set->insert(obs).second; |
| 256 // that soon after in the effect chain, we will do a StoreField to the | 535 DCHECK(inserted); |
| 257 // same object with the same offset, therefore current_node can be | 536 USE(inserted); // silence warning about unused variable |
| 258 // optimized away. Also, the future StoreField is at least as big as this | 537 |
| 259 // one. | 538 return UnobservablesSet(new_set); |
| 260 // | 539 } |
| 261 // We don't need to update futureStore. | 540 } |
| 262 | 541 |
| 263 Node* previous_effect = NodeProperties::GetEffectInput(current_node); | 542 UnobservablesSet UnobservablesSet::RemoveSameOffset(StoreOffset offset, |
| 264 | 543 Zone* zone) const { |
| 265 NodeProperties::ReplaceUses(current_node, nullptr, previous_effect, | 544 // Make a new empty set. |
| 266 nullptr, nullptr); | 545 ZoneSet<UnobservableStore>* new_set = |
| 267 current_node->Kill(); | 546 new (zone->New(sizeof(ZoneSet<UnobservableStore>))) |
| 268 TRACE("#%d[[+%d,%s]](#%d) -- at most tagged size, eliminated", | 547 ZoneSet<UnobservableStore>(zone); |
| 269 current_node->id(), offset, | 548 // Copy all elements over that have a different offset. |
| 270 MachineReprToString(access.machine_type.representation()), | 549 for (auto obs : *set()) { |
| 271 object_input->id()); | 550 if (obs.offset_ != offset) { |
| 272 } else if (present && value == object_input && !AtMostTagged(access)) { | 551 new_set->insert(obs); |
| 273 TRACE("#%d[[+%d,%s]](#%d) -- too wide, not eliminated", | 552 } |
| 274 current_node->id(), offset, | 553 } |
| 275 MachineReprToString(access.machine_type.representation()), | 554 |
| 276 object_input->id()); | 555 return UnobservablesSet(new_set); |
| 277 } else if (present && value != object_input && AtLeastTagged(access)) { | 556 } |
| 278 // Key was present, and the value did not equal object_input. This means | 557 |
| 279 // that there is a StoreField to this offset in the future, but the | 558 // Used for debugging. |
| 280 // object instance comes from a different Node. We pessimistically | 559 bool UnobservablesSet::operator==(const UnobservablesSet& other) const { |
| 281 // assume that we cannot optimize current_node away. However, we will | 560 if (IsUnvisited() || other.IsUnvisited()) { |
| 282 // guess that the current StoreField is more relevant than the future | 561 return IsEmpty() && other.IsEmpty(); |
| 283 // one, record the current StoreField in futureStore instead, and | 562 } else { |
| 284 // continue ascending up the chain. | 563 // Both pointers guaranteed not to be nullptrs. |
| 285 find_result->second = object_input; | 564 return *set() == *other.set(); |
| 286 TRACE("#%d[[+%d,%s]](#%d) -- wide enough, diff object, updated in map", | 565 } |
| 287 current_node->id(), offset, | 566 } |
| 288 MachineReprToString(access.machine_type.representation()), | 567 |
| 289 object_input->id()); | 568 bool UnobservablesSet::operator!=(const UnobservablesSet& other) const { |
| 290 } else if (!present && AtLeastTagged(access)) { | 569 return !(*this == other); |
| 291 // Key was not present. This means that there is no matching | 570 } |
| 292 // StoreField to this offset in the future, so we cannot optimize | 571 |
| 293 // current_node away. However, we will record the current StoreField | 572 bool UnobservableStore::operator==(const UnobservableStore other) const { |
| 294 // in futureStore, and continue ascending up the chain. | 573 return (id_ == other.id_) && (offset_ == other.offset_); |
| 295 futureStore.insert(std::make_pair(offset, object_input)); | 574 } |
| 296 TRACE( | 575 |
| 297 "#%d[[+%d,%s]](#%d) -- wide enough, key not present, inserted in map", | 576 bool UnobservableStore::operator!=(const UnobservableStore other) const { |
| 298 current_node->id(), offset, | 577 return !(*this == other); |
| 299 MachineReprToString(access.machine_type.representation()), | 578 } |
| 300 object_input->id()); | 579 |
| 301 } else if (!AtLeastTagged(access)) { | 580 bool UnobservableStore::operator<(const UnobservableStore other) const { |
| 302 TRACE("#%d[[+%d,%s]](#%d) -- too narrow to record", current_node->id(), | 581 return (id_ < other.id_) || (id_ == other.id_ && offset_ < other.offset_); |
| 303 offset, MachineReprToString(access.machine_type.representation()), | |
| 304 object_input->id()); | |
| 305 } else { | |
| 306 UNREACHABLE(); | |
| 307 } | |
| 308 | |
| 309 // Regardless of whether we eliminated node {current}, we want to | |
| 310 // continue walking up the effect chain. | |
| 311 | |
| 312 current_node = previous; | |
| 313 } while (current_node != nullptr && | |
| 314 current_node->op()->opcode() == IrOpcode::kStoreField); | |
| 315 | |
| 316 TRACE("finished"); | |
| 317 } | 582 } |
| 318 | 583 |
| 319 } // namespace compiler | 584 } // namespace compiler |
| 320 } // namespace internal | 585 } // namespace internal |
| 321 } // namespace v8 | 586 } // namespace v8 |