OLD | NEW |
| (Empty) |
1 // Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file | |
2 // for details. All rights reserved. Use of this source code is governed by a | |
3 // BSD-style license that can be found in the LICENSE file. | |
4 | |
5 library dart_style.src.line_splitting.line_splitter; | |
6 | |
7 import '../chunk.dart'; | |
8 import '../debug.dart' as debug; | |
9 import '../line_writer.dart'; | |
10 import '../rule/rule.dart'; | |
11 import 'rule_set.dart'; | |
12 import 'solve_state.dart'; | |
13 import 'solve_state_queue.dart'; | |
14 | |
15 /// To ensure the solver doesn't go totally pathological on giant code, we cap | |
16 /// it at a fixed number of attempts. | |
17 /// | |
18 /// If the optimal solution isn't found after this many tries, it just uses the | |
19 /// best it found so far. | |
20 const _maxAttempts = 5000; | |
21 | |
22 /// Takes a set of chunks and determines the best values for its rules in order | |
23 /// to fit it inside the page boundary. | |
24 /// | |
25 /// This problem is exponential in the number of rules and a single expression | |
26 /// in Dart can be quite large, so it isn't feasible to brute force this. For | |
27 /// example: | |
28 /// | |
29 /// outer( | |
30 /// fn(1 + 2, 3 + 4, 5 + 6, 7 + 8), | |
31 /// fn(1 + 2, 3 + 4, 5 + 6, 7 + 8), | |
32 /// fn(1 + 2, 3 + 4, 5 + 6, 7 + 8), | |
33 /// fn(1 + 2, 3 + 4, 5 + 6, 7 + 8)); | |
34 /// | |
35 /// There are 509,607,936 ways this can be split. | |
36 /// | |
37 /// The problem is even harder because we may not be able to easily tell if a | |
38 /// given solution is the best one. It's possible that there is *no* solution | |
39 /// that fits in the page (due to long strings or identifiers) so the winning | |
40 /// solution may still have overflow characters. This makes it hard to know | |
41 /// when we are done and can stop looking. | |
42 /// | |
43 /// There are a couple of pieces of domain knowledge we use to cope with this: | |
44 /// | |
45 /// - Changing a rule from unsplit to split will never lower its cost. A | |
46 /// solution with all rules unsplit will always be the one with the lowest | |
47 /// cost (zero). Conversely, setting all of its rules to the maximum split | |
48 /// value will always have the highest cost. | |
49 /// | |
50 /// (You might think there is a converse rule about overflow characters. The | |
51 /// solution with the fewest splits will have the most overflow, and the | |
52 /// solution with the most splits will have the least overflow. Alas, because | |
53 /// of indentation, that isn't always the case. Adding a split may *increase* | |
54 /// overflow in some cases.) | |
55 /// | |
56 /// - If all of the chunks for a rule are inside lines that already fit in the | |
57 /// page, then splitting that rule will never improve the solution. | |
58 /// | |
59 /// - If two partial solutions have the same cost and the bound rules don't | |
60 /// affect any of the remaining unbound rules, then whichever partial | |
61 /// solution is currently better will always be the winner regardless of what | |
62 /// the remaining unbound rules are bound to. | |
63 /// | |
64 /// We start off with a [SolveState] where all rules are unbound (which | |
65 /// implicitly treats them as unsplit). For a given solve state, we can produce | |
66 /// a set of expanded states that takes some of the rules in the first long | |
67 /// line and bind them to split values. This always produces new solve states | |
68 /// with higher cost (but often fewer overflow characters) than the parent | |
69 /// state. | |
70 /// | |
71 /// We take these expanded states and add them to a work list sorted by cost. | |
72 /// Since unsplit rules always have lower cost solutions, we know that no state | |
73 /// we enqueue later will ever have a lower cost than the ones we already have | |
74 /// enqueued. | |
75 /// | |
76 /// Then we keep pulling states off the work list and expanding them and adding | |
77 /// the results back into the list. We do this until we hit a solution where | |
78 /// all characters fit in the page. The first one we find will have the lowest | |
79 /// cost and we're done. | |
80 /// | |
81 /// We also keep running track of the best solution we've found so far that | |
82 /// has the fewest overflow characters and the lowest cost. If no solution fits, | |
83 /// we'll use this one. | |
84 /// | |
85 /// When enqueing a solution, we can sometimes collapse it and a previously | |
86 /// queued one by preferring one or the other. If two solutions have the same | |
87 /// cost and we can prove that they won't diverge later as unbound rules are | |
88 /// set, we can pick the winner now and discard the other. This lets us avoid | |
89 /// redundantly exploring entire subtrees of the solution space. | |
90 /// | |
91 /// As a final escape hatch for pathologically nasty code, after trying some | |
92 /// fixed maximum number of solve states, we just bail and return the best | |
93 /// solution found so far. | |
94 /// | |
95 /// Even with the above algorithmic optimizations, complex code may still | |
96 /// require a lot of exploring to find an optimal solution. To make that fast, | |
97 /// this code is carefully profiled and optimized. If you modify this, make | |
98 /// sure to test against the benchmark to ensure you don't regress performance. | |
99 class LineSplitter { | |
100 final LineWriter writer; | |
101 | |
102 /// The list of chunks being split. | |
103 final List<Chunk> chunks; | |
104 | |
105 /// The set of soft rules whose values are being selected. | |
106 final List<Rule> rules; | |
107 | |
108 /// The number of characters of additional indentation to apply to each line. | |
109 /// | |
110 /// This is used when formatting blocks to get the output into the right | |
111 /// column based on where the block appears. | |
112 final int blockIndentation; | |
113 | |
114 /// The starting column of the first line. | |
115 final int firstLineIndent; | |
116 | |
117 /// The queue of solve states to explore further. | |
118 /// | |
119 /// This is sorted lowest-cost first. This ensures that as soon as we find a | |
120 /// solution that fits in the page, we know it will be the lowest cost one | |
121 /// and can stop looking. | |
122 final _queue = new SolveStateQueue(); | |
123 | |
124 /// The lowest cost solution found so far. | |
125 SolveState _bestSolution; | |
126 | |
127 /// Creates a new splitter for [_writer] that tries to fit [chunks] into the | |
128 /// page width. | |
129 LineSplitter(this.writer, List<Chunk> chunks, int blockIndentation, | |
130 int firstLineIndent, | |
131 {bool flushLeft: false}) | |
132 : chunks = chunks, | |
133 // Collect the set of soft rules that we need to select values for. | |
134 rules = chunks | |
135 .map((chunk) => chunk.rule) | |
136 .where((rule) => rule != null && rule is! HardSplitRule) | |
137 .toSet() | |
138 .toList(growable: false), | |
139 blockIndentation = blockIndentation, | |
140 firstLineIndent = flushLeft ? 0 : firstLineIndent + blockIndentation { | |
141 _queue.bindSplitter(this); | |
142 | |
143 // Store the rule's index in the rule so we can get from a chunk to a rule | |
144 // index quickly. | |
145 for (var i = 0; i < rules.length; i++) { | |
146 rules[i].index = i; | |
147 } | |
148 } | |
149 | |
150 /// Determine the best way to split the chunks into lines that fit in the | |
151 /// page, if possible. | |
152 /// | |
153 /// Returns a [SplitSet] that defines where each split occurs and the | |
154 /// indentation of each line. | |
155 /// | |
156 /// [firstLineIndent] is the number of characters of whitespace to prefix the | |
157 /// first line of output with. | |
158 SplitSet apply() { | |
159 // Start with a completely unbound, unsplit solution. | |
160 _queue.add(new SolveState(this, new RuleSet(rules.length))); | |
161 | |
162 var attempts = 0; | |
163 while (_queue.isNotEmpty) { | |
164 var state = _queue.removeFirst(); | |
165 | |
166 if (state.isBetterThan(_bestSolution)) { | |
167 _bestSolution = state; | |
168 | |
169 // Since we sort solutions by cost the first solution we find that | |
170 // fits is the winner. | |
171 if (_bestSolution.overflowChars == 0) break; | |
172 } | |
173 | |
174 if (debug.traceSplitter) { | |
175 var best = state == _bestSolution ? " (best)" : ""; | |
176 debug.log("$state$best"); | |
177 debug.dumpLines(chunks, firstLineIndent, state.splits); | |
178 debug.log(); | |
179 } | |
180 | |
181 if (attempts++ > _maxAttempts) break; | |
182 | |
183 // Try bumping the rule values for rules whose chunks are on long lines. | |
184 state.expand(); | |
185 } | |
186 | |
187 if (debug.traceSplitter) { | |
188 debug.log("$_bestSolution (winner)"); | |
189 debug.dumpLines(chunks, firstLineIndent, _bestSolution.splits); | |
190 debug.log(); | |
191 } | |
192 | |
193 return _bestSolution.splits; | |
194 } | |
195 | |
196 void enqueue(SolveState state) { | |
197 _queue.add(state); | |
198 } | |
199 } | |
OLD | NEW |