tools/gn/command_format2.cc - Issue 750373002: XXX WIP gn format: Dijkstra-style line break

Unified Diff: tools/gn/command_format2.cc

Issue 750373002: XXX WIP gn format: Dijkstra-style line break Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: . Created 6 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: tools/gn/command_format2.cc

diff --git a/tools/gn/command_format2.cc b/tools/gn/command_format2.cc

new file mode 100644

index 0000000000000000000000000000000000000000..820294361850c8f72c65bb31f6f2fe295bef73a1

--- /dev/null

+++ b/tools/gn/command_format2.cc

@@ -0,0 +1,566 @@

+// Use of this source code is governed by a BSD-style license that can be

+// found in the LICENSE file.

+#include <sstream>

+#include "base/command_line.h"

+#include "base/files/file_util.h"

+#include "base/strings/string_split.h"

+#include "tools/gn/commands.h"

+#include "tools/gn/filesystem_utils.h"

+#include "tools/gn/input_file.h"

+#include "tools/gn/parser.h"

+#include "tools/gn/scheduler.h"

+#include "tools/gn/setup.h"

+#include "tools/gn/source_file.h"

+#include "tools/gn/tokenizer.h"

+namespace commands {

+const char kSwitchDumpTree[] = "dump-tree";

+const char kSwitchInPlace[] = "in-place";

+const char kSwitchStdin[] = "stdin";

+const char kFormat[] = "format";

+const char kFormat_HelpShort[] =

+ "format: Format .gn file. (ALPHA, WILL DESTROY DATA!)";

+const char kFormat_Help[] =

+ "gn format [--dump-tree] [--in-place] [--stdin] BUILD.gn\n"

+ "\n"

+ " Formats .gn file to a standard format. THIS IS NOT FULLY IMPLEMENTED\n"

+ " YET! IT WILL EAT YOUR BEAUTIFUL .GN FILES. AND YOUR LAUNDRY.\n"

+ " At a minimum, make sure everything is `git commit`d so you can\n"

+ " `git checkout -f` to recover.\n"

+ "\n"

+ "Arguments\n"

+ " --dump-tree\n"

+ " For debugging only, dumps the parse tree.\n"

+ "\n"

+ " --in-place\n"

+ " Instead writing the formatted file to stdout, replace the input\n"

+ " with the formatted output.\n"

+ "\n"

+ " --stdin\n"

+ " Read input from stdin (and write to stdout).\n"

+ "\n"

+ "Examples\n"

+ " gn format //some/BUILD.gn\n"

+ " gn format some\\BUILD.gn\n"

+ " gn format /abspath/some/BUILD.gn\n"

+ " gn format --stdin\n";

+const int kIndentSize = 2;

+//const int kMaximumWidth = 80;

+struct AnnotatedLine;

+struct AnnotatedLineNode;

+enum FormatDecision {

+ FD_UNFORMATTED,

+ FD_CONTINUE,

+ FD_BREAK

+};

+// -----------------------------------------------------------------------------

+// A wrapper around Token storing information about whitespace characters

+// preceding it.

+struct FormatToken {

+ FormatToken()

+ : newlines_before(0),

+ decision(FD_UNFORMATTED),

+ total_length(0),

+ previous(nullptr),

+ next(nullptr) {}

+ // The token.

+ Token tok;

+ // Number of newlines immediately before the token. This can be used to

+ // determine what the user wrote in the original code to e.g. leave an empty

+ // line between two function definitions.

+ int newlines_before;

+ /// Stores the formatting decision for the token once it was made.

+ FormatDecision decision;

+ // The total length of the annotated line up to and including this token.

+ int total_length;

+ FormatToken* previous;

+ FormatToken* next;

+ std::vector<AnnotatedLine*> children;

+ FormatToken* GetPreviousNonComment() const {

+ FormatToken* tok = previous;

+ /* TODO

+ while (tok && tok->Is(COMMENT))

+ tok = tok->previous;

+ */

+ return tok;

+ }

+};

+// -----------------------------------------------------------------------------

+struct ParenState {

+ ParenState(int indent, int indent_level, int last_space, bool no_line_break)

+ : indent(indent),

+ indent_level(indent_level),

+ last_space(last_space),

+ no_line_break(no_line_break) {}

+ // The position to which a specific parenthesis level needs to be indented.

+ int indent;

+ // The number of indentation levels of the block.

+ int indent_level;

+ // The potision of the last space on each level.

+ // Used to break in the case of:

+ // Function(parameter, OtherCall(

+ // other_parameter))

+ int last_space;

+ // Break after the next comma.

+ bool break_before_parameter;

+ // Line breaking in this context would break a formatting rule.

+ bool no_line_break;

+ // If the last binary operator on this level was wrapped to th next line.

+ bool last_operator_wrapped;

+ bool operator<(const ParenState& rhs) const {

+ if (indent != rhs.indent)

+ return indent < rhs.indent;

+ if (indent_level != rhs.indent_level)

+ return indent_level < rhs.indent_level;

+ if (last_space != rhs.last_space)

+ return last_space < rhs.last_space;

+ if (break_before_parameter != rhs.break_before_parameter)

+ return break_before_parameter;

+ if (no_line_break != rhs.no_line_break)

+ return no_line_break;

+ if (last_operator_wrapped != rhs.last_operator_wrapped)

+ return last_operator_wrapped;

+ return false;

+ }

+};

+// -----------------------------------------------------------------------------

+// The current state when indenting a chunk.

+//

+// As the indenting tries different combinations, this is copied by value.

+struct LineState {

+ // The number of used columns in the current line.

+ int column;

+ // The token that needs to be formatted next.

+ FormatToken* next_token;

+ // A stack tracing the properties applying to parenthesis levels.

+ std::vector<ParenState> stack;

+ // The indent of the first token.

+ int first_indent;

+ const AnnotatedLine* line;

+ // To be able to use LineState in a map.

+ bool operator<(const LineState& rhs) const {

+ if (next_token != rhs.next_token)

+ return next_token < rhs.next_token;

+ if (column != rhs.column)

+ return column < rhs.column;

+ if (first_indent != rhs.first_indent)

+ return first_indent < rhs.first_indent;

+ return stack < rhs.stack;

+ }

+};

+// -----------------------------------------------------------------------------

+class WhitespaceManager {

+ public:

+ void ReplaceWhitespace(FormatToken* tok,

+ int newlines,

+ int indent_level,

+ int spaces,

+ int start_of_token_column);

+};

+// -----------------------------------------------------------------------------

+class ContinuationIndenter {

+ public:

+ ContinuationIndenter(WhitespaceManager* whitespaces)

+ : whitespaces_(whitespaces) {}

+ // Get the initial state (the state after place |line|s first token and

+ // |first_indent|).

+ LineState GetInitialState(int first_indent,

+ const AnnotatedLine* line,

+ bool dry_run);

+ // Appends the next token to state and updates information necessary for

+ // indentation.

+ //

+ // Puts the token on the current line if newline is false and adds a line

+ // break and necessary indentation otherwise.

+ int AddTokenToState(LineState* state,

+ bool newline,

+ int dry_run,

+ int extra_spaces);

+ private:

+ WhitespaceManager* whitespaces_;

+};

+int ContinuationIndenter::AddTokenToState(LineState* state,

+ bool newline,

+ int dry_run,

+ int extra_spaces) {

+ const FormatToken& current = *state.next_token;

+// -----------------------------------------------------------------------------

+// An annotated line is a sequence of Token that we would prefer to put on a

+// single line if there were no column limit.

+//

+// The parse tree is flattened down AnnotatedLines and passed to

+// AnnotatedLineFormatter.

+struct AnnotatedLine {

+ AnnotatedLine() : level(0) {}

+ // The indent level of the line.

+ int level;

+ FormatToken* first;

+ FormatToken* last;

+ std::vector<AnnotatedLine*> children;

+ std::list<AnnotatedLineNode> tokens;

+};

+// -----------------------------------------------------------------------------

+struct AnnotatedLineNode {

+ AnnotatedLineNode() : tok(nullptr) {}

+ AnnotatedLineNode(FormatToken* tok) : tok(tok) {}

+ FormatToken *tok;

+ std::vector<AnnotatedLine> children;

+};

+// -----------------------------------------------------------------------------

+class AnnotatedLineFormatter {

+ public:

+ AnnotatedLineFormatter(ContinuationIndenter* indenter,

+ WhitespaceManager* whitespaces)

+ : indenter_(indenter) /*, whitespaces_(whitespaces)*/ {}

+ // Formats a line, and returns the penalty.

+ int FormatLine(const AnnotatedLine& line, int first_indent, bool dry_run);

+ int FormatLines(const std::vector<AnnotatedLine*>& lines,

+ bool dry_run,

+ int additional_indent,

+ bool fix_bad_indentation);

+ private:

+ struct CompareLineStatePointers {

+ bool operator()(LineState* a, LineState* b) const { return *a < *b; }

+ };

+ struct StateNode {

+ StateNode(const LineState& state, bool newline, StateNode* previous)

+ : state(state), newline(newline), previous(previous) {}

+ LineState state;

+ bool newline;

+ StateNode* previous;

+ };

+ // A pair of <penalty, count> that is used to prioritize the BFS.

+ //

+ // In the case of equal penalties, we want to prefer states that were inserted

+ // first. During state generation, we make sure that we insert states first

+ // that break the line as late as possible.

+ typedef std::pair<unsigned, unsigned> OrderedPenalty;

+ // An item in the BFS search queue. The StateNode's State has the given

+ // OrderedPenalty.

+ typedef std::pair<OrderedPenalty, StateNode*> QueueItem;

+ // The BFS queue type.

+ typedef std::priority_queue<QueueItem,

+ std::vector<QueueItem>,

+ std::greater<QueueItem>> QueueType;

+ // Analyze the entire solution space starting from |initial_state|.

+ //

+ // This implements Dijkstra's algorithm on the graph that has LineStates as

+ // nodes. It tries to find the lowest penalty path from |initial_state| to a

+ // state where all tokens are placed. Returns the penalty.

+ int AnalyzeSolutionSpace(LineState* initial_state, bool dry_run);

+ // Add the following state to the analysis queue.

+ //

+ // Assume the current state is previous_node and has been reached with the

+ // given penalty. Insert a newline if newline is true.

+ void AddNextStateToQueue(int penalty,

+ StateNode* previous_node,

+ bool newline,

+ int* count,

+ QueueType* queue);

+ // If the state's next token is a } closing a nested block, format the nested

+ // block before it.

+ bool FormatChildren(LineState* state,

+ bool newline,

+ bool dry_run,

+ int* penalty);

+ ContinuationIndenter* indenter_;

+ //WhitespaceManager* whitespaces_;

+};

+int AnnotatedLineFormatter::FormatLines(

+ const std::vector<AnnotatedLine*>& lines,

+ bool dry_run,

+ int additional_indent,

+ bool fix_bad_indentation) {

+ return 0;

+int AnnotatedLineFormatter::FormatLine(const AnnotatedLine& line,

+ int first_indent,

+ bool dry_run) {

+ LineState state = indenter_->GetInitialState(first_indent, &line, dry_run);

+ return AnalyzeSolutionSpace(&state, dry_run);

+int AnnotatedLineFormatter::AnalyzeSolutionSpace(LineState* initial_state,

+ bool dry_run) {

+ std::set<LineState*, CompareLineStatePointers> seen;

+ // Increasing count of StateNode items we have created. This is used to

+ // create a deterministic order independent of the container.

+ int count = 0;

+ QueueType queue;

+ // Insert start element into queue.

+ StateNode* node = new StateNode(*initial_state, false, nullptr);

+ queue.push(QueueItem(OrderedPenalty(0, count), node));

+ ++count;

+ int penalty = 0;

+ // While not empty, take first element and follow edges.

+ while (!queue.empty()) {

+ penalty = queue.top().first.first;

+ StateNode* node = queue.top().second;

+ if (!node->state.next_token) {

+ fprintf(stderr, "\n---\nPenalty for line: %d\n", penalty);

+ break;

+ }

+ queue.pop();

+ if (!seen.insert(&node->state).second) {

+ // State already examinied with lower penalty.

+ continue;

+ }

+ FormatDecision last_format = node->state.next_token->decision;

+ if (last_format == FD_UNFORMATTED || last_format == FD_CONTINUE)

+ AddNextStateToQueue(penalty, node, false, &count, &queue);

+ if (last_format == FD_UNFORMATTED || last_format == FD_BREAK)

+ AddNextStateToQueue(penalty, node, true, &count, &queue);

+ }

+ if (queue.empty()) {

+ fprintf(stderr, "Could not find a solution.\n");

+ return 0;

+ }

+ if (!dry_run) {

+ fprintf(stderr, "todo; ReconstructPath\n");

+ }

+ fprintf(stderr, "Total number of analyzed states: %d\n---\n", count);

+ return penalty;

+void AnnotatedLineFormatter::AddNextStateToQueue(int penalty,

+ StateNode* previous_node,

+ bool newline,

+ int* count,

+ QueueType* queue) {

+ StateNode* node =

+ new StateNode(previous_node->state, newline, previous_node);

+ if (!FormatChildren(&node->state, newline, true, &penalty))

+ return;

+ penalty = indenter_->AddTokenToState(&node->state, newline, true, 0);

+ queue->push(QueueItem(OrderedPenalty(penalty, *count), node));

+ ++(*count);

+bool AnnotatedLineFormatter::FormatChildren(LineState* state,

+ bool newline,

+ bool dry_run,

+ int* penalty) {

+ FormatToken* previous = state->next_token->previous;

+ const FormatToken* lbrace = state->next_token->GetPreviousNonComment();

+ if (!lbrace || previous->children.size() == 0 /* TODO: stuff */)

+ return true;

+ if (newline) {

+ int additional_indent =

+ state->first_indent - state->line->level * kIndentSize;

+ penalty +=

+ FormatLines(previous->children, dry_run, additional_indent, true);

+ return true;

+ }

+ // Can't merge multiple statements into a single line.

+ if (previous->children.size() > 1)

+ return false;

+ // Can't merge into one line if this line ends on a comment.

+ //if (previous.is(COMMENT))

+ //return false;

+ // TODO: more

+ penalty +=

+ FormatLine(*previous->children[0], state->column + 1, dry_run);

+ state->column += 1 + previous->children[0]->last->total_length;

+ return true;

+// -----------------------------------------------------------------------------

+void DoFormat(const ParseNode* root, bool dump_tree, std::string* output) {

+ *output = std::string();

+std::string ReadStdin() {

+ static const int kBufferSize = 256;

+ char buffer[kBufferSize];

+ std::string result;

+ while (true) {

+ char* input = NULL;

+ input = fgets(buffer, kBufferSize, stdin);

+ if (input == NULL && feof(stdin))

+ return result;

+ int length = static_cast<int>(strlen(buffer));

+ if (length == 0)

+ return result;

+ else

+ result += std::string(buffer, length);

+ }

+bool FormatFileToString(Setup* setup,

+ const SourceFile& file,

+ bool dump_tree,

+ std::string* output) {

+ Err err;

+ const ParseNode* parse_node =

+ setup->scheduler().input_file_manager()->SyncLoadFile(

+ LocationRange(), &setup->build_settings(), file, &err);

+ if (err.has_error()) {

+ err.PrintToStdout();

+ return false;

+ }

+ DoFormat(parse_node, dump_tree, output);

+ return true;

+bool FormatStringToString(const std::string& input,

+ bool dump_tree,

+ std::string* output) {

+ SourceFile source_file;

+ InputFile file(source_file);

+ file.SetContents(input);

+ Err err;

+ // Tokenize.

+ std::vector<Token> tokens = Tokenizer::Tokenize(&file, &err);

+ if (err.has_error()) {

+ err.PrintToStdout();

+ return false;

+ }

+ // Parse.

+ scoped_ptr<ParseNode> parse_node = Parser::Parse(tokens, &err);

+ if (err.has_error()) {

+ err.PrintToStdout();

+ return false;

+ }

+ DoFormat(parse_node.get(), dump_tree, output);

+ return true;

+int RunFormat(const std::vector<std::string>& args) {

+ bool dump_tree =

+ base::CommandLine::ForCurrentProcess()->HasSwitch(kSwitchDumpTree);

+ bool from_stdin =

+ base::CommandLine::ForCurrentProcess()->HasSwitch(kSwitchStdin);

+ if (from_stdin) {

+ if (args.size() != 0) {

+ Err(Location(), "Expecting no arguments when reading from stdin.\n")

+ .PrintToStdout();

+ return 1;

+ }

+ std::string input = ReadStdin();

+ std::string output;

+ if (!FormatStringToString(input, dump_tree, &output))

+ return 1;

+ printf("%s", output.c_str());

+ return 0;

+ }

+ // TODO(scottmg): Eventually, this should be a list/spec of files, and they

+ // should all be done in parallel.

+ if (args.size() != 1) {

+ Err(Location(), "Expecting exactly one argument, see `gn help format`.\n")

+ .PrintToStdout();

+ return 1;

+ }

+ Setup setup;

+ SourceDir source_dir =

+ SourceDirForCurrentDirectory(setup.build_settings().root_path());

+ SourceFile file = source_dir.ResolveRelativeFile(args[0],

+ setup.build_settings().root_path_utf8());

+ std::string output_string;

+ if (FormatFileToString(&setup, file, dump_tree, &output_string)) {

+ bool in_place =

+ base::CommandLine::ForCurrentProcess()->HasSwitch(kSwitchInPlace);

+ if (in_place) {

+ base::FilePath to_write = setup.build_settings().GetFullPath(file);

+ if (base::WriteFile(to_write,

+ output_string.data(),

+ static_cast<int>(output_string.size())) == -1) {

+ Err(Location(),

+ std::string("Failed to write formatted output back to \"") +

+ to_write.AsUTF8Unsafe() + std::string("\".")).PrintToStdout();

+ return 1;

+ }

+ printf("Wrote formatted to '%s'.\n", to_write.AsUTF8Unsafe().c_str());

+ } else {

+ printf("%s", output_string.c_str());

+ }

+ return 0;

+} // namespace commands

« no previous file with comments | « no previous file | tools/gn/gn.gyp » ('j') | no next file with comments »