Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(54)

Unified Diff: tools/gn/command_format2.cc

Issue 750373002: XXX WIP gn format: Dijkstra-style line break Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: . Created 6 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | tools/gn/gn.gyp » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: tools/gn/command_format2.cc
diff --git a/tools/gn/command_format2.cc b/tools/gn/command_format2.cc
new file mode 100644
index 0000000000000000000000000000000000000000..820294361850c8f72c65bb31f6f2fe295bef73a1
--- /dev/null
+++ b/tools/gn/command_format2.cc
@@ -0,0 +1,566 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <sstream>
+
+#include "base/command_line.h"
+#include "base/files/file_util.h"
+#include "base/strings/string_split.h"
+#include "tools/gn/commands.h"
+#include "tools/gn/filesystem_utils.h"
+#include "tools/gn/input_file.h"
+#include "tools/gn/parser.h"
+#include "tools/gn/scheduler.h"
+#include "tools/gn/setup.h"
+#include "tools/gn/source_file.h"
+#include "tools/gn/tokenizer.h"
+
+namespace commands {
+
+const char kSwitchDumpTree[] = "dump-tree";
+const char kSwitchInPlace[] = "in-place";
+const char kSwitchStdin[] = "stdin";
+
+const char kFormat[] = "format";
+const char kFormat_HelpShort[] =
+ "format: Format .gn file. (ALPHA, WILL DESTROY DATA!)";
+const char kFormat_Help[] =
+ "gn format [--dump-tree] [--in-place] [--stdin] BUILD.gn\n"
+ "\n"
+ " Formats .gn file to a standard format. THIS IS NOT FULLY IMPLEMENTED\n"
+ " YET! IT WILL EAT YOUR BEAUTIFUL .GN FILES. AND YOUR LAUNDRY.\n"
+ " At a minimum, make sure everything is `git commit`d so you can\n"
+ " `git checkout -f` to recover.\n"
+ "\n"
+ "Arguments\n"
+ " --dump-tree\n"
+ " For debugging only, dumps the parse tree.\n"
+ "\n"
+ " --in-place\n"
+ " Instead writing the formatted file to stdout, replace the input\n"
+ " with the formatted output.\n"
+ "\n"
+ " --stdin\n"
+ " Read input from stdin (and write to stdout).\n"
+ "\n"
+ "Examples\n"
+ " gn format //some/BUILD.gn\n"
+ " gn format some\\BUILD.gn\n"
+ " gn format /abspath/some/BUILD.gn\n"
+ " gn format --stdin\n";
+
+const int kIndentSize = 2;
+//const int kMaximumWidth = 80;
+
+struct AnnotatedLine;
+struct AnnotatedLineNode;
+
+enum FormatDecision {
+ FD_UNFORMATTED,
+ FD_CONTINUE,
+ FD_BREAK
+};
+
+// -----------------------------------------------------------------------------
+// A wrapper around Token storing information about whitespace characters
+// preceding it.
+struct FormatToken {
+ FormatToken()
+ : newlines_before(0),
+ decision(FD_UNFORMATTED),
+ total_length(0),
+ previous(nullptr),
+ next(nullptr) {}
+
+ // The token.
+ Token tok;
+
+ // Number of newlines immediately before the token. This can be used to
+ // determine what the user wrote in the original code to e.g. leave an empty
+ // line between two function definitions.
+ int newlines_before;
+
+ /// Stores the formatting decision for the token once it was made.
+ FormatDecision decision;
+
+ // The total length of the annotated line up to and including this token.
+ int total_length;
+
+ FormatToken* previous;
+ FormatToken* next;
+
+ std::vector<AnnotatedLine*> children;
+
+
+ FormatToken* GetPreviousNonComment() const {
+ FormatToken* tok = previous;
+ /* TODO
+ while (tok && tok->Is(COMMENT))
+ tok = tok->previous;
+ */
+ return tok;
+ }
+};
+
+// -----------------------------------------------------------------------------
+struct ParenState {
+ ParenState(int indent, int indent_level, int last_space, bool no_line_break)
+ : indent(indent),
+ indent_level(indent_level),
+ last_space(last_space),
+ no_line_break(no_line_break) {}
+
+ // The position to which a specific parenthesis level needs to be indented.
+ int indent;
+
+ // The number of indentation levels of the block.
+ int indent_level;
+
+ // The potision of the last space on each level.
+ // Used to break in the case of:
+ // Function(parameter, OtherCall(
+ // other_parameter))
+ int last_space;
+
+ // Break after the next comma.
+ bool break_before_parameter;
+
+ // Line breaking in this context would break a formatting rule.
+ bool no_line_break;
+
+ // If the last binary operator on this level was wrapped to th next line.
+ bool last_operator_wrapped;
+
+ bool operator<(const ParenState& rhs) const {
+ if (indent != rhs.indent)
+ return indent < rhs.indent;
+ if (indent_level != rhs.indent_level)
+ return indent_level < rhs.indent_level;
+ if (last_space != rhs.last_space)
+ return last_space < rhs.last_space;
+ if (break_before_parameter != rhs.break_before_parameter)
+ return break_before_parameter;
+ if (no_line_break != rhs.no_line_break)
+ return no_line_break;
+ if (last_operator_wrapped != rhs.last_operator_wrapped)
+ return last_operator_wrapped;
+ return false;
+ }
+};
+
+// -----------------------------------------------------------------------------
+// The current state when indenting a chunk.
+//
+// As the indenting tries different combinations, this is copied by value.
+struct LineState {
+ // The number of used columns in the current line.
+ int column;
+
+ // The token that needs to be formatted next.
+ FormatToken* next_token;
+
+ // A stack tracing the properties applying to parenthesis levels.
+ std::vector<ParenState> stack;
+
+ // The indent of the first token.
+ int first_indent;
+
+ const AnnotatedLine* line;
+
+ // To be able to use LineState in a map.
+ bool operator<(const LineState& rhs) const {
+ if (next_token != rhs.next_token)
+ return next_token < rhs.next_token;
+ if (column != rhs.column)
+ return column < rhs.column;
+ if (first_indent != rhs.first_indent)
+ return first_indent < rhs.first_indent;
+ return stack < rhs.stack;
+ }
+};
+
+// -----------------------------------------------------------------------------
+class WhitespaceManager {
+ public:
+ void ReplaceWhitespace(FormatToken* tok,
+ int newlines,
+ int indent_level,
+ int spaces,
+ int start_of_token_column);
+};
+
+// -----------------------------------------------------------------------------
+class ContinuationIndenter {
+ public:
+ ContinuationIndenter(WhitespaceManager* whitespaces)
+ : whitespaces_(whitespaces) {}
+
+ // Get the initial state (the state after place |line|s first token and
+ // |first_indent|).
+ LineState GetInitialState(int first_indent,
+ const AnnotatedLine* line,
+ bool dry_run);
+
+ // Appends the next token to state and updates information necessary for
+ // indentation.
+ //
+ // Puts the token on the current line if newline is false and adds a line
+ // break and necessary indentation otherwise.
+ int AddTokenToState(LineState* state,
+ bool newline,
+ int dry_run,
+ int extra_spaces);
+
+ private:
+ WhitespaceManager* whitespaces_;
+};
+
+int ContinuationIndenter::AddTokenToState(LineState* state,
+ bool newline,
+ int dry_run,
+ int extra_spaces) {
+ const FormatToken& current = *state.next_token;
+}
+
+// -----------------------------------------------------------------------------
+// An annotated line is a sequence of Token that we would prefer to put on a
+// single line if there were no column limit.
+//
+// The parse tree is flattened down AnnotatedLines and passed to
+// AnnotatedLineFormatter.
+struct AnnotatedLine {
+ AnnotatedLine() : level(0) {}
+
+ // The indent level of the line.
+ int level;
+
+ FormatToken* first;
+ FormatToken* last;
+
+ std::vector<AnnotatedLine*> children;
+
+ std::list<AnnotatedLineNode> tokens;
+};
+
+// -----------------------------------------------------------------------------
+struct AnnotatedLineNode {
+ AnnotatedLineNode() : tok(nullptr) {}
+ AnnotatedLineNode(FormatToken* tok) : tok(tok) {}
+
+ FormatToken *tok;
+ std::vector<AnnotatedLine> children;
+};
+
+// -----------------------------------------------------------------------------
+class AnnotatedLineFormatter {
+ public:
+ AnnotatedLineFormatter(ContinuationIndenter* indenter,
+ WhitespaceManager* whitespaces)
+ : indenter_(indenter) /*, whitespaces_(whitespaces)*/ {}
+
+ // Formats a line, and returns the penalty.
+ int FormatLine(const AnnotatedLine& line, int first_indent, bool dry_run);
+
+ int FormatLines(const std::vector<AnnotatedLine*>& lines,
+ bool dry_run,
+ int additional_indent,
+ bool fix_bad_indentation);
+
+ private:
+ struct CompareLineStatePointers {
+ bool operator()(LineState* a, LineState* b) const { return *a < *b; }
+ };
+
+ struct StateNode {
+ StateNode(const LineState& state, bool newline, StateNode* previous)
+ : state(state), newline(newline), previous(previous) {}
+ LineState state;
+ bool newline;
+ StateNode* previous;
+ };
+
+ // A pair of <penalty, count> that is used to prioritize the BFS.
+ //
+ // In the case of equal penalties, we want to prefer states that were inserted
+ // first. During state generation, we make sure that we insert states first
+ // that break the line as late as possible.
+ typedef std::pair<unsigned, unsigned> OrderedPenalty;
+
+ // An item in the BFS search queue. The StateNode's State has the given
+ // OrderedPenalty.
+ typedef std::pair<OrderedPenalty, StateNode*> QueueItem;
+
+ // The BFS queue type.
+ typedef std::priority_queue<QueueItem,
+ std::vector<QueueItem>,
+ std::greater<QueueItem>> QueueType;
+
+ // Analyze the entire solution space starting from |initial_state|.
+ //
+ // This implements Dijkstra's algorithm on the graph that has LineStates as
+ // nodes. It tries to find the lowest penalty path from |initial_state| to a
+ // state where all tokens are placed. Returns the penalty.
+ int AnalyzeSolutionSpace(LineState* initial_state, bool dry_run);
+
+ // Add the following state to the analysis queue.
+ //
+ // Assume the current state is previous_node and has been reached with the
+ // given penalty. Insert a newline if newline is true.
+ void AddNextStateToQueue(int penalty,
+ StateNode* previous_node,
+ bool newline,
+ int* count,
+ QueueType* queue);
+
+ // If the state's next token is a } closing a nested block, format the nested
+ // block before it.
+ bool FormatChildren(LineState* state,
+ bool newline,
+ bool dry_run,
+ int* penalty);
+
+ ContinuationIndenter* indenter_;
+ //WhitespaceManager* whitespaces_;
+};
+
+int AnnotatedLineFormatter::FormatLines(
+ const std::vector<AnnotatedLine*>& lines,
+ bool dry_run,
+ int additional_indent,
+ bool fix_bad_indentation) {
+ return 0;
+}
+
+int AnnotatedLineFormatter::FormatLine(const AnnotatedLine& line,
+ int first_indent,
+ bool dry_run) {
+ LineState state = indenter_->GetInitialState(first_indent, &line, dry_run);
+ return AnalyzeSolutionSpace(&state, dry_run);
+}
+
+int AnnotatedLineFormatter::AnalyzeSolutionSpace(LineState* initial_state,
+ bool dry_run) {
+ std::set<LineState*, CompareLineStatePointers> seen;
+
+ // Increasing count of StateNode items we have created. This is used to
+ // create a deterministic order independent of the container.
+ int count = 0;
+ QueueType queue;
+
+ // Insert start element into queue.
+ StateNode* node = new StateNode(*initial_state, false, nullptr);
+ queue.push(QueueItem(OrderedPenalty(0, count), node));
+ ++count;
+
+ int penalty = 0;
+
+ // While not empty, take first element and follow edges.
+ while (!queue.empty()) {
+ penalty = queue.top().first.first;
+ StateNode* node = queue.top().second;
+ if (!node->state.next_token) {
+ fprintf(stderr, "\n---\nPenalty for line: %d\n", penalty);
+ break;
+ }
+ queue.pop();
+
+ if (!seen.insert(&node->state).second) {
+ // State already examinied with lower penalty.
+ continue;
+ }
+
+ FormatDecision last_format = node->state.next_token->decision;
+ if (last_format == FD_UNFORMATTED || last_format == FD_CONTINUE)
+ AddNextStateToQueue(penalty, node, false, &count, &queue);
+ if (last_format == FD_UNFORMATTED || last_format == FD_BREAK)
+ AddNextStateToQueue(penalty, node, true, &count, &queue);
+ }
+
+ if (queue.empty()) {
+ fprintf(stderr, "Could not find a solution.\n");
+ return 0;
+ }
+
+ if (!dry_run) {
+ fprintf(stderr, "todo; ReconstructPath\n");
+ }
+
+ fprintf(stderr, "Total number of analyzed states: %d\n---\n", count);
+
+ return penalty;
+}
+
+void AnnotatedLineFormatter::AddNextStateToQueue(int penalty,
+ StateNode* previous_node,
+ bool newline,
+ int* count,
+ QueueType* queue) {
+ StateNode* node =
+ new StateNode(previous_node->state, newline, previous_node);
+ if (!FormatChildren(&node->state, newline, true, &penalty))
+ return;
+
+ penalty = indenter_->AddTokenToState(&node->state, newline, true, 0);
+ queue->push(QueueItem(OrderedPenalty(penalty, *count), node));
+ ++(*count);
+}
+
+bool AnnotatedLineFormatter::FormatChildren(LineState* state,
+ bool newline,
+ bool dry_run,
+ int* penalty) {
+ FormatToken* previous = state->next_token->previous;
+ const FormatToken* lbrace = state->next_token->GetPreviousNonComment();
+ if (!lbrace || previous->children.size() == 0 /* TODO: stuff */)
+ return true;
+
+ if (newline) {
+ int additional_indent =
+ state->first_indent - state->line->level * kIndentSize;
+ penalty +=
+ FormatLines(previous->children, dry_run, additional_indent, true);
+ return true;
+ }
+
+ // Can't merge multiple statements into a single line.
+ if (previous->children.size() > 1)
+ return false;
+
+ // Can't merge into one line if this line ends on a comment.
+ //if (previous.is(COMMENT))
+ //return false;
+
+ // TODO: more
+
+ penalty +=
+ FormatLine(*previous->children[0], state->column + 1, dry_run);
+ state->column += 1 + previous->children[0]->last->total_length;
+ return true;
+}
+
+// -----------------------------------------------------------------------------
+void DoFormat(const ParseNode* root, bool dump_tree, std::string* output) {
+ *output = std::string();
+}
+
+std::string ReadStdin() {
+ static const int kBufferSize = 256;
+ char buffer[kBufferSize];
+ std::string result;
+ while (true) {
+ char* input = NULL;
+ input = fgets(buffer, kBufferSize, stdin);
+ if (input == NULL && feof(stdin))
+ return result;
+ int length = static_cast<int>(strlen(buffer));
+ if (length == 0)
+ return result;
+ else
+ result += std::string(buffer, length);
+ }
+}
+
+
+
+
+bool FormatFileToString(Setup* setup,
+ const SourceFile& file,
+ bool dump_tree,
+ std::string* output) {
+ Err err;
+ const ParseNode* parse_node =
+ setup->scheduler().input_file_manager()->SyncLoadFile(
+ LocationRange(), &setup->build_settings(), file, &err);
+ if (err.has_error()) {
+ err.PrintToStdout();
+ return false;
+ }
+ DoFormat(parse_node, dump_tree, output);
+ return true;
+}
+
+bool FormatStringToString(const std::string& input,
+ bool dump_tree,
+ std::string* output) {
+ SourceFile source_file;
+ InputFile file(source_file);
+ file.SetContents(input);
+ Err err;
+ // Tokenize.
+ std::vector<Token> tokens = Tokenizer::Tokenize(&file, &err);
+ if (err.has_error()) {
+ err.PrintToStdout();
+ return false;
+ }
+
+ // Parse.
+ scoped_ptr<ParseNode> parse_node = Parser::Parse(tokens, &err);
+ if (err.has_error()) {
+ err.PrintToStdout();
+ return false;
+ }
+
+ DoFormat(parse_node.get(), dump_tree, output);
+ return true;
+}
+
+int RunFormat(const std::vector<std::string>& args) {
+ bool dump_tree =
+ base::CommandLine::ForCurrentProcess()->HasSwitch(kSwitchDumpTree);
+
+ bool from_stdin =
+ base::CommandLine::ForCurrentProcess()->HasSwitch(kSwitchStdin);
+
+ if (from_stdin) {
+ if (args.size() != 0) {
+ Err(Location(), "Expecting no arguments when reading from stdin.\n")
+ .PrintToStdout();
+ return 1;
+ }
+ std::string input = ReadStdin();
+ std::string output;
+ if (!FormatStringToString(input, dump_tree, &output))
+ return 1;
+ printf("%s", output.c_str());
+ return 0;
+ }
+
+ // TODO(scottmg): Eventually, this should be a list/spec of files, and they
+ // should all be done in parallel.
+ if (args.size() != 1) {
+ Err(Location(), "Expecting exactly one argument, see `gn help format`.\n")
+ .PrintToStdout();
+ return 1;
+ }
+
+ Setup setup;
+ SourceDir source_dir =
+ SourceDirForCurrentDirectory(setup.build_settings().root_path());
+ SourceFile file = source_dir.ResolveRelativeFile(args[0],
+ setup.build_settings().root_path_utf8());
+
+ std::string output_string;
+ if (FormatFileToString(&setup, file, dump_tree, &output_string)) {
+ bool in_place =
+ base::CommandLine::ForCurrentProcess()->HasSwitch(kSwitchInPlace);
+ if (in_place) {
+ base::FilePath to_write = setup.build_settings().GetFullPath(file);
+ if (base::WriteFile(to_write,
+ output_string.data(),
+ static_cast<int>(output_string.size())) == -1) {
+ Err(Location(),
+ std::string("Failed to write formatted output back to \"") +
+ to_write.AsUTF8Unsafe() + std::string("\".")).PrintToStdout();
+ return 1;
+ }
+ printf("Wrote formatted to '%s'.\n", to_write.AsUTF8Unsafe().c_str());
+ } else {
+ printf("%s", output_string.c_str());
+ }
+ }
+
+ return 0;
+}
+
+} // namespace commands
« no previous file with comments | « no previous file | tools/gn/gn.gyp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698