Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(463)

Side by Side Diff: src/objects.cc

Issue 1137683003: Only record one in n line endings to save space. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: Fix false detection of exotic newlines Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/objects.h ('k') | src/rewriter.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2013 the V8 project authors. All rights reserved. 1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include <iomanip> 5 #include <iomanip>
6 #include <sstream> 6 #include <sstream>
7 7
8 #include "src/v8.h" 8 #include "src/v8.h"
9 9
10 #include "src/accessors.h" 10 #include "src/accessors.h"
(...skipping 8883 matching lines...) Expand 10 before | Expand all | Expand 10 after
8894 } 8894 }
8895 8895
8896 8896
8897 8897
8898 template <typename SourceChar> 8898 template <typename SourceChar>
8899 static void CalculateLineEndsImpl(Isolate* isolate, 8899 static void CalculateLineEndsImpl(Isolate* isolate,
8900 List<int>* line_ends, 8900 List<int>* line_ends,
8901 Vector<const SourceChar> src, 8901 Vector<const SourceChar> src,
8902 bool include_ending_line) { 8902 bool include_ending_line) {
8903 const int src_len = src.length(); 8903 const int src_len = src.length();
8904 bool exotic_newlines = false;
8905 if (include_ending_line) {
8906 // Initially assume reduction is 1, i.e. all line endings are in the array.
8907 DCHECK_EQ(line_ends->length(), Script::kReductionIndex);
8908 line_ends->Add(1);
8909 // Write a placeholder for the number-of-lines indicator.
8910 DCHECK_EQ(line_ends->length(), Script::kNumberOfLinesIndex);
8911 line_ends->Add(0);
8912 DCHECK_EQ(line_ends->length(), Script::kFirstLineEndIndex);
8913 // There's a fictional newline just before the first character. This
8914 // simplifies a lot of things.
8915 line_ends->Add(-1);
8916 }
8904 UnicodeCache* cache = isolate->unicode_cache(); 8917 UnicodeCache* cache = isolate->unicode_cache();
8905 for (int i = 0; i < src_len - 1; i++) { 8918 for (int i = 0; i < src_len - 1; i++) {
8906 SourceChar current = src[i]; 8919 SourceChar current = src[i];
8907 SourceChar next = src[i + 1]; 8920 SourceChar next = src[i + 1];
8908 if (cache->IsLineTerminatorSequence(current, next)) line_ends->Add(i); 8921 if (cache->IsLineTerminatorSequence(current, next)) {
8922 if (current != '\n' && current != '\r') exotic_newlines = true;
8923 line_ends->Add(i);
8924 }
8909 } 8925 }
8910 8926
8911 if (src_len > 0 && cache->IsLineTerminatorSequence(src[src_len - 1], 0)) { 8927 int last_posn = src_len - 1;
8912 line_ends->Add(src_len - 1); 8928 if (last_posn >= 0 && cache->IsLineTerminatorSequence(src[last_posn], 0)) {
8929 if (src[last_posn] != '\n' && src[last_posn] != '\r')
8930 exotic_newlines = true;
8931 line_ends->Add(last_posn);
8913 } else if (include_ending_line) { 8932 } else if (include_ending_line) {
8914 // Even if the last line misses a line end, it is counted. 8933 // Even if the last line misses a line end, it is counted. Because we
8915 line_ends->Add(src_len); 8934 // sometimes use character positions that are one beyond the end of the
8935 // source (see Rewriter::Rewrite) we set the newline one beyond that.
8936 // This is used for substr calculations, which trims to string length,
8937 // so it's harmless.
8938 line_ends->Add(last_posn + 1);
8939 }
8940 if (include_ending_line) {
8941 // Update number of lines in script.
8942 int lines = line_ends->length() - (Script::kFirstLineEndIndex + 1);
8943 line_ends->Set(Script::kNumberOfLinesIndex, lines);
8944 // Abuse some flags. The bots will run with a good variety of these flags,
8945 // giving better coverage for the reduction code.
8946 bool always_reduce = FLAG_always_opt;
8947 bool never_reduce = !FLAG_crankshaft;
8948 if (!never_reduce && !exotic_newlines &&
8949 (always_reduce ||
8950 (line_ends->length() > 5 && line_ends->length() * 8 > src_len / 12))) {
8951 // If the line-ends array (8 bytes per entry) is larger than about 8%
8952 // of the source length, then we reduce it to save memory. This won't
8953 // trigger if lines are > 100 characters on average. If it triggers, then
8954 // the goal is for it to take only 3% of the source size.
8955 int reduction =
8956 always_reduce ? 2 : (line_ends->length() * 8 * 33 / src_len);
8957 DCHECK(reduction > 1);
8958 line_ends->Set(Script::kReductionIndex, reduction);
8959 }
8916 } 8960 }
8917 } 8961 }
8918 8962
8919 8963
8920 Handle<FixedArray> String::CalculateLineEnds(Handle<String> src, 8964 Handle<FixedArray> String::CalculateLineEnds(Handle<String> src,
8921 bool include_ending_line) { 8965 bool include_ending_line) {
8922 src = Flatten(src); 8966 src = Flatten(src);
8923 // Rough estimate of line count based on a roughly estimated average 8967 // Rough estimate of line count based on a roughly estimated average
8924 // length of (unpacked) code. 8968 // length of (unpacked) code.
8925 int line_count_estimate = src->length() >> 4; 8969 int line_count_estimate = src->length() >> 4;
8926 List<int> line_ends(line_count_estimate); 8970 List<int> line_ends(line_count_estimate);
8927 Isolate* isolate = src->GetIsolate(); 8971 Isolate* isolate = src->GetIsolate();
8928 { DisallowHeapAllocation no_allocation; // ensure vectors stay valid. 8972 { DisallowHeapAllocation no_allocation; // ensure vectors stay valid.
8929 // Dispatch on type of strings. 8973 // Dispatch on type of strings.
8930 String::FlatContent content = src->GetFlatContent(); 8974 String::FlatContent content = src->GetFlatContent();
8931 DCHECK(content.IsFlat()); 8975 DCHECK(content.IsFlat());
8932 if (content.IsOneByte()) { 8976 if (content.IsOneByte()) {
8933 CalculateLineEndsImpl(isolate, 8977 CalculateLineEndsImpl(isolate,
8934 &line_ends, 8978 &line_ends,
8935 content.ToOneByteVector(), 8979 content.ToOneByteVector(),
8936 include_ending_line); 8980 include_ending_line);
8937 } else { 8981 } else {
8938 CalculateLineEndsImpl(isolate, 8982 CalculateLineEndsImpl(isolate,
8939 &line_ends, 8983 &line_ends,
8940 content.ToUC16Vector(), 8984 content.ToUC16Vector(),
8941 include_ending_line); 8985 include_ending_line);
8942 } 8986 }
8943 } 8987 }
8944 int line_count = line_ends.length(); 8988 if (include_ending_line) {
8945 Handle<FixedArray> array = isolate->factory()->NewFixedArray(line_count); 8989 const int kReductionIndex = Script::kReductionIndex;
8946 for (int i = 0; i < line_count; i++) { 8990 const int kFirstLineEndIndex = Script::kFirstLineEndIndex;
8947 array->set(i, Smi::FromInt(line_ends[i])); 8991 int line_count = line_ends.length() - kFirstLineEndIndex;
8992 int reduction = line_ends[kReductionIndex];
8993 int reduced_lines = (line_count + reduction - 1) / reduction;
8994 Handle<FixedArray> array =
8995 isolate->factory()->NewFixedArray(kFirstLineEndIndex + reduced_lines);
8996 for (int i = 0; i < kFirstLineEndIndex; i++) {
8997 array->set(i, Smi::FromInt(line_ends[i]));
8998 }
8999 int j = kFirstLineEndIndex;
9000 for (int i = 0; i < line_count; i += reduction, ++j) {
9001 array->set(j, Smi::FromInt(line_ends[i + kFirstLineEndIndex]));
9002 }
9003 return array;
9004 } else {
9005 Handle<FixedArray> array =
9006 isolate->factory()->NewFixedArray(line_ends.length());
9007 for (int i = 0; i < line_ends.length(); i++) {
9008 array->set(i, Smi::FromInt(line_ends[i]));
9009 }
9010 return array;
8948 } 9011 }
8949 return array;
8950 } 9012 }
8951 9013
8952 9014
8953 // Compares the contents of two strings by reading and comparing 9015 // Compares the contents of two strings by reading and comparing
8954 // int-sized blocks of characters. 9016 // int-sized blocks of characters.
8955 template <typename Char> 9017 template <typename Char>
8956 static inline bool CompareRawStringContents(const Char* const a, 9018 static inline bool CompareRawStringContents(const Char* const a,
8957 const Char* const b, 9019 const Char* const b,
8958 int length) { 9020 int length) {
8959 return CompareChars(a, b, length) == 0; 9021 return CompareChars(a, b, length) == 0;
(...skipping 1337 matching lines...) Expand 10 before | Expand all | Expand 10 after
10297 10359
10298 if (*array != isolate->heap()->empty_fixed_array()) { 10360 if (*array != isolate->heap()->empty_fixed_array()) {
10299 array->set_map(isolate->heap()->fixed_cow_array_map()); 10361 array->set_map(isolate->heap()->fixed_cow_array_map());
10300 } 10362 }
10301 10363
10302 script->set_line_ends(*array); 10364 script->set_line_ends(*array);
10303 DCHECK(script->line_ends()->IsFixedArray()); 10365 DCHECK(script->line_ends()->IsFixedArray());
10304 } 10366 }
10305 10367
10306 10368
10369 static int CountForwardNNewlines(Handle<Script> script, int block_position,
10370 int n) {
10371 int position = block_position;
10372 Handle<Object> source_object(script->source(), script->GetIsolate());
10373 if (!source_object->IsString() || n == 0) return position;
10374 Handle<String> source(Handle<String>::cast(source_object));
10375 int length = source->length();
10376 for (int i = position; i < length; i++) {
10377 uc16 current = source->Get(i);
10378 if (current == '\r') {
10379 n--;
10380 if (i + 1 < length && source->Get(i + 1) == '\n') i++;
10381 } else if (current == '\n') {
10382 n--;
10383 }
10384 if (n == 0) return i + 1;
10385 }
10386 if (n == 1 && length > 0) {
10387 uc16 last = source->Get(length - 1);
10388 if (last != '\n' && last != '\r') return length;
10389 }
10390 return -1;
10391 }
10392
10393
10307 int Script::GetColumnNumber(Handle<Script> script, int code_pos) { 10394 int Script::GetColumnNumber(Handle<Script> script, int code_pos) {
10395 // Get zero-based line number.
10308 int line_number = GetLineNumber(script, code_pos); 10396 int line_number = GetLineNumber(script, code_pos);
10309 if (line_number == -1) return -1; 10397 if (line_number == -1) return -1;
10310 10398
10311 DisallowHeapAllocation no_allocation; 10399 DisallowHeapAllocation no_allocation;
10312 FixedArray* line_ends_array = FixedArray::cast(script->line_ends()); 10400 FixedArray* line_ends_array = FixedArray::cast(script->line_ends());
10313 line_number = line_number - script->line_offset()->value(); 10401 line_number = line_number - script->line_offset()->value();
10314 if (line_number == 0) return code_pos + script->column_offset()->value(); 10402 int reduction = Smi::cast(line_ends_array->get(kReductionIndex))->value();
10315 int prev_line_end_pos = 10403
10316 Smi::cast(line_ends_array->get(line_number - 1))->value(); 10404 int line_block_position =
10317 return code_pos - (prev_line_end_pos + 1); 10405 Smi::cast(line_ends_array->get(line_number / reduction +
10406 kFirstLineEndIndex))->value() +
10407 1;
10408
10409 int line_position = CountForwardNNewlines(script, line_block_position,
10410 line_number % reduction);
10411 if (line_number == 0) line_position = -script->column_offset()->value();
10412 return code_pos - line_position;
10318 } 10413 }
10319 10414
10320 10415
10416 // Zero-based line number, calculated from UTF16 character position.
10321 int Script::GetLineNumberWithArray(int code_pos) { 10417 int Script::GetLineNumberWithArray(int code_pos) {
10322 DisallowHeapAllocation no_allocation; 10418 DisallowHeapAllocation no_allocation;
10323 DCHECK(line_ends()->IsFixedArray()); 10419 DCHECK(line_ends()->IsFixedArray());
10324 FixedArray* line_ends_array = FixedArray::cast(line_ends()); 10420 FixedArray* line_ends_array = FixedArray::cast(line_ends());
10325 int line_ends_len = line_ends_array->length(); 10421 int line_ends_len = line_ends_array->length();
10326 if (line_ends_len == 0) return -1; 10422 if (line_ends_len == 0) return -1; // This happens if there is no source.
10423 // There's always at least one line ending: A fictional newline just before
10424 // the start.
10425 DCHECK_GE(line_ends_len, kFirstLineEndIndex + 1);
10426 int lower = kFirstLineEndIndex;
10427 int upper = line_ends_len - 1;
10327 10428
10328 if ((Smi::cast(line_ends_array->get(0)))->value() >= code_pos) { 10429 if (code_pos < 0) return -1;
10329 return line_offset()->value(); 10430 int index = 0;
10431
10432 if (code_pos > Smi::cast(line_ends_array->get(upper))->value()) {
10433 index = upper;
10434 } else {
10435 while (lower + 1 < upper) {
10436 DCHECK_LE(Smi::cast(line_ends_array->get(lower))->value(), code_pos);
10437 DCHECK_LE(code_pos, Smi::cast(line_ends_array->get(upper))->value());
10438 int i = (lower + upper) >> 1;
10439 DCHECK(lower != i && upper != i);
10440 if ((Smi::cast(line_ends_array->get(i)))->value() >= code_pos) {
10441 upper = i;
10442 } else {
10443 lower = i;
10444 }
10445 }
10446 index = lower;
10330 } 10447 }
10331 10448
10332 int left = 0; 10449 int reduction = Smi::cast(line_ends_array->get(kReductionIndex))->value();
10333 int right = line_ends_len; 10450 int line_number = (index - kFirstLineEndIndex) * reduction;
10334 while (int half = (right - left) / 2) { 10451
10335 if ((Smi::cast(line_ends_array->get(left + half)))->value() > code_pos) { 10452 // We only saved an nth of the line ends in the array, because there were so
10336 right -= half; 10453 // many.
10337 } else { 10454 int start_of_earlier_line =
10338 left += half; 10455 Smi::cast(line_ends_array->get(index))->value() + 1;
10456
10457 if (reduction == 1 || !source()->IsString()) {
10458 return line_number + line_offset()->value();
10459 }
10460 String* src = String::cast(source());
10461 // This '>' would normally be a '>=', but due to {}-less 'with' statements in
10462 // top-level code we sometimes encounter code positions that are one character
10463 // after the end of the source. See comment in Rewriter::Rewrite.
10464 if (code_pos > src->length()) return -1;
10465 for (int i = start_of_earlier_line; i < src->length() && i < code_pos; i++) {
10466 uc16 current = src->Get(i);
10467 if (current == '\r') {
10468 if (i < code_pos - 1 && i < src->length() - 1 && src->Get(i + 1) == '\n')
10469 i++;
10470 line_number++;
10471 } else if (current == '\n') {
10472 line_number++;
10339 } 10473 }
10340 } 10474 }
10341 return right + line_offset()->value(); 10475 return line_number + line_offset()->value();
10342 } 10476 }
10343 10477
10344 10478
10345 int Script::GetLineNumber(Handle<Script> script, int code_pos) { 10479 int Script::GetLineNumber(Handle<Script> script, int code_pos) {
10346 InitLineEnds(script); 10480 InitLineEnds(script);
10347 return script->GetLineNumberWithArray(code_pos); 10481 return script->GetLineNumberWithArray(code_pos);
10348 } 10482 }
10349 10483
10350 10484
10351 int Script::GetLineNumber(int code_pos) { 10485 int Script::GetLineNumber(int code_pos) {
(...skipping 6616 matching lines...) Expand 10 before | Expand all | Expand 10 after
16968 Handle<Object> new_value) { 17102 Handle<Object> new_value) {
16969 if (cell->value() != *new_value) { 17103 if (cell->value() != *new_value) {
16970 cell->set_value(*new_value); 17104 cell->set_value(*new_value);
16971 Isolate* isolate = cell->GetIsolate(); 17105 Isolate* isolate = cell->GetIsolate();
16972 cell->dependent_code()->DeoptimizeDependentCodeGroup( 17106 cell->dependent_code()->DeoptimizeDependentCodeGroup(
16973 isolate, DependentCode::kPropertyCellChangedGroup); 17107 isolate, DependentCode::kPropertyCellChangedGroup);
16974 } 17108 }
16975 } 17109 }
16976 } // namespace internal 17110 } // namespace internal
16977 } // namespace v8 17111 } // namespace v8
OLDNEW
« no previous file with comments | « src/objects.h ('k') | src/rewriter.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698