src/scanner.cc - Issue 196133017: Experimental parser: merge r19949

Side by Side Diff: src/scanner.cc

Issue 196133017: Experimental parser: merge r19949 (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser

Patch Set: Created 6 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 17 matching lines...) Expand all Loading...
28 // Features shared by parsing and pre-parsing scanners.	28 // Features shared by parsing and pre-parsing scanners.

29	29

30 #include <cmath>	30 #include <cmath>

31	31

32 #include "scanner.h"	32 #include "scanner.h"

33	33

34 #include "../include/v8stdint.h"	34 #include "../include/v8stdint.h"

35 #include "char-predicates-inl.h"	35 #include "char-predicates-inl.h"

36 #include "conversions-inl.h"	36 #include "conversions-inl.h"

37 #include "list-inl.h"	37 #include "list-inl.h"

	38 #include "v8.h"

	39 #include "parser.h"

38 #include "lexer/lexer.h"	40 #include "lexer/lexer.h"

39	41

40 namespace v8 {	42 namespace v8 {

41 namespace internal {	43 namespace internal {

42	44

43	45

44 #ifndef V8_USE_GENERATED_LEXER	46 #ifndef V8_USE_GENERATED_LEXER

45 // ----------------------------------------------------------------------------	47 // ----------------------------------------------------------------------------

46 // Scanner	48 // Scanner

47	49

(...skipping 855 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
903 KEYWORD_GROUP('v') \	905 KEYWORD_GROUP('v') \

904 KEYWORD("var", Token::VAR) \	906 KEYWORD("var", Token::VAR) \

905 KEYWORD("void", Token::VOID) \	907 KEYWORD("void", Token::VOID) \

906 KEYWORD_GROUP('w') \	908 KEYWORD_GROUP('w') \

907 KEYWORD("while", Token::WHILE) \	909 KEYWORD("while", Token::WHILE) \

908 KEYWORD("with", Token::WITH) \	910 KEYWORD("with", Token::WITH) \

909 KEYWORD_GROUP('y') \	911 KEYWORD_GROUP('y') \

910 KEYWORD("yield", Token::YIELD)	912 KEYWORD("yield", Token::YIELD)

911	913

912	914

913 static Token::Value KeywordOrIdentifierToken(const char* input,	915 static Token::Value KeywordOrIdentifierToken(const uint8_t* input,

914 int input_length,	916 int input_length,

915 bool harmony_scoping,	917 bool harmony_scoping,

916 bool harmony_modules) {	918 bool harmony_modules) {

917 ASSERT(input_length >= 1);	919 ASSERT(input_length >= 1);

918 const int kMinLength = 2;	920 const int kMinLength = 2;

919 const int kMaxLength = 10;	921 const int kMaxLength = 10;

920 if (input_length < kMinLength || input_length > kMaxLength) {	922 if (input_length < kMinLength || input_length > kMaxLength) {

921 return Token::IDENTIFIER;	923 return Token::IDENTIFIER;

922 }	924 }

923 switch (input[0]) {	925 switch (input[0]) {

(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
978 Advance();	980 Advance();

979 AddLiteralChar(next_char);	981 AddLiteralChar(next_char);

980 continue;	982 continue;

981 }	983 }

982 // Fallthrough if no longer able to complete keyword.	984 // Fallthrough if no longer able to complete keyword.

983 return ScanIdentifierSuffix(&literal);	985 return ScanIdentifierSuffix(&literal);

984 }	986 }

985	987

986 literal.Complete();	988 literal.Complete();

987	989

988 if (next_.literal_chars->is_ascii()) {	990 if (next_.literal_chars->is_one_byte()) {

989 Vector<const char> chars = next_.literal_chars->ascii_literal();	991 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();

990 return KeywordOrIdentifierToken(chars.start(),	992 return KeywordOrIdentifierToken(chars.start(),

991 chars.length(),	993 chars.length(),

992 harmony_scoping_,	994 harmony_scoping_,

993 harmony_modules_);	995 harmony_modules_);

994 }	996 }

995	997

996 return Token::IDENTIFIER;	998 return Token::IDENTIFIER;

997 }	999 }

998	1000

999	1001

(...skipping 110 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1110 Advance();	1112 Advance();

1111 }	1113 }

1112 }	1114 }

1113 literal.Complete();	1115 literal.Complete();

1114	1116

1115 next_.location.end_pos = source_pos() - 1;	1117 next_.location.end_pos = source_pos() - 1;

1116 return true;	1118 return true;

1117 }	1119 }

1118	1120

1119	1121

	1122 Handle<String> Scanner::AllocateNextLiteralString(Isolate* isolate,

	1123 PretenureFlag tenured) {

	1124 if (is_next_literal_one_byte()) {

	1125 return isolate->factory()->NewStringFromOneByte(

	1126 Vector<const uint8_t>::cast(next_literal_one_byte_string()), tenured);

	1127 } else {

	1128 return isolate->factory()->NewStringFromTwoByte(

	1129 next_literal_two_byte_string(), tenured);

	1130 }

	1131 }

	1132

	1133

	1134 Handle<String> Scanner::AllocateInternalizedString(Isolate* isolate) {

	1135 if (is_literal_one_byte()) {

	1136 return isolate->factory()->InternalizeOneByteString(

	1137 literal_one_byte_string());

	1138 } else {

	1139 return isolate->factory()->InternalizeTwoByteString(

	1140 literal_two_byte_string());

	1141 }

	1142 }

	1143

	1144

	1145 double Scanner::DoubleValue() {

	1146 ASSERT(is_literal_one_byte());

	1147 return StringToDouble(

	1148 unicode_cache_, Vector<const char>::cast(literal_one_byte_string()),

	1149 ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY);

	1150 }

	1151

	1152

	1153 int Scanner::FindNumber(DuplicateFinder* finder, int value) {

	1154 return finder->AddNumber(literal_one_byte_string(), value);

	1155 }

	1156

	1157

	1158 int Scanner::FindSymbol(DuplicateFinder* finder, int value) {

	1159 if (is_literal_one_byte()) {

	1160 return finder->AddOneByteSymbol(literal_one_byte_string(), value);

	1161 }

	1162 return finder->AddTwoByteSymbol(literal_two_byte_string(), value);

	1163 }

	1164

	1165

	1166 void Scanner::LogSymbol(ParserRecorder* log, int position) {

	1167 if (is_literal_one_byte()) {

	1168 log->LogOneByteSymbol(position, literal_one_byte_string());

	1169 } else {

	1170 log->LogTwoByteSymbol(position, literal_two_byte_string());

	1171 }

	1172 }

	1173

	1174

1120 #endif	1175 #endif

1121	1176

1122	1177

1123 int DuplicateFinder::AddAsciiSymbol(Vector<const char> key, int value) {	1178 int DuplicateFinder::AddOneByteSymbol(Vector<const uint8_t> key, int value) {

1124 return AddSymbol(Vector<const byte>::cast(key), true, value);	1179 return AddSymbol(key, true, value);

1125 }	1180 }

1126	1181

1127	1182

1128 int DuplicateFinder::AddUtf16Symbol(Vector<const uint16_t> key, int value) {	1183 int DuplicateFinder::AddTwoByteSymbol(Vector<const uint16_t> key, int value) {

1129 return AddSymbol(Vector<const byte>::cast(key), false, value);	1184 return AddSymbol(Vector<const uint8_t>::cast(key), false, value);

1130 }	1185 }

1131	1186

1132	1187

1133 int DuplicateFinder::AddSymbol(Vector<const byte> key,	1188 int DuplicateFinder::AddSymbol(Vector<const uint8_t> key,

1134 bool is_ascii,	1189 bool is_one_byte,

1135 int value) {	1190 int value) {

1136 uint32_t hash = Hash(key, is_ascii);	1191 uint32_t hash = Hash(key, is_one_byte);

1137 byte* encoding = BackupKey(key, is_ascii);	1192 byte* encoding = BackupKey(key, is_one_byte);

1138 HashMap::Entry* entry = map_.Lookup(encoding, hash, true);	1193 HashMap::Entry* entry = map_.Lookup(encoding, hash, true);

1139 int old_value = static_cast<int>(reinterpret_cast<intptr_t>(entry->value));	1194 int old_value = static_cast<int>(reinterpret_cast<intptr_t>(entry->value));

1140 entry->value =	1195 entry->value =

1141 reinterpret_cast<void*>(static_cast<intptr_t>(value | old_value));	1196 reinterpret_cast<void*>(static_cast<intptr_t>(value | old_value));

1142 return old_value;	1197 return old_value;

1143 }	1198 }

1144	1199

1145	1200

1146 int DuplicateFinder::AddNumber(Vector<const char> key, int value) {	1201 int DuplicateFinder::AddNumber(Vector<const uint8_t> key, int value) {

1147 ASSERT(key.length() > 0);	1202 ASSERT(key.length() > 0);

1148 // Quick check for already being in canonical form.	1203 // Quick check for already being in canonical form.

1149 if (IsNumberCanonical(key)) {	1204 if (IsNumberCanonical(key)) {

1150 return AddAsciiSymbol(key, value);	1205 return AddOneByteSymbol(key, value);

1151 }	1206 }

1152	1207

1153 int flags = ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY;	1208 int flags = ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY;

1154 double double_value = StringToDouble(unicode_constants_, key, flags, 0.0);	1209 double double_value = StringToDouble(

	1210 unicode_constants_, Vector<const char>::cast(key), flags, 0.0);

1155 int length;	1211 int length;

1156 const char* string;	1212 const char* string;

1157 if (!std::isfinite(double_value)) {	1213 if (!std::isfinite(double_value)) {

1158 string = "Infinity";	1214 string = "Infinity";

1159 length = 8; // strlen("Infinity");	1215 length = 8; // strlen("Infinity");

1160 } else {	1216 } else {

1161 string = DoubleToCString(double_value,	1217 string = DoubleToCString(double_value,

1162 Vector<char>(number_buffer_, kBufferSize));	1218 Vector<char>(number_buffer_, kBufferSize));

1163 length = StrLength(string);	1219 length = StrLength(string);

1164 }	1220 }

1165 return AddSymbol(Vector<const byte>(reinterpret_cast<const byte*>(string),	1221 return AddSymbol(Vector<const byte>(reinterpret_cast<const byte*>(string),

1166 length), true, value);	1222 length), true, value);

1167 }	1223 }

1168	1224

1169	1225

1170 bool DuplicateFinder::IsNumberCanonical(Vector<const char> number) {	1226 bool DuplicateFinder::IsNumberCanonical(Vector<const uint8_t> number) {

1171 // Test for a safe approximation of number literals that are already	1227 // Test for a safe approximation of number literals that are already

1172 // in canonical form: max 15 digits, no leading zeroes, except an	1228 // in canonical form: max 15 digits, no leading zeroes, except an

1173 // integer part that is a single zero, and no trailing zeros below	1229 // integer part that is a single zero, and no trailing zeros below

1174 // the decimal point.	1230 // the decimal point.

1175 int pos = 0;	1231 int pos = 0;

1176 int length = number.length();	1232 int length = number.length();

1177 if (number.length() > 15) return false;	1233 if (number.length() > 15) return false;

1178 if (number[pos] == '0') {	1234 if (number[pos] == '0') {

1179 pos++;	1235 pos++;

1180 } else {	1236 } else {

1181 while (pos < length &&	1237 while (pos < length &&

1182 static_cast<unsigned>(number[pos] - '0') <= ('9' - '0')) pos++;	1238 static_cast<unsigned>(number[pos] - '0') <= ('9' - '0')) pos++;

1183 }	1239 }

1184 if (length == pos) return true;	1240 if (length == pos) return true;

1185 if (number[pos] != '.') return false;	1241 if (number[pos] != '.') return false;

1186 pos++;	1242 pos++;

1187 bool invalid_last_digit = true;	1243 bool invalid_last_digit = true;

1188 while (pos < length) {	1244 while (pos < length) {

1189 byte digit = number[pos] - '0';	1245 uint8_t digit = number[pos] - '0';

1190 if (digit > '9' - '0') return false;	1246 if (digit > '9' - '0') return false;

1191 invalid_last_digit = (digit == 0);	1247 invalid_last_digit = (digit == 0);

1192 pos++;	1248 pos++;

1193 }	1249 }

1194 return !invalid_last_digit;	1250 return !invalid_last_digit;

1195 }	1251 }

1196	1252

1197	1253

1198 uint32_t DuplicateFinder::Hash(Vector<const byte> key, bool is_ascii) {	1254 uint32_t DuplicateFinder::Hash(Vector<const uint8_t> key, bool is_one_byte) {

1199 // Primitive hash function, almost identical to the one used	1255 // Primitive hash function, almost identical to the one used

1200 // for strings (except that it's seeded by the length and ASCII-ness).	1256 // for strings (except that it's seeded by the length and ASCII-ness).

1201 int length = key.length();	1257 int length = key.length();

1202 uint32_t hash = (length << 1) | (is_ascii ? 1 : 0) ;	1258 uint32_t hash = (length << 1) | (is_one_byte ? 1 : 0) ;

1203 for (int i = 0; i < length; i++) {	1259 for (int i = 0; i < length; i++) {

1204 uint32_t c = key[i];	1260 uint32_t c = key[i];

1205 hash = (hash + c) * 1025;	1261 hash = (hash + c) * 1025;

1206 hash ^= (hash >> 6);	1262 hash ^= (hash >> 6);

1207 }	1263 }

1208 return hash;	1264 return hash;

1209 }	1265 }

1210	1266

1211	1267

1212 bool DuplicateFinder::Match(void* first, void* second) {	1268 bool DuplicateFinder::Match(void* first, void* second) {

1213 // Decode lengths.	1269 // Decode lengths.

1214 // Length + ASCII-bit is encoded as base 128, most significant heptet first,	1270 // Length + ASCII-bit is encoded as base 128, most significant heptet first,

1215 // with a 8th bit being non-zero while there are more heptets.	1271 // with a 8th bit being non-zero while there are more heptets.

1216 // The value encodes the number of bytes following, and whether the original	1272 // The value encodes the number of bytes following, and whether the original

1217 // was ASCII.	1273 // was ASCII.

1218 byte* s1 = reinterpret_cast<byte*>(first);	1274 byte* s1 = reinterpret_cast<byte*>(first);

1219 byte* s2 = reinterpret_cast<byte*>(second);	1275 byte* s2 = reinterpret_cast<byte*>(second);

1220 uint32_t length_ascii_field = 0;	1276 uint32_t length_one_byte_field = 0;

1221 byte c1;	1277 byte c1;

1222 do {	1278 do {

1223 c1 = *s1;	1279 c1 = *s1;

1224 if (c1 != *s2) return false;	1280 if (c1 != *s2) return false;

1225 length_ascii_field = (length_ascii_field << 7) | (c1 & 0x7f);	1281 length_one_byte_field = (length_one_byte_field << 7) | (c1 & 0x7f);

1226 s1++;	1282 s1++;

1227 s2++;	1283 s2++;

1228 } while ((c1 & 0x80) != 0);	1284 } while ((c1 & 0x80) != 0);

1229 int length = static_cast<int>(length_ascii_field >> 1);	1285 int length = static_cast<int>(length_one_byte_field >> 1);

1230 return memcmp(s1, s2, length) == 0;	1286 return memcmp(s1, s2, length) == 0;

1231 }	1287 }

1232	1288

1233	1289

1234 byte* DuplicateFinder::BackupKey(Vector<const byte> bytes,	1290 byte* DuplicateFinder::BackupKey(Vector<const uint8_t> bytes,

1235 bool is_ascii) {	1291 bool is_one_byte) {

1236 uint32_t ascii_length = (bytes.length() << 1) | (is_ascii ? 1 : 0);	1292 uint32_t one_byte_length = (bytes.length() << 1) | (is_one_byte ? 1 : 0);

1237 backing_store_.StartSequence();	1293 backing_store_.StartSequence();

1238 // Emit ascii_length as base-128 encoded number, with the 7th bit set	1294 // Emit one_byte_length as base-128 encoded number, with the 7th bit set

1239 // on the byte of every heptet except the last, least significant, one.	1295 // on the byte of every heptet except the last, least significant, one.

1240 if (ascii_length >= (1 << 7)) {	1296 if (one_byte_length >= (1 << 7)) {

1241 if (ascii_length >= (1 << 14)) {	1297 if (one_byte_length >= (1 << 14)) {

1242 if (ascii_length >= (1 << 21)) {	1298 if (one_byte_length >= (1 << 21)) {

1243 if (ascii_length >= (1 << 28)) {	1299 if (one_byte_length >= (1 << 28)) {

1244 backing_store_.Add(static_cast<byte>((ascii_length >> 28) | 0x80));	1300 backing_store_.Add(

	1301 static_cast<uint8_t>((one_byte_length >> 28) | 0x80));

1245 }	1302 }

1246 backing_store_.Add(static_cast<byte>((ascii_length >> 21) | 0x80u));	1303 backing_store_.Add(

	1304 static_cast<uint8_t>((one_byte_length >> 21) | 0x80u));

1247 }	1305 }

1248 backing_store_.Add(static_cast<byte>((ascii_length >> 14) | 0x80u));	1306 backing_store_.Add(

	1307 static_cast<uint8_t>((one_byte_length >> 14) | 0x80u));

1249 }	1308 }

1250 backing_store_.Add(static_cast<byte>((ascii_length >> 7) | 0x80u));	1309 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u));

1251 }	1310 }

1252 backing_store_.Add(static_cast<byte>(ascii_length & 0x7f));	1311 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));

1253	1312

1254 backing_store_.AddBlock(bytes);	1313 backing_store_.AddBlock(bytes);

1255 return backing_store_.EndSequence().start();	1314 return backing_store_.EndSequence().start();

1256 }	1315 }

1257	1316

1258 } } // namespace v8::internal	1317 } } // namespace v8::internal

OLD	NEW

« no previous file with comments | « src/scanner.h ('k') | src/scopeinfo.cc » ('j') | no next file with comments »