src/scanner.cc - Issue 14890: Reverting the BOM changes due to security concerns.

Side by Side Diff: src/scanner.cc

Issue 14890: Reverting the BOM changes due to security concerns. (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/

Patch Set: Created 12 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved.	1 // Copyright 2006-2008 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 101 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
112 }	112 }

113	113

114	114

115 void UTF16Buffer::PushBack(uc32 ch) {	115 void UTF16Buffer::PushBack(uc32 ch) {

116 pushback_buffer()->Add(last_);	116 pushback_buffer()->Add(last_);

117 last_ = ch;	117 last_ = ch;

118 pos_--;	118 pos_--;

119 }	119 }

120	120

121	121

122 static inline bool IsByteOrderMark(uc32 c) {

123 // The Unicode value U+FFFE is guaranteed never to be assigned as a

124 // Unicode character; this implies that in a Unicode context the

125 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF

126 // character expressed in little-endian byte order (since it could

127 // not be a U+FFFE character expressed in big-endian byte

128 // order). Nevertheless, we check for it to be compatible with

129 // Spidermonkey.

130 return c == 0xFEFF || c == 0xFFFE;

131 }

132

133

134 uc32 UTF16Buffer::Advance() {	122 uc32 UTF16Buffer::Advance() {

135 // NOTE: It is of importance to Persian / Farsi resources that we do	123 // NOTE: It is of importance to Persian / Farsi resources that we do

136 // not strip format control characters in the scanner; see	124 // not strip format control characters in the scanner; see

137 //	125 //

138 // https://bugzilla.mozilla.org/show_bug.cgi?id=274152	126 // https://bugzilla.mozilla.org/show_bug.cgi?id=274152

139 //	127 //

140 // So, even though ECMA-262, section 7.1, page 11, dictates that we	128 // So, even though ECMA-262, section 7.1, page 11, dictates that we

141 // must remove Unicode format-control characters, we only remove the BOM.	129 // must remove Unicode format-control characters, we do not. This is

142 // This is in line with how Safari handles it.	130 // in line with how IE and SpiderMonkey handles it.

143 if (!pushback_buffer()->is_empty()) {	131 if (!pushback_buffer()->is_empty()) {

144 pos_++;	132 pos_++;

145 return last_ = pushback_buffer()->RemoveLast();	133 return last_ = pushback_buffer()->RemoveLast();

	134 } else if (stream_->has_more()) {

	135 pos_++;

	136 uc32 next = stream_->GetNext();

	137 return last_ = next;

146 } else {	138 } else {

147 while (stream_->has_more()) {

148 pos_++;

149 uc32 next = stream_->GetNext();

150 if (!IsByteOrderMark(next)) return last_ = next;

151 }

152 // note: currently the following increment is necessary to avoid a	139 // note: currently the following increment is necessary to avoid a

153 // test-parser problem!	140 // test-parser problem!

154 pos_++;	141 pos_++;

155 return last_ = static_cast<uc32>(-1);	142 return last_ = static_cast<uc32>(-1);

156 }	143 }

157 }	144 }

158	145

159	146

160 void UTF16Buffer::SeekForward(int pos) {	147 void UTF16Buffer::SeekForward(int pos) {

161 pos_ = pos;	148 pos_ = pos;

(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
240 c0_ = source_.Advance();	227 c0_ = source_.Advance();

241 }	228 }

242	229

243	230

244 void Scanner::PushBack(uc32 ch) {	231 void Scanner::PushBack(uc32 ch) {

245 source_.PushBack(ch);	232 source_.PushBack(ch);

246 c0_ = ch;	233 c0_ = ch;

247 }	234 }

248	235

249	236

	237 static inline bool IsByteOrderMark(uc32 c) {

	238 // The Unicode value U+FFFE is guaranteed never to be assigned as a

	239 // Unicode character; this implies that in a Unicode context the

	240 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF

	241 // character expressed in little-endian byte order (since it could

	242 // not be a U+FFFE character expressed in big-endian byte

	243 // order). Nevertheless, we check for it to be compatible with

	244 // Spidermonkey.

	245 return c == 0xFEFF || c == 0xFFFE;

	246 }

	247

	248

250 void Scanner::SkipWhiteSpace(bool initial) {	249 void Scanner::SkipWhiteSpace(bool initial) {

251 has_line_terminator_before_next_ = initial;	250 has_line_terminator_before_next_ = initial;

252	251

253 while (true) {	252 while (true) {

254 while (kIsWhiteSpace.get(c0_)) {	253 // We treat byte-order marks (BOMs) as whitespace for better

	254 // compatibility with Spidermonkey and other JavaScript engines.

	255 while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) {

255 // IsWhiteSpace() includes line terminators!	256 // IsWhiteSpace() includes line terminators!

256 if (kIsLineTerminator.get(c0_))	257 if (kIsLineTerminator.get(c0_))

257 // Ignore line terminators, but remember them. This is necessary	258 // Ignore line terminators, but remember them. This is necessary

258 // for automatic semicolon insertion.	259 // for automatic semicolon insertion.

259 has_line_terminator_before_next_ = true;	260 has_line_terminator_before_next_ = true;

260 Advance();	261 Advance();

261 }	262 }

262	263

263 // If there is an HTML comment end '-->' at the beginning of a	264 // If there is an HTML comment end '-->' at the beginning of a

264 // line (with only whitespace in front of it), we treat the rest	265 // line (with only whitespace in front of it), we treat the rest

(...skipping 565 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
830 StartLiteral();	831 StartLiteral();

831 while (kIsIdentifierPart.get(c0_))	832 while (kIsIdentifierPart.get(c0_))

832 AddCharAdvance();	833 AddCharAdvance();

833 TerminateLiteral();	834 TerminateLiteral();

834	835

835 next_.location.end_pos = source_pos() - 1;	836 next_.location.end_pos = source_pos() - 1;

836 return true;	837 return true;

837 }	838 }

838	839

839 } } // namespace v8::internal	840 } } // namespace v8::internal

OLD	NEW

« no previous file with comments | « no previous file | test/mjsunit/bom.js » ('j') | no next file with comments »