Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2)

Side by Side Diff: src/jsregexp.cc

Issue 1253008: Fix bug in RegExp first-character-lookahead. (Closed)
Patch Set: Created 10 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2006-2009 the V8 project authors. All rights reserved. 1 // Copyright 2006-2009 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 4969 matching lines...) Expand 10 before | Expand all | Expand 10 after
4980 break; 4980 break;
4981 } 4981 }
4982 case AT_START: 4982 case AT_START:
4983 case AT_BOUNDARY: 4983 case AT_BOUNDARY:
4984 case AT_NON_BOUNDARY: 4984 case AT_NON_BOUNDARY:
4985 case AFTER_NEWLINE: 4985 case AFTER_NEWLINE:
4986 case AFTER_NONWORD_CHARACTER: 4986 case AFTER_NONWORD_CHARACTER:
4987 case AFTER_WORD_CHARACTER: { 4987 case AFTER_WORD_CHARACTER: {
4988 ASSERT_NOT_NULL(on_success()); 4988 ASSERT_NOT_NULL(on_success());
4989 budget = on_success()->ComputeFirstCharacterSet(budget); 4989 budget = on_success()->ComputeFirstCharacterSet(budget);
4990 set_first_character_set(on_success()->first_character_set()); 4990 if (budget >= 0) {
4991 set_first_character_set(on_success()->first_character_set());
4992 }
4991 break; 4993 break;
4992 } 4994 }
4993 } 4995 }
4994 } 4996 }
4995 return budget; 4997 return budget;
4996 } 4998 }
4997 4999
4998 5000
4999 int ActionNode::ComputeFirstCharacterSet(int budget) { 5001 int ActionNode::ComputeFirstCharacterSet(int budget) {
5000 if (type_ == POSITIVE_SUBMATCH_SUCCESS) return kComputeFirstCharacterSetFail; 5002 if (type_ == POSITIVE_SUBMATCH_SUCCESS) return kComputeFirstCharacterSetFail;
5001 budget--; 5003 budget--;
5002 if (budget >= 0) { 5004 if (budget >= 0) {
5003 ASSERT_NOT_NULL(on_success()); 5005 ASSERT_NOT_NULL(on_success());
5004 budget = on_success()->ComputeFirstCharacterSet(budget); 5006 budget = on_success()->ComputeFirstCharacterSet(budget);
5005 if (budget >= 0) { 5007 if (budget >= 0) {
5006 set_first_character_set(on_success()->first_character_set()); 5008 set_first_character_set(on_success()->first_character_set());
5007 } 5009 }
5008 } 5010 }
5009 return budget; 5011 return budget;
5010 } 5012 }
5011 5013
5012 5014
5013 int BackReferenceNode::ComputeFirstCharacterSet(int budget) { 5015 int BackReferenceNode::ComputeFirstCharacterSet(int budget) {
5014 // We don't know anything about the first character of a backreference 5016 // We don't know anything about the first character of a backreference
5015 // at this point. 5017 // at this point.
5018 // The potential first characters are the first characters of the capture,
5019 // and the first characters of the on_success node, depending on whether the
5020 // capture can be empty and whether it is known to be participating or known
5021 // not to be.
5016 return kComputeFirstCharacterSetFail; 5022 return kComputeFirstCharacterSetFail;
5017 } 5023 }
5018 5024
5019 5025
5020 int TextNode::ComputeFirstCharacterSet(int budget) { 5026 int TextNode::ComputeFirstCharacterSet(int budget) {
5021 budget--; 5027 budget--;
5022 if (budget >= 0) { 5028 if (budget >= 0) {
5023 ASSERT_NE(0, elements()->length()); 5029 ASSERT_NE(0, elements()->length());
5024 TextElement text = elements()->at(0); 5030 TextElement text = elements()->at(0);
5025 if (text.type == TextElement::ATOM) { 5031 if (text.type == TextElement::ATOM) {
5026 RegExpAtom* atom = text.data.u_atom; 5032 RegExpAtom* atom = text.data.u_atom;
5027 ASSERT_NE(0, atom->length()); 5033 ASSERT_NE(0, atom->length());
5028 uc16 first_char = atom->data()[0]; 5034 uc16 first_char = atom->data()[0];
5029 ZoneList<CharacterRange>* range = new ZoneList<CharacterRange>(1); 5035 ZoneList<CharacterRange>* range = new ZoneList<CharacterRange>(1);
5030 range->Add(CharacterRange(first_char, first_char)); 5036 range->Add(CharacterRange(first_char, first_char));
5031 set_first_character_set(range); 5037 set_first_character_set(range);
5032 } else { 5038 } else {
5033 ASSERT(text.type == TextElement::CHAR_CLASS); 5039 ASSERT(text.type == TextElement::CHAR_CLASS);
5034 RegExpCharacterClass* char_class = text.data.u_char_class; 5040 RegExpCharacterClass* char_class = text.data.u_char_class;
5041 ZoneList<CharacterRange>* ranges = char_class->ranges();
5042 CharacterRange::Canonicalize(ranges);
5035 if (char_class->is_negated()) { 5043 if (char_class->is_negated()) {
5036 ZoneList<CharacterRange>* ranges = char_class->ranges();
5037 int length = ranges->length(); 5044 int length = ranges->length();
5038 int new_length = length + 1; 5045 int new_length = length + 1;
5039 if (length > 0) { 5046 if (length > 0) {
5040 if (ranges->at(0).from() == 0) new_length--; 5047 if (ranges->at(0).from() == 0) new_length--;
5041 if (ranges->at(length - 1).to() == String::kMaxUC16CharCode) { 5048 if (ranges->at(length - 1).to() == String::kMaxUC16CharCode) {
5042 new_length--; 5049 new_length--;
5043 } 5050 }
5044 } 5051 }
5045 ZoneList<CharacterRange>* negated_ranges = 5052 ZoneList<CharacterRange>* negated_ranges =
5046 new ZoneList<CharacterRange>(new_length); 5053 new ZoneList<CharacterRange>(new_length);
5047 CharacterRange::Negate(ranges, negated_ranges); 5054 CharacterRange::Negate(ranges, negated_ranges);
5048 set_first_character_set(negated_ranges); 5055 set_first_character_set(negated_ranges);
5049 } else { 5056 } else {
5050 set_first_character_set(char_class->ranges()); 5057 // TODO(lrn): Canonicalize ranges when they are created
5058 // instead of waiting until now.
5059 set_first_character_set(ranges);
5051 } 5060 }
5052 } 5061 }
5053 } 5062 }
5054 return budget; 5063 return budget;
5055 } 5064 }
5056 5065
5057 5066
5058 5067
5059 // ------------------------------------------------------------------- 5068 // -------------------------------------------------------------------
5060 // Dispatch table construction 5069 // Dispatch table construction
(...skipping 185 matching lines...) Expand 10 before | Expand all | Expand 10 after
5246 node, 5255 node,
5247 data->capture_count, 5256 data->capture_count,
5248 pattern); 5257 pattern);
5249 } 5258 }
5250 5259
5251 5260
5252 int OffsetsVector::static_offsets_vector_[ 5261 int OffsetsVector::static_offsets_vector_[
5253 OffsetsVector::kStaticOffsetsVectorSize]; 5262 OffsetsVector::kStaticOffsetsVectorSize];
5254 5263
5255 }} // namespace v8::internal 5264 }} // namespace v8::internal
OLD | NEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698