Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(41)

Side by Side Diff: src/jsregexp.cc

Issue 361033: Fix bug 486, Cyrillic character ranges in case independent regexps.... (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: '' Created 11 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | test/mjsunit/cyrillic.js » ('j') | test/mjsunit/regress/regress-486.js » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD NEW
1 // Copyright 2006-2009 the V8 project authors. All rights reserved. 1 // Copyright 2006-2009 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 2422 matching lines...) Expand 10 before | Expand all | Expand 10 after
2433 2433
2434 2434
2435 void TextNode::MakeCaseIndependent() { 2435 void TextNode::MakeCaseIndependent() {
2436 int element_count = elms_->length(); 2436 int element_count = elms_->length();
2437 for (int i = 0; i < element_count; i++) { 2437 for (int i = 0; i < element_count; i++) {
2438 TextElement elm = elms_->at(i); 2438 TextElement elm = elms_->at(i);
2439 if (elm.type == TextElement::CHAR_CLASS) { 2439 if (elm.type == TextElement::CHAR_CLASS) {
2440 RegExpCharacterClass* cc = elm.data.u_char_class; 2440 RegExpCharacterClass* cc = elm.data.u_char_class;
2441 ZoneList<CharacterRange>* ranges = cc->ranges(); 2441 ZoneList<CharacterRange>* ranges = cc->ranges();
2442 int range_count = ranges->length(); 2442 int range_count = ranges->length();
2443 for (int i = 0; i < range_count; i++) { 2443 for (int j = 0; j < range_count; j++) {
2444 ranges->at(i).AddCaseEquivalents(ranges); 2444 ranges->at(j).AddCaseEquivalents(ranges);
2445 } 2445 }
2446 } 2446 }
2447 } 2447 }
2448 } 2448 }
2449 2449
2450 2450
2451 int TextNode::GreedyLoopTextLength() { 2451 int TextNode::GreedyLoopTextLength() {
2452 TextElement elm = elms_->at(elms_->length() - 1); 2452 TextElement elm = elms_->at(elms_->length() - 1);
2453 if (elm.type == TextElement::CHAR_CLASS) { 2453 if (elm.type == TextElement::CHAR_CLASS) {
2454 return elm.cp_offset + 1; 2454 return elm.cp_offset + 1;
(...skipping 1499 matching lines...) Expand 10 before | Expand all | Expand 10 after
3954 // The start of the current block. Note that except for the first 3954 // The start of the current block. Note that except for the first
3955 // iteration 'start' is always equal to 'pos'. 3955 // iteration 'start' is always equal to 'pos'.
3956 int start; 3956 int start;
3957 // If it is not the start point of a block the entry contains the 3957 // If it is not the start point of a block the entry contains the
3958 // offset of the character from the start point. 3958 // offset of the character from the start point.
3959 if ((range[0] & kStartMarker) == 0) { 3959 if ((range[0] & kStartMarker) == 0) {
3960 start = pos - range[0]; 3960 start = pos - range[0];
3961 } else { 3961 } else {
3962 start = pos; 3962 start = pos;
3963 } 3963 }
3964 // Then we add the ranges on at a time, incrementing the current 3964 // Then we add the ranges one at a time, incrementing the current
3965 // position to be after the last block each time. The position 3965 // position to be after the last block each time. The position
3966 // always points to the start of a block. 3966 // always points to the start of a block.
3967 while (pos < to()) { 3967 while (pos < to()) {
3968 length = canonrange.get(start, '\0', range); 3968 length = canonrange.get(start, '\0', range);
3969 if (length == 0) { 3969 if (length == 0) {
3970 range[0] = start; 3970 range[0] = start;
3971 } else { 3971 } else {
3972 ASSERT_EQ(1, length); 3972 ASSERT_EQ(1, length);
3973 } 3973 }
3974 ASSERT((range[0] & kStartMarker) != 0); 3974 ASSERT((range[0] & kStartMarker) != 0);
3975 // The start point of a block contains the distance to the end 3975 // The start point of a block contains the distance to the end
3976 // of the range. 3976 // of the range.
3977 int block_end = start + (range[0] & kPayloadMask) - 1; 3977 int block_end = start + (range[0] & kPayloadMask) - 1;
3978 int end = (block_end > to()) ? to() : block_end; 3978 int end = (block_end > to()) ? to() : block_end;
3979 length = uncanonicalize.get(start, '\0', range); 3979 length = uncanonicalize.get(start, '\0', range);
3980 for (int i = 0; i < length; i++) { 3980 for (int i = 0; i < length; i++) {
3981 uc32 c = range[i]; 3981 uc32 c = range[i];
3982 uc16 range_from = c + (pos - start); 3982 uc16 range_from = c + (pos - start);
3983 uc16 range_to = c + (end - start); 3983 uc16 range_to = c + (end - start);
3984 if (!(from() <= range_from && range_to <= to())) { 3984 if (!(from() <= range_from && range_to <= to())) {
3985 ranges->Add(CharacterRange(range_from, range_to)); 3985 ranges->Add(CharacterRange(range_from, range_to));
3986 } 3986 }
3987 } 3987 }
3988 start = pos = block_end + 1; 3988 start = pos = block_end + 1;
3989 } 3989 }
3990 } else { 3990 } else if (from() > 0 || to() < String::kMaxUC16CharCode) {
3991 // TODO(plesner) when we've fixed the 2^11 bug in unibrow. 3991 // Unibrow ranges don't work for high characters due to the "2^11 bug".
3992 // Therefore we do something dumber for these ranges. We don't bother
3993 // if the range is 0-max (as encountered at the start of an unanchored
3994 // regexp).
3995 ZoneList<unibrow::uchar> *characters = new ZoneList<unibrow::uchar>(100);
3996 int bottom = from();
3997 int top = to();
3998 for (int i = bottom; i <= top; i++) {
3999 int length = uncanonicalize.get(i, '\0', chars);
4000 for (int j = 0; j < length; j++) {
4001 uc32 chr = chars[j];
4002 if (chr != i && chr < bottom || chr > top) {
4003 characters->Add(chr);
4004 }
4005 }
4006 }
4007 if (characters->length() > 0) {
4008 int new_from = characters->at(0);
4009 int new_to = new_from;
4010 for (int i = 1; i < characters->length(); i++) {
4011 int chr = characters->at(i);
4012 if (chr == new_to + 1) {
4013 new_to++;
4014 } else {
4015 if (new_to == new_from) {
4016 ranges->Add(CharacterRange::Singleton(new_from));
4017 } else {
4018 ranges->Add(CharacterRange(new_from, new_to));
4019 }
4020 new_from = new_to = chr;
4021 }
4022 }
4023 if (new_to == new_from) {
4024 ranges->Add(CharacterRange::Singleton(new_from));
4025 } else {
4026 ranges->Add(CharacterRange(new_from, new_to));
4027 }
4028 }
3992 } 4029 }
3993 } 4030 }
3994 4031
3995 4032
3996 ZoneList<CharacterRange>* CharacterSet::ranges() { 4033 ZoneList<CharacterRange>* CharacterSet::ranges() {
3997 if (ranges_ == NULL) { 4034 if (ranges_ == NULL) {
3998 ranges_ = new ZoneList<CharacterRange>(2); 4035 ranges_ = new ZoneList<CharacterRange>(2);
3999 CharacterRange::AddClassEscape(standard_set_type_, ranges_); 4036 CharacterRange::AddClassEscape(standard_set_type_, ranges_);
4000 } 4037 }
4001 return ranges_; 4038 return ranges_;
(...skipping 481 matching lines...) Expand 10 before | Expand all | Expand 10 after
4483 RegExpMacroAssemblerIrregexp macro_assembler(codes); 4520 RegExpMacroAssemblerIrregexp macro_assembler(codes);
4484 #endif 4521 #endif
4485 4522
4486 return compiler.Assemble(&macro_assembler, 4523 return compiler.Assemble(&macro_assembler,
4487 node, 4524 node,
4488 data->capture_count, 4525 data->capture_count,
4489 pattern); 4526 pattern);
4490 } 4527 }
4491 4528
4492 }} // namespace v8::internal 4529 }} // namespace v8::internal
OLD NEW
« no previous file with comments | « no previous file | test/mjsunit/cyrillic.js » ('j') | test/mjsunit/regress/regress-486.js » ('J')

Powered by Google App Engine
This is Rietveld 408576698