OLD | NEW |
1 // Copyright 2006-2009 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2009 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 2422 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2433 | 2433 |
2434 | 2434 |
// TextNode::MakeCaseIndependent (shown as OLD | NEW columns of a review diff).
// Walks every element of elms_ and, for each CHAR_CLASS element, calls
// AddCaseEquivalents on each of the class's ranges, passing the class's own
// range list as the argument. Elements of any other type are left untouched.
// The only change between the two columns is the inner loop index, renamed
// from i to j so that it no longer shadows the outer loop's i.
2435 void TextNode::MakeCaseIndependent() { | 2435 void TextNode::MakeCaseIndependent() { |
2436 int element_count = elms_->length(); | 2436 int element_count = elms_->length(); |
2437 for (int i = 0; i < element_count; i++) { | 2437 for (int i = 0; i < element_count; i++) { |
2438 TextElement elm = elms_->at(i); | 2438 TextElement elm = elms_->at(i); |
2439 if (elm.type == TextElement::CHAR_CLASS) { | 2439 if (elm.type == TextElement::CHAR_CLASS) { |
2440 RegExpCharacterClass* cc = elm.data.u_char_class; | 2440 RegExpCharacterClass* cc = elm.data.u_char_class; |
2441 ZoneList<CharacterRange>* ranges = cc->ranges(); | 2441 ZoneList<CharacterRange>* ranges = cc->ranges(); |
// range_count is read once, before the inner loop, so the loop bound stays
// fixed even if the calls below grow the list (presumably they append the
// case variants to it - confirm against AddCaseEquivalents).
2442 int range_count = ranges->length(); | 2442 int range_count = ranges->length(); |
2443 for (int i = 0; i < range_count; i++) { | 2443 for (int j = 0; j < range_count; j++) { |
2444 ranges->at(i).AddCaseEquivalents(ranges); | 2444 ranges->at(j).AddCaseEquivalents(ranges); |
2445 } | 2445 } |
2446 } | 2446 } |
2447 } | 2447 } |
2448 } | 2448 } |
2449 | 2449 |
2450 | 2450 |
2451 int TextNode::GreedyLoopTextLength() { | 2451 int TextNode::GreedyLoopTextLength() { |
2452 TextElement elm = elms_->at(elms_->length() - 1); | 2452 TextElement elm = elms_->at(elms_->length() - 1); |
2453 if (elm.type == TextElement::CHAR_CLASS) { | 2453 if (elm.type == TextElement::CHAR_CLASS) { |
2454 return elm.cp_offset + 1; | 2454 return elm.cp_offset + 1; |
(...skipping 1499 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3954 // The start of the current block. Note that except for the first | 3954 // The start of the current block. Note that except for the first |
3955 // iteration 'start' is always equal to 'pos'. | 3955 // iteration 'start' is always equal to 'pos'. |
3956 int start; | 3956 int start; |
3957 // If it is not the start point of a block the entry contains the | 3957 // If it is not the start point of a block the entry contains the |
3958 // offset of the character from the start point. | 3958 // offset of the character from the start point. |
3959 if ((range[0] & kStartMarker) == 0) { | 3959 if ((range[0] & kStartMarker) == 0) { |
3960 start = pos - range[0]; | 3960 start = pos - range[0]; |
3961 } else { | 3961 } else { |
3962 start = pos; | 3962 start = pos; |
3963 } | 3963 } |
3964 // Then we add the ranges on at a time, incrementing the current | 3964 // Then we add the ranges one at a time, incrementing the current |
3965 // position to be after the last block each time. The position | 3965 // position to be after the last block each time. The position |
3966 // always points to the start of a block. | 3966 // always points to the start of a block. |
3967 while (pos < to()) { | 3967 while (pos < to()) { |
3968 length = canonrange.get(start, '\0', range); | 3968 length = canonrange.get(start, '\0', range); |
3969 if (length == 0) { | 3969 if (length == 0) { |
3970 range[0] = start; | 3970 range[0] = start; |
3971 } else { | 3971 } else { |
3972 ASSERT_EQ(1, length); | 3972 ASSERT_EQ(1, length); |
3973 } | 3973 } |
3974 ASSERT((range[0] & kStartMarker) != 0); | 3974 ASSERT((range[0] & kStartMarker) != 0); |
3975 // The start point of a block contains the distance to the end | 3975 // The start point of a block contains the distance to the end |
3976 // of the range. | 3976 // of the range. |
3977 int block_end = start + (range[0] & kPayloadMask) - 1; | 3977 int block_end = start + (range[0] & kPayloadMask) - 1; |
3978 int end = (block_end > to()) ? to() : block_end; | 3978 int end = (block_end > to()) ? to() : block_end; |
3979 length = uncanonicalize.get(start, '\0', range); | 3979 length = uncanonicalize.get(start, '\0', range); |
3980 for (int i = 0; i < length; i++) { | 3980 for (int i = 0; i < length; i++) { |
3981 uc32 c = range[i]; | 3981 uc32 c = range[i]; |
3982 uc16 range_from = c + (pos - start); | 3982 uc16 range_from = c + (pos - start); |
3983 uc16 range_to = c + (end - start); | 3983 uc16 range_to = c + (end - start); |
3984 if (!(from() <= range_from && range_to <= to())) { | 3984 if (!(from() <= range_from && range_to <= to())) { |
3985 ranges->Add(CharacterRange(range_from, range_to)); | 3985 ranges->Add(CharacterRange(range_from, range_to)); |
3986 } | 3986 } |
3987 } | 3987 } |
3988 start = pos = block_end + 1; | 3988 start = pos = block_end + 1; |
3989 } | 3989 } |
3990 } else { | 3990 } else if (from() > 0 || to() < String::kMaxUC16CharCode) { |
3991 // TODO(plesner) when we've fixed the 2^11 bug in unibrow. | 3991 // Unibrow ranges don't work for high characters due to the "2^11 bug". |
| 3992 // Therefore we do something dumber for these ranges. We don't bother |
| 3993 // if the range is 0-max (as encountered at the start of an unanchored |
| 3994 // regexp). |
| 3995 ZoneList<unibrow::uchar> *characters = new ZoneList<unibrow::uchar>(100); |
| 3996 int bottom = from(); |
| 3997 int top = to(); |
| 3998 for (int i = bottom; i <= top; i++) { |
| 3999 int length = uncanonicalize.get(i, '\0', chars); |
| 4000 for (int j = 0; j < length; j++) { |
| 4001 uc32 chr = chars[j]; |
| 4002 if (chr != i && chr < bottom || chr > top) { |
| 4003 characters->Add(chr); |
| 4004 } |
| 4005 } |
| 4006 } |
| 4007 if (characters->length() > 0) { |
| 4008 int new_from = characters->at(0); |
| 4009 int new_to = new_from; |
| 4010 for (int i = 1; i < characters->length(); i++) { |
| 4011 int chr = characters->at(i); |
| 4012 if (chr == new_to + 1) { |
| 4013 new_to++; |
| 4014 } else { |
| 4015 if (new_to == new_from) { |
| 4016 ranges->Add(CharacterRange::Singleton(new_from)); |
| 4017 } else { |
| 4018 ranges->Add(CharacterRange(new_from, new_to)); |
| 4019 } |
| 4020 new_from = new_to = chr; |
| 4021 } |
| 4022 } |
| 4023 if (new_to == new_from) { |
| 4024 ranges->Add(CharacterRange::Singleton(new_from)); |
| 4025 } else { |
| 4026 ranges->Add(CharacterRange(new_from, new_to)); |
| 4027 } |
| 4028 } |
3992 } | 4029 } |
3993 } | 4030 } |
3994 | 4031 |
3995 | 4032 |
3996 ZoneList<CharacterRange>* CharacterSet::ranges() { | 4033 ZoneList<CharacterRange>* CharacterSet::ranges() { |
3997 if (ranges_ == NULL) { | 4034 if (ranges_ == NULL) { |
3998 ranges_ = new ZoneList<CharacterRange>(2); | 4035 ranges_ = new ZoneList<CharacterRange>(2); |
3999 CharacterRange::AddClassEscape(standard_set_type_, ranges_); | 4036 CharacterRange::AddClassEscape(standard_set_type_, ranges_); |
4000 } | 4037 } |
4001 return ranges_; | 4038 return ranges_; |
(...skipping 481 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4483 RegExpMacroAssemblerIrregexp macro_assembler(codes); | 4520 RegExpMacroAssemblerIrregexp macro_assembler(codes); |
4484 #endif | 4521 #endif |
4485 | 4522 |
4486 return compiler.Assemble(&macro_assembler, | 4523 return compiler.Assemble(&macro_assembler, |
4487 node, | 4524 node, |
4488 data->capture_count, | 4525 data->capture_count, |
4489 pattern); | 4526 pattern); |
4490 } | 4527 } |
4491 | 4528 |
4492 }} // namespace v8::internal | 4529 }} // namespace v8::internal |
OLD | NEW |