| OLD | NEW |
| 1 // Copyright 2006-2009 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2009 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 2422 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2433 | 2433 |
| 2434 | 2434 |
| 2435 void TextNode::MakeCaseIndependent() { | 2435 void TextNode::MakeCaseIndependent() { |
| 2436 int element_count = elms_->length(); | 2436 int element_count = elms_->length(); |
| 2437 for (int i = 0; i < element_count; i++) { | 2437 for (int i = 0; i < element_count; i++) { |
| 2438 TextElement elm = elms_->at(i); | 2438 TextElement elm = elms_->at(i); |
| 2439 if (elm.type == TextElement::CHAR_CLASS) { | 2439 if (elm.type == TextElement::CHAR_CLASS) { |
| 2440 RegExpCharacterClass* cc = elm.data.u_char_class; | 2440 RegExpCharacterClass* cc = elm.data.u_char_class; |
| 2441 ZoneList<CharacterRange>* ranges = cc->ranges(); | 2441 ZoneList<CharacterRange>* ranges = cc->ranges(); |
| 2442 int range_count = ranges->length(); | 2442 int range_count = ranges->length(); |
| 2443 for (int i = 0; i < range_count; i++) { | 2443 for (int j = 0; j < range_count; j++) { |
| 2444 ranges->at(i).AddCaseEquivalents(ranges); | 2444 ranges->at(j).AddCaseEquivalents(ranges); |
| 2445 } | 2445 } |
| 2446 } | 2446 } |
| 2447 } | 2447 } |
| 2448 } | 2448 } |
| 2449 | 2449 |
| 2450 | 2450 |
| 2451 int TextNode::GreedyLoopTextLength() { | 2451 int TextNode::GreedyLoopTextLength() { |
| 2452 TextElement elm = elms_->at(elms_->length() - 1); | 2452 TextElement elm = elms_->at(elms_->length() - 1); |
| 2453 if (elm.type == TextElement::CHAR_CLASS) { | 2453 if (elm.type == TextElement::CHAR_CLASS) { |
| 2454 return elm.cp_offset + 1; | 2454 return elm.cp_offset + 1; |
| (...skipping 1499 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3954 // The start of the current block. Note that except for the first | 3954 // The start of the current block. Note that except for the first |
| 3955 // iteration 'start' is always equal to 'pos'. | 3955 // iteration 'start' is always equal to 'pos'. |
| 3956 int start; | 3956 int start; |
| 3957 // If it is not the start point of a block the entry contains the | 3957 // If it is not the start point of a block the entry contains the |
| 3958 // offset of the character from the start point. | 3958 // offset of the character from the start point. |
| 3959 if ((range[0] & kStartMarker) == 0) { | 3959 if ((range[0] & kStartMarker) == 0) { |
| 3960 start = pos - range[0]; | 3960 start = pos - range[0]; |
| 3961 } else { | 3961 } else { |
| 3962 start = pos; | 3962 start = pos; |
| 3963 } | 3963 } |
| 3964 // Then we add the ranges on at a time, incrementing the current | 3964 // Then we add the ranges one at a time, incrementing the current |
| 3965 // position to be after the last block each time. The position | 3965 // position to be after the last block each time. The position |
| 3966 // always points to the start of a block. | 3966 // always points to the start of a block. |
| 3967 while (pos < to()) { | 3967 while (pos < to()) { |
| 3968 length = canonrange.get(start, '\0', range); | 3968 length = canonrange.get(start, '\0', range); |
| 3969 if (length == 0) { | 3969 if (length == 0) { |
| 3970 range[0] = start; | 3970 range[0] = start; |
| 3971 } else { | 3971 } else { |
| 3972 ASSERT_EQ(1, length); | 3972 ASSERT_EQ(1, length); |
| 3973 } | 3973 } |
| 3974 ASSERT((range[0] & kStartMarker) != 0); | 3974 ASSERT((range[0] & kStartMarker) != 0); |
| 3975 // The start point of a block contains the distance to the end | 3975 // The start point of a block contains the distance to the end |
| 3976 // of the range. | 3976 // of the range. |
| 3977 int block_end = start + (range[0] & kPayloadMask) - 1; | 3977 int block_end = start + (range[0] & kPayloadMask) - 1; |
| 3978 int end = (block_end > to()) ? to() : block_end; | 3978 int end = (block_end > to()) ? to() : block_end; |
| 3979 length = uncanonicalize.get(start, '\0', range); | 3979 length = uncanonicalize.get(start, '\0', range); |
| 3980 for (int i = 0; i < length; i++) { | 3980 for (int i = 0; i < length; i++) { |
| 3981 uc32 c = range[i]; | 3981 uc32 c = range[i]; |
| 3982 uc16 range_from = c + (pos - start); | 3982 uc16 range_from = c + (pos - start); |
| 3983 uc16 range_to = c + (end - start); | 3983 uc16 range_to = c + (end - start); |
| 3984 if (!(from() <= range_from && range_to <= to())) { | 3984 if (!(from() <= range_from && range_to <= to())) { |
| 3985 ranges->Add(CharacterRange(range_from, range_to)); | 3985 ranges->Add(CharacterRange(range_from, range_to)); |
| 3986 } | 3986 } |
| 3987 } | 3987 } |
| 3988 start = pos = block_end + 1; | 3988 start = pos = block_end + 1; |
| 3989 } | 3989 } |
| 3990 } else { | 3990 } else if (from() > 0 || to() < String::kMaxUC16CharCode) { |
| 3991 // TODO(plesner) when we've fixed the 2^11 bug in unibrow. | 3991 // Unibrow ranges don't work for high characters due to the "2^11 bug". |
| 3992 // Therefore we do something dumber for these ranges. We don't bother |
| 3993 // if the range is 0-max (as encountered at the start of an unanchored |
| 3994 // regexp). |
| 3995 ZoneList<unibrow::uchar> *characters = new ZoneList<unibrow::uchar>(100); |
| 3996 int bottom = from(); |
| 3997 int top = to(); |
| 3998 for (int i = bottom; i <= top; i++) { |
| 3999 int length = uncanonicalize.get(i, '\0', chars); |
| 4000 for (int j = 0; j < length; j++) { |
| 4001 uc32 chr = chars[j]; |
| 4002 if (chr != i && chr < bottom || chr > top) { |
| 4003 characters->Add(chr); |
| 4004 } |
| 4005 } |
| 4006 } |
| 4007 if (characters->length() > 0) { |
| 4008 int new_from = characters->at(0); |
| 4009 int new_to = new_from; |
| 4010 for (int i = 1; i < characters->length(); i++) { |
| 4011 int chr = characters->at(i); |
| 4012 if (chr == new_to + 1) { |
| 4013 new_to++; |
| 4014 } else { |
| 4015 if (new_to == new_from) { |
| 4016 ranges->Add(CharacterRange::Singleton(new_from)); |
| 4017 } else { |
| 4018 ranges->Add(CharacterRange(new_from, new_to)); |
| 4019 } |
| 4020 new_from = new_to = chr; |
| 4021 } |
| 4022 } |
| 4023 if (new_to == new_from) { |
| 4024 ranges->Add(CharacterRange::Singleton(new_from)); |
| 4025 } else { |
| 4026 ranges->Add(CharacterRange(new_from, new_to)); |
| 4027 } |
| 4028 } |
| 3992 } | 4029 } |
| 3993 } | 4030 } |
| 3994 | 4031 |
| 3995 | 4032 |
| 3996 ZoneList<CharacterRange>* CharacterSet::ranges() { | 4033 ZoneList<CharacterRange>* CharacterSet::ranges() { |
| 3997 if (ranges_ == NULL) { | 4034 if (ranges_ == NULL) { |
| 3998 ranges_ = new ZoneList<CharacterRange>(2); | 4035 ranges_ = new ZoneList<CharacterRange>(2); |
| 3999 CharacterRange::AddClassEscape(standard_set_type_, ranges_); | 4036 CharacterRange::AddClassEscape(standard_set_type_, ranges_); |
| 4000 } | 4037 } |
| 4001 return ranges_; | 4038 return ranges_; |
| (...skipping 481 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4483 RegExpMacroAssemblerIrregexp macro_assembler(codes); | 4520 RegExpMacroAssemblerIrregexp macro_assembler(codes); |
| 4484 #endif | 4521 #endif |
| 4485 | 4522 |
| 4486 return compiler.Assemble(&macro_assembler, | 4523 return compiler.Assemble(&macro_assembler, |
| 4487 node, | 4524 node, |
| 4488 data->capture_count, | 4525 data->capture_count, |
| 4489 pattern); | 4526 pattern); |
| 4490 } | 4527 } |
| 4491 | 4528 |
| 4492 }} // namespace v8::internal | 4529 }} // namespace v8::internal |
| OLD | NEW |