| OLD | NEW |
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 1856 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1867 // character. | 1867 // character. |
| 1868 case '*': | 1868 case '*': |
| 1869 ranges->Add(CharacterRange::Everything()); | 1869 ranges->Add(CharacterRange::Everything()); |
| 1870 break; | 1870 break; |
| 1871 default: | 1871 default: |
| 1872 UNREACHABLE(); | 1872 UNREACHABLE(); |
| 1873 } | 1873 } |
| 1874 } | 1874 } |
| 1875 | 1875 |
| 1876 | 1876 |
| 1877 static unibrow::Mapping<unibrow::Ecma262UnCanonicalize> uncanonicalize; |
| 1878 static unibrow::Mapping<unibrow::CanonicalizationRange> canonrange; |
| 1879 |
| 1880 // Adds to 'ranges' the case-equivalent characters/ranges of this range that the range itself does not already contain. |
| 1881 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges) { |
| 1882 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
| 1883 if (IsSingleton()) { |
| 1884 // If this is a singleton we just expand the one character. |
| 1885 int length = uncanonicalize.get(from(), '\0', chars); |
| 1886 for (int i = 0; i < length; i++) { |
| 1887 uc32 chr = chars[i]; |
| 1888 if (chr != from()) { |
| 1889 ranges->Add(CharacterRange::Singleton(chars[i])); |
| 1890 } |
| 1891 } |
| 1892 } else if (from() <= kRangeCanonicalizeMax |
| 1893 && to() <= kRangeCanonicalizeMax) { |
| 1894 // If this is a range we expand the characters block by block, |
| 1895 // expanding contiguous subranges (blocks) one at a time. |
| 1896 // The approach is as follows. For a given start character we |
| 1897 // look up the block that contains it, for instance 'a' if the |
| 1898 // start character is 'c'. A block is characterized by the property |
| 1899 // that all characters uncanonicalize in the same way as the first |
| 1900 // element, except that each entry in the result is incremented |
| 1901 // by the distance from the first element. So a-z is a block |
| 1902 // because 'a' uncanonicalizes to ['a', 'A'] and the k'th letter |
| 1903 // uncanonicalizes to ['a' + k, 'A' + k]. |
| 1904 // Once we've found the start point we look up its uncanonicalization |
| 1905 // and produce a range for each element. For instance for [c-f] |
| 1906 // we look up ['a', 'A'] and produce [c-f] and [C-F]. We then only |
| 1907 // add a range if it is not already contained in the input, so [c-f] |
| 1908 // will be skipped but [C-F] will be added. If this range is not |
| 1909 // completely contained in a block we do this for all the blocks |
| 1910 // covered by the range. |
| 1911 unibrow::uchar range[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
| 1912 // First, look up the block that contains the 'from' character. |
| 1913 int length = canonrange.get(from(), '\0', range); |
| 1914 if (length == 0) { |
| 1915 range[0] = from(); |
| 1916 } else { |
| 1917 ASSERT_EQ(1, length); |
| 1918 } |
| 1919 int pos = from(); |
| 1920 // The start of the current block. Note that except for the first |
| 1921 // iteration 'start' is always equal to 'pos'. |
| 1922 int start; |
| 1923 // If it is not the start point of a block the entry contains the |
| 1924 // offset of the character from the start point. |
| 1925 if ((range[0] & kStartMarker) == 0) { |
| 1926 start = pos - range[0]; |
| 1927 } else { |
| 1928 start = pos; |
| 1929 } |
| 1930 // Then we add the ranges one at a time, incrementing the current |
| 1931 // position to be after the last block each time. The position |
| 1932 // always points to the start of a block. The bound is inclusive so a block that starts exactly at to() is still expanded. |
| 1933 while (pos <= to()) { |
| 1934 length = canonrange.get(start, '\0', range); |
| 1935 if (length == 0) { |
| 1936 range[0] = start; |
| 1937 } else { |
| 1938 ASSERT_EQ(1, length); |
| 1939 } |
| 1940 ASSERT((range[0] & kStartMarker) != 0); |
| 1941 // The entry for the start point of a block holds the length of the |
| 1942 // block in its payload bits; block_end is the block's last character. |
| 1943 int block_end = start + (range[0] & kPayloadMask) - 1; |
| 1944 int end = (block_end > to()) ? to() : block_end; |
| 1945 length = uncanonicalize.get(start, '\0', range); |
| 1946 for (int i = 0; i < length; i++) { |
| 1947 uc32 c = range[i]; |
| 1948 uc16 range_from = c + (pos - start); |
| 1949 uc16 range_to = c + (end - start); |
| 1950 if (!(from() <= range_from && range_to <= to())) |
| 1951 ranges->Add(CharacterRange(range_from, range_to)); |
| 1952 } |
| 1953 start = pos = block_end + 1; |
| 1954 } |
| 1955 } else { |
| 1956 // TODO: handle ranges reaching beyond kRangeCanonicalizeMax once the 2^11 bug in unibrow is fixed; until then no case equivalents are added for them. |
| 1957 } |
| 1958 } |
| 1959 |
| 1960 |
| 1877 // ------------------------------------------------------------------- | 1961 // ------------------------------------------------------------------- |
| 1878 // Interest propagation | 1962 // Interest propagation |
| 1879 | 1963 |
| 1880 | 1964 |
| 1881 RegExpNode* RegExpNode::GetSibling(NodeInfo* info) { | 1965 RegExpNode* RegExpNode::GetSibling(NodeInfo* info) { |
| 1882 for (int i = 0; i < siblings_.length(); i++) { | 1966 for (int i = 0; i < siblings_.length(); i++) { |
| 1883 RegExpNode* sibling = siblings_.Get(i); | 1967 RegExpNode* sibling = siblings_.Get(i); |
| 1884 if (sibling->info()->SameInterests(info)) | 1968 if (sibling->info()->SameInterests(info)) |
| 1885 return sibling; | 1969 return sibling; |
| 1886 } | 1970 } |
| (...skipping 413 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2300 } | 2384 } |
| 2301 | 2385 |
| 2302 RegExpMacroAssembler::RegExpMacroAssembler() { | 2386 RegExpMacroAssembler::RegExpMacroAssembler() { |
| 2303 } | 2387 } |
| 2304 | 2388 |
| 2305 RegExpMacroAssembler::~RegExpMacroAssembler() { | 2389 RegExpMacroAssembler::~RegExpMacroAssembler() { |
| 2306 } | 2390 } |
| 2307 | 2391 |
| 2308 | 2392 |
| 2309 }} // namespace v8::internal | 2393 }} // namespace v8::internal |
| OLD | NEW |