Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(671)

Side by Side Diff: src/jsregexp.cc

Issue 10386090: Implement loop for global regexps in regexp assembler. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: Created 8 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
11 // with the distribution. 11 // with the distribution.
(...skipping 178 matching lines...) Expand 10 before | Expand all | Expand 10 after
190 // Throw an exception if we fail to parse the pattern. 190 // Throw an exception if we fail to parse the pattern.
191 ThrowRegExpException(re, 191 ThrowRegExpException(re,
192 pattern, 192 pattern,
193 parse_result.error, 193 parse_result.error,
194 "malformed_regexp"); 194 "malformed_regexp");
195 return Handle<Object>::null(); 195 return Handle<Object>::null();
196 } 196 }
197 197
198 bool has_been_compiled = false; 198 bool has_been_compiled = false;
199 199
200 if (parse_result.simple && 200 if (parse_result.simple &&
Erik Corry 2012/05/11 11:01:00 It would be interesting to remove this stuff so we ...
201 !flags.is_ignore_case() && 201 !flags.is_ignore_case() &&
202 !HasFewDifferentCharacters(pattern)) { 202 !HasFewDifferentCharacters(pattern)) {
203 // Parse-tree is a single atom that is equal to the pattern. 203 // Parse-tree is a single atom that is equal to the pattern.
204 AtomCompile(re, pattern, flags, pattern); 204 AtomCompile(re, pattern, flags, pattern);
205 has_been_compiled = true; 205 has_been_compiled = true;
206 } else if (parse_result.tree->IsAtom() && 206 } else if (parse_result.tree->IsAtom() &&
207 !flags.is_ignore_case() && 207 !flags.is_ignore_case() &&
208 parse_result.capture_count == 0) { 208 parse_result.capture_count == 0) {
209 RegExpAtom* atom = parse_result.tree->AsAtom(); 209 RegExpAtom* atom = parse_result.tree->AsAtom();
210 Vector<const uc16> atom_pattern = atom->data(); 210 Vector<const uc16> atom_pattern = atom->data();
(...skipping 211 matching lines...) Expand 10 before | Expand all | Expand 10 after
422 // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once. 422 // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once.
423 ThrowRegExpException(re, 423 ThrowRegExpException(re,
424 pattern, 424 pattern,
425 compile_data.error, 425 compile_data.error,
426 "malformed_regexp"); 426 "malformed_regexp");
427 return false; 427 return false;
428 } 428 }
429 RegExpEngine::CompilationResult result = 429 RegExpEngine::CompilationResult result =
430 RegExpEngine::Compile(&compile_data, 430 RegExpEngine::Compile(&compile_data,
431 flags.is_ignore_case(), 431 flags.is_ignore_case(),
432 flags.is_global(),
432 flags.is_multiline(), 433 flags.is_multiline(),
433 pattern, 434 pattern,
434 sample_subject, 435 sample_subject,
435 is_ascii); 436 is_ascii);
436 if (result.error_message != NULL) { 437 if (result.error_message != NULL) {
437 // Unable to compile regexp. 438 // Unable to compile regexp.
438 Handle<String> error_message = 439 Handle<String> error_message =
439 isolate->factory()->NewStringFromUtf8(CStrVector(result.error_message)); 440 isolate->factory()->NewStringFromUtf8(CStrVector(result.error_message));
440 CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate); 441 CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate);
441 return false; 442 return false;
(...skipping 451 matching lines...) Expand 10 before | Expand all | Expand 10 after
893 894
894 895
895 private: 896 private:
896 CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize]; 897 CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize];
897 int total_samples_; 898 int total_samples_;
898 }; 899 };
899 900
900 901
901 class RegExpCompiler { 902 class RegExpCompiler {
902 public: 903 public:
903 RegExpCompiler(int capture_count, bool ignore_case, bool is_ascii); 904 RegExpCompiler(int capture_count,
Erik Corry 2012/05/11 11:01:00 You don't need this change.
Yang 2012/05/16 14:58:47 Done.
905 bool ignore_case,
906 bool ascii);
904 907
905 int AllocateRegister() { 908 int AllocateRegister() {
906 if (next_register_ >= RegExpMacroAssembler::kMaxRegister) { 909 if (next_register_ >= RegExpMacroAssembler::kMaxRegister) {
907 reg_exp_too_big_ = true; 910 reg_exp_too_big_ = true;
908 return next_register_; 911 return next_register_;
909 } 912 }
910 return next_register_++; 913 return next_register_++;
911 } 914 }
912 915
913 RegExpEngine::CompilationResult Assemble(RegExpMacroAssembler* assembler, 916 RegExpEngine::CompilationResult Assemble(RegExpMacroAssembler* assembler,
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after
967 }; 970 };
968 971
969 972
970 static RegExpEngine::CompilationResult IrregexpRegExpTooBig() { 973 static RegExpEngine::CompilationResult IrregexpRegExpTooBig() {
971 return RegExpEngine::CompilationResult("RegExp too big"); 974 return RegExpEngine::CompilationResult("RegExp too big");
972 } 975 }
973 976
974 977
975 // Attempts to compile the regexp using an Irregexp code generator. Returns 978 // Attempts to compile the regexp using an Irregexp code generator. Returns
976 // a fixed array or a null handle depending on whether it succeeded. 979 // a fixed array or a null handle depending on whether it succeeded.
977 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii) 980 RegExpCompiler::RegExpCompiler(int capture_count,
981 bool ignore_case,
Erik Corry 2012/05/11 11:01:00 You don't need this change.
Yang 2012/05/16 14:58:47 Done.
982 bool ascii)
978 : next_register_(2 * (capture_count + 1)), 983 : next_register_(2 * (capture_count + 1)),
979 work_list_(NULL), 984 work_list_(NULL),
980 recursion_depth_(0), 985 recursion_depth_(0),
981 ignore_case_(ignore_case), 986 ignore_case_(ignore_case),
982 ascii_(ascii), 987 ascii_(ascii),
983 reg_exp_too_big_(false), 988 reg_exp_too_big_(false),
984 current_expansion_factor_(1), 989 current_expansion_factor_(1),
985 frequency_collator_() { 990 frequency_collator_() {
986 accept_ = new EndNode(EndNode::ACCEPT); 991 accept_ = new EndNode(EndNode::ACCEPT);
987 ASSERT(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister); 992 ASSERT(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister);
(...skipping 4785 matching lines...) Expand 10 before | Expand all | Expand 10 after
5773 5778
5774 void DispatchTableConstructor::VisitAction(ActionNode* that) { 5779 void DispatchTableConstructor::VisitAction(ActionNode* that) {
5775 RegExpNode* target = that->on_success(); 5780 RegExpNode* target = that->on_success();
5776 target->Accept(this); 5781 target->Accept(this);
5777 } 5782 }
5778 5783
5779 5784
5780 RegExpEngine::CompilationResult RegExpEngine::Compile( 5785 RegExpEngine::CompilationResult RegExpEngine::Compile(
5781 RegExpCompileData* data, 5786 RegExpCompileData* data,
5782 bool ignore_case, 5787 bool ignore_case,
5788 bool is_global,
5783 bool is_multiline, 5789 bool is_multiline,
5784 Handle<String> pattern, 5790 Handle<String> pattern,
5785 Handle<String> sample_subject, 5791 Handle<String> sample_subject,
5786 bool is_ascii) { 5792 bool is_ascii) {
5787 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) { 5793 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {
5788 return IrregexpRegExpTooBig(); 5794 return IrregexpRegExpTooBig();
5789 } 5795 }
5790 RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii); 5796 RegExpCompiler compiler(data->capture_count,
Erik Corry 2012/05/11 11:01:00 Or this.
Yang 2012/05/16 14:58:47 Done.
5797 ignore_case,
5798 is_ascii);
5791 5799
5792 // Sample some characters from the middle of the string. 5800 // Sample some characters from the middle of the string.
5793 static const int kSampleSize = 128; 5801 static const int kSampleSize = 128;
5794 5802
5795 FlattenString(sample_subject); 5803 FlattenString(sample_subject);
5796 int chars_sampled = 0; 5804 int chars_sampled = 0;
5797 int half_way = (sample_subject->length() - kSampleSize) / 2; 5805 int half_way = (sample_subject->length() - kSampleSize) / 2;
5798 for (int i = Max(0, half_way); 5806 for (int i = Max(0, half_way);
5799 i < sample_subject->length() && chars_sampled < kSampleSize; 5807 i < sample_subject->length() && chars_sampled < kSampleSize;
5800 i++, chars_sampled++) { 5808 i++, chars_sampled++) {
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after
5861 #if V8_TARGET_ARCH_IA32 5869 #if V8_TARGET_ARCH_IA32
5862 RegExpMacroAssemblerIA32 macro_assembler(mode, (data->capture_count + 1) * 2); 5870 RegExpMacroAssemblerIA32 macro_assembler(mode, (data->capture_count + 1) * 2);
5863 #elif V8_TARGET_ARCH_X64 5871 #elif V8_TARGET_ARCH_X64
5864 RegExpMacroAssemblerX64 macro_assembler(mode, (data->capture_count + 1) * 2); 5872 RegExpMacroAssemblerX64 macro_assembler(mode, (data->capture_count + 1) * 2);
5865 #elif V8_TARGET_ARCH_ARM 5873 #elif V8_TARGET_ARCH_ARM
5866 RegExpMacroAssemblerARM macro_assembler(mode, (data->capture_count + 1) * 2); 5874 RegExpMacroAssemblerARM macro_assembler(mode, (data->capture_count + 1) * 2);
5867 #elif V8_TARGET_ARCH_MIPS 5875 #elif V8_TARGET_ARCH_MIPS
5868 RegExpMacroAssemblerMIPS macro_assembler(mode, (data->capture_count + 1) * 2); 5876 RegExpMacroAssemblerMIPS macro_assembler(mode, (data->capture_count + 1) * 2);
5869 #endif 5877 #endif
5870 5878
5879 macro_assembler.set_global(is_global);
5880
5871 #else // V8_INTERPRETED_REGEXP 5881 #else // V8_INTERPRETED_REGEXP
5872 // Interpreted regexp implementation. 5882 // Interpreted regexp implementation.
5873 EmbeddedVector<byte, 1024> codes; 5883 EmbeddedVector<byte, 1024> codes;
5874 RegExpMacroAssemblerIrregexp macro_assembler(codes); 5884 RegExpMacroAssemblerIrregexp macro_assembler(codes);
5875 #endif // V8_INTERPRETED_REGEXP 5885 #endif // V8_INTERPRETED_REGEXP
5876 5886
5877 // Inserted here, instead of in Assembler, because it depends on information 5887 // Inserted here, instead of in Assembler, because it depends on information
5878 // in the AST that isn't replicated in the Node structure. 5888 // in the AST that isn't replicated in the Node structure.
5879 static const int kMaxBacksearchLimit = 1024; 5889 static const int kMaxBacksearchLimit = 1024;
5880 if (is_end_anchored && 5890 if (is_end_anchored &&
5881 !is_start_anchored && 5891 !is_start_anchored &&
5882 max_length < kMaxBacksearchLimit) { 5892 max_length < kMaxBacksearchLimit) {
5883 macro_assembler.SetCurrentPositionFromEnd(max_length); 5893 macro_assembler.SetCurrentPositionFromEnd(max_length);
5884 } 5894 }
5885 5895
5886 return compiler.Assemble(&macro_assembler, 5896 return compiler.Assemble(&macro_assembler,
5887 node, 5897 node,
5888 data->capture_count, 5898 data->capture_count,
5889 pattern); 5899 pattern);
5890 } 5900 }
5891 5901
5892 5902
5893 }} // namespace v8::internal 5903 }} // namespace v8::internal
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698