| Index: third_party/re2/re2/simplify.cc | 
| diff --git a/third_party/re2/re2/simplify.cc b/third_party/re2/re2/simplify.cc | 
| index faf32084e05f9f171b33fdc076d465e0ec693600..ecc60e7d8b127960534182ba03171c6bc1f0e2f0 100644 | 
| --- a/third_party/re2/re2/simplify.cc | 
| +++ b/third_party/re2/re2/simplify.cc | 
| @@ -61,7 +61,7 @@ bool Regexp::ComputeSimple() { | 
| // These are simple as long as the subpieces are simple. | 
| subs = sub(); | 
| for (int i = 0; i < nsub_; i++) | 
| -        if (!subs[i]->simple_) | 
| +        if (!subs[i]->simple()) | 
| return false; | 
| return true; | 
| case kRegexpCharClass: | 
| @@ -71,12 +71,12 @@ bool Regexp::ComputeSimple() { | 
| return !cc_->empty() && !cc_->full(); | 
| case kRegexpCapture: | 
| subs = sub(); | 
| -      return subs[0]->simple_; | 
| +      return subs[0]->simple(); | 
| case kRegexpStar: | 
| case kRegexpPlus: | 
| case kRegexpQuest: | 
| subs = sub(); | 
| -      if (!subs[0]->simple_) | 
| +      if (!subs[0]->simple()) | 
| return false; | 
| switch (subs[0]->op_) { | 
| case kRegexpStar: | 
| @@ -97,6 +97,36 @@ bool Regexp::ComputeSimple() { | 
| } | 
|  | 
| // Walker subclass used by Simplify. | 
| +// Coalesces runs of star/plus/quest/repeat of the same literal along with any | 
| +// occurrences of that literal into repeats of that literal. It also works for | 
| +// char classes, any char and any byte. | 
| +// PostVisit creates the coalesced result, which should then be simplified. | 
| +class CoalesceWalker : public Regexp::Walker<Regexp*> { | 
| + public: | 
| +  CoalesceWalker() {} | 
| +  virtual Regexp* PostVisit(Regexp* re, Regexp* parent_arg, Regexp* pre_arg, | 
| +                            Regexp** child_args, int nchild_args); | 
| +  virtual Regexp* Copy(Regexp* re); | 
| +  virtual Regexp* ShortVisit(Regexp* re, Regexp* parent_arg); | 
| + | 
| + private: | 
| +  // These functions are declared inside CoalesceWalker so that | 
| +  // they can edit the private fields of the Regexps they construct. | 
| + | 
| +  // Returns true if r1 and r2 can be coalesced. In particular, ensures that | 
| +  // the parse flags are consistent. (They will not be checked again later.) | 
| +  static bool CanCoalesce(Regexp* r1, Regexp* r2); | 
| + | 
| +  // Coalesces *r1ptr and *r2ptr. In most cases, the array elements afterwards | 
| +  // will be empty match and the coalesced op. In other cases, where part of a | 
| +  // literal string was removed to be coalesced, the array elements afterwards | 
| +  // will be the coalesced op and the remainder of the literal string. | 
| +  static void DoCoalesce(Regexp** r1ptr, Regexp** r2ptr); | 
| + | 
| +  DISALLOW_COPY_AND_ASSIGN(CoalesceWalker); | 
| +}; | 
| + | 
| +// Walker subclass used by Simplify. | 
| // The simplify walk is purely post-recursive: given the simplified children, | 
| // PostVisit creates the simplified result. | 
| // The child_args are simplified Regexp*s. | 
| @@ -104,9 +134,7 @@ class SimplifyWalker : public Regexp::Walker<Regexp*> { | 
| public: | 
| SimplifyWalker() {} | 
| virtual Regexp* PreVisit(Regexp* re, Regexp* parent_arg, bool* stop); | 
| -  virtual Regexp* PostVisit(Regexp* re, | 
| -                            Regexp* parent_arg, | 
| -                            Regexp* pre_arg, | 
| +  virtual Regexp* PostVisit(Regexp* re, Regexp* parent_arg, Regexp* pre_arg, | 
| Regexp** child_args, int nchild_args); | 
| virtual Regexp* Copy(Regexp* re); | 
| virtual Regexp* ShortVisit(Regexp* re, Regexp* parent_arg); | 
| @@ -130,7 +158,7 @@ class SimplifyWalker : public Regexp::Walker<Regexp*> { | 
| // Caller must Decref return value when done with it. | 
| static Regexp* SimplifyCharClass(Regexp* re); | 
|  | 
| -  DISALLOW_EVIL_CONSTRUCTORS(SimplifyWalker); | 
| +  DISALLOW_COPY_AND_ASSIGN(SimplifyWalker); | 
| }; | 
|  | 
| // Simplifies a regular expression, returning a new regexp. | 
| @@ -143,14 +171,261 @@ class SimplifyWalker : public Regexp::Walker<Regexp*> { | 
| // Caller must Decref() return value when done with it. | 
|  | 
| Regexp* Regexp::Simplify() { | 
| -  if (simple_) | 
| -    return Incref(); | 
| -  SimplifyWalker w; | 
| -  return w.Walk(this, NULL); | 
| +  CoalesceWalker cw; | 
| +  Regexp* cre = cw.Walk(this, NULL); | 
| +  if (cre == NULL) | 
| +    return cre; | 
| +  SimplifyWalker sw; | 
| +  Regexp* sre = sw.Walk(cre, NULL); | 
| +  cre->Decref(); | 
| +  return sre; | 
| } | 
|  | 
| #define Simplify DontCallSimplify  // Avoid accidental recursion | 
|  | 
| +// Utility function for PostVisit implementations that compares re->sub() with | 
| +// child_args to determine whether any child_args changed. In the common case, | 
| +// where nothing changed, calls Decref() for all child_args and returns false, | 
| +// so PostVisit must return re->Incref(). Otherwise, returns true. | 
| +static bool ChildArgsChanged(Regexp* re, Regexp** child_args) { | 
| +  for (int i = 0; i < re->nsub(); i++) { | 
| +    Regexp* sub = re->sub()[i]; | 
| +    Regexp* newsub = child_args[i]; | 
| +    if (newsub != sub) | 
| +      return true; | 
| +  } | 
| +  for (int i = 0; i < re->nsub(); i++) { | 
| +    Regexp* newsub = child_args[i]; | 
| +    newsub->Decref(); | 
| +  } | 
| +  return false; | 
| +} | 
| + | 
| +Regexp* CoalesceWalker::Copy(Regexp* re) { | 
| +  return re->Incref(); | 
| +} | 
| + | 
| +Regexp* CoalesceWalker::ShortVisit(Regexp* re, Regexp* parent_arg) { | 
| +  // This should never be called, since we use Walk and not | 
| +  // WalkExponential. | 
| +  LOG(DFATAL) << "CoalesceWalker::ShortVisit called"; | 
| +  return re->Incref(); | 
| +} | 
| + | 
| +Regexp* CoalesceWalker::PostVisit(Regexp* re, | 
| +                                  Regexp* parent_arg, | 
| +                                  Regexp* pre_arg, | 
| +                                  Regexp** child_args, | 
| +                                  int nchild_args) { | 
| +  if (re->nsub() == 0) | 
| +    return re->Incref(); | 
| + | 
| +  if (re->op() != kRegexpConcat) { | 
| +    if (!ChildArgsChanged(re, child_args)) | 
| +      return re->Incref(); | 
| + | 
| +    // Something changed. Build a new op. | 
| +    Regexp* nre = new Regexp(re->op(), re->parse_flags()); | 
| +    nre->AllocSub(re->nsub()); | 
| +    Regexp** nre_subs = nre->sub(); | 
| +    for (int i = 0; i < re->nsub(); i++) | 
| +      nre_subs[i] = child_args[i]; | 
| +    // Repeats and Captures have additional data that must be copied. | 
| +    if (re->op() == kRegexpRepeat) { | 
| +      nre->min_ = re->min(); | 
| +      nre->max_ = re->max(); | 
| +    } else if (re->op() == kRegexpCapture) { | 
| +      nre->cap_ = re->cap(); | 
| +    } | 
| +    return nre; | 
| +  } | 
| + | 
| +  bool can_coalesce = false; | 
| +  for (int i = 0; i < re->nsub(); i++) { | 
| +    if (i+1 < re->nsub() && | 
| +        CanCoalesce(child_args[i], child_args[i+1])) { | 
| +      can_coalesce = true; | 
| +      break; | 
| +    } | 
| +  } | 
| +  if (!can_coalesce) { | 
| +    if (!ChildArgsChanged(re, child_args)) | 
| +      return re->Incref(); | 
| + | 
| +    // Something changed. Build a new op. | 
| +    Regexp* nre = new Regexp(re->op(), re->parse_flags()); | 
| +    nre->AllocSub(re->nsub()); | 
| +    Regexp** nre_subs = nre->sub(); | 
| +    for (int i = 0; i < re->nsub(); i++) | 
| +      nre_subs[i] = child_args[i]; | 
| +    return nre; | 
| +  } | 
| + | 
| +  for (int i = 0; i < re->nsub(); i++) { | 
| +    if (i+1 < re->nsub() && | 
| +        CanCoalesce(child_args[i], child_args[i+1])) | 
| +      DoCoalesce(&child_args[i], &child_args[i+1]); | 
| +  } | 
| +  // Determine how many empty matches were left by DoCoalesce. | 
| +  int n = 0; | 
| +  for (int i = n; i < re->nsub(); i++) { | 
| +    if (child_args[i]->op() == kRegexpEmptyMatch) | 
| +      n++; | 
| +  } | 
| +  // Build a new op. | 
| +  Regexp* nre = new Regexp(re->op(), re->parse_flags()); | 
| +  nre->AllocSub(re->nsub() - n); | 
| +  Regexp** nre_subs = nre->sub(); | 
| +  for (int i = 0, j = 0; i < re->nsub(); i++) { | 
| +    if (child_args[i]->op() == kRegexpEmptyMatch) { | 
| +      child_args[i]->Decref(); | 
| +      continue; | 
| +    } | 
| +    nre_subs[j] = child_args[i]; | 
| +    j++; | 
| +  } | 
| +  return nre; | 
| +} | 
| + | 
| +bool CoalesceWalker::CanCoalesce(Regexp* r1, Regexp* r2) { | 
| +  // r1 must be a star/plus/quest/repeat of a literal, char class, any char or | 
| +  // any byte. | 
| +  if ((r1->op() == kRegexpStar || | 
| +       r1->op() == kRegexpPlus || | 
| +       r1->op() == kRegexpQuest || | 
| +       r1->op() == kRegexpRepeat) && | 
| +      (r1->sub()[0]->op() == kRegexpLiteral || | 
| +       r1->sub()[0]->op() == kRegexpCharClass || | 
| +       r1->sub()[0]->op() == kRegexpAnyChar || | 
| +       r1->sub()[0]->op() == kRegexpAnyByte)) { | 
| +    // r2 must be a star/plus/quest/repeat of the same literal, char class, | 
| +    // any char or any byte. | 
| +    if ((r2->op() == kRegexpStar || | 
| +         r2->op() == kRegexpPlus || | 
| +         r2->op() == kRegexpQuest || | 
| +         r2->op() == kRegexpRepeat) && | 
| +        Regexp::Equal(r1->sub()[0], r2->sub()[0]) && | 
| +        // The parse flags must be consistent. | 
| +        ((r1->parse_flags() & Regexp::NonGreedy) == | 
| +         (r2->parse_flags() & Regexp::NonGreedy))) { | 
| +      return true; | 
| +    } | 
| +    // ... OR an occurrence of that literal, char class, any char or any byte | 
| +    if (Regexp::Equal(r1->sub()[0], r2)) { | 
| +      return true; | 
| +    } | 
| +    // ... OR a literal string that begins with that literal. | 
| +    if (r1->sub()[0]->op() == kRegexpLiteral && | 
| +        r2->op() == kRegexpLiteralString && | 
| +        r2->runes()[0] == r1->sub()[0]->rune() && | 
| +        // The parse flags must be consistent. | 
| +        ((r1->sub()[0]->parse_flags() & Regexp::FoldCase) == | 
| +         (r2->parse_flags() & Regexp::FoldCase))) { | 
| +      return true; | 
| +    } | 
| +  } | 
| +  return false; | 
| +} | 
| + | 
| +void CoalesceWalker::DoCoalesce(Regexp** r1ptr, Regexp** r2ptr) { | 
| +  Regexp* r1 = *r1ptr; | 
| +  Regexp* r2 = *r2ptr; | 
| + | 
| +  Regexp* nre = Regexp::Repeat( | 
| +      r1->sub()[0]->Incref(), r1->parse_flags(), 0, 0); | 
| + | 
| +  switch (r1->op()) { | 
| +    case kRegexpStar: | 
| +      nre->min_ = 0; | 
| +      nre->max_ = -1; | 
| +      break; | 
| + | 
| +    case kRegexpPlus: | 
| +      nre->min_ = 1; | 
| +      nre->max_ = -1; | 
| +      break; | 
| + | 
| +    case kRegexpQuest: | 
| +      nre->min_ = 0; | 
| +      nre->max_ = 1; | 
| +      break; | 
| + | 
| +    case kRegexpRepeat: | 
| +      nre->min_ = r1->min(); | 
| +      nre->max_ = r1->max(); | 
| +      break; | 
| + | 
| +    default: | 
| +      LOG(DFATAL) << "DoCoalesce failed: r1->op() is " << r1->op(); | 
| +      nre->Decref(); | 
| +      return; | 
| +  } | 
| + | 
| +  switch (r2->op()) { | 
| +    case kRegexpStar: | 
| +      nre->max_ = -1; | 
| +      goto LeaveEmpty; | 
| + | 
| +    case kRegexpPlus: | 
| +      nre->min_++; | 
| +      nre->max_ = -1; | 
| +      goto LeaveEmpty; | 
| + | 
| +    case kRegexpQuest: | 
| +      if (nre->max() != -1) | 
| +        nre->max_++; | 
| +      goto LeaveEmpty; | 
| + | 
| +    case kRegexpRepeat: | 
| +      nre->min_ += r2->min(); | 
| +      if (r2->max() == -1) | 
| +        nre->max_ = -1; | 
| +      else if (nre->max() != -1) | 
| +        nre->max_ += r2->max(); | 
| +      goto LeaveEmpty; | 
| + | 
| +    case kRegexpLiteral: | 
| +    case kRegexpCharClass: | 
| +    case kRegexpAnyChar: | 
| +    case kRegexpAnyByte: | 
| +      nre->min_++; | 
| +      if (nre->max() != -1) | 
| +        nre->max_++; | 
| +      goto LeaveEmpty; | 
| + | 
| +    LeaveEmpty: | 
| +      *r1ptr = new Regexp(kRegexpEmptyMatch, Regexp::NoParseFlags); | 
| +      *r2ptr = nre; | 
| +      break; | 
| + | 
| +    case kRegexpLiteralString: { | 
| +      Rune r = r1->sub()[0]->rune(); | 
| +      // Determine how much of the literal string is removed. | 
| +      // We know that we have at least one rune. :) | 
| +      int n = 1; | 
| +      while (n < r2->nrunes() && r2->runes()[n] == r) | 
| +        n++; | 
| +      nre->min_ += n; | 
| +      if (nre->max() != -1) | 
| +        nre->max_ += n; | 
| +      if (n == r2->nrunes()) | 
| +        goto LeaveEmpty; | 
| +      *r1ptr = nre; | 
| +      *r2ptr = Regexp::LiteralString( | 
| +          &r2->runes()[n], r2->nrunes() - n, r2->parse_flags()); | 
| +      break; | 
| +    } | 
| + | 
| +    default: | 
| +      LOG(DFATAL) << "DoCoalesce failed: r2->op() is " << r2->op(); | 
| +      nre->Decref(); | 
| +      return; | 
| +  } | 
| + | 
| +  r1->Decref(); | 
| +  r2->Decref(); | 
| +} | 
| + | 
| Regexp* SimplifyWalker::Copy(Regexp* re) { | 
| return re->Incref(); | 
| } | 
| @@ -163,7 +438,7 @@ Regexp* SimplifyWalker::ShortVisit(Regexp* re, Regexp* parent_arg) { | 
| } | 
|  | 
| Regexp* SimplifyWalker::PreVisit(Regexp* re, Regexp* parent_arg, bool* stop) { | 
| -  if (re->simple_) { | 
| +  if (re->simple()) { | 
| *stop = true; | 
| return re->Incref(); | 
| } | 
| @@ -196,29 +471,14 @@ Regexp* SimplifyWalker::PostVisit(Regexp* re, | 
| case kRegexpConcat: | 
| case kRegexpAlternate: { | 
| // These are simple as long as the subpieces are simple. | 
| -      // Two passes to avoid allocation in the common case. | 
| -      bool changed = false; | 
| -      Regexp** subs = re->sub(); | 
| -      for (int i = 0; i < re->nsub_; i++) { | 
| -        Regexp* sub = subs[i]; | 
| -        Regexp* newsub = child_args[i]; | 
| -        if (newsub != sub) { | 
| -          changed = true; | 
| -          break; | 
| -        } | 
| -      } | 
| -      if (!changed) { | 
| -        for (int i = 0; i < re->nsub_; i++) { | 
| -          Regexp* newsub = child_args[i]; | 
| -          newsub->Decref(); | 
| -        } | 
| +      if (!ChildArgsChanged(re, child_args)) { | 
| re->simple_ = true; | 
| return re->Incref(); | 
| } | 
| Regexp* nre = new Regexp(re->op(), re->parse_flags()); | 
| -      nre->AllocSub(re->nsub_); | 
| +      nre->AllocSub(re->nsub()); | 
| Regexp** nre_subs = nre->sub(); | 
| -      for (int i = 0; i <re->nsub_; i++) | 
| +      for (int i = 0; i < re->nsub(); i++) | 
| nre_subs[i] = child_args[i]; | 
| nre->simple_ = true; | 
| return nre; | 
| @@ -234,7 +494,7 @@ Regexp* SimplifyWalker::PostVisit(Regexp* re, | 
| Regexp* nre = new Regexp(kRegexpCapture, re->parse_flags()); | 
| nre->AllocSub(1); | 
| nre->sub()[0] = newsub; | 
| -      nre->cap_ = re->cap_; | 
| +      nre->cap_ = re->cap(); | 
| nre->simple_ = true; | 
| return nre; | 
| } | 
| @@ -325,7 +585,6 @@ Regexp* SimplifyWalker::SimplifyRepeat(Regexp* re, int min, int max, | 
| // General case: x{4,} is xxxx+ | 
| Regexp* nre = new Regexp(kRegexpConcat, f); | 
| nre->AllocSub(min); | 
| -    VLOG(1) << "Simplify " << min; | 
| Regexp** nre_subs = nre->sub(); | 
| for (int i = 0; i < min-1; i++) | 
| nre_subs[i] = re->Incref(); | 
|  |