$include_dir="/home/hyper-archives/boost-commit/include"; include("$include_dir/msg-header.inc") ?>
Subject: [Boost-commit] svn:boost r72612 - in trunk: boost/regex/v4 libs/regex/test/regress
From: john_at_[hidden]
Date: 2011-06-16 07:27:17
Author: johnmaddock
Date: 2011-06-16 07:27:13 EDT (Thu, 16 Jun 2011)
New Revision: 72612
URL: http://svn.boost.org/trac/boost/changeset/72612
Log:
Fix infinite recursion in bad recursive expressions.
Fix bug that allows invalid regex to go unnoticed and crash later.
Fixes #5613.
Fixes #5612.
Text files modified: 
   trunk/boost/regex/v4/basic_regex_creator.hpp          |    23 ++++++++++++++++++-----                 
   trunk/boost/regex/v4/basic_regex_parser.hpp           |     3 ++-                                     
   trunk/libs/regex/test/regress/test_perl_ex.cpp        |     1 +                                       
   trunk/libs/regex/test/regress/test_simple_repeats.cpp |     3 +++                                     
   4 files changed, 24 insertions(+), 6 deletions(-)
Modified: trunk/boost/regex/v4/basic_regex_creator.hpp
==============================================================================
--- trunk/boost/regex/v4/basic_regex_creator.hpp	(original)
+++ trunk/boost/regex/v4/basic_regex_creator.hpp	2011-06-16 07:27:13 EDT (Thu, 16 Jun 2011)
@@ -241,6 +241,7 @@
    unsigned                      m_backrefs;           // bitmask of permitted backrefs
    boost::uintmax_t              m_bad_repeats;        // bitmask of repeats we can't deduce a startmap for;
    bool                          m_has_recursions;     // set when we have recursive expresisons to fixup
+   std::vector<bool>             m_recursion_checks;   // notes which recursions we've followed while analysing this expression
    typename traits::char_class_type m_word_mask;       // mask used to determine if a character is a word character
    typename traits::char_class_type m_mask_space;      // mask used to determine if a character is a word character
    typename traits::char_class_type m_lower_mask;       // mask used to determine if a character is a lowercase character
@@ -712,6 +713,8 @@
    m_pdata->m_can_be_null = 0;
 
    m_bad_repeats = 0;
+   if(m_has_recursions)
+      m_recursion_checks.assign(1 + m_pdata->m_mark_count, false);
    create_startmap(m_pdata->m_first_state, m_pdata->m_startmap, &(m_pdata->m_can_be_null), mask_all);
    // get the restart type:
    m_pdata->m_restart_type = get_restart_type(m_pdata->m_first_state);
@@ -948,9 +951,14 @@
          state = state->next.p;
       }
    }
+
    // now work through our list, building all the maps as we go:
    while(v.size())
    {
+      // Initialize m_recursion_checks if we need it:
+      if(m_has_recursions)
+         m_recursion_checks.assign(1 + m_pdata->m_mark_count, false);
+
       const std::pair<bool, re_syntax_base*>& p = v.back();
       m_icase = p.first;
       state = p.second;
@@ -960,6 +968,9 @@
       m_bad_repeats = 0;
       create_startmap(state->next.p, static_cast<re_alt*>(state)->_map, &static_cast<re_alt*>(state)->can_be_null, mask_take);
       m_bad_repeats = 0;
+
+      if(m_has_recursions)
+         m_recursion_checks.assign(1 + m_pdata->m_mark_count, false);
       create_startmap(static_cast<re_alt*>(state)->alt.p, static_cast<re_alt*>(state)->_map, &static_cast<re_alt*>(state)->can_be_null, mask_skip);
       // adjust the type of the state to allow for faster matching:
       state->type = this->get_repeat_type(state);
@@ -1114,7 +1125,11 @@
       }
       case syntax_element_recurse:
          {
-            if(recursion_start == state)
+            if(state->type == syntax_element_startmark)
+               recursion_sub = static_cast<re_brace*>(state)->index;
+            else
+               recursion_sub = 0;
+            if(m_recursion_checks[recursion_sub])
             {
                // Infinite recursion!!
                if(0 == this->m_pdata->m_status) // update the error code if not already set
@@ -1139,12 +1154,10 @@
                recursion_start = state;
                recursion_restart = state->next.p;
                state = static_cast<re_jump*>(state)->alt.p;
-               if(state->type == syntax_element_startmark)
-                  recursion_sub = static_cast<re_brace*>(state)->index;
-               else
-                  recursion_sub = 0;
+               m_recursion_checks[recursion_sub] = true;
                break;
             }
+            m_recursion_checks[recursion_sub] = true;
             // fall through, can't handle nested recursion here...
          }
       case syntax_element_backref:
Modified: trunk/boost/regex/v4/basic_regex_parser.hpp
==============================================================================
--- trunk/boost/regex/v4/basic_regex_parser.hpp	(original)
+++ trunk/boost/regex/v4/basic_regex_parser.hpp	2011-06-16 07:27:13 EDT (Thu, 16 Jun 2011)
@@ -1026,13 +1026,14 @@
       {
          //
          // Check for illegal following quantifier, we have to do this here, because
-         // the extra states we insert below circumvents are usual error checking :-(
+         // the extra states we insert below circumvents our usual error checking :-(
          //
          switch(this->m_traits.syntax_type(*m_position))
          {
          case regex_constants::syntax_star:
          case regex_constants::syntax_plus:
          case regex_constants::syntax_question:
+         case regex_constants::syntax_open_brace:
             fail(regex_constants::error_badrepeat, m_position - m_base);
             return false;
          }
Modified: trunk/libs/regex/test/regress/test_perl_ex.cpp
==============================================================================
--- trunk/libs/regex/test/regress/test_perl_ex.cpp	(original)
+++ trunk/libs/regex/test/regress/test_perl_ex.cpp	2011-06-16 07:27:13 EDT (Thu, 16 Jun 2011)
@@ -908,6 +908,7 @@
    // Bugs:
    TEST_REGEX_SEARCH("namespace\\s+(\\w+)\\s+(\\{(?:[^{}]*(?:(?2)[^{}]*)*)?\\})", perl, "namespace one { namespace two { int foo(); } }", match_default, make_array(0, 46, 10, 13, 14, 46, -2, -2));
    TEST_REGEX_SEARCH("namespace\\s+(\\w+)\\s+(\\{(?:[^{}]*(?:(?2)[^{}]*)*)?\\})", perl, "namespace one { namespace two { int foo(){} } { {{{ }  } } } {}}", match_default, make_array(0, 64, 10, 13, 14, 64, -2, -2));
+   TEST_INVALID_REGEX("((?1)|a)", perl);
 
    // Recursion to a named sub with a name that is used multiple times:
    TEST_REGEX_SEARCH("(?:(?<A>a+)|(?<A>b+))\\.(?&A)", perl, "aaaa.aa", match_default, make_array(0, 7, 0, 4, -1, -1, -2, -2));
Modified: trunk/libs/regex/test/regress/test_simple_repeats.cpp
==============================================================================
--- trunk/libs/regex/test/regress/test_simple_repeats.cpp	(original)
+++ trunk/libs/regex/test/regress/test_simple_repeats.cpp	2011-06-16 07:27:13 EDT (Thu, 16 Jun 2011)
@@ -477,6 +477,9 @@
    TEST_REGEX_SEARCH("x{1,4}+\\w", perl, "xxxxxa", match_default, make_array(0, 5, -2, -2));
    TEST_REGEX_SEARCH("x{1,3}+\\w", perl, "xxxxxa", match_default, make_array(0, 4, -2, 4, 6, -2, -2));
    TEST_INVALID_REGEX("\\d+++", perl);
+   TEST_INVALID_REGEX("\\d++*", perl);
+   TEST_INVALID_REGEX("\\d++?", perl);
+   TEST_INVALID_REGEX("\\d++{3}", perl);
    TEST_INVALID_REGEX("\\d*++", perl);
    TEST_INVALID_REGEX("\\d?++", perl);
    TEST_INVALID_REGEX("\\d{1,2}++", perl);