Line data Source code
1 : // class template regex -*- C++ -*- 2 : 3 : // Copyright (C) 2013-2019 Free Software Foundation, Inc. 4 : // 5 : // This file is part of the GNU ISO C++ Library. This library is free 6 : // software; you can redistribute it and/or modify it under the 7 : // terms of the GNU General Public License as published by the 8 : // Free Software Foundation; either version 3, or (at your option) 9 : // any later version. 10 : 11 : // This library is distributed in the hope that it will be useful, 12 : // but WITHOUT ANY WARRANTY; without even the implied warranty of 13 : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 : // GNU General Public License for more details. 15 : 16 : // Under Section 7 of GPL version 3, you are granted additional 17 : // permissions described in the GCC Runtime Library Exception, version 18 : // 3.1, as published by the Free Software Foundation. 19 : 20 : // You should have received a copy of the GNU General Public License and 21 : // a copy of the GCC Runtime Library Exception along with this program; 22 : // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 23 : // <http://www.gnu.org/licenses/>. 24 : 25 : /** 26 : * @file bits/regex_automaton.tcc 27 : * This is an internal header file, included by other library headers. 28 : * Do not attempt to use it directly. @headername{regex} 29 : */ 30 : 31 : namespace std _GLIBCXX_VISIBILITY(default) 32 : { 33 : _GLIBCXX_BEGIN_NAMESPACE_VERSION 34 : 35 : namespace __detail 36 : { 37 : #ifdef _GLIBCXX_DEBUG 38 : inline std::ostream& 39 : _State_base::_M_print(std::ostream& ostr) const 40 : { 41 : switch (_M_opcode) 42 : { 43 : case _S_opcode_alternative: 44 : case _S_opcode_repeat: 45 : ostr << "alt next=" << _M_next << " alt=" << _M_alt; 46 : break; 47 : case _S_opcode_subexpr_begin: 48 : ostr << "subexpr begin next=" << _M_next << " index=" << _M_subexpr; 49 : break; 50 : case _S_opcode_subexpr_end: 51 : ostr << "subexpr end next=" << _M_next << " index=" << _M_subexpr; 52 : break; 53 : case _S_opcode_backref: 54 : ostr << "backref next=" << _M_next << " index=" << _M_backref_index; 55 : break; 56 : case _S_opcode_match: 57 : ostr << "match next=" << _M_next; 58 : break; 59 : case _S_opcode_accept: 60 : ostr << "accept next=" << _M_next; 61 : break; 62 : default: 63 : ostr << "unknown next=" << _M_next; 64 : break; 65 : } 66 : return ostr; 67 : } 68 : 69 : // Prints graphviz dot commands for state. 70 : inline std::ostream& 71 : _State_base::_M_dot(std::ostream& __ostr, _StateIdT __id) const 72 : { 73 : switch (_M_opcode) 74 : { 75 : case _S_opcode_alternative: 76 : case _S_opcode_repeat: 77 : __ostr << __id << " [label=\"" << __id << "\\nALT\"];\n" 78 : << __id << " -> " << _M_next 79 : << " [label=\"next\", tailport=\"s\"];\n" 80 : << __id << " -> " << _M_alt 81 : << " [label=\"alt\", tailport=\"n\"];\n"; 82 : break; 83 : case _S_opcode_backref: 84 : __ostr << __id << " [label=\"" << __id << "\\nBACKREF " 85 : << _M_subexpr << "\"];\n" 86 : << __id << " -> " << _M_next << " [label=\"<match>\"];\n"; 87 : break; 88 : case _S_opcode_line_begin_assertion: 89 : __ostr << __id << " [label=\"" << __id << "\\nLINE_BEGIN \"];\n" 90 : << __id << " -> " << _M_next << " [label=\"epsilon\"];\n"; 91 : break; 92 : case _S_opcode_line_end_assertion: 93 : __ostr << __id << " [label=\"" << __id << "\\nLINE_END \"];\n" 94 : << __id << " -> " << _M_next << " [label=\"epsilon\"];\n"; 95 : break; 96 : case _S_opcode_word_boundary: 97 : __ostr << __id << " [label=\"" << __id << "\\nWORD_BOUNDRY " 98 : << _M_neg << "\"];\n" 99 : << __id << " -> " << _M_next << " [label=\"epsilon\"];\n"; 100 : break; 101 : case _S_opcode_subexpr_lookahead: 102 : __ostr << __id << " [label=\"" << __id << "\\nLOOK_AHEAD\"];\n" 103 : << __id << " -> " << _M_next 104 : << " [label=\"epsilon\", tailport=\"s\"];\n" 105 : << __id << " -> " << _M_alt 106 : << " [label=\"<assert>\", tailport=\"n\"];\n"; 107 : break; 108 : case _S_opcode_subexpr_begin: 109 : __ostr << __id << " [label=\"" << __id << "\\nSBEGIN " 110 : << _M_subexpr << "\"];\n" 111 : << __id << " -> " << _M_next << " [label=\"epsilon\"];\n"; 112 : break; 113 : case _S_opcode_subexpr_end: 114 : __ostr << __id << " [label=\"" << __id << "\\nSEND " 115 : << _M_subexpr << "\"];\n" 116 : << __id << " -> " << _M_next << " [label=\"epsilon\"];\n"; 117 : break; 118 : case _S_opcode_dummy: 119 : break; 120 : case _S_opcode_match: 121 : __ostr << __id << " [label=\"" << __id << "\\nMATCH\"];\n" 122 : << __id << " -> " << _M_next << " [label=\"<match>\"];\n"; 123 : break; 124 : case _S_opcode_accept: 125 : __ostr << __id << " [label=\"" << __id << "\\nACC\"];\n" ; 126 : break; 127 : default: 128 : _GLIBCXX_DEBUG_ASSERT(false); 129 : break; 130 : } 131 : return __ostr; 132 : } 133 : 134 : template<typename _TraitsT> 135 : std::ostream& 136 : _NFA<_TraitsT>::_M_dot(std::ostream& __ostr) const 137 : { 138 : __ostr << "digraph _Nfa {\n" 139 : " rankdir=LR;\n"; 140 : for (size_t __i = 0; __i < this->size(); ++__i) 141 : (*this)[__i]._M_dot(__ostr, __i); 142 : __ostr << "}\n"; 143 : return __ostr; 144 : } 145 : #endif 146 : 147 : template<typename _TraitsT> 148 : _StateIdT 149 0 : _NFA<_TraitsT>::_M_insert_backref(size_t __index) 150 : { 151 0 : if (this->_M_flags & regex_constants::__polynomial) 152 0 : __throw_regex_error(regex_constants::error_complexity, 153 : "Unexpected back-reference in polynomial mode."); 154 : // To figure out whether a backref is valid, a stack is used to store 155 : // unfinished sub-expressions. For example, when parsing 156 : // "(a(b)(c\\1(d)))" at '\\1', _M_subexpr_count is 3, indicating that 3 157 : // sub expressions are parsed or partially parsed(in the stack), aka, 158 : // "(a..", "(b)" and "(c.."). 159 : // _M_paren_stack is {1, 3}, for incomplete "(a.." and "(c..". At this 160 : // time, "\\2" is valid, but "\\1" and "\\3" are not. 161 0 : if (__index >= _M_subexpr_count) 162 0 : __throw_regex_error( 163 : regex_constants::error_backref, 164 : "Back-reference index exceeds current sub-expression count."); 165 0 : for (auto __it : this->_M_paren_stack) 166 0 : if (__index == __it) 167 0 : __throw_regex_error( 168 : regex_constants::error_backref, 169 : "Back-reference referred to an opened sub-expression."); 170 0 : this->_M_has_backref = true; 171 0 : _StateT __tmp(_S_opcode_backref); 172 0 : __tmp._M_backref_index = __index; 173 0 : return _M_insert_state(std::move(__tmp)); 174 : } 175 : 176 : template<typename _TraitsT> 177 : void 178 955 : _NFA<_TraitsT>::_M_eliminate_dummy() 179 : { 180 9550 : for (auto& __it : *this) 181 : { 182 9550 : while (__it._M_next >= 0 && (*this)[__it._M_next]._M_opcode() 183 8595 : == _S_opcode_dummy) 184 955 : __it._M_next = (*this)[__it._M_next]._M_next; 185 8595 : if (__it._M_has_alt()) 186 0 : while (__it._M_alt >= 0 && (*this)[__it._M_alt]._M_opcode() 187 0 : == _S_opcode_dummy) 188 0 : __it._M_alt = (*this)[__it._M_alt]._M_next; 189 : } 190 955 : } 191 : 192 : // Just apply DFS on the sequence and re-link their links. 193 : template<typename _TraitsT> 194 : _StateSeq<_TraitsT> 195 0 : _StateSeq<_TraitsT>::_M_clone() 196 : { 197 0 : std::map<_StateIdT, _StateIdT> __m; 198 0 : std::stack<_StateIdT> __stack; 199 0 : __stack.push(_M_start); 200 0 : while (!__stack.empty()) 201 : { 202 0 : auto __u = __stack.top(); 203 0 : __stack.pop(); 204 0 : auto __dup = _M_nfa[__u]; 205 : // _M_insert_state() never return -1 206 0 : auto __id = _M_nfa._M_insert_state(std::move(__dup)); 207 0 : __m[__u] = __id; 208 0 : if (__dup._M_has_alt()) 209 0 : if (__dup._M_alt != _S_invalid_state_id 210 0 : && __m.count(__dup._M_alt) == 0) 211 0 : __stack.push(__dup._M_alt); 212 0 : if (__u == _M_end) 213 0 : continue; 214 0 : if (__dup._M_next != _S_invalid_state_id 215 0 : && __m.count(__dup._M_next) == 0) 216 0 : __stack.push(__dup._M_next); 217 : } 218 0 : for (auto __it : __m) 219 : { 220 0 : auto __v = __it.second; 221 0 : auto& __ref = _M_nfa[__v]; 222 0 : if (__ref._M_next != _S_invalid_state_id) 223 0 : __ref._M_next = __m.find(__ref._M_next)->second; 224 0 : if (__ref._M_has_alt() && __ref._M_alt != _S_invalid_state_id) 225 0 : __ref._M_alt = __m.find(__ref._M_alt)->second; 226 : } 227 0 : return _StateSeq(_M_nfa, __m[_M_start], __m[_M_end]); 228 : } 229 : } // namespace __detail 230 : 231 : _GLIBCXX_END_NAMESPACE_VERSION 232 : } // namespace