libstdc++
|
00001 // class template regex -*- C++ -*- 00002 00003 // Copyright (C) 2013-2015 Free Software Foundation, Inc. 00004 // 00005 // This file is part of the GNU ISO C++ Library. This library is free 00006 // software; you can redistribute it and/or modify it under the 00007 // terms of the GNU General Public License as published by the 00008 // Free Software Foundation; either version 3, or (at your option) 00009 // any later version. 00010 00011 // This library is distributed in the hope that it will be useful, 00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 // GNU General Public License for more details. 00015 00016 // Under Section 7 of GPL version 3, you are granted additional 00017 // permissions described in the GCC Runtime Library Exception, version 00018 // 3.1, as published by the Free Software Foundation. 00019 00020 // You should have received a copy of the GNU General Public License and 00021 // a copy of the GCC Runtime Library Exception along with this program; 00022 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 00023 // <http://www.gnu.org/licenses/>. 00024 00025 /** 00026 * @file bits/regex_automaton.h 00027 * This is an internal header file, included by other library headers. 00028 * Do not attempt to use it directly. @headername{regex} 00029 */ 00030 00031 // This macro defines the maximal state number a NFA can have. 00032 #ifndef _GLIBCXX_REGEX_STATE_LIMIT 00033 #define _GLIBCXX_REGEX_STATE_LIMIT 100000 00034 #endif 00035 00036 namespace std _GLIBCXX_VISIBILITY(default) 00037 { 00038 namespace __detail 00039 { 00040 _GLIBCXX_BEGIN_NAMESPACE_VERSION 00041 00042 /** 00043 * @defgroup regex-detail Base and Implementation Classes 00044 * @ingroup regex 00045 * @{ 00046 */ 00047 00048 typedef long _StateIdT; 00049 static const _StateIdT _S_invalid_state_id = -1; 00050 00051 template<typename _CharT> 00052 using _Matcher = std::function<bool (_CharT)>; 00053 00054 /// Operation codes that define the type of transitions within the base NFA 00055 /// that represents the regular expression. 00056 enum _Opcode : int 00057 { 00058 _S_opcode_unknown, 00059 _S_opcode_alternative, 00060 _S_opcode_repeat, 00061 _S_opcode_backref, 00062 _S_opcode_line_begin_assertion, 00063 _S_opcode_line_end_assertion, 00064 _S_opcode_word_boundary, 00065 _S_opcode_subexpr_lookahead, 00066 _S_opcode_subexpr_begin, 00067 _S_opcode_subexpr_end, 00068 _S_opcode_dummy, 00069 _S_opcode_match, 00070 _S_opcode_accept, 00071 }; 00072 00073 struct _State_base 00074 { 00075 _Opcode _M_opcode; // type of outgoing transition 00076 _StateIdT _M_next; // outgoing transition 00077 union // Since they are mutually exclusive. 00078 { 00079 size_t _M_subexpr; // for _S_opcode_subexpr_* 00080 size_t _M_backref_index; // for _S_opcode_backref 00081 struct 00082 { 00083 // for _S_opcode_alternative, _S_opcode_repeat and 00084 // _S_opcode_subexpr_lookahead 00085 _StateIdT _M_alt; 00086 // for _S_opcode_word_boundary or _S_opcode_subexpr_lookahead or 00087 // quantifiers (ungreedy if set true) 00088 bool _M_neg; 00089 }; 00090 }; 00091 00092 explicit _State_base(_Opcode __opcode) 00093 : _M_opcode(__opcode), _M_next(_S_invalid_state_id) 00094 { } 00095 00096 protected: 00097 ~_State_base() = default; 00098 00099 public: 00100 #ifdef _GLIBCXX_DEBUG 00101 std::ostream& 00102 _M_print(std::ostream& ostr) const; 00103 00104 // Prints graphviz dot commands for state. 00105 std::ostream& 00106 _M_dot(std::ostream& __ostr, _StateIdT __id) const; 00107 #endif 00108 }; 00109 00110 template<typename _TraitsT> 00111 struct _State : _State_base 00112 { 00113 typedef _Matcher<typename _TraitsT::char_type> _MatcherT; 00114 00115 _MatcherT _M_matches; // for _S_opcode_match 00116 00117 explicit _State(_Opcode __opcode) : _State_base(__opcode) { } 00118 }; 00119 00120 struct _NFA_base 00121 { 00122 typedef size_t _SizeT; 00123 typedef regex_constants::syntax_option_type _FlagT; 00124 00125 explicit 00126 _NFA_base(_FlagT __f) 00127 : _M_flags(__f), _M_start_state(0), _M_subexpr_count(0), 00128 _M_has_backref(false) 00129 { } 00130 00131 _NFA_base(_NFA_base&&) = default; 00132 00133 protected: 00134 ~_NFA_base() = default; 00135 00136 public: 00137 _FlagT 00138 _M_options() const 00139 { return _M_flags; } 00140 00141 _StateIdT 00142 _M_start() const 00143 { return _M_start_state; } 00144 00145 _SizeT 00146 _M_sub_count() const 00147 { return _M_subexpr_count; } 00148 00149 std::vector<size_t> _M_paren_stack; 00150 _FlagT _M_flags; 00151 _StateIdT _M_start_state; 00152 _SizeT _M_subexpr_count; 00153 bool _M_has_backref; 00154 }; 00155 00156 template<typename _TraitsT> 00157 struct _NFA 00158 : _NFA_base, std::vector<_State<_TraitsT>> 00159 { 00160 typedef _State<_TraitsT> _StateT; 00161 typedef _Matcher<typename _TraitsT::char_type> _MatcherT; 00162 00163 _NFA(const typename _TraitsT::locale_type& __loc, _FlagT __flags) 00164 : _NFA_base(__flags) 00165 { _M_traits.imbue(__loc); } 00166 00167 // for performance reasons _NFA objects should only be moved not copied 00168 _NFA(const _NFA&) = delete; 00169 _NFA(_NFA&&) = default; 00170 00171 _StateIdT 00172 _M_insert_accept() 00173 { 00174 auto __ret = _M_insert_state(_StateT(_S_opcode_accept)); 00175 return __ret; 00176 } 00177 00178 _StateIdT 00179 _M_insert_alt(_StateIdT __next, _StateIdT __alt, bool __neg) 00180 { 00181 _StateT __tmp(_S_opcode_alternative); 00182 // It labels every quantifier to make greedy comparison easier in BFS 00183 // approach. 00184 __tmp._M_next = __next; 00185 __tmp._M_alt = __alt; 00186 return _M_insert_state(std::move(__tmp)); 00187 } 00188 00189 _StateIdT 00190 _M_insert_repeat(_StateIdT __next, _StateIdT __alt, bool __neg) 00191 { 00192 _StateT __tmp(_S_opcode_repeat); 00193 // It labels every quantifier to make greedy comparison easier in BFS 00194 // approach. 00195 __tmp._M_next = __next; 00196 __tmp._M_alt = __alt; 00197 __tmp._M_neg = __neg; 00198 return _M_insert_state(std::move(__tmp)); 00199 } 00200 00201 _StateIdT 00202 _M_insert_matcher(_MatcherT __m) 00203 { 00204 _StateT __tmp(_S_opcode_match); 00205 __tmp._M_matches = std::move(__m); 00206 return _M_insert_state(std::move(__tmp)); 00207 } 00208 00209 _StateIdT 00210 _M_insert_subexpr_begin() 00211 { 00212 auto __id = this->_M_subexpr_count++; 00213 this->_M_paren_stack.push_back(__id); 00214 _StateT __tmp(_S_opcode_subexpr_begin); 00215 __tmp._M_subexpr = __id; 00216 return _M_insert_state(std::move(__tmp)); 00217 } 00218 00219 _StateIdT 00220 _M_insert_subexpr_end() 00221 { 00222 _StateT __tmp(_S_opcode_subexpr_end); 00223 __tmp._M_subexpr = this->_M_paren_stack.back(); 00224 this->_M_paren_stack.pop_back(); 00225 return _M_insert_state(std::move(__tmp)); 00226 } 00227 00228 _StateIdT 00229 _M_insert_backref(size_t __index); 00230 00231 _StateIdT 00232 _M_insert_line_begin() 00233 { return _M_insert_state(_StateT(_S_opcode_line_begin_assertion)); } 00234 00235 _StateIdT 00236 _M_insert_line_end() 00237 { return _M_insert_state(_StateT(_S_opcode_line_end_assertion)); } 00238 00239 _StateIdT 00240 _M_insert_word_bound(bool __neg) 00241 { 00242 _StateT __tmp(_S_opcode_word_boundary); 00243 __tmp._M_neg = __neg; 00244 return _M_insert_state(std::move(__tmp)); 00245 } 00246 00247 _StateIdT 00248 _M_insert_lookahead(_StateIdT __alt, bool __neg) 00249 { 00250 _StateT __tmp(_S_opcode_subexpr_lookahead); 00251 __tmp._M_alt = __alt; 00252 __tmp._M_neg = __neg; 00253 return _M_insert_state(std::move(__tmp)); 00254 } 00255 00256 _StateIdT 00257 _M_insert_dummy() 00258 { return _M_insert_state(_StateT(_S_opcode_dummy)); } 00259 00260 _StateIdT 00261 _M_insert_state(_StateT __s) 00262 { 00263 this->push_back(std::move(__s)); 00264 if (this->size() > _GLIBCXX_REGEX_STATE_LIMIT) 00265 __throw_regex_error(regex_constants::error_space); 00266 return this->size()-1; 00267 } 00268 00269 // Eliminate dummy node in this NFA to make it compact. 00270 void 00271 _M_eliminate_dummy(); 00272 00273 #ifdef _GLIBCXX_DEBUG 00274 std::ostream& 00275 _M_dot(std::ostream& __ostr) const; 00276 #endif 00277 public: 00278 _TraitsT _M_traits; 00279 }; 00280 00281 /// Describes a sequence of one or more %_State, its current start 00282 /// and end(s). This structure contains fragments of an NFA during 00283 /// construction. 00284 template<typename _TraitsT> 00285 class _StateSeq 00286 { 00287 public: 00288 typedef _NFA<_TraitsT> _RegexT; 00289 00290 public: 00291 _StateSeq(_RegexT& __nfa, _StateIdT __s) 00292 : _M_nfa(__nfa), _M_start(__s), _M_end(__s) 00293 { } 00294 00295 _StateSeq(_RegexT& __nfa, _StateIdT __s, _StateIdT __end) 00296 : _M_nfa(__nfa), _M_start(__s), _M_end(__end) 00297 { } 00298 00299 // Append a state on *this and change *this to the new sequence. 00300 void 00301 _M_append(_StateIdT __id) 00302 { 00303 _M_nfa[_M_end]._M_next = __id; 00304 _M_end = __id; 00305 } 00306 00307 // Append a sequence on *this and change *this to the new sequence. 00308 void 00309 _M_append(const _StateSeq& __s) 00310 { 00311 _M_nfa[_M_end]._M_next = __s._M_start; 00312 _M_end = __s._M_end; 00313 } 00314 00315 // Clones an entire sequence. 00316 _StateSeq 00317 _M_clone(); 00318 00319 public: 00320 _RegexT& _M_nfa; 00321 _StateIdT _M_start; 00322 _StateIdT _M_end; 00323 }; 00324 00325 //@} regex-detail 00326 _GLIBCXX_END_NAMESPACE_VERSION 00327 } // namespace __detail 00328 } // namespace std 00329 00330 #include <bits/regex_automaton.tcc>