libstdc++
|
00001 // class template regex -*- C++ -*- 00002 00003 // Copyright (C) 2013-2015 Free Software Foundation, Inc. 00004 // 00005 // This file is part of the GNU ISO C++ Library. This library is free 00006 // software; you can redistribute it and/or modify it under the 00007 // terms of the GNU General Public License as published by the 00008 // Free Software Foundation; either version 3, or (at your option) 00009 // any later version. 00010 00011 // This library is distributed in the hope that it will be useful, 00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 // GNU General Public License for more details. 00015 00016 // Under Section 7 of GPL version 3, you are granted additional 00017 // permissions described in the GCC Runtime Library Exception, version 00018 // 3.1, as published by the Free Software Foundation. 00019 00020 // You should have received a copy of the GNU General Public License and 00021 // a copy of the GCC Runtime Library Exception along with this program; 00022 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 00023 // <http://www.gnu.org/licenses/>. 00024 00025 /** 00026 * @file bits/regex_executor.h 00027 * This is an internal header file, included by other library headers. 00028 * Do not attempt to use it directly. @headername{regex} 00029 */ 00030 00031 // FIXME convert comments to doxygen format. 00032 00033 namespace std _GLIBCXX_VISIBILITY(default) 00034 { 00035 namespace __detail 00036 { 00037 _GLIBCXX_BEGIN_NAMESPACE_VERSION 00038 00039 /** 00040 * @addtogroup regex-detail 00041 * @{ 00042 */ 00043 00044 /** 00045 * @brief Takes a regex and an input string and does the matching. 00046 * 00047 * The %_Executor class has two modes: DFS mode and BFS mode, controlled 00048 * by the template parameter %__dfs_mode. 00049 */ 00050 template<typename _BiIter, typename _Alloc, typename _TraitsT, 00051 bool __dfs_mode> 00052 class _Executor 00053 { 00054 using __search_mode = integral_constant<bool, __dfs_mode>; 00055 using __dfs = true_type; 00056 using __bfs = false_type; 00057 00058 enum class _Match_mode : unsigned char { _Exact, _Prefix }; 00059 00060 public: 00061 typedef typename iterator_traits<_BiIter>::value_type _CharT; 00062 typedef basic_regex<_CharT, _TraitsT> _RegexT; 00063 typedef std::vector<sub_match<_BiIter>, _Alloc> _ResultsVec; 00064 typedef regex_constants::match_flag_type _FlagT; 00065 typedef typename _TraitsT::char_class_type _ClassT; 00066 typedef _NFA<_TraitsT> _NFAT; 00067 00068 public: 00069 _Executor(_BiIter __begin, 00070 _BiIter __end, 00071 _ResultsVec& __results, 00072 const _RegexT& __re, 00073 _FlagT __flags) 00074 : _M_begin(__begin), 00075 _M_end(__end), 00076 _M_re(__re), 00077 _M_nfa(*__re._M_automaton), 00078 _M_results(__results), 00079 _M_rep_count(_M_nfa.size()), 00080 _M_states(_M_nfa._M_start(), _M_nfa.size()), 00081 _M_flags((__flags & regex_constants::match_prev_avail) 00082 ? (__flags 00083 & ~regex_constants::match_not_bol 00084 & ~regex_constants::match_not_bow) 00085 : __flags) 00086 { } 00087 00088 // Set matched when string exactly matches the pattern. 00089 bool 00090 _M_match() 00091 { 00092 _M_current = _M_begin; 00093 return _M_main(_Match_mode::_Exact); 00094 } 00095 00096 // Set matched when some prefix of the string matches the pattern. 00097 bool 00098 _M_search_from_first() 00099 { 00100 _M_current = _M_begin; 00101 return _M_main(_Match_mode::_Prefix); 00102 } 00103 00104 bool 00105 _M_search(); 00106 00107 private: 00108 void 00109 _M_rep_once_more(_Match_mode __match_mode, _StateIdT); 00110 00111 void 00112 _M_dfs(_Match_mode __match_mode, _StateIdT __start); 00113 00114 bool 00115 _M_main(_Match_mode __match_mode) 00116 { return _M_main_dispatch(__match_mode, __search_mode{}); } 00117 00118 bool 00119 _M_main_dispatch(_Match_mode __match_mode, __dfs); 00120 00121 bool 00122 _M_main_dispatch(_Match_mode __match_mode, __bfs); 00123 00124 bool 00125 _M_is_word(_CharT __ch) const 00126 { 00127 static const _CharT __s[2] = { 'w' }; 00128 return _M_re._M_automaton->_M_traits.isctype 00129 (__ch, _M_re._M_automaton->_M_traits.lookup_classname(__s, __s+1)); 00130 } 00131 00132 bool 00133 _M_at_begin() const 00134 { 00135 return _M_current == _M_begin 00136 && !(_M_flags & (regex_constants::match_not_bol 00137 | regex_constants::match_prev_avail)); 00138 } 00139 00140 bool 00141 _M_at_end() const 00142 { 00143 return _M_current == _M_end 00144 && !(_M_flags & regex_constants::match_not_eol); 00145 } 00146 00147 bool 00148 _M_word_boundary() const; 00149 00150 bool 00151 _M_lookahead(_State<_TraitsT> __state); 00152 00153 // Holds additional information used in BFS-mode. 00154 template<typename _SearchMode, typename _ResultsVec> 00155 struct _State_info; 00156 00157 template<typename _ResultsVec> 00158 struct _State_info<__bfs, _ResultsVec> 00159 { 00160 explicit 00161 _State_info(_StateIdT __start, size_t __n) 00162 : _M_visited_states(new bool[__n]()), _M_start(__start) 00163 { } 00164 00165 bool _M_visited(_StateIdT __i) 00166 { 00167 if (_M_visited_states[__i]) 00168 return true; 00169 _M_visited_states[__i] = true; 00170 return false; 00171 } 00172 00173 void _M_queue(_StateIdT __i, const _ResultsVec& __res) 00174 { _M_match_queue.emplace_back(__i, __res); } 00175 00176 // Dummy implementations for BFS mode. 00177 _BiIter* _M_get_sol_pos() { return nullptr; } 00178 00179 // Saves states that need to be considered for the next character. 00180 vector<pair<_StateIdT, _ResultsVec>> _M_match_queue; 00181 // Indicates which states are already visited. 00182 unique_ptr<bool[]> _M_visited_states; 00183 // To record current solution. 00184 _StateIdT _M_start; 00185 }; 00186 00187 template<typename _ResultsVec> 00188 struct _State_info<__dfs, _ResultsVec> 00189 { 00190 explicit 00191 _State_info(_StateIdT __start, size_t) : _M_start(__start) 00192 { } 00193 00194 // Dummy implementations for DFS mode. 00195 bool _M_visited(_StateIdT) const { return false; } 00196 void _M_queue(_StateIdT, const _ResultsVec&) { } 00197 00198 _BiIter* _M_get_sol_pos() { return &_M_sol_pos; } 00199 00200 // To record current solution. 00201 _StateIdT _M_start; 00202 _BiIter _M_sol_pos; 00203 }; 00204 00205 public: 00206 _ResultsVec _M_cur_results; 00207 _BiIter _M_current; 00208 _BiIter _M_begin; 00209 const _BiIter _M_end; 00210 const _RegexT& _M_re; 00211 const _NFAT& _M_nfa; 00212 _ResultsVec& _M_results; 00213 vector<pair<_BiIter, int>> _M_rep_count; 00214 _State_info<__search_mode, _ResultsVec> _M_states; 00215 _FlagT _M_flags; 00216 // Do we have a solution so far? 00217 bool _M_has_sol; 00218 }; 00219 00220 //@} regex-detail 00221 _GLIBCXX_END_NAMESPACE_VERSION 00222 } // namespace __detail 00223 } // namespace std 00224 00225 #include <bits/regex_executor.tcc>