libstdc++
|
00001 // class template regex -*- C++ -*- 00002 00003 // Copyright (C) 2013-2015 Free Software Foundation, Inc. 00004 // 00005 // This file is part of the GNU ISO C++ Library. This library is free 00006 // software; you can redistribute it and/or modify it under the 00007 // terms of the GNU General Public License as published by the 00008 // Free Software Foundation; either version 3, or (at your option) 00009 // any later version. 00010 00011 // This library is distributed in the hope that it will be useful, 00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 // GNU General Public License for more details. 00015 00016 // Under Section 7 of GPL version 3, you are granted additional 00017 // permissions described in the GCC Runtime Library Exception, version 00018 // 3.1, as published by the Free Software Foundation. 00019 00020 // You should have received a copy of the GNU General Public License and 00021 // a copy of the GCC Runtime Library Exception along with this program; 00022 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 00023 // <http://www.gnu.org/licenses/>. 00024 00025 /** 00026 * @file bits/regex.tcc 00027 * This is an internal header file, included by other library headers. 00028 * Do not attempt to use it directly. @headername{regex} 00029 */ 00030 00031 // A non-standard switch to let the user pick the matching algorithm. 00032 // If _GLIBCXX_REGEX_USE_THOMPSON_NFA is defined, the thompson NFA 00033 // algorithm will be used. This algorithm is not enabled by default, 00034 // and cannot be used if the regex contains back-references, but has better 00035 // (polynomial instead of exponential) worst case performance. 00036 // See __regex_algo_impl below. 00037 00038 namespace std _GLIBCXX_VISIBILITY(default) 00039 { 00040 namespace __detail 00041 { 00042 _GLIBCXX_BEGIN_NAMESPACE_VERSION 00043 00044 // Result of merging regex_match and regex_search. 00045 // 00046 // __policy now can be _S_auto (auto dispatch) and _S_alternate (use 00047 // the other one if possible, for test purpose). 00048 // 00049 // That __match_mode is true means regex_match, else regex_search. 00050 template<typename _BiIter, typename _Alloc, 00051 typename _CharT, typename _TraitsT, 00052 _RegexExecutorPolicy __policy, 00053 bool __match_mode> 00054 bool 00055 __regex_algo_impl(_BiIter __s, 00056 _BiIter __e, 00057 match_results<_BiIter, _Alloc>& __m, 00058 const basic_regex<_CharT, _TraitsT>& __re, 00059 regex_constants::match_flag_type __flags) 00060 { 00061 if (__re._M_automaton == nullptr) 00062 return false; 00063 00064 typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m; 00065 __m._M_begin = __s; 00066 __m._M_resize(__re._M_automaton->_M_sub_count()); 00067 for (auto& __it : __res) 00068 __it.matched = false; 00069 00070 // __policy is used by testsuites so that they can use Thompson NFA 00071 // without defining a macro. Users should define 00072 // _GLIBCXX_REGEX_USE_THOMPSON_NFA if they need to use this approach. 00073 bool __ret; 00074 if (!__re._M_automaton->_M_has_backref 00075 && !(__re._M_flags & regex_constants::ECMAScript) 00076 #ifndef _GLIBCXX_REGEX_USE_THOMPSON_NFA 00077 && __policy == _RegexExecutorPolicy::_S_alternate 00078 #endif 00079 ) 00080 { 00081 _Executor<_BiIter, _Alloc, _TraitsT, false> 00082 __executor(__s, __e, __m, __re, __flags); 00083 if (__match_mode) 00084 __ret = __executor._M_match(); 00085 else 00086 __ret = __executor._M_search(); 00087 } 00088 else 00089 { 00090 _Executor<_BiIter, _Alloc, _TraitsT, true> 00091 __executor(__s, __e, __m, __re, __flags); 00092 if (__match_mode) 00093 __ret = __executor._M_match(); 00094 else 00095 __ret = __executor._M_search(); 00096 } 00097 if (__ret) 00098 { 00099 for (auto& __it : __res) 00100 if (!__it.matched) 00101 __it.first = __it.second = __e; 00102 auto& __pre = __m._M_prefix(); 00103 auto& __suf = __m._M_suffix(); 00104 if (__match_mode) 00105 { 00106 __pre.matched = false; 00107 __pre.first = __s; 00108 __pre.second = __s; 00109 __suf.matched = false; 00110 __suf.first = __e; 00111 __suf.second = __e; 00112 } 00113 else 00114 { 00115 __pre.first = __s; 00116 __pre.second = __res[0].first; 00117 __pre.matched = (__pre.first != __pre.second); 00118 __suf.first = __res[0].second; 00119 __suf.second = __e; 00120 __suf.matched = (__suf.first != __suf.second); 00121 } 00122 } 00123 else 00124 { 00125 __m._M_resize(0); 00126 for (auto& __it : __res) 00127 { 00128 __it.matched = false; 00129 __it.first = __it.second = __e; 00130 } 00131 } 00132 return __ret; 00133 } 00134 00135 _GLIBCXX_END_NAMESPACE_VERSION 00136 } 00137 00138 _GLIBCXX_BEGIN_NAMESPACE_VERSION 00139 00140 template<typename _Ch_type> 00141 template<typename _Fwd_iter> 00142 typename regex_traits<_Ch_type>::string_type 00143 regex_traits<_Ch_type>:: 00144 lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const 00145 { 00146 typedef std::ctype<char_type> __ctype_type; 00147 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale)); 00148 00149 static const char* __collatenames[] = 00150 { 00151 "NUL", 00152 "SOH", 00153 "STX", 00154 "ETX", 00155 "EOT", 00156 "ENQ", 00157 "ACK", 00158 "alert", 00159 "backspace", 00160 "tab", 00161 "newline", 00162 "vertical-tab", 00163 "form-feed", 00164 "carriage-return", 00165 "SO", 00166 "SI", 00167 "DLE", 00168 "DC1", 00169 "DC2", 00170 "DC3", 00171 "DC4", 00172 "NAK", 00173 "SYN", 00174 "ETB", 00175 "CAN", 00176 "EM", 00177 "SUB", 00178 "ESC", 00179 "IS4", 00180 "IS3", 00181 "IS2", 00182 "IS1", 00183 "space", 00184 "exclamation-mark", 00185 "quotation-mark", 00186 "number-sign", 00187 "dollar-sign", 00188 "percent-sign", 00189 "ampersand", 00190 "apostrophe", 00191 "left-parenthesis", 00192 "right-parenthesis", 00193 "asterisk", 00194 "plus-sign", 00195 "comma", 00196 "hyphen", 00197 "period", 00198 "slash", 00199 "zero", 00200 "one", 00201 "two", 00202 "three", 00203 "four", 00204 "five", 00205 "six", 00206 "seven", 00207 "eight", 00208 "nine", 00209 "colon", 00210 "semicolon", 00211 "less-than-sign", 00212 "equals-sign", 00213 "greater-than-sign", 00214 "question-mark", 00215 "commercial-at", 00216 "A", 00217 "B", 00218 "C", 00219 "D", 00220 "E", 00221 "F", 00222 "G", 00223 "H", 00224 "I", 00225 "J", 00226 "K", 00227 "L", 00228 "M", 00229 "N", 00230 "O", 00231 "P", 00232 "Q", 00233 "R", 00234 "S", 00235 "T", 00236 "U", 00237 "V", 00238 "W", 00239 "X", 00240 "Y", 00241 "Z", 00242 "left-square-bracket", 00243 "backslash", 00244 "right-square-bracket", 00245 "circumflex", 00246 "underscore", 00247 "grave-accent", 00248 "a", 00249 "b", 00250 "c", 00251 "d", 00252 "e", 00253 "f", 00254 "g", 00255 "h", 00256 "i", 00257 "j", 00258 "k", 00259 "l", 00260 "m", 00261 "n", 00262 "o", 00263 "p", 00264 "q", 00265 "r", 00266 "s", 00267 "t", 00268 "u", 00269 "v", 00270 "w", 00271 "x", 00272 "y", 00273 "z", 00274 "left-curly-bracket", 00275 "vertical-line", 00276 "right-curly-bracket", 00277 "tilde", 00278 "DEL", 00279 }; 00280 00281 string __s; 00282 for (; __first != __last; ++__first) 00283 __s += __fctyp.narrow(*__first, 0); 00284 00285 for (const auto& __it : __collatenames) 00286 if (__s == __it) 00287 return string_type(1, __fctyp.widen( 00288 static_cast<char>(&__it - __collatenames))); 00289 00290 // TODO Add digraph support: 00291 // http://boost.sourceforge.net/libs/regex/doc/collating_names.html 00292 00293 return string_type(); 00294 } 00295 00296 template<typename _Ch_type> 00297 template<typename _Fwd_iter> 00298 typename regex_traits<_Ch_type>::char_class_type 00299 regex_traits<_Ch_type>:: 00300 lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const 00301 { 00302 typedef std::ctype<char_type> __ctype_type; 00303 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale)); 00304 00305 // Mappings from class name to class mask. 00306 static const pair<const char*, char_class_type> __classnames[] = 00307 { 00308 {"d", ctype_base::digit}, 00309 {"w", {ctype_base::alnum, _RegexMask::_S_under}}, 00310 {"s", ctype_base::space}, 00311 {"alnum", ctype_base::alnum}, 00312 {"alpha", ctype_base::alpha}, 00313 {"blank", ctype_base::blank}, 00314 {"cntrl", ctype_base::cntrl}, 00315 {"digit", ctype_base::digit}, 00316 {"graph", ctype_base::graph}, 00317 {"lower", ctype_base::lower}, 00318 {"print", ctype_base::print}, 00319 {"punct", ctype_base::punct}, 00320 {"space", ctype_base::space}, 00321 {"upper", ctype_base::upper}, 00322 {"xdigit", ctype_base::xdigit}, 00323 }; 00324 00325 string __s; 00326 for (; __first != __last; ++__first) 00327 __s += __fctyp.narrow(__fctyp.tolower(*__first), 0); 00328 00329 for (const auto& __it : __classnames) 00330 if (__s == __it.first) 00331 { 00332 if (__icase 00333 && ((__it.second 00334 & (ctype_base::lower | ctype_base::upper)) != 0)) 00335 return ctype_base::alpha; 00336 return __it.second; 00337 } 00338 return 0; 00339 } 00340 00341 template<typename _Ch_type> 00342 bool 00343 regex_traits<_Ch_type>:: 00344 isctype(_Ch_type __c, char_class_type __f) const 00345 { 00346 typedef std::ctype<char_type> __ctype_type; 00347 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale)); 00348 00349 return __fctyp.is(__f._M_base, __c) 00350 // [[:w:]] 00351 || ((__f._M_extended & _RegexMask::_S_under) 00352 && __c == __fctyp.widen('_')); 00353 } 00354 00355 template<typename _Ch_type> 00356 int 00357 regex_traits<_Ch_type>:: 00358 value(_Ch_type __ch, int __radix) const 00359 { 00360 std::basic_istringstream<char_type> __is(string_type(1, __ch)); 00361 long __v; 00362 if (__radix == 8) 00363 __is >> std::oct; 00364 else if (__radix == 16) 00365 __is >> std::hex; 00366 __is >> __v; 00367 return __is.fail() ? -1 : __v; 00368 } 00369 00370 template<typename _Bi_iter, typename _Alloc> 00371 template<typename _Out_iter> 00372 _Out_iter match_results<_Bi_iter, _Alloc>:: 00373 format(_Out_iter __out, 00374 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first, 00375 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last, 00376 match_flag_type __flags) const 00377 { 00378 _GLIBCXX_DEBUG_ASSERT( ready() ); 00379 regex_traits<char_type> __traits; 00380 typedef std::ctype<char_type> __ctype_type; 00381 const __ctype_type& 00382 __fctyp(use_facet<__ctype_type>(__traits.getloc())); 00383 00384 auto __output = [&](size_t __idx) 00385 { 00386 auto& __sub = (*this)[__idx]; 00387 if (__sub.matched) 00388 __out = std::copy(__sub.first, __sub.second, __out); 00389 }; 00390 00391 if (__flags & regex_constants::format_sed) 00392 { 00393 for (; __fmt_first != __fmt_last;) 00394 if (*__fmt_first == '&') 00395 { 00396 __output(0); 00397 ++__fmt_first; 00398 } 00399 else if (*__fmt_first == '\\') 00400 { 00401 if (++__fmt_first != __fmt_last 00402 && __fctyp.is(__ctype_type::digit, *__fmt_first)) 00403 __output(__traits.value(*__fmt_first++, 10)); 00404 else 00405 *__out++ = '\\'; 00406 } 00407 else 00408 *__out++ = *__fmt_first++; 00409 } 00410 else 00411 { 00412 while (1) 00413 { 00414 auto __next = std::find(__fmt_first, __fmt_last, '$'); 00415 if (__next == __fmt_last) 00416 break; 00417 00418 __out = std::copy(__fmt_first, __next, __out); 00419 00420 auto __eat = [&](char __ch) -> bool 00421 { 00422 if (*__next == __ch) 00423 { 00424 ++__next; 00425 return true; 00426 } 00427 return false; 00428 }; 00429 00430 if (++__next == __fmt_last) 00431 *__out++ = '$'; 00432 else if (__eat('$')) 00433 *__out++ = '$'; 00434 else if (__eat('&')) 00435 __output(0); 00436 else if (__eat('`')) 00437 { 00438 auto& __sub = _M_prefix(); 00439 if (__sub.matched) 00440 __out = std::copy(__sub.first, __sub.second, __out); 00441 } 00442 else if (__eat('\'')) 00443 { 00444 auto& __sub = _M_suffix(); 00445 if (__sub.matched) 00446 __out = std::copy(__sub.first, __sub.second, __out); 00447 } 00448 else if (__fctyp.is(__ctype_type::digit, *__next)) 00449 { 00450 long __num = __traits.value(*__next, 10); 00451 if (++__next != __fmt_last 00452 && __fctyp.is(__ctype_type::digit, *__next)) 00453 { 00454 __num *= 10; 00455 __num += __traits.value(*__next++, 10); 00456 } 00457 if (0 <= __num && __num < this->size()) 00458 __output(__num); 00459 } 00460 else 00461 *__out++ = '$'; 00462 __fmt_first = __next; 00463 } 00464 __out = std::copy(__fmt_first, __fmt_last, __out); 00465 } 00466 return __out; 00467 } 00468 00469 template<typename _Out_iter, typename _Bi_iter, 00470 typename _Rx_traits, typename _Ch_type> 00471 _Out_iter 00472 regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last, 00473 const basic_regex<_Ch_type, _Rx_traits>& __e, 00474 const _Ch_type* __fmt, 00475 regex_constants::match_flag_type __flags) 00476 { 00477 typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT; 00478 _IterT __i(__first, __last, __e, __flags); 00479 _IterT __end; 00480 if (__i == __end) 00481 { 00482 if (!(__flags & regex_constants::format_no_copy)) 00483 __out = std::copy(__first, __last, __out); 00484 } 00485 else 00486 { 00487 sub_match<_Bi_iter> __last; 00488 auto __len = char_traits<_Ch_type>::length(__fmt); 00489 for (; __i != __end; ++__i) 00490 { 00491 if (!(__flags & regex_constants::format_no_copy)) 00492 __out = std::copy(__i->prefix().first, __i->prefix().second, 00493 __out); 00494 __out = __i->format(__out, __fmt, __fmt + __len, __flags); 00495 __last = __i->suffix(); 00496 if (__flags & regex_constants::format_first_only) 00497 break; 00498 } 00499 if (!(__flags & regex_constants::format_no_copy)) 00500 __out = std::copy(__last.first, __last.second, __out); 00501 } 00502 return __out; 00503 } 00504 00505 template<typename _Bi_iter, 00506 typename _Ch_type, 00507 typename _Rx_traits> 00508 bool 00509 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: 00510 operator==(const regex_iterator& __rhs) const 00511 { 00512 return (_M_match.empty() && __rhs._M_match.empty()) 00513 || (_M_begin == __rhs._M_begin 00514 && _M_end == __rhs._M_end 00515 && _M_pregex == __rhs._M_pregex 00516 && _M_flags == __rhs._M_flags 00517 && _M_match[0] == __rhs._M_match[0]); 00518 } 00519 00520 template<typename _Bi_iter, 00521 typename _Ch_type, 00522 typename _Rx_traits> 00523 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>& 00524 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: 00525 operator++() 00526 { 00527 // In all cases in which the call to regex_search returns true, 00528 // match.prefix().first shall be equal to the previous value of 00529 // match[0].second, and for each index i in the half-open range 00530 // [0, match.size()) for which match[i].matched is true, 00531 // match[i].position() shall return distance(begin, match[i].first). 00532 // [28.12.1.4.5] 00533 if (_M_match[0].matched) 00534 { 00535 auto __start = _M_match[0].second; 00536 auto __prefix_first = _M_match[0].second; 00537 if (_M_match[0].first == _M_match[0].second) 00538 { 00539 if (__start == _M_end) 00540 { 00541 _M_match = value_type(); 00542 return *this; 00543 } 00544 else 00545 { 00546 if (regex_search(__start, _M_end, _M_match, *_M_pregex, 00547 _M_flags 00548 | regex_constants::match_not_null 00549 | regex_constants::match_continuous)) 00550 { 00551 _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched); 00552 auto& __prefix = _M_match._M_prefix(); 00553 __prefix.first = __prefix_first; 00554 __prefix.matched = __prefix.first != __prefix.second; 00555 // [28.12.1.4.5] 00556 _M_match._M_begin = _M_begin; 00557 return *this; 00558 } 00559 else 00560 ++__start; 00561 } 00562 } 00563 _M_flags |= regex_constants::match_prev_avail; 00564 if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags)) 00565 { 00566 _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched); 00567 auto& __prefix = _M_match._M_prefix(); 00568 __prefix.first = __prefix_first; 00569 __prefix.matched = __prefix.first != __prefix.second; 00570 // [28.12.1.4.5] 00571 _M_match._M_begin = _M_begin; 00572 } 00573 else 00574 _M_match = value_type(); 00575 } 00576 return *this; 00577 } 00578 00579 template<typename _Bi_iter, 00580 typename _Ch_type, 00581 typename _Rx_traits> 00582 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>& 00583 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: 00584 operator=(const regex_token_iterator& __rhs) 00585 { 00586 _M_position = __rhs._M_position; 00587 _M_subs = __rhs._M_subs; 00588 _M_n = __rhs._M_n; 00589 _M_suffix = __rhs._M_suffix; 00590 _M_has_m1 = __rhs._M_has_m1; 00591 _M_normalize_result(); 00592 return *this; 00593 } 00594 00595 template<typename _Bi_iter, 00596 typename _Ch_type, 00597 typename _Rx_traits> 00598 bool 00599 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: 00600 operator==(const regex_token_iterator& __rhs) const 00601 { 00602 if (_M_end_of_seq() && __rhs._M_end_of_seq()) 00603 return true; 00604 if (_M_suffix.matched && __rhs._M_suffix.matched 00605 && _M_suffix == __rhs._M_suffix) 00606 return true; 00607 if (_M_end_of_seq() || _M_suffix.matched 00608 || __rhs._M_end_of_seq() || __rhs._M_suffix.matched) 00609 return false; 00610 return _M_position == __rhs._M_position 00611 && _M_n == __rhs._M_n 00612 && _M_subs == __rhs._M_subs; 00613 } 00614 00615 template<typename _Bi_iter, 00616 typename _Ch_type, 00617 typename _Rx_traits> 00618 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>& 00619 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: 00620 operator++() 00621 { 00622 _Position __prev = _M_position; 00623 if (_M_suffix.matched) 00624 *this = regex_token_iterator(); 00625 else if (_M_n + 1 < _M_subs.size()) 00626 { 00627 _M_n++; 00628 _M_result = &_M_current_match(); 00629 } 00630 else 00631 { 00632 _M_n = 0; 00633 ++_M_position; 00634 if (_M_position != _Position()) 00635 _M_result = &_M_current_match(); 00636 else if (_M_has_m1 && __prev->suffix().length() != 0) 00637 { 00638 _M_suffix.matched = true; 00639 _M_suffix.first = __prev->suffix().first; 00640 _M_suffix.second = __prev->suffix().second; 00641 _M_result = &_M_suffix; 00642 } 00643 else 00644 *this = regex_token_iterator(); 00645 } 00646 return *this; 00647 } 00648 00649 template<typename _Bi_iter, 00650 typename _Ch_type, 00651 typename _Rx_traits> 00652 void 00653 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: 00654 _M_init(_Bi_iter __a, _Bi_iter __b) 00655 { 00656 _M_has_m1 = false; 00657 for (auto __it : _M_subs) 00658 if (__it == -1) 00659 { 00660 _M_has_m1 = true; 00661 break; 00662 } 00663 if (_M_position != _Position()) 00664 _M_result = &_M_current_match(); 00665 else if (_M_has_m1) 00666 { 00667 _M_suffix.matched = true; 00668 _M_suffix.first = __a; 00669 _M_suffix.second = __b; 00670 _M_result = &_M_suffix; 00671 } 00672 else 00673 _M_result = nullptr; 00674 } 00675 00676 _GLIBCXX_END_NAMESPACE_VERSION 00677 } // namespace 00678