libstdc++
|
00001 // wstring_convert implementation -*- C++ -*- 00002 00003 // Copyright (C) 2015 Free Software Foundation, Inc. 00004 // 00005 // This file is part of the GNU ISO C++ Library. This library is free 00006 // software; you can redistribute it and/or modify it under the 00007 // terms of the GNU General Public License as published by the 00008 // Free Software Foundation; either version 3, or (at your option) 00009 // any later version. 00010 00011 // This library is distributed in the hope that it will be useful, 00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 // GNU General Public License for more details. 00015 00016 // Under Section 7 of GPL version 3, you are granted additional 00017 // permissions described in the GCC Runtime Library Exception, version 00018 // 3.1, as published by the Free Software Foundation. 00019 00020 // You should have received a copy of the GNU General Public License and 00021 // a copy of the GCC Runtime Library Exception along with this program; 00022 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 00023 // <http://www.gnu.org/licenses/>. 00024 00025 /** @file bits/locale_conv.h 00026 * This is an internal header file, included by other library headers. 00027 * Do not attempt to use it directly. @headername{locale} 00028 */ 00029 00030 #ifndef _LOCALE_CONV_H 00031 #define _LOCALE_CONV_H 1 00032 00033 #if __cplusplus < 201103L 00034 # include <bits/c++0x_warning.h> 00035 #else 00036 00037 #include <streambuf> 00038 #include "stringfwd.h" 00039 #include "allocator.h" 00040 #include "codecvt.h" 00041 #include "unique_ptr.h" 00042 00043 namespace std _GLIBCXX_VISIBILITY(default) 00044 { 00045 _GLIBCXX_BEGIN_NAMESPACE_VERSION 00046 00047 /** 00048 * @addtogroup locales 00049 * @{ 00050 */ 00051 00052 template<typename _OutStr, typename _InChar, typename _Codecvt, 00053 typename _State, typename _Fn> 00054 bool 00055 __do_str_codecvt(const _InChar* __first, const _InChar* __last, 00056 _OutStr& __outstr, const _Codecvt& __cvt, _State& __state, 00057 size_t& __count, _Fn __fn) 00058 { 00059 if (__first == __last) 00060 { 00061 __outstr.clear(); 00062 __count = 0; 00063 return true; 00064 } 00065 00066 size_t __outchars = 0; 00067 auto __next = __first; 00068 const auto __maxlen = __cvt.max_length() + 1; 00069 00070 codecvt_base::result __result; 00071 do 00072 { 00073 __outstr.resize(__outstr.size() + (__last - __next) * __maxlen); 00074 auto __outnext = &__outstr.front() + __outchars; 00075 auto const __outlast = &__outstr.back() + 1; 00076 __result = (__cvt.*__fn)(__state, __next, __last, __next, 00077 __outnext, __outlast, __outnext); 00078 __outchars = __outnext - &__outstr.front(); 00079 } 00080 while (__result == codecvt_base::partial && __next != __last 00081 && (__outstr.size() - __outchars) < __maxlen); 00082 00083 if (__result == codecvt_base::error) 00084 return false; 00085 00086 if (__result == codecvt_base::noconv) 00087 { 00088 __outstr.assign(__first, __last); 00089 __count = __last - __first; 00090 } 00091 else 00092 { 00093 __outstr.resize(__outchars); 00094 __count = __next - __first; 00095 } 00096 00097 return true; 00098 } 00099 00100 // Convert narrow character string to wide. 00101 template<typename _CharT, typename _Traits, typename _Alloc, typename _State> 00102 inline bool 00103 __str_codecvt_in(const char* __first, const char* __last, 00104 basic_string<_CharT, _Traits, _Alloc>& __outstr, 00105 const codecvt<_CharT, char, _State>& __cvt, 00106 _State& __state, size_t& __count) 00107 { 00108 using _Codecvt = codecvt<_CharT, char, _State>; 00109 using _ConvFn 00110 = codecvt_base::result 00111 (_Codecvt::*)(_State&, const char*, const char*, const char*&, 00112 _CharT*, _CharT*, _CharT*&) const; 00113 _ConvFn __fn = &codecvt<_CharT, char, _State>::in; 00114 return __do_str_codecvt(__first, __last, __outstr, __cvt, __state, 00115 __count, __fn); 00116 } 00117 00118 template<typename _CharT, typename _Traits, typename _Alloc, typename _State> 00119 inline bool 00120 __str_codecvt_in(const char* __first, const char* __last, 00121 basic_string<_CharT, _Traits, _Alloc>& __outstr, 00122 const codecvt<_CharT, char, _State>& __cvt) 00123 { 00124 _State __state = {}; 00125 size_t __n; 00126 return __str_codecvt_in(__first, __last, __outstr, __cvt, __state, __n); 00127 } 00128 00129 // Convert wide character string to narrow. 00130 template<typename _CharT, typename _Traits, typename _Alloc, typename _State> 00131 inline bool 00132 __str_codecvt_out(const _CharT* __first, const _CharT* __last, 00133 basic_string<char, _Traits, _Alloc>& __outstr, 00134 const codecvt<_CharT, char, _State>& __cvt, 00135 _State& __state, size_t& __count) 00136 { 00137 using _Codecvt = codecvt<_CharT, char, _State>; 00138 using _ConvFn 00139 = codecvt_base::result 00140 (_Codecvt::*)(_State&, const _CharT*, const _CharT*, const _CharT*&, 00141 char*, char*, char*&) const; 00142 _ConvFn __fn = &codecvt<_CharT, char, _State>::out; 00143 return __do_str_codecvt(__first, __last, __outstr, __cvt, __state, 00144 __count, __fn); 00145 } 00146 00147 template<typename _CharT, typename _Traits, typename _Alloc, typename _State> 00148 inline bool 00149 __str_codecvt_out(const _CharT* __first, const _CharT* __last, 00150 basic_string<char, _Traits, _Alloc>& __outstr, 00151 const codecvt<_CharT, char, _State>& __cvt) 00152 { 00153 _State __state = {}; 00154 size_t __n; 00155 return __str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n); 00156 } 00157 00158 #ifdef _GLIBCXX_USE_WCHAR_T 00159 00160 _GLIBCXX_BEGIN_NAMESPACE_CXX11 00161 00162 /// String conversions 00163 template<typename _Codecvt, typename _Elem = wchar_t, 00164 typename _Wide_alloc = allocator<_Elem>, 00165 typename _Byte_alloc = allocator<char>> 00166 class wstring_convert 00167 { 00168 public: 00169 typedef basic_string<char, char_traits<char>, _Byte_alloc> byte_string; 00170 typedef basic_string<_Elem, char_traits<_Elem>, _Wide_alloc> wide_string; 00171 typedef typename _Codecvt::state_type state_type; 00172 typedef typename wide_string::traits_type::int_type int_type; 00173 00174 /** Default constructor. 00175 * 00176 * @param __pcvt The facet to use for conversions. 00177 * 00178 * Takes ownership of @p __pcvt and will delete it in the destructor. 00179 */ 00180 explicit 00181 wstring_convert(_Codecvt* __pcvt = new _Codecvt()) : _M_cvt(__pcvt) 00182 { 00183 if (!_M_cvt) 00184 __throw_logic_error("wstring_convert"); 00185 } 00186 00187 /** Construct with an initial converstion state. 00188 * 00189 * @param __pcvt The facet to use for conversions. 00190 * @param __state Initial conversion state. 00191 * 00192 * Takes ownership of @p __pcvt and will delete it in the destructor. 00193 * The object's conversion state will persist between conversions. 00194 */ 00195 wstring_convert(_Codecvt* __pcvt, state_type __state) 00196 : _M_cvt(__pcvt), _M_state(__state), _M_with_cvtstate(true) 00197 { 00198 if (!_M_cvt) 00199 __throw_logic_error("wstring_convert"); 00200 } 00201 00202 /** Construct with error strings. 00203 * 00204 * @param __byte_err A string to return on failed conversions. 00205 * @param __wide_err A wide string to return on failed conversions. 00206 */ 00207 explicit 00208 wstring_convert(const byte_string& __byte_err, 00209 const wide_string& __wide_err = wide_string()) 00210 : _M_cvt(new _Codecvt), 00211 _M_byte_err_string(__byte_err), _M_wide_err_string(__wide_err), 00212 _M_with_strings(true) 00213 { 00214 if (!_M_cvt) 00215 __throw_logic_error("wstring_convert"); 00216 } 00217 00218 ~wstring_convert() = default; 00219 00220 // _GLIBCXX_RESOLVE_LIB_DEFECTS 00221 // 2176. Special members for wstring_convert and wbuffer_convert 00222 wstring_convert(const wstring_convert&) = delete; 00223 wstring_convert& operator=(const wstring_convert&) = delete; 00224 00225 /// @{ Convert from bytes. 00226 wide_string 00227 from_bytes(char __byte) 00228 { 00229 char __bytes[2] = { __byte }; 00230 return from_bytes(__bytes, __bytes+1); 00231 } 00232 00233 wide_string 00234 from_bytes(const char* __ptr) 00235 { return from_bytes(__ptr, __ptr+char_traits<char>::length(__ptr)); } 00236 00237 wide_string 00238 from_bytes(const byte_string& __str) 00239 { 00240 auto __ptr = __str.data(); 00241 return from_bytes(__ptr, __ptr + __str.size()); 00242 } 00243 00244 wide_string 00245 from_bytes(const char* __first, const char* __last) 00246 { 00247 if (!_M_with_cvtstate) 00248 _M_state = state_type(); 00249 wide_string __out{ _M_wide_err_string.get_allocator() }; 00250 if (__str_codecvt_in(__first, __last, __out, *_M_cvt, _M_state, 00251 _M_count)) 00252 return __out; 00253 if (_M_with_strings) 00254 return _M_wide_err_string; 00255 __throw_range_error("wstring_convert::from_bytes"); 00256 } 00257 /// @} 00258 00259 /// @{ Convert to bytes. 00260 byte_string 00261 to_bytes(_Elem __wchar) 00262 { 00263 _Elem __wchars[2] = { __wchar }; 00264 return to_bytes(__wchars, __wchars+1); 00265 } 00266 00267 byte_string 00268 to_bytes(const _Elem* __ptr) 00269 { 00270 return to_bytes(__ptr, __ptr+wide_string::traits_type::length(__ptr)); 00271 } 00272 00273 byte_string 00274 to_bytes(const wide_string& __wstr) 00275 { 00276 auto __ptr = __wstr.data(); 00277 return to_bytes(__ptr, __ptr + __wstr.size()); 00278 } 00279 00280 byte_string 00281 to_bytes(const _Elem* __first, const _Elem* __last) 00282 { 00283 if (!_M_with_cvtstate) 00284 _M_state = state_type(); 00285 byte_string __out{ _M_byte_err_string.get_allocator() }; 00286 if (__str_codecvt_out(__first, __last, __out, *_M_cvt, _M_state, 00287 _M_count)) 00288 return __out; 00289 if (_M_with_strings) 00290 return _M_byte_err_string; 00291 __throw_range_error("wstring_convert::to_bytes"); 00292 } 00293 /// @} 00294 00295 // _GLIBCXX_RESOLVE_LIB_DEFECTS 00296 // 2174. wstring_convert::converted() should be noexcept 00297 /// The number of elements successfully converted in the last conversion. 00298 size_t converted() const noexcept { return _M_count; } 00299 00300 /// The final conversion state of the last conversion. 00301 state_type state() const { return _M_state; } 00302 00303 private: 00304 unique_ptr<_Codecvt> _M_cvt; 00305 byte_string _M_byte_err_string; 00306 wide_string _M_wide_err_string; 00307 state_type _M_state = state_type(); 00308 size_t _M_count = 0; 00309 bool _M_with_cvtstate = false; 00310 bool _M_with_strings = false; 00311 }; 00312 00313 _GLIBCXX_END_NAMESPACE_CXX11 00314 00315 /// Buffer conversions 00316 template<typename _Codecvt, typename _Elem = wchar_t, 00317 typename _Tr = char_traits<_Elem>> 00318 class wbuffer_convert : public basic_streambuf<_Elem, _Tr> 00319 { 00320 typedef basic_streambuf<_Elem, _Tr> _Wide_streambuf; 00321 00322 public: 00323 typedef typename _Codecvt::state_type state_type; 00324 00325 /** Default constructor. 00326 * 00327 * @param __bytebuf The underlying byte stream buffer. 00328 * @param __pcvt The facet to use for conversions. 00329 * @param __state Initial conversion state. 00330 * 00331 * Takes ownership of @p __pcvt and will delete it in the destructor. 00332 */ 00333 explicit 00334 wbuffer_convert(streambuf* __bytebuf = 0, _Codecvt* __pcvt = new _Codecvt, 00335 state_type __state = state_type()) 00336 : _M_buf(__bytebuf), _M_cvt(__pcvt), _M_state(__state) 00337 { 00338 if (!_M_cvt) 00339 __throw_logic_error("wbuffer_convert"); 00340 00341 _M_always_noconv = _M_cvt->always_noconv(); 00342 00343 if (_M_buf) 00344 { 00345 this->setp(_M_put_area, _M_put_area + _S_buffer_length); 00346 this->setg(_M_get_area + _S_putback_length, 00347 _M_get_area + _S_putback_length, 00348 _M_get_area + _S_putback_length); 00349 } 00350 } 00351 00352 ~wbuffer_convert() = default; 00353 00354 // _GLIBCXX_RESOLVE_LIB_DEFECTS 00355 // 2176. Special members for wstring_convert and wbuffer_convert 00356 wbuffer_convert(const wbuffer_convert&) = delete; 00357 wbuffer_convert& operator=(const wbuffer_convert&) = delete; 00358 00359 streambuf* rdbuf() const noexcept { return _M_buf; } 00360 00361 streambuf* 00362 rdbuf(streambuf *__bytebuf) noexcept 00363 { 00364 auto __prev = _M_buf; 00365 _M_buf = __bytebuf; 00366 return __prev; 00367 } 00368 00369 /// The conversion state following the last conversion. 00370 state_type state() const noexcept { return _M_state; } 00371 00372 protected: 00373 int 00374 sync() 00375 { return _M_buf && _M_conv_put() && _M_buf->pubsync() ? 0 : -1; } 00376 00377 typename _Wide_streambuf::int_type 00378 overflow(typename _Wide_streambuf::int_type __out) 00379 { 00380 if (!_M_buf || !_M_conv_put()) 00381 return _Tr::eof(); 00382 else if (!_Tr::eq_int_type(__out, _Tr::eof())) 00383 return this->sputc(__out); 00384 return _Tr::not_eof(__out); 00385 } 00386 00387 typename _Wide_streambuf::int_type 00388 underflow() 00389 { 00390 if (!_M_buf) 00391 return _Tr::eof(); 00392 00393 if (this->gptr() < this->egptr() || (_M_buf && _M_conv_get())) 00394 return _Tr::to_int_type(*this->gptr()); 00395 else 00396 return _Tr::eof(); 00397 } 00398 00399 streamsize 00400 xsputn(const typename _Wide_streambuf::char_type* __s, streamsize __n) 00401 { 00402 if (!_M_buf || __n == 0) 00403 return 0; 00404 streamsize __done = 0; 00405 do 00406 { 00407 auto __nn = std::min<streamsize>(this->epptr() - this->pptr(), 00408 __n - __done); 00409 _Tr::copy(this->pptr(), __s + __done, __nn); 00410 this->pbump(__nn); 00411 __done += __nn; 00412 } while (__done < __n && _M_conv_put()); 00413 return __done; 00414 } 00415 00416 private: 00417 // fill the get area from converted contents of the byte stream buffer 00418 bool 00419 _M_conv_get() 00420 { 00421 const streamsize __pb1 = this->gptr() - this->eback(); 00422 const streamsize __pb2 = _S_putback_length; 00423 const streamsize __npb = std::min(__pb1, __pb2); 00424 00425 _Tr::move(_M_get_area + _S_putback_length - __npb, 00426 this->gptr() - __npb, __npb); 00427 00428 streamsize __nbytes = sizeof(_M_get_buf) - _M_unconv; 00429 __nbytes = std::min(__nbytes, _M_buf->in_avail()); 00430 if (__nbytes < 1) 00431 __nbytes == 1; 00432 __nbytes = _M_buf->sgetn(_M_get_buf + _M_unconv, __nbytes); 00433 if (__nbytes < 1) 00434 return false; 00435 __nbytes += _M_unconv; 00436 00437 // convert _M_get_buf into _M_get_area 00438 00439 _Elem* __outbuf = _M_get_area + _S_putback_length; 00440 _Elem* __outnext = __outbuf; 00441 const char* __bnext = _M_get_buf; 00442 00443 codecvt_base::result __result; 00444 if (_M_always_noconv) 00445 __result = codecvt_base::noconv; 00446 else 00447 { 00448 _Elem* __outend = _M_get_area + _S_buffer_length; 00449 00450 __result = _M_cvt->in(_M_state, 00451 __bnext, __bnext + __nbytes, __bnext, 00452 __outbuf, __outend, __outnext); 00453 } 00454 00455 if (__result == codecvt_base::noconv) 00456 { 00457 // cast is safe because noconv means _Elem is same type as char 00458 auto __get_buf = reinterpret_cast<const _Elem*>(_M_get_buf); 00459 _Tr::copy(__outbuf, __get_buf, __nbytes); 00460 _M_unconv = 0; 00461 return true; 00462 } 00463 00464 if ((_M_unconv = _M_get_buf + __nbytes - __bnext)) 00465 char_traits<char>::move(_M_get_buf, __bnext, _M_unconv); 00466 00467 this->setg(__outbuf, __outbuf, __outnext); 00468 00469 return __result != codecvt_base::error; 00470 } 00471 00472 // unused 00473 bool 00474 _M_put(...) 00475 { return false; } 00476 00477 bool 00478 _M_put(const char* __p, streamsize __n) 00479 { 00480 if (_M_buf->sputn(__p, __n) < __n) 00481 return false; 00482 } 00483 00484 // convert the put area and write to the byte stream buffer 00485 bool 00486 _M_conv_put() 00487 { 00488 _Elem* const __first = this->pbase(); 00489 const _Elem* const __last = this->pptr(); 00490 const streamsize __pending = __last - __first; 00491 00492 if (_M_always_noconv) 00493 return _M_put(__first, __pending); 00494 00495 char __outbuf[2 * _S_buffer_length]; 00496 00497 const _Elem* __next = __first; 00498 const _Elem* __start; 00499 do 00500 { 00501 __start = __next; 00502 char* __outnext = __outbuf; 00503 char* const __outlast = __outbuf + sizeof(__outbuf); 00504 auto __result = _M_cvt->out(_M_state, __next, __last, __next, 00505 __outnext, __outlast, __outnext); 00506 if (__result == codecvt_base::error) 00507 return false; 00508 else if (__result == codecvt_base::noconv) 00509 return _M_put(__next, __pending); 00510 00511 if (!_M_put(__outbuf, __outnext - __outbuf)) 00512 return false; 00513 } 00514 while (__next != __last && __next != __start); 00515 00516 if (__next != __last) 00517 _Tr::move(__first, __next, __last - __next); 00518 00519 this->pbump(__first - __next); 00520 return __next != __first; 00521 } 00522 00523 streambuf* _M_buf; 00524 unique_ptr<_Codecvt> _M_cvt; 00525 state_type _M_state; 00526 00527 static const streamsize _S_buffer_length = 32; 00528 static const streamsize _S_putback_length = 3; 00529 _Elem _M_put_area[_S_buffer_length]; 00530 _Elem _M_get_area[_S_buffer_length]; 00531 streamsize _M_unconv = 0; 00532 char _M_get_buf[_S_buffer_length-_S_putback_length]; 00533 bool _M_always_noconv; 00534 }; 00535 00536 #endif // _GLIBCXX_USE_WCHAR_T 00537 00538 /// @} group locales 00539 00540 _GLIBCXX_END_NAMESPACE_VERSION 00541 } // namespace 00542 00543 #endif // __cplusplus 00544 00545 #endif /* _LOCALE_CONV_H */