libstdc++
locale_conv.h
Go to the documentation of this file.
00001 // wstring_convert implementation -*- C++ -*-
00002 
00003 // Copyright (C) 2015 Free Software Foundation, Inc.
00004 //
00005 // This file is part of the GNU ISO C++ Library.  This library is free
00006 // software; you can redistribute it and/or modify it under the
00007 // terms of the GNU General Public License as published by the
00008 // Free Software Foundation; either version 3, or (at your option)
00009 // any later version.
00010 
00011 // This library is distributed in the hope that it will be useful,
00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014 // GNU General Public License for more details.
00015 
00016 // Under Section 7 of GPL version 3, you are granted additional
00017 // permissions described in the GCC Runtime Library Exception, version
00018 // 3.1, as published by the Free Software Foundation.
00019 
00020 // You should have received a copy of the GNU General Public License and
00021 // a copy of the GCC Runtime Library Exception along with this program;
00022 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
00023 // <http://www.gnu.org/licenses/>.
00024 
00025 /** @file bits/locale_conv.h
00026  *  This is an internal header file, included by other library headers.
00027  *  Do not attempt to use it directly. @headername{locale}
00028  */
00029 
00030 #ifndef _LOCALE_CONV_H
00031 #define _LOCALE_CONV_H 1
00032 
00033 #if __cplusplus < 201103L
00034 # include <bits/c++0x_warning.h>
00035 #else
00036 
00037 #include <streambuf>
00038 #include "stringfwd.h"
00039 #include "allocator.h"
00040 #include "codecvt.h"
00041 #include "unique_ptr.h"
00042 
00043 namespace std _GLIBCXX_VISIBILITY(default)
00044 {
00045 _GLIBCXX_BEGIN_NAMESPACE_VERSION
00046 
00047   /**
00048    * @addtogroup locales
00049    * @{
00050    */
00051 
00052   template<typename _OutStr, typename _InChar, typename _Codecvt,
00053            typename _State, typename _Fn>
00054     bool
00055     __do_str_codecvt(const _InChar* __first, const _InChar* __last,
00056                      _OutStr& __outstr, const _Codecvt& __cvt, _State& __state,
00057                      size_t& __count, _Fn __fn)
00058     {
00059       if (__first == __last)
00060         {
00061           __outstr.clear();
00062           __count = 0;
00063           return true;
00064         }
00065 
00066       size_t __outchars = 0;
00067       auto __next = __first;
00068       const auto __maxlen = __cvt.max_length() + 1;
00069 
00070       codecvt_base::result __result;
00071       do
00072         {
00073           __outstr.resize(__outstr.size() + (__last - __next) * __maxlen);
00074           auto __outnext = &__outstr.front() + __outchars;
00075           auto const __outlast = &__outstr.back() + 1;
00076           __result = (__cvt.*__fn)(__state, __next, __last, __next,
00077                                         __outnext, __outlast, __outnext);
00078           __outchars = __outnext - &__outstr.front();
00079         }
00080       while (__result == codecvt_base::partial && __next != __last
00081              && (__outstr.size() - __outchars) < __maxlen);
00082 
00083       if (__result == codecvt_base::error)
00084         return false;
00085 
00086       if (__result == codecvt_base::noconv)
00087         {
00088           __outstr.assign(__first, __last);
00089           __count = __last - __first;
00090         }
00091       else
00092         {
00093           __outstr.resize(__outchars);
00094           __count = __next - __first;
00095         }
00096 
00097       return true;
00098     }
00099 
00100   // Convert narrow character string to wide.
00101   template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
00102     inline bool
00103     __str_codecvt_in(const char* __first, const char* __last,
00104                      basic_string<_CharT, _Traits, _Alloc>& __outstr,
00105                      const codecvt<_CharT, char, _State>& __cvt,
00106                      _State& __state, size_t& __count)
00107     {
00108       using _Codecvt = codecvt<_CharT, char, _State>;
00109       using _ConvFn
00110         = codecvt_base::result
00111           (_Codecvt::*)(_State&, const char*, const char*, const char*&,
00112                         _CharT*, _CharT*, _CharT*&) const;
00113       _ConvFn __fn = &codecvt<_CharT, char, _State>::in;
00114       return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
00115                               __count, __fn);
00116     }
00117 
00118   template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
00119     inline bool
00120     __str_codecvt_in(const char* __first, const char* __last,
00121                      basic_string<_CharT, _Traits, _Alloc>& __outstr,
00122                      const codecvt<_CharT, char, _State>& __cvt)
00123     {
00124       _State __state = {};
00125       size_t __n;
00126       return __str_codecvt_in(__first, __last, __outstr, __cvt, __state, __n);
00127     }
00128 
00129   // Convert wide character string to narrow.
00130   template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
00131     inline bool
00132     __str_codecvt_out(const _CharT* __first, const _CharT* __last,
00133                       basic_string<char, _Traits, _Alloc>& __outstr,
00134                       const codecvt<_CharT, char, _State>& __cvt,
00135                       _State& __state, size_t& __count)
00136     {
00137       using _Codecvt = codecvt<_CharT, char, _State>;
00138       using _ConvFn
00139         = codecvt_base::result
00140           (_Codecvt::*)(_State&, const _CharT*, const _CharT*, const _CharT*&,
00141                         char*, char*, char*&) const;
00142       _ConvFn __fn = &codecvt<_CharT, char, _State>::out;
00143       return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
00144                               __count, __fn);
00145     }
00146 
00147   template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
00148     inline bool
00149     __str_codecvt_out(const _CharT* __first, const _CharT* __last,
00150                       basic_string<char, _Traits, _Alloc>& __outstr,
00151                       const codecvt<_CharT, char, _State>& __cvt)
00152     {
00153       _State __state = {};
00154       size_t __n;
00155       return __str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n);
00156     }
00157 
00158 #ifdef _GLIBCXX_USE_WCHAR_T
00159 
00160 _GLIBCXX_BEGIN_NAMESPACE_CXX11
00161 
00162   /// String conversions
00163   template<typename _Codecvt, typename _Elem = wchar_t,
00164            typename _Wide_alloc = allocator<_Elem>,
00165            typename _Byte_alloc = allocator<char>>
00166     class wstring_convert
00167     {
00168     public:
00169       typedef basic_string<char, char_traits<char>, _Byte_alloc>   byte_string;
00170       typedef basic_string<_Elem, char_traits<_Elem>, _Wide_alloc> wide_string;
00171       typedef typename _Codecvt::state_type                        state_type;
00172       typedef typename wide_string::traits_type::int_type          int_type;
00173 
00174       /** Default constructor.
00175        *
00176        * @param  __pcvt The facet to use for conversions.
00177        *
00178        * Takes ownership of @p __pcvt and will delete it in the destructor.
00179        */
00180       explicit
00181       wstring_convert(_Codecvt* __pcvt = new _Codecvt()) : _M_cvt(__pcvt)
00182       {
00183         if (!_M_cvt)
00184           __throw_logic_error("wstring_convert");
00185       }
00186 
00187       /** Construct with an initial converstion state.
00188        *
00189        * @param  __pcvt The facet to use for conversions.
00190        * @param  __state Initial conversion state.
00191        *
00192        * Takes ownership of @p __pcvt and will delete it in the destructor.
00193        * The object's conversion state will persist between conversions.
00194        */
00195       wstring_convert(_Codecvt* __pcvt, state_type __state)
00196       : _M_cvt(__pcvt), _M_state(__state), _M_with_cvtstate(true)
00197       {
00198         if (!_M_cvt)
00199           __throw_logic_error("wstring_convert");
00200       }
00201 
00202       /** Construct with error strings.
00203        *
00204        * @param  __byte_err A string to return on failed conversions.
00205        * @param  __wide_err A wide string to return on failed conversions.
00206        */
00207       explicit
00208       wstring_convert(const byte_string& __byte_err,
00209                       const wide_string& __wide_err = wide_string())
00210       : _M_cvt(new _Codecvt),
00211         _M_byte_err_string(__byte_err), _M_wide_err_string(__wide_err),
00212         _M_with_strings(true)
00213       {
00214         if (!_M_cvt)
00215           __throw_logic_error("wstring_convert");
00216       }
00217 
00218       ~wstring_convert() = default;
00219 
00220       // _GLIBCXX_RESOLVE_LIB_DEFECTS
00221       // 2176. Special members for wstring_convert and wbuffer_convert
00222       wstring_convert(const wstring_convert&) = delete;
00223       wstring_convert& operator=(const wstring_convert&) = delete;
00224 
00225       /// @{ Convert from bytes.
00226       wide_string
00227       from_bytes(char __byte)
00228       {
00229         char __bytes[2] = { __byte };
00230         return from_bytes(__bytes, __bytes+1);
00231       }
00232 
00233       wide_string
00234       from_bytes(const char* __ptr)
00235       { return from_bytes(__ptr, __ptr+char_traits<char>::length(__ptr)); }
00236 
00237       wide_string
00238       from_bytes(const byte_string& __str)
00239       {
00240         auto __ptr = __str.data();
00241         return from_bytes(__ptr, __ptr + __str.size());
00242       }
00243 
00244       wide_string
00245       from_bytes(const char* __first, const char* __last)
00246       {
00247         if (!_M_with_cvtstate)
00248           _M_state = state_type();
00249         wide_string __out{ _M_wide_err_string.get_allocator() };
00250         if (__str_codecvt_in(__first, __last, __out, *_M_cvt, _M_state,
00251                              _M_count))
00252           return __out;
00253         if (_M_with_strings)
00254           return _M_wide_err_string;
00255         __throw_range_error("wstring_convert::from_bytes");
00256       }
00257       /// @}
00258 
00259       /// @{ Convert to bytes.
00260       byte_string
00261       to_bytes(_Elem __wchar)
00262       {
00263         _Elem __wchars[2] = { __wchar };
00264         return to_bytes(__wchars, __wchars+1);
00265       }
00266 
00267       byte_string
00268       to_bytes(const _Elem* __ptr)
00269       {
00270         return to_bytes(__ptr, __ptr+wide_string::traits_type::length(__ptr));
00271       }
00272 
00273       byte_string
00274       to_bytes(const wide_string& __wstr)
00275       {
00276         auto __ptr = __wstr.data();
00277         return to_bytes(__ptr, __ptr + __wstr.size());
00278       }
00279 
00280       byte_string
00281       to_bytes(const _Elem* __first, const _Elem* __last)
00282       {
00283         if (!_M_with_cvtstate)
00284           _M_state = state_type();
00285         byte_string __out{ _M_byte_err_string.get_allocator() };
00286         if (__str_codecvt_out(__first, __last, __out, *_M_cvt, _M_state,
00287                               _M_count))
00288           return __out;
00289         if (_M_with_strings)
00290           return _M_byte_err_string;
00291         __throw_range_error("wstring_convert::to_bytes");
00292       }
00293       /// @}
00294 
00295       // _GLIBCXX_RESOLVE_LIB_DEFECTS
00296       // 2174. wstring_convert::converted() should be noexcept
00297       /// The number of elements successfully converted in the last conversion.
00298       size_t converted() const noexcept { return _M_count; }
00299 
00300       /// The final conversion state of the last conversion.
00301       state_type state() const { return _M_state; }
00302 
00303     private:
00304       unique_ptr<_Codecvt>      _M_cvt;
00305       byte_string               _M_byte_err_string;
00306       wide_string               _M_wide_err_string;
00307       state_type                _M_state = state_type();
00308       size_t                    _M_count = 0;
00309       bool                      _M_with_cvtstate = false;
00310       bool                      _M_with_strings = false;
00311     };
00312 
00313 _GLIBCXX_END_NAMESPACE_CXX11
00314 
00315   /// Buffer conversions
00316   template<typename _Codecvt, typename _Elem = wchar_t,
00317            typename _Tr = char_traits<_Elem>>
00318     class wbuffer_convert : public basic_streambuf<_Elem, _Tr>
00319     {
00320       typedef basic_streambuf<_Elem, _Tr> _Wide_streambuf;
00321 
00322     public:
00323       typedef typename _Codecvt::state_type state_type;
00324 
00325       /** Default constructor.
00326        *
00327        * @param  __bytebuf The underlying byte stream buffer.
00328        * @param  __pcvt    The facet to use for conversions.
00329        * @param  __state   Initial conversion state.
00330        *
00331        * Takes ownership of @p __pcvt and will delete it in the destructor.
00332        */
00333       explicit
00334       wbuffer_convert(streambuf* __bytebuf = 0, _Codecvt* __pcvt = new _Codecvt,
00335                       state_type __state = state_type())
00336       : _M_buf(__bytebuf), _M_cvt(__pcvt), _M_state(__state)
00337       {
00338         if (!_M_cvt)
00339           __throw_logic_error("wbuffer_convert");
00340 
00341         _M_always_noconv = _M_cvt->always_noconv();
00342 
00343         if (_M_buf)
00344           {
00345             this->setp(_M_put_area, _M_put_area + _S_buffer_length);
00346             this->setg(_M_get_area + _S_putback_length,
00347                        _M_get_area + _S_putback_length,
00348                        _M_get_area + _S_putback_length);
00349           }
00350       }
00351 
00352       ~wbuffer_convert() = default;
00353 
00354       // _GLIBCXX_RESOLVE_LIB_DEFECTS
00355       // 2176. Special members for wstring_convert and wbuffer_convert
00356       wbuffer_convert(const wbuffer_convert&) = delete;
00357       wbuffer_convert& operator=(const wbuffer_convert&) = delete;
00358 
00359       streambuf* rdbuf() const noexcept { return _M_buf; }
00360 
00361       streambuf*
00362       rdbuf(streambuf *__bytebuf) noexcept
00363       {
00364         auto __prev = _M_buf;
00365         _M_buf = __bytebuf;
00366         return __prev;
00367       }
00368 
00369       /// The conversion state following the last conversion.
00370       state_type state() const noexcept { return _M_state; }
00371 
00372     protected:
00373       int
00374       sync()
00375       { return _M_buf && _M_conv_put() && _M_buf->pubsync() ? 0 : -1; }
00376 
00377       typename _Wide_streambuf::int_type
00378       overflow(typename _Wide_streambuf::int_type __out)
00379       {
00380         if (!_M_buf || !_M_conv_put())
00381           return _Tr::eof();
00382         else if (!_Tr::eq_int_type(__out, _Tr::eof()))
00383           return this->sputc(__out);
00384         return _Tr::not_eof(__out);
00385       }
00386 
00387       typename _Wide_streambuf::int_type
00388       underflow()
00389       {
00390         if (!_M_buf)
00391           return _Tr::eof();
00392 
00393         if (this->gptr() < this->egptr() || (_M_buf && _M_conv_get()))
00394           return _Tr::to_int_type(*this->gptr());
00395         else
00396           return _Tr::eof();
00397       }
00398 
00399       streamsize
00400       xsputn(const typename _Wide_streambuf::char_type* __s, streamsize __n)
00401       {
00402         if (!_M_buf || __n == 0)
00403           return 0;
00404         streamsize __done = 0;
00405         do
00406         {
00407           auto __nn = std::min<streamsize>(this->epptr() - this->pptr(),
00408                                            __n - __done);
00409           _Tr::copy(this->pptr(), __s + __done, __nn);
00410           this->pbump(__nn);
00411           __done += __nn;
00412         } while (__done < __n && _M_conv_put());
00413         return __done;
00414       }
00415 
00416     private:
00417       // fill the get area from converted contents of the byte stream buffer
00418       bool
00419       _M_conv_get()
00420       {
00421         const streamsize __pb1 = this->gptr() - this->eback();
00422         const streamsize __pb2 = _S_putback_length;
00423         const streamsize __npb = std::min(__pb1, __pb2);
00424 
00425         _Tr::move(_M_get_area + _S_putback_length - __npb,
00426                   this->gptr() - __npb, __npb);
00427 
00428         streamsize __nbytes = sizeof(_M_get_buf) - _M_unconv;
00429         __nbytes = std::min(__nbytes, _M_buf->in_avail());
00430         if (__nbytes < 1)
00431           __nbytes == 1;
00432         __nbytes = _M_buf->sgetn(_M_get_buf + _M_unconv, __nbytes);
00433         if (__nbytes < 1)
00434           return false;
00435         __nbytes += _M_unconv;
00436 
00437         // convert _M_get_buf into _M_get_area
00438 
00439         _Elem* __outbuf = _M_get_area + _S_putback_length;
00440         _Elem* __outnext = __outbuf;
00441         const char* __bnext = _M_get_buf;
00442 
00443         codecvt_base::result __result;
00444         if (_M_always_noconv)
00445           __result = codecvt_base::noconv;
00446         else
00447           {
00448             _Elem* __outend = _M_get_area + _S_buffer_length;
00449 
00450             __result = _M_cvt->in(_M_state,
00451                                   __bnext, __bnext + __nbytes, __bnext,
00452                                   __outbuf, __outend, __outnext);
00453           }
00454 
00455         if (__result == codecvt_base::noconv)
00456           {
00457             // cast is safe because noconv means _Elem is same type as char
00458             auto __get_buf = reinterpret_cast<const _Elem*>(_M_get_buf);
00459             _Tr::copy(__outbuf, __get_buf, __nbytes);
00460             _M_unconv = 0;
00461             return true;
00462           }
00463 
00464         if ((_M_unconv = _M_get_buf + __nbytes - __bnext))
00465           char_traits<char>::move(_M_get_buf, __bnext, _M_unconv);
00466 
00467         this->setg(__outbuf, __outbuf, __outnext);
00468 
00469         return __result != codecvt_base::error;
00470       }
00471 
00472       // unused
00473       bool
00474       _M_put(...)
00475       { return false; }
00476 
00477       bool
00478       _M_put(const char* __p, streamsize __n)
00479       {
00480         if (_M_buf->sputn(__p, __n) < __n)
00481           return false;
00482       }
00483 
00484       // convert the put area and write to the byte stream buffer
00485       bool
00486       _M_conv_put()
00487       {
00488         _Elem* const __first = this->pbase();
00489         const _Elem* const __last = this->pptr();
00490         const streamsize __pending = __last - __first;
00491 
00492         if (_M_always_noconv)
00493           return _M_put(__first, __pending);
00494 
00495         char __outbuf[2 * _S_buffer_length];
00496 
00497         const _Elem* __next = __first;
00498         const _Elem* __start;
00499         do
00500           {
00501             __start = __next;
00502             char* __outnext = __outbuf;
00503             char* const __outlast = __outbuf + sizeof(__outbuf);
00504             auto __result = _M_cvt->out(_M_state, __next, __last, __next,
00505                                         __outnext, __outlast, __outnext);
00506             if (__result == codecvt_base::error)
00507               return false;
00508             else if (__result == codecvt_base::noconv)
00509               return _M_put(__next, __pending);
00510 
00511             if (!_M_put(__outbuf, __outnext - __outbuf))
00512               return false;
00513           }
00514         while (__next != __last && __next != __start);
00515 
00516         if (__next != __last)
00517           _Tr::move(__first, __next, __last - __next);
00518 
00519         this->pbump(__first - __next);
00520         return __next != __first;
00521       }
00522 
00523       streambuf*                _M_buf;
00524       unique_ptr<_Codecvt>      _M_cvt;
00525       state_type                _M_state;
00526 
00527       static const streamsize   _S_buffer_length = 32;
00528       static const streamsize   _S_putback_length = 3;
00529       _Elem                     _M_put_area[_S_buffer_length];
00530       _Elem                     _M_get_area[_S_buffer_length];
00531       streamsize                _M_unconv = 0;
00532       char                      _M_get_buf[_S_buffer_length-_S_putback_length];
00533       bool                      _M_always_noconv;
00534     };
00535 
00536 #endif  // _GLIBCXX_USE_WCHAR_T
00537 
00538   /// @} group locales
00539 
00540 _GLIBCXX_END_NAMESPACE_VERSION
00541 } // namespace
00542 
00543 #endif // __cplusplus
00544 
00545 #endif /* _LOCALE_CONV_H */