libstdc++
codecvt.h
Go to the documentation of this file.
00001 // Locale support (codecvt) -*- C++ -*-
00002 
00003 // Copyright (C) 2000-2015 Free Software Foundation, Inc.
00004 //
00005 // This file is part of the GNU ISO C++ Library.  This library is free
00006 // software; you can redistribute it and/or modify it under the
00007 // terms of the GNU General Public License as published by the
00008 // Free Software Foundation; either version 3, or (at your option)
00009 // any later version.
00010 
00011 // This library is distributed in the hope that it will be useful,
00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014 // GNU General Public License for more details.
00015 
00016 // Under Section 7 of GPL version 3, you are granted additional
00017 // permissions described in the GCC Runtime Library Exception, version
00018 // 3.1, as published by the Free Software Foundation.
00019 
00020 // You should have received a copy of the GNU General Public License and
00021 // a copy of the GCC Runtime Library Exception along with this program;
00022 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
00023 // <http://www.gnu.org/licenses/>.
00024 
00025 /** @file bits/codecvt.h
00026  *  This is an internal header file, included by other library headers.
00027  *  Do not attempt to use it directly. @headername{locale}
00028  */
00029 
00030 //
00031 // ISO C++ 14882: 22.2.1.5 Template class codecvt
00032 //
00033 
00034 // Written by Benjamin Kosnik <bkoz@redhat.com>
00035 
00036 #ifndef _CODECVT_H
00037 #define _CODECVT_H 1
00038 
00039 #pragma GCC system_header
00040 
00041 namespace std _GLIBCXX_VISIBILITY(default)
00042 {
00043 _GLIBCXX_BEGIN_NAMESPACE_VERSION
00044 
00045   /// Empty base class for codecvt facet [22.2.1.5].
        ///
        /// Declares only the @c result enumeration, which is the status code
        /// returned by the codecvt conversion functions (out, in, unshift).
00046   class codecvt_base
00047   {
00048   public:
00049     enum result
00050     {
00051       ok,      ///< All of the input was converted.
00052       partial, ///< Input ended early or the output had insufficient space.
00053       error,   ///< The conversion failed.
00054       noconv   ///< No conversion was necessary.
00055     };
00056   };
00057 
00058   /**
00059    *  @brief  Common base for codecvt functions.
00060    *
00061    *  This template class provides implementations of the public functions
00062    *  that forward to the protected virtual functions.
00063    *
00064    *  This template also provides abstract stubs for the protected virtual
00065    *  functions.
         *
         *  @tparam _InternT  Type exposed as intern_type.
         *  @tparam _ExternT  Type exposed as extern_type.
         *  @tparam _StateT   Type exposed as state_type.
00066   */
00067   template<typename _InternT, typename _ExternT, typename _StateT>
00068     class __codecvt_abstract_base
00069     : public locale::facet, public codecvt_base
00070     {
00071     public:
00072       // Types:
00073       typedef codecvt_base::result      result;
00074       typedef _InternT                  intern_type;
00075       typedef _ExternT                  extern_type;
00076       typedef _StateT                   state_type;
00077 
00078       // 22.2.1.5.1 codecvt members
00079       /**
00080        *  @brief  Convert from internal to external character set.
00081        *
00082        *  Converts input string of intern_type to output string of
00083        *  extern_type.  This is analogous to wcsrtombs.  It does this by
00084        *  calling codecvt::do_out.
00085        *
00086        *  The source and destination character sets are determined by the
00087        *  facet's locale, internal and external types.
00088        *
00089        *  The characters in [from,from_end) are converted and written to
00090        *  [to,to_end).  from_next and to_next are set to point to the
00091        *  character following the last successfully converted character,
00092        *  respectively.  If the result needed no conversion, from_next and
00093        *  to_next are not affected.
00094        *
00095        *  The @a state argument should be initialized if the input is at the
00096        *  beginning and carried from a previous call if continuing
00097        *  conversion.  There are no guarantees about how @a state is used.
00098        *
00099        *  The result returned is a member of codecvt_base::result.  If
00100        *  all the input is converted, returns codecvt_base::ok.  If no
00101        *  conversion is necessary, returns codecvt_base::noconv.  If
00102        *  the input ends early or there is insufficient space in the
00103        *  output, returns codecvt_base::partial.  Otherwise the
00104        *  conversion failed and codecvt_base::error is returned.
00105        *
00106        *  @param  __state  Persistent conversion state data.
00107        *  @param  __from  Start of input.
00108        *  @param  __from_end  End of input.
00109        *  @param  __from_next  Returns start of unconverted data.
00110        *  @param  __to  Start of output buffer.
00111        *  @param  __to_end  End of output buffer.
00112        *  @param  __to_next  Returns start of unused output area.
00113        *  @return  codecvt_base::result.
00114       */
00115       result
00116       out(state_type& __state, const intern_type* __from,
00117           const intern_type* __from_end, const intern_type*& __from_next,
00118           extern_type* __to, extern_type* __to_end,
00119           extern_type*& __to_next) const
00120       {
00121         return this->do_out(__state, __from, __from_end, __from_next,
00122                             __to, __to_end, __to_next);
00123       }
00124 
00125       /**
00126        *  @brief  Reset conversion state.
00127        *
00128        *  Writes characters to output that would restore @a state to initial
00129        *  conditions.  The idea is that if a partial conversion occurs, then
00130        *  converting the characters written by this function would leave
00131        *  the state in initial conditions, rather than partial conversion
00132        *  state.  It does this by calling codecvt::do_unshift().
00133        *
00134        *  For example, if 4 external characters always converted to 1 internal
00135        *  character, and input to in() had 6 external characters with state
00136        *  saved, this function would write two characters to the output and
00137        *  set the state to initialized conditions.
00138        *
00139        *  The source and destination character sets are determined by the
00140        *  facet's locale, internal and external types.
00141        *
00142        *  The result returned is a member of codecvt_base::result.  If the
00143        *  state could be reset and data written, returns codecvt_base::ok.  If
00144        *  no conversion is necessary, returns codecvt_base::noconv.  If the
00145        *  output has insufficient space, returns codecvt_base::partial.
00146        *  Otherwise the reset failed and codecvt_base::error is returned.
00147        *
00148        *  @param  __state  Persistent conversion state data.
00149        *  @param  __to  Start of output buffer.
00150        *  @param  __to_end  End of output buffer.
00151        *  @param  __to_next  Returns start of unused output area.
00152        *  @return  codecvt_base::result.
00153       */
00154       result
00155       unshift(state_type& __state, extern_type* __to, extern_type* __to_end,
00156               extern_type*& __to_next) const
00157       { return this->do_unshift(__state, __to,__to_end,__to_next); }
00158 
00159       /**
00160        *  @brief  Convert from external to internal character set.
00161        *
00162        *  Converts input string of extern_type to output string of
00163        *  intern_type.  This is analogous to mbsrtowcs.  It does this by
00164        *  calling codecvt::do_in.
00165        *
00166        *  The source and destination character sets are determined by the
00167        *  facet's locale, internal and external types.
00168        *
00169        *  The characters in [from,from_end) are converted and written to
00170        *  [to,to_end).  from_next and to_next are set to point to the
00171        *  character following the last successfully converted character,
00172        *  respectively.  If the result needed no conversion, from_next and
00173        *  to_next are not affected.
00174        *
00175        *  The @a state argument should be initialized if the input is at the
00176        *  beginning and carried from a previous call if continuing
00177        *  conversion.  There are no guarantees about how @a state is used.
00178        *
00179        *  The result returned is a member of codecvt_base::result.  If
00180        *  all the input is converted, returns codecvt_base::ok.  If no
00181        *  conversion is necessary, returns codecvt_base::noconv.  If
00182        *  the input ends early or there is insufficient space in the
00183        *  output, returns codecvt_base::partial.  Otherwise the
00184        *  conversion failed and codecvt_base::error is returned.
00185        *
00186        *  @param  __state  Persistent conversion state data.
00187        *  @param  __from  Start of input.
00188        *  @param  __from_end  End of input.
00189        *  @param  __from_next  Returns start of unconverted data.
00190        *  @param  __to  Start of output buffer.
00191        *  @param  __to_end  End of output buffer.
00192        *  @param  __to_next  Returns start of unused output area.
00193        *  @return  codecvt_base::result.
00194       */
00195       result
00196       in(state_type& __state, const extern_type* __from,
00197          const extern_type* __from_end, const extern_type*& __from_next,
00198          intern_type* __to, intern_type* __to_end,
00199          intern_type*& __to_next) const
00200       {
00201         return this->do_in(__state, __from, __from_end, __from_next,
00202                            __to, __to_end, __to_next);
00203       }
00204 
            /**
             *  @brief  Public, non-virtual entry point for do_encoding().
             *
             *  Declared with an empty exception specification.  The meaning
             *  of the returned value is defined by the derived facet's
             *  do_encoding(); see ISO C++ 22.2.1.5.
             *
             *  @return  The value returned by do_encoding().
            */
00205       int
00206       encoding() const throw()
00207       { return this->do_encoding(); }
00208 
            /**
             *  @brief  Public, non-virtual entry point for do_always_noconv().
             *
             *  Declared with an empty exception specification.
             *
             *  @return  The value returned by do_always_noconv().
            */
00209       bool
00210       always_noconv() const throw()
00211       { return this->do_always_noconv(); }
00212 
            /**
             *  @brief  Public, non-virtual entry point for do_length().
             *
             *  All four arguments are passed through unchanged.  The meaning
             *  of the returned count is defined by the derived facet's
             *  do_length(); see ISO C++ 22.2.1.5.
             *
             *  @param  __state  Conversion state, passed by reference.
             *  @param  __from  Start of the external input.
             *  @param  __end  End of the external input.
             *  @param  __max  Limit forwarded to do_length().
             *  @return  The value returned by do_length().
            */
00213       int
00214       length(state_type& __state, const extern_type* __from,
00215              const extern_type* __end, size_t __max) const
00216       { return this->do_length(__state, __from, __end, __max); }
00217 
            /**
             *  @brief  Public, non-virtual entry point for do_max_length().
             *
             *  Declared with an empty exception specification.
             *
             *  @return  The value returned by do_max_length().
            */
00218       int
00219       max_length() const throw()
00220       { return this->do_max_length(); }
00221 
00222     protected:
            /// Construct the base, passing @a __refs through to locale::facet.
00223       explicit
00224       __codecvt_abstract_base(size_t __refs = 0) : locale::facet(__refs) { }
00225 
            /// Protected virtual destructor with an empty body.
00226       virtual
00227       ~__codecvt_abstract_base() { }
00228 
00229       /**
00230        *  @brief  Convert from internal to external character set.
00231        *
00232        *  Converts input string of intern_type to output string of
00233        *  extern_type.  This function is a hook for derived classes to change
00234        *  the value returned.  @see out for more information.
00235       */
00236       virtual result
00237       do_out(state_type& __state, const intern_type* __from,
00238              const intern_type* __from_end, const intern_type*& __from_next,
00239              extern_type* __to, extern_type* __to_end,
00240              extern_type*& __to_next) const = 0;
00241 
            /// Pure virtual hook called by unshift().  @see unshift.
00242       virtual result
00243       do_unshift(state_type& __state, extern_type* __to,
00244                  extern_type* __to_end, extern_type*& __to_next) const = 0;
00245 
            /// Pure virtual hook called by in().  @see in.
00246       virtual result
00247       do_in(state_type& __state, const extern_type* __from,
00248             const extern_type* __from_end, const extern_type*& __from_next,
00249             intern_type* __to, intern_type* __to_end,
00250             intern_type*& __to_next) const = 0;
00251 
            /// Pure virtual hook called by encoding().
00252       virtual int
00253       do_encoding() const throw() = 0;
00254 
            /// Pure virtual hook called by always_noconv().
00255       virtual bool
00256       do_always_noconv() const throw() = 0;
00257 
            /// Pure virtual hook called by length().
00258       virtual int
00259       do_length(state_type&, const extern_type* __from,
00260                 const extern_type* __end, size_t __max) const = 0;
00261 
            /// Pure virtual hook called by max_length().
00262       virtual int
00263       do_max_length() const throw() = 0;
00264     };
00265 
00266   /**
00267    *  @brief  Primary class template codecvt.
00268    *  @ingroup locales
00269    *
00270    *  NB: Generic, mostly useless implementation.
00271    *
         *  Derives from __codecvt_abstract_base and declares an override for
         *  each of its pure virtual do_* functions.  Only the default
         *  constructor and the destructor have bodies in this class; the
         *  remaining members are declared here without a definition.
         *
00272   */
00273    template<typename _InternT, typename _ExternT, typename _StateT>
00274     class codecvt
00275     : public __codecvt_abstract_base<_InternT, _ExternT, _StateT>
00276     {
00277     public:
00278       // Types:
00279       typedef codecvt_base::result      result;
00280       typedef _InternT                  intern_type;
00281       typedef _ExternT                  extern_type;
00282       typedef _StateT                   state_type;
00283 
00284     protected:
            // C locale object held by this facet; the default constructor
            // below initializes it to 0.
00285       __c_locale                        _M_c_locale_codecvt;
00286 
00287     public:
            // Static locale::id member of this facet (declaration only).
00288       static locale::id                 id;
00289 
            /// Construct with @a __refs forwarded to the base class and
            /// _M_c_locale_codecvt set to 0.
00290       explicit
00291       codecvt(size_t __refs = 0)
00292       : __codecvt_abstract_base<_InternT, _ExternT, _StateT> (__refs),
00293         _M_c_locale_codecvt(0)
00294       { }
00295 
            /// Constructor taking a __c_locale and a reference count
            /// (declared only; no definition in this class body).
00296       explicit
00297       codecvt(__c_locale __cloc, size_t __refs = 0);
00298 
00299     protected:
            /// Protected virtual destructor with an empty body.
00300       virtual
00301       ~codecvt() { }
00302 
            // Overrides of the __codecvt_abstract_base hooks follow; each is
            // declared here with the same signature as the pure virtual it
            // overrides.
00303       virtual result
00304       do_out(state_type& __state, const intern_type* __from,
00305              const intern_type* __from_end, const intern_type*& __from_next,
00306              extern_type* __to, extern_type* __to_end,
00307              extern_type*& __to_next) const;
00308 
00309       virtual result
00310       do_unshift(state_type& __state, extern_type* __to,
00311                  extern_type* __to_end, extern_type*& __to_next) const;
00312 
00313       virtual result
00314       do_in(state_type& __state, const extern_type* __from,
00315             const extern_type* __from_end, const extern_type*& __from_next,
00316             intern_type* __to, intern_type* __to_end,
00317             intern_type*& __to_next) const;
00318 
00319       virtual int
00320       do_encoding() const throw();
00321 
00322       virtual bool
00323       do_always_noconv() const throw();
00324 
00325       virtual int
00326       do_length(state_type&, const extern_type* __from,
00327                 const extern_type* __end, size_t __max) const;
00328 
00329       virtual int
00330       do_max_length() const throw();
00331     };
00332 
00333   template<typename _InternT, typename _ExternT, typename _StateT>
          // Out-of-class definition of the static data member `id` that the
          // primary codecvt template declares.
00334     locale::id codecvt<_InternT, _ExternT, _StateT>::id;
00335 
00336   /// class codecvt<char, char, mbstate_t> specialization.
        ///
        /// Both intern_type and extern_type are char.  Every member function,
        /// including both constructors and the destructor, is only declared in
        /// this class body.
00337   template<>
00338     class codecvt<char, char, mbstate_t>
00339     : public __codecvt_abstract_base<char, char, mbstate_t>
00340     {
            // Gives messages<char> access to the non-public members.
00341       friend class messages<char>;
00342 
00343     public:
00344       // Types:
00345       typedef char                      intern_type;
00346       typedef char                      extern_type;
00347       typedef mbstate_t                 state_type;
00348 
00349     protected:
            // C locale object held by this facet.
00350       __c_locale                        _M_c_locale_codecvt;
00351 
00352     public:
            // Static locale::id member of this facet (declaration only).
00353       static locale::id id;
00354 
            /// Constructor taking only a reference count (declared only).
00355       explicit
00356       codecvt(size_t __refs = 0);
00357 
            /// Constructor taking a __c_locale and a reference count
            /// (declared only).
00358       explicit
00359       codecvt(__c_locale __cloc, size_t __refs = 0);
00360 
00361     protected:
            /// Protected virtual destructor (declared only).
00362       virtual
00363       ~codecvt();
00364 
            // Overrides of the __codecvt_abstract_base hooks (declarations).
00365       virtual result
00366       do_out(state_type& __state, const intern_type* __from,
00367              const intern_type* __from_end, const intern_type*& __from_next,
00368              extern_type* __to, extern_type* __to_end,
00369              extern_type*& __to_next) const;
00370 
00371       virtual result
00372       do_unshift(state_type& __state, extern_type* __to,
00373                  extern_type* __to_end, extern_type*& __to_next) const;
00374 
00375       virtual result
00376       do_in(state_type& __state, const extern_type* __from,
00377             const extern_type* __from_end, const extern_type*& __from_next,
00378             intern_type* __to, intern_type* __to_end,
00379             intern_type*& __to_next) const;
00380 
00381       virtual int
00382       do_encoding() const throw();
00383 
00384       virtual bool
00385       do_always_noconv() const throw();
00386 
00387       virtual int
00388       do_length(state_type&, const extern_type* __from,
00389                 const extern_type* __end, size_t __max) const;
00390 
00391       virtual int
00392       do_max_length() const throw();
00393   };
00394 
00395 #ifdef _GLIBCXX_USE_WCHAR_T
00396   /** @brief  Class codecvt<wchar_t, char, mbstate_t> specialization.
00397    *
00398    *  Converts between narrow and wide characters in the native character set
         *
         *  intern_type is wchar_t and extern_type is char.  Every member
         *  function, including both constructors and the destructor, is only
         *  declared in this class body.
00399    */
00400   template<>
00401     class codecvt<wchar_t, char, mbstate_t>
00402     : public __codecvt_abstract_base<wchar_t, char, mbstate_t>
00403     {
            // Gives messages<wchar_t> access to the non-public members.
00404       friend class messages<wchar_t>;
00405 
00406     public:
00407       // Types:
00408       typedef wchar_t                   intern_type;
00409       typedef char                      extern_type;
00410       typedef mbstate_t                 state_type;
00411 
00412     protected:
            // C locale object held by this facet.
00413       __c_locale                        _M_c_locale_codecvt;
00414 
00415     public:
            // Static locale::id member of this facet (declaration only).
00416       static locale::id                 id;
00417 
            /// Constructor taking only a reference count (declared only).
00418       explicit
00419       codecvt(size_t __refs = 0);
00420 
            /// Constructor taking a __c_locale and a reference count
            /// (declared only).
00421       explicit
00422       codecvt(__c_locale __cloc, size_t __refs = 0);
00423 
00424     protected:
            /// Protected virtual destructor (declared only).
00425       virtual
00426       ~codecvt();
00427 
            // Overrides of the __codecvt_abstract_base hooks (declarations).
00428       virtual result
00429       do_out(state_type& __state, const intern_type* __from,
00430              const intern_type* __from_end, const intern_type*& __from_next,
00431              extern_type* __to, extern_type* __to_end,
00432              extern_type*& __to_next) const;
00433 
00434       virtual result
00435       do_unshift(state_type& __state,
00436                  extern_type* __to, extern_type* __to_end,
00437                  extern_type*& __to_next) const;
00438 
00439       virtual result
00440       do_in(state_type& __state,
00441              const extern_type* __from, const extern_type* __from_end,
00442              const extern_type*& __from_next,
00443              intern_type* __to, intern_type* __to_end,
00444              intern_type*& __to_next) const;
00445 
00446       virtual
00447       int do_encoding() const throw();
00448 
00449       virtual
00450       bool do_always_noconv() const throw();
00451 
00452       virtual
00453       int do_length(state_type&, const extern_type* __from,
00454                     const extern_type* __end, size_t __max) const;
00455 
00456       virtual int
00457       do_max_length() const throw();
00458     };
00459 #endif //_GLIBCXX_USE_WCHAR_T
00460 
00461 #if __cplusplus >= 201103L
00462 #ifdef _GLIBCXX_USE_C99_STDINT_TR1
00463   /** @brief  Class codecvt<char16_t, char, mbstate_t> specialization.
00464    *
00465    *  Converts between UTF-16 and UTF-8.
         *
         *  Unlike the char and wchar_t specializations, this class declares
         *  no __c_locale member and no constructor taking a __c_locale.
00466    */
00467   template<>
00468     class codecvt<char16_t, char, mbstate_t>
00469     : public __codecvt_abstract_base<char16_t, char, mbstate_t>
00470     {
00471     public:
00472       // Types:
00473       typedef char16_t                  intern_type;
00474       typedef char                      extern_type;
00475       typedef mbstate_t                 state_type;
00476 
00477     public:
            // Static locale::id member of this facet (declaration only).
00478       static locale::id                 id;
00479 
            /// Construct the facet, forwarding @a __refs to the base class.
00480       explicit
00481       codecvt(size_t __refs = 0)
00482       : __codecvt_abstract_base<char16_t, char, mbstate_t>(__refs) { }
00483 
00484     protected:
            /// Protected virtual destructor (declared only).
00485       virtual
00486       ~codecvt();
00487 
            // Overrides of the __codecvt_abstract_base hooks (declarations).
00488       virtual result
00489       do_out(state_type& __state, const intern_type* __from,
00490              const intern_type* __from_end, const intern_type*& __from_next,
00491              extern_type* __to, extern_type* __to_end,
00492              extern_type*& __to_next) const;
00493 
00494       virtual result
00495       do_unshift(state_type& __state,
00496                  extern_type* __to, extern_type* __to_end,
00497                  extern_type*& __to_next) const;
00498 
00499       virtual result
00500       do_in(state_type& __state,
00501              const extern_type* __from, const extern_type* __from_end,
00502              const extern_type*& __from_next,
00503              intern_type* __to, intern_type* __to_end,
00504              intern_type*& __to_next) const;
00505 
00506       virtual
00507       int do_encoding() const throw();
00508 
00509       virtual
00510       bool do_always_noconv() const throw();
00511 
00512       virtual
00513       int do_length(state_type&, const extern_type* __from,
00514                     const extern_type* __end, size_t __max) const;
00515 
00516       virtual int
00517       do_max_length() const throw();
00518     };
00519 
00520   /** @brief  Class codecvt<char32_t, char, mbstate_t> specialization.
00521    *
00522    *  Converts between UTF-32 and UTF-8.
         *
         *  Unlike the char and wchar_t specializations, this class declares
         *  no __c_locale member and no constructor taking a __c_locale.
00523    */
00524   template<>
00525     class codecvt<char32_t, char, mbstate_t>
00526     : public __codecvt_abstract_base<char32_t, char, mbstate_t>
00527     {
00528     public:
00529       // Types:
00530       typedef char32_t                  intern_type;
00531       typedef char                      extern_type;
00532       typedef mbstate_t                 state_type;
00533 
00534     public:
            // Static locale::id member of this facet (declaration only).
00535       static locale::id                 id;
00536 
            /// Construct the facet, forwarding @a __refs to the base class.
00537       explicit
00538       codecvt(size_t __refs = 0)
00539       : __codecvt_abstract_base<char32_t, char, mbstate_t>(__refs) { }
00540 
00541     protected:
            /// Protected virtual destructor (declared only).
00542       virtual
00543       ~codecvt();
00544 
            // Overrides of the __codecvt_abstract_base hooks (declarations).
00545       virtual result
00546       do_out(state_type& __state, const intern_type* __from,
00547              const intern_type* __from_end, const intern_type*& __from_next,
00548              extern_type* __to, extern_type* __to_end,
00549              extern_type*& __to_next) const;
00550 
00551       virtual result
00552       do_unshift(state_type& __state,
00553                  extern_type* __to, extern_type* __to_end,
00554                  extern_type*& __to_next) const;
00555 
00556       virtual result
00557       do_in(state_type& __state,
00558              const extern_type* __from, const extern_type* __from_end,
00559              const extern_type*& __from_next,
00560              intern_type* __to, intern_type* __to_end,
00561              intern_type*& __to_next) const;
00562 
00563       virtual
00564       int do_encoding() const throw();
00565 
00566       virtual
00567       bool do_always_noconv() const throw();
00568 
00569       virtual
00570       int do_length(state_type&, const extern_type* __from,
00571                     const extern_type* __end, size_t __max) const;
00572 
00573       virtual int
00574       do_max_length() const throw();
00575     };
00576 
00577 #endif // _GLIBCXX_USE_C99_STDINT_TR1
00578 #endif // C++11
00579 
00580   /// class codecvt_byname [22.2.1.6].
        ///
        /// Derives from codecvt and selects the C locale object by name: for
        /// any name other than "C" or "POSIX" the inherited
        /// _M_c_locale_codecvt is destroyed and re-created from that name.
00581   template<typename _InternT, typename _ExternT, typename _StateT>
00582     class codecvt_byname : public codecvt<_InternT, _ExternT, _StateT>
00583     {
00584     public:
            /**
             *  @brief  Construct from a locale name.
             *  @param  __s  Locale name; passed to __builtin_strcmp, so it
             *               must be a valid C string.
             *  @param  __refs  Forwarded to the codecvt base constructor.
            */
00585       explicit
00586       codecvt_byname(const char* __s, size_t __refs = 0)
00587       : codecvt<_InternT, _ExternT, _StateT>(__refs)
00588       {
              // "C" and "POSIX" keep whatever the base constructor set up;
              // every other name replaces the inherited C locale object.
00589         if (__builtin_strcmp(__s, "C") != 0
00590             && __builtin_strcmp(__s, "POSIX") != 0)
00591           {
00592             this->_S_destroy_c_locale(this->_M_c_locale_codecvt);
00593             this->_S_create_c_locale(this->_M_c_locale_codecvt, __s);
00594           }
00595       }
00596 
00597 #if __cplusplus >= 201103L
            /// C++11 and later: delegates to the const char* constructor
            /// using __s.c_str().
00598       explicit
00599       codecvt_byname(const string& __s, size_t __refs = 0)
00600       : codecvt_byname(__s.c_str(), __refs) { }
00601 #endif
00602 
00603     protected:
            /// Protected virtual destructor with an empty body.
00604       virtual
00605       ~codecvt_byname() { }
00606     };
00607 
00608 #if __cplusplus >= 201103L && defined(_GLIBCXX_USE_C99_STDINT_TR1)
00609   template<>
          // codecvt_byname specialization for char16_t.  The locale name is
          // accepted for interface compatibility but is not used: both
          // constructors only forward the reference count to the codecvt base.
00610     class codecvt_byname<char16_t, char, mbstate_t>
00611     : public codecvt<char16_t, char, mbstate_t>
00612     {
00613     public:
            /// Construct from a locale name, which is ignored.  The parameter
            /// is left unnamed so that it cannot trigger an unused-parameter
            /// warning.
            /// @param  __refs  Forwarded to the codecvt base constructor.
00614       explicit
00615       codecvt_byname(const char*, size_t __refs = 0)
00616       : codecvt<char16_t, char, mbstate_t>(__refs) { }
00617 
            /// Delegates to the const char* constructor using __s.c_str().
00618       explicit
00619       codecvt_byname(const string& __s, size_t __refs = 0)
00620       : codecvt_byname(__s.c_str(), __refs) { }
00621 
00622     protected:
            /// Protected virtual destructor with an empty body.
00623       virtual
00624       ~codecvt_byname() { }
00625     };
00626 
00627   template<>
          // codecvt_byname specialization for char32_t.  The locale name is
          // accepted for interface compatibility but is not used: both
          // constructors only forward the reference count to the codecvt base.
00628     class codecvt_byname<char32_t, char, mbstate_t>
00629     : public codecvt<char32_t, char, mbstate_t>
00630     {
00631     public:
            /// Construct from a locale name, which is ignored.  The parameter
            /// is left unnamed so that it cannot trigger an unused-parameter
            /// warning.
            /// @param  __refs  Forwarded to the codecvt base constructor.
00632       explicit
00633       codecvt_byname(const char*, size_t __refs = 0)
00634       : codecvt<char32_t, char, mbstate_t>(__refs) { }
00635 
            /// Delegates to the const char* constructor using __s.c_str().
00636       explicit
00637       codecvt_byname(const string& __s, size_t __refs = 0)
00638       : codecvt_byname(__s.c_str(), __refs) { }
00639 
00640     protected:
            /// Protected virtual destructor with an empty body.
00641       virtual
00642       ~codecvt_byname() { }
00643     };
00644 #endif
00645 
00646   // Inhibit implicit instantiations for required instantiations,
00647   // which are defined via explicit instantiations elsewhere.
        // Everything up to the matching #endif is compiled only when
        // _GLIBCXX_EXTERN_TEMPLATE is non-zero.
00648 #if _GLIBCXX_EXTERN_TEMPLATE
        // char facets: codecvt_byname plus the use_facet/has_facet
        // instantiations for codecvt<char, char, mbstate_t>.
00649   extern template class codecvt_byname<char, char, mbstate_t>;
00650 
00651   extern template
00652     const codecvt<char, char, mbstate_t>&
00653     use_facet<codecvt<char, char, mbstate_t> >(const locale&);
00654 
00655   extern template
00656     bool
00657     has_facet<codecvt<char, char, mbstate_t> >(const locale&);
00658 
00659 #ifdef _GLIBCXX_USE_WCHAR_T
        // wchar_t facets: the same three declarations for
        // codecvt<wchar_t, char, mbstate_t>.
00660   extern template class codecvt_byname<wchar_t, char, mbstate_t>;
00661 
00662   extern template
00663     const codecvt<wchar_t, char, mbstate_t>&
00664     use_facet<codecvt<wchar_t, char, mbstate_t> >(const locale&);
00665 
00666   extern template
00667     bool
00668     has_facet<codecvt<wchar_t, char, mbstate_t> >(const locale&);
00669 #endif // _GLIBCXX_USE_WCHAR_T
00670 
00671 #if __cplusplus >= 201103L && defined(_GLIBCXX_USE_C99_STDINT_TR1)
        // char16_t / char32_t: only the codecvt_byname classes are declared
        // extern here.
00672   extern template class codecvt_byname<char16_t, char, mbstate_t>;
00673   extern template class codecvt_byname<char32_t, char, mbstate_t>;
00674 #endif // C++11 && _GLIBCXX_USE_C99_STDINT_TR1
00675 
00676 #endif // _GLIBCXX_EXTERN_TEMPLATE
00677 
00678 _GLIBCXX_END_NAMESPACE_VERSION
00679 } // namespace std
00680 
00681 #endif // _CODECVT_H