libstdc++
|
00001 // Locale support (codecvt) -*- C++ -*- 00002 00003 // Copyright (C) 2000-2015 Free Software Foundation, Inc. 00004 // 00005 // This file is part of the GNU ISO C++ Library. This library is free 00006 // software; you can redistribute it and/or modify it under the 00007 // terms of the GNU General Public License as published by the 00008 // Free Software Foundation; either version 3, or (at your option) 00009 // any later version. 00010 00011 // This library is distributed in the hope that it will be useful, 00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 // GNU General Public License for more details. 00015 00016 // Under Section 7 of GPL version 3, you are granted additional 00017 // permissions described in the GCC Runtime Library Exception, version 00018 // 3.1, as published by the Free Software Foundation. 00019 00020 // You should have received a copy of the GNU General Public License and 00021 // a copy of the GCC Runtime Library Exception along with this program; 00022 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 00023 // <http://www.gnu.org/licenses/>. 00024 00025 // 00026 // ISO C++ 14882: 22.2.1.5 Template class codecvt 00027 // 00028 00029 // Written by Benjamin Kosnik <bkoz@redhat.com> 00030 00031 /** @file ext/codecvt_specializations.h 00032 * This file is a GNU extension to the Standard C++ Library. 00033 */ 00034 00035 #ifndef _EXT_CODECVT_SPECIALIZATIONS_H 00036 #define _EXT_CODECVT_SPECIALIZATIONS_H 1 00037 00038 #include <bits/c++config.h> 00039 #include <locale> 00040 #include <iconv.h> 00041 00042 namespace __gnu_cxx _GLIBCXX_VISIBILITY(default) 00043 { 00044 _GLIBCXX_BEGIN_NAMESPACE_CXX11 00045 _GLIBCXX_BEGIN_NAMESPACE_VERSION 00046 00047 /// Extension to use iconv for dealing with character encodings. 00048 // This includes conversions and comparisons between various character 00049 // sets. This object encapsulates data that may need to be shared between 00050 // char_traits, codecvt and ctype. 00051 class encoding_state 00052 { 00053 public: 00054 // Types: 00055 // NB: A conversion descriptor subsumes and enhances the 00056 // functionality of a simple state type such as mbstate_t. 00057 typedef iconv_t descriptor_type; 00058 00059 protected: 00060 // Name of internal character set encoding. 00061 std::string _M_int_enc; 00062 00063 // Name of external character set encoding. 00064 std::string _M_ext_enc; 00065 00066 // Conversion descriptor between external encoding to internal encoding. 00067 descriptor_type _M_in_desc; 00068 00069 // Conversion descriptor between internal encoding to external encoding. 00070 descriptor_type _M_out_desc; 00071 00072 // The byte-order marker for the external encoding, if necessary. 00073 int _M_ext_bom; 00074 00075 // The byte-order marker for the internal encoding, if necessary. 00076 int _M_int_bom; 00077 00078 // Number of external bytes needed to construct one complete 00079 // character in the internal encoding. 00080 // NB: -1 indicates variable, or stateful, encodings. 00081 int _M_bytes; 00082 00083 public: 00084 explicit 00085 encoding_state() 00086 : _M_in_desc(0), _M_out_desc(0), _M_ext_bom(0), _M_int_bom(0), _M_bytes(0) 00087 { } 00088 00089 explicit 00090 encoding_state(const char* __int, const char* __ext, 00091 int __ibom = 0, int __ebom = 0, int __bytes = 1) 00092 : _M_int_enc(__int), _M_ext_enc(__ext), _M_in_desc(0), _M_out_desc(0), 00093 _M_ext_bom(__ebom), _M_int_bom(__ibom), _M_bytes(__bytes) 00094 { init(); } 00095 00096 // 21.1.2 traits typedefs 00097 // p4 00098 // typedef STATE_T state_type 00099 // requires: state_type shall meet the requirements of 00100 // CopyConstructible types (20.1.3) 00101 // NB: This does not preserve the actual state of the conversion 00102 // descriptor member, but it does duplicate the encoding 00103 // information. 00104 encoding_state(const encoding_state& __obj) : _M_in_desc(0), _M_out_desc(0) 00105 { construct(__obj); } 00106 00107 // Need assignment operator as well. 00108 encoding_state& 00109 operator=(const encoding_state& __obj) 00110 { 00111 construct(__obj); 00112 return *this; 00113 } 00114 00115 ~encoding_state() 00116 { destroy(); } 00117 00118 bool 00119 good() const throw() 00120 { 00121 const descriptor_type __err = (iconv_t)(-1); 00122 bool __test = _M_in_desc && _M_in_desc != __err; 00123 __test &= _M_out_desc && _M_out_desc != __err; 00124 return __test; 00125 } 00126 00127 int 00128 character_ratio() const 00129 { return _M_bytes; } 00130 00131 const std::string 00132 internal_encoding() const 00133 { return _M_int_enc; } 00134 00135 int 00136 internal_bom() const 00137 { return _M_int_bom; } 00138 00139 const std::string 00140 external_encoding() const 00141 { return _M_ext_enc; } 00142 00143 int 00144 external_bom() const 00145 { return _M_ext_bom; } 00146 00147 const descriptor_type& 00148 in_descriptor() const 00149 { return _M_in_desc; } 00150 00151 const descriptor_type& 00152 out_descriptor() const 00153 { return _M_out_desc; } 00154 00155 protected: 00156 void 00157 init() 00158 { 00159 const descriptor_type __err = (iconv_t)(-1); 00160 const bool __have_encodings = _M_int_enc.size() && _M_ext_enc.size(); 00161 if (!_M_in_desc && __have_encodings) 00162 { 00163 _M_in_desc = iconv_open(_M_int_enc.c_str(), _M_ext_enc.c_str()); 00164 if (_M_in_desc == __err) 00165 std::__throw_runtime_error(__N("encoding_state::_M_init " 00166 "creating iconv input descriptor failed")); 00167 } 00168 if (!_M_out_desc && __have_encodings) 00169 { 00170 _M_out_desc = iconv_open(_M_ext_enc.c_str(), _M_int_enc.c_str()); 00171 if (_M_out_desc == __err) 00172 std::__throw_runtime_error(__N("encoding_state::_M_init " 00173 "creating iconv output descriptor failed")); 00174 } 00175 } 00176 00177 void 00178 construct(const encoding_state& __obj) 00179 { 00180 destroy(); 00181 _M_int_enc = __obj._M_int_enc; 00182 _M_ext_enc = __obj._M_ext_enc; 00183 _M_ext_bom = __obj._M_ext_bom; 00184 _M_int_bom = __obj._M_int_bom; 00185 _M_bytes = __obj._M_bytes; 00186 init(); 00187 } 00188 00189 void 00190 destroy() throw() 00191 { 00192 const descriptor_type __err = (iconv_t)(-1); 00193 if (_M_in_desc && _M_in_desc != __err) 00194 { 00195 iconv_close(_M_in_desc); 00196 _M_in_desc = 0; 00197 } 00198 if (_M_out_desc && _M_out_desc != __err) 00199 { 00200 iconv_close(_M_out_desc); 00201 _M_out_desc = 0; 00202 } 00203 } 00204 }; 00205 00206 /// encoding_char_traits 00207 // Custom traits type with encoding_state for the state type, and the 00208 // associated fpos<encoding_state> for the position type, all other 00209 // bits equivalent to the required char_traits instantiations. 00210 template<typename _CharT> 00211 struct encoding_char_traits 00212 : public std::char_traits<_CharT> 00213 { 00214 typedef encoding_state state_type; 00215 typedef typename std::fpos<state_type> pos_type; 00216 }; 00217 00218 _GLIBCXX_END_NAMESPACE_VERSION 00219 _GLIBCXX_END_NAMESPACE_CXX11 00220 } // namespace 00221 00222 00223 namespace std _GLIBCXX_VISIBILITY(default) 00224 { 00225 _GLIBCXX_BEGIN_NAMESPACE_VERSION 00226 00227 using __gnu_cxx::encoding_state; 00228 00229 /// codecvt<InternT, _ExternT, encoding_state> specialization. 00230 // This partial specialization takes advantage of iconv to provide 00231 // code conversions between a large number of character encodings. 00232 template<typename _InternT, typename _ExternT> 00233 class codecvt<_InternT, _ExternT, encoding_state> 00234 : public __codecvt_abstract_base<_InternT, _ExternT, encoding_state> 00235 { 00236 public: 00237 // Types: 00238 typedef codecvt_base::result result; 00239 typedef _InternT intern_type; 00240 typedef _ExternT extern_type; 00241 typedef __gnu_cxx::encoding_state state_type; 00242 typedef state_type::descriptor_type descriptor_type; 00243 00244 // Data Members: 00245 static locale::id id; 00246 00247 explicit 00248 codecvt(size_t __refs = 0) 00249 : __codecvt_abstract_base<intern_type, extern_type, state_type>(__refs) 00250 { } 00251 00252 explicit 00253 codecvt(state_type& __enc, size_t __refs = 0) 00254 : __codecvt_abstract_base<intern_type, extern_type, state_type>(__refs) 00255 { } 00256 00257 protected: 00258 virtual 00259 ~codecvt() { } 00260 00261 virtual result 00262 do_out(state_type& __state, const intern_type* __from, 00263 const intern_type* __from_end, const intern_type*& __from_next, 00264 extern_type* __to, extern_type* __to_end, 00265 extern_type*& __to_next) const; 00266 00267 virtual result 00268 do_unshift(state_type& __state, extern_type* __to, 00269 extern_type* __to_end, extern_type*& __to_next) const; 00270 00271 virtual result 00272 do_in(state_type& __state, const extern_type* __from, 00273 const extern_type* __from_end, const extern_type*& __from_next, 00274 intern_type* __to, intern_type* __to_end, 00275 intern_type*& __to_next) const; 00276 00277 virtual int 00278 do_encoding() const throw(); 00279 00280 virtual bool 00281 do_always_noconv() const throw(); 00282 00283 virtual int 00284 do_length(state_type&, const extern_type* __from, 00285 const extern_type* __end, size_t __max) const; 00286 00287 virtual int 00288 do_max_length() const throw(); 00289 }; 00290 00291 template<typename _InternT, typename _ExternT> 00292 locale::id 00293 codecvt<_InternT, _ExternT, encoding_state>::id; 00294 00295 // This adaptor works around the signature problems of the second 00296 // argument to iconv(): SUSv2 and others use 'const char**', but glibc 2.2 00297 // uses 'char**', which matches the POSIX 1003.1-2001 standard. 00298 // Using this adaptor, g++ will do the work for us. 00299 template<typename _Tp> 00300 inline size_t 00301 __iconv_adaptor(size_t(*__func)(iconv_t, _Tp, size_t*, char**, size_t*), 00302 iconv_t __cd, char** __inbuf, size_t* __inbytes, 00303 char** __outbuf, size_t* __outbytes) 00304 { return __func(__cd, (_Tp)__inbuf, __inbytes, __outbuf, __outbytes); } 00305 00306 template<typename _InternT, typename _ExternT> 00307 codecvt_base::result 00308 codecvt<_InternT, _ExternT, encoding_state>:: 00309 do_out(state_type& __state, const intern_type* __from, 00310 const intern_type* __from_end, const intern_type*& __from_next, 00311 extern_type* __to, extern_type* __to_end, 00312 extern_type*& __to_next) const 00313 { 00314 result __ret = codecvt_base::error; 00315 if (__state.good()) 00316 { 00317 const descriptor_type& __desc = __state.out_descriptor(); 00318 const size_t __fmultiple = sizeof(intern_type); 00319 size_t __fbytes = __fmultiple * (__from_end - __from); 00320 const size_t __tmultiple = sizeof(extern_type); 00321 size_t __tbytes = __tmultiple * (__to_end - __to); 00322 00323 // Argument list for iconv specifies a byte sequence. Thus, 00324 // all to/from arrays must be brutally casted to char*. 00325 char* __cto = reinterpret_cast<char*>(__to); 00326 char* __cfrom; 00327 size_t __conv; 00328 00329 // Some encodings need a byte order marker as the first item 00330 // in the byte stream, to designate endian-ness. The default 00331 // value for the byte order marker is NULL, so if this is 00332 // the case, it's not necessary and we can just go on our 00333 // merry way. 00334 int __int_bom = __state.internal_bom(); 00335 if (__int_bom) 00336 { 00337 size_t __size = __from_end - __from; 00338 intern_type* __cfixed = static_cast<intern_type*> 00339 (__builtin_alloca(sizeof(intern_type) * (__size + 1))); 00340 __cfixed[0] = static_cast<intern_type>(__int_bom); 00341 char_traits<intern_type>::copy(__cfixed + 1, __from, __size); 00342 __cfrom = reinterpret_cast<char*>(__cfixed); 00343 __conv = __iconv_adaptor(iconv, __desc, &__cfrom, 00344 &__fbytes, &__cto, &__tbytes); 00345 } 00346 else 00347 { 00348 intern_type* __cfixed = const_cast<intern_type*>(__from); 00349 __cfrom = reinterpret_cast<char*>(__cfixed); 00350 __conv = __iconv_adaptor(iconv, __desc, &__cfrom, &__fbytes, 00351 &__cto, &__tbytes); 00352 } 00353 00354 if (__conv != size_t(-1)) 00355 { 00356 __from_next = reinterpret_cast<const intern_type*>(__cfrom); 00357 __to_next = reinterpret_cast<extern_type*>(__cto); 00358 __ret = codecvt_base::ok; 00359 } 00360 else 00361 { 00362 if (__fbytes < __fmultiple * (__from_end - __from)) 00363 { 00364 __from_next = reinterpret_cast<const intern_type*>(__cfrom); 00365 __to_next = reinterpret_cast<extern_type*>(__cto); 00366 __ret = codecvt_base::partial; 00367 } 00368 else 00369 __ret = codecvt_base::error; 00370 } 00371 } 00372 return __ret; 00373 } 00374 00375 template<typename _InternT, typename _ExternT> 00376 codecvt_base::result 00377 codecvt<_InternT, _ExternT, encoding_state>:: 00378 do_unshift(state_type& __state, extern_type* __to, 00379 extern_type* __to_end, extern_type*& __to_next) const 00380 { 00381 result __ret = codecvt_base::error; 00382 if (__state.good()) 00383 { 00384 const descriptor_type& __desc = __state.in_descriptor(); 00385 const size_t __tmultiple = sizeof(intern_type); 00386 size_t __tlen = __tmultiple * (__to_end - __to); 00387 00388 // Argument list for iconv specifies a byte sequence. Thus, 00389 // all to/from arrays must be brutally casted to char*. 00390 char* __cto = reinterpret_cast<char*>(__to); 00391 size_t __conv = __iconv_adaptor(iconv,__desc, 0, 0, 00392 &__cto, &__tlen); 00393 00394 if (__conv != size_t(-1)) 00395 { 00396 __to_next = reinterpret_cast<extern_type*>(__cto); 00397 if (__tlen == __tmultiple * (__to_end - __to)) 00398 __ret = codecvt_base::noconv; 00399 else if (__tlen == 0) 00400 __ret = codecvt_base::ok; 00401 else 00402 __ret = codecvt_base::partial; 00403 } 00404 else 00405 __ret = codecvt_base::error; 00406 } 00407 return __ret; 00408 } 00409 00410 template<typename _InternT, typename _ExternT> 00411 codecvt_base::result 00412 codecvt<_InternT, _ExternT, encoding_state>:: 00413 do_in(state_type& __state, const extern_type* __from, 00414 const extern_type* __from_end, const extern_type*& __from_next, 00415 intern_type* __to, intern_type* __to_end, 00416 intern_type*& __to_next) const 00417 { 00418 result __ret = codecvt_base::error; 00419 if (__state.good()) 00420 { 00421 const descriptor_type& __desc = __state.in_descriptor(); 00422 const size_t __fmultiple = sizeof(extern_type); 00423 size_t __flen = __fmultiple * (__from_end - __from); 00424 const size_t __tmultiple = sizeof(intern_type); 00425 size_t __tlen = __tmultiple * (__to_end - __to); 00426 00427 // Argument list for iconv specifies a byte sequence. Thus, 00428 // all to/from arrays must be brutally casted to char*. 00429 char* __cto = reinterpret_cast<char*>(__to); 00430 char* __cfrom; 00431 size_t __conv; 00432 00433 // Some encodings need a byte order marker as the first item 00434 // in the byte stream, to designate endian-ness. The default 00435 // value for the byte order marker is NULL, so if this is 00436 // the case, it's not necessary and we can just go on our 00437 // merry way. 00438 int __ext_bom = __state.external_bom(); 00439 if (__ext_bom) 00440 { 00441 size_t __size = __from_end - __from; 00442 extern_type* __cfixed = static_cast<extern_type*> 00443 (__builtin_alloca(sizeof(extern_type) * (__size + 1))); 00444 __cfixed[0] = static_cast<extern_type>(__ext_bom); 00445 char_traits<extern_type>::copy(__cfixed + 1, __from, __size); 00446 __cfrom = reinterpret_cast<char*>(__cfixed); 00447 __conv = __iconv_adaptor(iconv, __desc, &__cfrom, 00448 &__flen, &__cto, &__tlen); 00449 } 00450 else 00451 { 00452 extern_type* __cfixed = const_cast<extern_type*>(__from); 00453 __cfrom = reinterpret_cast<char*>(__cfixed); 00454 __conv = __iconv_adaptor(iconv, __desc, &__cfrom, 00455 &__flen, &__cto, &__tlen); 00456 } 00457 00458 00459 if (__conv != size_t(-1)) 00460 { 00461 __from_next = reinterpret_cast<const extern_type*>(__cfrom); 00462 __to_next = reinterpret_cast<intern_type*>(__cto); 00463 __ret = codecvt_base::ok; 00464 } 00465 else 00466 { 00467 if (__flen < static_cast<size_t>(__from_end - __from)) 00468 { 00469 __from_next = reinterpret_cast<const extern_type*>(__cfrom); 00470 __to_next = reinterpret_cast<intern_type*>(__cto); 00471 __ret = codecvt_base::partial; 00472 } 00473 else 00474 __ret = codecvt_base::error; 00475 } 00476 } 00477 return __ret; 00478 } 00479 00480 template<typename _InternT, typename _ExternT> 00481 int 00482 codecvt<_InternT, _ExternT, encoding_state>:: 00483 do_encoding() const throw() 00484 { 00485 int __ret = 0; 00486 if (sizeof(_ExternT) <= sizeof(_InternT)) 00487 __ret = sizeof(_InternT) / sizeof(_ExternT); 00488 return __ret; 00489 } 00490 00491 template<typename _InternT, typename _ExternT> 00492 bool 00493 codecvt<_InternT, _ExternT, encoding_state>:: 00494 do_always_noconv() const throw() 00495 { return false; } 00496 00497 template<typename _InternT, typename _ExternT> 00498 int 00499 codecvt<_InternT, _ExternT, encoding_state>:: 00500 do_length(state_type&, const extern_type* __from, 00501 const extern_type* __end, size_t __max) const 00502 { return std::min(__max, static_cast<size_t>(__end - __from)); } 00503 00504 // _GLIBCXX_RESOLVE_LIB_DEFECTS 00505 // 74. Garbled text for codecvt::do_max_length 00506 template<typename _InternT, typename _ExternT> 00507 int 00508 codecvt<_InternT, _ExternT, encoding_state>:: 00509 do_max_length() const throw() 00510 { return 1; } 00511 00512 _GLIBCXX_END_NAMESPACE_VERSION 00513 } // namespace 00514 00515 #endif