libstdc++
|
00001 // Locale support (codecvt) -*- C++ -*- 00002 00003 // Copyright (C) 2000-2015 Free Software Foundation, Inc. 00004 // 00005 // This file is part of the GNU ISO C++ Library. This library is free 00006 // software; you can redistribute it and/or modify it under the 00007 // terms of the GNU General Public License as published by the 00008 // Free Software Foundation; either version 3, or (at your option) 00009 // any later version. 00010 00011 // This library is distributed in the hope that it will be useful, 00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 // GNU General Public License for more details. 00015 00016 // Under Section 7 of GPL version 3, you are granted additional 00017 // permissions described in the GCC Runtime Library Exception, version 00018 // 3.1, as published by the Free Software Foundation. 00019 00020 // You should have received a copy of the GNU General Public License and 00021 // a copy of the GCC Runtime Library Exception along with this program; 00022 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 00023 // <http://www.gnu.org/licenses/>. 00024 00025 /** @file bits/codecvt.h 00026 * This is an internal header file, included by other library headers. 00027 * Do not attempt to use it directly. @headername{locale} 00028 */ 00029 00030 // 00031 // ISO C++ 14882: 22.2.1.5 Template class codecvt 00032 // 00033 00034 // Written by Benjamin Kosnik <bkoz@redhat.com> 00035 00036 #ifndef _CODECVT_H 00037 #define _CODECVT_H 1 00038 00039 #pragma GCC system_header 00040 00041 namespace std _GLIBCXX_VISIBILITY(default) 00042 { 00043 _GLIBCXX_BEGIN_NAMESPACE_VERSION 00044 00045 /// Empty base class for codecvt facet [22.2.1.5]. 00046 class codecvt_base 00047 { 00048 public: 00049 enum result 00050 { 00051 ok, 00052 partial, 00053 error, 00054 noconv 00055 }; 00056 }; 00057 00058 /** 00059 * @brief Common base for codecvt functions. 00060 * 00061 * This template class provides implementations of the public functions 00062 * that forward to the protected virtual functions. 00063 * 00064 * This template also provides abstract stubs for the protected virtual 00065 * functions. 00066 */ 00067 template<typename _InternT, typename _ExternT, typename _StateT> 00068 class __codecvt_abstract_base 00069 : public locale::facet, public codecvt_base 00070 { 00071 public: 00072 // Types: 00073 typedef codecvt_base::result result; 00074 typedef _InternT intern_type; 00075 typedef _ExternT extern_type; 00076 typedef _StateT state_type; 00077 00078 // 22.2.1.5.1 codecvt members 00079 /** 00080 * @brief Convert from internal to external character set. 00081 * 00082 * Converts input string of intern_type to output string of 00083 * extern_type. This is analogous to wcsrtombs. It does this by 00084 * calling codecvt::do_out. 00085 * 00086 * The source and destination character sets are determined by the 00087 * facet's locale, internal and external types. 00088 * 00089 * The characters in [from,from_end) are converted and written to 00090 * [to,to_end). from_next and to_next are set to point to the 00091 * character following the last successfully converted character, 00092 * respectively. If the result needed no conversion, from_next and 00093 * to_next are not affected. 00094 * 00095 * The @a state argument should be initialized if the input is at the 00096 * beginning and carried from a previous call if continuing 00097 * conversion. There are no guarantees about how @a state is used. 00098 * 00099 * The result returned is a member of codecvt_base::result. If 00100 * all the input is converted, returns codecvt_base::ok. If no 00101 * conversion is necessary, returns codecvt_base::noconv. If 00102 * the input ends early or there is insufficient space in the 00103 * output, returns codecvt_base::partial. Otherwise the 00104 * conversion failed and codecvt_base::error is returned. 00105 * 00106 * @param __state Persistent conversion state data. 00107 * @param __from Start of input. 00108 * @param __from_end End of input. 00109 * @param __from_next Returns start of unconverted data. 00110 * @param __to Start of output buffer. 00111 * @param __to_end End of output buffer. 00112 * @param __to_next Returns start of unused output area. 00113 * @return codecvt_base::result. 00114 */ 00115 result 00116 out(state_type& __state, const intern_type* __from, 00117 const intern_type* __from_end, const intern_type*& __from_next, 00118 extern_type* __to, extern_type* __to_end, 00119 extern_type*& __to_next) const 00120 { 00121 return this->do_out(__state, __from, __from_end, __from_next, 00122 __to, __to_end, __to_next); 00123 } 00124 00125 /** 00126 * @brief Reset conversion state. 00127 * 00128 * Writes characters to output that would restore @a state to initial 00129 * conditions. The idea is that if a partial conversion occurs, then 00130 * the converting the characters written by this function would leave 00131 * the state in initial conditions, rather than partial conversion 00132 * state. It does this by calling codecvt::do_unshift(). 00133 * 00134 * For example, if 4 external characters always converted to 1 internal 00135 * character, and input to in() had 6 external characters with state 00136 * saved, this function would write two characters to the output and 00137 * set the state to initialized conditions. 00138 * 00139 * The source and destination character sets are determined by the 00140 * facet's locale, internal and external types. 00141 * 00142 * The result returned is a member of codecvt_base::result. If the 00143 * state could be reset and data written, returns codecvt_base::ok. If 00144 * no conversion is necessary, returns codecvt_base::noconv. If the 00145 * output has insufficient space, returns codecvt_base::partial. 00146 * Otherwise the reset failed and codecvt_base::error is returned. 00147 * 00148 * @param __state Persistent conversion state data. 00149 * @param __to Start of output buffer. 00150 * @param __to_end End of output buffer. 00151 * @param __to_next Returns start of unused output area. 00152 * @return codecvt_base::result. 00153 */ 00154 result 00155 unshift(state_type& __state, extern_type* __to, extern_type* __to_end, 00156 extern_type*& __to_next) const 00157 { return this->do_unshift(__state, __to,__to_end,__to_next); } 00158 00159 /** 00160 * @brief Convert from external to internal character set. 00161 * 00162 * Converts input string of extern_type to output string of 00163 * intern_type. This is analogous to mbsrtowcs. It does this by 00164 * calling codecvt::do_in. 00165 * 00166 * The source and destination character sets are determined by the 00167 * facet's locale, internal and external types. 00168 * 00169 * The characters in [from,from_end) are converted and written to 00170 * [to,to_end). from_next and to_next are set to point to the 00171 * character following the last successfully converted character, 00172 * respectively. If the result needed no conversion, from_next and 00173 * to_next are not affected. 00174 * 00175 * The @a state argument should be initialized if the input is at the 00176 * beginning and carried from a previous call if continuing 00177 * conversion. There are no guarantees about how @a state is used. 00178 * 00179 * The result returned is a member of codecvt_base::result. If 00180 * all the input is converted, returns codecvt_base::ok. If no 00181 * conversion is necessary, returns codecvt_base::noconv. If 00182 * the input ends early or there is insufficient space in the 00183 * output, returns codecvt_base::partial. Otherwise the 00184 * conversion failed and codecvt_base::error is returned. 00185 * 00186 * @param __state Persistent conversion state data. 00187 * @param __from Start of input. 00188 * @param __from_end End of input. 00189 * @param __from_next Returns start of unconverted data. 00190 * @param __to Start of output buffer. 00191 * @param __to_end End of output buffer. 00192 * @param __to_next Returns start of unused output area. 00193 * @return codecvt_base::result. 00194 */ 00195 result 00196 in(state_type& __state, const extern_type* __from, 00197 const extern_type* __from_end, const extern_type*& __from_next, 00198 intern_type* __to, intern_type* __to_end, 00199 intern_type*& __to_next) const 00200 { 00201 return this->do_in(__state, __from, __from_end, __from_next, 00202 __to, __to_end, __to_next); 00203 } 00204 00205 int 00206 encoding() const throw() 00207 { return this->do_encoding(); } 00208 00209 bool 00210 always_noconv() const throw() 00211 { return this->do_always_noconv(); } 00212 00213 int 00214 length(state_type& __state, const extern_type* __from, 00215 const extern_type* __end, size_t __max) const 00216 { return this->do_length(__state, __from, __end, __max); } 00217 00218 int 00219 max_length() const throw() 00220 { return this->do_max_length(); } 00221 00222 protected: 00223 explicit 00224 __codecvt_abstract_base(size_t __refs = 0) : locale::facet(__refs) { } 00225 00226 virtual 00227 ~__codecvt_abstract_base() { } 00228 00229 /** 00230 * @brief Convert from internal to external character set. 00231 * 00232 * Converts input string of intern_type to output string of 00233 * extern_type. This function is a hook for derived classes to change 00234 * the value returned. @see out for more information. 00235 */ 00236 virtual result 00237 do_out(state_type& __state, const intern_type* __from, 00238 const intern_type* __from_end, const intern_type*& __from_next, 00239 extern_type* __to, extern_type* __to_end, 00240 extern_type*& __to_next) const = 0; 00241 00242 virtual result 00243 do_unshift(state_type& __state, extern_type* __to, 00244 extern_type* __to_end, extern_type*& __to_next) const = 0; 00245 00246 virtual result 00247 do_in(state_type& __state, const extern_type* __from, 00248 const extern_type* __from_end, const extern_type*& __from_next, 00249 intern_type* __to, intern_type* __to_end, 00250 intern_type*& __to_next) const = 0; 00251 00252 virtual int 00253 do_encoding() const throw() = 0; 00254 00255 virtual bool 00256 do_always_noconv() const throw() = 0; 00257 00258 virtual int 00259 do_length(state_type&, const extern_type* __from, 00260 const extern_type* __end, size_t __max) const = 0; 00261 00262 virtual int 00263 do_max_length() const throw() = 0; 00264 }; 00265 00266 /** 00267 * @brief Primary class template codecvt. 00268 * @ingroup locales 00269 * 00270 * NB: Generic, mostly useless implementation. 00271 * 00272 */ 00273 template<typename _InternT, typename _ExternT, typename _StateT> 00274 class codecvt 00275 : public __codecvt_abstract_base<_InternT, _ExternT, _StateT> 00276 { 00277 public: 00278 // Types: 00279 typedef codecvt_base::result result; 00280 typedef _InternT intern_type; 00281 typedef _ExternT extern_type; 00282 typedef _StateT state_type; 00283 00284 protected: 00285 __c_locale _M_c_locale_codecvt; 00286 00287 public: 00288 static locale::id id; 00289 00290 explicit 00291 codecvt(size_t __refs = 0) 00292 : __codecvt_abstract_base<_InternT, _ExternT, _StateT> (__refs), 00293 _M_c_locale_codecvt(0) 00294 { } 00295 00296 explicit 00297 codecvt(__c_locale __cloc, size_t __refs = 0); 00298 00299 protected: 00300 virtual 00301 ~codecvt() { } 00302 00303 virtual result 00304 do_out(state_type& __state, const intern_type* __from, 00305 const intern_type* __from_end, const intern_type*& __from_next, 00306 extern_type* __to, extern_type* __to_end, 00307 extern_type*& __to_next) const; 00308 00309 virtual result 00310 do_unshift(state_type& __state, extern_type* __to, 00311 extern_type* __to_end, extern_type*& __to_next) const; 00312 00313 virtual result 00314 do_in(state_type& __state, const extern_type* __from, 00315 const extern_type* __from_end, const extern_type*& __from_next, 00316 intern_type* __to, intern_type* __to_end, 00317 intern_type*& __to_next) const; 00318 00319 virtual int 00320 do_encoding() const throw(); 00321 00322 virtual bool 00323 do_always_noconv() const throw(); 00324 00325 virtual int 00326 do_length(state_type&, const extern_type* __from, 00327 const extern_type* __end, size_t __max) const; 00328 00329 virtual int 00330 do_max_length() const throw(); 00331 }; 00332 00333 template<typename _InternT, typename _ExternT, typename _StateT> 00334 locale::id codecvt<_InternT, _ExternT, _StateT>::id; 00335 00336 /// class codecvt<char, char, mbstate_t> specialization. 00337 template<> 00338 class codecvt<char, char, mbstate_t> 00339 : public __codecvt_abstract_base<char, char, mbstate_t> 00340 { 00341 friend class messages<char>; 00342 00343 public: 00344 // Types: 00345 typedef char intern_type; 00346 typedef char extern_type; 00347 typedef mbstate_t state_type; 00348 00349 protected: 00350 __c_locale _M_c_locale_codecvt; 00351 00352 public: 00353 static locale::id id; 00354 00355 explicit 00356 codecvt(size_t __refs = 0); 00357 00358 explicit 00359 codecvt(__c_locale __cloc, size_t __refs = 0); 00360 00361 protected: 00362 virtual 00363 ~codecvt(); 00364 00365 virtual result 00366 do_out(state_type& __state, const intern_type* __from, 00367 const intern_type* __from_end, const intern_type*& __from_next, 00368 extern_type* __to, extern_type* __to_end, 00369 extern_type*& __to_next) const; 00370 00371 virtual result 00372 do_unshift(state_type& __state, extern_type* __to, 00373 extern_type* __to_end, extern_type*& __to_next) const; 00374 00375 virtual result 00376 do_in(state_type& __state, const extern_type* __from, 00377 const extern_type* __from_end, const extern_type*& __from_next, 00378 intern_type* __to, intern_type* __to_end, 00379 intern_type*& __to_next) const; 00380 00381 virtual int 00382 do_encoding() const throw(); 00383 00384 virtual bool 00385 do_always_noconv() const throw(); 00386 00387 virtual int 00388 do_length(state_type&, const extern_type* __from, 00389 const extern_type* __end, size_t __max) const; 00390 00391 virtual int 00392 do_max_length() const throw(); 00393 }; 00394 00395 #ifdef _GLIBCXX_USE_WCHAR_T 00396 /** @brief Class codecvt<wchar_t, char, mbstate_t> specialization. 00397 * 00398 * Converts between narrow and wide characters in the native character set 00399 */ 00400 template<> 00401 class codecvt<wchar_t, char, mbstate_t> 00402 : public __codecvt_abstract_base<wchar_t, char, mbstate_t> 00403 { 00404 friend class messages<wchar_t>; 00405 00406 public: 00407 // Types: 00408 typedef wchar_t intern_type; 00409 typedef char extern_type; 00410 typedef mbstate_t state_type; 00411 00412 protected: 00413 __c_locale _M_c_locale_codecvt; 00414 00415 public: 00416 static locale::id id; 00417 00418 explicit 00419 codecvt(size_t __refs = 0); 00420 00421 explicit 00422 codecvt(__c_locale __cloc, size_t __refs = 0); 00423 00424 protected: 00425 virtual 00426 ~codecvt(); 00427 00428 virtual result 00429 do_out(state_type& __state, const intern_type* __from, 00430 const intern_type* __from_end, const intern_type*& __from_next, 00431 extern_type* __to, extern_type* __to_end, 00432 extern_type*& __to_next) const; 00433 00434 virtual result 00435 do_unshift(state_type& __state, 00436 extern_type* __to, extern_type* __to_end, 00437 extern_type*& __to_next) const; 00438 00439 virtual result 00440 do_in(state_type& __state, 00441 const extern_type* __from, const extern_type* __from_end, 00442 const extern_type*& __from_next, 00443 intern_type* __to, intern_type* __to_end, 00444 intern_type*& __to_next) const; 00445 00446 virtual 00447 int do_encoding() const throw(); 00448 00449 virtual 00450 bool do_always_noconv() const throw(); 00451 00452 virtual 00453 int do_length(state_type&, const extern_type* __from, 00454 const extern_type* __end, size_t __max) const; 00455 00456 virtual int 00457 do_max_length() const throw(); 00458 }; 00459 #endif //_GLIBCXX_USE_WCHAR_T 00460 00461 #if __cplusplus >= 201103L 00462 #ifdef _GLIBCXX_USE_C99_STDINT_TR1 00463 /** @brief Class codecvt<char16_t, char, mbstate_t> specialization. 00464 * 00465 * Converts between UTF-16 and UTF-8. 00466 */ 00467 template<> 00468 class codecvt<char16_t, char, mbstate_t> 00469 : public __codecvt_abstract_base<char16_t, char, mbstate_t> 00470 { 00471 public: 00472 // Types: 00473 typedef char16_t intern_type; 00474 typedef char extern_type; 00475 typedef mbstate_t state_type; 00476 00477 public: 00478 static locale::id id; 00479 00480 explicit 00481 codecvt(size_t __refs = 0) 00482 : __codecvt_abstract_base<char16_t, char, mbstate_t>(__refs) { } 00483 00484 protected: 00485 virtual 00486 ~codecvt(); 00487 00488 virtual result 00489 do_out(state_type& __state, const intern_type* __from, 00490 const intern_type* __from_end, const intern_type*& __from_next, 00491 extern_type* __to, extern_type* __to_end, 00492 extern_type*& __to_next) const; 00493 00494 virtual result 00495 do_unshift(state_type& __state, 00496 extern_type* __to, extern_type* __to_end, 00497 extern_type*& __to_next) const; 00498 00499 virtual result 00500 do_in(state_type& __state, 00501 const extern_type* __from, const extern_type* __from_end, 00502 const extern_type*& __from_next, 00503 intern_type* __to, intern_type* __to_end, 00504 intern_type*& __to_next) const; 00505 00506 virtual 00507 int do_encoding() const throw(); 00508 00509 virtual 00510 bool do_always_noconv() const throw(); 00511 00512 virtual 00513 int do_length(state_type&, const extern_type* __from, 00514 const extern_type* __end, size_t __max) const; 00515 00516 virtual int 00517 do_max_length() const throw(); 00518 }; 00519 00520 /** @brief Class codecvt<char32_t, char, mbstate_t> specialization. 00521 * 00522 * Converts between UTF-32 and UTF-8. 00523 */ 00524 template<> 00525 class codecvt<char32_t, char, mbstate_t> 00526 : public __codecvt_abstract_base<char32_t, char, mbstate_t> 00527 { 00528 public: 00529 // Types: 00530 typedef char32_t intern_type; 00531 typedef char extern_type; 00532 typedef mbstate_t state_type; 00533 00534 public: 00535 static locale::id id; 00536 00537 explicit 00538 codecvt(size_t __refs = 0) 00539 : __codecvt_abstract_base<char32_t, char, mbstate_t>(__refs) { } 00540 00541 protected: 00542 virtual 00543 ~codecvt(); 00544 00545 virtual result 00546 do_out(state_type& __state, const intern_type* __from, 00547 const intern_type* __from_end, const intern_type*& __from_next, 00548 extern_type* __to, extern_type* __to_end, 00549 extern_type*& __to_next) const; 00550 00551 virtual result 00552 do_unshift(state_type& __state, 00553 extern_type* __to, extern_type* __to_end, 00554 extern_type*& __to_next) const; 00555 00556 virtual result 00557 do_in(state_type& __state, 00558 const extern_type* __from, const extern_type* __from_end, 00559 const extern_type*& __from_next, 00560 intern_type* __to, intern_type* __to_end, 00561 intern_type*& __to_next) const; 00562 00563 virtual 00564 int do_encoding() const throw(); 00565 00566 virtual 00567 bool do_always_noconv() const throw(); 00568 00569 virtual 00570 int do_length(state_type&, const extern_type* __from, 00571 const extern_type* __end, size_t __max) const; 00572 00573 virtual int 00574 do_max_length() const throw(); 00575 }; 00576 00577 #endif // _GLIBCXX_USE_C99_STDINT_TR1 00578 #endif // C++11 00579 00580 /// class codecvt_byname [22.2.1.6]. 00581 template<typename _InternT, typename _ExternT, typename _StateT> 00582 class codecvt_byname : public codecvt<_InternT, _ExternT, _StateT> 00583 { 00584 public: 00585 explicit 00586 codecvt_byname(const char* __s, size_t __refs = 0) 00587 : codecvt<_InternT, _ExternT, _StateT>(__refs) 00588 { 00589 if (__builtin_strcmp(__s, "C") != 0 00590 && __builtin_strcmp(__s, "POSIX") != 0) 00591 { 00592 this->_S_destroy_c_locale(this->_M_c_locale_codecvt); 00593 this->_S_create_c_locale(this->_M_c_locale_codecvt, __s); 00594 } 00595 } 00596 00597 #if __cplusplus >= 201103L 00598 explicit 00599 codecvt_byname(const string& __s, size_t __refs = 0) 00600 : codecvt_byname(__s.c_str(), __refs) { } 00601 #endif 00602 00603 protected: 00604 virtual 00605 ~codecvt_byname() { } 00606 }; 00607 00608 #if __cplusplus >= 201103L && defined(_GLIBCXX_USE_C99_STDINT_TR1) 00609 template<> 00610 class codecvt_byname<char16_t, char, mbstate_t> 00611 : public codecvt<char16_t, char, mbstate_t> 00612 { 00613 public: 00614 explicit 00615 codecvt_byname(const char* __s, size_t __refs = 0) 00616 : codecvt<char16_t, char, mbstate_t>(__refs) { } 00617 00618 explicit 00619 codecvt_byname(const string& __s, size_t __refs = 0) 00620 : codecvt_byname(__s.c_str(), __refs) { } 00621 00622 protected: 00623 virtual 00624 ~codecvt_byname() { } 00625 }; 00626 00627 template<> 00628 class codecvt_byname<char32_t, char, mbstate_t> 00629 : public codecvt<char32_t, char, mbstate_t> 00630 { 00631 public: 00632 explicit 00633 codecvt_byname(const char* __s, size_t __refs = 0) 00634 : codecvt<char32_t, char, mbstate_t>(__refs) { } 00635 00636 explicit 00637 codecvt_byname(const string& __s, size_t __refs = 0) 00638 : codecvt_byname(__s.c_str(), __refs) { } 00639 00640 protected: 00641 virtual 00642 ~codecvt_byname() { } 00643 }; 00644 #endif 00645 00646 // Inhibit implicit instantiations for required instantiations, 00647 // which are defined via explicit instantiations elsewhere. 00648 #if _GLIBCXX_EXTERN_TEMPLATE 00649 extern template class codecvt_byname<char, char, mbstate_t>; 00650 00651 extern template 00652 const codecvt<char, char, mbstate_t>& 00653 use_facet<codecvt<char, char, mbstate_t> >(const locale&); 00654 00655 extern template 00656 bool 00657 has_facet<codecvt<char, char, mbstate_t> >(const locale&); 00658 00659 #ifdef _GLIBCXX_USE_WCHAR_T 00660 extern template class codecvt_byname<wchar_t, char, mbstate_t>; 00661 00662 extern template 00663 const codecvt<wchar_t, char, mbstate_t>& 00664 use_facet<codecvt<wchar_t, char, mbstate_t> >(const locale&); 00665 00666 extern template 00667 bool 00668 has_facet<codecvt<wchar_t, char, mbstate_t> >(const locale&); 00669 #endif 00670 00671 #if __cplusplus >= 201103L && defined(_GLIBCXX_USE_C99_STDINT_TR1) 00672 extern template class codecvt_byname<char16_t, char, mbstate_t>; 00673 extern template class codecvt_byname<char32_t, char, mbstate_t>; 00674 #endif 00675 00676 #endif 00677 00678 _GLIBCXX_END_NAMESPACE_VERSION 00679 } // namespace std 00680 00681 #endif // _CODECVT_H