codecvt.h

Go to the documentation of this file.
00001 // Locale support (codecvt) -*- C++ -*-
00002 
00003 // Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
00004 // 2009, 2010  Free Software Foundation, Inc.
00005 //
00006 // This file is part of the GNU ISO C++ Library.  This library is free
00007 // software; you can redistribute it and/or modify it under the
00008 // terms of the GNU General Public License as published by the
00009 // Free Software Foundation; either version 3, or (at your option)
00010 // any later version.
00011 
00012 // This library is distributed in the hope that it will be useful,
00013 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00014 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015 // GNU General Public License for more details.
00016 
00017 // Under Section 7 of GPL version 3, you are granted additional
00018 // permissions described in the GCC Runtime Library Exception, version
00019 // 3.1, as published by the Free Software Foundation.
00020 
00021 // You should have received a copy of the GNU General Public License and
00022 // a copy of the GCC Runtime Library Exception along with this program;
00023 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
00024 // <http://www.gnu.org/licenses/>.
00025 
00026 /** @file bits/codecvt.h
00027  *  This is an internal header file, included by other library headers.
00028  *  You should not attempt to use it directly.
00029  */
00030 
00031 //
00032 // ISO C++ 14882: 22.2.1.5 Template class codecvt
00033 //
00034 
00035 // Written by Benjamin Kosnik <bkoz@redhat.com>
00036 
00037 #ifndef _CODECVT_H
00038 #define _CODECVT_H 1
00039 
00040 #pragma GCC system_header
00041 
00042 _GLIBCXX_BEGIN_NAMESPACE(std)
00043 
00044   /// Base class for the codecvt facet [22.2.1.5]; it declares only the result enum and holds no data.
00045   class codecvt_base
00046   {
00047   public:
00048     enum result  // Status code returned by the conversion functions (out, unshift, in).
00049     {
00050       ok,       // All of the input was converted.
00051       partial,  // The input ended early or the output buffer had insufficient space.
00052       error,    // The conversion failed.
00053       noconv    // No conversion was necessary.
00054     };
00055   };
00056 
00057   /**
00058    *  @brief  Common base for codecvt functions.
00059    *
00060    *  This template class provides implementations of the public functions;
00061    *  each one simply forwards to the protected virtual function named do_<name>.
00062    *
00063    *  This template also declares those protected virtual functions as pure
00064    *  virtual, so a derived class has to override every one of them.
00065   */
00066   template<typename _InternT, typename _ExternT, typename _StateT>
00067     class __codecvt_abstract_base
00068     : public locale::facet, public codecvt_base
00069     {
00070     public:
00071       // Types:
00072       typedef codecvt_base::result  result;
00073       typedef _InternT          intern_type;
00074       typedef _ExternT          extern_type;
00075       typedef _StateT           state_type;
00076 
00077       // 22.2.1.5.1 codecvt members
00078       /**
00079        *  @brief  Convert from internal to external character set.
00080        *
00081        *  Converts input string of intern_type to output string of
00082        *  extern_type.  This is analogous to wcsrtombs.  It does this by
00083        *  calling codecvt::do_out.
00084        *
00085        *  The source and destination character sets are determined by the
00086        *  facet's locale, internal and external types.
00087        *
00088        *  The characters in [from,from_end) are converted and written to
00089        *  [to,to_end).  from_next and to_next are set to point to the
00090        *  character following the last successfully converted character,
00091        *  respectively.  If the result needed no conversion, from_next and
00092        *  to_next are not affected.
00093        *
00094        *  The @a state argument should be initialized if the input is at the
00095        *  beginning and carried from a previous call if continuing
00096        *  conversion.  There are no guarantees about how @a state is used.
00097        *
00098        *  The result returned is a member of codecvt_base::result.  If
00099        *  all the input is converted, returns codecvt_base::ok.  If no
00100        *  conversion is necessary, returns codecvt_base::noconv.  If
00101        *  the input ends early or there is insufficient space in the
00102        *  output, returns codecvt_base::partial.  Otherwise the
00103        *  conversion failed and codecvt_base::error is returned.
00104        *
00105        *  @param  state  Persistent conversion state data.
00106        *  @param  from  Start of input.
00107        *  @param  from_end  End of input.
00108        *  @param  from_next  Returns start of unconverted data.
00109        *  @param  to  Start of output buffer.
00110        *  @param  to_end  End of output buffer.
00111        *  @param  to_next  Returns start of unused output area.
00112        *  @return  codecvt_base::result.
00113       */
00114       result
00115       out(state_type& __state, const intern_type* __from,
00116       const intern_type* __from_end, const intern_type*& __from_next,
00117       extern_type* __to, extern_type* __to_end,
00118       extern_type*& __to_next) const
00119       {
00120     return this->do_out(__state, __from, __from_end, __from_next,
00121                 __to, __to_end, __to_next);
00122       }
00123 
00124       /**
00125        *  @brief  Reset conversion state.
00126        *
00127        *  Writes characters to output that would restore @a state to initial
00128        *  conditions.  The idea is that if a partial conversion occurs, then
00129        *  converting the characters written by this function would leave
00130        *  the state in initial conditions, rather than partial conversion
00131        *  state.  It does this by calling codecvt::do_unshift().
00132        *
00133        *  For example, if 4 external characters always converted to 1 internal
00134        *  character, and input to in() had 6 external characters with state
00135        *  saved, this function would write two characters to the output and
00136        *  set the state to initialized conditions.
00137        *
00138        *  The source and destination character sets are determined by the
00139        *  facet's locale, internal and external types.
00140        *
00141        *  The result returned is a member of codecvt_base::result.  If the
00142        *  state could be reset and data written, returns codecvt_base::ok.  If
00143        *  no conversion is necessary, returns codecvt_base::noconv.  If the
00144        *  output has insufficient space, returns codecvt_base::partial.
00145        *  Otherwise the reset failed and codecvt_base::error is returned.
00146        *
00147        *  @param  state  Persistent conversion state data.
00148        *  @param  to  Start of output buffer.
00149        *  @param  to_end  End of output buffer.
00150        *  @param  to_next  Returns start of unused output area.
00151        *  @return  codecvt_base::result.
00152       */
00153       result
00154       unshift(state_type& __state, extern_type* __to, extern_type* __to_end,
00155           extern_type*& __to_next) const
00156       { return this->do_unshift(__state, __to,__to_end,__to_next); }
00157 
00158       /**
00159        *  @brief  Convert from external to internal character set.
00160        *
00161        *  Converts input string of extern_type to output string of
00162        *  intern_type.  This is analogous to mbsrtowcs.  It does this by
00163        *  calling codecvt::do_in.
00164        *
00165        *  The source and destination character sets are determined by the
00166        *  facet's locale, internal and external types.
00167        *
00168        *  The characters in [from,from_end) are converted and written to
00169        *  [to,to_end).  from_next and to_next are set to point to the
00170        *  character following the last successfully converted character,
00171        *  respectively.  If the result needed no conversion, from_next and
00172        *  to_next are not affected.
00173        *
00174        *  The @a state argument should be initialized if the input is at the
00175        *  beginning and carried from a previous call if continuing
00176        *  conversion.  There are no guarantees about how @a state is used.
00177        *
00178        *  The result returned is a member of codecvt_base::result.  If
00179        *  all the input is converted, returns codecvt_base::ok.  If no
00180        *  conversion is necessary, returns codecvt_base::noconv.  If
00181        *  the input ends early or there is insufficient space in the
00182        *  output, returns codecvt_base::partial.  Otherwise the
00183        *  conversion failed and codecvt_base::error is returned.
00184        *
00185        *  @param  state  Persistent conversion state data.
00186        *  @param  from  Start of input.
00187        *  @param  from_end  End of input.
00188        *  @param  from_next  Returns start of unconverted data.
00189        *  @param  to  Start of output buffer.
00190        *  @param  to_end  End of output buffer.
00191        *  @param  to_next  Returns start of unused output area.
00192        *  @return  codecvt_base::result.
00193       */
00194       result
00195       in(state_type& __state, const extern_type* __from,
00196      const extern_type* __from_end, const extern_type*& __from_next,
00197      intern_type* __to, intern_type* __to_end,
00198      intern_type*& __to_next) const
00199       {
00200     return this->do_in(__state, __from, __from_end, __from_next,
00201                __to, __to_end, __to_next);
00202       }
00203 
00204       int  // Non-throwing forwarder: returns whatever the virtual do_encoding() returns.
00205       encoding() const throw()
00206       { return this->do_encoding(); }
00207 
00208       bool  // Non-throwing forwarder: returns whatever the virtual do_always_noconv() returns.
00209       always_noconv() const throw()
00210       { return this->do_always_noconv(); }
00211 
00212       int  // Forwarder: passes the state, the input range [__from,__end) and __max on to do_length().
00213       length(state_type& __state, const extern_type* __from,
00214          const extern_type* __end, size_t __max) const
00215       { return this->do_length(__state, __from, __end, __max); }
00216 
00217       int  // Non-throwing forwarder: returns whatever the virtual do_max_length() returns.
00218       max_length() const throw()
00219       { return this->do_max_length(); }
00220 
00221     protected:
00222       explicit  // Protected, so only derived classes can construct; __refs is handed to locale::facet.
00223       __codecvt_abstract_base(size_t __refs = 0) : locale::facet(__refs) { }
00224 
00225       virtual  // Protected virtual destructor with an empty body.
00226       ~__codecvt_abstract_base() { }
00227 
00228       /**
00229        *  @brief  Convert from internal to external character set.
00230        *
00231        *  Converts input string of intern_type to output string of
00232        *  extern_type.  This function is a hook for derived classes to change
00233        *  the value returned.  @see out for more information.
00234       */
00235       virtual result
00236       do_out(state_type& __state, const intern_type* __from,
00237          const intern_type* __from_end, const intern_type*& __from_next,
00238          extern_type* __to, extern_type* __to_end,
00239          extern_type*& __to_next) const = 0;
00240 
00241       virtual result  // Pure virtual hook called by unshift(); see unshift for the contract.
00242       do_unshift(state_type& __state, extern_type* __to,
00243          extern_type* __to_end, extern_type*& __to_next) const = 0;
00244 
00245       virtual result  // Pure virtual hook called by in(); see in for the contract.
00246       do_in(state_type& __state, const extern_type* __from,
00247         const extern_type* __from_end, const extern_type*& __from_next,
00248         intern_type* __to, intern_type* __to_end,
00249         intern_type*& __to_next) const = 0;
00250 
00251       virtual int  // Pure virtual hook called by encoding().
00252       do_encoding() const throw() = 0;
00253 
00254       virtual bool  // Pure virtual hook called by always_noconv().
00255       do_always_noconv() const throw() = 0;
00256 
00257       virtual int  // Pure virtual hook called by length().
00258       do_length(state_type&, const extern_type* __from,
00259         const extern_type* __end, size_t __max) const = 0;
00260 
00261       virtual int  // Pure virtual hook called by max_length().
00262       do_max_length() const throw() = 0;
00263     };
00264 
00265 
00266 
00267   /**
00268    *  @brief  Primary class template codecvt.
00269    *  @ingroup locales
00270    *  Overrides every pure virtual hook of __codecvt_abstract_base and adds the facet id and a C locale handle.
00271    *  NB: Generic, mostly useless implementation.
00272    *  The default constructor value-initializes _M_c_locale_codecvt so that codecvt_byname never reads an indeterminate handle.
00273   */
00274    template<typename _InternT, typename _ExternT, typename _StateT>
00275     class codecvt
00276     : public __codecvt_abstract_base<_InternT, _ExternT, _StateT>
00277     {
00278     public:
00279       // Types:
00280       typedef codecvt_base::result  result;
00281       typedef _InternT          intern_type;
00282       typedef _ExternT          extern_type;
00283       typedef _StateT           state_type;
00284 
00285     protected:
00286       __c_locale            _M_c_locale_codecvt;  // C locale handle; derived classes read and replace it.
00287 
00288     public:
00289       static locale::id         id;  // Facet id; defined after the class.
00290 
00291       explicit  // @param __refs  Reference count argument forwarded to the base class.
00292       codecvt(size_t __refs = 0)
00293       : __codecvt_abstract_base<_InternT, _ExternT, _StateT> (__refs), _M_c_locale_codecvt() { }
00294 
00295       explicit  // Declared only; takes a C locale handle plus the reference count argument.
00296       codecvt(__c_locale __cloc, size_t __refs = 0);
00297 
00298     protected:
00299       virtual  // Empty body: this class does not release _M_c_locale_codecvt itself.
00300       ~codecvt() { }
00301 
00302       virtual result  // Override of the hook behind out().
00303       do_out(state_type& __state, const intern_type* __from,
00304          const intern_type* __from_end, const intern_type*& __from_next,
00305          extern_type* __to, extern_type* __to_end,
00306          extern_type*& __to_next) const;
00307 
00308       virtual result  // Override of the hook behind unshift().
00309       do_unshift(state_type& __state, extern_type* __to,
00310          extern_type* __to_end, extern_type*& __to_next) const;
00311 
00312       virtual result  // Override of the hook behind in().
00313       do_in(state_type& __state, const extern_type* __from,
00314         const extern_type* __from_end, const extern_type*& __from_next,
00315         intern_type* __to, intern_type* __to_end,
00316         intern_type*& __to_next) const;
00317 
00318       virtual int  // Override of the hook behind encoding().
00319       do_encoding() const throw();
00320 
00321       virtual bool  // Override of the hook behind always_noconv().
00322       do_always_noconv() const throw();
00323 
00324       virtual int  // Override of the hook behind length().
00325       do_length(state_type&, const extern_type* __from,
00326         const extern_type* __end, size_t __max) const;
00327 
00328       virtual int  // Override of the hook behind max_length().
00329       do_max_length() const throw();
00330     };
00331 
00332   template<typename _InternT, typename _ExternT, typename _StateT>  // Out-of-class definition of the static member id declared in the primary codecvt template.
00333     locale::id codecvt<_InternT, _ExternT, _StateT>::id;
00334 
00335   /// Explicit specialization codecvt<char, char, mbstate_t>: internal and external character types are both char.
00336   template<>
00337     class codecvt<char, char, mbstate_t>
00338     : public __codecvt_abstract_base<char, char, mbstate_t>
00339     {
00340     public:
00341       // Types (result is not redeclared here; it comes from the base class typedef):
00342       typedef char          intern_type;
00343       typedef char          extern_type;
00344       typedef mbstate_t         state_type;
00345 
00346     protected:
00347       __c_locale            _M_c_locale_codecvt;  // C locale handle held by this facet.
00348 
00349     public:
00350       static locale::id id;  // Facet id; declared here, no definition appears in this header.
00351 
00352       explicit  // Declared only; __refs is the reference count argument.
00353       codecvt(size_t __refs = 0);
00354 
00355       explicit  // Declared only; takes a C locale handle plus the reference count argument.
00356       codecvt(__c_locale __cloc, size_t __refs = 0);
00357 
00358     protected:
00359       virtual  // Declared only; unlike the primary template, no inline body is given here.
00360       ~codecvt();
00361 
00362       virtual result  // Override of the hook behind out().
00363       do_out(state_type& __state, const intern_type* __from,
00364          const intern_type* __from_end, const intern_type*& __from_next,
00365          extern_type* __to, extern_type* __to_end,
00366          extern_type*& __to_next) const;
00367 
00368       virtual result  // Override of the hook behind unshift().
00369       do_unshift(state_type& __state, extern_type* __to,
00370          extern_type* __to_end, extern_type*& __to_next) const;
00371 
00372       virtual result  // Override of the hook behind in().
00373       do_in(state_type& __state, const extern_type* __from,
00374         const extern_type* __from_end, const extern_type*& __from_next,
00375         intern_type* __to, intern_type* __to_end,
00376         intern_type*& __to_next) const;
00377 
00378       virtual int  // Override of the hook behind encoding().
00379       do_encoding() const throw();
00380 
00381       virtual bool  // Override of the hook behind always_noconv().
00382       do_always_noconv() const throw();
00383 
00384       virtual int  // Override of the hook behind length().
00385       do_length(state_type&, const extern_type* __from,
00386         const extern_type* __end, size_t __max) const;
00387 
00388       virtual int  // Override of the hook behind max_length().
00389       do_max_length() const throw();
00390   };
00391 
00392 #ifdef _GLIBCXX_USE_WCHAR_T
00393   /// Explicit specialization codecvt<wchar_t, char, mbstate_t>: wchar_t internally, char externally. Only compiled when _GLIBCXX_USE_WCHAR_T is defined.
00394   template<>
00395     class codecvt<wchar_t, char, mbstate_t>
00396     : public __codecvt_abstract_base<wchar_t, char, mbstate_t>
00397     {
00398     public:
00399       // Types (result is not redeclared here; it comes from the base class typedef):
00400       typedef wchar_t           intern_type;
00401       typedef char          extern_type;
00402       typedef mbstate_t         state_type;
00403 
00404     protected:
00405       __c_locale            _M_c_locale_codecvt;  // C locale handle held by this facet.
00406 
00407     public:
00408       static locale::id         id;  // Facet id; declared here, no definition appears in this header.
00409 
00410       explicit  // Declared only; __refs is the reference count argument.
00411       codecvt(size_t __refs = 0);
00412 
00413       explicit  // Declared only; takes a C locale handle plus the reference count argument.
00414       codecvt(__c_locale __cloc, size_t __refs = 0);
00415 
00416     protected:
00417       virtual  // Declared only; unlike the primary template, no inline body is given here.
00418       ~codecvt();
00419 
00420       virtual result  // Override of the hook behind out() (wchar_t to char).
00421       do_out(state_type& __state, const intern_type* __from,
00422          const intern_type* __from_end, const intern_type*& __from_next,
00423          extern_type* __to, extern_type* __to_end,
00424          extern_type*& __to_next) const;
00425 
00426       virtual result  // Override of the hook behind unshift().
00427       do_unshift(state_type& __state,
00428          extern_type* __to, extern_type* __to_end,
00429          extern_type*& __to_next) const;
00430 
00431       virtual result  // Override of the hook behind in() (char to wchar_t).
00432       do_in(state_type& __state,
00433          const extern_type* __from, const extern_type* __from_end,
00434          const extern_type*& __from_next,
00435          intern_type* __to, intern_type* __to_end,
00436          intern_type*& __to_next) const;
00437 
00438       virtual  // Override of the hook behind encoding().
00439       int do_encoding() const throw();
00440 
00441       virtual  // Override of the hook behind always_noconv().
00442       bool do_always_noconv() const throw();
00443 
00444       virtual  // Override of the hook behind length().
00445       int do_length(state_type&, const extern_type* __from,
00446             const extern_type* __end, size_t __max) const;
00447 
00448       virtual int  // Override of the hook behind max_length().
00449       do_max_length() const throw();
00450     };
00451 #endif //_GLIBCXX_USE_WCHAR_T
00452 
00453   /// class codecvt_byname [22.2.1.6]: a codecvt whose C locale handle is selected by a locale name string.
00454   template<typename _InternT, typename _ExternT, typename _StateT>
00455     class codecvt_byname : public codecvt<_InternT, _ExternT, _StateT>
00456     {
00457     public:
00458       explicit  // @param __s  Locale name, passed straight to __builtin_strcmp (NOTE(review): so presumably must not be null).  @param __refs  Forwarded to the codecvt base.
00459       codecvt_byname(const char* __s, size_t __refs = 0)
00460       : codecvt<_InternT, _ExternT, _StateT>(__refs)
00461       {
00462     if (__builtin_strcmp(__s, "C") != 0  // For the names "C" and "POSIX" the handle set up by the base is left untouched.
00463         && __builtin_strcmp(__s, "POSIX") != 0)
00464       {
00465         this->_S_destroy_c_locale(this->_M_c_locale_codecvt);  // NOTE(review): reads the inherited handle before anything here assigns it, so this relies on the base constructor having set it.
00466         this->_S_create_c_locale(this->_M_c_locale_codecvt, __s);  // Re-create the handle from the requested name.
00467       }
00468       }
00469 
00470     protected:
00471       virtual  // Empty body: the handle created in the constructor is not released here.
00472       ~codecvt_byname() { }
00473     };
00474 
00475   // Inhibit implicit instantiation of the required codecvt_byname, use_facet
00476   // and has_facet specializations; they are defined via explicit instantiations elsewhere.
00477   // NB: This syntax is a GNU extension in C++98/03; ISO C++11 standardized it as an explicit instantiation declaration.
00478 #if _GLIBCXX_EXTERN_TEMPLATE
00479   extern template class codecvt_byname<char, char, mbstate_t>;
00480 
00481   extern template
00482     const codecvt<char, char, mbstate_t>&
00483     use_facet<codecvt<char, char, mbstate_t> >(const locale&);
00484 
00485   extern template
00486     bool
00487     has_facet<codecvt<char, char, mbstate_t> >(const locale&);
00488 
00489 #ifdef _GLIBCXX_USE_WCHAR_T
00490   extern template class codecvt_byname<wchar_t, char, mbstate_t>;
00491 
00492   extern template
00493     const codecvt<wchar_t, char, mbstate_t>&
00494     use_facet<codecvt<wchar_t, char, mbstate_t> >(const locale&);
00495 
00496   extern template
00497     bool
00498     has_facet<codecvt<wchar_t, char, mbstate_t> >(const locale&);
00499 #endif // _GLIBCXX_USE_WCHAR_T
00500 #endif // _GLIBCXX_EXTERN_TEMPLATE
00501 
00502 _GLIBCXX_END_NAMESPACE
00503 
00504 #endif // _CODECVT_H