llvm-gcc-4.2/libstdc++-v3/testsuite/22_locale/codecvt/unicode/char.cc - llvm-archive - Git at Google

 // 2000-08-22 Benjamin Kosnik <bkoz@cygnus.com>

 // Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation
 //
 // This file is part of the GNU ISO C++ Library.  This library is free
 // software; you can redistribute it and/or modify it under the
 // terms of the GNU General Public License as published by the
 // Free Software Foundation; either version 2, or (at your option)
 // any later version.

 // This library is distributed in the hope that it will be useful,
 // but WITHOUT ANY WARRANTY; without even the implied warranty of
 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 // GNU General Public License for more details.

 // You should have received a copy of the GNU General Public License along
 // with this library; see the file COPYING.  If not, write to the Free
 // Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
 // USA.

 // 22.2.1.5 - Template class codecvt [lib.locale.codecvt]
 #include <locale>
 #include <testsuite_hooks.h>

 #ifdef _GLIBCXX_USE___ENC_TRAITS

 // Need some char_traits specializations for this to work.
 typedef unsigned short			unicode_t;

 namespace std
 {
   template<>
     struct char_traits<unicode_t>
     {
       typedef unicode_t 	char_type;
       // Unsigned as wint_t is unsigned.
       typedef unsigned long  	int_type;
       typedef streampos 	pos_type;
       typedef streamoff 	off_type;
       typedef mbstate_t 	state_type;

       static void
       assign(char_type& __c1, const char_type& __c2);

       static bool
       eq(const char_type& __c1, const char_type& __c2);

       static bool
       lt(const char_type& __c1, const char_type& __c2);

       static int
       compare(const char_type* __s1, const char_type* __s2, size_t __n)
       { return memcmp(__s1, __s2, __n); }

       static size_t
       length(const char_type* __s);

       static const char_type*
       find(const char_type* __s, size_t __n, const char_type& __a);

       static char_type*
       move(char_type* __s1, const char_type* __s2, size_t __n);

       static char_type*
       copy(char_type* __s1, const char_type* __s2, size_t __n)
       {  return static_cast<char_type*>(memcpy(__s1, __s2, __n)); }

       static char_type*
       assign(char_type* __s, size_t __n, char_type __a);

       static char_type
       to_char_type(const int_type& __c);

       static int_type
       to_int_type(const char_type& __c);

       static bool
       eq_int_type(const int_type& __c1, const int_type& __c2);

       static int_type
       eof();

       static int_type
       not_eof(const int_type& __c);
     };
 }

 /*
 > how do I check that these conversions are correct?
 Very easy.  Since all the characters are from ASCII you simply
 zero-extend the values.

 drepper$ echo 'black pearl jasmine tea' | od -t x1
 0000000 62 6c 61 63 6b 20 70 65 61 72 6c 20 6a 61 73 6d
 0000020 69 6e 65 20 74 65 61 0a

 So the UCS-2 string is

 0x0062, 0x006c, 0x0061, ...

 You get the idea.  With iconv() you have to take care of the
 byte-order, though.  UCS-2 can mean little- or big endian.  Looking at
 your result

 > $9 = 25856

 it shows that the other byte-order is used (25856 == 0x6500).
 */


 void
 initialize_state(std::__enc_traits& state)
 { state._M_init(); }

 // Partial specialization using __enc_traits.
 // codecvt<unicode_t, char, __enc_traits>
 // UNICODE - UCS2 (big endian)
 void test01()
 {
   using namespace std;
   typedef codecvt_base::result			result;
   typedef unicode_t				int_type;
   typedef char					ext_type;
   typedef __enc_traits				enc_type;
   typedef codecvt<int_type, ext_type, enc_type>	unicode_codecvt;
   typedef char_traits<int_type>			int_traits;
   typedef char_traits<ext_type>			ext_traits;

   bool test __attribute__((unused)) = true;
   const ext_type* 	e_lit = "black pearl jasmine tea";
   int 			size = strlen(e_lit);

   char  i_lit_base[50] __attribute__((aligned(__alignof__(int_type)))) =
   {
     0x00, 0x62, 0x00, 0x6c, 0x00, 0x61, 0x00, 0x63, 0x00, 0x6b, 0x00, 0x20,
     0x00, 0x70, 0x00, 0x65, 0x00, 0x61, 0x00, 0x72, 0x00, 0x6c, 0x00, 0x20,
     0x00, 0x6a, 0x00, 0x61, 0x00, 0x73, 0x00, 0x6d, 0x00, 0x69, 0x00, 0x6e,
     0x00, 0x65, 0x00, 0x20, 0x00, 0x74, 0x00, 0x65, 0x00, 0x61, 0x00, 0xa0
   };
   const int_type* 	i_lit = reinterpret_cast<int_type*>(i_lit_base);

   const ext_type*       efrom_next;
   const int_type*       ifrom_next;
   ext_type* 		e_arr = new ext_type[size + 1];
   ext_type*		eto_next;
   int_type* 		i_arr = new int_type[size + 1];
   int_type*		ito_next;

   // construct a locale object with the specialized facet.
   locale 		loc(locale::classic(), new unicode_codecvt);
   // sanity check the constructed locale has the specialized facet.
   VERIFY( has_facet<unicode_codecvt>(loc) );
   const unicode_codecvt&	cvt = use_facet<unicode_codecvt>(loc);

   // in
   //  unicode_codecvt::state_type state01("UCS-2BE", "ISO-8859-15", 0xfeff, 0);
   unicode_codecvt::state_type state01("UCS-2BE", "ISO-8859-15", 0, 0);
   initialize_state(state01);
   // internal encoding is bigger because of bom
   result r1 = cvt.in(state01, e_lit, e_lit + size, efrom_next,
 		     i_arr, i_arr + size + 1, ito_next);
   VERIFY( r1 == codecvt_base::ok );
   VERIFY( !int_traits::compare(i_arr, i_lit, size) );
   VERIFY( efrom_next == e_lit + size );
   VERIFY( ito_next == i_arr + size );

   // out
   unicode_codecvt::state_type state02("UCS-2BE", "ISO-8859-15", 0, 0);
   initialize_state(state02);
   result r2 = cvt.out(state02, i_lit, i_lit + size, ifrom_next,
 		       e_arr, e_arr + size, eto_next);
   VERIFY( r2 == codecvt_base::ok );
   VERIFY( !ext_traits::compare(e_arr, e_lit, size) );
   VERIFY( ifrom_next == i_lit + size );
   VERIFY( eto_next == e_arr + size );

   // unshift
   ext_traits::copy(e_arr, e_lit, size);
   unicode_codecvt::state_type state03("UCS-2BE", "ISO-8859-15", 0, 0);
   initialize_state(state03);
   result r3 = cvt.unshift(state03, e_arr, e_arr + size, eto_next);
   VERIFY( r3 == codecvt_base::noconv );
   VERIFY( !ext_traits::compare(e_arr, e_lit, size) );
   VERIFY( eto_next == e_arr );

   int i = cvt.encoding();
   VERIFY( i == 2 ); // Target-dependent.

   VERIFY( !cvt.always_noconv() );

   unicode_codecvt::state_type state04("UCS-2BE", "ISO-8859-15", 0, 0);
   initialize_state(state04);
   int j = cvt.length(state03, e_lit, e_lit + size, 5);
   VERIFY( j == 5 );

   int k = cvt.max_length();
   VERIFY( k == 1 );

   delete [] e_arr;
   delete [] i_arr;
 }

 // Partial specialization using __enc_traits.
 // codecvt<unicode_t, char, __enc_traits>
 // UNICODE - UCS2 (little endian)
 void test02()
 {
   using namespace std;
   typedef codecvt_base::result			result;
   typedef unsigned short			unicode_t;
   typedef unicode_t				int_type;
   typedef char					ext_type;
   typedef __enc_traits				enc_type;
   typedef codecvt<int_type, ext_type, enc_type>	unicode_codecvt;
   typedef char_traits<int_type>			int_traits;
   typedef char_traits<ext_type>			ext_traits;

   bool test __attribute__((unused)) = true;
   const ext_type* 	e_lit = "black pearl jasmine tea";
   int 			size = strlen(e_lit);

   char  i_lit_base[50] __attribute__((aligned(__alignof__(int_type)))) =
   {
     0x62, 0x00, 0x6c, 0x00, 0x61, 0x00, 0x63, 0x00, 0x6b, 0x00, 0x20, 0x00,
     0x70, 0x00, 0x65, 0x00, 0x61, 0x00, 0x72, 0x00, 0x6c, 0x00, 0x20, 0x00,
     0x6a, 0x00, 0x61, 0x00, 0x73, 0x00, 0x6d, 0x00, 0x69, 0x00, 0x6e, 0x00,
     0x65, 0x00, 0x20, 0x00, 0x74, 0x00, 0x65, 0x00, 0x61, 0x00, 0xa0, 0x00
   };
   const int_type* 	i_lit = reinterpret_cast<int_type*>(i_lit_base);

   const ext_type*       efrom_next;
   const int_type*       ifrom_next;
   ext_type* 		e_arr = new ext_type[size + 1];
   ext_type*		eto_next;
   int_type* 		i_arr = new int_type[size + 1];
   int_type*		ito_next;

   // construct a locale object with the specialized facet.
   locale 		loc(locale::classic(), new unicode_codecvt);
   // sanity check the constructed locale has the specialized facet.
   VERIFY( has_facet<unicode_codecvt>(loc) );
   const unicode_codecvt&	cvt = use_facet<unicode_codecvt>(loc);

   // in
   unicode_codecvt::state_type state01("UCS-2LE", "ISO-8859-15", 0, 0);
   initialize_state(state01);
   // internal encoding is bigger because of bom
   result r1 = cvt.in(state01, e_lit, e_lit + size, efrom_next,
 		     i_arr, i_arr + size + 1, ito_next);
   VERIFY( r1 == codecvt_base::ok );
   VERIFY( !int_traits::compare(i_arr, i_lit, size) );
   VERIFY( efrom_next == e_lit + size );
   VERIFY( ito_next == i_arr + size );

   // out
   unicode_codecvt::state_type state02("UCS-2LE", "ISO-8859-15", 0, 0);
   initialize_state(state02);
   result r2 = cvt.out(state02, i_lit, i_lit + size, ifrom_next,
 		       e_arr, e_arr + size, eto_next);
   VERIFY( r2 == codecvt_base::ok );
   VERIFY( !ext_traits::compare(e_arr, e_lit, size) );
   VERIFY( ifrom_next == i_lit + size );
   VERIFY( eto_next == e_arr + size );

   // unshift
   ext_traits::copy(e_arr, e_lit, size);
   unicode_codecvt::state_type state03("UCS-2LE", "ISO-8859-15", 0, 0);
   initialize_state(state03);
   result r3 = cvt.unshift(state03, e_arr, e_arr + size, eto_next);
   VERIFY( r3 == codecvt_base::noconv );
   VERIFY( !ext_traits::compare(e_arr, e_lit, size) );
   VERIFY( eto_next == e_arr );

   int i = cvt.encoding();
   VERIFY( i == 2 ); // Target-dependent.

   VERIFY( !cvt.always_noconv() );

   unicode_codecvt::state_type state04("UCS-2LE", "ISO-8859-15", 0, 0);
   initialize_state(state04);
   int j = cvt.length(state03, e_lit, e_lit + size, 5);
   VERIFY( j == 5 );

   int k = cvt.max_length();
   VERIFY( k == 1 );

   delete [] e_arr;
   delete [] i_arr;
 }

 #endif // _GLIBCXX_USE___ENC_TRAITS

 int main ()
 {
 #if _GLIBCXX_USE___ENC_TRAITS
   test01();
   test02();
 #endif
   return 0;
 }
	// 2000-08-22 Benjamin Kosnik <bkoz@cygnus.com>

	// Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation
	//
	// This file is part of the GNU ISO C++ Library. This library is free
	// software; you can redistribute it and/or modify it under the
	// terms of the GNU General Public License as published by the
	// Free Software Foundation; either version 2, or (at your option)
	// any later version.

	// This library is distributed in the hope that it will be useful,
	// but WITHOUT ANY WARRANTY; without even the implied warranty of
	// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	// GNU General Public License for more details.

	// You should have received a copy of the GNU General Public License along
	// with this library; see the file COPYING. If not, write to the Free
	// Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
	// USA.

	// 22.2.1.5 - Template class codecvt [lib.locale.codecvt]
	#include <locale>
	#include <testsuite_hooks.h>

	#ifdef _GLIBCXX_USE___ENC_TRAITS

	// Need some char_traits specializations for this to work.
	typedef unsigned short unicode_t;

	namespace std
	{
	template<>
	struct char_traits<unicode_t>
	{
	typedef unicode_t char_type;
	// Unsigned as wint_t is unsigned.
	typedef unsigned long int_type;
	typedef streampos pos_type;
	typedef streamoff off_type;
	typedef mbstate_t state_type;

	static void
	assign(char_type& __c1, const char_type& __c2);

	static bool
	eq(const char_type& __c1, const char_type& __c2);

	static bool
	lt(const char_type& __c1, const char_type& __c2);

	static int
	compare(const char_type* __s1, const char_type* __s2, size_t __n)
	{ return memcmp(__s1, __s2, __n); }

	static size_t
	length(const char_type* __s);

	static const char_type*
	find(const char_type* __s, size_t __n, const char_type& __a);

	static char_type*
	move(char_type* __s1, const char_type* __s2, size_t __n);

	static char_type*
	copy(char_type* __s1, const char_type* __s2, size_t __n)
	{ return static_cast<char_type*>(memcpy(__s1, __s2, __n)); }

	static char_type*
	assign(char_type* __s, size_t __n, char_type __a);

	static char_type
	to_char_type(const int_type& __c);

	static int_type
	to_int_type(const char_type& __c);

	static bool
	eq_int_type(const int_type& __c1, const int_type& __c2);

	static int_type
	eof();

	static int_type
	not_eof(const int_type& __c);
	};
	}

	/*
	> how do I check that these conversions are correct?
	Very easy. Since all the characters are from ASCII you simply
	zero-extend the values.

	drepper$ echo 'black pearl jasmine tea' \| od -t x1
	0000000 62 6c 61 63 6b 20 70 65 61 72 6c 20 6a 61 73 6d
	0000020 69 6e 65 20 74 65 61 0a

	So the UCS-2 string is

	0x0062, 0x006c, 0x0061, ...

	You get the idea. With iconv() you have to take care of the
	byte-order, though. UCS-2 can mean little- or big endian. Looking at
	your result

	> $9 = 25856

	it shows that the other byte-order is used (25856 == 0x6500).
	*/


	void
	initialize_state(std::__enc_traits& state)
	{ state._M_init(); }

	// Partial specialization using __enc_traits.
	// codecvt<unicode_t, char, __enc_traits>
	// UNICODE - UCS2 (big endian)
	void test01()
	{
	using namespace std;
	typedef codecvt_base::result result;
	typedef unicode_t int_type;
	typedef char ext_type;
	typedef __enc_traits enc_type;
	typedef codecvt<int_type, ext_type, enc_type> unicode_codecvt;
	typedef char_traits<int_type> int_traits;
	typedef char_traits<ext_type> ext_traits;

	bool test __attribute__((unused)) = true;
	const ext_type* e_lit = "black pearl jasmine tea";
	int size = strlen(e_lit);

	char i_lit_base[50] __attribute__((aligned(__alignof__(int_type)))) =
	{
	0x00, 0x62, 0x00, 0x6c, 0x00, 0x61, 0x00, 0x63, 0x00, 0x6b, 0x00, 0x20,
	0x00, 0x70, 0x00, 0x65, 0x00, 0x61, 0x00, 0x72, 0x00, 0x6c, 0x00, 0x20,
	0x00, 0x6a, 0x00, 0x61, 0x00, 0x73, 0x00, 0x6d, 0x00, 0x69, 0x00, 0x6e,
	0x00, 0x65, 0x00, 0x20, 0x00, 0x74, 0x00, 0x65, 0x00, 0x61, 0x00, 0xa0
	};
	const int_type* i_lit = reinterpret_cast<int_type*>(i_lit_base);

	const ext_type* efrom_next;
	const int_type* ifrom_next;
	ext_type* e_arr = new ext_type[size + 1];
	ext_type* eto_next;
	int_type* i_arr = new int_type[size + 1];
	int_type* ito_next;

	// construct a locale object with the specialized facet.
	locale loc(locale::classic(), new unicode_codecvt);
	// sanity check the constructed locale has the specialized facet.
	VERIFY( has_facet<unicode_codecvt>(loc) );
	const unicode_codecvt& cvt = use_facet<unicode_codecvt>(loc);

	// in
	// unicode_codecvt::state_type state01("UCS-2BE", "ISO-8859-15", 0xfeff, 0);
	unicode_codecvt::state_type state01("UCS-2BE", "ISO-8859-15", 0, 0);
	initialize_state(state01);
	// internal encoding is bigger because of bom
	result r1 = cvt.in(state01, e_lit, e_lit + size, efrom_next,
	i_arr, i_arr + size + 1, ito_next);
	VERIFY( r1 == codecvt_base::ok );
	VERIFY( !int_traits::compare(i_arr, i_lit, size) );
	VERIFY( efrom_next == e_lit + size );
	VERIFY( ito_next == i_arr + size );

	// out
	unicode_codecvt::state_type state02("UCS-2BE", "ISO-8859-15", 0, 0);
	initialize_state(state02);
	result r2 = cvt.out(state02, i_lit, i_lit + size, ifrom_next,
	e_arr, e_arr + size, eto_next);
	VERIFY( r2 == codecvt_base::ok );
	VERIFY( !ext_traits::compare(e_arr, e_lit, size) );
	VERIFY( ifrom_next == i_lit + size );
	VERIFY( eto_next == e_arr + size );

	// unshift
	ext_traits::copy(e_arr, e_lit, size);
	unicode_codecvt::state_type state03("UCS-2BE", "ISO-8859-15", 0, 0);
	initialize_state(state03);
	result r3 = cvt.unshift(state03, e_arr, e_arr + size, eto_next);
	VERIFY( r3 == codecvt_base::noconv );
	VERIFY( !ext_traits::compare(e_arr, e_lit, size) );
	VERIFY( eto_next == e_arr );

	int i = cvt.encoding();
	VERIFY( i == 2 ); // Target-dependent.

	VERIFY( !cvt.always_noconv() );

	unicode_codecvt::state_type state04("UCS-2BE", "ISO-8859-15", 0, 0);
	initialize_state(state04);
	int j = cvt.length(state03, e_lit, e_lit + size, 5);
	VERIFY( j == 5 );

	int k = cvt.max_length();
	VERIFY( k == 1 );

	delete [] e_arr;
	delete [] i_arr;
	}

	// Partial specialization using __enc_traits.
	// codecvt<unicode_t, char, __enc_traits>
	// UNICODE - UCS2 (little endian)
	void test02()
	{
	using namespace std;
	typedef codecvt_base::result result;
	typedef unsigned short unicode_t;
	typedef unicode_t int_type;
	typedef char ext_type;
	typedef __enc_traits enc_type;
	typedef codecvt<int_type, ext_type, enc_type> unicode_codecvt;
	typedef char_traits<int_type> int_traits;
	typedef char_traits<ext_type> ext_traits;

	bool test __attribute__((unused)) = true;
	const ext_type* e_lit = "black pearl jasmine tea";
	int size = strlen(e_lit);

	char i_lit_base[50] __attribute__((aligned(__alignof__(int_type)))) =
	{
	0x62, 0x00, 0x6c, 0x00, 0x61, 0x00, 0x63, 0x00, 0x6b, 0x00, 0x20, 0x00,
	0x70, 0x00, 0x65, 0x00, 0x61, 0x00, 0x72, 0x00, 0x6c, 0x00, 0x20, 0x00,
	0x6a, 0x00, 0x61, 0x00, 0x73, 0x00, 0x6d, 0x00, 0x69, 0x00, 0x6e, 0x00,
	0x65, 0x00, 0x20, 0x00, 0x74, 0x00, 0x65, 0x00, 0x61, 0x00, 0xa0, 0x00
	};
	const int_type* i_lit = reinterpret_cast<int_type*>(i_lit_base);

	const ext_type* efrom_next;
	const int_type* ifrom_next;
	ext_type* e_arr = new ext_type[size + 1];
	ext_type* eto_next;
	int_type* i_arr = new int_type[size + 1];
	int_type* ito_next;

	// construct a locale object with the specialized facet.
	locale loc(locale::classic(), new unicode_codecvt);
	// sanity check the constructed locale has the specialized facet.
	VERIFY( has_facet<unicode_codecvt>(loc) );
	const unicode_codecvt& cvt = use_facet<unicode_codecvt>(loc);

	// in
	unicode_codecvt::state_type state01("UCS-2LE", "ISO-8859-15", 0, 0);
	initialize_state(state01);
	// internal encoding is bigger because of bom
	result r1 = cvt.in(state01, e_lit, e_lit + size, efrom_next,
	i_arr, i_arr + size + 1, ito_next);
	VERIFY( r1 == codecvt_base::ok );
	VERIFY( !int_traits::compare(i_arr, i_lit, size) );
	VERIFY( efrom_next == e_lit + size );
	VERIFY( ito_next == i_arr + size );

	// out
	unicode_codecvt::state_type state02("UCS-2LE", "ISO-8859-15", 0, 0);
	initialize_state(state02);
	result r2 = cvt.out(state02, i_lit, i_lit + size, ifrom_next,
	e_arr, e_arr + size, eto_next);
	VERIFY( r2 == codecvt_base::ok );
	VERIFY( !ext_traits::compare(e_arr, e_lit, size) );
	VERIFY( ifrom_next == i_lit + size );
	VERIFY( eto_next == e_arr + size );

	// unshift
	ext_traits::copy(e_arr, e_lit, size);
	unicode_codecvt::state_type state03("UCS-2LE", "ISO-8859-15", 0, 0);
	initialize_state(state03);
	result r3 = cvt.unshift(state03, e_arr, e_arr + size, eto_next);
	VERIFY( r3 == codecvt_base::noconv );
	VERIFY( !ext_traits::compare(e_arr, e_lit, size) );
	VERIFY( eto_next == e_arr );

	int i = cvt.encoding();
	VERIFY( i == 2 ); // Target-dependent.

	VERIFY( !cvt.always_noconv() );

	unicode_codecvt::state_type state04("UCS-2LE", "ISO-8859-15", 0, 0);
	initialize_state(state04);
	int j = cvt.length(state03, e_lit, e_lit + size, 5);
	VERIFY( j == 5 );

	int k = cvt.max_length();
	VERIFY( k == 1 );

	delete [] e_arr;
	delete [] i_arr;
	}

	#endif // _GLIBCXX_USE___ENC_TRAITS

	int main ()
	{
	#if _GLIBCXX_USE___ENC_TRAITS
	test01();
	test02();
	#endif
	return 0;
	}