| // natIconv.cc -- Java side of iconv() reader. |
| |
| /* Copyright (C) 2000, 2001, 2003 Free Software Foundation |
| |
| This file is part of libgcj. |
| |
| This software is copyrighted work licensed under the terms of the |
| Libgcj License. Please consult the file "LIBGCJ_LICENSE" for |
| details. */ |
| |
| /* Author: Tom Tromey <tromey@redhat.com>. */ |
| |
| #include <config.h> |
| |
| #include <gcj/cni.h> |
| #include <jvm.h> |
| |
| #include <gnu/gcj/convert/Input_iconv.h> |
| #include <gnu/gcj/convert/Output_iconv.h> |
| #include <java/io/CharConversionException.h> |
| #include <java/io/UnsupportedEncodingException.h> |
| |
| #include <errno.h> |
| |
| #ifdef HAVE_ICONV |
| #include <iconv.h> |
| |
| template<typename T> |
| static inline size_t |
| iconv_adapter (size_t (*iconv_f) (iconv_t, T, size_t *, char **, size_t *), |
| iconv_t handle, char **inbuf, size_t *inavail, |
| char **outbuf, size_t *outavail) |
| { |
| return (*iconv_f) (handle, (T) inbuf, inavail, outbuf, outavail); |
| } |
| |
| #endif |
| |
| void |
| gnu::gcj::convert::Input_iconv::init (jstring encoding) |
| { |
| #ifdef HAVE_ICONV |
| jsize len = _Jv_GetStringUTFLength (encoding); |
| char buffer[len + 1]; |
| _Jv_GetStringUTFRegion (encoding, 0, encoding->length(), buffer); |
| buffer[len] = '\0'; |
| |
| iconv_t h = iconv_open ("UCS-2", buffer); |
| if (h == (iconv_t) -1) |
| throw new java::io::UnsupportedEncodingException (encoding); |
| |
| JvAssert (h != NULL); |
| handle = reinterpret_cast<gnu::gcj::RawData *> (h); |
| #else /* HAVE_ICONV */ |
| // If no iconv, just throw an exception. |
| throw new java::io::UnsupportedEncodingException (encoding); |
| #endif /* HAVE_ICONV */ |
| } |
| |
| void |
| gnu::gcj::convert::Input_iconv::finalize (void) |
| { |
| #ifdef HAVE_ICONV |
| if (handle != NULL) |
| { |
| iconv_close ((iconv_t) handle); |
| handle = NULL; |
| } |
| #endif /* HAVE_ICONV */ |
| } |
| |
| jint |
| gnu::gcj::convert::Input_iconv::read (jcharArray outbuffer, |
| jint outpos, jint count) |
| { |
| #ifdef HAVE_ICONV |
| jbyte *bytes = elements (inbuffer); |
| jchar *out = elements (outbuffer); |
| size_t inavail = inlength - inpos; |
| size_t old_in = inavail; |
| size_t outavail = count * sizeof (jchar); |
| size_t old_out = outavail; |
| |
| char *inbuf = (char *) &bytes[inpos]; |
| char *outbuf = (char *) &out[outpos]; |
| |
| size_t r = iconv_adapter (iconv, (iconv_t) handle, |
| &inbuf, &inavail, |
| &outbuf, &outavail); |
| |
| if (r == (size_t) -1) |
| { |
| // If we see EINVAL then there is an incomplete sequence at the |
| // end of the input buffer. If we see E2BIG then we ran out of |
| // space in the output buffer. However, in both these cases |
| // some conversion might have taken place. So we fall through |
| // to the normal case. |
| if (errno != EINVAL && errno != E2BIG) |
| throw new java::io::CharConversionException (); |
| } |
| |
| if (iconv_byte_swap) |
| { |
| size_t max = (old_out - outavail) / sizeof (jchar); |
| for (size_t i = 0; i < max; ++i) |
| { |
| // Byte swap. |
| jchar c = (((out[outpos + i] & 0xff) << 8) |
| | ((out[outpos + i] >> 8) & 0xff)); |
| outbuf[i] = c; |
| } |
| } |
| |
| inpos += old_in - inavail; |
| return (old_out - outavail) / sizeof (jchar); |
| #else /* HAVE_ICONV */ |
| return -1; |
| #endif /* HAVE_ICONV */ |
| } |
| |
| void |
| gnu::gcj::convert::Input_iconv::done () |
| { |
| #ifdef HAVE_ICONV |
| // 50 bytes should be enough for any reset sequence. |
| size_t avail = 50; |
| char tmp[avail]; |
| char *p = tmp; |
| // Calling iconv() with a NULL INBUF pointer will cause iconv() to |
| // switch to its initial state. We don't care about the output that |
| // might be generated in that situation. |
| iconv_adapter (iconv, (iconv_t) handle, NULL, NULL, &p, &avail); |
| BytesToUnicode::done (); |
| #else /* HAVE_ICONV */ |
| // If no iconv, do nothing |
| #endif /* HAVE_ICONV */ |
| } |
| |
| void |
| gnu::gcj::convert::Output_iconv::init (jstring encoding) |
| { |
| #ifdef HAVE_ICONV |
| jsize len = _Jv_GetStringUTFLength (encoding); |
| char buffer[len + 1]; |
| _Jv_GetStringUTFRegion (encoding, 0, encoding->length(), buffer); |
| buffer[len] = '\0'; |
| |
| iconv_t h = iconv_open (buffer, "UCS-2"); |
| if (h == (iconv_t) -1) |
| throw new java::io::UnsupportedEncodingException (encoding); |
| |
| JvAssert (h != NULL); |
| handle = reinterpret_cast<gnu::gcj::RawData *> (h); |
| #else /* HAVE_ICONV */ |
| // If no iconv, just throw an exception. |
| throw new java::io::UnsupportedEncodingException (encoding); |
| #endif /* HAVE_ICONV */ |
| } |
| |
| void |
| gnu::gcj::convert::Output_iconv::finalize (void) |
| { |
| #ifdef HAVE_ICONV |
| if (handle != NULL) |
| { |
| iconv_close ((iconv_t) handle); |
| handle = NULL; |
| } |
| #endif /* HAVE_ICONV */ |
| } |
| |
| jint |
| gnu::gcj::convert::Output_iconv::write (jcharArray inbuffer, |
| jint inpos, jint inlength) |
| { |
| #ifdef HAVE_ICONV |
| jchar *chars = elements (inbuffer); |
| jbyte *out = elements (buf); |
| jchar *temp_buffer = NULL; |
| |
| size_t inavail = inlength * sizeof (jchar); |
| size_t old_in = inavail; |
| |
| size_t outavail = buf->length - count; |
| size_t old_out = outavail; |
| |
| char *inbuf = (char *) &chars[inpos]; |
| char *outbuf = (char *) &out[count]; |
| |
| if (iconv_byte_swap) |
| { |
| // Ugly performance penalty -- don't use losing systems! |
| temp_buffer = (jchar *) _Jv_Malloc (inlength * sizeof (jchar)); |
| for (int i = 0; i < inlength; ++i) |
| { |
| // Byte swap. |
| jchar c = (((chars[inpos + i] & 0xff) << 8) |
| | ((chars[inpos + i] >> 8) & 0xff)); |
| temp_buffer[i] = c; |
| } |
| inbuf = (char *) temp_buffer; |
| } |
| |
| size_t loop_old_in = old_in; |
| while (1) |
| { |
| size_t r = iconv_adapter (iconv, (iconv_t) handle, |
| &inbuf, &inavail, |
| &outbuf, &outavail); |
| if (r == (size_t) -1) |
| { |
| if (errno == EINVAL) |
| { |
| // Incomplete byte sequence at the end of the input |
| // buffer. This shouldn't be able to happen here. |
| break; |
| } |
| else if (errno == E2BIG) |
| { |
| // Output buffer is too small. |
| break; |
| } |
| else if (errno == EILSEQ || inavail == loop_old_in) |
| { |
| // Untranslatable sequence. Since glibc 2.1.3 doesn't |
| // properly set errno, we also assume that this is what |
| // is happening if no conversions took place. (This can |
| // be a bogus assumption if in fact the output buffer is |
| // too small.) We skip the first character and try |
| // again. |
| inavail -= 2; |
| if (inavail == 0) |
| break; |
| loop_old_in -= 2; |
| inbuf += 2; |
| } |
| } |
| else |
| break; |
| } |
| |
| if (temp_buffer != NULL) |
| _Jv_Free (temp_buffer); |
| |
| count += old_out - outavail; |
| return (old_in - inavail) / sizeof (jchar); |
| #else /* HAVE_ICONV */ |
| return -1; |
| #endif /* HAVE_ICONV */ |
| } |
| |
| jboolean |
| gnu::gcj::convert::IOConverter::iconv_init (void) |
| { |
| // Some versions of iconv() always return their UCS-2 results in |
| // big-endian order, and they also require UCS-2 inputs to be in |
| // big-endian order. For instance, glibc 2.1.3 does this. If the |
| // UTF-8=>UCS-2 iconv converter has this feature, then we assume |
| // that all UCS-2 converters do. (This might not be the best |
| // heuristic, but is is all we've got.) |
| jboolean result = false; |
| #ifdef HAVE_ICONV |
| iconv_t handle = iconv_open ("UCS-2", "UTF-8"); |
| if (handle != (iconv_t) -1) |
| { |
| jchar c; |
| unsigned char in[3]; |
| char *inp, *outp; |
| size_t inc, outc, r; |
| |
| // This is the UTF-8 encoding of \ufeff. |
| in[0] = 0xef; |
| in[1] = 0xbb; |
| in[2] = 0xbf; |
| |
| inp = (char *) in; |
| inc = 3; |
| outp = (char *) &c; |
| outc = 2; |
| |
| r = iconv_adapter (iconv, handle, &inp, &inc, &outp, &outc); |
| // Conversion must be complete for us to use the result. |
| if (r != (size_t) -1 && inc == 0 && outc == 0) |
| result = (c != 0xfeff); |
| |
| // Release iconv handle. |
| iconv_close (handle); |
| } |
| #endif /* HAVE_ICONV */ |
| return result; |
| } |
| |
| void |
| gnu::gcj::convert::Output_iconv::done () |
| { |
| #ifdef HAVE_ICONV |
| // 50 bytes should be enough for any reset sequence. |
| size_t avail = 50; |
| char tmp[avail]; |
| char *p = tmp; |
| // Calling iconv() with a NULL INBUF pointer will cause iconv() to |
| // switch to its initial state. We don't care about the output that |
| // might be generated in that situation. |
| iconv_adapter (iconv, (iconv_t) handle, NULL, NULL, &p, &avail); |
| UnicodeToBytes::done (); |
| #else /* HAVE_ICONV */ |
| // If no iconv, do nothing |
| #endif /* HAVE_ICONV */ |
| } |