tag/TagId3: detect CP932 / EUC-JP text in ID3 strings via iconv

Adds get_utf8_text(), which converts a libid3tag UCS-4 string to UTF-8
and, when every code point fits in one byte, checks whether those bytes
are really CP932 or EUC-JP text.  tag_id3_getstring() and
import_id3_string() use it when USE_CP932 is defined.

NOTE(review): this file was recovered from a copy in which line breaks,
indentation, blank context lines and the <...> part of every #include
had been lost.  The context lines below (the include targets <glib.h>,
<id3tag.h>, <string>, the blank lines and the tab indentation) are
reconstructed on a best-effort basis; confirm them against revision 1.1
before applying, or apply with "patch -l".

NOTE(review): tag_id3_getstring() now calls get_utf8_text() under
"#ifdef USE_CP932".  The earlier "#ifndef" selected a function that only
exists when the macro is defined; confirm that enabling the conversion
for this function is intended.

--- src/tag/TagId3.cxx	24 Apr 2015 16:28:05 -0000	1.1
+++ src/tag/TagId3.cxx	26 Apr 2015 12:42:02 -0000	1.2
@@ -28,16 +28,19 @@
 #include "util/Domain.hxx"
 #include "Log.hxx"
 #include "config/ConfigGlobal.hxx"
 #include "Riff.hxx"
 #include "Aiff.hxx"
 #include "fs/Path.hxx"
 #include "fs/FileSystem.hxx"
 
+/* Disable the GLib branch of import_id3_string(); presumably so that
+ * every string reaches the iconv-based conversion added below. */
+#undef HAVE_GLIB
 #ifdef HAVE_GLIB
 #include <glib.h>
 #endif
 
 #include <id3tag.h>
 
 #include <string>
 
@@ -67,28 +70,182 @@
 static constexpr Domain id3_domain("id3");
 
 static inline bool
 tag_is_id3v1(struct id3_tag *tag)
 {
 	return (id3_tag_options(tag, 0, 0) & ID3_TAG_OPTION_ID3V1) != 0;
 }
 
+#define USE_CP932
+
+#ifdef USE_CP932
+
+#include <errno.h>
+#include <iconv.h>
+
+#define UNKNOWN_STR "UNKNOWN"
+#define MAX_ICONV_BUF 1024
+
+/** Outcome of one do_convert() attempt. */
+typedef enum {
+	ICONV_OK,	/* conversion succeeded */
+	ICONV_TRYNEXT,	/* input rejected; try another source charset */
+	ICONV_FATAL	/* converter unavailable or output buffer too small */
+} iconv_result;
+
+/** Count the code points before the terminating 0 of a UCS-4 string. */
+static id3_length_t id3_ucs4_length(id3_ucs4_t const *ucs4)
+{
+	id3_ucs4_t const *ptr = ucs4;
+
+	while (*ptr)
+		++ptr;
+
+	return ptr - ucs4;
+}
+
+/**
+ * Convert inbytesleft bytes at inbuf from from_ces to to_ces.
+ *
+ * At most outbytesleft_orig - 1 bytes are written to outbuf_orig; on
+ * ICONV_OK the result is NUL-terminated.  On ICONV_TRYNEXT the whole
+ * output buffer is zeroed so it can be reused for the next attempt.
+ */
+static iconv_result do_convert(const char *to_ces, const char *from_ces,
+			       char *inbuf, size_t inbytesleft,
+			       char *outbuf_orig, size_t outbytesleft_orig)
+{
+	iconv_result ret = ICONV_OK;
+	char *outbuf = outbuf_orig;
+	size_t outbytesleft;
+	iconv_t cd;
+
+	/* one byte is reserved for the terminator; avoid size_t underflow */
+	if (outbytesleft_orig == 0)
+		return ICONV_FATAL;
+	outbytesleft = outbytesleft_orig - 1;
+
+	cd = iconv_open(to_ces, from_ces);
+	if (cd == (iconv_t)-1)
+		return ICONV_FATAL;
+
+	if (iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft) == (size_t)-1) {
+		if (errno == E2BIG) {
+			ret = ICONV_FATAL;
+		} else {
+			ret = ICONV_TRYNEXT;
+			memset(outbuf_orig, '\0', outbytesleft_orig);
+		}
+	} else {
+		/* terminate explicitly instead of relying on a pre-zeroed buffer */
+		*outbuf = '\0';
+	}
+	iconv_close(cd);
+
+	return ret;
+}
+
+/**
+ * Build a UTF-8 string from native_text, checking whether it is really
+ * CP932 or EUC-JP text whose bytes were stored one per code point.
+ *
+ * If any code point is above 0xff, or the UTF-8 form converts cleanly to
+ * CP932, the plain UTF-8 form is returned.  Otherwise the Latin-1 bytes
+ * are reinterpreted as CP932, then as EUC-JP; if neither decodes, the
+ * plain UTF-8 form is returned.
+ *
+ * @return a string the caller must free(); "UNKNOWN" when a conversion
+ * fails fatally or an allocation fails, NULL only if that copy fails too
+ */
+static unsigned char *get_utf8_text(const id3_ucs4_t *native_text)
+{
+	unsigned char *utf8_text = NULL;
+	char *in = NULL, *in8 = NULL, *iconv_buf = NULL;
+	id3_length_t ucs4_len, i;
+	iconv_result rc;
+
+	in8 = (char *)id3_ucs4_utf8duplicate(native_text);
+	if (!in8)
+		goto out;
+
+	/* a code point above 0xff cannot be a single raw byte */
+	ucs4_len = id3_ucs4_length(native_text);
+	for (i = 0; i < ucs4_len; i++) {
+		if (native_text[i] > 0xff)
+			goto use_in8;
+	}
+
+	in = (char *)id3_ucs4_latin1duplicate(native_text);
+	if (!in)
+		goto out;
+
+	iconv_buf = (char *)calloc(MAX_ICONV_BUF, sizeof(char));
+	if (!iconv_buf)
+		goto out;
+
+	/* (1) try utf8 -> cp932 */
+	rc = do_convert("CP932", "UTF-8", in8, strlen(in8),
+			iconv_buf, MAX_ICONV_BUF);
+	if (rc == ICONV_OK)
+		goto use_in8;
+
+	if (rc == ICONV_TRYNEXT) {
+		/* (2) try cp932 -> utf8 */
+		rc = do_convert("UTF-8", "CP932", in, strlen(in),
+				iconv_buf, MAX_ICONV_BUF);
+	}
+
+	if (rc == ICONV_TRYNEXT) {
+		/* (3) try euc-jp -> utf8 */
+		rc = do_convert("UTF-8", "EUC-JP", in, strlen(in),
+				iconv_buf, MAX_ICONV_BUF);
+	}
+
+	if (rc == ICONV_OK) {
+		utf8_text = (unsigned char *)iconv_buf;
+		iconv_buf = NULL;	/* ownership moves to the caller */
+		goto out;
+	}
+	if (rc != ICONV_TRYNEXT)
+		goto out;	/* ICONV_FATAL: caller gets UNKNOWN_STR */
+	/* neither Japanese charset matched: fall through to plain UTF-8 */
+
+use_in8:
+	utf8_text = (unsigned char *)in8;
+	in8 = NULL;
+
+out:
+	free(iconv_buf);
+	free(in);
+	free(in8);
+
+	if (!utf8_text)
+		utf8_text = (unsigned char *)strdup(UNKNOWN_STR);
+
+	return utf8_text;
+}
+#endif
+
 static id3_utf8_t *
 tag_id3_getstring(const struct id3_frame *frame, unsigned i)
 {
 	id3_field *field = id3_frame_field(frame, i);
 	if (field == nullptr)
 		return nullptr;
 
 	const id3_ucs4_t *ucs4 = id3_field_getstring(field);
 	if (ucs4 == nullptr)
 		return nullptr;
 
+#ifdef USE_CP932
+	return (id3_utf8_t *)get_utf8_text(ucs4);
+#else
 	return id3_ucs4_utf8duplicate(ucs4);
+#endif
 }
 
 /* This will try to convert a string to utf-8,
  */
 static id3_utf8_t *
 import_id3_string(bool is_id3v1, const id3_ucs4_t *ucs4)
 {
 	id3_utf8_t *utf8;
@@ -114,17 +271,22 @@ import_id3_string(bool is_id3v1, const i
 			free(isostr);
 			return nullptr;
 		}
 		free(isostr);
 	} else {
 #else
 	(void)is_id3v1;
 #endif
+
+#ifdef USE_CP932
+		utf8 = (id3_utf8_t *)get_utf8_text(ucs4);
+#else
 		utf8 = id3_ucs4_utf8duplicate(ucs4);
+#endif
 		if (gcc_unlikely(utf8 == nullptr))
 			return nullptr;
 #ifdef HAVE_GLIB
 	}
 #endif
 
 	id3_utf8_t *utf8_stripped = (id3_utf8_t *)
 		xstrdup(Strip((char *)utf8));