From 10bfcb57b7b44686457ae274bcf661b6a97452b2 Mon Sep 17 00:00:00 2001 From: Jonas Kulla Date: Wed, 20 Aug 2014 05:20:07 +0200 Subject: [PATCH] Config: Convert game title to valid UTF-8 (if it isn't already) Removes the need to manually convert the Game.ini to UTF-8 every time with eg. Japanese games. Also, setting the window title on OSX with invalid UTF-8 crashes. This functionality and the dependency on libiconv and libguess are optional and can be enabled with `CONFIG+=INI_ENCODING`. If turned off and invalid UTF-8 is encountered, the game title is treated as being empty (ie. the folder name is used instead). --- README.md | 4 ++ mkxp.conf.sample | 9 +++ mkxp.pro | 8 +++ src/config.cpp | 149 +++++++++++++++++++++++++++++++++++++++++++++++ src/config.h | 1 + 5 files changed, 171 insertions(+) diff --git a/README.md b/README.md index 975f879..fa24768 100644 --- a/README.md +++ b/README.md @@ -62,6 +62,8 @@ This binding only exists for testing purposes and does nothing (the engine quits * fluidsynth (if midi enabled) * zlib (only ruby bindings) * OpenGL header (alternatively GLES2 with `DEFINES+=GLES2_HEADER`) +* libiconv (on Windows, optional with INI_ENCODING) +* libguess (optional with INI_ENCODING) mkxp employs Qt's qmake build system, so you'll need to install that beforehand. Alternatively, you can build with cmake (FIXME: add cmake instructions). @@ -73,6 +75,8 @@ Midi support is enabled by default; you can disable it via `qmake CONFIG+=DISABL By default, mkxp switches into the directory where its binary is contained and then starts reading the configuration and resolving relative paths. In case this is undesired (eg. when the binary is to be installed to a system global, read-only location), it can be turned off by adding `DEFINES+=WORKDIR_CURRENT` to qmake's arguments. +To auto detect the encoding of the game title in `Game.ini` and auto convert it to UTF-8, build with `CONFIG+=INI_ENCODING`. Requires iconv implementation and libguess. 
If the encoding is wrongly detected, you can set the "titleLanguage" hint in mkxp.conf. + **RGSS Version**: Add `RGSS_VER=<ver>`, with `<ver>` being either 1, 2 or 3, to the parameter list to select the desired version. The default is 1. **MRI-Binding**: pkg-config will look for `ruby-2.1.pc`, but you can modify mkxp.pro to use 2.0 instead. This is the default binding, so no arguments to qmake needed (`BINDING=MRI` to be explicit). diff --git a/mkxp.conf.sample b/mkxp.conf.sample index 86c33a9..602f551 100644 --- a/mkxp.conf.sample +++ b/mkxp.conf.sample @@ -163,3 +163,12 @@ # Activate "reverb" effect for midi playback # # midi.reverb=false + + +# Give a hint about the language of the game title +# specified in Game.ini; useful if the encoding +# is being wrongly detected. Relevant only if mkxp was +# built with automatic encoding conversion (INI_ENCODING). +# (default: none) +# +# titleLanguage=japanese diff --git a/mkxp.pro b/mkxp.pro index 1e42f8e..211b9db 100644 --- a/mkxp.pro +++ b/mkxp.pro @@ -88,6 +88,10 @@ unix { PKGCONFIG += fluidsynth } + INI_ENCODING { + PKGCONFIG += libguess + } + # Deal with boost paths... 
isEmpty(BOOST_I) { BOOST_I = $$(BOOST_I) @@ -248,6 +252,10 @@ MIDI { DEFINES += MIDI } +INI_ENCODING { + DEFINES += INI_ENCODING +} + defineReplace(xxdOutput) { return($$basename(1).xxd) } diff --git a/src/config.cpp b/src/config.cpp index 8eb29cd..c11a593 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -26,10 +26,98 @@ #include #include +#include #include "debugwriter.h" #include "util.h" +#ifdef INI_ENCODING +extern "C" { +#include +} +#include +#include +#endif + +/* Returns true if the NUL-terminated string consists only of well-formed UTF-8 sequences (no overlong encodings, no surrogates, nothing above U+10FFFF) and false otherwise; an empty string is valid. ASCII control characters other than tab, LF and CR, as well as DEL (0x7F), are rejected. Every test checks the lead byte first and each following byte in order, so the && chains stop at the NUL terminator instead of reading past it. Adapted from http://stackoverflow.com/a/1031773 */ +static bool validUtf8(const char *string) +{ + const uint8_t *bytes = (uint8_t*) string; + + while(*bytes) + { + if( (/* ASCII: only tab, LF, CR and printable 0x20..0x7E are accepted; + * replace this test with bytes[0] <= 0x7F to also allow the other ASCII control characters */ + bytes[0] == 0x09 || + bytes[0] == 0x0A || + bytes[0] == 0x0D || + (0x20 <= bytes[0] && bytes[0] <= 0x7E) + ) + ) { + bytes += 1; + continue; + } + + if( (/* non-overlong 2-byte: lead bytes 0xC0 and 0xC1 are rejected */ + (0xC2 <= bytes[0] && bytes[0] <= 0xDF) && + (0x80 <= bytes[1] && bytes[1] <= 0xBF) + ) + ) { + bytes += 2; + continue; + } + + if( (/* 3-byte with lead 0xE0: second byte >= 0xA0 excludes overlongs */ + bytes[0] == 0xE0 && + (0xA0 <= bytes[1] && bytes[1] <= 0xBF) && + (0x80 <= bytes[2] && bytes[2] <= 0xBF) + ) || + (/* straight 3-byte */ + ((0xE1 <= bytes[0] && bytes[0] <= 0xEC) || + bytes[0] == 0xEE || + bytes[0] == 0xEF) && + (0x80 <= bytes[1] && bytes[1] <= 0xBF) && + (0x80 <= bytes[2] && bytes[2] <= 0xBF) + ) || + (/* 3-byte with lead 0xED: second byte <= 0x9F excludes surrogates (U+D800..U+DFFF) */ + bytes[0] == 0xED && + (0x80 <= bytes[1] && bytes[1] <= 0x9F) && + (0x80 <= bytes[2] && bytes[2] <= 0xBF) + ) + ) { + bytes += 3; + continue; + } + + if( (/* planes 1-3: lead 0xF0, second byte >= 0x90 excludes overlongs */ + bytes[0] == 0xF0 && + (0x90 <= bytes[1] && bytes[1] <= 0xBF) && + (0x80 <= bytes[2] && bytes[2] <= 0xBF) && + (0x80 <= bytes[3] && bytes[3] <= 0xBF) + ) || + (/* planes 4-15 */ + (0xF1 <= bytes[0] && bytes[0] <= 0xF3) && + (0x80 <= bytes[1] && bytes[1] <= 0xBF) && + (0x80 <= bytes[2] && bytes[2] <= 0xBF) && + (0x80 <= bytes[3] && bytes[3] <= 0xBF) + ) || + (/* plane 16: lead 0xF4, second byte <= 0x8F caps the range at U+10FFFF */ + bytes[0] == 0xF4 && + (0x80 <= bytes[1] && 
bytes[1] <= 0x8F) && + (0x80 <= bytes[2] && bytes[2] <= 0xBF) && + (0x80 <= bytes[3] && bytes[3] <= 0xBF) + ) + ) { + bytes += 4; + continue; + } + + return false; + } + + return true; +} + typedef std::vector StringVec; namespace po = boost::program_options; @@ -73,6 +161,7 @@ void Config::read(int argc, char *argv[]) PO_DESC(anyAltToggleFS, bool) \ PO_DESC(allowSymlinks, bool) \ PO_DESC(iconPath, std::string) \ + PO_DESC(titleLanguage, std::string) \ PO_DESC(midi.soundFont, std::string) \ PO_DESC(midi.chorus, bool) \ PO_DESC(midi.reverb, bool) \ @@ -171,6 +260,66 @@ void Config::readGameINI() strReplace(game.scripts, '\\', '/'); +#ifdef INI_ENCODING + /* Region hints tried in order, the user-supplied titleLanguage first; the list is 0-terminated. Can add more later */ + const char *languages[] = + { + titleLanguage.c_str(), + GUESS_REGION_JP, /* Japanese */ + GUESS_REGION_KR, /* Korean */ + GUESS_REGION_CN, /* Chinese */ + 0 + }; + + bool convSuccess = true; + + /* Verify that the game title is UTF-8, and if not, + * try to determine the encoding and convert to UTF-8. NOTE(review): the iconv_open() result is used below without being checked against (iconv_t) -1 */ + if (!validUtf8(game.title.c_str())) + { + const char *encoding = 0; + convSuccess = false; + + for (size_t i = 0; languages[i]; ++i) + { + encoding = libguess_determine_encoding(game.title.c_str(), + game.title.size(), + languages[i]); + if (encoding) + break; + } + + if (encoding) + { + iconv_t cd = iconv_open("UTF-8", encoding); + + size_t inLen = game.title.size(); + size_t outLen = inLen * 4; + std::string buf(outLen, '\0'); + char *inPtr = const_cast(game.title.c_str()); + char *outPtr = const_cast(buf.c_str()); + + errno = 0; + size_t result = iconv(cd, &inPtr, &inLen, &outPtr, &outLen); + + iconv_close(cd); + + if (result != (size_t) -1 && errno == 0) + { + buf.resize(buf.size()-outLen); + game.title = buf; + convSuccess = true; + } + } + } + + if (!convSuccess) + game.title.clear(); +#else + if (!validUtf8(game.title.c_str())) + game.title.clear(); +#endif + if (game.title.empty()) game.title = baseName(gameFolder); } diff --git a/src/config.h b/src/config.h index 
8427abb..498a052 100644 --- a/src/config.h +++ b/src/config.h @@ -57,6 +57,7 @@ struct Config bool pathCache; std::string iconPath; + std::string titleLanguage; struct {