Config: Convert game title to valid UTF-8 (if it isn't already)

Removes the need to manually convert the Game.ini to UTF-8 every
time with eg. Japanese games. Also, setting the window title on
OSX with invalid UTF-8 crashes.

This functionality and the dependency on libiconv and libguess
are optional and can be enabled with `CONFIG+=INI_ENCODING`.
If turned off and invalid UTF-8 is encountered, the game title
is treated as being empty (ie. the folder name is used instead).
This commit is contained in:
Jonas Kulla 2014-08-20 05:20:07 +02:00
parent 622845a404
commit 10bfcb57b7
5 changed files with 171 additions and 0 deletions

View File

@ -62,6 +62,8 @@ This binding only exists for testing purposes and does nothing (the engine quits
* fluidsynth (if midi enabled) * fluidsynth (if midi enabled)
* zlib (only ruby bindings) * zlib (only ruby bindings)
* OpenGL header (alternatively GLES2 with `DEFINES+=GLES2_HEADER`) * OpenGL header (alternatively GLES2 with `DEFINES+=GLES2_HEADER`)
* libiconv (on Windows, optional with INI_ENCODING)
* libguess (optional with INI_ENCODING)
mkxp employs Qt's qmake build system, so you'll need to install that beforehand. Alternatively, you can build with cmake (FIXME: add cmake instructions). mkxp employs Qt's qmake build system, so you'll need to install that beforehand. Alternatively, you can build with cmake (FIXME: add cmake instructions).
@ -73,6 +75,8 @@ Midi support is enabled by default; you can disable it via `qmake CONFIG+=DISABL
By default, mkxp switches into the directory where its binary is contained and then starts reading the configuration and resolving relative paths. In case this is undesired (eg. when the binary is to be installed to a system global, read-only location), it can be turned off by adding `DEFINES+=WORKDIR_CURRENT` to qmake's arguments. By default, mkxp switches into the directory where its binary is contained and then starts reading the configuration and resolving relative paths. In case this is undesired (eg. when the binary is to be installed to a system global, read-only location), it can be turned off by adding `DEFINES+=WORKDIR_CURRENT` to qmake's arguments.
To auto-detect the encoding of the game title in `Game.ini` and automatically convert it to UTF-8, build with `CONFIG+=INI_ENCODING`. This requires an iconv implementation and libguess. If the encoding is detected incorrectly, you can set the "titleLanguage" hint in mkxp.conf.
**RGSS Version**: Add `RGSS_VER=<version>`, with `<version>` being either 1, 2 or 3, to the parameter list to select the desired version. The default is 1. **RGSS Version**: Add `RGSS_VER=<version>`, with `<version>` being either 1, 2 or 3, to the parameter list to select the desired version. The default is 1.
**MRI-Binding**: pkg-config will look for `ruby-2.1.pc`, but you can modify mkxp.pro to use 2.0 instead. This is the default binding, so no arguments to qmake needed (`BINDING=MRI` to be explicit). **MRI-Binding**: pkg-config will look for `ruby-2.1.pc`, but you can modify mkxp.pro to use 2.0 instead. This is the default binding, so no arguments to qmake needed (`BINDING=MRI` to be explicit).

View File

@ -163,3 +163,12 @@
# Activate "reverb" effect for midi playback # Activate "reverb" effect for midi playback
# #
# midi.reverb=false # midi.reverb=false
# Give a hint about which language the game title
# specified in Game.ini is written in; useful if the
# encoding is being detected incorrectly. Relevant only if
# mkxp was built with automatic encoding conversion (INI_ENCODING).
# (default: none)
#
# titleLanguage=japanese

View File

@ -88,6 +88,10 @@ unix {
PKGCONFIG += fluidsynth PKGCONFIG += fluidsynth
} }
# Only when built with CONFIG+=INI_ENCODING: locate libguess via pkg-config
INI_ENCODING {
PKGCONFIG += libguess
}
# Deal with boost paths... # Deal with boost paths...
isEmpty(BOOST_I) { isEmpty(BOOST_I) {
BOOST_I = $$(BOOST_I) BOOST_I = $$(BOOST_I)
@ -248,6 +252,10 @@ MIDI {
DEFINES += MIDI DEFINES += MIDI
} }
# Forward the INI_ENCODING build option to the sources as a
# preprocessor define (checked with #ifdef INI_ENCODING)
INI_ENCODING {
DEFINES += INI_ENCODING
}
defineReplace(xxdOutput) { defineReplace(xxdOutput) {
return($$basename(1).xxd) return($$basename(1).xxd)
} }

View File

@ -26,10 +26,98 @@
#include <boost/program_options/variables_map.hpp> #include <boost/program_options/variables_map.hpp>
#include <fstream> #include <fstream>
#include <stdint.h>
#include "debugwriter.h" #include "debugwriter.h"
#include "util.h" #include "util.h"
#ifdef INI_ENCODING
extern "C" {
#include <libguess.h>
}
#include <iconv.h>
#include <errno.h>
#endif
/* http://stackoverflow.com/a/1031773 */
/* Checks whether a NUL terminated string consists solely of
 * well-formed UTF-8 sequences.
 *
 * Of the ASCII range only TAB, LF, CR and the printable
 * characters 0x20..0x7E are accepted; overlong encodings,
 * surrogates and code points above plane 16 are rejected.
 *
 * Returns true for the empty string. */
static bool validUtf8(const char *string)
{
	const uint8_t *cur = reinterpret_cast<const uint8_t*>(string);

	for (;;)
	{
		const uint8_t lead = cur[0];

		if (lead == 0)
			return true;

		/* Number of continuation bytes following the lead byte,
		 * and the range the FIRST continuation byte must lie in.
		 * All later continuation bytes must be in 0x80..0xBF */
		int trail;
		uint8_t lo = 0x80;
		uint8_t hi = 0xBF;

		if (lead == 0x09 || lead == 0x0A || lead == 0x0D ||
		    (lead >= 0x20 && lead <= 0x7E))
		{
			/* Accepted ASCII */
			trail = 0;
		}
		else if (lead >= 0xC2 && lead <= 0xDF)
		{
			/* Non-overlong 2-byte */
			trail = 1;
		}
		else if (lead == 0xE0)
		{
			/* 3-byte, excluding overlongs */
			trail = 2;
			lo = 0xA0;
		}
		else if (lead == 0xED)
		{
			/* 3-byte, excluding surrogates */
			trail = 2;
			hi = 0x9F;
		}
		else if (lead >= 0xE1 && lead <= 0xEF)
		{
			/* Straight 3-byte (0xED was handled above) */
			trail = 2;
		}
		else if (lead == 0xF0)
		{
			/* Planes 1-3 */
			trail = 3;
			lo = 0x90;
		}
		else if (lead >= 0xF1 && lead <= 0xF3)
		{
			/* Planes 4-15 */
			trail = 3;
		}
		else if (lead == 0xF4)
		{
			/* Plane 16 */
			trail = 3;
			hi = 0x8F;
		}
		else
		{
			return false;
		}

		/* Bytes are inspected strictly in order and we bail out
		 * on the first mismatch; since the terminator (0) is never
		 * inside an accepted range, we cannot read past it */
		for (int i = 1; i <= trail; ++i)
		{
			if (cur[i] < lo || cur[i] > hi)
				return false;

			lo = 0x80;
			hi = 0xBF;
		}

		cur += trail + 1;
	}
}
typedef std::vector<std::string> StringVec; typedef std::vector<std::string> StringVec;
namespace po = boost::program_options; namespace po = boost::program_options;
@ -73,6 +161,7 @@ void Config::read(int argc, char *argv[])
PO_DESC(anyAltToggleFS, bool) \ PO_DESC(anyAltToggleFS, bool) \
PO_DESC(allowSymlinks, bool) \ PO_DESC(allowSymlinks, bool) \
PO_DESC(iconPath, std::string) \ PO_DESC(iconPath, std::string) \
PO_DESC(titleLanguage, std::string) \
PO_DESC(midi.soundFont, std::string) \ PO_DESC(midi.soundFont, std::string) \
PO_DESC(midi.chorus, bool) \ PO_DESC(midi.chorus, bool) \
PO_DESC(midi.reverb, bool) \ PO_DESC(midi.reverb, bool) \
@ -171,6 +260,66 @@ void Config::readGameINI()
strReplace(game.scripts, '\\', '/'); strReplace(game.scripts, '\\', '/');
#ifdef INI_ENCODING
	/* Language hints handed to libguess, tried in order until one
	 * yields an encoding. The user supplied "titleLanguage" hint
	 * (possibly empty) is tried first.
	 * Can add more later */
	const char *languages[] =
	{
		titleLanguage.c_str(),
		GUESS_REGION_JP, /* Japanese */
		GUESS_REGION_KR, /* Korean */
		GUESS_REGION_CN, /* Chinese */
		0
	};

	bool convSuccess = true;

	/* Verify that the game title is UTF-8, and if not,
	 * try to determine the encoding and convert to UTF-8 */
	if (!validUtf8(game.title.c_str()))
	{
		const char *encoding = 0;
		convSuccess = false;

		for (size_t i = 0; languages[i]; ++i)
		{
			encoding = libguess_determine_encoding(game.title.c_str(),
			                                       game.title.size(),
			                                       languages[i]);

			/* A null result is treated as "no match for this hint" */
			if (encoding)
				break;
		}

		/* iconv_open() signals failure (eg. an encoding name the
		 * local iconv implementation doesn't know) by returning
		 * (iconv_t) -1; such a descriptor must not be passed on
		 * to iconv() / iconv_close() */
		iconv_t cd = encoding ? iconv_open("UTF-8", encoding)
		                      : (iconv_t) -1;

		if (cd != (iconv_t) -1)
		{
			size_t inLen = game.title.size();
			/* Output budget of 4 bytes per input byte */
			size_t outLen = inLen * 4;
			std::string buf(outLen, '\0');

			/* iconv takes a non-const input pointer */
			char *inPtr = const_cast<char*>(game.title.c_str());
			/* Write through the mutable element access instead of
			 * casting away the constness of c_str(). The title is
			 * non-empty here (an empty one passes validUtf8), so
			 * buf has at least one element */
			char *outPtr = &buf[0];

			errno = 0;
			size_t result = iconv(cd, &inPtr, &inLen, &outPtr, &outLen);

			iconv_close(cd);

			if (result != (size_t) -1 && errno == 0)
			{
				/* outLen now holds the unused remainder of buf */
				buf.resize(buf.size()-outLen);
				game.title = buf;
				convSuccess = true;
			}
		}
	}

	/* An empty title makes the code below fall back to the folder name */
	if (!convSuccess)
		game.title.clear();
#else
	/* No conversion support compiled in: discard invalid titles */
	if (!validUtf8(game.title.c_str()))
		game.title.clear();
#endif
if (game.title.empty()) if (game.title.empty())
game.title = baseName(gameFolder); game.title = baseName(gameFolder);
} }

View File

@ -57,6 +57,7 @@ struct Config
bool pathCache; bool pathCache;
std::string iconPath; std::string iconPath;
std::string titleLanguage;
struct struct
{ {