From 27341dc95dae3425c98ac27cbd932eba393b9164 Mon Sep 17 00:00:00 2001 From: Jonas Kulla Date: Sun, 20 Oct 2013 15:51:24 +0200 Subject: [PATCH] MRI-Binding: Enforce UTF-8 strings in 'Marshal::load()' via aux proc We override 'Marshal::load()': the original method is kept under an alias, and the replacement calls that alias with a custom auxiliary proc which sets the encoding of all demarshalled strings to UTF-8. The case where the caller passes their own proc to 'load()' is not implemented yet. This is definitely a better solution than patching the Ruby source tree. Thanks to github.com/cremno for hints and help! --- binding-mri/filesystem-binding.cpp | 38 ++++++++++++++++++++++++++++++ patches/ruby/marshal_utf8.patch | 15 ------------ 2 files changed, 38 insertions(+), 15 deletions(-) delete mode 100644 patches/ruby/marshal_utf8.patch diff --git a/binding-mri/filesystem-binding.cpp b/binding-mri/filesystem-binding.cpp index ec4a84c..8e7a481 100644 --- a/binding-mri/filesystem-binding.cpp +++ b/binding-mri/filesystem-binding.cpp @@ -24,6 +24,9 @@ #include "sharedstate.h" #include "filesystem.h" +#include "ruby/encoding.h" +#include "ruby/intern.h" + #include DEF_TYPE(FileInt); @@ -154,6 +157,34 @@ RB_METHOD(kernelSaveData) return Qnil; } +static VALUE stringForceUTF8(VALUE arg) +{ + if (rb_type(arg) != RUBY_T_STRING) + return arg; + + rb_enc_associate(arg, rb_utf8_encoding()); + + return arg; +} + +RB_METHOD(_marshalLoad) +{ + RB_UNUSED_PARAM; + + VALUE port, proc = Qnil; + + rb_get_args(argc, argv, "o|o", &port, &proc, RB_ARG_END); + + if (rb_type(proc) != RUBY_T_NIL) + rb_raise(rb_eNotImpError, "MKXP: Marshal with custom proc not (yet) implemented"); + + VALUE utf8Proc = rb_proc_new(RUBY_METHOD_FUNC(stringForceUTF8), Qnil); + + VALUE marsh = rb_const_get(rb_cObject, rb_intern("Marshal")); + + return rb_funcall(marsh, rb_intern("_mkxp_load_alias"), 2, port, utf8Proc); +} + void fileIntBindingInit() { @@ -171,4 +202,11 @@ fileIntBindingInit() _rb_define_module_function(rb_mKernel, "load_data", kernelLoadData);
_rb_define_module_function(rb_mKernel, "save_data", kernelSaveData); + + /* We override the built-in 'Marshal::load()' function to silently + * insert our utf8proc that ensures all read strings will be + * UTF-8 encoded */ + VALUE marsh = rb_const_get(rb_cObject, rb_intern("Marshal")); + rb_define_alias(rb_singleton_class(marsh), "_mkxp_load_alias", "load"); + _rb_define_module_function(marsh, "load", _marshalLoad); } diff --git a/patches/ruby/marshal_utf8.patch b/patches/ruby/marshal_utf8.patch deleted file mode 100644 index 64e58f0..0000000 --- a/patches/ruby/marshal_utf8.patch +++ /dev/null @@ -1,15 +0,0 @@ -diff --git a/marshal.c b/marshal.c -index 4cba05d..dfce6ee 100644 ---- a/marshal.c -+++ b/marshal.c -@@ -1312,7 +1312,9 @@ r_unique(struct load_arg *arg) - static VALUE - r_string(struct load_arg *arg) - { -- return r_bytes(arg); -+ VALUE str = r_bytes(arg); -+ rb_enc_associate(str, rb_utf8_encoding()); -+ return str; - } - - static VALUE