ruby-changes:13687
From: yugui <ko1@a...>
Date: Sun, 25 Oct 2009 23:45:54 +0900 (JST)
Subject: [ruby-changes:13687] Ruby:r25473 (ruby_1_9_1): merges r24443 from trunk into ruby_1_9_1.
yugui 2009-10-25 23:45:39 +0900 (Sun, 25 Oct 2009) New Revision: 25473 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=25473 Log: merges r24443 from trunk into ruby_1_9_1. -- * marshal.c (w_symbol r_symlink, r_symbol, r_object0): fix for non-ascii symbols. loading such symbols can cause segfaults in older versions. [ruby-core:24788] Modified files: branches/ruby_1_9_1/ChangeLog branches/ruby_1_9_1/marshal.c branches/ruby_1_9_1/test/ruby/test_marshal.rb branches/ruby_1_9_1/version.h Index: ruby_1_9_1/ChangeLog =================================================================== --- ruby_1_9_1/ChangeLog (revision 25472) +++ ruby_1_9_1/ChangeLog (revision 25473) @@ -1,3 +1,9 @@ +Sat Aug 8 06:18:29 2009 Nobuyoshi Nakada <nobu@r...> + + * marshal.c (w_symbol r_symlink, r_symbol, r_object0): fix for + non-ascii symbols. loading such symbols can cause segfaults in + older versions. [ruby-core:24788] + Sun Aug 9 07:25:07 2009 Nobuyoshi Nakada <nobu@r...> * ext/ripper/eventids2.c (token_to_eventid): added Index: ruby_1_9_1/version.h =================================================================== --- ruby_1_9_1/version.h (revision 25472) +++ ruby_1_9_1/version.h (revision 25473) @@ -1,5 +1,5 @@ #define RUBY_VERSION "1.9.1" -#define RUBY_PATCHLEVEL 282 +#define RUBY_PATCHLEVEL 283 #define RUBY_VERSION_MAJOR 1 #define RUBY_VERSION_MINOR 9 #define RUBY_VERSION_TEENY 1 Index: ruby_1_9_1/marshal.c =================================================================== --- ruby_1_9_1/marshal.c (revision 25472) +++ ruby_1_9_1/marshal.c (revision 25473) @@ -186,6 +186,7 @@ } static void w_long(long, struct dump_arg*); +static void w_encoding(VALUE obj, long num, struct dump_call_arg *arg); static void w_nbyte(const char *s, int n, struct dump_arg *arg) @@ -377,20 +378,34 @@ static void w_symbol(ID id, struct dump_arg *arg) { - const char *sym; + VALUE sym; st_data_t num; + int encidx = -1; if (st_lookup(arg->symbols, id, &num)) { w_byte(TYPE_SYMLINK, arg); w_long((long)num, arg); } else { - sym = rb_id2name(id); + sym = rb_id2str(id); if (!sym) { rb_raise(rb_eTypeError, "can't dump anonymous ID %ld", id); } + encidx = rb_enc_get_index(sym); + if (encidx == rb_usascii_encindex()) { + encidx = -1; + } + else if (rb_enc_str_coderange(sym) != ENC_CODERANGE_7BIT) { + w_byte(TYPE_IVAR, arg); + } w_byte(TYPE_SYMBOL, arg); - w_bytes(sym, strlen(sym), arg); + w_bytes(RSTRING_PTR(sym), RSTRING_LEN(sym), arg); + if (encidx != -1) { + struct dump_call_arg c_arg; + c_arg.limit = 1; + c_arg.arg = arg; + w_encoding(sym, 0, &c_arg); + } st_add_direct(arg->symbols, id, arg->symbols->num_entries); } } @@ -936,6 +951,7 @@ static VALUE r_entry(VALUE v, struct load_arg *arg); static VALUE r_object(struct load_arg *arg); +static ID r_symbol(struct load_arg *arg); static VALUE path2class(const char *path); static int @@ -1041,6 +1057,20 @@ return str; } +static int +id2encidx(ID id, VALUE val) +{ + if (id == rb_id_encoding()) { + return rb_enc_find_index(StringValueCStr(val)); + } + else if (id == rb_intern("E")) { + if (val == Qfalse) return rb_usascii_encindex(); + else if (val == Qtrue) return rb_utf8_encindex(); + /* bogus ignore */ + } + return -1; +} + static ID r_symlink(struct load_arg *arg) { @@ -1054,11 +1084,22 @@ } static ID -r_symreal(struct load_arg *arg) +r_symreal(struct load_arg *arg, int ivar) { volatile VALUE s = r_bytes(arg); - ID id = rb_intern(RSTRING_PTR(s)); + ID id; + int idx = -1; + if (ivar) { + long num = r_long(arg); + while (num-- > 0) { + id = r_symbol(arg); + idx = id2encidx(id, r_object(arg)); + } + } + if (idx < 0) idx = rb_usascii_encindex(); + rb_enc_associate_index(s, idx); + id = rb_intern_str(s); st_insert(arg->symbols, arg->symbols->num_entries, id); return id; @@ -1067,15 +1108,22 @@ static ID r_symbol(struct load_arg *arg) { - int type; + int type, ivar = 0; + again: switch ((type = r_byte(arg))) { + case TYPE_IVAR: + ivar = 1; + goto again; case TYPE_SYMBOL: - return r_symreal(arg); + return r_symreal(arg, ivar); case TYPE_SYMLINK: + if (ivar) { + rb_raise(rb_eArgError, "dump format error (symlink with encoding)"); + } return r_symlink(arg); default: - rb_raise(rb_eArgError, "dump format error(0x%x)", type); + rb_raise(rb_eArgError, "dump format error for symbol(0x%x)", type); break; } } @@ -1147,9 +1195,9 @@ while (len--) { ID id = r_symbol(arg); VALUE val = r_object(arg); - if (id == rb_id_encoding()) { - int idx = rb_enc_find_index(StringValueCStr(val)); - if (idx > 0) rb_enc_associate_index(obj, idx); + int idx = id2encidx(id, val); + if (idx >= 0) { + rb_enc_associate_index(obj, idx); } else { rb_ivar_set(obj, id, val); @@ -1577,7 +1625,13 @@ break; case TYPE_SYMBOL: - v = ID2SYM(r_symreal(arg)); + if (ivp) { + v = ID2SYM(r_symreal(arg, *ivp)); + *ivp = Qfalse; + } + else { + v = ID2SYM(r_symreal(arg, 0)); + } v = r_leave(v, arg); break; Index: ruby_1_9_1/test/ruby/test_marshal.rb =================================================================== --- ruby_1_9_1/test/ruby/test_marshal.rb (revision 25472) +++ ruby_1_9_1/test/ruby/test_marshal.rb (revision 25473) @@ -192,4 +192,10 @@ assert_equal(true, y.tainted?) assert_equal(true, y.untrusted?) end + + def test_symbol + [:ruby, :"\u{7d05}\u{7389}"].each do |sym| + assert_equal(sym, Marshal.load(Marshal.dump(sym)), '[ruby-core:24788]') + end + end end -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/