ruby-changes:7661
From: nobu <ko1@a...>
Date: Sat, 6 Sep 2008 16:57:55 +0900 (JST)
Subject: [ruby-changes:7661] Ruby:r19181 (mvm): * merged from trunk r19119:19180.
nobu 2008-09-06 16:54:38 +0900 (Sat, 06 Sep 2008) New Revision: 19181 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=19181 Log: * merged from trunk r19119:19180. Added files: branches/mvm/enc/trans/escape.trans Modified files: branches/mvm/.merged-trunk-revision branches/mvm/ChangeLog branches/mvm/bignum.c branches/mvm/complex.c branches/mvm/enc/trans/japanese.trans branches/mvm/enc/trans/newline.trans branches/mvm/ext/bigdecimal/bigdecimal.c branches/mvm/ext/iconv/iconv.c branches/mvm/ext/pty/depend branches/mvm/ext/tk/tcltklib.c branches/mvm/ext/tk/tkutil/tkutil.c branches/mvm/ext/zlib/zlib.c branches/mvm/gc.c branches/mvm/include/ruby/encoding.h branches/mvm/include/ruby/intern.h branches/mvm/include/ruby/io.h branches/mvm/include/ruby/ruby.h branches/mvm/insns.def branches/mvm/io.c branches/mvm/iseq.c branches/mvm/lib/prime.rb branches/mvm/marshal.c branches/mvm/math.c branches/mvm/numeric.c branches/mvm/object.c branches/mvm/pack.c branches/mvm/parse.y branches/mvm/process.c branches/mvm/random.c branches/mvm/string.c branches/mvm/test/fileutils/test_fileutils.rb branches/mvm/test/ruby/test_complex.rb branches/mvm/test/ruby/test_econv.rb branches/mvm/test/ruby/test_io_m17n.rb branches/mvm/test/ruby/test_rational.rb branches/mvm/test/ruby/test_transcode.rb branches/mvm/thread.c branches/mvm/time.c branches/mvm/tool/transcode-tblgen.rb branches/mvm/transcode.c branches/mvm/transcode_data.h branches/mvm/version.h branches/mvm/vm.c branches/mvm/vm_dump.c Index: mvm/complex.c =================================================================== --- mvm/complex.c (revision 19180) +++ mvm/complex.c (revision 19181) @@ -168,6 +168,9 @@ fun1(numerator) fun1(polar) fun1(scalar_p) + +#define f_real_p f_scalar_p + fun1(to_f) fun1(to_i) fun1(to_r) @@ -324,7 +327,7 @@ case T_RATIONAL: break; default: - if (!k_numeric_p(num) || !f_scalar_p(num)) + if (!k_numeric_p(num) || !f_real_p(num)) rb_raise(rb_eArgError, "not a real"); } } @@ -341,16 +344,16 @@ if 
(f_zero_p(image) && f_unify_p(klass)) return real; #endif - else if (f_scalar_p(real) && f_scalar_p(image)) + else if (f_real_p(real) && f_real_p(image)) return nucomp_s_new_internal(klass, real, image); - else if (f_scalar_p(real)) { + else if (f_real_p(real)) { get_dat1(image); return nucomp_s_new_internal(klass, f_sub(real, dat->image), f_add(ZERO, dat->real)); } - else if (f_scalar_p(image)) { + else if (f_real_p(image)) { get_dat1(real); return nucomp_s_new_internal(klass, @@ -453,7 +456,7 @@ { get_dat1(x); - if (f_scalar_p(x)) + if (f_real_p(x)) return m_cos_bang(x); return f_complex_new2(rb_cComplex, f_mul(m_cos_bang(dat->real), @@ -467,7 +470,7 @@ { get_dat1(x); - if (f_scalar_p(x)) + if (f_real_p(x)) return m_sin_bang(x); return f_complex_new2(rb_cComplex, f_mul(m_sin_bang(dat->real), @@ -479,7 +482,7 @@ static VALUE m_sqrt(VALUE x) { - if (f_scalar_p(x)) { + if (f_real_p(x)) { if (!f_negative_p(x)) return m_sqrt_bang(x); return f_complex_new2(rb_cComplex, ZERO, m_sqrt_bang(f_negate(x))); @@ -541,7 +544,7 @@ return f_complex_new2(CLASS_OF(self), real, image); } - if (k_numeric_p(other) && f_scalar_p(other)) { + if (k_numeric_p(other) && f_real_p(other)) { get_dat1(self); return f_complex_new2(CLASS_OF(self), @@ -563,7 +566,7 @@ return f_complex_new2(CLASS_OF(self), real, image); } - if (k_numeric_p(other) && f_scalar_p(other)) { + if (k_numeric_p(other) && f_real_p(other)) { get_dat1(self); return f_complex_new2(CLASS_OF(self), @@ -587,7 +590,7 @@ return f_complex_new2(CLASS_OF(self), real, image); } - if (k_numeric_p(other) && f_scalar_p(other)) { + if (k_numeric_p(other) && f_real_p(other)) { get_dat1(self); return f_complex_new2(CLASS_OF(self), @@ -617,7 +620,7 @@ } return f_div(f_mul(self, f_conjugate(other)), f_abs2(other)); } - if (k_numeric_p(other) && f_scalar_p(other)) { + if (k_numeric_p(other) && f_real_p(other)) { get_dat1(self); return f_complex_new2(CLASS_OF(self), @@ -693,7 +696,7 @@ } return f_expt(f_div(f_to_r(ONE), self), 
f_negate(other)); } - if (k_numeric_p(other) && f_scalar_p(other)) { + if (k_numeric_p(other) && f_real_p(other)) { VALUE a, r, theta; a = f_polar(self); @@ -714,7 +717,7 @@ return f_boolcast(f_equal_p(adat->real, bdat->real) && f_equal_p(adat->image, bdat->image)); } - if (k_numeric_p(other) && f_scalar_p(other)) { + if (k_numeric_p(other) && f_real_p(other)) { get_dat1(self); return f_boolcast(f_equal_p(dat->real, other) && f_zero_p(dat->image)); @@ -725,7 +728,7 @@ static VALUE nucomp_coerce(VALUE self, VALUE other) { - if (k_numeric_p(other) && f_scalar_p(other)) + if (k_numeric_p(other) && f_real_p(other)) return rb_assoc_new(f_complex_new_bang1(CLASS_OF(self), other), self); rb_raise(rb_eTypeError, "%s can't be coerced into %s", @@ -775,13 +778,13 @@ return f_complex_new2(CLASS_OF(self), dat->real, f_negate(dat->image)); } -#if 0 static VALUE nucomp_real_p(VALUE self) { return Qfalse; } +#if 0 static VALUE nucomp_complex_p(VALUE self) { @@ -950,12 +953,6 @@ } static VALUE -nucomp_scalar_p(VALUE self) -{ - return Qfalse; -} - -static VALUE nucomp_to_i(VALUE self) { get_dat1(self); @@ -1451,6 +1448,7 @@ rb_define_method(rb_cComplex, "exact?", nucomp_exact_p, 0); rb_define_method(rb_cComplex, "inexact?", nucomp_inexact_p, 0); #endif + rb_define_method(rb_cComplex, "scalar?", nucomp_real_p, 0); rb_define_method(rb_cComplex, "numerator", nucomp_numerator, 0); rb_define_method(rb_cComplex, "denominator", nucomp_denominator, 0); @@ -1465,7 +1463,6 @@ /* --- */ - rb_define_method(rb_cComplex, "scalar?", nucomp_scalar_p, 0); rb_define_method(rb_cComplex, "to_i", nucomp_to_i, 0); rb_define_method(rb_cComplex, "to_f", nucomp_to_f, 0); rb_define_method(rb_cComplex, "to_r", nucomp_to_r, 0); Index: mvm/math.c =================================================================== --- mvm/math.c (revision 19180) +++ mvm/math.c (revision 19181) @@ -83,7 +83,7 @@ math_atan2(VALUE obj, VALUE y, VALUE x) { Need_Float2(y, x); - return DOUBLE2NUM(atan2(RFLOAT_VALUE(y), 
RFLOAT_VALUE(x))); + return DBL2NUM(atan2(RFLOAT_VALUE(y), RFLOAT_VALUE(x))); } @@ -99,7 +99,7 @@ math_cos(VALUE obj, VALUE x) { Need_Float(x); - return DOUBLE2NUM(cos(RFLOAT_VALUE(x))); + return DBL2NUM(cos(RFLOAT_VALUE(x))); } /* @@ -115,7 +115,7 @@ { Need_Float(x); - return DOUBLE2NUM(sin(RFLOAT_VALUE(x))); + return DBL2NUM(sin(RFLOAT_VALUE(x))); } @@ -131,7 +131,7 @@ { Need_Float(x); - return DOUBLE2NUM(tan(RFLOAT_VALUE(x))); + return DBL2NUM(tan(RFLOAT_VALUE(x))); } /* @@ -150,7 +150,7 @@ errno = 0; d = acos(RFLOAT_VALUE(x)); domain_check(d, "acos"); - return DOUBLE2NUM(d); + return DBL2NUM(d); } /* @@ -169,7 +169,7 @@ errno = 0; d = asin(RFLOAT_VALUE(x)); domain_check(d, "asin"); - return DOUBLE2NUM(d); + return DBL2NUM(d); } /* @@ -183,7 +183,7 @@ math_atan(VALUE obj, VALUE x) { Need_Float(x); - return DOUBLE2NUM(atan(RFLOAT_VALUE(x))); + return DBL2NUM(atan(RFLOAT_VALUE(x))); } #ifndef HAVE_COSH @@ -206,7 +206,7 @@ { Need_Float(x); - return DOUBLE2NUM(cosh(RFLOAT_VALUE(x))); + return DBL2NUM(cosh(RFLOAT_VALUE(x))); } #ifndef HAVE_SINH @@ -229,7 +229,7 @@ math_sinh(VALUE obj, VALUE x) { Need_Float(x); - return DOUBLE2NUM(sinh(RFLOAT_VALUE(x))); + return DBL2NUM(sinh(RFLOAT_VALUE(x))); } #ifndef HAVE_TANH @@ -252,7 +252,7 @@ math_tanh(VALUE obj, VALUE x) { Need_Float(x); - return DOUBLE2NUM(tanh(RFLOAT_VALUE(x))); + return DBL2NUM(tanh(RFLOAT_VALUE(x))); } /* @@ -271,7 +271,7 @@ errno = 0; d = acosh(RFLOAT_VALUE(x)); domain_check(d, "acosh"); - return DOUBLE2NUM(d); + return DBL2NUM(d); } /* @@ -285,7 +285,7 @@ math_asinh(VALUE obj, VALUE x) { Need_Float(x); - return DOUBLE2NUM(asinh(RFLOAT_VALUE(x))); + return DBL2NUM(asinh(RFLOAT_VALUE(x))); } /* @@ -305,7 +305,7 @@ d = atanh(RFLOAT_VALUE(x)); domain_check(d, "atanh"); infinity_check(x, d, "atanh"); - return DOUBLE2NUM(d); + return DBL2NUM(d); } /* @@ -319,7 +319,7 @@ math_exp(VALUE obj, VALUE x) { Need_Float(x); - return DOUBLE2NUM(exp(RFLOAT_VALUE(x))); + return DBL2NUM(exp(RFLOAT_VALUE(x))); } #if 
defined __CYGWIN__ @@ -357,7 +357,7 @@ } domain_check(d, "log"); infinity_check(x, d, "log"); - return DOUBLE2NUM(d); + return DBL2NUM(d); } #ifndef log2 @@ -389,7 +389,7 @@ d = log2(RFLOAT_VALUE(x)); domain_check(d, "log2"); infinity_check(x, d, "log2"); - return DOUBLE2NUM(d); + return DBL2NUM(d); } /* @@ -409,7 +409,7 @@ d = log10(RFLOAT_VALUE(x)); domain_check(d, "log10"); infinity_check(x, d, "log10"); - return DOUBLE2NUM(d); + return DBL2NUM(d); } /* @@ -445,7 +445,7 @@ errno = 0; d = sqrt(RFLOAT_VALUE(x)); domain_check(d, "sqrt"); - return DOUBLE2NUM(d); + return DBL2NUM(d); } /* @@ -484,7 +484,7 @@ math_cbrt(VALUE obj, VALUE x) { Need_Float(x); - return DOUBLE2NUM(cbrt(RFLOAT_VALUE(x))); + return DBL2NUM(cbrt(RFLOAT_VALUE(x))); } /* @@ -508,7 +508,7 @@ Need_Float(x); d = frexp(RFLOAT_VALUE(x), &exp); - return rb_assoc_new(DOUBLE2NUM(d), INT2NUM(exp)); + return rb_assoc_new(DBL2NUM(d), INT2NUM(exp)); } /* @@ -525,7 +525,7 @@ math_ldexp(VALUE obj, VALUE x, VALUE n) { Need_Float(x); - return DOUBLE2NUM(ldexp(RFLOAT_VALUE(x), NUM2INT(n))); + return DBL2NUM(ldexp(RFLOAT_VALUE(x), NUM2INT(n))); } /* @@ -542,7 +542,7 @@ math_hypot(VALUE obj, VALUE x, VALUE y) { Need_Float2(x, y); - return DOUBLE2NUM(hypot(RFLOAT_VALUE(x), RFLOAT_VALUE(y))); + return DBL2NUM(hypot(RFLOAT_VALUE(x), RFLOAT_VALUE(y))); } /* @@ -556,7 +556,7 @@ math_erf(VALUE obj, VALUE x) { Need_Float(x); - return DOUBLE2NUM(erf(RFLOAT_VALUE(x))); + return DBL2NUM(erf(RFLOAT_VALUE(x))); } /* @@ -570,7 +570,7 @@ math_erfc(VALUE obj, VALUE x) { Need_Float(x); - return DOUBLE2NUM(erfc(RFLOAT_VALUE(x))); + return DBL2NUM(erfc(RFLOAT_VALUE(x))); } /* @@ -622,7 +622,7 @@ errno = 0; d = tgamma(RFLOAT_VALUE(x)); domain_check(d, "gamma"); - return DOUBLE2NUM(d); + return DBL2NUM(d); } /* @@ -647,7 +647,7 @@ errno = 0; d = lgamma_r(RFLOAT_VALUE(x), &sign); domain_check(d, "lgamma"); - v = DOUBLE2NUM(d); + v = DBL2NUM(d); return rb_assoc_new(v, INT2FIX(sign)); } @@ -665,15 +665,15 @@ rb_mMath = 
rb_define_module("Math"); #ifdef M_PI - rb_define_const(rb_mMath, "PI", DOUBLE2NUM(M_PI)); + rb_define_const(rb_mMath, "PI", DBL2NUM(M_PI)); #else - rb_define_const(rb_mMath, "PI", DOUBLE2NUM(atan(1.0)*4.0)); + rb_define_const(rb_mMath, "PI", DBL2NUM(atan(1.0)*4.0)); #endif #ifdef M_E - rb_define_const(rb_mMath, "E", DOUBLE2NUM(M_E)); + rb_define_const(rb_mMath, "E", DBL2NUM(M_E)); #else - rb_define_const(rb_mMath, "E", DOUBLE2NUM(exp(1.0))); + rb_define_const(rb_mMath, "E", DBL2NUM(exp(1.0))); #endif rb_define_module_function(rb_mMath, "atan2", math_atan2, 2); Index: mvm/time.c =================================================================== --- mvm/time.c (revision 19180) +++ mvm/time.c (revision 19181) @@ -1015,7 +1015,7 @@ struct time_object *tobj; GetTimeval(time, tobj); - return DOUBLE2NUM((double)tobj->ts.tv_sec+(double)tobj->ts.tv_nsec/1e9); + return DBL2NUM((double)tobj->ts.tv_sec+(double)tobj->ts.tv_nsec/1e9); } /* @@ -1512,7 +1512,7 @@ f = (double)(unsigned_time_t)(tobj->ts.tv_sec - tobj2->ts.tv_sec); f += ((double)tobj->ts.tv_nsec - (double)tobj2->ts.tv_nsec)*1e-9; - return DOUBLE2NUM(f); + return DBL2NUM(f); } return time_add(tobj, time2, -1); } Index: mvm/include/ruby/intern.h =================================================================== --- mvm/include/ruby/intern.h (revision 19180) +++ mvm/include/ruby/intern.h (revision 19181) @@ -386,10 +386,10 @@ VALUE rb_io_puts(int, VALUE*, VALUE); VALUE rb_io_fdopen(int, int, const char*); VALUE rb_file_open(const char*, const char*); +VALUE rb_file_open_str(VALUE, const char*); VALUE rb_gets(void); void rb_write_error(const char*); void rb_write_error2(const char*, long); -int rb_io_mode_modenum(const char *mode); void rb_close_before_exec(int lowfd, int maxhint, VALUE noclose_fds); int rb_pipe(int *pipes); /* marshal.c */ Index: mvm/include/ruby/io.h =================================================================== --- mvm/include/ruby/io.h (revision 19180) +++ mvm/include/ruby/io.h (revision 
19181) @@ -57,7 +57,7 @@ struct rb_io_enc_t { rb_encoding *enc; rb_encoding *enc2; - int flags; + int ecflags; VALUE ecopts; } encs; @@ -69,7 +69,7 @@ rb_econv_t *writeconv; VALUE writeconv_stateless; - int writeconv_pre_flags; + int writeconv_pre_ecflags; VALUE writeconv_pre_ecopts; int writeconv_initialized; @@ -125,21 +125,22 @@ fp->cbuf_capa = 0;\ fp->writeconv = NULL;\ fp->writeconv_stateless = Qnil;\ - fp->writeconv_pre_flags = 0;\ + fp->writeconv_pre_ecflags = 0;\ fp->writeconv_pre_ecopts = Qnil;\ fp->writeconv_initialized = 0;\ fp->tied_io_for_writing = 0;\ fp->encs.enc = NULL;\ fp->encs.enc2 = NULL;\ - fp->encs.flags = 0;\ + fp->encs.ecflags = 0;\ fp->encs.ecopts = Qnil;\ } while (0) FILE *rb_io_stdio_file(rb_io_t *fptr); FILE *rb_fdopen(int, const char*); -int rb_io_mode_flags(const char*); -int rb_io_modenum_flags(int); +int rb_io_modestr_fmode(const char *modestr); +int rb_io_modestr_oflags(const char *modestr); +int rb_io_oflags_fmode(int oflags); void rb_io_check_writable(rb_io_t*); void rb_io_check_readable(rb_io_t*); int rb_io_fptr_finalize(rb_io_t*); @@ -150,6 +151,10 @@ int rb_io_wait_writable(int); void rb_io_set_nonblock(rb_io_t *fptr); +/* compatibility for ruby 1.8 and older */ +#define rb_io_mode_flags(modestr) rb_io_modestr_fmode(modestr) +#define rb_io_modenum_flags(oflags) rb_io_oflags_fmode(oflags) + VALUE rb_io_taint_check(VALUE); NORETURN(void rb_eof_error(void)); Index: mvm/include/ruby/ruby.h =================================================================== --- mvm/include/ruby/ruby.h (revision 19180) +++ mvm/include/ruby/ruby.h (revision 19181) @@ -568,7 +568,7 @@ double float_value; }; #define RFLOAT_VALUE(v) (RFLOAT(v)->float_value) -#define DOUBLE2NUM(dbl) rb_float_new(dbl) +#define DBL2NUM(dbl) rb_float_new(dbl) #define ELTS_SHARED FL_USER2 Index: mvm/include/ruby/encoding.h =================================================================== --- mvm/include/ruby/encoding.h (revision 19180) +++ mvm/include/ruby/encoding.h 
(revision 19181) @@ -250,19 +250,20 @@ /* flags for rb_econv_open */ #define ECONV_INVALID_MASK 0x000f -#define ECONV_INVALID_IGNORE 0x0001 #define ECONV_INVALID_REPLACE 0x0002 #define ECONV_UNDEF_MASK 0x00f0 -#define ECONV_UNDEF_IGNORE 0x0010 #define ECONV_UNDEF_REPLACE 0x0020 +#define ECONV_UNDEF_HEX_CHARREF 0x0030 -/* effective only if output is ascii compatible */ +/* usable only if destination encoding is ascii compatible */ #define ECONV_UNIVERSAL_NEWLINE_DECODER 0x0100 -/* effective only if input is ascii compatible */ +/* usable only if source encoding is ascii compatible */ #define ECONV_CRLF_NEWLINE_ENCODER 0x0200 #define ECONV_CR_NEWLINE_ENCODER 0x0400 +#define ECONV_HTML_TEXT_ENCODER 0x0800 +#define ECONV_HTML_ATTR_ENCODER 0x1000 /* end of flags for rb_econv_open */ Index: mvm/insns.def =================================================================== --- mvm/insns.def (revision 19180) +++ mvm/insns.def (revision 19181) @@ -1319,7 +1319,7 @@ else if (HEAP_CLASS_OF(recv) == rb_cFloat && HEAP_CLASS_OF(obj) == rb_cFloat && BASIC_OP_UNREDEFINED_P(BOP_PLUS)) { - val = DOUBLE2NUM(RFLOAT_VALUE(recv) + RFLOAT_VALUE(obj)); + val = DBL2NUM(RFLOAT_VALUE(recv) + RFLOAT_VALUE(obj)); } #endif @@ -1420,7 +1420,7 @@ else if (HEAP_CLASS_OF(recv) == rb_cFloat && HEAP_CLASS_OF(obj) == rb_cFloat && BASIC_OP_UNREDEFINED_P(BOP_MULT)) { - val = DOUBLE2NUM(RFLOAT_VALUE(recv) * RFLOAT_VALUE(obj)); + val = DBL2NUM(RFLOAT_VALUE(recv) * RFLOAT_VALUE(obj)); } #endif else { @@ -1484,7 +1484,7 @@ else if (HEAP_CLASS_OF(recv) == rb_cFloat && HEAP_CLASS_OF(obj) == rb_cFloat && BASIC_OP_UNREDEFINED_P(BOP_DIV)) { - val = DOUBLE2NUM(RFLOAT_VALUE(recv) / RFLOAT_VALUE(obj)); + val = DBL2NUM(RFLOAT_VALUE(recv) / RFLOAT_VALUE(obj)); } #endif else { @@ -1564,7 +1564,7 @@ mod += y; div -= 1.0; } - val = DOUBLE2NUM(mod); + val = DBL2NUM(mod); } else { goto INSN_LABEL(normal_dispatch); Index: mvm/ChangeLog =================================================================== --- mvm/ChangeLog 
(revision 19180) +++ mvm/ChangeLog (revision 19181) @@ -1,3 +1,312 @@ +<<<<<<< .working +======= +Sat Sep 6 15:06:21 2008 Tanaka Akira <akr@f...> + + * transcode.c (sym_html): new variable. + (sym_text): ditto. + (sym_attr): ditto. + (econv_opts): check :html=>:text and :html=>:attr. + (Init_transcode): initialize the above variables. + +Sat Sep 6 14:46:12 2008 Tanaka Akira <akr@f...> + + * include/ruby/encoding.h (ECONV_HTML_TEXT_ENCODER): new constant. + (ECONV_HTML_ATTR_ENCODER): ditto. + + * transcode.c (rb_econv_open): check ECONV_HTML_TEXT_ENCODER and + ECONV_HTML_ATTR_ENCODER. + (Init_transcode): Encoding::Converter::HTML_TEXT_ENCODER and + Encoding::Converter::HTML_ATTR_ENCODER defined. + +Sat Sep 6 14:15:25 2008 Tanaka Akira <akr@f...> + + * transcode.c (struct trans_open_t): defined to pass num_additional. + (trans_open_i): use struct trans_open_t. + (rb_econv_open): ditto. + +Sat Sep 6 13:43:20 2008 Tanaka Akira <akr@f...> + + * enc/trans/escape.trans (fun_so_escape_html_attr): fix return type. + +Sat Sep 6 12:43:55 2008 Tanaka Akira <akr@f...> + + * transcode.c (rb_econv_open): needless branch removed. + +Sat Sep 6 12:38:34 2008 Tanaka Akira <akr@f...> + + * transcode.c (rb_econv_open): test condition simplified. + +Sat Sep 6 12:25:31 2008 Tanaka Akira <akr@f...> + + * transcode.c (stateless_encoding_i): ignore supplemental conversions. + Encoding::Converter.stateless_encoding("html-attr-escaped") should be + nil. + +Sat Sep 6 12:19:36 2008 Tanaka Akira <akr@f...> + + * enc/trans/escape.trans (escape_html_attr_init): new function. + (fun_so_escape_html_attr): new function. + (escape_html_attr_finish): new function. + (rb_escape_html_attr): use them to quote the converted result. + +Sat Sep 6 07:54:36 2008 Tadayoshi Funaba <tadf@d...> + + * complex.c: uses f_real_p macro. + +Sat Sep 6 07:27:00 2008 Tanaka Akira <akr@f...> + + * transcode.c (rb_econv_open): fail for ASCII incompatible with + newline conversion. 
+ +Sat Sep 6 07:24:49 2008 Tanaka Akira <akr@f...> + + * io.c (rb_io_extract_modeenc): raise an error for ASCII incompatible + encoding without binmode. + +Sat Sep 6 07:12:42 2008 Yukihiro Matsumoto <matz@r...> + + * bignum.c (bigdivrem1): optimization by skipping zeros at the + tail of digits. a patch from TOYOFUKU Chikanobu + <nobu_toyofuku at nifty.com> in [ruby-dev:36169]. + +Sat Sep 6 06:28:46 2008 Tanaka Akira <akr@f...> + + * enc/trans/escape.trans: new file. + +Sat Sep 6 06:23:27 2008 Tanaka Akira <akr@f...> + + * tool/transcode-tblgen.rb (StrSet.parse): accept upper case + hexadecimal digits. + +Sat Sep 6 05:37:08 2008 Tanaka Akira <akr@f...> + + * transcode.c (output_hex_charref): upcase hexadecimal digits. + +Sat Sep 6 05:22:29 2008 Tanaka Akira <akr@f...> + + * include/ruby/encoding.h (ECONV_UNDEF_HEX_CHARREF): defined. + + * transcode.c (output_hex_charref): new function. + (rb_econv_convert): call output_hex_charref if + ECONV_UNDEF_HEX_CHARREF. + (Init_transcode): Encoding::Converter::UNDEF_HEX_CHARREF added. + +Sat Sep 6 03:52:47 2008 Tanaka Akira <akr@f...> + + * transcode.c (rb_econv_convert): use ECONV_INVALID_MASK and + ECONV_UNDEF_MASK. + +Sat Sep 6 06:05:09 2008 Yukihiro Matsumoto <matz@r...> + + * include/ruby/ruby.h (DBL2NUM): renamed from DOUBLE2NUM. + a patch from Tadashi Saito <shiba at mail2.accsnet.ne.jp> + in [ruby-dev:36102]. + +Sat Sep 6 03:18:17 2008 Tanaka Akira <akr@f...> + + * transcode.c (allocate_converted_string): add arguments for a buffer + allocated by caller. + (rb_econv_insert_output): provide caller allocated buffer to + allocate_converted_string. + +Sat Sep 6 02:58:53 2008 Tanaka Akira <akr@f...> + + * transcode.c (str_transcode_enc_args): local variables renamed. + (str_transcode0): ditto. + +Sat Sep 6 02:23:18 2008 Tanaka Akira <akr@f...> + + * transcode.c (econv_s_stateless_encoding): new method. + +Sat Sep 6 02:01:59 2008 Tanaka Akira <akr@f...> + + * transcode.c (enc_arg): extracted from str_transcode_enc_args. 
+ (str_transcode_enc_args): use enc_arg. + +Fri Sep 5 20:27:17 2008 Tanaka Akira <akr@f...> + + * include/ruby/io.h (rb_io_modestr_fmode): renamed from + rb_io_mode_flags. + (rb_io_modestr_oflags): renamed from rb_io_mode_modenum. + (rb_io_oflags_fmode): renamed from rb_io_modenum_flags. + (rb_io_mode_flags): defined as a macro. + (rb_io_modenum_flags): ditto. + + * io.c: follow the renaming with consistency. + + * process.c (check_exec_redirect): call rb_io_modestr_oflags. + + * ext/pty/depend: pty.o depends on io.h. + +Fri Sep 5 20:12:23 2008 Tanaka Akira <akr@f...> + + * enc/trans/newline.trans (universal_newline_finish): new function. + +Fri Sep 5 20:07:37 2008 Tanaka Akira <akr@f...> + + * include/ruby/io.h (rb_io_mode_modenum): moved from + include/ruby/intern.h. + +Fri Sep 5 19:59:26 2008 Tanaka Akira <akr@f...> + + * io.c (MODENUM_MAX): removed. + +Fri Sep 5 18:37:52 2008 Nobuyoshi Nakada <nobu@r...> + + * io.c (copy_stream_fallback_body): use read method unless readpartial + is available. [ruby-dev:36124] + +Fri Sep 5 18:16:31 2008 Nobuyoshi Nakada <nobu@r...> + + * ext/iconv/iconv.c (iconv_create): strips glibc style option before + charset mapping. retris without options if they seemed causing + error, and warns. [ruby-dev:36147] + +Fri Sep 5 03:09:48 2008 Koichi Sasada <ko1@a...> + + * iseq.c (iseq_data_to_ary): make it static. + + * thread.c (thgroup_enclose): ditto. + +Fri Sep 5 02:56:37 2008 Koichi Sasada <ko1@a...> + + * vm.c (thread_recycle_stack_slot, thread_recycle_stack_count): + make it static. + +Fri Sep 5 02:40:38 2008 Koichi Sasada <ko1@a...> + + * thread.c (rb_thread_critical): removed. + +Fri Sep 5 01:22:23 2008 Tanaka Akira <akr@f...> + + * io.c (argf_mark): mark p->encs.ecopts. + +Fri Sep 5 00:45:07 2008 Tanaka Akira <akr@f...> + + * transcode_data.h (rb_transcoder): rename fields: + from_encoding -> src_encoding, to_encoding -> dst_encoding. + + * transcode.c: follow the renaming. 
+ +Fri Sep 5 00:22:34 2008 Tanaka Akira <akr@f...> + + * transcode.c: variables renamed for consistency. + +Fri Sep 5 00:05:27 2008 Tanaka Akira <akr@f...> + + * pack.c (encodes): make buff fixed length to avoid SEGV by + ruby -e '["a"*10000000].pack("m1000000000")' + +Thu Sep 4 23:47:05 2008 Yusuke Endoh <mame@t...> + + * ext/bigdecimal/bigdecimal.c (BigDecimal_mode): set exception mode + correctly. In spite of BigDecimal.mode(BigDecimal::EXCEPTION_ALL, + true), BigDecimal.new("NaN") did not raise an exception previously. + +Thu Sep 4 23:42:42 2008 Tanaka Akira <akr@f...> + + * include/ruby/intern.h (rb_file_open_str): declared. + + * io.c (rb_file_open_str): defined. + + * ext/zlib/zlib.c (gzfile_s_open): use rb_file_open_str instead of + rb_file_open. + +Thu Sep 4 23:18:55 2008 Tanaka Akira <akr@f...> + + * enc/trans/newline.trans: record newline types met in universal + newline decoder. + +Thu Sep 4 23:05:54 2008 Yuki Sonoda (Yugui) <yugui@y...> + + * lib/prime.rb (Prime::OldCompatibility#each): added compatibility to + Ruby 1.8.7. + (Prime#each): added more rdocs. + (Prime#each): remembers the last value of the given block. + +Thu Sep 4 21:53:58 2008 Tanaka Akira <akr@f...> + + * transcode.c (econv_init): accept an integer as 3rd argument as well. + +Thu Sep 4 21:46:21 2008 Tanaka Akira <akr@f...> + + * transcode.c (sym_partial_input): new variable. + (econv_primitive_convert): accept a hash as 5th argument as well. + +Thu Sep 4 21:04:27 2008 Tanaka Akira <akr@f...> + + * transcode.c (sym_universal_newline_decoder): new variable. + (sym_crlf_newline_encoder): ditto. + (sym_cr_newline_encoder): ditto. + (econv_opts): check newline converter options. + (econv_init): make 3rd argument hash/nil only. + +Thu Sep 4 21:03:28 2008 Koichi Sasada <ko1@a...> + + * vm_dump.c: rename some debug functions. + +Thu Sep 4 20:57:54 2008 Koichi Sasada <ko1@a...> + + * thread.c (rb_thread_check_trap_pending): added for compatibility. 
+ + * ext/tk/tcltklib.c, ext/tk/tkutil/tkutil.c: remove ruby/signal.h + dependency. + +Thu Sep 4 20:30:24 2008 Tanaka Akira <akr@f...> + + * transcode_data.h (PType): defined unconditionaly. + + * transcode.c (PType): don't define here. + +Thu Sep 4 20:19:36 2008 Tanaka Akira <akr@f...> + + * io.c: variables renamed for consistency. + +Thu Sep 4 19:40:50 2008 Narihiro Nakamura <authorNari@g...> + + * gc.c (gc_profile_record_get): to static function. + (gc_profile_result): ditto. + (gc_profile_report): ditto. + +Thu Sep 4 19:20:24 2008 Tanaka Akira <akr@f...> + + * include/ruby/io.h (rb_io_enc_t): rename flags to ecflags. + (rb_io_t): rename writeconv_pre_flags to writeconv_pre_ecflags. + (MakeOpenFile): follow the renaming. + + * io.c: follow the renaming. + +Thu Sep 4 19:10:27 2008 Tanaka Akira <akr@f...> + + * include/ruby/encoding.h (ECONV_INVALID_IGNORE): removed because + it tend to cause security problem. If the behaviour is really + required, ECONV_INVALID_REPLACE with empty string can be used. + For example, CVE-2006-2313, CVE-2008-1036, [ruby-core:15645] + (ECONV_UNDEF_IGNORE): ditto. + + * transcode.c (rb_econv_convert): follow the above change. + (econv_opts): ditto. + (Init_transcode): ditto. + +Thu Sep 4 13:22:02 2008 Nobuyoshi Nakada <nobu@r...> + + * vm_core.h (struct rb_vm_struct): replaced signal staff with trap + staff. + + * signal.c (signal_buff): per process resouce now. + + * signal.c (trap_list): moved to VM. + + * signal.c (rb_get_next_signal): reverted. + + * signal.c (rb_trap_exit): trap_pending_list was no longer used. + + * thread.c (timer_thread_function): delivers buffered per-process + signals to each VMs. + + * vm.c (rb_vm_mark): marks trap_list. 
+ +>>>>>>> .merge-right.r19180 Thu Sep 4 13:01:11 2008 Nobuyoshi Nakada <nobu@r...> * io.c (struct sysopen_struct, rb_sysopen_internal, rb_sysopen): Index: mvm/enc/trans/escape.trans =================================================================== --- mvm/enc/trans/escape.trans (revision 0) +++ mvm/enc/trans/escape.trans (revision 19181) @@ -0,0 +1,157 @@ +#include "transcode_data.h" + +static int +fun_so_escape_html_chref(void *statep, const unsigned char *s, size_t l, unsigned char *o) +{ + switch (*s) { + case '&': + o[0] = '&'; + o[1] = 'a'; + o[2] = 'm'; + o[3] = 'p'; + o[4] = ';'; + return 5; + + case '<': + o[0] = '&'; + o[1] = 'l'; + o[2] = 't'; + o[3] = ';'; + return 4; + + case '>': + o[0] = '&'; + o[1] = 'g'; + o[2] = 't'; + o[3] = ';'; + return 4; + + case '"': + o[0] = '&'; + o[1] = 'q'; + o[2] = 'u'; + o[3] = 'o'; + o[4] = 't'; + o[5] = ';'; + return 6; + + default: + rb_bug("unexpected char"); + } +} +<% + map_amp = {} + map_amp["{00-25,27-FF}"] = :nomap + map_amp["26"] = :func_so + transcode_generate_node(ActionMap.parse(map_amp), "escape_amp_as_chref") + + map_html_text = {} + map_html_text["{00-25,27-3B,3D,3F-FF}"] = :nomap + map_html_text["26"] = :func_so + map_html_text["3C"] = :func_so + map_html_text["3E"] = :func_so + transcode_generate_node(ActionMap.parse(map_html_text), "escape_html_text") + + map_html_attr = {} + map_html_attr["{00-FF}"] = :func_so + transcode_generate_node(ActionMap.parse(map_html_attr), "escape_html_attr") +%> + +<%= transcode_generated_code %> + +static const rb_transcoder +rb_escape_amp_as_chref = { + "", "amp-escaped", escape_amp_as_chref, + TRANSCODE_TABLE_INFO, + 1, /* input_unit_length */ + 1, /* max_input */ + 5, /* max_output */ + stateless_converter, /* stateful_type */ + 0, NULL, NULL, + NULL, NULL, NULL, &fun_so_escape_html_chref +}; + +static const rb_transcoder +rb_escape_html_text = { + "", "html-text-escaped", escape_html_text, + TRANSCODE_TABLE_INFO, + 1, /* input_unit_length */ + 1, /* max_input */ + 
5, /* max_output */ + stateless_converter, /* stateful_type */ + 0, NULL, NULL, + NULL, NULL, NULL, &fun_so_escape_html_chref +}; + +#define END 0 +#define NORMAL 1 + +static int +escape_html_attr_init(void *statep) +{ + unsigned char *sp = statep; + *sp = END; + return 0; +} + +static int +fun_so_escape_html_attr(void *statep, const unsigned char *s, size_t l, unsigned char *o) +{ + unsigned char *sp = statep; + int n = 0; + if (*sp == END) { + *sp = NORMAL; + o[n++] = '"'; + } + switch (s[0]) { + case '&': + case '<': + case '>': + case '"': + n += fun_so_escape_html_chref(statep, s, l, o+n); + break; + + default: + o[n++] = s[0]; + break; + } + return n; +} + +static int +escape_html_attr_finish(void *statep, unsigned char *o) +{ + unsigned char *sp = statep; + int n = 0; + + if (*sp == END) { + o[n++] = '"'; + } + + o[n++] = '"'; + *sp = END; + + return n; +} + +static const rb_transcoder +rb_escape_html_attr = { + "", "html-attr-escaped", escape_html_attr, + TRANSCODE_TABLE_INFO, + 1, /* input_unit_length */ + 1, /* max_input */ + 7, /* max_output */ + stateful_encoder, /* stateful_type */ + 1, escape_html_attr_init, escape_html_attr_init, + NULL, NULL, NULL, fun_so_escape_html_attr, + escape_html_attr_finish +}; + +void +Init_escape(void) +{ + rb_register_transcoder(&rb_escape_amp_as_chref); + rb_register_transcoder(&rb_escape_html_text); + rb_register_transcoder(&rb_escape_html_attr); +} + Index: mvm/enc/trans/newline.trans =================================================================== --- mvm/enc/trans/newline.trans (revision 19180) +++ mvm/enc/trans/newline.trans (revision 19181) @@ -21,45 +21,70 @@ <%= transcode_generated_code %> +#define STATE (sp[0]) #define NORMAL 0 #define JUST_AFTER_CR 1 +/* no way to access this information, yet. 
*/ +#define NEWLINES_MET (sp[1]) +#define MET_LF 0x01 +#define MET_CRLF 0x02 +#define MET_CR 0x04 + static int universal_newline_init(void *statep) { unsigned char *sp = statep; - *sp = NORMAL; + STATE = NORMAL; + NEWLINES_MET = 0; return 0; } static int -fun_so_universal_newline(void *statep, const unsigned char* s, size_t l, unsigned char* o) +fun_so_universal_newline(void *statep, const unsigned char *s, size_t l, unsigned char *o) { unsigned char *sp = statep; int len; if (s[0] == '\n') { - if (*sp == NORMAL) { + if (STATE == NORMAL) { + NEWLINES_MET |= MET_LF; o[0] = '\n'; len = 1; } else { /* JUST_AFTER_CR */ + NEWLINES_MET |= MET_CRLF; len = 0; } - *sp = NORMAL; + STATE = NORMAL; } - else if (s[0] == '\r') { - o[0] = '\n'; - len = 1; - *sp = JUST_AFTER_CR; - } else { - o[0] = s[0]; - len = 1; - *sp = NORMAL; + if (STATE == JUST_AFTER_CR) + NEWLINES_MET |= MET_CR; + if (s[0] == '\r') { + o[0] = '\n'; + len = 1; + STATE = JUST_AFTER_CR; + } + else { + o[0] = s[0]; + len = 1; + STATE = NORMAL; + } } + return len; } +static int +universal_newline_finish(void *statep, unsigned char *o) +{ + unsigned char *sp = statep; + if (STATE == JUST_AFTER_CR) + NEWLINES_MET |= MET_CR; + STATE = NORMAL; + return 0; +} + static const rb_transcoder rb_universal_newline = { "universal_newline", "", universal_newline, @@ -68,8 +93,9 @@ 1, /* max_input */ 1, /* max_output */ stateful_decoder, /* stateful_type */ - 1, universal_newline_init, universal_newline_init, /* state_size, state_init, state_fini */ - NULL, NULL, NULL, fun_so_universal_newline + 2, universal_newline_init, universal_newline_init, /* state_size, state_init, state_fini */ + NULL, NULL, NULL, fun_so_universal_newline, + universal_newline_finish }; static const rb_transcoder Property changes on: mvm/enc/trans/japanese.trans ___________________________________________________________________ Name: svn:eol-style + LF Index: mvm/iseq.c =================================================================== --- mvm/iseq.c 
(revision 19180) +++ mvm/iseq.c (revision 19181) @@ -557,6 +557,7 @@ RSTRING_PTR(iseq->name), RSTRING_PTR(iseq->filename)); } +static VALUE iseq_data_to_ary(rb_iseq_t *iseq); static VALUE @@ -978,7 +979,7 @@ return ST_CONTINUE; } -VALUE +static VALUE iseq_data_to_ary(rb_iseq_t *iseq) { int i, pos, line = 0; Index: mvm/string.c =================================================================== --- mvm/string.c (revision 19180) +++ mvm/string.c (revision 19181) @@ -3787,7 +3787,7 @@ static VALUE rb_str_to_f(VALUE str) { - return DOUBLE2NUM(rb_str_to_dbl(str, Qfalse)); + return DBL2NUM(rb_str_to_dbl(str, Qfalse)); } Index: mvm/object.c =================================================================== --- mvm/object.c (revision 19180) +++ mvm/object.c (revision 19181) @@ -846,7 +846,7 @@ static VALUE nil_to_f(VALUE obj) { - return DOUBLE2NUM(0.0); + return DBL2NUM(0.0); } /* @@ -2226,16 +2226,16 @@ { switch (TYPE(val)) { case T_FIXNUM: - return DOUBLE2NUM((double)FIX2LONG(val)); + return DBL2NUM((double)FIX2LONG(val)); case T_FLOAT: return val; case T_BIGNUM: - return DOUBLE2NUM(rb_big2dbl(val)); + return DBL2NUM(rb_big2dbl(val)); case T_STRING: - return DOUBLE2NUM(rb_str_to_dbl(val, Qtrue)); + return DBL2NUM(rb_str_to_dbl(val, Qtrue)); case T_NIL: rb_raise(rb_eTypeError, "can't convert nil into Float"); Index: mvm/io.c =================================================================== --- mvm/io.c (revision 19180) +++ mvm/io.c (revision 19181) @@ -698,9 +698,9 @@ /* ECONV_INVALID_XXX and ECONV_UNDEF_XXX should be set both. * But ECONV_CRLF_NEWLINE_ENCODER should be set only for the first. 
*/ - fptr->writeconv_pre_flags = fptr->encs.flags; + fptr->writeconv_pre_ecflags = fptr->encs.ecflags; fptr->writeconv_pre_ecopts = fptr->encs.ecopts; - ecflags = fptr->encs.flags; + ecflags = fptr->encs.ecflags; ecopts = fptr->encs.ecopts; #ifdef TEXTMODE_NEWLINE_ENCODER @@ -715,7 +715,7 @@ } if (NEED_NEWLINE_ENCODER(fptr)) - fptr->writeconv_pre_flags |= TEXTMODE_NEWLINE_ENCODER; + fptr->writeconv_pre_ecflags |= TEXTMODE_NEWLINE_ENCODER; #endif enc = fptr->encs.enc2 ? fptr->encs.enc2 : fptr->encs.enc; @@ -758,7 +758,7 @@ if (!NIL_P(common_encoding)) { str = rb_str_transcode(str, common_encoding, - fptr->writeconv_pre_flags, fptr->writeconv_pre_ecopts); + fptr->writeconv_pre_ecflags, fptr->writeconv_pre_ecopts); } if (fptr->writeconv) { @@ -1196,19 +1196,19 @@ */ static VALUE -rb_io_set_sync(VALUE io, VALUE mode) +rb_io_set_sync(VALUE io, VALUE sync) { rb_io_t *fptr; io = GetWriteIO(io); GetOpenFile(io, fptr); - if (RTEST(mode)) { + if (RTEST(sync)) { fptr->mode |= FMODE_SYNC; } else { fptr->mode &= ~FMODE_SYNC; } - return mode; + return sync; } /* @@ -1446,7 +1446,7 @@ int ecflags; VALUE ecopts; const char *sname, *dname; - ecflags = fptr->encs.flags; + ecflags = fptr->encs.ecflags; ecopts = fptr->encs.ecopts; if (NEED_NEWLINE_DECODER(fptr)) ecflags |= ECONV_UNIVERSAL_NEWLINE_DECODER; @@ -1613,18 +1613,18 @@ void rb_io_set_nonblock(rb_io_t *fptr) { - int flags; + int oflags; #ifdef F_GETFL - flags = fcntl(fptr->fd, F_GETFL); - if (flags == -1) { + oflags = fcntl(fptr->fd, F_GETFL); + if (oflags == -1) { rb_sys_fail_path(fptr->pathv); } #else - flags = 0; + oflags = 0; #endif - if ((flags & O_NONBLOCK) == 0) { - flags |= O_NONBLOCK; - if (fcntl(fptr->fd, F_SETFL, flags) == -1) { + if ((oflags & O_NONBLOCK) == 0) { + oflags |= O_NONBLOCK; + if (fcntl(fptr->fd, F_SETFL, oflags) == -1) { rb_sys_fail_path(fptr->pathv); } } @@ -3544,62 +3544,62 @@ } static const char* -rb_io_flags_mode(int flags) +rb_io_fmode_modestr(int fmode) { -# define MODE_BTMODE(a,b,c) ((flags & 
FMODE_BINMODE) ? (b) : \ - (flags & FMODE_TEXTMODE) ? (c) : (a)) - if (flags & FMODE_APPEND) { - if ((flags & FMODE_READWRITE) == FMODE_READWRITE) { +# define MODE_BTMODE(a,b,c) ((fmode & FMODE_BINMODE) ? (b) : \ + (fmode & FMODE_TEXTMODE) ? (c) : (a)) + if (fmode & FMODE_APPEND) { + if ((fmode & FMODE_READWRITE) == FMODE_READWRITE) { return MODE_BTMODE("a+", "ab+", "at+"); } return MODE_BTMODE("a", "ab", "at"); } - switch (flags & FMODE_READWRITE) { + switch (fmode & FMODE_READWRITE) { case FMODE_READABLE: return MODE_BTMODE("r", "rb", "rt"); case FMODE_WRITABLE: return MODE_BTMODE("w", "wb", "wt"); case FMODE_READWRITE: - if (flags & FMODE_CREATE) { + if (fmode & FMODE_CREATE) { return MODE_BTMODE("w+", "wb+", "wt+"); } return MODE_BTMODE("r+", "rb+", "rt+"); } - rb_raise(rb_eArgError, "invalid access modenum 0x%x", flags); + rb_raise(rb_eArgError, "invalid access fmode 0x%x", fmode); return NULL; /* not reached */ } int -rb_io_mode_flags(const char *mode) +rb_io_modestr_fmode(const char *modestr) { - int flags = 0; - const char *m = mode; + int fmode = 0; + const char *m = modestr; switch (*m++) { case 'r': - flags |= FMODE_READABLE; + fmode |= FMODE_READABLE; break; case 'w': - flags |= FMODE_WRITABLE | FMODE_TRUNC | FMODE_CREATE; + fmode |= FMODE_WRITABLE | FMODE_TRUNC | FMODE_CREATE; break; case 'a': - flags |= FMODE_WRITABLE | FMODE_APPEND | FMODE_CREATE; + fmode |= FMODE_WRITABLE | FMODE_APPEND | FMODE_CREATE; break; default: error: - rb_raise(rb_eArgError, "invalid access mode %s", mode); + rb_raise(rb_eArgError, "invalid access mode %s", modestr); } while (*m) { switch (*m++) { case 'b': - flags |= FMODE_BINMODE; + fmode |= FMODE_BINMODE; break; case 't': - flags |= FMODE_TEXTMODE; + fmode |= FMODE_TEXTMODE; break; case '+': - flags |= FMODE_READWRITE; + fmode |= FMODE_READWRITE; break; default: goto error; @@ -3609,105 +3609,103 @@ } finished: - if ((flags & FMODE_BINMODE) && (flags & FMODE_TEXTMODE)) + if ((fmode & FMODE_BINMODE) && (fmode & 
FMODE_TEXTMODE)) goto error; - return flags; + return fmode; } int -rb_io_modenum_flags(int mode) +rb_io_oflags_fmode(int oflags) { - int flags = 0; + int fmode = 0; - switch (mode & (O_RDONLY|O_WRONLY|O_RDWR)) { + switch (oflags & (O_RDONLY|O_WRONLY|O_RDWR)) { case O_RDONLY: - flags = FMODE_READABLE; + fmode = FMODE_READABLE; break; case O_WRONLY: - flags = FMODE_WRITABLE; + fmode = FMODE_WRITABLE; break; case O_RDWR: - flags = FMODE_READWRITE; + fmode = FMODE_READWRITE; break; } - if (mode & O_APPEND) { - flags |= FMODE_APPEND; + if (oflags & O_APPEND) { + fmode |= FMODE_APPEND; } - if (mode & O_TRUNC) { - flags |= FMODE_TRUNC; + if (oflags & O_TRUNC) { + fmode |= FMODE_TRUNC; } - if (mode & O_CREAT) { - flags |= FMODE_CREATE; + if (oflags & O_CREAT) { + fmode |= FMODE_CREATE; } #ifdef O_BINARY - if (mode & O_BINARY) { - flags |= FMODE_BINMODE; + if (oflags & O_BINARY) { + fmode |= FMODE_BINMODE; } #endif - return flags; + return fmode; } static int -rb_io_flags_modenum(int flags) +rb_io_fmode_oflags(int fmode) { - int mode = 0; + int oflags = 0; - switch (flags & FMODE_READWRITE) { + switch (fmode & FMODE_READWRITE) { case FMODE_READABLE: - mode |= O_RDONLY; + oflags |= O_RDONLY; break; case FMODE_WRITABLE: - mode |= O_WRONLY; + oflags |= O_WRONLY; break; case FMODE_READWRITE: - mode |= O_RDWR; + oflags |= O_RDWR; break; } - if (flags & FMODE_APPEND) { - mode |= O_APPEND; + if (fmode & FMODE_APPEND) { + oflags |= O_APPEND; } - if (flags & FMODE_TRUNC) { - mode |= O_TRUNC; + if (fmode & FMODE_TRUNC) { + oflags |= O_TRUNC; } - if (flags & FMODE_CREATE) { - mode |= O_CREAT; + if (fmode & FMODE_CREATE) { + oflags |= O_CREAT; } #ifdef O_BINARY - if (flags & FMODE_BINMODE) { - mode |= O_BINARY; + if (fmode & FMODE_BINMODE) { + oflags |= O_BINARY; } #endif - return mode; + return oflags; } int -rb_io_mode_modenum(const char *mode) +rb_io_modestr_oflags(const char *modestr) { - return rb_io_flags_modenum(rb_io_mode_flags(mode)); + return 
rb_io_fmode_oflags(rb_io_modestr_fmode(modestr)); } -#define MODENUM_MAX 4 - static const char* -rb_io_modenum_mode(int flags) +rb_io_oflags_modestr(int oflags) { #ifdef O_BINARY -# define MODE_BINARY(a,b) ((flags & O_BINARY) ? (b) : (a)) +# define MODE_BINARY(a,b) ((oflags & O_BINARY) ? (b) : (a)) #else # define MODE_BINARY(a,b) (a) #endif - if (flags & O_APPEND) { - if ((flags & O_RDWR) == O_RDWR) { + if (oflags & O_APPEND) { + if ((oflags & O_RDWR) == O_RDWR) { return MODE_BINARY("a+", "ab+"); } return MODE_BINARY("a", "ab"); } - switch (flags & (O_RDONLY|O_WRONLY|O_RDWR)) { + switch (oflags & (O_RDONLY|O_WRONLY|O_RDWR)) { case O_RDONLY: return MODE_BINARY("r", "rb"); case O_WRONLY: @@ -3715,7 +3713,7 @@ case O_RDWR: return MODE_BINARY("r+", "rb+"); } - rb_raise(rb_eArgError, "invalid access modenum 0x%x", flags); + rb_raise(rb_eArgError, "invalid access oflags 0x%x", oflags); return NULL; /* not reached */ } @@ -3776,9 +3774,9 @@ } void -rb_io_mode_enc(rb_io_t *fptr, const char *mode) +rb_io_mode_enc(rb_io_t *fptr, const char *modestr) { - const char *p = strchr(mode, ':'); + const char *p = strchr(modestr, ':'); if (p) { mode_enc(fptr, p+1); } @@ -3838,37 +3836,37 @@ typedef struct rb_io_enc_t convconfig_t; static void -rb_io_extract_modeenc(VALUE *mode_p, VALUE opthash, - int *modenum_p, int *flags_p, convconfig_t *convconfig_p) +rb_io_extract_modeenc(VALUE *vmode_p, VALUE opthash, + int *oflags_p, int *fmode_p, convconfig_t *convconfig_p) { - VALUE mode; - int modenum, flags; + VALUE vmode; + int oflags, fmode; rb_encoding *enc, *enc2; int ecflags; VALUE ecopts; int has_enc = 0; VALUE intmode; - mode = *mode_p; + vmode = *vmode_p; enc = NULL; enc2 = NULL; - if (NIL_P(mode)) { - flags = FMODE_READABLE; - modenum = O_RDONLY; + if (NIL_P(vmode)) { + fmode = FMODE_READABLE; + oflags = O_RDONLY; } - else if (!NIL_P(intmode = rb_check_to_integer(mode, "to_int"))) { - mode = intmode; - modenum = NUM2INT(intmode); - flags = rb_io_modenum_flags(modenum); + else if 
(!NIL_P(intmode = rb_check_to_integer(vmode, "to_int"))) { + vmode = intmode; + oflags = NUM2INT(intmode); + fmode = rb_io_oflags_fmode(oflags); } else { const char *p; - SafeStringValue(mode); - p = StringValueCStr(mode); - flags = rb_io_mode_flags(p); - modenum = rb_io_flags_modenum(flags); + SafeStringValue(vmode); + p = StringValueCStr(vmode); + fmode = rb_io_modestr_fmode(p); + oflags = rb_io_fmode_oflags(fmode); p = strchr(p, ':'); if (p) { has_enc = 1; @@ -3884,12 +3882,12 @@ VALUE v; v = rb_hash_aref(opthash, sym_textmode); if (RTEST(v)) - flags |= FMODE_TEXTMODE; + fmode |= FMODE_TEXTMODE; v = rb_hash_aref(opthash, sym_binmode); if (RTEST(v)) { - flags |= FMODE_BINMODE; + fmode |= FMODE_BINMODE; #ifdef O_BINARY - modenum |= O_BINARY; + oflags |= O_BINARY; #endif } ecflags = rb_econv_prepare_opts(opthash, &ecopts); @@ -3901,16 +3899,19 @@ } } - if ((flags & FMODE_BINMODE) && (flags & FMODE_TEXTMODE)) + if ((fmode & FMODE_BINMODE) && (fmode & FMODE_TEXTMODE)) rb_raise(rb_eArgError, "both textmode and binmode specified"); - *mode_p = mode; + if (enc && !rb_enc_asciicompat(enc) && !(fmode & FMODE_BINMODE)) + rb_raise(rb_eArgError, "ASCII incompatible encoding needs binmode"); - *modenum_p = modenum; - *flags_p = flags; + *vmode_p = vmode; + + *oflags_p = oflags; + *fmode_p = fmode; convconfig_p->enc = enc; convconfig_p->enc2 = enc2; - convconfig_p->flags = ecflags; + convconfig_p->ecflags = ecflags; convconfig_p->ecopts = ecopts; } @@ -3919,8 +3920,8 @@ int base; #endif const char *fname; - int flag; - mode_t mode; + int oflags; + mode_t perm; }; static VALUE @@ -3928,14 +3929,14 @@ { struct sysopen_struct *data = ptr; #if USE_OPENAT - return (VALUE)openat(data->base, data->fname, data->flag, data->mode); + return (VALUE)openat(data->base, data->fname, data->oflags, data->perm); #else - return (VALUE)open(data->fname, data->flag, data->mode); + return (VALUE)open(data->fname, data->oflags, data->perm); #endif } static int -rb_sysopen_internal(const char 
*fname, int flags, mode_t mode) +rb_sysopen_internal(const char *fname, int oflags, mode_t perm) { struct sysopen_struct data; @@ -3943,13 +3944,13 @@ data.base = GET_THREAD()->cwd.fd; #endif data.fname = fname; - data.flag = flags; - data.mode = mode; + data.oflags = oflags; + data.perm = perm; return (int)rb_thread_blocking_region(sysopen_func, &data, RUBY_UBF_IO, 0); } static int -rb_sysopen(const char *fname, int flags, mode_t mode) +rb_sysopen(const char *fname, int oflags, mode_t perm) { int fd; #if !USE_OPENAT @@ -3957,7 +3958,7 @@ #endif #ifdef O_BINARY - flags |= O_BINARY; + oflags |= O_BINARY; #endif #if !USE_OPENAT @@ -3967,11 +3968,11 @@ } #endif - fd = rb_sysopen_internal(fname, flags, mode); + fd = rb_sysopen_internal(fname, oflags, perm); if (fd < 0) { if (errno == EMFILE || errno == ENFILE) { rb_gc(); - fd = rb_sysopen_internal(fname, flags, mode); + fd = rb_sysopen_internal(fname, oflags, perm); } if (fd < 0) { rb_sys_fail(fname); @@ -3986,14 +3987,14 @@ } FILE * -rb_fdopen(int fd, const char *mode) +rb_fdopen(int fd, const char *modestr) { FILE *file; #if defined(sun) errno = 0; #endif - file = fdopen(fd, mode); + file = fdopen(fd, modestr); if (!file) { if ( #if defined(sun) @@ -4004,7 +4005,7 @@ #if defined(sun) errno = 0; #endif - file = fdopen(fd, mode); + file = fdopen(fd, modestr); } if (!file) { #ifdef _WIN32 @@ -4032,34 +4033,34 @@ } static VALUE -rb_file_open_generic(VALUE io, VALUE filename, int modenum, int flags, convconfig_t *convconfig, mode_t perm) +rb_file_open_generic(VALUE io, VALUE filename, int oflags, int fmode, convconfig_t *convconfig, mode_t perm) { rb_io_t *fptr; MakeOpenFile(io, fptr); - fptr->mode = flags; + fptr->mode = fmode; if (convconfig) { fptr->encs = *convconfig; } else { fptr->encs.enc = NULL; fptr->encs.enc2 = NULL; - fptr->encs.flags = 0; + fptr->encs.ecflags = 0; fptr->encs.ecopts = Qnil; } fptr->pathv = rb_str_new_frozen(filename); - fptr->fd = rb_sysopen(RSTRING_PTR(fptr->pathv), modenum, perm); + fptr->fd 
= rb_sysopen(RSTRING_PTR(fptr->pathv), oflags, perm); io_check_tty(fptr); return io; } static VALUE -rb_file_open_internal(VALUE io, VALUE filename, const char *mode) +rb_file_open_internal(VALUE io, VALUE filename, const char *modestr) { - int flags; + int fmode; - const char *p = strchr(mode, ':'); + const char *p = strchr(modestr, ':'); convconfig_t convconfig; if (p) { parse_mode_enc(p+1, &convconfig.enc, &convconfig.enc2); @@ -4067,22 +4068,22 @@ else { convconfig.enc = NULL; convconfig.enc2 = NULL; - convconfig.flags = 0; + convconfig.ecflags = 0; convconfig.ecopts = Qnil; } - flags = rb_io_mode_flags(mode); + fmode = rb_io_modestr_fmode(modestr); return rb_file_open_generic(io, filename, - rb_io_flags_modenum(flags), - flags, + rb_io_fmode_oflags(fmode), + fmode, &convconfig, 0666); } VALUE -rb_file_open(const char *fname, const char *mode) +rb_file_open_str(VALUE fname, const char *modestr) { - return rb_file_open_internal(io_alloc(rb_cFile), rb_str_new_cstr(fname), mode); + return rb_file_open_internal(io_alloc(rb_cFile), fname, modestr); } VALUE @@ -4146,6 +4147,12 @@ return newio; } +VALUE +rb_file_open(const char *fname, const char *modestr) +{ + return rb_file_open_internal(io_alloc(rb_cFile), rb_str_new_cstr(fname), modestr); +} + #if defined(__CYGWIN__) || !defined(HAVE_FORK) static struct pipe_list { rb_io_t *fptr; @@ -4324,7 +4331,7 @@ #endif static VALUE -pipe_open(struct rb_exec_arg *eargp, VALUE prog, const char *mode, int flags, convconfig_t *convconfig) +pipe_open(struct rb_exec_arg *eargp, VALUE prog, const char *modestr, int fmode, convconfig_t *convconfig) { int pid = 0; rb_io_t *fptr; @@ -4335,7 +4342,7 @@ int status; struct popen_arg arg; #elif defined(_WIN32) - int openmode = rb_io_mode_modenum(mode); + int openmode = rb_io_modestr_oflags(modestr); const char *exename = NULL; volatile VALUE cmdbuf; struct rb_exec_arg sarg; @@ -4368,10 +4375,10 @@ #if defined(HAVE_FORK) arg.execp = eargp; - arg.modef = flags; + arg.modef = fmode; 
arg.pair[0] = arg.pair[1] = -1; arg.write_pair[0] = arg.write_pair[1] = -1; - switch (flags & (FMODE_READABLE|FMODE_WRITABLE)) { + switch (fmode & (FMODE_READABLE|FMODE_WRITABLE)) { case FMODE_READABLE|FMODE_WRITABLE: if (rb_pipe(arg.write_pair) < 0) rb_sys_fail(cmd); @@ -4424,20 +4431,20 @@ int e = errno; close(arg.pair[0]); close(arg.pair[1]); - if ((flags & (FMODE_READABLE|FMODE_WRITABLE)) == (FMODE_READABLE|FMODE_WRITABLE)) { + if ((fmode & (FMODE_READABLE|FMODE_WRITABLE)) == (FMODE_READABLE|FMODE_WRITABLE)) { close(arg.write_pair[0]); close(arg.write_pair[1]); } errno = e; rb_sys_fail(cmd); } - if ((flags & FMODE_READABLE) && (flags & FMODE_WRITABLE)) { + if ((fmode & FMODE_READABLE) && (fmode & FMODE_WRITABLE)) { close(arg.pair[1]); fd = arg.pair[0]; close(arg.write_pair[0]); write_fd = arg.write_pair[1]; } - else if (flags & FMODE_READABLE) { + else if (fmode & FMODE_READABLE) { close(arg.pair[1]); fd = arg.pair[0]; } @@ -4496,7 +4503,7 @@ rb_exec_arg_fixup(eargp); rb_run_exec_options(eargp, &sarg); } - fp = popen(cmd, mode); + fp = popen(cmd, modestr); if (eargp) rb_run_exec_options(&sarg, NULL); if (!fp) rb_sys_fail(RSTRING_PTR(prog)); @@ -4507,7 +4514,7 @@ MakeOpenFile(port, fptr); fptr->fd = fd; fptr->stdio_file = fp; - fptr->mode = flags | FMODE_SYNC|FMODE_DUPLEX; + fptr->mode = fmode | FMODE_SYNC|FMODE_DUPLEX; if (convconfig) { fptr->encs = *convconfig; } @@ -4517,7 +4524,7 @@ write_port = io_alloc(rb_cIO); MakeOpenFile(write_port, write_fptr); write_fptr->fd = write_fd; - write_fptr->mode = (flags & ~FMODE_READABLE)| FMODE_SYNC|FMODE_DUPLEX; + write_fptr->mode = (fmode & ~FMODE_READABLE)| FMODE_SYNC|FMODE_DUPLEX; fptr->mode &= ~FMODE_WRITABLE; fptr->tied_io_for_writing = write_port; rb_ivar_set(port, rb_intern("@tied_io_for_writing"), write_port); @@ -4531,16 +4538,16 @@ } static VALUE -pipe_open_v(int argc, VALUE *argv, const char *mode, int flags, convconfig_t *convconfig) +pipe_open_v(int argc, VALUE *argv, const char *modestr, int fmode, 
convconfig_t *convconfig) { VALUE prog; struct rb_exec_arg earg; prog = rb_exec_arg_init(argc, argv, Qfalse, &earg); - return pipe_open(&earg, prog, mode, flags, convconfig); + return pipe_open(&earg, prog, modestr, fmode, convconfig); } static VALUE -pipe_open_s(VALUE prog, const char *mode, int flags, convconfig_t *convconfig) +pipe_open_s(VALUE prog, const char *modestr, int fmode, convconfig_t *convconfig) { const char *cmd = RSTRING_PTR(prog); int argc = 1; @@ -4552,11 +4559,11 @@ rb_raise(rb_eNotImpError, "fork() function is unimplemented on this machine"); #endif - return pipe_open(0, 0, mode, flags, convconfig); + return pipe_open(0, 0, modestr, fmode, convconfig); } rb_exec_arg_init(argc, argv, Qtrue, &earg); - return pipe_open(&earg, prog, mode, flags, convconfig); + return pipe_open(&earg, prog, modestr, fmode, convconfig); } static VALUE @@ -4631,27 +4638,27 @@ static VALUE rb_io_s_popen(int argc, VALUE *argv, VALUE klass) { - const char *mode; + const char *modestr; VALUE pname, pmode, port, tmp, opt; - int modenum, flags; + int oflags, fmode; convconfig_t convconfig; opt = pop_last_hash(&argc, &argv); rb_scan_args(argc, argv, "11", &pname, &pmode); - rb_io_extract_modeenc(&pmode, opt, &modenum, &flags, &convconfig); - mode = rb_io_modenum_mode(modenum); + rb_io_extract_modeenc(&pmode, opt, &oflags, &fmode, &convconfig); + modestr = rb_io_oflags_modestr(oflags); tmp = rb_check_array_type(pname); if (!NIL_P(tmp)) { tmp = rb_ary_dup(tmp); RBASIC(tmp)->klass = 0; - port = pipe_open_v(RARRAY_LEN(tmp), RARRAY_PTR(tmp), mode, flags, &convconfig); + port = pipe_open_v(RARRAY_LEN(tmp), RARRAY_PTR(tmp), modestr, fmode, &convconfig); rb_ary_clear(tmp); } else { SafeStringValue(pname); - port = pipe_open_s(pname, mode, flags, &convconfig); + port = pipe_open_s(pname, modestr, fmode, &convconfig); } if (NIL_P(port)) { /* child */ @@ -4672,11 +4679,11 @@ static void rb_scan_open_args(int argc, VALUE *argv, - VALUE *fname_p, int *modenum_p, int *flags_p, + VALUE 
*fname_p, int *oflags_p, int *fmode_p, convconfig_t *convconfig_p, mode_t *perm_p) { VALUE opt=Qnil, fname, vmode, vperm; - int modenum, flags; + int oflags, fmode; mode_t perm; opt = pop_last_hash(&argc, &argv); @@ -4703,13 +4710,13 @@ #endif FilePathValue(fname); - rb_io_extract_modeenc(&vmode, opt, &modenum, &flags, convconfig_p); + rb_io_extract_modeenc(&vmode, opt, &oflags, &fmode, convconfig_p); perm = NIL_P(vperm) ? 0666 : NUM2UINT(vperm); *fname_p = fname; - *modenum_p = modenum; - *flags_p = flags; + *oflags_p = oflags; + *fmode_p = fmode; *perm_p = perm; } @@ -4717,12 +4724,12 @@ rb_open_file(int argc, VALUE *argv, VALUE io) { VALUE fname; - int modenum, flags; + int oflags, fmode; convconfig_t convconfig; mode_t perm; - rb_scan_open_args(argc, argv, &fname, &modenum, &flags, &convconfig, &perm); - rb_file_open_generic(io, fname, modenum, flags, &convconfig, perm); + rb_scan_open_args(argc, argv, &fname, &oflags, &fmode, &convconfig, &perm); + rb_file_open_generic(io, fname, oflags, fmode, &convconfig, perm); return io; } @@ -4768,7 +4775,7 @@ { VALUE fname, vmode, vperm; VALUE intmode; - int modenum, fd; + int oflags, fd; mode_t perm; char *path; @@ -4776,19 +4783,19 @@ FilePathValue(fname); if (NIL_P(vmode)) - modenum = O_RDONLY; + oflags = O_RDONLY; else if (!NIL_P(intmode = rb_check_to_integer(vmode, "to_int"))) - modenum = NUM2INT(intmode); + oflags = NUM2INT(intmode); else { SafeStringValue(vmode); - modenum = rb_io_mode_modenum(StringValueCStr(vmode)); + oflags = rb_io_modestr_oflags(StringValueCStr(vmode)); } if (NIL_P(vperm)) perm = 0666; else perm = NUM2UINT(vperm); RB_GC_GUARD(fname) = rb_str_new4(fname); path = RSTRING_PTR(fname); - fd = rb_sysopen(path, modenum, perm); + fd = rb_sysopen(path, oflags, perm); return INT2NUM(fd); } @@ -4953,22 +4960,22 @@ } static VALUE -rb_io_open(VALUE filename, VALUE mode, VALUE vperm, VALUE opt) +rb_io_open(VALUE filename, VALUE vmode, VALUE vperm, VALUE opt) { VALUE cmd; - int modenum, flags; + int oflags, 
fmode; convconfig_t convconfig; mode_t perm; - rb_io_extract_modeenc(&mode, opt, &modenum, &flags, &convconfig); + rb_io_extract_modeenc(&vmode, opt, &oflags, &fmode, &convconfig); perm = NIL_P(vperm) ? 0666 : NUM2UINT(vperm); if (!NIL_P(cmd = check_pipe_command(filename))) { - return pipe_open_s(cmd, rb_io_modenum_mode(modenum), flags, &convconfig); + return pipe_open_s(cmd, rb_io_oflags_modestr(oflags), fmode, &convconfig); } else { return rb_file_open_generic(io_alloc(rb_cFile), filename, - modenum, flags, &convconfig, perm); + oflags, fmode, &convconfig, perm); } } @@ -5004,8 +5011,8 @@ (fptr->stdio_file == stderr && !(orig->mode & FMODE_WRITABLE))) { rb_raise(rb_eArgError, "%s can't change access mode from \"%s\" to \"%s\"", - PREP_STDIO_NAME(fptr), rb_io_flags_mode(fptr->mode), - rb_io_flags_mode(orig->mode)); + PREP_STDIO_NAME(fptr), rb_io_fmode_modestr(fptr->mode), + rb_io_fmode_modestr(orig->mode)); } } if (orig->mode & FMODE_READABLE) { @@ -5084,7 +5091,7 @@ rb_io_reopen(int argc, VALUE *argv, VALUE file) { VALUE fname, nmode; - int modenum; + int oflags; rb_io_t *fptr; rb_secure(4); @@ -5104,25 +5111,25 @@ } if (!NIL_P(nmode)) { - int flags = rb_io_mode_flags(StringValueCStr(nmode)); + int fmode = rb_io_modestr_fmode(StringValueCStr(nmode)); if (IS_PREP_STDIO(fptr) && - ((fptr->mode & FMODE_READWRITE) & (flags & FMODE_READWRITE)) != + ((fptr->mode & FMODE_READWRITE) & (fmode & FMODE_READWRITE)) != (fptr->mode & FMODE_READWRITE)) { rb_raise(rb_eArgError, "%s can't change access mode from \"%s\" to \"%s\"", - PREP_STDIO_NAME(fptr), rb_io_flags_mode(fptr->mode), - rb_io_flags_mode(flags)); + PREP_STDIO_NAME(fptr), rb_io_fmode_modestr(fptr->mode), + rb_io_fmode_modestr(fmode)); } - fptr->mode = flags; + fptr->mode = fmode; rb_io_mode_enc(fptr, StringValueCStr(nmode)); - fptr->encs.flags = 0; + fptr->encs.ecflags = 0; fptr->encs.ecopts = Qnil; } fptr->pathv = rb_str_new_frozen(fname); - modenum = rb_io_flags_modenum(fptr->mode); + oflags = 
rb_io_fmode_oflags(fptr->mode); if (fptr->fd < 0) { - fptr->fd = rb_sysopen(RSTRING_PTR(fptr->pathv), modenum, 0666); + fptr->fd = rb_sysopen(RSTRING_PTR(fptr->pathv), oflags, 0666); fptr->stdio_file = 0; return file; } @@ -5133,7 +5140,7 @@ fptr->rbuf_off = fptr->rbuf_len = 0; if (fptr->stdio_file) { - if (freopen(RSTRING_PTR(fptr->pathv), rb_io_modenum_mode(modenum), fptr->stdio_file) == 0) { + if (freopen(RSTRING_PTR(fptr->pathv), rb_io_oflags_modestr(oflags), fptr->stdio_file) == 0) { rb_sys_fail_path(fptr->pathv); } fptr->fd = fileno(fptr->stdio_file); @@ -5146,7 +5153,7 @@ if (close(fptr->fd) < 0) rb_sys_fail_path(fptr->pathv); fptr->fd = -1; - fptr->fd = rb_sysopen(RSTRING_PTR(fptr->pathv), modenum, 0666); + fptr->fd = rb_sysopen(RSTRING_PTR(fptr->pathv), oflags, 0666); } return file; @@ -5562,7 +5569,7 @@ } static VALUE -prep_io(int fd, int flags, VALUE klass, const char *path) +prep_io(int fd, int fmode, VALUE klass, const char *path) { rb_io_t *fp; VALUE io = io_alloc(klass); @@ -5571,11 +5578,11 @@ fp->fd = fd; #ifdef __CYGWIN__ if (!isatty(fd)) { - flags |= FMODE_BINMODE; + fmode |= FMODE_BINMODE; setmode(fd, O_BINARY); } #endif - fp->mode = flags; + fp->mode = fmode; io_check_tty(fp); if (path) fp->pathv = rb_obj_freeze(rb_str_new_cstr(path)); @@ -5583,19 +5590,19 @@ } VALUE -rb_io_fdopen(int fd, int modenum, const char *path) +rb_io_fdopen(int fd, int oflags, const char *path) { VALUE klass = rb_cIO; if (path && strcmp(path, "-")) klass = rb_cFile; - return prep_io(fd, rb_io_modenum_flags(modenum), klass, path); + return prep_io(fd, rb_io_oflags_fmode(oflags), klass, path); } static VALUE -prep_stdio(FILE *f, int flags, VALUE klass, const char *path) +prep_stdio(FILE *f, int fmode, VALUE klass, const char *path) { rb_io_t *fptr; - VALUE io = prep_io(fileno(f), flags|FMODE_PREP, klass, path); + VALUE io = prep_io(fileno(f), fmode|FMODE_PREP, klass, path); GetOpenFile(io, fptr); fptr->stdio_file = f; @@ -5607,8 +5614,8 @@ rb_io_stdio_file(rb_io_t *fptr) 
{ if (!fptr->stdio_file) { - int modenum = rb_io_flags_modenum(fptr->mode); - fptr->stdio_file = rb_fdopen(fptr->fd, rb_io_modenum_mode(modenum)); + int oflags = rb_io_fmode_oflags(fptr->mode); + fptr->stdio_file = rb_fdopen(fptr->fd, rb_io_oflags_modestr(oflags)); } return fptr->stdio_file; } @@ -5637,30 +5644,30 @@ static VALUE rb_io_initialize(int argc, VALUE *argv, VALUE io) { - VALUE fnum, mode; + VALUE fnum, vmode; rb_io_t *fp; - int fd, flags, modenum = O_RDONLY; + int fd, fmode, oflags = O_RDONLY; convconfig_t convconfig; VALUE opt; rb_secure(4); opt = pop_last_hash(&argc, &argv); - rb_scan_args(argc, argv, "11", &fnum, &mode); - rb_io_extract_modeenc(&mode, opt, &modenum, &flags, &convconfig); + rb_scan_args(argc, argv, "11", &fnum, &vmode); + rb_io_extract_modeenc(&vmode, opt, &oflags, &fmode, &convconfig); fd = NUM2INT(fnum); UPDATE_MAXFD(fd); - if (NIL_P(mode)) { + if (NIL_P(vmode)) { #if defined(HAVE_FCNTL) && defined(F_GETFL) - modenum = fcntl(fd, F_GETFL); - if (modenum == -1) rb_sys_fail(0); - flags = rb_io_modenum_flags(modenum); + oflags = fcntl(fd, F_GETFL); + if (oflags == -1) rb_sys_fail(0); + fmode = rb_io_oflags_fmode(oflags); #endif } MakeOpenFile(io, fp); fp->fd = fd; - fp->mode = flags; + fp->mode = fmode; fp->encs = convconfig; clear_codeconv(fp); io_check_tty(fp); @@ -5764,8 +5771,12 @@ rb_gc_mark(p->current_file); rb_gc_mark(p->lineno); rb_gc_mark(p->argv); +<<<<<<< .working rb_gc_mark(p->defin); rb_gc_mark(p->defout); +======= + rb_gc_mark(p->encs.ecopts); +>>>>>>> .merge-right.r19180 } static void @@ -6734,14 +6745,14 @@ if (argc == 2) { fptr->encs.enc2 = rb_to_encoding(v1); fptr->encs.enc = rb_to_encoding(v2); - fptr->encs.flags = rb_econv_prepare_opts(opt, &fptr->encs.ecopts); + fptr->encs.ecflags = rb_econv_prepare_opts(opt, &fptr->encs.ecopts); clear_codeconv(fptr); } else if (argc == 1) { if (NIL_P(v1)) { fptr->encs.enc = NULL; fptr->encs.enc2 = NULL; - fptr->encs.flags = 0; + fptr->encs.ecflags = 0; fptr->encs.ecopts = Qnil; 
clear_codeconv(fptr); } @@ -6749,12 +6760,12 @@ VALUE tmp = rb_check_string_type(v1); if (!NIL_P(tmp)) { mode_enc(fptr, StringValueCStr(tmp)); - fptr->encs.flags = rb_econv_prepare_opts(opt, &fptr->encs.ecopts); + fptr->encs.ecflags = rb_econv_prepare_opts(opt, &fptr->encs.ecopts); } else { fptr->encs.enc = rb_to_encoding(v1); fptr->encs.enc2 = NULL; - fptr->encs.flags = 0; + fptr->encs.ecflags = 0; fptr->encs.ecopts = Qnil; clear_codeconv(fptr); } @@ -6865,7 +6876,7 @@ open_key_args(int argc, VALUE *argv, struct foreach_arg *arg) { VALUE opt, v; - VALUE mode, perm; + VALUE vmode, vperm; FilePathValue(argv[0]); arg->io = 0; @@ -6892,15 +6903,15 @@ arg->io = rb_io_open_with_args(RARRAY_LEN(args), RARRAY_PTR(args)); return; } - mode = Qnil; - perm = INT2NUM(O_RDONLY); + vmode = Qnil; + vperm = INT2NUM(O_RDONLY); v = rb_hash_aref(opt, sym_mode); if (!NIL_P(v)) - mode = v; + vmode = v; v = rb_hash_aref(opt, sym_perm); if (!NIL_P(v)) - perm = v; - arg->io = rb_io_open(argv[0], mode, perm, opt); + vperm = v; + arg->io = rb_io_open(argv[0], vmode, vperm, opt); } static VALUE @@ -7348,7 +7359,14 @@ VALUE buf = rb_str_buf_new(buflen); long rest = stp->copy_length; off_t off = stp->src_offset; + ID read_method = id_readpartial; + if (stp->src_fd == -1) { + if (!rb_respond_to(stp->src, read_method)) { + read_method = id_read; + } + } + while (1) { long numwrote; long l; @@ -7361,7 +7379,7 @@ l = buflen < rest ? 
buflen : rest; } if (stp->src_fd == -1) { - rb_funcall(stp->src, id_readpartial, 2, INT2FIX(l), buf); + rb_funcall(stp->src, read_method, 2, INT2FIX(l), buf); } else { ssize_t ss; @@ -7380,6 +7398,9 @@ numwrote = NUM2LONG(n); stp->total += numwrote; rest -= numwrote; + if (read_method == id_read && RSTRING_LEN(buf) == 0) { + break; + } } return Qnil; @@ -7421,13 +7442,13 @@ src_io = rb_check_convert_type(stp->src, T_FILE, "IO", "to_io"); if (NIL_P(src_io)) { VALUE args[2]; - int flags = O_RDONLY; + int oflags = O_RDONLY; #ifdef O_NOCTTY - flags |= O_NOCTTY; + oflags |= O_NOCTTY; #endif FilePathValue(stp->src); args[0] = stp->src; - args[1] = INT2NUM(flags); + args[1] = INT2NUM(oflags); src_io = rb_class_new_instance(2, args, rb_cFile); stp->src = src_io; stp->close_src = 1; @@ -7449,13 +7470,13 @@ dst_io = rb_check_convert_type(stp->dst, T_FILE, "IO", "to_io"); if (NIL_P(dst_io)) { VALUE args[3]; - int flags = O_WRONLY|O_CREAT|O_TRUNC; + int oflags = O_WRONLY|O_CREAT|O_TRUNC; #ifdef O_NOCTTY - flags |= O_NOCTTY; + oflags |= O_NOCTTY; #endif FilePathValue(stp->dst); args[0] = stp->dst; - args[1] = INT2NUM(flags); + args[1] = INT2NUM(oflags); args[2] = INT2FIX(0600); dst_io = rb_class_new_instance(3, args, rb_cFile); stp->dst = dst_io; Index: mvm/pack.c =================================================================== --- mvm/pack.c (revision 19180) +++ mvm/pack.c (revision 19181) @@ -1011,7 +1011,7 @@ static void encodes(VALUE str, const char *s, long len, int type) { - char *buff = ALLOCA_N(char, len * 4 / 3 + 6); + char buff[4096]; long i = 0; const char *trans = type == 'u' ? 
uu_table : b64_table; int padding; @@ -1024,13 +1024,20 @@ padding = '='; } while (len >= 3) { - buff[i++] = trans[077 & (*s >> 2)]; - buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))]; - buff[i++] = trans[077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03))]; - buff[i++] = trans[077 & s[2]]; - s += 3; - len -= 3; + while (len >= 3 && sizeof(buff)-i >= 4) { + buff[i++] = trans[077 & (*s >> 2)]; + buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))]; + buff[i++] = trans[077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03))]; + buff[i++] = trans[077 & s[2]]; + s += 3; + len -= 3; + } + if (sizeof(buff)-i < 4) { + rb_str_buf_cat(str, buff, i); + i = 0; + } } + if (len == 2) { buff[i++] = trans[077 & (*s >> 2)]; buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))]; @@ -1656,7 +1663,7 @@ float tmp; memcpy(&tmp, s, sizeof(float)); s += sizeof(float); - UNPACK_PUSH(DOUBLE2NUM((double)tmp)); + UNPACK_PUSH(DBL2NUM((double)tmp)); } PACK_ITEM_ADJUST(); break; @@ -1670,7 +1677,7 @@ memcpy(&tmp, s, sizeof(float)); s += sizeof(float); tmp = VTOHF(tmp,ftmp); - UNPACK_PUSH(DOUBLE2NUM((double)tmp)); + UNPACK_PUSH(DBL2NUM((double)tmp)); } PACK_ITEM_ADJUST(); break; @@ -1684,7 +1691,7 @@ memcpy(&tmp, s, sizeof(double)); s += sizeof(double); tmp = VTOHD(tmp,dtmp); - UNPACK_PUSH(DOUBLE2NUM(tmp)); + UNPACK_PUSH(DBL2NUM(tmp)); } PACK_ITEM_ADJUST(); break; @@ -1696,7 +1703,7 @@ double tmp; memcpy(&tmp, s, sizeof(double)); s += sizeof(double); - UNPACK_PUSH(DOUBLE2NUM(tmp)); + UNPACK_PUSH(DBL2NUM(tmp)); } PACK_ITEM_ADJUST(); break; @@ -1710,7 +1717,7 @@ memcpy(&tmp, s, sizeof(float)); s += sizeof(float); tmp = NTOHF(tmp,ftmp); - UNPACK_PUSH(DOUBLE2NUM((double)tmp)); + UNPACK_PUSH(DBL2NUM((double)tmp)); } PACK_ITEM_ADJUST(); break; @@ -1724,7 +1731,7 @@ memcpy(&tmp, s, sizeof(double)); s += sizeof(double); tmp = NTOHD(tmp,dtmp); - UNPACK_PUSH(DOUBLE2NUM(tmp)); + UNPACK_PUSH(DBL2NUM(tmp)); } PACK_ITEM_ADJUST(); break; Index: mvm/lib/prime.rb 
=================================================================== --- mvm/lib/prime.rb (revision 19180) +++ mvm/lib/prime.rb (revision 19181) @@ -91,19 +91,6 @@ warn "Prime::new is obsolete. use Prime::instance or class methods of Prime." end - module OldCompatibility - def succ - @generator.succ - end - alias next succ - - def each(&block) - loop do - yield succ - end - end - end - class<<self extend Forwardable include Enumerable @@ -137,6 +124,14 @@ # +ubound+:: # Upper bound of prime numbers. The iterator stops after # yields all prime numbers p <= +ubound+. + # + # == Note + # +Prime+.+new+ returns a object extended by +Prime+::+OldCompatibility+ + # in order to compatibility to Ruby 1.9, and +Prime+#each is overwritten + # by +Prime+::+OldCompatibility+#+each+. + # + # +Prime+.+new+ is now obsolete. Use +Prime+.+instance+.+each+ or simply + # +Prime+.+each+. def each(ubound = nil, generator = EratosthenesGenerator.new, &block) generator.upper_bound = ubound generator.each(&block) @@ -254,18 +249,18 @@ end # Iterates the given block for each prime numbers. - # +ubound+:: def each(&block) return self.dup unless block if @ubound + last_value = nil loop do - p = succ - break if p > @ubound - block.call p + prime = succ + break last_value if prime > @ubound + last_value = block.call(prime) end else loop do - block.call succ + block.call(succ) end end end @@ -351,7 +346,7 @@ - # An implementation of prime table by trial division method. + # Internal use. An implementation of prime table by trial division method. class TrialDivision include Singleton @@ -399,7 +394,7 @@ end end - # An implementation of eratosthenes's sieve + # Internal use. An implementation of eratosthenes's sieve class EratosthenesSieve include Singleton @@ -443,4 +438,24 @@ end end end + + # Provides a +Prime+ object with compatibility to Ruby 1.8 when instanciated via +Prime+.+new+. + module OldCompatibility + # Returns the next prime number and forwards internal pointer. 
+ def succ + @generator.succ + end + alias next succ + + # Overwrites Prime#each. + # + # Iterates the given block over all prime numbers. Note that enumeration starts from + # the current position of internal pointer, not rewinded. + def each(&block) + return @generator.dup unless block_given? + loop do + yield succ + end + end + end end Index: mvm/thread.c =================================================================== --- mvm/thread.c (revision 19180) +++ mvm/thread.c (revision 19181) @@ -909,6 +909,16 @@ RUBY_VM_CHECK_INTS(); } +/* + * Hidden API for tcl/tk wrapper. + * There is no guarantee to perpetuate it. + */ +int +rb_thread_check_trap_pending(void) +{ + return GET_THREAD()->exec_signal != 0; +} + struct timeval rb_time_timeval(); void @@ -940,8 +950,6 @@ } } -int rb_thread_critical; /* TODO: dummy variable */ - VALUE rb_thread_blocking_region( rb_blocking_function_t *func, void *data1, @@ -2564,7 +2572,7 @@ * ThreadError: can't move from the enclosed thread group */ -VALUE +static VALUE thgroup_enclose(VALUE group) { struct thgroup *data; Index: mvm/gc.c =================================================================== --- mvm/gc.c (revision 19180) +++ mvm/gc.c (revision 19181) @@ -2675,7 +2675,7 @@ } #endif -VALUE +static VALUE gc_profile_record_get(void) { VALUE prof; @@ -2689,14 +2689,14 @@ for (i =0; i < objspace->profile.count; i++) { prof = rb_hash_new(); - rb_hash_aset(prof, ID2SYM(rb_intern("GC_TIME")), DOUBLE2NUM(objspace->profile.record[i].gc_time)); - rb_hash_aset(prof, ID2SYM(rb_intern("GC_INVOKE_TIME")), DOUBLE2NUM(objspace->profile.record[i].gc_invoke_time)); + rb_hash_aset(prof, ID2SYM(rb_intern("GC_TIME")), DBL2NUM(objspace->profile.record[i].gc_time)); + rb_hash_aset(prof, ID2SYM(rb_intern("GC_INVOKE_TIME")), DBL2NUM(objspace->profile.record[i].gc_invoke_time)); rb_hash_aset(prof, ID2SYM(rb_intern("HEAP_USE_SIZE")), rb_uint2inum(objspace->profile.record[i].heap_use_size)); rb_hash_aset(prof, ID2SYM(rb_intern("HEAP_TOTAL_SIZE")), 
rb_uint2inum(objspace->profile.record[i].heap_total_size)); rb_hash_aset(prof, ID2SYM(rb_intern("HEAP_TOTAL_OBJECTS")), rb_uint2inum(objspace->profile.record[i].heap_total_objects)); #if GC_PROFILE_MORE_DETAIL - rb_hash_aset(prof, ID2SYM(rb_intern("GC_MARK_TIME")), DOUBLE2NUM(objspace->profile.record[i].gc_mark_time)); - rb_hash_aset(prof, ID2SYM(rb_intern("GC_SWEEP_TIME")), DOUBLE2NUM(objspace->profile.record[i].gc_sweep_time)); + rb_hash_aset(prof, ID2SYM(rb_intern("GC_MARK_TIME")), DBL2NUM(objspace->profile.record[i].gc_mark_time)); + rb_hash_aset(prof, ID2SYM(rb_intern("GC_SWEEP_TIME")), DBL2NUM(objspace->profile.record[i].gc_sweep_time)); rb_hash_aset(prof, ID2SYM(rb_intern("ALLOCATE_INCREASE")), rb_uint2inum(objspace->profile.record[i].allocate_increase)); rb_hash_aset(prof, ID2SYM(rb_intern("ALLOCATE_LIMIT")), rb_uint2inum(objspace->profile.record[i].allocate_limit)); rb_hash_aset(prof, ID2SYM(rb_intern("HEAP_USE_SLOTS")), rb_uint2inum(objspace->profile.record[i].heap_use_slots)); @@ -2722,14 +2722,15 @@ * 1 0.012 159240 212940 10647 0.00000000000001530000 */ -VALUE +static VALUE gc_profile_result(void) { rb_objspace_t *objspace = &rb_objspace; - VALUE record = gc_profile_record_get(); + VALUE record; VALUE result; int i; + record = gc_profile_record_get(); if (objspace->profile.run && objspace->profile.count) { result = rb_sprintf("GC %d invokes.\n", NUM2INT(gc_count(0))); rb_str_cat2(result, "Index Invoke Time(sec) Use Size(byte) Total Size(byte) Total Object GC Time(ms)\n"); @@ -2773,7 +2774,7 @@ * */ -VALUE +static VALUE gc_profile_report(int argc, VALUE *argv, VALUE self) { VALUE out; Index: mvm/parse.y =================================================================== --- mvm/parse.y (revision 19180) +++ mvm/parse.y (revision 19181) @@ -6920,7 +6920,7 @@ rb_warningS("Float %s out of range", tok()); errno = 0; } - set_yylval_literal(DOUBLE2NUM(d)); + set_yylval_literal(DBL2NUM(d)); return tFLOAT; } set_yylval_literal(rb_cstr_to_inum(tok(), 10, 
Qfalse)); Index: mvm/process.c =================================================================== --- mvm/process.c (revision 19180) +++ mvm/process.c (revision 19181) @@ -1315,7 +1315,7 @@ if (NIL_P(flags)) flags = INT2NUM(O_RDONLY); else if (TYPE(flags) == T_STRING) - flags = INT2NUM(rb_io_mode_modenum(StringValueCStr(flags))); + flags = INT2NUM(rb_io_modestr_oflags(StringValueCStr(flags))); else flags = rb_to_int(flags); perm = rb_ary_entry(val, 2); @@ -5050,10 +5050,10 @@ times(&buf); return rb_struct_new(rb_cProcessTms, - utime = DOUBLE2NUM(buf.tms_utime / hertz), - stime = DOUBLE2NUM(buf.tms_stime / hertz), - cutime = DOUBLE2NUM(buf.tms_cutime / hertz), - sctime = DOUBLE2NUM(buf.tms_cstime / hertz)); + utime = DBL2NUM(buf.tms_utime / hertz), + stime = DBL2NUM(buf.tms_stime / hertz), + cutime = DBL2NUM(buf.tms_cutime / hertz), + sctime = DBL2NUM(buf.tms_cstime / hertz)); #else rb_notimplement(); #endif Index: mvm/ext/bigdecimal/bigdecimal.c =================================================================== --- mvm/ext/bigdecimal/bigdecimal.c (revision 19180) +++ mvm/ext/bigdecimal/bigdecimal.c (revision 19181) @@ -412,11 +412,17 @@ VpSetException((unsigned short)((val==Qtrue)?(fo|VP_EXCEPTION_INFINITY): (fo&(~VP_EXCEPTION_INFINITY)))); } + fo = VpGetException(); if(f&VP_EXCEPTION_NaN) { VpSetException((unsigned short)((val==Qtrue)?(fo|VP_EXCEPTION_NaN): (fo&(~VP_EXCEPTION_NaN)))); } fo = VpGetException(); + if(f&VP_EXCEPTION_UNDERFLOW) { + VpSetException((unsigned short)((val==Qtrue)?(fo|VP_EXCEPTION_UNDERFLOW): + (fo&(~VP_EXCEPTION_UNDERFLOW)))); + } + fo = VpGetException(); return INT2FIX(fo); } if(VP_ROUND_MODE==f) { Index: mvm/ext/zlib/zlib.c =================================================================== --- mvm/ext/zlib/zlib.c (revision 19180) +++ mvm/ext/zlib/zlib.c (revision 19181) @@ -2294,7 +2294,7 @@ } filename = argv[0]; FilePathValue(filename); - io = rb_file_open(RSTRING_PTR(filename), mode); + io = rb_file_open_str(filename, mode); argv[0] 
= io; return rb_gzfile_s_wrap(argc, argv, klass); Index: mvm/ext/pty/depend =================================================================== --- mvm/ext/pty/depend (revision 19180) +++ mvm/ext/pty/depend (revision 19181) @@ -1 +1 @@ -pty.o: pty.c $(hdrdir)/ruby.h $(topdir)/config.h $(hdrdir)/defines.h +pty.o: pty.c $(hdrdir)/ruby.h $(topdir)/config.h $(hdrdir)/defines.h $(hdrdir)/io.h Index: mvm/ext/tk/tcltklib.c =================================================================== --- mvm/ext/tk/tcltklib.c (revision 19180) +++ mvm/ext/tk/tcltklib.c (revision 19181) @@ -8,11 +8,6 @@ #include "ruby.h" -#ifdef HAVE_RUBY_SIGNAL_H -#include "ruby/signal.h" -#else -#include "rubysig.h" -#endif #ifdef HAVE_RUBY_ENCODING_H #include "ruby/encoding.h" #endif @@ -54,6 +49,9 @@ #define TCL_FINAL_RELEASE 2 #endif +static VALUE rb_thread_critical; /* dummy */ +int rb_thread_check_trap_pending(); + static struct { int major; int minor; @@ -1623,7 +1621,29 @@ } #endif +#define TRAP_CHECK() do { \ + if (trap_check(check_var) == 0) return 0; \ +} while (0) + static int +trap_check(int *check_var) +{ + DUMP1("trap check"); + + if (rb_thread_check_trap_pending()) { + if (check_var != (int*)NULL) { + /* wait command */ + return 0; + } + else { + rb_thread_check_ints(); + } + } + + return 1; +} + +static int lib_eventloop_core(check_root, update_flag, check_var, interp) int check_root; int update_flag; @@ -1755,28 +1775,12 @@ } } - DUMP1("trap check"); - if (rb_trap_pending) { - run_timer_flag = 0; - if (rb_prohibit_interrupt || check_var != (int*)NULL) { - /* pending or on wait command */ - return 0; - } else { - rb_trap_exec(); - } - } + TRAP_CHECK(); - DUMP1("check Root Widget"); + DUMP1("check Root Widget"); if (check_root && tk_stubs_init_p() && Tk_GetNumMainWindows() == 0) { run_timer_flag = 0; - if (rb_trap_pending) { - if (rb_prohibit_interrupt || check_var != (int*)NULL) { - /* pending or on wait command */ - return 0; - } else { - rb_trap_exec(); - } - } + TRAP_CHECK(); 
return 1; } @@ -1886,16 +1890,7 @@ return 0; } - DUMP1("trap check"); - if (rb_trap_pending) { - run_timer_flag = 0; - if (rb_prohibit_interrupt || check_var != (int*)NULL) { - /* pending or on wait command */ - return 0; - } else { - rb_trap_exec(); - } - } + TRAP_CHECK(); if (check_var != (int*)NULL && !NIL_P(rbtk_pending_exception)) { @@ -1966,28 +1961,12 @@ return 1; } - DUMP1("trap check"); - if (rb_trap_pending) { - run_timer_flag = 0; - if (rb_prohibit_interrupt || check_var != (int*)NULL) { - /* pending or on wait command */ - return 0; - } else { - rb_trap_exec(); - } - } + TRAP_CHECK(); DUMP1("check Root Widget"); if (check_root && tk_stubs_init_p() && Tk_GetNumMainWindows() == 0) { run_timer_flag = 0; - if (rb_trap_pending) { - if (rb_prohibit_interrupt || check_var != (int*)NULL) { - /* pending or on wait command */ - return 0; - } else { - rb_trap_exec(); - } - } + TRAP_CHECK(); return 1; } @@ -2823,7 +2802,6 @@ VALUE (*proc)(); VALUE data; { - int old_trapflag = rb_trap_immediate; int code; #ifdef HAVE_NATIVETHREAD @@ -2834,10 +2812,7 @@ #endif #endif - rb_trap_immediate = 0; code = tcl_protect_core(interp, proc, data); - rb_trap_immediate = old_trapflag; - return code; } @@ -3404,7 +3379,7 @@ } /* trap check */ - if (rb_trap_pending) { + if (rb_thread_check_trap_pending()) { Tcl_Release(interp); return TCL_RETURN; @@ -3770,7 +3745,7 @@ } /* trap check */ - if (rb_trap_pending) { + if (rb_thread_check_trap_pending()) { #if TCL_MAJOR_VERSION >= 8 Tcl_DecrRefCount(objv[1]); #endif @@ -4059,7 +4034,7 @@ } /* trap check */ - if (rb_trap_pending) { + if (rb_thread_check_trap_pending()) { Tcl_Release(interp); return TCL_RETURN; @@ -4119,7 +4094,7 @@ } /* trap check */ - if (rb_trap_pending) { + if (rb_thread_check_trap_pending()) { #if TCL_MAJOR_VERSION >= 8 Tcl_DecrRefCount(objv[2]); #endif @@ -4214,7 +4189,7 @@ } /* trap check */ - if (rb_trap_pending) { + if (rb_thread_check_trap_pending()) { Tcl_Release(interp); return TCL_RETURN; Index: 
mvm/ext/tk/tkutil/tkutil.c =================================================================== --- mvm/ext/tk/tkutil/tkutil.c (revision 19180) +++ mvm/ext/tk/tkutil/tkutil.c (revision 19181) @@ -11,11 +11,6 @@ #include "ruby.h" -#ifdef HAVE_RUBY_SIGNAL_H -#include "ruby/signal.h" -#else -#include "rubysig.h" -#endif #ifdef HAVE_RUBY_ST_H #include "ruby/st.h" #else @@ -895,15 +890,12 @@ { int idx, size; volatile VALUE dst; - int thr_crit_bup; VALUE old_gc; if (argc < 2) { rb_raise(rb_eArgError, "too few arguments"); } - thr_crit_bup = rb_thread_critical; - rb_thread_critical = Qtrue; old_gc = rb_gc_disable(); for(size = 0, idx = 2; idx < argc; idx++) { @@ -928,7 +920,6 @@ } if (old_gc == Qfalse) rb_gc_enable(); - rb_thread_critical = thr_crit_bup; return rb_ary_plus(argv[0], dst); } @@ -1599,12 +1590,8 @@ unsigned char type_chr; volatile VALUE dst = rb_ary_new2(vallen); volatile VALUE proc; - int thr_crit_bup; VALUE old_gc; - thr_crit_bup = rb_thread_critical; - rb_thread_critical = Qtrue; - old_gc = rb_gc_disable(); Data_Get_Struct(rb_const_get(self, ID_SUBST_INFO), @@ -1632,7 +1619,6 @@ } if (old_gc == Qfalse) rb_gc_enable(); - rb_thread_critical = thr_crit_bup; return dst; } Index: mvm/ext/iconv/iconv.c =================================================================== --- mvm/ext/iconv/iconv.c (revision 19180) +++ mvm/ext/iconv/iconv.c (revision 19181) @@ -135,6 +135,21 @@ return charset_map; } +static VALUE +strip_glibc_option(VALUE *code) +{ + VALUE val = *code; + const char *ptr = RSTRING_PTR(val), *pend = RSTRING_END(val); + const char *slash = memchr(ptr, '/', pend - ptr); + if (slash && slash < pend - 1 && slash[1] == '/') { + VALUE opt = rb_str_subseq(val, slash - ptr, pend - slash); + val = rb_str_subseq(val, 0, slash - ptr); + *code = val; + return opt; + } + return 0; +} + static char * map_charset(VALUE *code) { @@ -153,29 +168,53 @@ static iconv_t iconv_create(VALUE to, VALUE from, struct rb_iconv_opt_t *opt, int *idx) { + VALUE toopt = 
strip_glibc_option(&to); + VALUE fromopt = strip_glibc_option(&from); + VALUE toenc = 0, fromenc = 0; const char* tocode = map_charset(&to); const char* fromcode = map_charset(&from); iconv_t cd; + int retry = 0; - if ((*idx = rb_enc_find_index(tocode)) < 0) { - const char *slash = strchr(tocode, '/'); - if (slash && slash[1] == '/') { - VALUE tmp = rb_str_new(tocode, slash - tocode); - *idx = rb_enc_find_index(RSTRING_PTR(tmp)); - } + *idx = rb_enc_find_index(tocode); + + if (toopt) { + toenc = rb_str_plus(to, toopt); + tocode = RSTRING_PTR(toenc); } - - cd = iconv_open(tocode, fromcode); - if (cd == (iconv_t)-1) { + if (fromopt) { + fromenc = rb_str_plus(from, fromopt); + fromcode = RSTRING_PTR(fromenc); + } + while ((cd = iconv_open(tocode, fromcode)) == (iconv_t)-1) { + int inval = 0; switch (errno) { case EMFILE: case ENFILE: case ENOMEM: - rb_gc(); - cd = iconv_open(tocode, fromcode); + if (!retry++) { + rb_gc(); + continue; + } + break; + case EINVAL: + retry = 0; + inval = 1; + if (toenc) { + tocode = RSTRING_PTR(to); + rb_str_resize(toenc, 0); + toenc = 0; + continue; + } + if (fromenc) { + fromcode = RSTRING_PTR(from); + rb_str_resize(fromenc, 0); + fromenc = 0; + continue; + } + break; } - if (cd == (iconv_t)-1) { - int inval = errno == EINVAL; + { const char *s = inval ? "invalid encoding " : "iconv"; volatile VALUE msg = rb_str_new(0, strlen(s) + RSTRING_LEN(to) + RSTRING_LEN(from) + 8); @@ -190,10 +229,25 @@ } } + if (toopt || fromopt) { + if (toopt && fromopt && RTEST(rb_str_equal(toopt, fromopt))) { + fromopt = 0; + } + if (toopt && fromopt) { + rb_warning("encoding option isn't portable: %s, %s", + RSTRING_PTR(toopt) + 2, RSTRING_PTR(fromopt) + 2); + } + else { + rb_warning("encoding option isn't portable: %s", + (toopt ? 
RSTRING_PTR(toopt) : RSTRING_PTR(fromopt)) + 2); + } + } + if (opt) { #ifdef ICONV_SET_TRANSLITERATE if (opt->transliterate != Qundef) { int flag = RTEST(opt->transliterate); + rb_warning("encoding option isn't portable: transliterate"); if (iconvctl(cd, ICONV_SET_TRANSLITERATE, (void *)&flag)) rb_sys_fail("ICONV_SET_TRANSLITERATE"); } @@ -201,6 +255,7 @@ #ifdef ICONV_SET_DISCARD_ILSEQ if (opt->discard_ilseq != Qundef) { int flag = RTEST(opt->discard_ilseq); + rb_warning("encoding option isn't portable: discard_ilseq"); if (iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, (void *)&flag)) rb_sys_fail("ICONV_SET_DISCARD_ILSEQ"); } Index: mvm/.merged-trunk-revision =================================================================== --- mvm/.merged-trunk-revision (revision 19180) +++ mvm/.merged-trunk-revision (revision 19181) @@ -1 +1 @@ -19119 +19180 Index: mvm/numeric.c =================================================================== --- mvm/numeric.c (revision 19180) +++ mvm/numeric.c (revision 19181) @@ -554,7 +554,7 @@ static VALUE flo_uminus(VALUE flt) { - return DOUBLE2NUM(-RFLOAT_VALUE(flt)); + return DBL2NUM(-RFLOAT_VALUE(flt)); } /* @@ -570,11 +570,11 @@ { switch (TYPE(y)) { case T_FIXNUM: - return DOUBLE2NUM(RFLOAT_VALUE(x) + (double)FIX2LONG(y)); + return DBL2NUM(RFLOAT_VALUE(x) + (double)FIX2LONG(y)); case T_BIGNUM: - return DOUBLE2NUM(RFLOAT_VALUE(x) + rb_big2dbl(y)); + return DBL2NUM(RFLOAT_VALUE(x) + rb_big2dbl(y)); case T_FLOAT: - return DOUBLE2NUM(RFLOAT_VALUE(x) + RFLOAT_VALUE(y)); + return DBL2NUM(RFLOAT_VALUE(x) + RFLOAT_VALUE(y)); default: return rb_num_coerce_bin(x, y, '+'); } @@ -593,11 +593,11 @@ { switch (TYPE(y)) { case T_FIXNUM: - return DOUBLE2NUM(RFLOAT_VALUE(x) - (double)FIX2LONG(y)); + return DBL2NUM(RFLOAT_VALUE(x) - (double)FIX2LONG(y)); case T_BIGNUM: - return DOUBLE2NUM(RFLOAT_VALUE(x) - rb_big2dbl(y)); + return DBL2NUM(RFLOAT_VALUE(x) - rb_big2dbl(y)); case T_FLOAT: - return DOUBLE2NUM(RFLOAT_VALUE(x) - RFLOAT_VALUE(y)); + return 
DBL2NUM(RFLOAT_VALUE(x) - RFLOAT_VALUE(y)); default: return rb_num_coerce_bin(x, y, '-'); } @@ -616,11 +616,11 @@ { switch (TYPE(y)) { case T_FIXNUM: - return DOUBLE2NUM(RFLOAT_VALUE(x) * (double)FIX2LONG(y)); + return DBL2NUM(RFLOAT_VALUE(x) * (double)FIX2LONG(y)); case T_BIGNUM: - return DOUBLE2NUM(RFLOAT_VALUE(x) * rb_big2dbl(y)); + return DBL2NUM(RFLOAT_VALUE(x) * rb_big2dbl(y)); case T_FLOAT: - return DOUBLE2NUM(RFLOAT_VALUE(x) * RFLOAT_VALUE(y)); + return DBL2NUM(RFLOAT_VALUE(x) * RFLOAT_VALUE(y)); default: return rb_num_coerce_bin(x, y, '*'); } @@ -643,12 +643,12 @@ switch (TYPE(y)) { case T_FIXNUM: f_y = FIX2LONG(y); - return DOUBLE2NUM(RFLOAT_VALUE(x) / (double)f_y); + return DBL2NUM(RFLOAT_VALUE(x) / (double)f_y); case T_BIGNUM: d = rb_big2dbl(y); - return DOUBLE2NUM(RFLOAT_VALUE(x) / d); + return DBL2NUM(RFLOAT_VALUE(x) / d); case T_FLOAT: - return DOUBLE2NUM(RFLOAT_VALUE(x) / RFLOAT_VALUE(y)); + return DBL2NUM(RFLOAT_VALUE(x) / RFLOAT_VALUE(y)); default: return rb_num_coerce_bin(x, y, '/'); } @@ -718,7 +718,7 @@ return rb_num_coerce_bin(x, y, '%'); } flodivmod(RFLOAT_VALUE(x), fy, 0, &mod); - return DOUBLE2NUM(mod); + return DBL2NUM(mod); } static VALUE @@ -765,7 +765,7 @@ } flodivmod(RFLOAT_VALUE(x), fy, &div, &mod); a = dbl2ival(div); - b = DOUBLE2NUM(mod); + b = DBL2NUM(mod); return rb_assoc_new(a, b); } @@ -782,11 +782,11 @@ { switch (TYPE(y)) { case T_FIXNUM: - return DOUBLE2NUM(pow(RFLOAT_VALUE(x), (double)FIX2LONG(y))); + return DBL2NUM(pow(RFLOAT_VALUE(x), (double)FIX2LONG(y))); case T_BIGNUM: - return DOUBLE2NUM(pow(RFLOAT_VALUE(x), rb_big2dbl(y))); + return DBL2NUM(pow(RFLOAT_VALUE(x), rb_big2dbl(y))); case T_FLOAT: - return DOUBLE2NUM(pow(RFLOAT_VALUE(x), RFLOAT_VALUE(y))); + return DBL2NUM(pow(RFLOAT_VALUE(x), RFLOAT_VALUE(y))); default: return rb_num_coerce_bin(x, y, rb_intern("**")); } @@ -1122,7 +1122,7 @@ flo_abs(VALUE flt) { double val = fabs(RFLOAT_VALUE(flt)); - return DOUBLE2NUM(val); + return DBL2NUM(val); } /* @@ -1304,7 +1304,7 @@ 
else number /= f; } - if (ndigits > 0) return DOUBLE2NUM(number); + if (ndigits > 0) return DBL2NUM(number); if (!FIXABLE(number)) { return rb_dbl2big(number); @@ -1493,7 +1493,7 @@ if (err>0.5) err=0.5; n = floor(n + err) + 1; for (i=0; i<n; i++) { - rb_yield(DOUBLE2NUM(i*unit+beg)); + rb_yield(DBL2NUM(i*unit+beg)); } } else { @@ -2098,7 +2098,7 @@ case T_BIGNUM: return rb_big_plus(y, x); case T_FLOAT: - return DOUBLE2NUM((double)FIX2LONG(x) + RFLOAT_VALUE(y)); + return DBL2NUM((double)FIX2LONG(x) + RFLOAT_VALUE(y)); default: return rb_num_coerce_bin(x, y, '+'); } @@ -2132,7 +2132,7 @@ x = rb_int2big(FIX2LONG(x)); return rb_big_minus(x, y); case T_FLOAT: - return DOUBLE2NUM((double)FIX2LONG(x) - RFLOAT_VALUE(y)); + return DBL2NUM((double)FIX2LONG(x) - RFLOAT_VALUE(y)); default: return rb_num_coerce_bin(x, y, '-'); } @@ -2191,7 +2191,7 @@ case T_BIGNUM: return rb_big_mul(y, x); case T_FLOAT: - return DOUBLE2NUM((double)FIX2LONG(x) * RFLOAT_VALUE(y)); + return DBL2NUM((double)FIX2LONG(x) * RFLOAT_VALUE(y)); default: return rb_num_coerce_bin(x, y, '*'); } @@ -2240,13 +2240,13 @@ fix_fdiv(VALUE x, VALUE y) { if (FIXNUM_P(y)) { - return DOUBLE2NUM((double)FIX2LONG(x) / (double)FIX2LONG(y)); + return DBL2NUM((double)FIX2LONG(x) / (double)FIX2LONG(y)); } switch (TYPE(y)) { case T_BIGNUM: - return DOUBLE2NUM((double)FIX2LONG(x) / rb_big2dbl(y)); + return DBL2NUM((double)FIX2LONG(x) / rb_big2dbl(y)); case T_FLOAT: - return DOUBLE2NUM((double)FIX2LONG(x) / RFLOAT_VALUE(y)); + return DBL2NUM((double)FIX2LONG(x) / RFLOAT_VALUE(y)); default: return rb_num_coerce_bin(x, y, rb_intern("fdiv")); } @@ -2271,7 +2271,7 @@ if (op == '/') { div = (double)FIX2LONG(x) / RFLOAT_VALUE(y); - return DOUBLE2NUM(div); + return DBL2NUM(div); } else { if (RFLOAT_VALUE(y) == 0) rb_num_zerodiv(); @@ -2339,7 +2339,7 @@ double mod; flodivmod((double)FIX2LONG(x), RFLOAT_VALUE(y), 0, &mod); - return DOUBLE2NUM(mod); + return DBL2NUM(mod); } default: return rb_num_coerce_bin(x, y, '%'); @@ -2373,7 
+2373,7 @@ flodivmod((double)FIX2LONG(x), RFLOAT_VALUE(y), &div, &mod); a = dbl2ival(div); - b = DOUBLE2NUM(mod); + b = DBL2NUM(mod); return rb_assoc_new(a, b); } default: @@ -2445,7 +2445,7 @@ if (b == 1) return x; if (a == 0) { if (b > 0) return INT2FIX(0); - return DOUBLE2NUM(1.0 / zero); + return DBL2NUM(1.0 / zero); } if (a == 1) return INT2FIX(1); if (a == -1) { @@ -2471,12 +2471,12 @@ x = rb_int2big(FIX2LONG(x)); return rb_big_pow(x, y); case T_FLOAT: - if (RFLOAT_VALUE(y) == 0.0) return DOUBLE2NUM(1.0); + if (RFLOAT_VALUE(y) == 0.0) return DBL2NUM(1.0); if (a == 0) { - return DOUBLE2NUM(RFLOAT_VALUE(y) < 0 ? (1.0 / zero) : 0.0); + return DBL2NUM(RFLOAT_VALUE(y) < 0 ? (1.0 / zero) : 0.0); } - if (a == 1) return DOUBLE2NUM(1.0); - return DOUBLE2NUM(pow((double)a, RFLOAT_VALUE(y))); + if (a == 1) return DBL2NUM(1.0); + return DBL2NUM(pow((double)a, RFLOAT_VALUE(y))); default: return rb_num_coerce_bin(x, y, rb_intern("**")); } @@ -2846,7 +2846,7 @@ val = (double)FIX2LONG(num); - return DOUBLE2NUM(val); + return DBL2NUM(val); } /* @@ -3237,9 +3237,9 @@ rb_define_const(rb_cFloat, "MAX_EXP", INT2FIX(DBL_MAX_EXP)); rb_define_const(rb_cFloat, "MIN_10_EXP", INT2FIX(DBL_MIN_10_EXP)); rb_define_const(rb_cFloat, "MAX_10_EXP", INT2FIX(DBL_MAX_10_EXP)); - rb_define_const(rb_cFloat, "MIN", DOUBLE2NUM(DBL_MIN)); - rb_define_const(rb_cFloat, "MAX", DOUBLE2NUM(DBL_MAX)); - rb_define_const(rb_cFloat, "EPSILON", DOUBLE2NUM(DBL_EPSILON)); + rb_define_const(rb_cFloat, "MIN", DBL2NUM(DBL_MIN)); + rb_define_const(rb_cFloat, "MAX", DBL2NUM(DBL_MAX)); + rb_define_const(rb_cFloat, "EPSILON", DBL2NUM(DBL_EPSILON)); rb_define_method(rb_cFloat, "to_s", flo_to_s, 0); rb_define_method(rb_cFloat, "coerce", flo_coerce, 1); Index: mvm/vm.c =================================================================== --- mvm/vm.c (revision 19180) +++ mvm/vm.c (revision 19181) @@ -1418,8 +1418,8 @@ #if USE_THREAD_DATA_RECYCLE #define RECYCLE_MAX 64 -VALUE *thread_recycle_stack_slot[RECYCLE_MAX]; -int 
thread_recycle_stack_count = 0; +static VALUE *thread_recycle_stack_slot[RECYCLE_MAX]; +static int thread_recycle_stack_count = 0; static VALUE * thread_recycle_stack(int size) Index: mvm/version.h =================================================================== --- mvm/version.h (revision 19180) +++ mvm/version.h (revision 19181) @@ -1,7 +1,7 @@ #define RUBY_VERSION "1.9.0" -#define RUBY_RELEASE_DATE "2008-09-04" +#define RUBY_RELEASE_DATE "2008-09-06" #define RUBY_VERSION_CODE 190 -#define RUBY_RELEASE_CODE 20080904 +#define RUBY_RELEASE_CODE 20080906 #define RUBY_PATCHLEVEL 0 #define RUBY_VERSION_MAJOR 1 @@ -9,7 +9,7 @@ #define RUBY_VERSION_TEENY 0 #define RUBY_RELEASE_YEAR 2008 #define RUBY_RELEASE_MONTH 9 -#define RUBY_RELEASE_DAY 4 +#define RUBY_RELEASE_DAY 6 #ifdef RUBY_EXTERN RUBY_EXTERN const char ruby_version[]; Index: mvm/vm_dump.c =================================================================== --- mvm/vm_dump.c (revision 19180) +++ mvm/vm_dump.c (revision 19181) @@ -184,7 +184,7 @@ } void -env_dump_raw(rb_env_t *env, VALUE *lfp, VALUE *dfp) +vm_env_dump_raw(rb_env_t *env, VALUE *lfp, VALUE *dfp) { int i; fprintf(stderr, "-- env --------------------\n"); @@ -212,7 +212,7 @@ } void -proc_dump_raw(rb_proc_t *proc) +vm_proc_dump_raw(rb_proc_t *proc) { rb_env_t *env; char *selfstr; @@ -222,11 +222,11 @@ fprintf(stderr, "-- proc -------------------\n"); fprintf(stderr, "self: %s\n", selfstr); GetEnvPtr(proc->envval, env); - env_dump_raw(env, proc->block.lfp, proc->block.dfp); + vm_env_dump_raw(env, proc->block.lfp, proc->block.dfp); } void -stack_dump_th(VALUE thval) +vm_stack_dump_th(VALUE thval) { rb_thread_t *th; GetThreadPtr(thval, th); @@ -234,7 +234,7 @@ } void -stack_dump_each(rb_thread_t *th, rb_control_frame_t *cfp) +vm_stack_dump_each(rb_thread_t *th, rb_control_frame_t *cfp) { int i; @@ -283,7 +283,7 @@ VALUE *ptr = dfp - local_size; - stack_dump_each(th, cfp + 1); + vm_stack_dump_each(th, cfp + 1); control_frame_dump(th, cfp); if (lfp != 
dfp) { @@ -314,7 +314,7 @@ } else if (VM_FRAME_TYPE(cfp) == VM_FRAME_MAGIC_FINISH) { if ((th)->stack + (th)->stack_size > (VALUE *)(cfp + 2)) { - stack_dump_each(th, cfp + 1); + vm_stack_dump_each(th, cfp + 1); } else { /* SDR(); */ @@ -327,7 +327,7 @@ void -debug_print_register(rb_thread_t *th) +vm_debug_print_register(rb_thread_t *th) { rb_control_frame_t *cfp = th->cfp; int pc = -1; @@ -350,15 +350,15 @@ } void -thread_dump_regs(VALUE thval) +vm_thread_dump_regs(VALUE thval) { rb_thread_t *th; GetThreadPtr(thval, th); - debug_print_register(th); + vm_debug_print_register(th); } void -debug_print_pre(rb_thread_t *th, rb_control_frame_t *cfp) +vm_debug_print_pre(rb_thread_t *th, rb_control_frame_t *cfp) { rb_iseq_t *iseq = cfp->iseq; @@ -372,12 +372,12 @@ #if VMDEBUG > 3 fprintf(stderr, " (1)"); - debug_print_register(th); + vm_debug_print_register(th); #endif } void -debug_print_post(rb_thread_t *th, rb_control_frame_t *cfp +vm_debug_print_post(rb_thread_t *th, rb_control_frame_t *cfp #if OPT_STACK_CACHING , VALUE reg_a, VALUE reg_b #endif @@ -389,13 +389,13 @@ #if VMDEBUG > 3 fprintf(stderr, " (2)"); - debug_print_register(th); + vm_debug_print_register(th); #endif /* stack_dump_raw(th, cfp); */ #if VMDEBUG > 2 /* stack_dump_thobj(th); */ - stack_dump_each(th, th->cfp); + vm_stack_dump_each(th, th->cfp); #if OPT_STACK_CACHING { VALUE rstr; @@ -550,9 +550,8 @@ #endif - VALUE -thread_dump_state(VALUE self) +vm_thread_dump_state(VALUE self) { rb_thread_t *th; rb_control_frame_t *cfp; Index: mvm/transcode_data.h =================================================================== --- mvm/transcode_data.h (revision 19180) +++ mvm/transcode_data.h (revision 19181) @@ -20,10 +20,7 @@ #define BYTE_LOOKUP_BASE(bl) ((bl)[0]) #define BYTE_LOOKUP_INFO(bl) ((bl)[1]) -#ifndef PType -/* data file needs to treat this as a pointer, to remove warnings */ #define PType (unsigned int) -#endif #define NOMAP (PType 0x01) /* single byte direct map */ #define ONEbt (0x02) /* one byte 
payload */ @@ -66,8 +63,8 @@ /* static structure, one per supported encoding pair */ struct rb_transcoder { - const char *from_encoding; - const char *to_encoding; + const char *src_encoding; + const char *dst_encoding; unsigned int conv_tree_start; const unsigned char *byte_array; unsigned int byte_array_length; Index: mvm/bignum.c =================================================================== --- mvm/bignum.c (revision 19180) +++ mvm/bignum.c (revision 19181) @@ -1244,7 +1244,7 @@ static VALUE rb_big_to_f(VALUE x) { - return DOUBLE2NUM(rb_big2dbl(x)); + return DBL2NUM(rb_big2dbl(x)); } /* @@ -1506,7 +1506,7 @@ return bignorm(bigadd(x, y, 1)); case T_FLOAT: - return DOUBLE2NUM(rb_big2dbl(x) + RFLOAT_VALUE(y)); + return DBL2NUM(rb_big2dbl(x) + RFLOAT_VALUE(y)); default: return rb_num_coerce_bin(x, y, '+'); @@ -1531,7 +1531,7 @@ return bignorm(bigadd(x, y, 0)); case T_FLOAT: - return DOUBLE2NUM(rb_big2dbl(x) - RFLOAT_VALUE(y)); + return DBL2NUM(rb_big2dbl(x) - RFLOAT_VALUE(y)); default: return rb_num_coerce_bin(x, y, '-'); @@ -1595,7 +1595,7 @@ break; case T_FLOAT: - return DOUBLE2NUM(rb_big2dbl(x) * RFLOAT_VALUE(y)); + return DBL2NUM(rb_big2dbl(x) * RFLOAT_VALUE(y)); default: return rb_num_coerce_bin(x, y, '*'); @@ -1640,19 +1640,20 @@ { struct big_div_struct *bds = (struct big_div_struct*)ptr; long nx = bds->nx, ny = bds->ny; - long i, j; + long i, j, nyzero; BDIGIT *yds = bds->yds, *zds = bds->zds; BDIGIT_DBL t2; BDIGIT_DBL_SIGNED num; BDIGIT q; j = nx==ny?nx+1:nx; + for (nyzero = 0; !yds[nyzero]; nyzero++); do { if (bds->stop) return Qnil; if (zds[j] == yds[ny-1]) q = BIGRAD-1; else q = (BDIGIT)((BIGUP(zds[j]) + zds[j-1])/yds[ny-1]); if (q) { - i = 0; num = 0; t2 = 0; + i = nyzero; num = 0; t2 = 0; do { /* multiply and subtract */ BDIGIT_DBL ee; t2 += (BDIGIT_DBL)yds[i] * q; @@ -1824,7 +1825,7 @@ { double div = rb_big2dbl(x) / RFLOAT_VALUE(y); if (op == '/') { - return DOUBLE2NUM(div); + return DBL2NUM(div); } else { return rb_dbl2big(div); @@ -2017,7 
+2018,7 @@ if (ey) y = big_shift(y, ey); bignum: bigdivrem(x, y, &z, 0); - return DOUBLE2NUM(ldexp(big2dbl(z), ex - ey)); + return DBL2NUM(ldexp(big2dbl(z), ex - ey)); } case T_FLOAT: if (isnan(RFLOAT_VALUE(y))) return y; @@ -2042,7 +2043,7 @@ default: return rb_num_coerce_bin(x, y, rb_intern("fdiv")); } - return DOUBLE2NUM(dx / dy); + return DBL2NUM(dx / dy); } static VALUE @@ -2155,7 +2156,7 @@ default: return rb_num_coerce_bin(x, y, rb_intern("**")); } - return DOUBLE2NUM(pow(rb_big2dbl(x), d)); + return DBL2NUM(pow(rb_big2dbl(x), d)); } static VALUE Index: mvm/marshal.c =================================================================== --- mvm/marshal.c (revision 19180) +++ mvm/marshal.c (revision 19181) @@ -1308,7 +1308,7 @@ d = strtod(ptr, &e); d = load_mantissa(d, e, RSTRING_LEN(str) - (e - ptr)); } - v = DOUBLE2NUM(d); + v = DBL2NUM(d); v = r_entry(v, arg); v = r_leave(v, arg); } Index: mvm/tool/transcode-tblgen.rb =================================================================== --- mvm/tool/transcode-tblgen.rb (revision 19180) +++ mvm/tool/transcode-tblgen.rb (revision 19181) @@ -36,11 +36,11 @@ seq = $' set_result = [] set.scan(/[^,]+/) {|range| - if /\A([0-9a-f][0-9a-f])-([0-9a-f][0-9a-f])\z/ =~ range + if /\A([0-9a-f][0-9a-f])-([0-9a-f][0-9a-f])\z/i =~ range b = $1.to_i(16) e = $2.to_i(16) set_result << (b..e) - elsif /\A([0-9a-f][0-9a-f])\z/ =~ range + elsif /\A([0-9a-f][0-9a-f])\z/i =~ range byte = $1.to_i(16) set_result << (byte..byte) else Index: mvm/test/ruby/test_transcode.rb =================================================================== --- mvm/test/ruby/test_transcode.rb (revision 19180) +++ mvm/test/ruby/test_transcode.rb (revision 19181) @@ -247,23 +247,23 @@ def test_invalid_ignore # arguments only - assert_nothing_raised { 'abc'.encode('utf-8', invalid: :ignore) } + assert_nothing_raised { 'abc'.encode('utf-8', invalid: :replace, replace: "") } # check handling of UTF-8 ill-formed subsequences 
assert_equal("\x00\x41\x00\x3E\x00\x42".force_encoding('UTF-16BE'), - "\x41\xC2\x3E\x42".encode('UTF-16BE', 'UTF-8', invalid: :ignore)) + "\x41\xC2\x3E\x42".encode('UTF-16BE', 'UTF-8', invalid: :replace, replace: "")) assert_equal("\x00\x41\x00\xF1\x00\x42".force_encoding('UTF-16BE'), - "\x41\xC2\xC3\xB1\x42".encode('UTF-16BE', 'UTF-8', invalid: :ignore)) + "\x41\xC2\xC3\xB1\x42".encode('UTF-16BE', 'UTF-8', invalid: :replace, replace: "")) assert_equal("\x00\x42".force_encoding('UTF-16BE'), - "\xF0\x80\x80\x42".encode('UTF-16BE', 'UTF-8', invalid: :ignore)) + "\xF0\x80\x80\x42".encode('UTF-16BE', 'UTF-8', invalid: :replace, replace: "")) assert_equal(''.force_encoding('UTF-16BE'), - "\x82\xAB".encode('UTF-16BE', 'UTF-8', invalid: :ignore)) + "\x82\xAB".encode('UTF-16BE', 'UTF-8', invalid: :replace, replace: "")) assert_equal("\e$B!!\e(B".force_encoding("ISO-2022-JP"), - "\xA1\xA1\xFF".encode("ISO-2022-JP", "EUC-JP", invalid: :ignore)) + "\xA1\xA1\xFF".encode("ISO-2022-JP", "EUC-JP", invalid: :replace, replace: "")) assert_equal("\e$B\x24\x22\x24\x24\e(B".force_encoding("ISO-2022-JP"), - "\xA4\xA2\xFF\xA4\xA4".encode("ISO-2022-JP", "EUC-JP", invalid: :ignore)) + "\xA4\xA2\xFF\xA4\xA4".encode("ISO-2022-JP", "EUC-JP", invalid: :replace, replace: "")) assert_equal("\e$B\x24\x22\x24\x24\e(B".force_encoding("ISO-2022-JP"), - "\xA4\xA2\xFF\xFF\xA4\xA4".encode("ISO-2022-JP", "EUC-JP", invalid: :ignore)) + "\xA4\xA2\xFF\xFF\xA4\xA4".encode("ISO-2022-JP", "EUC-JP", invalid: :replace, replace: "")) end def test_invalid_replace @@ -538,4 +538,42 @@ # assert_equal("\x00\x61\xFF\xFD\xFF\xFD\xFF\xFD\xFF\xFD\xFF\xFD\xFF\xFD\x00\x62".force_encoding('UTF-16BE'), # "\x61\xF1\x80\x80\xE1\x80\xC2\x62".encode('UTF-16BE', 'UTF-8', invalid: :replace)) # option 3 end + + def test_yen_sign + check_both_ways("\u005C", "\x5C", "Shift_JIS") + check_both_ways("\u005C", "\x5C", "Windows-31J") + check_both_ways("\u005C", "\x5C", "EUC-JP") + check_both_ways("\u005C", "\x5C", "eucJP-ms") + 
check_both_ways("\u005C", "\x5C", "CP51932") + check_both_ways("\u005C", "\x5C", "ISO-2022-JP") + assert_equal("\u005C", "\e(J\x5C\e(B".encode("UTF-8", "ISO-2022-JP")) + assert_raise(Encoding::ConversionUndefined) { "\u00A5".encode("Shift_JIS") } + assert_raise(Encoding::ConversionUndefined) { "\u00A5".encode("Windows-31J") } + assert_raise(Encoding::ConversionUndefined) { "\u00A5".encode("EUC-JP") } + assert_raise(Encoding::ConversionUndefined) { "\u00A5".encode("eucJP-ms") } + assert_raise(Encoding::ConversionUndefined) { "\u00A5".encode("CP51932") } + + # FULLWIDTH REVERSE SOLIDUS + check_both_ways("\uFF3C", "\x81\x5F", "Shift_JIS") + check_both_ways("\uFF3C", "\x81\x5F", "Windows-31J") + check_both_ways("\uFF3C", "\xA1\xC0", "EUC-JP") + check_both_ways("\uFF3C", "\xA1\xC0", "eucJP-ms") + check_both_ways("\uFF3C", "\xA1\xC0", "CP51932") + end + + def test_tilde_overline + check_both_ways("\u007E", "\x7E", "Shift_JIS") + check_both_ways("\u007E", "\x7E", "Windows-31J") + check_both_ways("\u007E", "\x7E", "EUC-JP") + check_both_ways("\u007E", "\x7E", "eucJP-ms") + check_both_ways("\u007E", "\x7E", "CP51932") + check_both_ways("\u007E", "\x7E", "ISO-2022-JP") + assert_equal("\u007E", "\e(J\x7E\e(B".encode("UTF-8", "ISO-2022-JP")) + assert_raise(Encoding::ConversionUndefined) { "\u203E".encode("Shift_JIS") } + assert_raise(Encoding::ConversionUndefined) { "\u203E".encode("Windows-31J") } + assert_raise(Encoding::ConversionUndefined) { "\u203E".encode("EUC-JP") } + assert_raise(Encoding::ConversionUndefined) { "\u203E".encode("eucJP-ms") } + assert_raise(Encoding::ConversionUndefined) { "\u203E".encode("CP51932") } + end + end Index: mvm/test/ruby/test_io_m17n.rb =================================================================== --- mvm/test/ruby/test_io_m17n.rb (revision 19180) +++ mvm/test/ruby/test_io_m17n.rb (revision 19181) @@ -853,6 +853,7 @@ def test_read_stateful with_pipe("euc-jp:iso-2022-jp") {|r, w| + r.binmode w << "\xA4\xA2" w.close 
assert_equal("\e$B$\"\e(B".force_encoding("iso-2022-jp"), r.read) @@ -1227,24 +1228,74 @@ def test_textmode_read_ascii_incompat_internal with_tmpdir { + # ascii incompatible internal encoding needs binmode. + assert_raise(ArgumentError) { + open("t.utf8.crlf", "rt:utf-8:utf-16be") {|f| } + } + assert_raise(ArgumentError) { + open("t.utf8.crlf", "r:utf-8:utf-16be") {|f| } + } + assert_raise(ArgumentError) { + open("t.utf16.crlf", "rt:utf-16be") {|f| } + } + assert_raise(ArgumentError) { + open("t.utf16.crlf", "r:utf-16be") {|f| } + } + } + end + + def test_binmode_read_ascii_incompat_internal + with_tmpdir { generate_file("t.utf8.crlf", "a\r\nb\r\n") - open("t.utf8.crlf", "rt:utf-8:utf-16be") {|f| + generate_file("t.utf16.crlf", "\0a\0\r\0\n\0b\0\r\0\n") + # ascii incompatible internal encoding needs binmode. + open("t.utf8.crlf", "rb:utf-8:utf-16be") {|f| content = f.read - # textmode doesn't affect for ascii incompatible internal encoding. assert_equal("\0a\0\r\0\n\0b\0\r\0\n".force_encoding("UTF-16BE"), content) } + open("t.utf16.crlf", "rb:utf-16be") {|f| + content = f.read + assert_equal("\0a\0\r\0\n\0b\0\r\0\n".force_encoding("UTF-16BE"), + content) + } } end def test_textmode_write_ascii_incompat_internal with_tmpdir { - open("t.utf8.lf", "wt:utf-8:utf-16be") {|f| + # ascii incompatible internal encoding needs binmode. 
+ assert_raise(ArgumentError) { + open("t.utf8", "wt:utf-8:utf-16be") {|f| } + } + assert_raise(ArgumentError) { + open("t.utf8", "w:utf-8:utf-16be") {|f| } + } + assert_raise(ArgumentError) { + open("t.utf8", "w:utf-8:utf-16be") {|f| } + } + assert_raise(ArgumentError) { + open("t.utf16", "wt:utf-16be") {|f| } + } + assert_raise(ArgumentError) { + open("t.utf16", "w:utf-16be") {|f| } + } + } + end + + def test_binmode_write_ascii_incompat_internal + with_tmpdir { + open("t.utf8.lf", "wb:utf-8:utf-16be") {|f| f.print "\0a\0\n\0b\0\n".force_encoding("UTF-16BE") } content = File.read("t.utf8.lf", :mode=>"rb:ascii-8bit") - # textmode doesn't affect for ascii incompatible internal encoding. assert_equal("a\nb\n", content) + + open("t.utf8.lf", "wb:utf-16be") {|f| + f.print "\0a\0\n\0b\0\n".force_encoding("UTF-16BE") + } + content = File.read("t.utf8.lf", :mode=>"rb:ascii-8bit") + assert_equal("\0a\0\n\0b\0\n", content) } end @@ -1312,14 +1363,14 @@ open("t.txt", "r:utf-8:euc-jp", :invalid => :replace) {|f| assert_equal("a?b", f.read) } - open("t.txt", "r:utf-8:euc-jp", :invalid => :ignore) {|f| + open("t.txt", "r:utf-8:euc-jp", :invalid => :replace, :replace => "") {|f| assert_equal("ab", f.read) } open("t.txt", "r:utf-8:euc-jp", :undef => :replace) {|f| assert_raise(Encoding::InvalidByteSequence) { f.read } assert_equal("b", f.read) } - open("t.txt", "r:utf-8:euc-jp", :undef => :ignore) {|f| + open("t.txt", "r:utf-8:euc-jp", :undef => :replace, :replace => "") {|f| assert_raise(Encoding::InvalidByteSequence) { f.read } assert_equal("b", f.read) } @@ -1332,14 +1383,14 @@ open("t.txt", "r:utf-8:euc-jp", :undef => :replace) {|f| assert_equal("a?b", f.read) } - open("t.txt", "r:utf-8:euc-jp", :undef => :ignore) {|f| + open("t.txt", "r:utf-8:euc-jp", :undef => :replace, :replace => "") {|f| assert_equal("ab", f.read) } open("t.txt", "r:utf-8:euc-jp", :invalid => :replace) {|f| assert_raise(Encoding::ConversionUndefined) { f.read } assert_equal("b", f.read) } - 
open("t.txt", "r:utf-8:euc-jp", :invalid => :ignore) {|f| + open("t.txt", "r:utf-8:euc-jp", :invalid => :replace, :replace => "") {|f| assert_raise(Encoding::ConversionUndefined) { f.read } assert_equal("b", f.read) } @@ -1354,7 +1405,7 @@ } assert_equal("a?b", File.read("t.txt")) - open("t.txt", "w:euc-jp", :invalid => :ignore) {|f| + open("t.txt", "w:euc-jp", :invalid => :replace, :replace => "") {|f| assert_nothing_raised { f.write invalid_utf8 } } assert_equal("ab", File.read("t.txt")) @@ -1362,7 +1413,7 @@ open("t.txt", "w:euc-jp", :undef => :replace) {|f| assert_raise(Encoding::InvalidByteSequence) { f.write invalid_utf8 } } - open("t.txt", "w:euc-jp", :undef => :ignore) {|f| + open("t.txt", "w:euc-jp", :undef => :replace, :replace => "") {|f| assert_raise(Encoding::InvalidByteSequence) { f.write invalid_utf8 } } } @@ -1375,14 +1426,14 @@ assert_nothing_raised { f.write "a\uFFFDb" } } assert_equal("a?b", File.read("t.txt")) - open("t.txt", "w:euc-jp:utf-8", :undef => :ignore) {|f| + open("t.txt", "w:euc-jp:utf-8", :undef => :replace, :replace => "") {|f| assert_nothing_raised { f.write "a\uFFFDb" } } assert_equal("ab", File.read("t.txt")) open("t.txt", "w:euc-jp:utf-8", :invalid => :replace) {|f| assert_raise(Encoding::ConversionUndefined) { f.write "a\uFFFDb" } } - open("t.txt", "w:euc-jp:utf-8", :invalid => :ignore) {|f| + open("t.txt", "w:euc-jp:utf-8", :invalid => :replace, :replace => "") {|f| assert_raise(Encoding::ConversionUndefined) { f.write "a\uFFFDb" } } } @@ -1395,14 +1446,14 @@ assert_nothing_raised { f.write "a\uFFFDb" } } assert_equal("a?b", File.read("t.txt")) - open("t.txt", "w:iso-2022-jp:utf-8", :undef => :ignore) {|f| + open("t.txt", "w:iso-2022-jp:utf-8", :undef => :replace, :replace => "") {|f| assert_nothing_raised { f.write "a\uFFFDb" } } assert_equal("ab", File.read("t.txt")) open("t.txt", "w:iso-2022-jp:utf-8", :invalid => :replace) {|f| assert_raise(Encoding::ConversionUndefined) { f.write "a\uFFFDb" } } - open("t.txt", 
"w:iso-2022-jp:utf-8", :invalid => :ignore) {|f| + open("t.txt", "w:iso-2022-jp:utf-8", :invalid => :replace, :replace => "") {|f| assert_raise(Encoding::ConversionUndefined) { f.write "a\uFFFDb" } } } Index: mvm/test/ruby/test_econv.rb =================================================================== --- mvm/test/ruby/test_econv.rb (revision 19180) +++ mvm/test/ruby/test_econv.rb (revision 19181) @@ -1,8 +1,8 @@ require 'test/unit' class TestEncodingConverter < Test::Unit::TestCase - def check_ec(edst, esrc, eres, dst, src, ec, off, len, flags=0) - res = ec.primitive_convert(src, dst, off, len, flags) + def check_ec(edst, esrc, eres, dst, src, ec, off, len, opts=nil) + res = ec.primitive_convert(src, dst, off, len, opts) assert_equal([edst.dup.force_encoding("ASCII-8BIT"), esrc.dup.force_encoding("ASCII-8BIT"), eres], @@ -11,11 +11,11 @@ res]) end - def assert_econv(converted, eres, obuf_bytesize, ec, consumed, rest, flags=0) + def assert_econv(converted, eres, obuf_bytesize, ec, consumed, rest, opts=nil) ec = Encoding::Converter.new(*ec) if Array === ec i = consumed + rest o = "" - ret = ec.primitive_convert(i, o, 0, obuf_bytesize, flags) + ret = ec.primitive_convert(i, o, 0, obuf_bytesize, opts) assert_equal([converted, eres, rest], [o, ret, i]) end @@ -27,6 +27,24 @@ ec.primitive_errinfo) end + def test_s_stateless_encoding + assert_equal(Encoding::EUC_JP, Encoding::Converter.stateless_encoding("ISO-2022-JP")) + assert_equal(Encoding::EUC_JP, Encoding::Converter.stateless_encoding(Encoding::ISO_2022_JP)) + assert_nil(Encoding::Converter.stateless_encoding("EUC-JP")) + assert_nil(Encoding::Converter.stateless_encoding("UTF-8")) + assert_nil(Encoding::Converter.stateless_encoding("UTF-16BE")) + assert_nil(Encoding::Converter.stateless_encoding(Encoding::UTF_8)) + assert_nil(Encoding::Converter.stateless_encoding("html-attr-escaped")) + end + + def test_stateless_encoding_iso2022jp + slenc = Encoding::Converter.stateless_encoding("ISO-2022-JP") + str = 
"\e$B~~\(B".force_encoding("iso-2022-jp") + str2 = str.encode(slenc) + str3 = str.encode("ISO-2022-JP") + assert_equal(str, str3) + end + def test_new assert_kind_of(Encoding::Converter, Encoding::Converter.new("UTF-8", "EUC-JP")) assert_kind_of(Encoding::Converter, Encoding::Converter.new(Encoding::UTF_8, Encoding::EUC_JP)) @@ -45,6 +63,28 @@ assert(!encoding_list.include?(name2)) end + def test_newline_converter_with_ascii_incompatible + assert_raise(Encoding::NoConverter) { + Encoding::Converter.new("UTF-8", "UTF-16BE", Encoding::Converter::UNIVERSAL_NEWLINE_DECODER) + } + assert_raise(Encoding::NoConverter) { + Encoding::Converter.new("UTF-16BE", "UTF-8", Encoding::Converter::CRLF_NEWLINE_ENCODER) + } + assert_raise(Encoding::NoConverter) { + Encoding::Converter.new("UTF-16BE", "UTF-8", Encoding::Converter::CR_NEWLINE_ENCODER) + } + + assert_nothing_raised { + Encoding::Converter.new("UTF-16BE", "UTF-8", Encoding::Converter::UNIVERSAL_NEWLINE_DECODER) + } + assert_nothing_raised { + Encoding::Converter.new("UTF-8", "UTF-16BE", Encoding::Converter::CRLF_NEWLINE_ENCODER) + } + assert_nothing_raised { + Encoding::Converter.new("UTF-8", "UTF-16BE", Encoding::Converter::CR_NEWLINE_ENCODER) + } + end + def test_get_encoding ec = Encoding::Converter.new("UTF-8", "EUC-JP") assert_equal(Encoding::UTF_8, ec.source_encoding) @@ -61,20 +101,20 @@ def test_output_region ec = Encoding::Converter.new("UTF-8", "EUC-JP") - ec.primitive_convert(src="a", dst="b", nil, 1, Encoding::Converter::PARTIAL_INPUT) + ec.primitive_convert(src="a", dst="b", nil, 1, :partial_input=>true) assert_equal("ba", dst) - ec.primitive_convert(src="a", dst="b", 0, 1, Encoding::Converter::PARTIAL_INPUT) + ec.primitive_convert(src="a", dst="b", 0, 1, :partial_input=>true) assert_equal("a", dst) - ec.primitive_convert(src="a", dst="b", 1, 1, Encoding::Converter::PARTIAL_INPUT) + ec.primitive_convert(src="a", dst="b", 1, 1, :partial_input=>true) assert_equal("ba", dst) assert_raise(ArgumentError) { - 
ec.primitive_convert(src="a", dst="b", 2, 1, Encoding::Converter::PARTIAL_INPUT) + ec.primitive_convert(src="a", dst="b", 2, 1, :partial_input=>true) } assert_raise(ArgumentError) { - ec.primitive_convert(src="a", dst="b", -1, 1, Encoding::Converter::PARTIAL_INPUT) + ec.primitive_convert(src="a", dst="b", -1, 1, :partial_input=>true) } assert_raise(ArgumentError) { - ec.primitive_convert(src="a", dst="b", 1, -1, Encoding::Converter::PARTIAL_INPUT) + ec.primitive_convert(src="a", dst="b", 1, -1, :partial_input=>true) } end @@ -114,7 +154,7 @@ def test_partial_input ec = Encoding::Converter.new("UTF-8", "EUC-JP") - ret = ec.primitive_convert(src="", dst="", nil, 10, Encoding::Converter::PARTIAL_INPUT) + ret = ec.primitive_convert(src="", dst="", nil, 10, :partial_input=>true) assert_equal(:source_buffer_empty, ret) ret = ec.primitive_convert(src="", dst="", nil, 10) assert_equal(:finished, ret) @@ -153,7 +193,7 @@ def test_iso2022jp_encode ec = Encoding::Converter.new("EUC-JP", "ISO-2022-JP") - a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT] + a = ["", src="", ec, nil, 50, :partial_input=>true] src << "a"; check_ec("a", "", :source_buffer_empty, *a) src << "\xA2"; check_ec("a", "", :source_buffer_empty, *a) src << "\xA4"; check_ec("a\e$B\"$", "", :source_buffer_empty, *a) @@ -166,7 +206,7 @@ def test_iso2022jp_decode ec = Encoding::Converter.new("ISO-2022-JP", "EUC-JP") - a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT] + a = ["", src="", ec, nil, 50, :partial_input=>true] src << "a"; check_ec("a", "", :source_buffer_empty, *a) src << "\e"; check_ec("a", "", :source_buffer_empty, *a) src << "$"; check_ec("a", "", :source_buffer_empty, *a) @@ -212,7 +252,7 @@ def test_invalid4 ec = Encoding::Converter.new("Shift_JIS", "EUC-JP") - a = ["", "abc\xFFdef", ec, nil, 10, Encoding::Converter::OUTPUT_FOLLOWED_BY_INPUT] + a = ["", "abc\xFFdef", ec, nil, 10, :output_followed_by_input=>true] check_ec("a", "bc\xFFdef", :output_followed_by_input, 
*a) check_ec("ab", "c\xFFdef", :output_followed_by_input, *a) check_ec("abc", "\xFFdef", :output_followed_by_input, *a) @@ -225,7 +265,7 @@ def test_invalid_utf16le ec = Encoding::Converter.new("UTF-16LE", "UTF-8") - a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT] + a = ["", src="", ec, nil, 50, :partial_input=>true] src << "A"; check_ec("", "", :source_buffer_empty, *a) src << "\x00"; check_ec("A", "", :source_buffer_empty, *a) src << "\x00"; check_ec("A", "", :source_buffer_empty, *a) @@ -244,7 +284,7 @@ def test_invalid_utf16be ec = Encoding::Converter.new("UTF-16BE", "UTF-8") - a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT] + a = ["", src="", ec, nil, 50, :partial_input=>true] src << "\x00"; check_ec("", "", :source_buffer_empty, *a) src << "A"; check_ec("A", "", :source_buffer_empty, *a) src << "\xd8"; check_ec("A", "", :source_buffer_empty, *a) @@ -263,7 +303,7 @@ def test_invalid_utf32be ec = Encoding::Converter.new("UTF-32BE", "UTF-8") - a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT] + a = ["", src="", ec, nil, 50, :partial_input=>true] src << "\x00"; check_ec("", "", :source_buffer_empty, *a) src << "\x00"; check_ec("", "", :source_buffer_empty, *a) src << "\x00"; check_ec("", "", :source_buffer_empty, *a) @@ -287,7 +327,7 @@ def test_invalid_utf32le ec = Encoding::Converter.new("UTF-32LE", "UTF-8") - a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT] + a = ["", src="", ec, nil, 50, :partial_input=>true] src << "A"; check_ec("", "", :source_buffer_empty, *a) src << "\x00"; check_ec("", "", :source_buffer_empty, *a) src << "\x00"; check_ec("", "", :source_buffer_empty, *a) @@ -319,7 +359,7 @@ def test_errors2 ec = Encoding::Converter.new("UTF-16BE", "EUC-JP") - a = ["", "\xFF\xFE\x00A\xDC\x00\x00B", ec, nil, 10, Encoding::Converter::OUTPUT_FOLLOWED_BY_INPUT] + a = ["", "\xFF\xFE\x00A\xDC\x00\x00B", ec, nil, 10, :output_followed_by_input=>true] check_ec("", "\x00A\xDC\x00\x00B", 
:undefined_conversion, *a) check_ec("A", "\xDC\x00\x00B", :output_followed_by_input, *a) check_ec("A", "\x00B", :invalid_byte_sequence, *a) @@ -328,8 +368,8 @@ end def test_universal_newline - ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::UNIVERSAL_NEWLINE_DECODER) - a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT] + ec = Encoding::Converter.new("UTF-8", "EUC-JP", universal_newline_decoder: true) + a = ["", src="", ec, nil, 50, :partial_input=>true] src << "abc\r\ndef"; check_ec("abc\ndef", "", :source_buffer_empty, *a) src << "ghi\njkl"; check_ec("abc\ndefghi\njkl", "", :source_buffer_empty, *a) src << "mno\rpqr"; check_ec("abc\ndefghi\njklmno\npqr", "", :source_buffer_empty, *a) @@ -339,8 +379,8 @@ end def test_universal_newline2 - ec = Encoding::Converter.new("", "", Encoding::Converter::UNIVERSAL_NEWLINE_DECODER) - a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT] + ec = Encoding::Converter.new("", "", universal_newline_decoder: true) + a = ["", src="", ec, nil, 50, :partial_input=>true] src << "abc\r\ndef"; check_ec("abc\ndef", "", :source_buffer_empty, *a) src << "ghi\njkl"; check_ec("abc\ndefghi\njkl", "", :source_buffer_empty, *a) src << "mno\rpqr"; check_ec("abc\ndefghi\njklmno\npqr", "", :source_buffer_empty, *a) @@ -350,28 +390,28 @@ end def test_crlf_newline - ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::CRLF_NEWLINE_ENCODER) + ec = Encoding::Converter.new("UTF-8", "EUC-JP", crlf_newline_encoder: true) assert_econv("abc\r\ndef", :finished, 50, ec, "abc\ndef", "") end def test_crlf_newline2 - ec = Encoding::Converter.new("", "", Encoding::Converter::CRLF_NEWLINE_ENCODER) + ec = Encoding::Converter.new("", "", crlf_newline_encoder: true) assert_econv("abc\r\ndef", :finished, 50, ec, "abc\ndef", "") end def test_cr_newline - ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::CR_NEWLINE_ENCODER) + ec = Encoding::Converter.new("UTF-8", "EUC-JP", 
cr_newline_encoder: true) assert_econv("abc\rdef", :finished, 50, ec, "abc\ndef", "") end def test_cr_newline2 - ec = Encoding::Converter.new("", "", Encoding::Converter::CR_NEWLINE_ENCODER) + ec = Encoding::Converter.new("", "", cr_newline_encoder: true) assert_econv("abc\rdef", :finished, 50, ec, "abc\ndef", "") end def test_output_followed_by_input ec = Encoding::Converter.new("UTF-8", "EUC-JP") - a = ["", "abc\u{3042}def", ec, nil, 100, Encoding::Converter::OUTPUT_FOLLOWED_BY_INPUT] + a = ["", "abc\u{3042}def", ec, nil, 100, :output_followed_by_input=>true] check_ec("a", "bc\u{3042}def", :output_followed_by_input, *a) check_ec("ab", "c\u{3042}def", :output_followed_by_input, *a) check_ec("abc", "\u{3042}def", :output_followed_by_input, *a) @@ -408,7 +448,7 @@ def test_errinfo_valid_partial_character ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1") - ec.primitive_convert(src="\xa4", dst="", nil, 10, Encoding::Converter::PARTIAL_INPUT) + ec.primitive_convert(src="\xa4", dst="", nil, 10, :partial_input=>true) assert_errinfo(:source_buffer_empty, nil, nil, nil, nil, ec) end @@ -428,23 +468,23 @@ def test_output_iso2022jp ec = Encoding::Converter.new("EUC-JP", "ISO-2022-JP") - ec.primitive_convert(src="\xa1\xa1", dst="", nil, 10, Encoding::Converter::PARTIAL_INPUT) + ec.primitive_convert(src="\xa1\xa1", dst="", nil, 10, :partial_input=>true) assert_equal("\e$B!!".force_encoding("ISO-2022-JP"), dst) assert_equal(nil, ec.insert_output("???")) - ec.primitive_convert("", dst, nil, 10, Encoding::Converter::PARTIAL_INPUT) + ec.primitive_convert("", dst, nil, 10, :partial_input=>true) assert_equal("\e$B!!\e(B???".force_encoding("ISO-2022-JP"), dst) - ec.primitive_convert(src="\xa1\xa2", dst, nil, 10, Encoding::Converter::PARTIAL_INPUT) + ec.primitive_convert(src="\xa1\xa2", dst, nil, 10, :partial_input=>true) assert_equal("\e$B!!\e(B???\e$B!\"".force_encoding("ISO-2022-JP"), dst) assert_equal(nil, ec.insert_output("\xA1\xA1".force_encoding("EUC-JP"))) - 
ec.primitive_convert("", dst, nil, 10, Encoding::Converter::PARTIAL_INPUT) + ec.primitive_convert("", dst, nil, 10, :partial_input=>true) assert_equal("\e$B!!\e(B???\e$B!\"!!".force_encoding("ISO-2022-JP"), dst) - ec.primitive_convert(src="\xa1\xa3", dst, nil, 10, Encoding::Converter::PARTIAL_INPUT) + ec.primitive_convert(src="\xa1\xa3", dst, nil, 10, :partial_input=>true) assert_equal("\e$B!!\e(B???\e$B!\"!!!\#".force_encoding("ISO-2022-JP"), dst) assert_equal(nil, ec.insert_output("\u3042")) - ec.primitive_convert("", dst, nil, 10, Encoding::Converter::PARTIAL_INPUT) + ec.primitive_convert("", dst, nil, 10, :partial_input=>true) assert_equal("\e$B!!\e(B???\e$B!\"!!!\#$\"".force_encoding("ISO-2022-JP"), dst) assert_raise(Encoding::ConversionUndefined) { @@ -516,7 +556,7 @@ end def test_invalid_replace - ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::INVALID_REPLACE) + ec = Encoding::Converter.new("UTF-8", "EUC-JP", invalid: :replace) ret = ec.primitive_convert(src="abc\x80def", dst="", nil, 100) assert_equal(:finished, ret) assert_equal("", src) @@ -524,7 +564,7 @@ end def test_invalid_ignore - ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::INVALID_IGNORE) + ec = Encoding::Converter.new("UTF-8", "EUC-JP", :invalid => :replace, :replace => "") ret = ec.primitive_convert(src="abc\x80def", dst="", nil, 100) assert_equal(:finished, ret) assert_equal("", src) @@ -532,7 +572,7 @@ end def test_undef_replace - ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::UNDEF_REPLACE) + ec = Encoding::Converter.new("UTF-8", "EUC-JP", :undef => :replace) ret = ec.primitive_convert(src="abc\u{fffd}def", dst="", nil, 100) assert_equal(:finished, ret) assert_equal("", src) @@ -540,7 +580,7 @@ end def test_undef_ignore - ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::UNDEF_IGNORE) + ec = Encoding::Converter.new("UTF-8", "EUC-JP", :undef => :replace, :replace => "") ret = 
ec.primitive_convert(src="abc\u{fffd}def", dst="", nil, 100) assert_equal(:finished, ret) assert_equal("", src) @@ -561,7 +601,7 @@ def test_noconv_partial ec = Encoding::Converter.new("", "") - a = ["", "abcdefg", ec, nil, 2, Encoding::Converter::PARTIAL_INPUT] + a = ["", "abcdefg", ec, nil, 2, :partial_input=>true] check_ec("ab", "cdefg", :destination_buffer_full, *a) check_ec("abcd", "efg", :destination_buffer_full, *a) check_ec("abcdef", "g", :destination_buffer_full, *a) @@ -570,7 +610,7 @@ def test_noconv_output_followed_by_input ec = Encoding::Converter.new("", "") - a = ["", "abcdefg", ec, nil, 2, Encoding::Converter::OUTPUT_FOLLOWED_BY_INPUT] + a = ["", "abcdefg", ec, nil, 2, :output_followed_by_input=>true] check_ec("a", "bcdefg", :output_followed_by_input, *a) check_ec("ab", "cdefg", :output_followed_by_input, *a) check_ec("abc", "defg", :output_followed_by_input, *a) @@ -650,7 +690,7 @@ end def test_set_replacement - ec = Encoding::Converter.new("utf-8", "us-ascii", Encoding::Converter::UNDEF_REPLACE) + ec = Encoding::Converter.new("utf-8", "us-ascii", :undef => :replace) ec.replacement = "<undef>" assert_equal("a <undef> b", ec.convert("a \u3042 b")) end @@ -661,4 +701,78 @@ ec = Encoding::Converter.new("utf-8", "us-ascii", :undef => :replace, :replace => "X") assert_equal("a X b", ec.convert("a \u3042 b")) end + + def test_hex_charref + ec = Encoding::Converter.new("UTF-8", "US-ASCII", Encoding::Converter::UNDEF_HEX_CHARREF) + assert_equal("&#x3042;", ec.convert("\u3042")) + + ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::UNDEF_HEX_CHARREF) + assert_equal("\xa4\xcf\xa4\xa1\xa4\xa4&#x2665;\xa1\xa3".force_encoding("euc-jp"), + ec.convert("\u{306f 3041 3044 2665 3002}")) + + ec = Encoding::Converter.new("UTF-8", "ISO-2022-JP", Encoding::Converter::UNDEF_HEX_CHARREF) + assert_equal("\e$B$O$!$$\e(B&#x2665;\e$B!#".force_encoding("ISO-2022-JP"), + ec.convert("\u{306f 3041 3044 2665 3002}")) + assert_equal("\e(B".force_encoding("ISO-2022-JP"), + ec.finish) 
+ + ec = Encoding::Converter.new("EUC-JP", "US-ASCII", Encoding::Converter::UNDEF_HEX_CHARREF) + assert_equal("&#x4EA4;&#x63DB;&#x6CD5;&#x5247;: n&#xD7;m=m&#xD7;n".force_encoding("ISO-8859-1"), + ec.convert("\xB8\xF2\xB4\xB9\xCB\xA1\xC2\xA7: n\xA1\xDFm=m\xA1\xDFn")) + + ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1", Encoding::Converter::UNDEF_HEX_CHARREF) + assert_equal("&#x4EA4;&#x63DB;&#x6CD5;&#x5247;: n\xD7m=m\xD7n".force_encoding("ISO-8859-1"), + ec.convert("\xB8\xF2\xB4\xB9\xCB\xA1\xC2\xA7: n\xA1\xDFm=m\xA1\xDFn")) + + ec = Encoding::Converter.new("UTF-8", "US-ASCII", Encoding::Converter::UNDEF_HEX_CHARREF) + assert_equal("&", ec.convert("&")) + end + + def test_html_escape_text + ec = Encoding::Converter.new("", "amp-escaped") + assert_equal('&amp;<>"', ec.convert("&<>\"")) + assert_equal('', ec.finish) + + ec = Encoding::Converter.new("", "html-text-escaped") + assert_equal('&amp;&lt;&gt;"', ec.convert("&<>\"")) + assert_equal('', ec.finish) + end + + def test_html_escape_attr + ec = Encoding::Converter.new("", "html-attr-escaped") + assert_equal('""', ec.finish) + + ec = Encoding::Converter.new("", "html-attr-escaped") + assert_equal('', ec.convert("")) + assert_equal('""', ec.finish) + + ec = Encoding::Converter.new("", "html-attr-escaped") + assert_equal('"&quot;', ec.convert('"')) + assert_equal('"', ec.finish) + + ec = Encoding::Converter.new("", "html-attr-escaped") + assert_equal('"&amp;&lt;&gt;&quot;', ec.convert("&<>\"")) + assert_equal('"', ec.finish) + end + + def test_html_escape_with_charref + ec = Encoding::Converter.new("utf-8", "euc-jp", Encoding::Converter::HTML_TEXT_ENCODER|Encoding::Converter::UNDEF_HEX_CHARREF) + assert_equal('&lt;&#x2665;&gt;&amp;"&#x2661;"', ec.convert("<\u2665>&\"\u2661\"")) + assert_equal('', ec.finish) + + ec = Encoding::Converter.new("utf-8", "euc-jp", Encoding::Converter::HTML_ATTR_ENCODER|Encoding::Converter::UNDEF_HEX_CHARREF) + assert_equal('"&lt;&#x2665;&gt;&amp;&quot;&#x2661;&quot;', ec.convert("<\u2665>&\"\u2661\"")) + assert_equal('"', ec.finish) + + ec = Encoding::Converter.new("utf-8", "iso-2022-jp", Encoding::Converter::HTML_TEXT_ENCODER) + 
assert_equal("&amp;\e$B$&\e(B&amp;".force_encoding("iso-2022-jp"), ec.convert("&\u3046&")) + assert_equal('', ec.finish) + end + + def test_html_hasharg + assert_equal("&amp;\e$B$&\e(B&#x2665;&amp;\"'".force_encoding("iso-2022-jp"), + "&\u3046\u2665&\"'".encode("iso-2022-jp", html: :text)) + assert_equal("\"&amp;\e$B$&\e(B&#x2661;&amp;&quot;'\"".force_encoding("iso-2022-jp"), + "&\u3046\u2661&\"'".encode("iso-2022-jp", html: :attr)) + end end Index: mvm/test/ruby/test_complex.rb =================================================================== --- mvm/test/ruby/test_complex.rb (revision 19180) +++ mvm/test/ruby/test_complex.rb (revision 19181) @@ -105,6 +105,7 @@ end def test_new + assert_instance_of(Complex, Complex.__send__(:new, 2,0.0)) if defined?(Complex::Unify) assert_instance_of(Fixnum, Complex.__send__(:new, 2,0)) else @@ -172,6 +173,9 @@ assert_equal(Complex.__send__(:new, 1),Complex(1)) assert_equal(Complex.__send__(:new, 1),Complex('1')) assert_raise(ArgumentError){Complex(nil)} + assert_raise(ArgumentError){Complex(Object.new)} + assert_raise(ArgumentError){Complex()} + assert_raise(ArgumentError){Complex(1,2,3)} end def test_attr @@ -1033,6 +1037,12 @@ end + def test_ruby19 + assert_raise(NoMethodError){ Complex.new(1) } + assert_raise(NoMethodError){ Complex.new!(1) } + assert_raise(NoMethodError){ Complex.reduce(1) } + end + def test_fixed_bug if defined?(Rational) && !Rational.instance_variable_get('@RCS_ID') assert_equal(Complex(1), 1 ** Complex(1)) Index: mvm/test/ruby/test_rational.rb =================================================================== --- mvm/test/ruby/test_rational.rb (revision 19180) +++ mvm/test/ruby/test_rational.rb (revision 19181) @@ -209,6 +209,8 @@ assert_raise(ArgumentError){Rational(nil)} assert_raise(ArgumentError){Rational('')} assert_raise(ArgumentError){Rational(Object.new)} + assert_raise(ArgumentError){Rational()} + assert_raise(ArgumentError){Rational(1,2,3)} end def test_attr @@ -1067,6 +1069,11 @@ assert_equal(0.25, Rational(1,2).fdiv(2)) end + def 
test_ruby19 + assert_raise(NoMethodError){ Rational.new(1) } + assert_raise(NoMethodError){ Rational.new!(1) } + end + def test_fixed_bug if defined?(Rational::Unify) assert_instance_of(Fixnum, Rational(1,2) ** 0) # mathn's bug Index: mvm/test/fileutils/test_fileutils.rb =================================================================== --- mvm/test/fileutils/test_fileutils.rb (revision 19180) +++ mvm/test/fileutils/test_fileutils.rb (revision 19181) @@ -977,8 +977,8 @@ @f = f end - def read(n) - @f.read(n) + def read(*args) + @f.read(*args) end def write(str) Index: mvm/random.c =================================================================== --- mvm/random.c (revision 19180) +++ mvm/random.c (revision 19181) @@ -505,7 +505,7 @@ limit = (struct RBignum *)rb_big_minus((VALUE)limit, INT2FIX(1)); if (FIXNUM_P((VALUE)limit)) { if (FIX2LONG((VALUE)limit) == -1) - return DOUBLE2NUM(genrand_real(mt)); + return DBL2NUM(genrand_real(mt)); return LONG2NUM(limited_rand(mt, FIX2LONG((VALUE)limit))); } return limited_big_rand(mt, limit); @@ -522,7 +522,7 @@ } if (max == 0) { - return DOUBLE2NUM(genrand_real(mt)); + return DBL2NUM(genrand_real(mt)); } if (max < 0) max = -max; val = limited_rand(mt, max-1); Index: mvm/transcode.c =================================================================== --- mvm/transcode.c (revision 19180) +++ mvm/transcode.c (revision 19181) @@ -11,7 +11,6 @@ #include "ruby/ruby.h" #include "ruby/encoding.h" -#define PType (int) #include "transcode_data.h" #include <ctype.h> @@ -22,6 +21,11 @@ VALUE rb_cEncodingConverter; static VALUE sym_invalid, sym_undef, sym_ignore, sym_replace; +static VALUE sym_html, sym_text, sym_attr; +static VALUE sym_universal_newline_decoder; +static VALUE sym_crlf_newline_encoder; +static VALUE sym_cr_newline_encoder; +static VALUE sym_partial_input; static VALUE sym_invalid_byte_sequence; static VALUE sym_undefined_conversion; @@ -31,6 +35,12 @@ static VALUE sym_output_followed_by_input; static VALUE 
sym_incomplete_input; +static unsigned char * +allocate_converted_string(const char *sname, const char *dname, + const unsigned char *str, size_t len, + unsigned char *caller_dst_buf, size_t caller_dst_bufsize, + size_t *dst_len_ptr); + /* dynamic structure, one per conversion (similar to iconv_t) */ /* may carry conversion state (e.g. for iso-2022-jp) */ typedef struct rb_transcoding { @@ -124,9 +134,11 @@ * Dispatch data and logic */ +#define SUPPLEMENTAL_CONVERSION(sname, dname) (*(sname) == '\0' || *(dname) == '\0') + typedef struct { - const char *from; - const char *to; + const char *sname; + const char *dname; const char *lib; /* maybe null. it means that don't load the library. */ const rb_transcoder *transcoder; } transcoder_entry_t; @@ -134,39 +146,39 @@ static st_table *transcoder_table; static transcoder_entry_t * -make_transcoder_entry(const char *from, const char *to) +make_transcoder_entry(const char *sname, const char *dname) { st_data_t val; st_table *table2; - if (!st_lookup(transcoder_table, (st_data_t)from, &val)) { + if (!st_lookup(transcoder_table, (st_data_t)sname, &val)) { val = (st_data_t)st_init_strcasetable(); - st_add_direct(transcoder_table, (st_data_t)from, val); + st_add_direct(transcoder_table, (st_data_t)sname, val); } table2 = (st_table *)val; - if (!st_lookup(table2, (st_data_t)to, &val)) { + if (!st_lookup(table2, (st_data_t)dname, &val)) { transcoder_entry_t *entry = ALLOC(transcoder_entry_t); - entry->from = from; - entry->to = to; + entry->sname = sname; + entry->dname = dname; entry->lib = NULL; entry->transcoder = NULL; val = (st_data_t)entry; - st_add_direct(table2, (st_data_t)to, val); + st_add_direct(table2, (st_data_t)dname, val); } return (transcoder_entry_t *)val; } static transcoder_entry_t * -get_transcoder_entry(const char *from, const char *to) +get_transcoder_entry(const char *sname, const char *dname) { st_data_t val; st_table *table2; - if (!st_lookup(transcoder_table, (st_data_t)from, &val)) { + if 
(!st_lookup(transcoder_table, (st_data_t)sname, &val)) { return NULL; } table2 = (st_table *)val; - if (!st_lookup(table2, (st_data_t)to, &val)) { + if (!st_lookup(table2, (st_data_t)dname, &val)) { return NULL; } return (transcoder_entry_t *)val; @@ -175,26 +187,26 @@ void rb_register_transcoder(const rb_transcoder *tr) { - const char *const from_e = tr->from_encoding; - const char *const to_e = tr->to_encoding; + const char *const sname = tr->src_encoding; + const char *const dname = tr->dst_encoding; transcoder_entry_t *entry; - entry = make_transcoder_entry(from_e, to_e); + entry = make_transcoder_entry(sname, dname); if (entry->transcoder) { rb_raise(rb_eArgError, "transcoder from %s to %s has been already registered", - from_e, to_e); + sname, dname); } entry->transcoder = tr; } static void -declare_transcoder(const char *from, const char *to, const char *lib) +declare_transcoder(const char *sname, const char *dname, const char *lib) { transcoder_entry_t *entry; - entry = make_transcoder_entry(from, to); + entry = make_transcoder_entry(sname, dname); entry->lib = lib; } @@ -228,27 +240,27 @@ static int transcode_search_path_i(st_data_t key, st_data_t val, st_data_t arg) { - const char *to = (const char *)key; + const char *dname = (const char *)key; search_path_bfs_t *bfs = (search_path_bfs_t *)arg; search_path_queue_t *q; - if (st_lookup(bfs->visited, (st_data_t)to, &val)) { + if (st_lookup(bfs->visited, (st_data_t)dname, &val)) { return ST_CONTINUE; } q = ALLOC(search_path_queue_t); - q->enc = to; + q->enc = dname; q->next = NULL; *bfs->queue_last_ptr = q; bfs->queue_last_ptr = &q->next; - st_add_direct(bfs->visited, (st_data_t)to, (st_data_t)bfs->base_enc); + st_add_direct(bfs->visited, (st_data_t)dname, (st_data_t)bfs->base_enc); return ST_CONTINUE; } static int -transcode_search_path(const char *from, const char *to, - void (*callback)(const char *from, const char *to, int depth, void *arg), +transcode_search_path(const char *sname, const char *dname, + 
void (*callback)(const char *sname, const char *dname, int depth, void *arg), void *arg) { search_path_bfs_t bfs; @@ -258,17 +270,17 @@ int found; int pathlen; - if (encoding_equal(from, to)) + if (encoding_equal(sname, dname)) return -1; q = ALLOC(search_path_queue_t); - q->enc = from; + q->enc = sname; q->next = NULL; bfs.queue_last_ptr = &q->next; bfs.queue = q; bfs.visited = st_init_strcasetable(); - st_add_direct(bfs.visited, (st_data_t)from, (st_data_t)NULL); + st_add_direct(bfs.visited, (st_data_t)sname, (st_data_t)NULL); while (bfs.queue) { q = bfs.queue; @@ -282,8 +294,8 @@ } table2 = (st_table *)val; - if (st_lookup(table2, (st_data_t)to, &val)) { - st_add_direct(bfs.visited, (st_data_t)to, (st_data_t)q->enc); + if (st_lookup(table2, (st_data_t)dname, &val)) { + st_add_direct(bfs.visited, (st_data_t)dname, (st_data_t)q->enc); xfree(q); found = 1; goto cleanup; @@ -305,7 +317,7 @@ } if (found) { - const char *enc = to; + const char *enc = dname; int depth; pathlen = 0; while (1) { @@ -316,7 +328,7 @@ enc = (const char *)val; } depth = pathlen; - enc = to; + enc = dname; while (1) { st_lookup(bfs.visited, (st_data_t)enc, &val); if (!val) @@ -846,56 +858,99 @@ return ec; } +struct trans_open_t { + transcoder_entry_t **entries; + int num_additional; +}; + static void -trans_open_i(const char *from, const char *to, int depth, void *arg) +trans_open_i(const char *sname, const char *dname, int depth, void *arg) { - transcoder_entry_t ***entries_ptr = arg; - transcoder_entry_t **entries; + struct trans_open_t *toarg = arg; - if (!*entries_ptr) { - entries = ALLOC_N(transcoder_entry_t *, depth+1+2); - *entries_ptr = entries; + if (!toarg->entries) { + toarg->entries = ALLOC_N(transcoder_entry_t *, depth+1+toarg->num_additional); } - else { - entries = *entries_ptr; - } - entries[depth] = get_transcoder_entry(from, to); + toarg->entries[depth] = get_transcoder_entry(sname, dname); } rb_econv_t * -rb_econv_open(const char *from, const char *to, int ecflags) 
+rb_econv_open(const char *sname, const char *dname, int ecflags) { transcoder_entry_t **entries = NULL; int num_trans; - int num_additional; static rb_econv_t *ec; int universal_newline_decoder_added = 0; rb_encoding *senc, *denc; int sidx, didx; + int num_encoders, num_decoders; + transcoder_entry_t *encoders[4], *decoders[1]; + + if ((ecflags & ECONV_CRLF_NEWLINE_ENCODER) && + (ecflags & ECONV_CR_NEWLINE_ENCODER)) + return NULL; + + if ((ecflags & (ECONV_CRLF_NEWLINE_ENCODER|ECONV_CR_NEWLINE_ENCODER)) && + (ecflags & ECONV_UNIVERSAL_NEWLINE_DECODER)) + return NULL; + + if ((ecflags & ECONV_HTML_TEXT_ENCODER) && + (ecflags & ECONV_HTML_ATTR_ENCODER)) + return NULL; + + num_encoders = 0; + if (ecflags & ECONV_CRLF_NEWLINE_ENCODER) + if (!(encoders[num_encoders++] = get_transcoder_entry("", "crlf_newline"))) + return NULL; + if (ecflags & ECONV_CR_NEWLINE_ENCODER) + if (!(encoders[num_encoders++] = get_transcoder_entry("", "cr_newline"))) + return NULL; + if (ecflags & ECONV_HTML_TEXT_ENCODER) + if (!(encoders[num_encoders++] = get_transcoder_entry("", "html-text-escaped"))) + return NULL; + if (ecflags & ECONV_HTML_ATTR_ENCODER) + if (!(encoders[num_encoders++] = get_transcoder_entry("", "html-attr-escaped"))) + return NULL; + + num_decoders = 0; + if (ecflags & ECONV_UNIVERSAL_NEWLINE_DECODER) + if (!(decoders[num_decoders++] = get_transcoder_entry("universal_newline", ""))) + return NULL; + senc = NULL; - if (*from) { - sidx = rb_enc_find_index(from); + if (*sname) { + sidx = rb_enc_find_index(sname); if (0 <= sidx) { senc = rb_enc_from_index(sidx); } } denc = NULL; - if (*to) { - didx = rb_enc_find_index(to); + if (*dname) { + didx = rb_enc_find_index(dname); if (0 <= didx) { denc = rb_enc_from_index(didx); } } - if (*from == '\0' && *to == '\0') { + if (*sname && (!senc || !rb_enc_asciicompat(senc)) && num_encoders) + return NULL; + + if (*dname && (!denc || !rb_enc_asciicompat(denc)) && num_decoders) + return NULL; + + if (*sname == '\0' && *dname == '\0') { 
num_trans = 0; - entries = ALLOC_N(transcoder_entry_t *, 1+2); + entries = ALLOC_N(transcoder_entry_t *, num_encoders+num_decoders); } else { - num_trans = transcode_search_path(from, to, trans_open_i, (void *)&entries); + struct trans_open_t toarg; + toarg.entries = NULL; + toarg.num_additional = num_encoders+num_decoders; + num_trans = transcode_search_path(sname, dname, trans_open_i, (void *)&toarg); + entries = toarg.entries; } if (num_trans < 0 || !entries) { @@ -903,42 +958,11 @@ return NULL; } - num_additional = 0; - if ((!*from || (senc && rb_enc_asciicompat(senc))) && - (ecflags & (ECONV_CRLF_NEWLINE_ENCODER|ECONV_CR_NEWLINE_ENCODER))) { - const char *name = (ecflags & ECONV_CRLF_NEWLINE_ENCODER) ? "crlf_newline" : "cr_newline"; - transcoder_entry_t *e = get_transcoder_entry("", name); - if (ecflags & ECONV_CRLF_NEWLINE_ENCODER) - ecflags &= ~ECONV_CR_NEWLINE_ENCODER; - else - ecflags &= ~ECONV_CRLF_NEWLINE_ENCODER; - if (!e) { - xfree(entries); - return NULL; - } - MEMMOVE(entries+1, entries, transcoder_entry_t *, num_trans); - entries[0] = e; - num_trans++; - num_additional++; - } - else { - ecflags &= ~(ECONV_CRLF_NEWLINE_ENCODER|ECONV_CR_NEWLINE_ENCODER); - } + MEMMOVE(entries+num_encoders, entries, transcoder_entry_t *, num_trans); + MEMMOVE(entries, encoders, transcoder_entry_t *, num_encoders); + MEMMOVE(entries+num_encoders+num_trans, decoders, transcoder_entry_t *, num_decoders); - if ((!*to || (denc && rb_enc_asciicompat(denc))) && - (ecflags & ECONV_UNIVERSAL_NEWLINE_DECODER)) { - transcoder_entry_t *e = get_transcoder_entry("universal_newline", ""); - if (!e) { - xfree(entries); - return NULL; - } - entries[num_trans++] = e; - num_additional++; - universal_newline_decoder_added = 1; - } - else { - ecflags &= ~ECONV_UNIVERSAL_NEWLINE_DECODER; - } + num_trans += num_encoders + num_decoders; ec = rb_econv_open_by_transcoder_entries(num_trans, entries); xfree(entries); @@ -946,10 +970,10 @@ return NULL; ec->flags = ecflags; - 
ec->source_encoding_name = from; - ec->destination_encoding_name = to; + ec->source_encoding_name = sname; + ec->destination_encoding_name = dname; - if (num_trans == num_additional) { + if (num_trans == num_encoders + num_decoders) { ec->last_tc = NULL; ec->last_trans_index = -1; } @@ -1246,8 +1270,8 @@ res == econv_undefined_conversion) { rb_transcoding *error_tc = ec->elems[result_position].tc; ec->last_error.error_tc = error_tc; - ec->last_error.source_encoding = error_tc->transcoder->from_encoding; - ec->last_error.destination_encoding = error_tc->transcoder->to_encoding; + ec->last_error.source_encoding = error_tc->transcoder->src_encoding; + ec->last_error.destination_encoding = error_tc->transcoder->dst_encoding; ec->last_error.error_bytes_start = TRANSCODING_READBUF(error_tc); ec->last_error.error_bytes_len = error_tc->recognized_len; ec->last_error.readagain_len = error_tc->readagain_len; @@ -1258,6 +1282,62 @@ static int output_replacement_character(rb_econv_t *ec); +static int +output_hex_charref(rb_econv_t *ec) +{ + int ret; + unsigned char utfbuf[1024]; + const unsigned char *utf; + size_t utf_len; + int utf_allocated = 0; + char charef_buf[16]; + const unsigned char *p; + + if (encoding_equal(ec->last_error.source_encoding, "UTF-32BE")) { + utf = ec->last_error.error_bytes_start; + utf_len = ec->last_error.error_bytes_len; + } + else { + utf = allocate_converted_string(ec->last_error.source_encoding, "UTF-32BE", + ec->last_error.error_bytes_start, ec->last_error.error_bytes_len, + utfbuf, sizeof(utfbuf), + &utf_len); + if (!utf) + return -1; + if (utf != utfbuf && utf != ec->last_error.error_bytes_start) + utf_allocated = 1; + } + + if (utf_len % 4 != 0) + goto fail; + + p = utf; + while (4 <= utf_len) { + unsigned int u = 0; + u += p[0] << 24; + u += p[1] << 16; + u += p[2] << 8; + u += p[3]; + snprintf(charef_buf, sizeof(charef_buf), "&#x%X;", u); + + ret = rb_econv_insert_output(ec, (unsigned char *)charef_buf, strlen(charef_buf), "US-ASCII"); + 
if (ret == -1) + goto fail; + + p += 4; + utf_len -= 4; + } + + if (utf_allocated) + xfree((void *)utf); + return 0; + + fail: + if (utf_allocated) + xfree((void *)utf); + return -1; +} + rb_econv_result_t rb_econv_convert(rb_econv_t *ec, const unsigned char **input_ptr, const unsigned char *input_stop, @@ -1286,10 +1366,8 @@ ret == econv_incomplete_input) { /* deal with invalid byte sequence */ /* todo: add more alternative behaviors */ - if (ec->flags&ECONV_INVALID_IGNORE) { - goto resume; - } - else if (ec->flags&ECONV_INVALID_REPLACE) { + switch (ec->flags & ECONV_INVALID_MASK) { + case ECONV_INVALID_REPLACE: if (output_replacement_character(ec) == 0) goto resume; } @@ -1299,13 +1377,17 @@ /* valid character in source encoding * but no related character(s) in destination encoding */ /* todo: add more alternative behaviors */ - if (ec->flags&ECONV_UNDEF_IGNORE) { - goto resume; - } - else if (ec->flags&ECONV_UNDEF_REPLACE) { + switch (ec->flags & ECONV_UNDEF_MASK) { + case ECONV_UNDEF_REPLACE: if (output_replacement_character(ec) == 0) goto resume; - } + break; + + case ECONV_UNDEF_HEX_CHARREF: + if (output_hex_charref(ec) == 0) + goto resume; + break; + } } return ret; @@ -1323,18 +1405,19 @@ tr = tc->transcoder; if (tr->stateful_type == stateful_encoder) - return tr->from_encoding; - return tr->to_encoding; + return tr->src_encoding; + return tr->dst_encoding; } static unsigned char * -allocate_converted_string(const char *str_encoding, const char *insert_encoding, +allocate_converted_string(const char *sname, const char *dname, const unsigned char *str, size_t len, + unsigned char *caller_dst_buf, size_t caller_dst_bufsize, size_t *dst_len_ptr) { unsigned char *dst_str; size_t dst_len; - size_t dst_bufsize = len; + size_t dst_bufsize; rb_econv_t *ec; rb_econv_result_t res; @@ -1342,13 +1425,20 @@ const unsigned char *sp; unsigned char *dp; - if (dst_bufsize == 0) - dst_bufsize += 1; + if (caller_dst_buf) + dst_bufsize = caller_dst_bufsize; + else if (len == 
0) + dst_bufsize = 1; + else + dst_bufsize = len; - ec = rb_econv_open(str_encoding, insert_encoding, 0); + ec = rb_econv_open(sname, dname, 0); if (ec == NULL) return NULL; - dst_str = xmalloc(dst_bufsize); + if (caller_dst_buf) + dst_str = caller_dst_buf; + else + dst_str = xmalloc(dst_bufsize); dst_len = 0; sp = str; dp = dst_str+dst_len; @@ -1356,24 +1446,34 @@ dst_len = dp - dst_str; while (res == econv_destination_buffer_full) { if (dst_bufsize * 2 < dst_bufsize) { - xfree(dst_str); - rb_econv_close(ec); - return NULL; + goto fail; } dst_bufsize *= 2; - dst_str = xrealloc(dst_str, dst_bufsize); + if (dst_str == caller_dst_buf) { + unsigned char *tmp; + tmp = xmalloc(dst_bufsize); + memcpy(tmp, dst_str, dst_bufsize/2); + dst_str = tmp; + } + else { + dst_str = xrealloc(dst_str, dst_bufsize); + } dp = dst_str+dst_len; res = rb_econv_convert(ec, &sp, str+len, &dp, dst_str+dst_bufsize, 0); dst_len = dp - dst_str; } if (res != econv_finished) { - xfree(dst_str); - rb_econv_close(ec); - return NULL; + goto fail; } rb_econv_close(ec); *dst_len_ptr = dst_len; return dst_str; + + fail: + if (dst_str != caller_dst_buf) + xfree(dst_str); + rb_econv_close(ec); + return NULL; } /* result: 0:success -1:failure */ @@ -1382,7 +1482,8 @@ const unsigned char *str, size_t len, const char *str_encoding) { const char *insert_encoding = rb_econv_encoding_to_insert_output(ec); - const unsigned char *insert_str; + unsigned char insert_buf[4096]; + const unsigned char *insert_str = NULL; size_t insert_len; rb_transcoding *tc; @@ -1402,7 +1503,8 @@ insert_len = len; } else { - insert_str = allocate_converted_string(str_encoding, insert_encoding, str, len, &insert_len); + insert_str = allocate_converted_string(str_encoding, insert_encoding, + str, len, insert_buf, sizeof(insert_buf), &insert_len); if (insert_str == NULL) return -1; } @@ -1474,12 +1576,12 @@ memcpy(*data_end_p, insert_str, insert_len); *data_end_p += insert_len; - if (insert_str != str) + if (insert_str != str && 
insert_str != insert_buf) xfree((void*)insert_str); return 0; fail: - if (insert_str != str) + if (insert_str != str && insert_str != insert_buf) xfree((void*)insert_str); return -1; } @@ -1535,9 +1637,13 @@ if (st_lookup(table2, (st_data_t)data->stateful_enc, &v)) { transcoder_entry_t *entry = (transcoder_entry_t *)v; - const rb_transcoder *tr = load_transcoder_entry(entry); + const rb_transcoder *tr; + if (SUPPLEMENTAL_CONVERSION(entry->sname, entry->dname)) { + return ST_CONTINUE; + } + tr = load_transcoder_entry(entry); if (tr && tr->stateful_type == stateful_encoder) { - data->stateless_enc = tr->from_encoding; + data->stateless_enc = tr->src_encoding; return ST_STOP; } } @@ -1643,20 +1749,20 @@ } static VALUE -econv_description(const char *senc, const char *denc, int ecflags, VALUE mesg) +econv_description(const char *sname, const char *dname, int ecflags, VALUE mesg) { int has_description = 0; if (NIL_P(mesg)) mesg = rb_str_new(NULL, 0); - if (*senc != '\0' || *denc != '\0') { - if (*senc == '\0') - rb_str_cat2(mesg, denc); - else if (*denc == '\0') - rb_str_cat2(mesg, senc); + if (*sname != '\0' || *dname != '\0') { + if (*sname == '\0') + rb_str_cat2(mesg, dname); + else if (*dname == '\0') + rb_str_cat2(mesg, sname); else - rb_str_catf(mesg, "%s to %s", senc, denc); + rb_str_catf(mesg, "%s to %s", sname, dname); has_description = 1; } @@ -1688,11 +1794,11 @@ } VALUE -rb_econv_open_exc(const char *senc, const char *denc, int ecflags) +rb_econv_open_exc(const char *sname, const char *dname, int ecflags) { VALUE mesg, exc; mesg = rb_str_new_cstr("code converter open failed ("); - econv_description(senc, denc, ecflags, mesg); + econv_description(sname, dname, ecflags, mesg); rb_str_cat2(mesg, ")"); exc = rb_exc_new3(rb_eNoConverter, mesg); return exc; @@ -1801,7 +1907,7 @@ tc = ec->last_tc; if (tc) { tr = tc->transcoder; - enc = rb_enc_find(tr->to_encoding); + enc = rb_enc_find(tr->dst_encoding); replacement = (const unsigned char 
*)get_replacement_character(enc, &len, &repl_enc); } else { @@ -1812,7 +1918,7 @@ ins_enc = rb_econv_encoding_to_insert_output(ec); if (*repl_enc && !encoding_equal(repl_enc, ins_enc)) { - replacement = allocate_converted_string(repl_enc, ins_enc, replacement, len, &len); + replacement = allocate_converted_string(repl_enc, ins_enc, replacement, len, NULL, 0, &len); if (!replacement) return -1; allocated = 1; @@ -1842,7 +1948,7 @@ encname2 = encname; } else { - str2 = allocate_converted_string(encname, encname2, str, len, &len2); + str2 = allocate_converted_string(encname, encname2, str, len, NULL, 0, &len2); if (!str2) return -1; } @@ -1878,8 +1984,8 @@ const unsigned char *in_stop, unsigned char *out_stop, VALUE destination, unsigned char *(*resize_destination)(VALUE, int, int), - const char *from_encoding, - const char *to_encoding, + const char *src_encoding, + const char *dst_encoding, int ecflags, VALUE ecopts) { @@ -1890,9 +1996,9 @@ int max_output; VALUE exc; - ec = rb_econv_open_opts(from_encoding, to_encoding, ecflags, ecopts); + ec = rb_econv_open_opts(src_encoding, dst_encoding, ecflags, ecopts); if (!ec) - rb_exc_raise(rb_econv_open_exc(from_encoding, to_encoding, ecflags)); + rb_exc_raise(rb_econv_open_exc(src_encoding, dst_encoding, ecflags)); last_tc = ec->last_tc; max_output = last_tc ? 
last_tc->transcoder->max_output : 1; @@ -1923,8 +2029,8 @@ const unsigned char *in_stop, unsigned char *out_stop, VALUE destination, unsigned char *(*resize_destination)(VALUE, int, int), - const char *from_encoding, - const char *to_encoding, + const char *src_encoding, + const char *dst_encoding, int ecflags, VALUE ecopts) { @@ -1936,9 +2042,9 @@ int max_output; VALUE exc; - ec = rb_econv_open_opts(from_encoding, to_encoding, ecflags, ecopts); + ec = rb_econv_open_opts(src_encoding, dst_encoding, ecflags, ecopts); if (!ec) - rb_exc_raise(rb_econv_open_exc(from_encoding, to_encoding, ecflags)); + rb_exc_raise(rb_econv_open_exc(src_encoding, dst_encoding, ecflags)); last_tc = ec->last_tc; max_output = last_tc ? last_tc->transcoder->max_output : 1; @@ -2005,33 +2111,56 @@ econv_opts(VALUE opt) { VALUE v; - int options = 0; + int ecflags = 0; + v = rb_hash_aref(opt, sym_invalid); if (NIL_P(v)) { } - else if (v==sym_ignore) { - options |= ECONV_INVALID_IGNORE; - } else if (v==sym_replace) { - options |= ECONV_INVALID_REPLACE; + ecflags |= ECONV_INVALID_REPLACE; v = rb_hash_aref(opt, sym_replace); } else { rb_raise(rb_eArgError, "unknown value for invalid character option"); } + v = rb_hash_aref(opt, sym_undef); if (NIL_P(v)) { } - else if (v==sym_ignore) { - options |= ECONV_UNDEF_IGNORE; - } else if (v==sym_replace) { - options |= ECONV_UNDEF_REPLACE; + ecflags |= ECONV_UNDEF_REPLACE; } else { rb_raise(rb_eArgError, "unknown value for undefined character option"); } - return options; + + v = rb_hash_aref(opt, sym_html); + if (!NIL_P(v)) { + v = rb_convert_type(v, T_SYMBOL, "Symbol", "to_sym"); + if (v==sym_text) { + ecflags |= ECONV_HTML_TEXT_ENCODER|ECONV_UNDEF_HEX_CHARREF; + } + else if (v==sym_attr) { + ecflags |= ECONV_HTML_ATTR_ENCODER|ECONV_UNDEF_HEX_CHARREF; + } + else { + rb_raise(rb_eArgError, "unexpected value for html option: %s", rb_id2name(SYM2ID(v))); + } + } + + v = rb_hash_aref(opt, sym_universal_newline_decoder); + if (RTEST(v)) + ecflags |= 
ECONV_UNIVERSAL_NEWLINE_DECODER; + + v = rb_hash_aref(opt, sym_crlf_newline_encoder); + if (RTEST(v)) + ecflags |= ECONV_CRLF_NEWLINE_ENCODER; + + v = rb_hash_aref(opt, sym_cr_newline_encoder); + if (RTEST(v)) + ecflags |= ECONV_CR_NEWLINE_ENCODER; + + return ecflags; } int @@ -2096,43 +2225,54 @@ } static int -str_transcode_enc_args(VALUE str, VALUE arg1, VALUE arg2, - const char **sname, rb_encoding **senc, - const char **dname, rb_encoding **denc) +enc_arg(VALUE arg, const char **name_p, rb_encoding **enc_p) { - rb_encoding *from_enc, *to_enc; - const char *from_e, *to_e; - int from_encidx, to_encidx; - VALUE from_encval, to_encval; + rb_encoding *enc; + const char *n; + int encidx; + VALUE encval; - if ((to_encidx = rb_to_encoding_index(to_encval = arg1)) < 0) { - to_enc = 0; - to_encidx = 0; - to_e = StringValueCStr(to_encval); + if ((encidx = rb_to_encoding_index(encval = arg)) < 0) { + enc = NULL; + encidx = 0; + n = StringValueCStr(encval); } else { - to_enc = rb_enc_from_index(to_encidx); - to_e = rb_enc_name(to_enc); + enc = rb_enc_from_index(encidx); + n = rb_enc_name(enc); } + + *name_p = n; + *enc_p = enc; + + return encidx; +} + +static int +str_transcode_enc_args(VALUE str, VALUE arg1, VALUE arg2, + const char **sname_p, rb_encoding **senc_p, + const char **dname_p, rb_encoding **denc_p) +{ + rb_encoding *senc, *denc; + const char *sname, *dname; + int sencidx, dencidx; + + dencidx = enc_arg(arg1, &dname, &denc); + if (NIL_P(arg2)) { - from_encidx = rb_enc_get_index(str); - from_enc = rb_enc_from_index(from_encidx); - from_e = rb_enc_name(from_enc); + sencidx = rb_enc_get_index(str); + senc = rb_enc_from_index(sencidx); + sname = rb_enc_name(senc); } - else if ((from_encidx = rb_to_encoding_index(from_encval = arg2)) < 0) { - from_enc = 0; - from_e = StringValueCStr(from_encval); - } else { - from_enc = rb_enc_from_index(from_encidx); - from_e = rb_enc_name(from_enc); + sencidx = enc_arg(arg2, &sname, &senc); } - *sname = from_e; - *senc = from_enc; 
- *dname = to_e; - *denc = to_enc; - return to_encidx; + *sname_p = sname; + *senc_p = senc; + *dname_p = dname; + *denc_p = denc; + return dencidx; } static int @@ -2143,35 +2283,35 @@ long blen, slen; unsigned char *buf, *bp, *sp; const unsigned char *fromp; - rb_encoding *from_enc, *to_enc; - const char *from_e, *to_e; - int to_encidx; + rb_encoding *senc, *denc; + const char *sname, *dname; + int dencidx; if (argc < 1 || argc > 2) { rb_raise(rb_eArgError, "wrong number of arguments (%d for 1..2)", argc); } - to_encidx = str_transcode_enc_args(str, argv[0], argc==1 ? Qnil : argv[1], &from_e, &from_enc, &to_e, &to_enc); + dencidx = str_transcode_enc_args(str, argv[0], argc==1 ? Qnil : argv[1], &sname, &senc, &dname, &denc); if ((ecflags & (ECONV_UNIVERSAL_NEWLINE_DECODER| ECONV_CRLF_NEWLINE_ENCODER| ECONV_CR_NEWLINE_ENCODER)) == 0) { - if (from_enc && from_enc == to_enc) { + if (senc && senc == denc) { return -1; } - if (from_enc && to_enc && rb_enc_asciicompat(from_enc) && rb_enc_asciicompat(to_enc)) { + if (senc && denc && rb_enc_asciicompat(senc) && rb_enc_asciicompat(denc)) { if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT) { - return to_encidx; + return dencidx; } } - if (encoding_equal(from_e, to_e)) { + if (encoding_equal(sname, dname)) { return -1; } } else { - if (encoding_equal(from_e, to_e)) { - from_e = ""; - to_e = ""; + if (encoding_equal(sname, dname)) { + sname = ""; + dname = ""; } } @@ -2181,7 +2321,7 @@ dest = rb_str_tmp_new(blen); bp = (unsigned char *)RSTRING_PTR(dest); - transcode_loop(&fromp, &bp, (sp+slen), (bp+blen), dest, str_transcoding_resize, from_e, to_e, ecflags, ecopts); + transcode_loop(&fromp, &bp, (sp+slen), (bp+blen), dest, str_transcoding_resize, sname, dname, ecflags, ecopts); if (fromp != sp+slen) { rb_raise(rb_eArgError, "not fully converted, %"PRIdPTRDIFF" bytes left", sp+slen-fromp); } @@ -2190,12 +2330,12 @@ rb_str_set_len(dest, bp - buf); /* set encoding */ - if (!to_enc) { - to_encidx = rb_define_dummy_encoding(to_e); + 
if (!denc) { + dencidx = rb_define_dummy_encoding(dname); } *self = dest; - return to_encidx; + return dencidx; } static int @@ -2236,12 +2376,12 @@ /* * call-seq: * str.encode!(encoding [, options] ) => str - * str.encode!(to_encoding, from_encoding [, options] ) => str + * str.encode!(dst_encoding, src_encoding [, options] ) => str * * The first form transcodes the contents of <i>str</i> from * str.encoding to +encoding+. * The second form transcodes the contents of <i>str</i> from - * from_encoding to to_encoding. + * src_encoding to dst_encoding. * The options Hash gives details for conversion. See String#encode * for details. * Returns the string even if no changes were made. @@ -2261,12 +2401,12 @@ /* * call-seq: * str.encode(encoding [, options] ) => str - * str.encode(to_encoding, from_encoding [, options] ) => str + * str.encode(dst_encoding, src_encoding [, options] ) => str * * The first form returns a copy of <i>str</i> transcoded * to encoding +encoding+. * The second form returns a copy of <i>str</i> transcoded - * from from_encoding to to_encoding. + * from src_encoding to dst_encoding. * The options Hash gives details for conversion. Details * to be added. */ @@ -2319,20 +2459,71 @@ /* * call-seq: + * Encoding::Converter.stateless_encoding(string) => encoding or nil + * Encoding::Converter.stateless_encoding(encoding) => encoding or nil + * + * returns the corresponding stateless encoding. + * + * It returns nil if the argument is not a stateful encoding. + * + * "corresponding stateless encoding" is a stateless encoding which + * can represent all characters in the statefull encoding. + * + * So, no conversion undefined error occur from the stateful encoding to the stateless encoding. + * + * Currently, EUC-JP is the corresponding stateless encoding of ISO-2022-JP. + * + * Encoding::Converter.stateless_encoding("ISO-2022-JP") #=> #<Encoding:EUC-JP> + * + * (This may be changed in future because EUC-JP cannot distinguish JIS X 0208 1978 and 1983.) 
+ */ +static VALUE +econv_s_stateless_encoding(VALUE klass, VALUE arg) +{ + const char *stateful_name, *stateless_name; + rb_encoding *stateful_enc, *stateless_enc; + + enc_arg(arg, &stateful_name, &stateful_enc); + + stateless_name = rb_econv_stateless_encoding(stateful_name); + + if (stateless_name == NULL) + return Qnil; + + stateless_enc = rb_enc_find(stateless_name); + + if (!stateless_enc) + stateless_enc = make_dummy_encoding(stateless_name); + + return rb_enc_from_encoding(stateless_enc); +} + +/* + * call-seq: * Encoding::Converter.new(source_encoding, destination_encoding) - * Encoding::Converter.new(source_encoding, destination_encoding, flags) + * Encoding::Converter.new(source_encoding, destination_encoding, opt) * - * possible flags: - * Encoding::Converter::UNIVERSAL_NEWLINE_DECODER # convert CRLF and CR to LF at last - * Encoding::Converter::CRLF_NEWLINE_ENCODER # convert LF to CRLF at first - * Encoding::Converter::CR_NEWLINE_ENCODER # convert LF to CR at first + * possible options elements: + * hash form: + * :universal_newline_decoder => true # convert CRLF and CR to LF at last + * :crlf_newline_encoder => true # convert LF to CRLF at first + * :cr_newline_encoder => true # convert LF to CR at first + * :invalid => nil # error on invalid byte sequence (default) + * :invalid => :replace # replace invalid byte sequence + * :undef => nil # error on undefined conversion (default) + * :undef => :replace # replace undefined conversion + * :replace => string # replacement string ("?" or "\uFFFD" if not specified) + * integer form: + * Encoding::Converter::UNIVERSAL_NEWLINE_DECODER + * Encoding::Converter::CRLF_NEWLINE_ENCODER + * Encoding::Converter::CR_NEWLINE_ENCODER * * Encoding::Converter.new creates an instance of Encoding::Converter. * * source_encoding and destination_encoding should be a string or * Encoding object. * - * flags should be an integer. + * opt should be nil, a hash or an integer. 
* * example: * # UTF-16BE to UTF-8 @@ -2340,38 +2531,36 @@ * * # (1) convert UTF-16BE to UTF-8 * # (2) convert CRLF and CR to LF - * ec = Encoding::Converter.new("UTF-16BE", "UTF-8", Encoding::Converter::UNIVERSAL_NEWLINE_DECODER) + * ec = Encoding::Converter.new("UTF-16BE", "UTF-8", :universal_newline_decoder => true) * * # (1) convert LF to CRLF * # (2) convert UTF-8 to UTF-16BE - * ec = Encoding::Converter.new("UTF-8", "UTF-16BE", Encoding::Converter::CRLF_NEWLINE_ENCODER) + * ec = Encoding::Converter.new("UTF-8", "UTF-16BE", :crlf_newline_encoder => true) * */ static VALUE econv_init(int argc, VALUE *argv, VALUE self) { - VALUE source_encoding, destination_encoding, flags_v, opt, ecopts; + VALUE source_encoding, destination_encoding, opt, opthash, flags_v, ecopts; int sidx, didx; const char *sname, *dname; rb_encoding *senc, *denc; rb_econv_t *ec; int ecflags; - rb_scan_args(argc, argv, "21", &source_encoding, &destination_encoding, &flags_v); + rb_scan_args(argc, argv, "21", &source_encoding, &destination_encoding, &opt); - if (flags_v == Qnil) { + if (NIL_P(opt)) { ecflags = 0; ecopts = Qnil; } + else if (!NIL_P(flags_v = rb_check_to_integer(opt, "to_int"))) { + ecflags = NUM2INT(flags_v); + ecopts = Qnil; + } else { - opt = rb_check_convert_type(argv[argc-1], T_HASH, "Hash", "to_hash"); - if (!NIL_P(opt)) { - ecflags = rb_econv_prepare_opts(opt, &ecopts); - } - else { - ecflags = NUM2INT(flags_v); - ecopts = Qnil; - } + opthash = rb_convert_type(opt, T_HASH, "Hash", "to_hash"); + ecflags = rb_econv_prepare_opts(opthash, &ecopts); } senc = NULL; @@ -2415,8 +2604,8 @@ ec->destination_encoding = denc; if (ec->last_tc) { - ec->source_encoding_name = ec->elems[0].tc->transcoder->from_encoding; - ec->destination_encoding_name = ec->last_tc->transcoder->to_encoding; + ec->source_encoding_name = ec->elems[0].tc->transcoder->src_encoding; + ec->destination_encoding_name = ec->last_tc->transcoder->dst_encoding; } else { ec->source_encoding_name = ""; @@ -2523,11 
+2712,15 @@ * ec.primitive_convert(source_buffer, destination_buffer) -> symbol * ec.primitive_convert(source_buffer, destination_buffer, destination_byteoffset) -> symbol * ec.primitive_convert(source_buffer, destination_buffer, destination_byteoffset, destination_bytesize) -> symbol - * ec.primitive_convert(source_buffer, destination_buffer, destination_byteoffset, destination_bytesize, flags) -> symbol + * ec.primitive_convert(source_buffer, destination_buffer, destination_byteoffset, destination_bytesize, opt) -> symbol * - * possible flags: - * Encoding::Converter::PARTIAL_INPUT # source buffer may be part of larger source - * Encoding::Converter::OUTPUT_FOLLOWED_BY_INPUT # stop conversion after output before input + * possible opt elements: + * hash form: + * :partial_input => true # source buffer may be part of larger source + * output_followed_by_input => true # stop conversion after output before input + * integer form: + * Encoding::Converter::PARTIAL_INPUT + * Encoding::Converter::OUTPUT_FOLLOWED_BY_INPUT * * possible results: * :invalid_byte_sequence @@ -2553,7 +2746,7 @@ * nil means unlimited. * If it is omitted, nil is assumed. * - * flags should be an integer or nil. + * opt should be nil, a hash or an integer. * nil means no flags. * If it is omitted, nil is assumed. * @@ -2579,14 +2772,14 @@ * primitive_convert stops conversion when one of following condition met. * - invalid byte sequence found in source buffer (:invalid_byte_sequence) * - unexpected end of source buffer (:incomplete_input) - * this occur only when PARTIAL_INPUT is not specified. + * this occur only when :partial_input is not specified. * - character not representable in output encoding (:undefined_conversion) * - after some output is generated, before input is done (:output_followed_by_input) - * this occur only when OUTPUT_FOLLOWED_BY_INPUT is specified. + * this occur only when :output_followed_by_input is specified. 
* - destination buffer is full (:destination_buffer_full) * this occur only when destination_bytesize is non-nil. * - source buffer is empty (:source_buffer_empty) - * this occur only when PARTIAL_INPUT is specified. + * this occur only when :partial_input is specified. * - conversion is finished (:finished) * * example: @@ -2608,7 +2801,7 @@ static VALUE econv_primitive_convert(int argc, VALUE *argv, VALUE self) { - VALUE input, output, output_byteoffset_v, output_bytesize_v, flags_v; + VALUE input, output, output_byteoffset_v, output_bytesize_v, opt, flags_v; rb_econv_t *ec = check_econv(self); rb_econv_result_t res; const unsigned char *ip, *is; @@ -2617,7 +2810,7 @@ unsigned long output_byteend; int flags; - rb_scan_args(argc, argv, "23", &input, &output, &output_byteoffset_v, &output_bytesize_v, &flags_v); + rb_scan_args(argc, argv, "23", &input, &output, &output_byteoffset_v, &output_bytesize_v, &opt); if (NIL_P(output_byteoffset_v)) output_byteoffset = 0; /* dummy */ @@ -2629,10 +2822,23 @@ else output_bytesize = NUM2LONG(output_bytesize_v); - if (NIL_P(flags_v)) + if (NIL_P(opt)) { flags = 0; - else + } + else if (!NIL_P(flags_v = rb_check_to_integer(opt, "to_int"))) { flags = NUM2INT(flags_v); + } + else { + VALUE v; + opt = rb_convert_type(opt, T_HASH, "Hash", "to_hash"); + flags = 0; + v = rb_hash_aref(opt, sym_partial_input); + if (RTEST(v)) + flags |= ECONV_PARTIAL_INPUT; + v = rb_hash_aref(opt, sym_output_followed_by_input); + if (RTEST(v)) + flags |= ECONV_OUTPUT_FOLLOWED_BY_INPUT; + } StringValue(output); if (!NIL_P(input)) @@ -2707,7 +2913,7 @@ * convert source_string and return destination_string. * * source_string is assumed as a part of source. - * i.e. Encoding::Converter::PARTIAL_INPUT is used internally. + * i.e. :partial_input=>true is specified internally. * finish method should be used at last. * * ec = Encoding::Converter.new("utf-8", "euc-jp") @@ -3086,7 +3292,7 @@ * * sets the replacement string. 
* - * ec = Encoding::Converter.new("utf-8", "us-ascii", Encoding::Converter::UNDEF_REPLACE) + * ec = Encoding::Converter.new("utf-8", "us-ascii", :undef => :replace) * ec.replacement = "<undef>" * p ec.convert("a \u3042 b") #=> "a <undef> b" */ @@ -3286,6 +3492,9 @@ sym_undef = ID2SYM(rb_intern("undef")); sym_ignore = ID2SYM(rb_intern("ignore")); sym_replace = ID2SYM(rb_intern("replace")); + sym_html = ID2SYM(rb_intern("html")); + sym_text = ID2SYM(rb_intern("text")); + sym_attr = ID2SYM(rb_intern("attr")); sym_invalid_byte_sequence = ID2SYM(rb_intern("invalid_byte_sequence")); sym_undefined_conversion = ID2SYM(rb_intern("undefined_conversion")); @@ -3294,12 +3503,17 @@ sym_finished = ID2SYM(rb_intern("finished")); sym_output_followed_by_input = ID2SYM(rb_intern("output_followed_by_input")); sym_incomplete_input = ID2SYM(rb_intern("incomplete_input")); + sym_universal_newline_decoder = ID2SYM(rb_intern("universal_newline_decoder")); + sym_crlf_newline_encoder = ID2SYM(rb_intern("crlf_newline_encoder")); + sym_cr_newline_encoder = ID2SYM(rb_intern("cr_newline_encoder")); + sym_partial_input = ID2SYM(rb_intern("partial_input")); rb_define_method(rb_cString, "encode", str_encode, -1); rb_define_method(rb_cString, "encode!", str_encode_bang, -1); rb_cEncodingConverter = rb_define_class_under(rb_cEncoding, "Converter", rb_cData); rb_define_alloc_func(rb_cEncodingConverter, econv_s_allocate); + rb_define_singleton_method(rb_cEncodingConverter, "stateless_encoding", econv_s_stateless_encoding, 1); rb_define_method(rb_cEncodingConverter, "initialize", econv_init, -1); rb_define_method(rb_cEncodingConverter, "inspect", econv_inspect, 0); rb_define_method(rb_cEncodingConverter, "source_encoding", econv_source_encoding, 0); @@ -3313,17 +3527,19 @@ rb_define_method(rb_cEncodingConverter, "last_error", econv_last_error, 0); rb_define_method(rb_cEncodingConverter, "replacement", econv_get_replacement, 0); rb_define_method(rb_cEncodingConverter, "replacement=", 
econv_set_replacement, 1); + rb_define_const(rb_cEncodingConverter, "INVALID_MASK", INT2FIX(ECONV_INVALID_MASK)); - rb_define_const(rb_cEncodingConverter, "INVALID_IGNORE", INT2FIX(ECONV_INVALID_IGNORE)); rb_define_const(rb_cEncodingConverter, "INVALID_REPLACE", INT2FIX(ECONV_INVALID_REPLACE)); rb_define_const(rb_cEncodingConverter, "UNDEF_MASK", INT2FIX(ECONV_UNDEF_MASK)); - rb_define_const(rb_cEncodingConverter, "UNDEF_IGNORE", INT2FIX(ECONV_UNDEF_IGNORE)); rb_define_const(rb_cEncodingConverter, "UNDEF_REPLACE", INT2FIX(ECONV_UNDEF_REPLACE)); + rb_define_const(rb_cEncodingConverter, "UNDEF_HEX_CHARREF", INT2FIX(ECONV_UNDEF_HEX_CHARREF)); rb_define_const(rb_cEncodingConverter, "PARTIAL_INPUT", INT2FIX(ECONV_PARTIAL_INPUT)); rb_define_const(rb_cEncodingConverter, "OUTPUT_FOLLOWED_BY_INPUT", INT2FIX(ECONV_OUTPUT_FOLLOWED_BY_INPUT)); rb_define_const(rb_cEncodingConverter, "UNIVERSAL_NEWLINE_DECODER", INT2FIX(ECONV_UNIVERSAL_NEWLINE_DECODER)); rb_define_const(rb_cEncodingConverter, "CRLF_NEWLINE_ENCODER", INT2FIX(ECONV_CRLF_NEWLINE_ENCODER)); rb_define_const(rb_cEncodingConverter, "CR_NEWLINE_ENCODER", INT2FIX(ECONV_CR_NEWLINE_ENCODER)); + rb_define_const(rb_cEncodingConverter, "HTML_TEXT_ENCODER", INT2FIX(ECONV_HTML_TEXT_ENCODER)); + rb_define_const(rb_cEncodingConverter, "HTML_ATTR_ENCODER", INT2FIX(ECONV_HTML_ATTR_ENCODER)); rb_define_method(rb_eConversionUndefined, "source_encoding_name", ecerr_source_encoding_name, 0); rb_define_method(rb_eConversionUndefined, "destination_encoding_name", ecerr_destination_encoding_name, 0); -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/