ruby-changes:3429
From: ko1@a...
Date: 7 Jan 2008 11:49:25 +0900
Subject: [ruby-changes:3429] akr - Ruby:r14922 (trunk): * encoding.c (rb_enc_internal_get_index): extracted from
akr 2008-01-07 11:49:01 +0900 (Mon, 07 Jan 2008) New Revision: 14922 Modified files: trunk/ChangeLog trunk/encoding.c trunk/include/ruby/encoding.h trunk/marshal.c trunk/parse.y trunk/re.c trunk/string.c trunk/test/ruby/test_m17n.rb Log: * encoding.c (rb_enc_internal_get_index): extracted from rb_enc_get_index. (rb_enc_internal_set_index): extracted from rb_enc_associate_index * include/ruby/encoding.h (ENCODING_SET): work over ENCODING_INLINE_MAX. (ENCODING_GET): ditto. (ENCODING_IS_ASCII8BIT): defined. (ENCODING_CODERANGE_SET): defined. * re.c (rb_reg_fixed_encoding_p): use ENCODING_IS_ASCII8BIT. * string.c (rb_enc_str_buf_cat): use ENCODING_IS_ASCII8BIT. * parse.y (reg_fragment_setenc_gen): use ENCODING_IS_ASCII8BIT. * marshal.c (has_ivars): use ENCODING_IS_ASCII8BIT. http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/string.c?r1=14922&r2=14921&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/parse.y?r1=14922&r2=14921&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=14922&r2=14921&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/include/ruby/encoding.h?r1=14922&r2=14921&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/marshal.c?r1=14922&r2=14921&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/re.c?r1=14922&r2=14921&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/encoding.c?r1=14922&r2=14921&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/test/ruby/test_m17n.rb?r1=14922&r2=14921&diff_format=u Index: encoding.c =================================================================== --- encoding.c (revision 14921) +++ encoding.c (revision 14922) @@ -436,7 +436,34 @@ return id_encoding; } +int +rb_enc_internal_get_index(VALUE obj) +{ + int i; + + i = ENCODING_GET_INLINED(obj); + if (i == ENCODING_INLINE_MAX) { + VALUE iv; + + iv = rb_ivar_get(obj, rb_id_encoding()); + i = NUM2INT(iv); + } + return i; +} + void +rb_enc_internal_set_index(VALUE obj, int idx) +{ + if (idx < ENCODING_INLINE_MAX) { + ENCODING_SET_INLINED(obj, idx); + return; + } + ENCODING_SET_INLINED(obj, ENCODING_INLINE_MAX); + rb_ivar_set(obj, rb_id_encoding(), INT2NUM(idx)); + return; +} + +void rb_enc_associate_index(VALUE obj, int idx) { enc_check_capable(obj); @@ -444,13 +471,7 @@ !rb_enc_asciicompat(rb_enc_from_index(idx))) { ENC_CODERANGE_CLEAR(obj); } - if (idx < ENCODING_INLINE_MAX) { - ENCODING_SET(obj, idx); - return; - } - ENCODING_SET(obj, ENCODING_INLINE_MAX); - rb_ivar_set(obj, rb_id_encoding(), INT2NUM(idx)); - return; + rb_enc_internal_set_index(obj, idx); } int @@ -476,17 +497,8 @@ int rb_enc_get_index(VALUE obj) { - int i; - if (!enc_capable(obj)) return -1; - i = ENCODING_GET(obj); - if (i == ENCODING_INLINE_MAX) { - VALUE iv; - - iv = rb_ivar_get(obj, rb_id_encoding()); - i = NUM2INT(iv); - } - return i; + return rb_enc_internal_get_index(obj); } rb_encoding* Index: include/ruby/encoding.h =================================================================== --- include/ruby/encoding.h (revision 14921) +++ include/ruby/encoding.h (revision 14922) @@ -22,12 +22,28 @@ #define ENCODING_INLINE_MAX 1023 #define ENCODING_SHIFT (FL_USHIFT+10) #define ENCODING_MASK (ENCODING_INLINE_MAX<<ENCODING_SHIFT) -#define ENCODING_SET(obj,i) do {\ + +#define ENCODING_SET_INLINED(obj,i) do {\ RBASIC(obj)->flags &= ~ENCODING_MASK;\ - RBASIC(obj)->flags |= i << ENCODING_SHIFT;\ + RBASIC(obj)->flags |= (i) << ENCODING_SHIFT;\ } while (0) -#define ENCODING_GET(obj) ((RBASIC(obj)->flags & ENCODING_MASK)>>ENCODING_SHIFT) +#define ENCODING_SET(obj,i) do {\ + VALUE rb_encoding_set_obj = (obj); \ + int encoding_set_enc_index = (i); \ + if (encoding_set_enc_index < ENCODING_INLINE_MAX) \ + ENCODING_SET_INLINED(rb_encoding_set_obj, encoding_set_enc_index); \ + else \ + rb_enc_internal_set_index(rb_encoding_set_obj, encoding_set_enc_index); \ +} while (0) +#define ENCODING_GET_INLINED(obj) ((RBASIC(obj)->flags & ENCODING_MASK)>>ENCODING_SHIFT) +#define ENCODING_GET(obj) \ + (ENCODING_GET_INLINED(obj) != ENCODING_INLINE_MAX ? \ + ENCODING_GET_INLINED(obj) : \ + rb_enc_internal_get_index(obj)) + +#define ENCODING_IS_ASCII8BIT(obj) (ENCODING_GET_INLINED(obj) == 0) + #define ENC_CODERANGE_MASK (FL_USER8|FL_USER9) #define ENC_CODERANGE_UNKNOWN 0 #define ENC_CODERANGE_7BIT FL_USER8 @@ -39,6 +55,12 @@ (RBASIC(obj)->flags & ~ENC_CODERANGE_MASK) | (cr)) #define ENC_CODERANGE_CLEAR(obj) ENC_CODERANGE_SET(obj,0) +#define ENCODING_CODERANGE_SET(obj, encindex, cr) \ + do { \ + VALUE rb_encoding_coderange_obj = (obj); \ + ENCODING_SET(rb_encoding_coderange_obj, (encindex)); \ + ENC_CODERANGE_SET(rb_encoding_coderange_obj, (cr)); \ + } while (0) typedef OnigEncodingType rb_encoding; @@ -56,6 +78,8 @@ void rb_enc_associate_index(VALUE, int); void rb_enc_associate(VALUE, rb_encoding*); void rb_enc_copy(VALUE dst, VALUE src); +int rb_enc_internal_get_index(VALUE obj); +void rb_enc_internal_set_index(VALUE obj, int encindex); VALUE rb_enc_str_new(const char*, long, rb_encoding*); VALUE rb_enc_reg_new(const char*, long, rb_encoding*, int); Index: re.c =================================================================== --- re.c (revision 14921) +++ re.c (revision 14922) @@ -937,7 +937,7 @@ static VALUE rb_reg_fixed_encoding_p(VALUE re) { - if (ENCODING_GET(re) != 0 || FL_TEST(re, KCODE_FIXED)) + if (!ENCODING_IS_ASCII8BIT(re) || FL_TEST(re, KCODE_FIXED)) return Qtrue; else return Qfalse; Index: ChangeLog =================================================================== --- ChangeLog (revision 14921) +++ ChangeLog (revision 14922) @@ -1,3 +1,22 @@ +Mon Jan 7 11:44:45 2008 Tanaka Akira <akr@f...> + + * encoding.c (rb_enc_internal_get_index): extracted from + rb_enc_get_index. + (rb_enc_internal_set_index): extracted from rb_enc_associate_index + + * include/ruby/encoding.h (ENCODING_SET): work over ENCODING_INLINE_MAX. + (ENCODING_GET): ditto. + (ENCODING_IS_ASCII8BIT): defined. + (ENCODING_CODERANGE_SET): defined. + + * re.c (rb_reg_fixed_encoding_p): use ENCODING_IS_ASCII8BIT. + + * string.c (rb_enc_str_buf_cat): use ENCODING_IS_ASCII8BIT. + + * parse.y (reg_fragment_setenc_gen): use ENCODING_IS_ASCII8BIT. + + * marshal.c (has_ivars): use ENCODING_IS_ASCII8BIT. + Mon Jan 7 02:14:07 2008 Tanaka Akira <akr@f...> * string.c (coderange_scan): avoid rb_enc_to_index. Index: string.c =================================================================== --- string.c (revision 14921) +++ string.c (revision 14922) @@ -1063,7 +1063,7 @@ rb_encoding *str_enc = rb_enc_get(str); rb_encoding *res_enc; int str_cr, ptr_cr, res_cr; - int str_a8 = ENCODING_GET(str) == 0; + int str_a8 = ENCODING_IS_ASCII8BIT(str); int ptr_a8 = ptr_enc == rb_ascii8bit_encoding(); str_cr = ENC_CODERANGE(str); Index: parse.y =================================================================== --- parse.y (revision 14921) +++ parse.y (revision 14922) @@ -8470,7 +8470,7 @@ if (c) { int opt, idx; rb_char_to_option_kcode(c, &opt, &idx); - if (idx != ENCODING_GET(str) && ENCODING_GET(str) && + if (idx != ENCODING_GET(str) && !ENCODING_IS_ASCII8BIT(str) && rb_enc_str_coderange(str) != ENC_CODERANGE_7BIT) { compile_error(PARSER_ARG "regexp encoding option '%c' differs from source encoding '%s'", Index: marshal.c =================================================================== --- marshal.c (revision 14921) +++ marshal.c (revision 14922) @@ -526,7 +526,7 @@ st_data_t num; int hasiv = 0; #define has_ivars(obj, ivtbl) ((ivtbl = rb_generic_ivar_table(obj)) != 0 || \ - (!SPECIAL_CONST_P(obj) && ENCODING_GET(obj))) + (!SPECIAL_CONST_P(obj) && !ENCODING_IS_ASCII8BIT(obj))) if (limit == 0) { rb_raise(rb_eArgError, "exceed depth limit"); Index: test/ruby/test_m17n.rb =================================================================== --- test/ruby/test_m17n.rb (revision 14921) +++ test/ruby/test_m17n.rb (revision 14922) @@ -370,6 +370,15 @@ assert_regexp_fixed_sjis(eval(s(%q{/\xc2\xa1/}))) end + def test_regexp_windows_31j + begin + Regexp.new("\xa1".force_encoding("windows-31j")) =~ "\xa1\xa1".force_encoding("euc-jp") + rescue ArgumentError + err = $! + end + assert_match(/windows-31j/i, err.message) + end + def test_regexp_embed r = eval(e("/\xc2\xa1/")) assert_raise(ArgumentError) { eval(s("/\xc2\xa1\#{r}/s")) } -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml