ruby-changes:3755
From: ko1@a...
Date: Sat, 26 Jan 2008 01:40:25 +0900 (JST)
Subject: [ruby-changes:3755] naruse - Ruby:r15244 (trunk): * string.c (rb_usascii_str_new{,2}): defined.
naruse 2008-01-26 01:40:02 +0900 (Sat, 26 Jan 2008) New Revision: 15244 Modified files: trunk/ChangeLog trunk/array.c trunk/bignum.c trunk/encoding.c trunk/file.c trunk/hash.c trunk/numeric.c trunk/object.c trunk/string.c trunk/test/ruby/test_m17n.rb Log: * string.c (rb_usascii_str_new{,2}): defined. (rb_str_new): set US-ASCII and ENC_CODERANGE_7BIT when empty string. * encoding.c (rb_usascii_encoding, rb_usascii_encindex): defined. (enc_inspect, enc_name, rb_locale_charmap, rb_enc_name_list_i): use rb_usascii_str_new{,2}. * array.c (recursive_join, inspect_ary): ditto. * object.c (nil_to_s, nil_inspect, true_to_s, false_to_s, rb_mod_to_s): ditto. * hash.c (inspect_hash, rb_hash_inspect, rb_f_getenv, env_fetch, env_clear, env_to_s, env_inspect): ditto. * numeric.c (flo_to_s, int_chr, rb_fix2str): ditto. * bignum.c (rb_big2str): ditto. * file.c (rb_file_ftype, rb_file_s_dirname, rb_file_s_extname, file_inspect_join, Init_file): ditto. * test/ruby/test_m17n.rb: add checks for encoding of string. 
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/numeric.c?r1=15244&r2=15243&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/string.c?r1=15244&r2=15243&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/hash.c?r1=15244&r2=15243&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/file.c?r1=15244&r2=15243&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/array.c?r1=15244&r2=15243&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=15244&r2=15243&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/bignum.c?r1=15244&r2=15243&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/encoding.c?r1=15244&r2=15243&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/object.c?r1=15244&r2=15243&diff_format=u http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/test/ruby/test_m17n.rb?r1=15244&r2=15243&diff_format=u Index: array.c =================================================================== --- array.c (revision 15243) +++ array.c (revision 15244) @@ -1233,7 +1233,7 @@ { VALUE *arg = (VALUE *)argp; if (recur) { - return rb_str_new2("[...]"); + return rb_usascii_str_new2("[...]"); } return rb_ary_join(arg[0], arg[1]); } @@ -1337,7 +1337,7 @@ static VALUE rb_ary_inspect(VALUE ary) { - if (RARRAY_LEN(ary) == 0) return rb_str_new2("[]"); + if (RARRAY_LEN(ary) == 0) return rb_usascii_str_new2("[]"); return rb_exec_recursive(inspect_ary, ary, 0); } Index: encoding.c =================================================================== --- encoding.c (revision 15243) +++ encoding.c (revision 15244) @@ -838,9 +838,11 @@ static VALUE enc_inspect(VALUE self) { - return rb_sprintf("#<%s:%s%s>", rb_obj_classname(self), + VALUE str = rb_sprintf("#<%s:%s%s>", rb_obj_classname(self), rb_enc_name((rb_encoding*)DATA_PTR(self)), (ENC_DUMMY_P(self) ? 
" (dummy)" : "")); + ENCODING_CODERANGE_SET(str, rb_usascii_encindex(), ENC_CODERANGE_7BIT); + return str; } /* @@ -854,7 +856,7 @@ static VALUE enc_name(VALUE self) { - return rb_str_new2(rb_enc_name((rb_encoding*)DATA_PTR(self))); + return rb_usascii_str_new2(rb_enc_name((rb_encoding*)DATA_PTR(self))); } static VALUE @@ -993,6 +995,12 @@ return enc_table.list[ENCINDEX_US_ASCII].enc; } +int +rb_usascii_encindex(void) +{ + return ENCINDEX_US_ASCII; +} + rb_encoding * rb_locale_encoding(void) { @@ -1066,11 +1074,11 @@ rb_locale_charmap(VALUE klass) { #if defined NO_LOCALE_CHARMAP - return rb_str_new2("ASCII-8BIT"); + return rb_usascii_str_new2("ASCII-8BIT"); #elif defined HAVE_LANGINFO_H char *codeset; codeset = nl_langinfo(CODESET); - return rb_str_new2(codeset); + return rb_usascii_str_new2(codeset); #elif defined _WIN32 return rb_sprintf("CP%d", GetACP()); #else @@ -1128,7 +1136,7 @@ rb_enc_name_list_i(st_data_t name, st_data_t idx, st_data_t arg) { VALUE ary = (VALUE)arg; - VALUE str = rb_str_new2((char *)name); + VALUE str = rb_usascii_str_new2((char *)name); OBJ_FREEZE(str); rb_ary_push(ary, str); return ST_CONTINUE; @@ -1172,11 +1180,11 @@ if (STRCASECMP((char*)name, rb_enc_name(enc)) == 0) { return ST_CONTINUE; } - str = rb_str_new2(rb_enc_name(enc)); + str = rb_usascii_str_new2(rb_enc_name(enc)); OBJ_FREEZE(str); rb_ary_store(ary, idx, str); } - key = rb_str_new2((char *)name); + key = rb_usascii_str_new2((char *)name); OBJ_FREEZE(key); rb_hash_aset(aliases, key, str); return ST_CONTINUE; Index: ChangeLog =================================================================== --- ChangeLog (revision 15243) +++ ChangeLog (revision 15244) @@ -1,3 +1,30 @@ +Sat Jan 26 00:17:18 2008 NARUSE, Yui <naruse@r...> + + * string.c (rb_str_usascii_new{,2}: defined. + (rb_str_new): set US-ASCII and ENC_CODERANGE_7BIT when empty + string. + + * encoding.c (rb_usascii_encoding, rb_usascii_encindex): defined. 
+ (rb_enc_inspect, enc_name, rb_locale_charmap, rb_enc_name_list_i): + use rb_str_ascii_new. + + * array.c (recursive_join, inspect_ary): ditto. + + * object.c (nil_to_s, nil_inspect, true_to_s, false_to_s, + rb_mod_to_s): ditto. + + * hash.c (inspect_hash, rb_hash_inspect, rb_f_getenv, env_fetch, + env_clear, env_to_s, env_inspect): ditto. + + * numeric.c (flo_to_s, int_chr, rb_fix2str): ditto. + + * bignum.c (rb_big2str): ditto. + + * file.c (rb_file_ftype, rb_file_s_dirname, rb_file_s_extname, + file_inspect_join, Init_file): ditto. + + * test/ruby/test_ruby_m17n.rb: add checks for encoding of string. + Sat Jan 26 01:35:46 2008 Tanaka Akira <akr@f...> * marshal.c (r_byte): use getbyte instead of getc. Index: string.c =================================================================== --- string.c (revision 15243) +++ string.c (revision 15244) @@ -278,6 +278,9 @@ if (ptr) { memcpy(RSTRING_PTR(str), ptr, len); } + else { + ENCODING_CODERANGE_SET(str, rb_usascii_encindex(), ENC_CODERANGE_7BIT); + } STR_SET_LEN(str, len); RSTRING_PTR(str)[len] = '\0'; return str; @@ -290,6 +293,15 @@ } VALUE +rb_usascii_str_new(const char *ptr, long len) +{ + VALUE str = str_new(rb_cString, ptr, len); + + ENCODING_CODERANGE_SET(str, rb_usascii_encindex(), ENC_CODERANGE_7BIT); + return str; +} + +VALUE rb_enc_str_new(const char *ptr, long len, rb_encoding *enc) { VALUE str = str_new(rb_cString, ptr, len); @@ -308,6 +320,15 @@ } VALUE +rb_usascii_str_new2(const char *ptr) +{ + if (!ptr) { + rb_raise(rb_eArgError, "NULL pointer given"); + } + return rb_usascii_str_new(ptr, strlen(ptr)); +} + +VALUE rb_tainted_str_new(const char *ptr, long len) { VALUE str = rb_str_new(ptr, len); Index: object.c =================================================================== --- object.c (revision 15243) +++ object.c (revision 15244) @@ -806,7 +806,7 @@ static VALUE nil_to_s(VALUE obj) { - return rb_str_new2(""); + return rb_str_new(0, 0); } /* @@ -836,7 +836,7 @@ static VALUE nil_inspect(VALUE obj) 
{ - return rb_str_new2("nil"); + return rb_usascii_str_new2("nil"); } /*********************************************************************** @@ -859,7 +859,7 @@ static VALUE true_to_s(VALUE obj) { - return rb_str_new2("true"); + return rb_usascii_str_new2("true"); } @@ -936,7 +936,7 @@ static VALUE false_to_s(VALUE obj) { - return rb_str_new2("false"); + return rb_usascii_str_new2("false"); } /* @@ -1090,7 +1090,7 @@ rb_mod_to_s(VALUE klass) { if (FL_TEST(klass, FL_SINGLETON)) { - VALUE s = rb_str_new2("#<"); + VALUE s = rb_usascii_str_new2("#<"); VALUE v = rb_iv_get(klass, "__attached__"); rb_str_cat2(s, "Class:"); Index: hash.c =================================================================== --- hash.c (revision 15243) +++ hash.c (revision 15244) @@ -1169,7 +1169,7 @@ { VALUE str; - if (recur) return rb_str_new2("{...}"); + if (recur) return rb_usascii_str_new2("{...}"); str = rb_str_buf_new2("{"); rb_hash_foreach(hash, inspect_i, str); rb_str_buf_cat2(str, "}"); @@ -1193,7 +1193,7 @@ rb_hash_inspect(VALUE hash) { if (RHASH_EMPTY_P(hash)) - return rb_str_new2("{}"); + return rb_usascii_str_new2("{}"); return rb_exec_recursive(inspect_hash, hash, 0); } @@ -1821,7 +1821,7 @@ if (strcmp(nam, PATH_ENV) == 0 && !rb_env_path_tainted()) #endif { - VALUE str = rb_str_new2(env); + VALUE str = rb_usascii_str_new2(env); rb_obj_freeze(str); return str; @@ -1862,7 +1862,7 @@ #else if (strcmp(nam, PATH_ENV) == 0 && !rb_env_path_tainted()) #endif - return rb_str_new2(env); + return rb_usascii_str_new2(env); return env_str_new2(env); } @@ -2217,7 +2217,7 @@ static VALUE env_to_s(void) { - return rb_str_new2("ENV"); + return rb_usascii_str_new2("ENV"); } static VALUE @@ -2239,7 +2239,7 @@ rb_str_buf_cat2(str, "\""); rb_str_buf_cat(str, *env, s-*env); rb_str_buf_cat2(str, "\"=>"); - i = rb_inspect(rb_str_new2(s+1)); + i = rb_inspect(rb_usascii_str_new2(s+1)); rb_str_buf_append(str, i); } env++; Index: numeric.c 
=================================================================== --- numeric.c (revision 15243) +++ numeric.c (revision 15244) @@ -504,9 +504,9 @@ char *p, *e; if (isinf(value)) - return rb_str_new2(value < 0 ? "-Infinity" : "Infinity"); + return rb_usascii_str_new2(value < 0 ? "-Infinity" : "Infinity"); else if(isnan(value)) - return rb_str_new2("NaN"); + return rb_usascii_str_new2("NaN"); sprintf(buf, "%#.15g", value); /* ensure to print decimal point */ if (!(e = strchr(buf, 'e'))) { @@ -522,7 +522,7 @@ while (p[-1]=='0' && ISDIGIT(p[-2])) p--; memmove(p, e, strlen(e)+1); - return rb_str_new2(buf); + return rb_usascii_str_new2(buf); } /* @@ -1851,7 +1851,12 @@ rb_raise(rb_eRangeError, "%ld out of char range", i); } c = i; - return rb_str_new(&c, 1); + if (i < 0x80) { + return rb_usascii_str_new(&c, 1); + } + else { + return rb_str_new(&c, 1); + } case 1: break; default: @@ -1968,7 +1973,7 @@ rb_raise(rb_eArgError, "invalid radix %d", base); } if (val == 0) { - return rb_str_new2("0"); + return rb_usascii_str_new2("0"); } if (val < 0) { val = -val; @@ -1982,7 +1987,7 @@ *--b = '-'; } - return rb_str_new2(b); + return rb_usascii_str_new2(b); } /* Index: bignum.c =================================================================== --- bignum.c (revision 15243) +++ bignum.c (revision 15244) @@ -904,7 +904,7 @@ return rb_fix2str(x, base); } if (BIGZEROP(x)) { - return rb_str_new2("0"); + return rb_usascii_str_new2("0"); } if (base < 2 || 36 < base) Index: test/ruby/test_m17n.rb =================================================================== --- test/ruby/test_m17n.rb (revision 15243) +++ test/ruby/test_m17n.rb (revision 15244) @@ -962,4 +962,55 @@ assert_equal(Encoding::ASCII_8BIT, v.encoding) } end + + def test_empty_string + assert_equal("".encoding, Encoding::US_ASCII) + end + + def test_nil_to_s + assert_equal(nil.to_s.encoding, Encoding::US_ASCII) + end + + def test_nil_inspect + assert_equal(nil.inspect.encoding, Encoding::US_ASCII) + end + + def 
test_true_to_s + assert_equal(true.to_s.encoding, Encoding::US_ASCII) + end + + def test_false_to_s + assert_equal(false.to_s.encoding, Encoding::US_ASCII) + end + + def test_fixnum_to_s + assert_equal(1.to_s.encoding, Encoding::US_ASCII) + end + + def test_float_to_s + assert_equal(1.0.to_s.encoding, Encoding::US_ASCII) + end + + def test_bignum_to_s + assert_equal((1<<129).to_s.encoding, Encoding::US_ASCII) + end + + def test_array_to_s + assert_equal([].to_s.encoding, Encoding::US_ASCII) + assert_equal([nil].to_s.encoding, Encoding::US_ASCII) + assert_equal([1].to_s.encoding, Encoding::US_ASCII) + assert_equal([""].to_s.encoding, Encoding::US_ASCII) + assert_equal(["a"].to_s.encoding, Encoding::US_ASCII) + assert_equal([nil,1,"","a","\x20",[]].to_s.encoding, Encoding::US_ASCII) + end + + def test_hash_to_s + assert_equal({}.to_s.encoding, Encoding::US_ASCII) + assert_equal({1=>nil,"foo"=>""}.to_s.encoding, Encoding::US_ASCII) + end + + def test_encoding_to_s + assert_equal(Encoding::US_ASCII.to_s.encoding, Encoding::US_ASCII) + assert_equal(Encoding::US_ASCII.inspect.encoding, Encoding::US_ASCII) + end end Index: file.c =================================================================== --- file.c (revision 15243) +++ file.c (revision 15244) @@ -1632,7 +1632,7 @@ t = "unknown"; } - return rb_str_new2(t); + return rb_usascii_str_new2(t); } /* @@ -2917,7 +2917,7 @@ p = root; } if (p == name) - return rb_str_new2("."); + return rb_usascii_str_new2("."); #ifdef DOSISH_DRIVE_LETTER if (has_drive_letter(name) && isdirsep(*(name + 2))) { const char *top = skiproot(name + 2); @@ -2965,7 +2965,7 @@ e = strrchr(p, '.'); /* get the last dot of the last component */ if (!e || e == p || !e[1]) /* no dot, or the only dot is first or end? */ - return rb_str_new2(""); + return rb_str_new(0, 0); extname = rb_str_new(e, chompdirsep(e) - e); /* keep the dot, too! 
*/ OBJ_INFECT(extname, fname); return extname; @@ -3014,7 +3014,7 @@ file_inspect_join(VALUE ary, VALUE argp, int recur) { VALUE *arg = (VALUE *)argp; - if (recur) return rb_str_new2("[...]"); + if (recur) return rb_usascii_str_new2("[...]"); return rb_file_join(arg[0], arg[1]); } @@ -4516,14 +4516,14 @@ rb_define_singleton_method(rb_cFile, "extname", rb_file_s_extname, 1); rb_define_singleton_method(rb_cFile, "path", rb_file_s_path, 1); - separator = rb_obj_freeze(rb_str_new2("/")); + separator = rb_obj_freeze(rb_usascii_str_new2("/")); rb_define_const(rb_cFile, "Separator", separator); rb_define_const(rb_cFile, "SEPARATOR", separator); rb_define_singleton_method(rb_cFile, "split", rb_file_s_split, 1); rb_define_singleton_method(rb_cFile, "join", rb_file_s_join, -2); #ifdef DOSISH - rb_define_const(rb_cFile, "ALT_SEPARATOR", rb_obj_freeze(rb_str_new2("\\"))); + rb_define_const(rb_cFile, "ALT_SEPARATOR", rb_obj_freeze(rb_usascii_str_new2("\\"))); #else rb_define_const(rb_cFile, "ALT_SEPARATOR", Qnil); #endif -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/