[前][次][番号順一覧][スレッド一覧]

ruby-changes:3728

From: ko1@a...
Date: Thu, 24 Jan 2008 20:44:00 +0900 (JST)
Subject: [ruby-changes:3728] usa - Ruby:r15217 (trunk): * parse.y (parser_str_new): automatically update string literal's

usa	2008-01-24 20:43:40 +0900 (Thu, 24 Jan 2008)

  New Revision: 15217

  Modified files:
    trunk/ChangeLog
    trunk/parse.y

  Log:
    * parse.y (parser_str_new): automatically update string literal's
      encoding from US-ASCII to ASCII-8BIT when script encoding is US-ASCII
      and the string includes non-ASCII bytes. [ruby-dev:33348]
    
    * parse.y (reg_fragment_check_gen, reg_compile_gen): automatically
      update regexp literal's encoding from US-ASCII to ASCII-8BIT when
      script encoding is US-ASCII, the regexp has no kcode option and the
      regexp includes non-ASCII bytes. [ruby-dev:33353]
    


  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/parse.y?r1=15217&r2=15216&diff_format=u
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=15217&r2=15216&diff_format=u

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 15216)
+++ ChangeLog	(revision 15217)
@@ -1,3 +1,14 @@
+Thu Jan 24 20:21:07 2008  NAKAMURA Usaku  <usa@r...>
+
+	* parser.y (parser_str_new): automatically update string literal's
+	  encoding from US-ASCII to ASCII-8BIT when script encoding is US-ASCII
+	  and the string includes non-ascii bytes. [ruby-dev:33348]
+
+	* parser.y (reg_fragment_check_gen, reg_compile_gen): automatically
+	  update regexp literal's encoding from US-ASCII to ASCII-8BIT when
+	  script encoding is US-ASCII, the regexp has no kcode option and the
+	  regexp includes non-ascii bytes. [ruby-dev:33353]
+
 Thu Jan 24 19:36:22 2008  Nobuyoshi Nakada  <nobu@r...>
 
 	* lib/uri/generic.rb (URI::Generic::inspect): use Kernel#to_s instead
Index: parse.y
===================================================================
--- parse.y	(revision 15216)
+++ parse.y	(revision 15217)
@@ -270,7 +270,7 @@
 #define STR_NEW(p,n) rb_enc_str_new((p),(n),parser->enc)
 #define STR_NEW0() rb_enc_str_new(0,0,rb_usascii_encoding())
 #define STR_NEW2(p) rb_enc_str_new((p),strlen(p),parser->enc)
-#define STR_NEW3(p,n,e,func) parser_str_new((p),(n),(e),(func))
+#define STR_NEW3(p,n,e,func) parser_str_new((p),(n),(e),(func),parser->enc)
 #define STR_ENC(m) ((m)?parser->enc:rb_enc_from_index(0))
 #define ENC_SINGLE(cr) ((cr)==ENC_CODERANGE_7BIT)
 #define TOK_INTERN(mb) rb_intern3(tok(), toklen(), STR_ENC(mb))
@@ -456,8 +456,9 @@
 #define RE_OPTION_ENCODING_SHIFT 8
 #define RE_OPTION_ENCODING(e) (((e)&0xff)<<RE_OPTION_ENCODING_SHIFT)
 #define RE_OPTION_ENCODING_IDX(o) (((o)>>RE_OPTION_ENCODING_SHIFT)&0xff)
-#define RE_OPTION_ENCODING_NONE(o) ((o)&32)
+#define RE_OPTION_ENCODING_NONE(o) ((o)&RE_OPTION_ARG_ENCODING_NONE)
 #define RE_OPTION_MASK  0xff
+#define RE_OPTION_ARG_ENCODING_NONE 32
 
 #define NODE_STRTERM NODE_ZARRAY	/* nothing to gc */
 #define NODE_HEREDOC NODE_ARRAY 	/* 1, 3 to gc */
@@ -4838,15 +4839,18 @@
 };
 
 static VALUE
-parser_str_new(const char *p, long n, rb_encoding *enc, int func)
+parser_str_new(const char *p, long n, rb_encoding *enc, int func, rb_encoding *enc0)
 {
     VALUE str;
 
     str = rb_enc_str_new(p, n, enc);
-    if (!(func & STR_FUNC_REGEXP) &&
-        rb_enc_asciicompat(enc) &&
-        rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT) {
-        rb_enc_associate(str, rb_usascii_encoding());
+    if (!(func & STR_FUNC_REGEXP) && rb_enc_asciicompat(enc)) {
+	if (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT) {
+	    rb_enc_associate(str, rb_usascii_encoding());
+	}
+	else if (enc0 == rb_usascii_encoding()) {
+	    rb_enc_associate(str, rb_ascii8bit_encoding());
+	}
     }
 
     return str;
@@ -8488,6 +8492,10 @@
 reg_fragment_check_gen(struct parser_params* parser, VALUE str, int options)
 {
     VALUE err;
+    if (!RE_OPTION_ENCODING_IDX(options) &&
+	parser->enc == rb_usascii_encoding()) {
+	options |= RE_OPTION_ARG_ENCODING_NONE;
+    }
     reg_fragment_setenc_gen(parser, str, options);
     err = rb_reg_check_preprocess(str);
     if (err != Qnil) {
@@ -8581,6 +8589,10 @@
 {
     VALUE re;
 
+    if (!RE_OPTION_ENCODING_IDX(options) &&
+	parser->enc == rb_usascii_encoding()) {
+	options |= RE_OPTION_ARG_ENCODING_NONE;
+    }
     reg_fragment_setenc(str, options);
     re = rb_reg_compile(str, options & RE_OPTION_MASK);
     if (NIL_P(re)) {

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]