ruby-changes:3429
From: ko1@a...
Date: 7 Jan 2008 11:49:25 +0900
Subject: [ruby-changes:3429] akr - Ruby:r14922 (trunk): * encoding.c (rb_enc_internal_get_index): extracted from
akr 2008-01-07 11:49:01 +0900 (Mon, 07 Jan 2008)
New Revision: 14922
Modified files:
trunk/ChangeLog
trunk/encoding.c
trunk/include/ruby/encoding.h
trunk/marshal.c
trunk/parse.y
trunk/re.c
trunk/string.c
trunk/test/ruby/test_m17n.rb
Log:
* encoding.c (rb_enc_internal_get_index): extracted from
rb_enc_get_index.
(rb_enc_internal_set_index): extracted from rb_enc_associate_index.
* include/ruby/encoding.h (ENCODING_SET): work over ENCODING_INLINE_MAX.
(ENCODING_GET): ditto.
(ENCODING_IS_ASCII8BIT): defined.
(ENCODING_CODERANGE_SET): defined.
* re.c (rb_reg_fixed_encoding_p): use ENCODING_IS_ASCII8BIT.
* string.c (rb_enc_str_buf_cat): use ENCODING_IS_ASCII8BIT.
* parse.y (reg_fragment_setenc_gen): use ENCODING_IS_ASCII8BIT.
* marshal.c (has_ivars): use ENCODING_IS_ASCII8BIT.
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/string.c?r1=14922&r2=14921&diff_format=u
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/parse.y?r1=14922&r2=14921&diff_format=u
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=14922&r2=14921&diff_format=u
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/include/ruby/encoding.h?r1=14922&r2=14921&diff_format=u
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/marshal.c?r1=14922&r2=14921&diff_format=u
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/re.c?r1=14922&r2=14921&diff_format=u
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/encoding.c?r1=14922&r2=14921&diff_format=u
http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/test/ruby/test_m17n.rb?r1=14922&r2=14921&diff_format=u
Index: encoding.c
===================================================================
--- encoding.c (revision 14921)
+++ encoding.c (revision 14922)
@@ -436,7 +436,34 @@
return id_encoding;
}
+int
+rb_enc_internal_get_index(VALUE obj)
+{
+ int i;
+
+ i = ENCODING_GET_INLINED(obj);
+ if (i == ENCODING_INLINE_MAX) {
+ VALUE iv;
+
+ iv = rb_ivar_get(obj, rb_id_encoding());
+ i = NUM2INT(iv);
+ }
+ return i;
+}
+
void
+rb_enc_internal_set_index(VALUE obj, int idx)
+{
+ if (idx < ENCODING_INLINE_MAX) {
+ ENCODING_SET_INLINED(obj, idx);
+ return;
+ }
+ ENCODING_SET_INLINED(obj, ENCODING_INLINE_MAX);
+ rb_ivar_set(obj, rb_id_encoding(), INT2NUM(idx));
+ return;
+}
+
+void
rb_enc_associate_index(VALUE obj, int idx)
{
enc_check_capable(obj);
@@ -444,13 +471,7 @@
!rb_enc_asciicompat(rb_enc_from_index(idx))) {
ENC_CODERANGE_CLEAR(obj);
}
- if (idx < ENCODING_INLINE_MAX) {
- ENCODING_SET(obj, idx);
- return;
- }
- ENCODING_SET(obj, ENCODING_INLINE_MAX);
- rb_ivar_set(obj, rb_id_encoding(), INT2NUM(idx));
- return;
+ rb_enc_internal_set_index(obj, idx);
}
int
@@ -476,17 +497,8 @@
int
rb_enc_get_index(VALUE obj)
{
- int i;
-
if (!enc_capable(obj)) return -1;
- i = ENCODING_GET(obj);
- if (i == ENCODING_INLINE_MAX) {
- VALUE iv;
-
- iv = rb_ivar_get(obj, rb_id_encoding());
- i = NUM2INT(iv);
- }
- return i;
+ return rb_enc_internal_get_index(obj);
}
rb_encoding*
Index: include/ruby/encoding.h
===================================================================
--- include/ruby/encoding.h (revision 14921)
+++ include/ruby/encoding.h (revision 14922)
@@ -22,12 +22,28 @@
#define ENCODING_INLINE_MAX 1023
#define ENCODING_SHIFT (FL_USHIFT+10)
#define ENCODING_MASK (ENCODING_INLINE_MAX<<ENCODING_SHIFT)
-#define ENCODING_SET(obj,i) do {\
+
+#define ENCODING_SET_INLINED(obj,i) do {\
RBASIC(obj)->flags &= ~ENCODING_MASK;\
- RBASIC(obj)->flags |= i << ENCODING_SHIFT;\
+ RBASIC(obj)->flags |= (i) << ENCODING_SHIFT;\
} while (0)
-#define ENCODING_GET(obj) ((RBASIC(obj)->flags & ENCODING_MASK)>>ENCODING_SHIFT)
+#define ENCODING_SET(obj,i) do {\
+ VALUE rb_encoding_set_obj = (obj); \
+ int encoding_set_enc_index = (i); \
+ if (encoding_set_enc_index < ENCODING_INLINE_MAX) \
+ ENCODING_SET_INLINED(rb_encoding_set_obj, encoding_set_enc_index); \
+ else \
+ rb_enc_internal_set_index(rb_encoding_set_obj, encoding_set_enc_index); \
+} while (0)
+#define ENCODING_GET_INLINED(obj) ((RBASIC(obj)->flags & ENCODING_MASK)>>ENCODING_SHIFT)
+#define ENCODING_GET(obj) \
+ (ENCODING_GET_INLINED(obj) != ENCODING_INLINE_MAX ? \
+ ENCODING_GET_INLINED(obj) : \
+ rb_enc_internal_get_index(obj))
+
+#define ENCODING_IS_ASCII8BIT(obj) (ENCODING_GET_INLINED(obj) == 0)
+
#define ENC_CODERANGE_MASK (FL_USER8|FL_USER9)
#define ENC_CODERANGE_UNKNOWN 0
#define ENC_CODERANGE_7BIT FL_USER8
@@ -39,6 +55,12 @@
(RBASIC(obj)->flags & ~ENC_CODERANGE_MASK) | (cr))
#define ENC_CODERANGE_CLEAR(obj) ENC_CODERANGE_SET(obj,0)
+#define ENCODING_CODERANGE_SET(obj, encindex, cr) \
+ do { \
+ VALUE rb_encoding_coderange_obj = (obj); \
+ ENCODING_SET(rb_encoding_coderange_obj, (encindex)); \
+ ENC_CODERANGE_SET(rb_encoding_coderange_obj, (cr)); \
+ } while (0)
typedef OnigEncodingType rb_encoding;
@@ -56,6 +78,8 @@
void rb_enc_associate_index(VALUE, int);
void rb_enc_associate(VALUE, rb_encoding*);
void rb_enc_copy(VALUE dst, VALUE src);
+int rb_enc_internal_get_index(VALUE obj);
+void rb_enc_internal_set_index(VALUE obj, int encindex);
VALUE rb_enc_str_new(const char*, long, rb_encoding*);
VALUE rb_enc_reg_new(const char*, long, rb_encoding*, int);
Index: re.c
===================================================================
--- re.c (revision 14921)
+++ re.c (revision 14922)
@@ -937,7 +937,7 @@
static VALUE
rb_reg_fixed_encoding_p(VALUE re)
{
- if (ENCODING_GET(re) != 0 || FL_TEST(re, KCODE_FIXED))
+ if (!ENCODING_IS_ASCII8BIT(re) || FL_TEST(re, KCODE_FIXED))
return Qtrue;
else
return Qfalse;
Index: ChangeLog
===================================================================
--- ChangeLog (revision 14921)
+++ ChangeLog (revision 14922)
@@ -1,3 +1,22 @@
+Mon Jan 7 11:44:45 2008 Tanaka Akira <akr@f...>
+
+ * encoding.c (rb_enc_internal_get_index): extracted from
+ rb_enc_get_index.
+ (rb_enc_internal_set_index): extracted from rb_enc_associate_index
+
+ * include/ruby/encoding.h (ENCODING_SET): work over ENCODING_INLINE_MAX.
+ (ENCODING_GET): ditto.
+ (ENCODING_IS_ASCII8BIT): defined.
+ (ENCODING_CODERANGE_SET): defined.
+
+ * re.c (rb_reg_fixed_encoding_p): use ENCODING_IS_ASCII8BIT.
+
+ * string.c (rb_enc_str_buf_cat): use ENCODING_IS_ASCII8BIT.
+
+ * parse.y (reg_fragment_setenc_gen): use ENCODING_IS_ASCII8BIT.
+
+ * marshal.c (has_ivars): use ENCODING_IS_ASCII8BIT.
+
Mon Jan 7 02:14:07 2008 Tanaka Akira <akr@f...>
* string.c (coderange_scan): avoid rb_enc_to_index.
Index: string.c
===================================================================
--- string.c (revision 14921)
+++ string.c (revision 14922)
@@ -1063,7 +1063,7 @@
rb_encoding *str_enc = rb_enc_get(str);
rb_encoding *res_enc;
int str_cr, ptr_cr, res_cr;
- int str_a8 = ENCODING_GET(str) == 0;
+ int str_a8 = ENCODING_IS_ASCII8BIT(str);
int ptr_a8 = ptr_enc == rb_ascii8bit_encoding();
str_cr = ENC_CODERANGE(str);
Index: parse.y
===================================================================
--- parse.y (revision 14921)
+++ parse.y (revision 14922)
@@ -8470,7 +8470,7 @@
if (c) {
int opt, idx;
rb_char_to_option_kcode(c, &opt, &idx);
- if (idx != ENCODING_GET(str) && ENCODING_GET(str) &&
+ if (idx != ENCODING_GET(str) && !ENCODING_IS_ASCII8BIT(str) &&
rb_enc_str_coderange(str) != ENC_CODERANGE_7BIT) {
compile_error(PARSER_ARG
"regexp encoding option '%c' differs from source encoding '%s'",
Index: marshal.c
===================================================================
--- marshal.c (revision 14921)
+++ marshal.c (revision 14922)
@@ -526,7 +526,7 @@
st_data_t num;
int hasiv = 0;
#define has_ivars(obj, ivtbl) ((ivtbl = rb_generic_ivar_table(obj)) != 0 || \
- (!SPECIAL_CONST_P(obj) && ENCODING_GET(obj)))
+ (!SPECIAL_CONST_P(obj) && !ENCODING_IS_ASCII8BIT(obj)))
if (limit == 0) {
rb_raise(rb_eArgError, "exceed depth limit");
Index: test/ruby/test_m17n.rb
===================================================================
--- test/ruby/test_m17n.rb (revision 14921)
+++ test/ruby/test_m17n.rb (revision 14922)
@@ -370,6 +370,15 @@
assert_regexp_fixed_sjis(eval(s(%q{/\xc2\xa1/})))
end
+ def test_regexp_windows_31j
+ begin
+ Regexp.new("\xa1".force_encoding("windows-31j")) =~ "\xa1\xa1".force_encoding("euc-jp")
+ rescue ArgumentError
+ err = $!
+ end
+ assert_match(/windows-31j/i, err.message)
+ end
+
def test_regexp_embed
r = eval(e("/\xc2\xa1/"))
assert_raise(ArgumentError) { eval(s("/\xc2\xa1\#{r}/s")) }
--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml