[前][次][番号順一覧][スレッド一覧]

ruby-changes:2628

From: ko1@a...
Date: 6 Dec 2007 18:28:48 +0900
Subject: [ruby-changes:2628] akr - Ruby:r14119 (trunk): * encoding.c (rb_enc_precise_mbclen): new function for mbclen with

akr	2007-12-06 18:28:26 +0900 (Thu, 06 Dec 2007)

  New Revision: 14119

  Modified files:
    trunk/ChangeLog
    trunk/enc/euc_jp.c
    trunk/enc/sjis.c
    trunk/enc/utf8.c
    trunk/encoding.c
    trunk/include/ruby/encoding.h
    trunk/include/ruby/oniguruma.h
    trunk/io.c
    trunk/string.c
    trunk/test/ruby/test_m17n.rb

  Log:
    * encoding.c (rb_enc_precise_mbclen): new function for mbclen with
      validation.
    
    * include/ruby/encoding.h (rb_enc_precise_mbclen): declared.
      (MBCLEN_CHARFOUND): new macro.
      (MBCLEN_INVALID): new macro.
      (MBCLEN_NEEDMORE): new macro.
    
    * include/ruby/oniguruma.h (OnigEncodingTypeST): replace mbc_enc_len
      by precise_mbc_enc_len.
      (ONIGENC_PRECISE_MBC_ENC_LEN): new macro.
      (ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND): new macro.
      (ONIGENC_CONSTRUCT_MBCLEN_INVALID): new macro.
      (ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE): new macro.
      (ONIGENC_MBCLEN_CHARFOUND): new macro.
      (ONIGENC_MBCLEN_INVALID): new macro.
      (ONIGENC_MBCLEN_NEEDMORE): new macro.
      (ONIGENC_MBC_ENC_LEN): use ONIGENC_PRECISE_MBC_ENC_LEN.
    
    * enc/euc_jp.c: validation implemented.
    
    * enc/sjis.c: ditto.
    
    * enc/utf8.c: ditto.
    
    * string.c (rb_str_inspect): use rb_enc_precise_mbclen for invalid
      encoding.
      (rb_str_valid_encoding_p): new method String#valid_encoding?.
    
    * io.c (rb_io_getc): use rb_enc_precise_mbclen.


  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/include/ruby/oniguruma.h?r1=14119&r2=14118
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/enc/utf8.c?r1=14119&r2=14118
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/string.c?r1=14119&r2=14118
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=14119&r2=14118
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/io.c?r1=14119&r2=14118
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/include/ruby/encoding.h?r1=14119&r2=14118
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/enc/euc_jp.c?r1=14119&r2=14118
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/enc/sjis.c?r1=14119&r2=14118
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/encoding.c?r1=14119&r2=14118
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/test/ruby/test_m17n.rb?r1=14119&r2=14118

Index: encoding.c
===================================================================
--- encoding.c	(revision 14118)
+++ encoding.c	(revision 14119)
@@ -495,6 +495,12 @@
 }
 
 int
+rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
+{
+    return ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
+}
+
+int
 rb_enc_codelen(int c, rb_encoding *enc)
 {
     int n = ONIGENC_CODE_TO_MBCLEN(enc,c);
Index: include/ruby/encoding.h
===================================================================
--- include/ruby/encoding.h	(revision 14118)
+++ include/ruby/encoding.h	(revision 14119)
@@ -68,9 +68,15 @@
 #define rb_enc_mbminlen(enc) (enc)->min_enc_len
 #define rb_enc_mbmaxlen(enc) (enc)->max_enc_len
 
-/* ptr,encoding -> mbclen */
+/* ptr,endptr,encoding -> mbclen */
 int rb_enc_mbclen(const char*, const char *, rb_encoding*);
 
+/* ptr,endptr,encoding -> chlen, invalid or needmore */
+int rb_enc_precise_mbclen(const char*, const char *, rb_encoding*);
+#define MBCLEN_CHARFOUND(ret)     ONIGENC_MBCLEN_CHARFOUND(ret)
+#define MBCLEN_INVALID(ret)       ONIGENC_MBCLEN_INVALID(ret)
+#define MBCLEN_NEEDMORE(ret)      ONIGENC_MBCLEN_NEEDMORE(ret)
+
 /* code,encoding -> codelen */
 int rb_enc_codelen(int, rb_encoding*);
 
Index: include/ruby/oniguruma.h
===================================================================
--- include/ruby/oniguruma.h	(revision 14118)
+++ include/ruby/oniguruma.h	(revision 14119)
@@ -144,7 +144,7 @@
 typedef int (*OnigApplyAllCaseFoldFunc)(OnigCodePoint from, OnigCodePoint* to, int to_len, void* arg);
 
 typedef struct OnigEncodingTypeST {
-  int    (*mbc_enc_len)(const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc);
+  int    (*precise_mbc_enc_len)(const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc);
   const char*   name;
   int           max_enc_len;
   int           min_enc_len;
@@ -282,7 +282,32 @@
 #define ONIGENC_STEP_BACK(enc,start,s,n) \
         onigenc_step_back((enc),(start),(s),(n))
 
-#define ONIGENC_MBC_ENC_LEN(enc,p,e)           (enc)->mbc_enc_len(p,e,enc)
+
+#define ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(n)   (n)
+#define ONIGENC_CONSTRUCT_MBCLEN_INVALID()      (-1)
+#define ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n)    (-1-(n)) /* (n) parenthesized: callers pass expressions such as len-1 */
+
+static inline int onigenc_mbclen_charfound(int r) { return 0 < r ? r : 0; }
+static inline int onigenc_mbclen_needmore(int r) { return r < -1 ? -1 - r : 0; }
+#define ONIGENC_MBCLEN_CHARFOUND(r)     onigenc_mbclen_charfound(r)
+#define ONIGENC_MBCLEN_INVALID(r)       ((r) == -1)
+#define ONIGENC_MBCLEN_NEEDMORE(r)      onigenc_mbclen_needmore(r)
+
+#define ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e)   (enc)->precise_mbc_enc_len(p,e,enc)
+
+static inline int onigenc_mbclen_recover(const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc)
+{
+    int ret = ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e);
+    int r;
+    if (ONIGENC_MBCLEN_INVALID(ret))
+        return 1;
+    else if ((r = ONIGENC_MBCLEN_NEEDMORE(ret)))
+        return e-p+r;
+    else
+        return ONIGENC_MBCLEN_CHARFOUND(ret);
+}
+
+#define ONIGENC_MBC_ENC_LEN(enc,p,e)           onigenc_mbclen_recover(p,e,enc)
 #define ONIGENC_MBC_MAXLEN(enc)               ((enc)->max_enc_len)
 #define ONIGENC_MBC_MAXLEN_DIST(enc)           ONIGENC_MBC_MAXLEN(enc)
 #define ONIGENC_MBC_MINLEN(enc)               ((enc)->min_enc_len)
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 14118)
+++ ChangeLog	(revision 14119)
@@ -1,3 +1,36 @@
+Thu Dec  6 18:22:11 2007  Tanaka Akira  <akr@f...>
+
+	* encoding.c (rb_enc_precise_mbclen): new function for mbclen with
+	  validation.
+
+	* include/ruby/encoding.h (rb_enc_precise_mbclen): declared.
+	  (MBCLEN_CHARFOUND): new macro.
+	  (MBCLEN_INVALID): new macro.
+	  (MBCLEN_NEEDMORE): new macro.
+
+	* include/ruby/oniguruma.h (OnigEncodingTypeST): replace mbc_enc_len
+	  by precise_mbc_enc_len.
+	  (ONIGENC_PRECISE_MBC_ENC_LEN): new macro.
+	  (ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND): new macro.
+	  (ONIGENC_CONSTRUCT_MBCLEN_INVALID): new macro.
+	  (ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE): new macro.
+	  (ONIGENC_MBCLEN_CHARFOUND): new macro.
+	  (ONIGENC_MBCLEN_INVALID): new macro.
+	  (ONIGENC_MBCLEN_NEEDMORE): new macro.
+	  (ONIGENC_MBC_ENC_LEN): use ONIGENC_PRECISE_MBC_ENC_LEN.
+
+	* enc/euc_jp.c: validation implemented.
+
+	* enc/sjis.c: ditto.
+
+	* enc/utf8.c: ditto.
+
+	* string.c (rb_str_inspect): use rb_enc_precise_mbclen for invalid
+	  encoding.
+	  (rb_str_valid_encoding_p): new method String#valid_encoding?.
+
+	* io.c (rb_io_getc): use rb_enc_precise_mbclen.
+
 Thu Dec  6 01:37:23 2007  Nobuyoshi Nakada  <nobu@r...>
 
 	* regparse.c (i_apply_case_fold): fix for negative character class.  a
Index: enc/euc_jp.c
===================================================================
--- enc/euc_jp.c	(revision 14118)
+++ enc/euc_jp.c	(revision 14119)
@@ -50,10 +50,85 @@
   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
 };
 
+typedef enum { FAILURE = -2, ACCEPT = -1, S0 = 0, S1, S2 } state_t;
+#define A ACCEPT
+#define F FAILURE
+static const signed char trans[][0x100] = {
+  { /* S0   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+    /* 0 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 1 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 2 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 3 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, 1, 2,
+    /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* a */ F, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* c */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* d */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* e */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, F 
+  },
+  { /* S1   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+    /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* a */ F, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* b */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* c */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* d */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* e */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* f */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, F 
+  },
+  { /* S2   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+    /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* a */ F, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* c */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* d */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* e */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, F 
+  },
+
+};
+#undef A
+#undef F
+
 static int
 mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc)
 {
-  return EncLen_EUCJP[*p];
+  int firstbyte = *p++;
+  state_t s;
+  s = trans[0][firstbyte];
+  if (s < 0) return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(1) :
+                                  ONIGENC_CONSTRUCT_MBCLEN_INVALID();
+  if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_EUCJP[firstbyte]-1);
+  s = trans[s][*p++];
+  if (s < 0) return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(2) :
+                                  ONIGENC_CONSTRUCT_MBCLEN_INVALID();
+  if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_EUCJP[firstbyte]-2);
+  s = trans[s][*p++];
+  return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(3) :
+                       ONIGENC_CONSTRUCT_MBCLEN_INVALID();
 }
 
 static OnigCodePoint
Index: enc/utf8.c
===================================================================
--- enc/utf8.c	(revision 14118)
+++ enc/utf8.c	(revision 14119)
@@ -56,13 +56,189 @@
   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
   3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-  4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
+  4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
 };
 
+typedef enum {
+  FAILURE = -2,
+  ACCEPT,
+  S0, S1, S2, S3,
+  S4, S5, S6, S7
+} state_t;
+#define A ACCEPT
+#define F FAILURE
+static const signed char trans[][0x100] = {
+  { /* S0   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+    /* 0 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 1 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 2 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 3 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* a */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* b */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* c */ F, F, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* d */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* e */ 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3,
+    /* f */ 5, 6, 6, 6, 7, F, F, F, F, F, F, F, F, F, F, F 
+  },
+  { /* S1   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+    /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 8 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 9 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* a */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* b */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F 
+  },
+  { /* S2   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+    /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* a */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F 
+  },
+  { /* S3   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+    /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 8 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* 9 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* a */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F 
+  },
+  { /* S4   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+    /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 8 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* 9 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* a */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* b */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F 
+  },
+  { /* S5   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+    /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 9 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    /* a */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    /* b */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F 
+  },
+  { /* S6   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+    /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 8 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    /* 9 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    /* a */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    /* b */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F 
+  },
+  { /* S7   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+    /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 8 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* a */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* b */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F 
+  },
+};
+#undef A
+#undef F
+
 static int
 utf8_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc)
 {
-  return EncLen_UTF8[*p];
+  int firstbyte = *p++;
+  state_t s;
+  s = trans[0][firstbyte];
+  if (s < 0) return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(1) :
+                                  ONIGENC_CONSTRUCT_MBCLEN_INVALID();
+
+  if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_UTF8[firstbyte]-1);
+  s = trans[s][*p++];
+  if (s < 0) return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(2) :
+                                  ONIGENC_CONSTRUCT_MBCLEN_INVALID();
+
+  if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_UTF8[firstbyte]-2);
+  s = trans[s][*p++];
+  if (s < 0) return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(3) :
+                                  ONIGENC_CONSTRUCT_MBCLEN_INVALID();
+
+  if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_UTF8[firstbyte]-3);
+  s = trans[s][*p++];
+  return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(4) :
+                       ONIGENC_CONSTRUCT_MBCLEN_INVALID();
 }
 
 static int
Index: enc/sjis.c
===================================================================
--- enc/sjis.c	(revision 14118)
+++ enc/sjis.c	(revision 14119)
@@ -70,10 +70,62 @@
 #define SJIS_ISMB_FIRST(byte)  (EncLen_SJIS[byte] > 1)
 #define SJIS_ISMB_TRAIL(byte)  SJIS_CAN_BE_TRAIL_TABLE[(byte)]
 
+typedef enum { FAILURE = -2, ACCEPT = -1, S0 = 0, S1 } state_t;
+#define A ACCEPT
+#define F FAILURE
+static const signed char trans[][0x100] = {
+  { /* S0   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+    /* 0 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 1 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 2 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 3 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 8 */ F, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* 9 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* a */ F, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* b */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* c */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* d */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* e */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, F, F, F
+  },
+  { /* S1   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+    /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, F,
+    /* 8 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 9 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* a */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* b */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* c */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* d */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* e */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* f */ A, A, A, A, A, A, A, A, A, A, A, A, A, F, F, F
+  }
+};
+#undef A
+#undef F
+
 static int
 mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc)
 {
-  return EncLen_SJIS[*p];
+  int firstbyte = *p++;
+  state_t s;
+  s = trans[0][firstbyte];
+  if (s < 0) return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(1) :
+                                  ONIGENC_CONSTRUCT_MBCLEN_INVALID();
+  if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_SJIS[firstbyte]-1);
+  s = trans[s][*p++];
+  return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(2) :
+                       ONIGENC_CONSTRUCT_MBCLEN_INVALID();
 }
 
 static int
Index: string.c
===================================================================
--- string.c	(revision 14118)
+++ string.c	(revision 14119)
@@ -2919,10 +2919,20 @@
     str_cat_char(result, '"', enc);
     p = RSTRING_PTR(str); pend = RSTRING_END(str);
     while (p < pend) {
-	int c = rb_enc_codepoint(p, pend, enc);
-	int n = rb_enc_codelen(c, enc);
+	int c;
+	int n;
 	int cc;
 
+        n = rb_enc_precise_mbclen(p, pend, enc);
+        if (!MBCLEN_CHARFOUND(n)) {
+            p++;
+            n = 1;
+            goto escape_codepoint;
+        }
+
+	c = rb_enc_codepoint(p, pend, enc);
+	n = rb_enc_codelen(c, enc);
+
 	p += n;
 	if (c == '"'|| c == '\\' ||
 	    (c == '#' && (cc = rb_enc_codepoint(p,pend,enc),
@@ -2954,19 +2964,21 @@
 	    prefix_escape(result, 'e', enc);
 	}
 	else if (rb_enc_isprint(c, enc)) {
-	    char buf[5];
-
-	    rb_enc_mbcput(c, buf, enc);
-	    rb_str_buf_cat(result, buf, n);
+	    rb_str_buf_cat(result, p-n, n);
 	}
 	else {
 	    char buf[5];
-	    char *s = buf;
+	    char *s;
+            char *q;
 
-	    sprintf(buf, "\\%03o", c & 0377);
-	    while (*s) {
-		str_cat_char(result, *s++, enc);
-	    }
+escape_codepoint:
+            for (q = p-n; q < p; q++) {
+                s = buf;
+                sprintf(buf, "\\%03o", *q & 0377);
+                while (*s) {
+                    str_cat_char(result, *s++, enc);
+                }
+            }
 	}
     }
     str_cat_char(result, '"', enc);
@@ -5232,6 +5244,25 @@
     return str;
 }
 
+static VALUE
+rb_str_valid_encoding_p(VALUE str)
+{
+    char *p = RSTRING_PTR(str);
+    char *pend = RSTRING_END(str);
+    rb_encoding *enc = rb_enc_get(str);
+
+    while (p < pend) {
+	int n;
+
+        n = rb_enc_precise_mbclen(p, pend, enc);
+        if (!MBCLEN_CHARFOUND(n)) {
+            return Qfalse;
+        }
+        p += n;
+    }
+    return Qtrue;
+}
+
 /**********************************************************************
  * Document-class: Symbol
  *
@@ -5644,6 +5675,7 @@
 
     rb_define_method(rb_cString, "encoding", rb_obj_encoding, 0); /* in encoding.c */
     rb_define_method(rb_cString, "force_encoding", rb_str_force_encoding, 1);
+    rb_define_method(rb_cString, "valid_encoding?", rb_str_valid_encoding_p, 0);
 
     id_to_s = rb_intern("to_s");
 
Index: io.c
===================================================================
--- io.c	(revision 14118)
+++ io.c	(revision 14119)
@@ -2127,7 +2127,7 @@
 {
     rb_encoding *enc;
     rb_io_t *fptr;
-    int n, left;
+    int r, n;
     VALUE str;
 
     GetOpenFile(io, fptr);
@@ -2138,22 +2138,30 @@
     if (io_fillbuf(fptr) < 0) {
 	return Qnil;
     }
-    n = rb_enc_mbclen(fptr->rbuf+fptr->rbuf_off, fptr->rbuf+fptr->rbuf_len, enc);
-    if (n < fptr->rbuf_len) {
+    r = rb_enc_precise_mbclen(fptr->rbuf+fptr->rbuf_off, fptr->rbuf+fptr->rbuf_off+fptr->rbuf_len, enc);
+    if ((n = MBCLEN_CHARFOUND(r)) != 0 && n <= fptr->rbuf_len) {
 	str = rb_str_new(fptr->rbuf+fptr->rbuf_off, n);
 	fptr->rbuf_off += n;
 	fptr->rbuf_len -= n;
     }
+    else if (MBCLEN_NEEDMORE(r)) {
+	str = rb_str_new(fptr->rbuf+fptr->rbuf_off, fptr->rbuf_len);
+        fptr->rbuf_len = 0;
+getc_needmore:
+        if (io_fillbuf(fptr) != -1) {
+            rb_str_cat(str, fptr->rbuf+fptr->rbuf_off, 1);
+            fptr->rbuf_off++;
+            fptr->rbuf_len--;
+            r = rb_enc_precise_mbclen(RSTRING_PTR(str), RSTRING_PTR(str)+RSTRING_LEN(str), enc);
+            if (MBCLEN_NEEDMORE(r)) {
+                goto getc_needmore;
+            }
+        }
+    }
     else {
-	str = rb_str_new(0, n);
-	left = fptr->rbuf_len;
-	MEMCPY(RSTRING_PTR(str), fptr->rbuf+fptr->rbuf_off, char, left);
-	if (io_fillbuf(fptr) < 0) {
-	    return Qnil;
-	}
-	MEMCPY(RSTRING_PTR(str)+left, fptr->rbuf, char, n-left);
-	fptr->rbuf_off += left;
-	fptr->rbuf_len -= left;
+	str = rb_str_new(fptr->rbuf+fptr->rbuf_off, 1);
+	fptr->rbuf_off++;
+	fptr->rbuf_len--;
     }
     rb_enc_associate(str, enc);
 
Index: test/ruby/test_m17n.rb
===================================================================
--- test/ruby/test_m17n.rb	(revision 14118)
+++ test/ruby/test_m17n.rb	(revision 14119)
@@ -26,43 +26,75 @@
   end
 
   def test_string_mixed_unicode
-    assert_raise(SyntaxError) { eval(a(%{"\xc0\xa0\\u{6666}"})) }
-    assert_raise(SyntaxError) { eval(e(%{"\xc0\xa0\\u{6666}"})) }
-    assert_raise(SyntaxError) { eval(s(%{"\xc0\xa0\\u{6666}"})) }
-    assert_nothing_raised { eval(u(%{"\xc0\xa0\\u{6666}"})) }
-    assert_raise(SyntaxError) { eval(a(%{"\\u{6666}\xc0\xa0"})) }
-    assert_raise(SyntaxError) { eval(e(%{"\\u{6666}\xc0\xa0"})) }
-    assert_raise(SyntaxError) { eval(s(%{"\\u{6666}\xc0\xa0"})) }
-    assert_nothing_raised { eval(u(%{"\\u{6666}\xc0\xa0"})) }
+    assert_raise(SyntaxError) { eval(a(%{"\xc2\xa0\\u{6666}"})) }
+    assert_raise(SyntaxError) { eval(e(%{"\xc2\xa0\\u{6666}"})) }
+    assert_raise(SyntaxError) { eval(s(%{"\xc2\xa0\\u{6666}"})) }
+    assert_nothing_raised { eval(u(%{"\xc2\xa0\\u{6666}"})) }
+    assert_raise(SyntaxError) { eval(a(%{"\\u{6666}\xc2\xa0"})) }
+    assert_raise(SyntaxError) { eval(e(%{"\\u{6666}\xc2\xa0"})) }
+    assert_raise(SyntaxError) { eval(s(%{"\\u{6666}\xc2\xa0"})) }
+    assert_nothing_raised { eval(u(%{"\\u{6666}\xc2\xa0"})) }
   end
 
+  def test_string_inspect
+    assert_equal('"\376"', e("\xfe").inspect)
+    assert_equal('"\216"', e("\x8e").inspect)
+    assert_equal('"\217"', e("\x8f").inspect)
+    assert_equal('"\217\241"', e("\x8f\xa1").inspect)
+    assert_equal('"\357"', s("\xef").inspect)
+    assert_equal('"\302"', u("\xc2").inspect)
+    assert_equal('"\340\200"', u("\xe0\x80").inspect)
+    assert_equal('"\360\200\200"', u("\xf0\x80\x80").inspect)
+    assert_equal('"\370\200\200\200"', u("\xf8\x80\x80\x80").inspect)
+    assert_equal('"\374\200\200\200\200"', u("\xfc\x80\x80\x80\x80").inspect)
+
+    assert_equal('"\376 "', e("\xfe ").inspect)
+    assert_equal('"\216 "', e("\x8e ").inspect)
+    assert_equal('"\217 "', e("\x8f ").inspect)
+    assert_equal('"\217\241 "', e("\x8f\xa1 ").inspect)
+    assert_equal('"\357 "', s("\xef ").inspect)
+    assert_equal('"\302 "', u("\xc2 ").inspect)
+    assert_equal('"\340\200 "', u("\xe0\x80 ").inspect)
+    assert_equal('"\360\200\200 "', u("\xf0\x80\x80 ").inspect)
+    assert_equal('"\370\200\200\200 "', u("\xf8\x80\x80\x80 ").inspect)
+    assert_equal('"\374\200\200\200\200 "', u("\xfc\x80\x80\x80\x80 ").inspect)
+
+
+    assert_equal(e("\"\\241\x8f\xa1\xa1\""), e("\xa1\x8f\xa1\xa1").inspect)
+
+    assert_equal('"\201."', s("\x81.").inspect)
+    assert_equal(s("\"\x81@\""), s("\x81@").inspect)
+
+    assert_equal('"\374"', u("\xfc").inspect)
+  end
+
   def test_regexp_too_short_multibyte_character
     assert_raise(SyntaxError) { eval('/\xfe/e') }
     assert_raise(SyntaxError) { eval('/\x8e/e') }
     assert_raise(SyntaxError) { eval('/\x8f/e') }
     assert_raise(SyntaxError) { eval('/\x8f\xa1/e') }
     assert_raise(SyntaxError) { eval('/\xef/s') }
-    assert_raise(SyntaxError) { eval('/\xc0/u') }
+    assert_raise(SyntaxError) { eval('/\xc2/u') }
     assert_raise(SyntaxError) { eval('/\xe0\x80/u') }
     assert_raise(SyntaxError) { eval('/\xf0\x80\x80/u') }
-    assert_raise(SyntaxError) { eval('/\xf8\x80\x80\x80/u') }
-    assert_raise(SyntaxError) { eval('/\xfc\x80\x80\x80\x80/u') }
+    #assert_raise(SyntaxError) { eval('/\xf8\x80\x80\x80/u') }
+    #assert_raise(SyntaxError) { eval('/\xfc\x80\x80\x80\x80/u') }
 
     # raw 8bit
     assert_raise(SyntaxError) { eval("/\xfe/e") }
-    assert_raise(SyntaxError) { eval("/\xc0/u") }
+    assert_raise(SyntaxError) { eval("/\xc2/u") }
 
     # invalid suffix
-    assert_raise(SyntaxError) { eval('/\xc0\xff/u') }
-    assert_raise(SyntaxError) { eval('/\xc0 /u') }
-    #assert_raise(SyntaxError) { eval('/\xc0\x20/u') }
+    assert_raise(SyntaxError) { eval('/\xc2\xff/u') }
+    assert_raise(SyntaxError) { eval('/\xc2 /u') }
+    #assert_raise(SyntaxError) { eval('/\xc2\x20/u') }
   end
 
   def assert_regexp_generic_encoding(r)
     assert(!r.fixed_encoding?)
     %w[ASCII-8BIT EUC-JP Shift_JIS UTF-8].each {|ename|
-      # "\xc0\xa1" is a valid sequence for ASCII-8BIT, EUC-JP, Shift_JIS and UTF-8.
-      assert_nothing_raised { r =~ "\xc0\xa1".force_encoding(ename) }
+      # "\xc2\xa1" is a valid sequence for ASCII-8BIT, EUC-JP, Shift_JIS and UTF-8.
+      assert_nothing_raised { r =~ "\xc2\xa1".force_encoding(ename) }
     }
   end
 
@@ -71,9 +103,9 @@
     %w[ASCII-8BIT EUC-JP Shift_JIS UTF-8].each {|ename|
       enc = Encoding.find(ename)
       if enc == r.encoding
-        assert_nothing_raised { r =~ "\xc0\xa1".force_encoding(enc) }
+        assert_nothing_raised { r =~ "\xc2\xa1".force_encoding(enc) }
       else
-        assert_raise(ArgumentError) { r =~ "\xc0\xa1".force_encoding(enc) }
+        assert_raise(ArgumentError) { r =~ "\xc2\xa1".force_encoding(enc) }
       end
     }
   end
@@ -115,77 +147,77 @@
       assert_equal(0, r =~ e("a"))
       assert_equal(0, r =~ s("a"))
       assert_equal(0, r =~ u("a"))
-      assert_equal(nil, r =~ a("\xc0\xa1"))
-      assert_equal(nil, r =~ e("\xc0\xa1"))
-      assert_equal(nil, r =~ s("\xc0\xa1"))
-      assert_equal(nil, r =~ u("\xc0\xa1"))
+      assert_equal(nil, r =~ a("\xc2\xa1"))
+      assert_equal(nil, r =~ e("\xc2\xa1"))
+      assert_equal(nil, r =~ s("\xc2\xa1"))
+      assert_equal(nil, r =~ u("\xc2\xa1"))
     }
   end
 
   def test_regexp_ascii
     assert_regexp_fixed_ascii8bit(/a/n)
-    assert_regexp_fixed_ascii8bit(/\xc0\xa1/n)
-    assert_regexp_fixed_ascii8bit(eval(a(%{/\xc0\xa1/})))
-    assert_regexp_fixed_ascii8bit(eval(a(%{/\xc0\xa1/n})))
-    assert_regexp_fixed_ascii8bit(eval(a(%q{/\xc0\xa1/})))
+    assert_regexp_fixed_ascii8bit(/\xc2\xa1/n)
+    assert_regexp_fixed_ascii8bit(eval(a(%{/\xc2\xa1/})))
+    assert_regexp_fixed_ascii8bit(eval(a(%{/\xc2\xa1/n})))
+    assert_regexp_fixed_ascii8bit(eval(a(%q{/\xc2\xa1/})))
 
     [/a/n].each {|r|
       assert_equal(0, r =~ a("a"))
       assert_equal(0, r =~ e("a"))
       assert_equal(0, r =~ s("a"))
       assert_equal(0, r =~ u("a"))
-      assert_equal(nil, r =~ a("\xc0\xa1"))
-      assert_raise(ArgumentError) { r =~ e("\xc0\xa1") }
-      assert_raise(ArgumentError) { r =~ s("\xc0\xa1") }
-      assert_raise(ArgumentError) { r =~ u("\xc0\xa1") }
+      assert_equal(nil, r =~ a("\xc2\xa1"))
+      assert_raise(ArgumentError) { r =~ e("\xc2\xa1") }
+      assert_raise(ArgumentError) { r =~ s("\xc2\xa1") }
+      assert_raise(ArgumentError) { r =~ u("\xc2\xa1") }
     }
 
-    [/\xc0\xa1/n, eval(a(%{/\xc0\xa1/})), eval(a(%{/\xc0\xa1/n}))].each {|r|
+    [/\xc2\xa1/n, eval(a(%{/\xc2\xa1/})), eval(a(%{/\xc2\xa1/n}))].each {|r|
       assert_equal(nil, r =~ a("a"))
       assert_equal(nil, r =~ e("a"))
       assert_equal(nil, r =~ s("a"))
       assert_equal(nil, r =~ u("a"))
-      assert_equal(0, r =~ a("\xc0\xa1"))
-      assert_raise(ArgumentError) { r =~ e("\xc0\xa1") }
-      assert_raise(ArgumentError) { r =~ s("\xc0\xa1") }
-      assert_raise(ArgumentError) { r =~ u("\xc0\xa1") }
+      assert_equal(0, r =~ a("\xc2\xa1"))
+      assert_raise(ArgumentError) { r =~ e("\xc2\xa1") }
+      assert_raise(ArgumentError) { r =~ s("\xc2\xa1") }
+      assert_raise(ArgumentError) { r =~ u("\xc2\xa1") }
     }
   end
 
   def test_regexp_euc
     assert_regexp_fixed_eucjp(/a/e)
-    assert_regexp_fixed_eucjp(/\xc0\xa1/e)
-    assert_regexp_fixed_eucjp(eval(e(%{/\xc0\xa1/})))
-    assert_regexp_fixed_eucjp(eval(e(%q{/\xc0\xa1/})))
+    assert_regexp_fixed_eucjp(/\xc2\xa1/e)
+    assert_regexp_fixed_eucjp(eval(e(%{/\xc2\xa1/})))
+    assert_regexp_fixed_eucjp(eval(e(%q{/\xc2\xa1/})))
 
     [/a/e].each {|r|
       assert_equal(0, r =~ a("a"))
       assert_equal(0, r =~ e("a"))
       assert_equal(0, r =~ s("a"))
       assert_equal(0, r =~ u("a"))
-      assert_raise(ArgumentError) { r =~ a("\xc0\xa1") }
-      assert_equal(nil, r =~ e("\xc0\xa1"))
-      assert_raise(ArgumentError) { r =~ s("\xc0\xa1") }
-      assert_raise(ArgumentError) { r =~ u("\xc0\xa1") }
+      assert_raise(ArgumentError) { r =~ a("\xc2\xa1") }
+      assert_equal(nil, r =~ e("\xc2\xa1"))
+      assert_raise(ArgumentError) { r =~ s("\xc2\xa1") }
+      assert_raise(ArgumentError) { r =~ u("\xc2\xa1") }
     }
 
-    [/\xc0\xa1/e, eval(e(%{/\xc0\xa1/})), eval(e(%q{/\xc0\xa1/}))].each {|r|
+    [/\xc2\xa1/e, eval(e(%{/\xc2\xa1/})), eval(e(%q{/\xc2\xa1/}))].each {|r|
       assert_equal(nil, r =~ a("a"))
       assert_equal(nil, r =~ e("a"))
       assert_equal(nil, r =~ s("a"))
       assert_equal(nil, r =~ u("a"))
-      assert_raise(ArgumentError) { r =~ a("\xc0\xa1") }
-      assert_equal(0, r =~ e("\xc0\xa1"))
-      assert_raise(ArgumentError) { r =~ s("\xc0\xa1") }
-      assert_raise(ArgumentError) { r =~ u("\xc0\xa1") }
+      assert_raise(ArgumentError) { r =~ a("\xc2\xa1") }
+      assert_equal(0, r =~ e("\xc2\xa1"))
+      assert_raise(ArgumentError) { r =~ s("\xc2\xa1") }
+      assert_raise(ArgumentError) { r =~ u("\xc2\xa1") }
     }
   end
 
   def test_regexp_sjis
     assert_regexp_fixed_sjis(/a/s)
-    assert_regexp_fixed_sjis(/\xc0\xa1/s)
-    assert_regexp_fixed_sjis(eval(s(%{/\xc0\xa1/})))
-    assert_regexp_fixed_sjis(eval(s(%q{/\xc0\xa1/})))
+    assert_regexp_fixed_sjis(/\xc2\xa1/s)
+    assert_regexp_fixed_sjis(eval(s(%{/\xc2\xa1/})))
+    assert_regexp_fixed_sjis(eval(s(%q{/\xc2\xa1/})))
   end
 
   def test_begin_end_offset
@@ -223,10 +255,10 @@
     assert_encoding("ASCII-8BIT", Regexp.quote(s("a")).encoding)
     assert_encoding("ASCII-8BIT", Regexp.quote(u("a")).encoding)
 
-    assert_encoding("ASCII-8BIT", Regexp.quote(a("\xc0\xa1")).encoding)
-    assert_encoding("EUC-JP",     Regexp.quote(e("\xc0\xa1")).encoding)
-    assert_encoding("Shift_JIS",  Regexp.quote(s("\xc0\xa1")).encoding)
-    assert_encoding("UTF-8",      Regexp.quote(u("\xc0\xa1")).encoding)
+    assert_encoding("ASCII-8BIT", Regexp.quote(a("\xc2\xa1")).encoding)
+    assert_encoding("EUC-JP",     Regexp.quote(e("\xc2\xa1")).encoding)
+    assert_encoding("Shift_JIS",  Regexp.quote(s("\xc2\xa1")).encoding)
+    assert_encoding("UTF-8",      Regexp.quote(u("\xc2\xa1")).encoding)
   end
 
   def test_union_0
@@ -254,10 +286,10 @@
   end
 
   def test_union_1_nonascii_string
-    assert_regexp_fixed_ascii8bit(Regexp.union(a("\xc0\xa1")))
-    assert_regexp_fixed_eucjp(Regexp.union(e("\xc0\xa1")))
-    assert_regexp_fixed_sjis(Regexp.union(s("\xc0\xa1")))
-    assert_regexp_fixed_utf8(Regexp.union(u("\xc0\xa1")))
+    assert_regexp_fixed_ascii8bit(Regexp.union(a("\xc2\xa1")))
+    assert_regexp_fixed_eucjp(Regexp.union(e("\xc2\xa1")))
+    assert_regexp_fixed_sjis(Regexp.union(s("\xc2\xa1")))
+    assert_regexp_fixed_utf8(Regexp.union(u("\xc2\xa1")))
   end
 
   def test_union_1_regexp
@@ -271,7 +303,7 @@
   def test_union_2
     ary = [
       a(""), e(""), s(""), u(""),
-      a("\xc0\xa1"), e("\xc0\xa1"), s("\xc0\xa1"), u("\xc0\xa1")
+      a("\xc2\xa1"), e("\xc2\xa1"), s("\xc2\xa1"), u("\xc2\xa1")
     ]
     ary.each {|s1|
       ary.each {|s2|
@@ -304,26 +336,26 @@
 
   def test_dynamic_ascii_regexp
     assert_regexp_fixed_ascii8bit(/#{}/n)
-    assert_regexp_fixed_ascii8bit(/#{}\xc0\xa1/n)
-    assert_regexp_fixed_ascii8bit(/\xc0\xa1#{}/n)
-    #assert_raise(SyntaxError) { eval('/\xc0#{}\xa1/s') }
-    #assert_raise(SyntaxError) { s1, s2 = s('\xc0'), s('\xa1'); /#{s1}#{s2}/ }
+    assert_regexp_fixed_ascii8bit(/#{}\xc2\xa1/n)
+    assert_regexp_fixed_ascii8bit(/\xc2\xa1#{}/n)
+    #assert_raise(SyntaxError) { eval('/\xc2#{}\xa1/s') }
+    #assert_raise(SyntaxError) { s1, s2 = s('\xc2'), s('\xa1'); /#{s1}#{s2}/ }
   end
 
   def test_dynamic_eucjp_regexp
     assert_regexp_fixed_eucjp(/#{}/e)
-    assert_regexp_fixed_eucjp(/#{}\xc0\xa1/e)
-    assert_regexp_fixed_eucjp(/\xc0\xa1#{}/e)
-    assert_raise(RegexpError) { eval('/\xc0#{}/e') }
-    assert_raise(RegexpError) { eval('/#{}\xc0/e') }
-    #assert_raise(SyntaxError) { eval('/\xc0#{}\xa1/e') }
-    #assert_raise(SyntaxError) { s1, s2 = e('\xc0'), e('\xa1'); /#{s1}#{s2}/ }
+    assert_regexp_fixed_eucjp(/#{}\xc2\xa1/e)
+    assert_regexp_fixed_eucjp(/\xc2\xa1#{}/e)
+    assert_raise(RegexpError) { eval('/\xc2#{}/e') }
+    assert_raise(RegexpError) { eval('/#{}\xc2/e') }
+    #assert_raise(SyntaxError) { eval('/\xc2#{}\xa1/e') }
+    #assert_raise(SyntaxError) { s1, s2 = e('\xc2'), e('\xa1'); /#{s1}#{s2}/ }
   end
 
   def test_dynamic_sjis_regexp
     assert_regexp_fixed_sjis(/#{}/s)
-    assert_regexp_fixed_sjis(/#{}\xc0\xa1/s)
-    assert_regexp_fixed_sjis(/\xc0\xa1#{}/s)
+    assert_regexp_fixed_sjis(/#{}\xc2\xa1/s)
+    assert_regexp_fixed_sjis(/\xc2\xa1#{}/s)
     assert_raise(RegexpError) { eval('/\x81#{}/s') }
     assert_raise(RegexpError) { eval('/#{}\x81/s') }
     #assert_raise(SyntaxError) { eval('/\x81#{}\xa1/s') }
@@ -332,49 +364,49 @@
 
   def test_dynamic_utf8_regexp
     assert_regexp_fixed_utf8(/#{}/u)
-    assert_regexp_fixed_utf8(/#{}\xc0\xa1/u)
-    assert_regexp_fixed_utf8(/\xc0\xa1#{}/u)
-    assert_raise(RegexpError) { eval('/\xc0#{}/u') }
-    assert_raise(RegexpError) { eval('/#{}\xc0/u') }
-    #assert_raise(SyntaxError) { eval('/\xc0#{}\xa1/u') }
-    #assert_raise(SyntaxError) { s1, s2 = u('\xc0'), u('\xa1'); /#{s1}#{s2}/ }
+    assert_regexp_fixed_utf8(/#{}\xc2\xa1/u)
+    assert_regexp_fixed_utf8(/\xc2\xa1#{}/u)
+    assert_raise(RegexpError) { eval('/\xc2#{}/u') }
+    assert_raise(RegexpError) { eval('/#{}\xc2/u') }
+    #assert_raise(SyntaxError) { eval('/\xc2#{}\xa1/u') }
+    #assert_raise(SyntaxError) { s1, s2 = u('\xc2'), u('\xa1'); /#{s1}#{s2}/ }
   end
 
   def test_regexp_mixed_unicode
-    assert_raise(SyntaxError) { eval(a(%{/\xc0\xa0\\u{6666}/})) }
-    assert_raise(SyntaxError) { eval(e(%{/\xc0\xa0\\u{6666}/})) }
-    assert_raise(SyntaxError) { eval(s(%{/\xc0\xa0\\u{6666}/})) }
-    assert_nothing_raised { eval(u(%{/\xc0\xa0\\u{6666}/})) }
-    assert_raise(SyntaxError) { eval(a(%{/\\u{6666}\xc0\xa0/})) }
-    assert_raise(SyntaxError) { eval(e(%{/\\u{6666}\xc0\xa0/})) }
-    assert_raise(SyntaxError) { eval(s(%{/\\u{6666}\xc0\xa0/})) }
-    assert_nothing_raised { eval(u(%{/\\u{6666}\xc0\xa0/})) }
+    assert_raise(SyntaxError) { eval(a(%{/\xc2\xa0\\u{6666}/})) }
+    assert_raise(SyntaxError) { eval(e(%{/\xc2\xa0\\u{6666}/})) }
+    assert_raise(SyntaxError) { eval(s(%{/\xc2\xa0\\u{6666}/})) }
+    assert_nothing_raised { eval(u(%{/\xc2\xa0\\u{6666}/})) }
+    assert_raise(SyntaxError) { eval(a(%{/\\u{6666}\xc2\xa0/})) }
+    assert_raise(SyntaxError) { eval(e(%{/\\u{6666}\xc2\xa0/})) }
+    assert_raise(SyntaxError) { eval(s(%{/\\u{6666}\xc2\xa0/})) }
+    assert_nothing_raised { eval(u(%{/\\u{6666}\xc2\xa0/})) }
 
-    assert_raise(SyntaxError) { eval(a(%{/\\xc0\\xa0\\u{6666}/})) }
-    assert_raise(SyntaxError) { eval(e(%{/\\xc0\\xa0\\u{6666}/})) }
-    assert_raise(SyntaxError) { eval(s(%{/\\xc0\\xa0\\u{6666}/})) }
-    assert_nothing_raised { eval(u(%{/\\xc0\\xa0\\u{6666}/})) }
-    assert_raise(SyntaxError) { eval(a(%{/\\u{6666}\\xc0\\xa0/})) }
-    assert_raise(SyntaxError) { eval(e(%{/\\u{6666}\\xc0\\xa0/})) }
-    assert_raise(SyntaxError) { eval(s(%{/\\u{6666}\\xc0\\xa0/})) }
-    assert_nothing_raised { eval(u(%{/\\u{6666}\\xc0\\xa0/})) }
+    assert_raise(SyntaxError) { eval(a(%{/\\xc2\\xa0\\u{6666}/})) }
+    assert_raise(SyntaxError) { eval(e(%{/\\xc2\\xa0\\u{6666}/})) }
+    assert_raise(SyntaxError) { eval(s(%{/\\xc2\\xa0\\u{6666}/})) }
+    assert_nothing_raised { eval(u(%{/\\xc2\\xa0\\u{6666}/})) }
+    assert_raise(SyntaxError) { eval(a(%{/\\u{6666}\\xc2\\xa0/})) }
+    assert_raise(SyntaxError) { eval(e(%{/\\u{6666}\\xc2\\xa0/})) }
+    assert_raise(SyntaxError) { eval(s(%{/\\u{6666}\\xc2\\xa0/})) }
+    assert_nothing_raised { eval(u(%{/\\u{6666}\\xc2\\xa0/})) }
 
-    assert_raise(SyntaxError) { eval(a(%{/\xc0\xa0#{}\\u{6666}/})) }
-    assert_raise(SyntaxError) { eval(e(%{/\xc0\xa0#{}\\u{6666}/})) }
-    assert_raise(SyntaxError) { eval(s(%{/\xc0\xa0#{}\\u{6666}/})) }
-    assert_nothing_raised { eval(u(%{/\xc0\xa0#{}\\u{6666}/})) }
-    assert_raise(SyntaxError) { eval(a(%{/\\u{6666}#{}\xc0\xa0/})) }
-    assert_raise(SyntaxError) { eval(e(%{/\\u{6666}#{}\xc0\xa0/})) }
-    assert_raise(SyntaxError) { eval(s(%{/\\u{6666}#{}\xc0\xa0/})) }
-    assert_nothing_raised { eval(u(%{/\\u{6666}#{}\xc0\xa0/})) }
+    assert_raise(SyntaxError) { eval(a(%{/\xc2\xa0#{}\\u{6666}/})) }
+    assert_raise(SyntaxError) { eval(e(%{/\xc2\xa0#{}\\u{6666}/})) }
+    assert_raise(SyntaxError) { eval(s(%{/\xc2\xa0#{}\\u{6666}/})) }
+    assert_nothing_raised { eval(u(%{/\xc2\xa0#{}\\u{6666}/})) }
+    assert_raise(SyntaxError) { eval(a(%{/\\u{6666}#{}\xc2\xa0/})) }
+    assert_raise(SyntaxError) { eval(e(%{/\\u{6666}#{}\xc2\xa0/})) }
+    assert_raise(SyntaxError) { eval(s(%{/\\u{6666}#{}\xc2\xa0/})) }
+    assert_nothing_raised { eval(u(%{/\\u{6666}#{}\xc2\xa0/})) }
 
-    assert_raise(SyntaxError) { eval(a(%{/\\xc0\\xa0#{}\\u{6666}/})) }
-    assert_raise(SyntaxError) { eval(e(%{/\\xc0\\xa0#{}\\u{6666}/})) }
-    assert_raise(SyntaxError) { eval(s(%{/\\xc0\\xa0#{}\\u{6666}/})) }
-    assert_nothing_raised { eval(u(%{/\\xc0\\xa0#{}\\u{6666}/})) }
-    assert_raise(SyntaxError) { eval(a(%{/\\u{6666}#{}\\xc0\\xa0/})) }
-    assert_raise(SyntaxError) { eval(e(%{/\\u{6666}#{}\\xc0\\xa0/})) }
-    assert_raise(SyntaxError) { eval(s(%{/\\u{6666}#{}\\xc0\\xa0/})) }
-    assert_nothing_raised { eval(u(%{/\\u{6666}#{}\\xc0\\xa0/})) }
+    assert_raise(SyntaxError) { eval(a(%{/\\xc2\\xa0#{}\\u{6666}/})) }
+    assert_raise(SyntaxError) { eval(e(%{/\\xc2\\xa0#{}\\u{6666}/})) }
+    assert_raise(SyntaxError) { eval(s(%{/\\xc2\\xa0#{}\\u{6666}/})) }
+    assert_nothing_raised { eval(u(%{/\\xc2\\xa0#{}\\u{6666}/})) }
+    assert_raise(SyntaxError) { eval(a(%{/\\u{6666}#{}\\xc2\\xa0/})) }
+    assert_raise(SyntaxError) { eval(e(%{/\\u{6666}#{}\\xc2\\xa0/})) }
+    assert_raise(SyntaxError) { eval(s(%{/\\u{6666}#{}\\xc2\\xa0/})) }
+    assert_nothing_raised { eval(u(%{/\\u{6666}#{}\\xc2\\xa0/})) }
   end
 end

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml

[前][次][番号順一覧][スレッド一覧]