[前][次][番号順一覧][スレッド一覧]

ruby-changes:2839

From: ko1@a...
Date: 19 Dec 2007 19:20:41 +0900
Subject: [ruby-changes:2839] naruse - Ruby:r14330 (trunk): * ext/nkf/nkf-utf8/nkf.c: Updated.

naruse	2007-12-19 19:19:38 +0900 (Wed, 19 Dec 2007)

  New Revision: 14330

  Modified files:
    trunk/ChangeLog
    trunk/ext/nkf/lib/kconv.rb
    trunk/ext/nkf/nkf-utf8/nkf.c
    trunk/ext/nkf/nkf.c

  Log:
    * ext/nkf/nkf-utf8/nkf.c: Updated.
    
    * ext/nkf/nkf.c (rb_nkf_enc_get): added.
      (find encoding or replicate default encoding)
    
    * ext/nkf/nkf.c (NKF::<ENCODING>): redefine encoding constant.
    
    * ext/nkf/lib/kconv.rb (Kconv::<ENCODING>): redefined as Encoding.
    
    * ext/nkf/lib/kconv.rb: refactoring.


  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=14330&r2=14329
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ext/nkf/lib/kconv.rb?r1=14330&r2=14329
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ext/nkf/nkf.c?r1=14330&r2=14329
  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ext/nkf/nkf-utf8/nkf.c?r1=14330&r2=14329

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 14329)
+++ ChangeLog	(revision 14330)
@@ -1,3 +1,16 @@
+Wed Dec 19 19:18:06 2007  NARUSE, Yui <naruse@r...>
+
+	* ext/nkf/nkf-utf8/nkf.c: Updated.
+
+	* ext/nkf/nkf.c (rb_nkf_enc_get): added.
+	  (find encoding or replicate default encoding)
+
+	* ext/nkf/nkf.c (NKF::<ENCODING>): redefine encoding constant.
+
+	* ext/nkf/lib/kconv.rb (Kconv::<ENCODING>): redefined as Encoding.
+
+	* ext/nkf/lib/kconv.rb: refactoring.
+
 Wed Dec 19 19:11:08 2007  Yukihiro Matsumoto  <matz@r...>
 
 	* bignum.c (rb_big_mul0): blocking check for bigger numbers.
Index: ext/nkf/nkf.c
===================================================================
--- ext/nkf/nkf.c	(revision 14329)
+++ ext/nkf/nkf.c	(revision 14330)
@@ -61,6 +61,13 @@
 #include "nkf-utf8/utf8tbl.c"
 #include "nkf-utf8/nkf.c"
 
+rb_encoding* rb_nkf_enc_get(const char *name)
+{
+    int idx = rb_enc_find_index(name);
+    if (idx < 0) idx = rb_enc_replicate(name, rb_default_encoding());
+    return rb_enc_from_index(idx);
+}
+
 int nkf_split_options(const char *arg)
 {
     int count = 0;
@@ -126,16 +133,13 @@
 static VALUE
 rb_nkf_convert(VALUE obj, VALUE opt, VALUE src)
 {
-    char *opt_ptr, *opt_end;
-    volatile VALUE v;
-    char *encname;
-    int idx;
+    rb_encoding *to_enc;
+    const char *to_e;
+    int to_encidx;
 
     reinit();
     StringValue(opt);
-    opt_ptr = RSTRING_PTR(opt);
-    opt_end = opt_ptr + RSTRING_LEN(opt);
-    nkf_split_options(opt_ptr);
+    nkf_split_options(RSTRING_PTR(opt));
 
     incsize = INCSIZE;
 
@@ -144,7 +148,6 @@
     input = (unsigned char *)RSTRING_PTR(src);
     i_len = RSTRING_LEN(src);
     result = rb_str_new(0, i_len*3 + 10);
-    v = result;
 
     output_ctr = 0;
     output     = (unsigned char *)RSTRING_PTR(result);
@@ -154,15 +157,9 @@
     kanji_convert(NULL);
     rb_str_set_len(result, output_ctr);
     OBJ_INFECT(result, src);
-    encname = nkf_enc_name(output_encoding);
-    fprintf(stderr, "%s\n", encname);
-    idx = rb_enc_find_index(encname);
-    fprintf(stderr, "%d\n", idx);
-    if (idx <= 0) {
-	idx = rb_enc_replicate(encname, rb_enc_find(rb_enc_name(ONIG_ENCODING_ASCII)));
-	fprintf(stderr, "%d\n", idx);
-    }
-    rb_enc_associate_index(result, idx);
+
+    rb_enc_associate(result, rb_nkf_enc_get(nkf_enc_name(output_encoding)));
+
     return result;
 }
 
@@ -178,9 +175,6 @@
 static VALUE
 rb_nkf_guess(VALUE obj, VALUE src)
 {
-    char* codename;
-    rb_encoding* enc;
-
     reinit();
 
     input_ctr = 0;
@@ -192,13 +186,7 @@
     kanji_convert( NULL );
     guess_f = FALSE;
 
-    codename = get_guessed_code();
-    enc = rb_enc_find(codename);
-    if (enc <= 0) {
-	int idx = rb_enc_replicate(codename, rb_enc_find(rb_enc_name(ONIG_ENCODING_ASCII)));
-	enc = rb_enc_from_index(idx);
-    }
-    return rb_enc_from_encoding(enc);
+    return rb_enc_from_encoding(rb_nkf_enc_get(get_guessed_code()));
 }
 
 
@@ -483,6 +471,18 @@
     rb_define_module_function(mNKF, "guess", rb_nkf_guess, 1);
     rb_define_alias(rb_singleton_class(mNKF), "guess", "guess");
 
+    rb_define_const(mNKF, "AUTO",	Qnil);
+    rb_define_const(mNKF, "NOCONV",	Qnil);
+    rb_define_const(mNKF, "UNKNOWN",	Qnil);
+    rb_define_const(mNKF, "BINARY",	rb_enc_from_encoding(rb_nkf_enc_get("BINARY")));
+    rb_define_const(mNKF, "ASCII",	rb_enc_from_encoding(rb_nkf_enc_get("US_ASCII")));
+    rb_define_const(mNKF, "JIS",	rb_enc_from_encoding(rb_nkf_enc_get("ISO-2022-JP")));
+    rb_define_const(mNKF, "EUC",	rb_enc_from_encoding(rb_nkf_enc_get("EUC-JP")));
+    rb_define_const(mNKF, "SJIS",	rb_enc_from_encoding(rb_nkf_enc_get("Shift_JIS")));
+    rb_define_const(mNKF, "UTF8",	rb_enc_from_encoding(rb_nkf_enc_get("UTF-8")));
+    rb_define_const(mNKF, "UTF16",	rb_enc_from_encoding(rb_nkf_enc_get("UTF-16")));
+    rb_define_const(mNKF, "UTF32",	rb_enc_from_encoding(rb_nkf_enc_get("UTF-32")));
+
     /* Full version string of nkf */
     rb_define_const(mNKF, "VERSION", rb_str_new2(RUBY_NKF_VERSION));
     /* Version of nkf */
Index: ext/nkf/nkf-utf8/nkf.c
===================================================================
--- ext/nkf/nkf-utf8/nkf.c	(revision 14329)
+++ ext/nkf/nkf-utf8/nkf.c	(revision 14330)
@@ -320,6 +320,7 @@
     {"ISO-2022-JP-1",		ISO_2022_JP_1},
     {"ISO-2022-JP-3",		ISO_2022_JP_3},
     {"SHIFT_JIS",		SHIFT_JIS},
+    {"SJIS",			SHIFT_JIS},
     {"WINDOWS-31J",		WINDOWS_31J},
     {"CSWINDOWS31J",		WINDOWS_31J},
     {"CP932",			WINDOWS_31J},
Index: ext/nkf/lib/kconv.rb
===================================================================
--- ext/nkf/lib/kconv.rb	(revision 14329)
+++ ext/nkf/lib/kconv.rb	(revision 14330)
@@ -18,9 +18,9 @@
   #
   # Public Constants
   #
-  
+
   #Constant of Encoding
-  
+
   # Auto-Detect
   AUTO = NKF::AUTO
   # ISO-2022-JP
@@ -45,12 +45,10 @@
   UNKNOWN = NKF::UNKNOWN
 
   #
+  #
   # Private Constants
   #
   
-  # Revision of kconv.rb
-  REVISION = %q$Revision$
-  
   #Regexp of Encoding
   
   # Regexp of Shift_JIS string (private constant)
@@ -83,7 +81,7 @@
   #
   
   # call-seq:
-  #    Kconv.kconv(str, out_code, in_code = Kconv::AUTO)
+  #    Kconv.kconv(str, to_enc, from_enc=nil)
   #
   # Convert <code>str</code> to out_code.
   # <code>out_code</code> and <code>in_code</code> are given as constants of Kconv.
@@ -92,42 +90,11 @@
   # This method decode MIME encoded string and
   # convert halfwidth katakana to fullwidth katakana.
   # If you don't want to decode them, use NKF.nkf.
-  def kconv(str, out_code, in_code = AUTO)
-    opt = '-'
-    case in_code
-    when ::NKF::JIS
-      opt << 'J'
-    when ::NKF::EUC
-      opt << 'E'
-    when ::NKF::SJIS
-      opt << 'S'
-    when ::NKF::UTF8
-      opt << 'W'
-    when ::NKF::UTF16
-      opt << 'W16'
-    when ::NKF::UTF32
-      opt << 'W32'
-    end
+  def kconv(str, to_enc, from_enc=nil)
+    opt = ''
+    opt += ' --ic=' + from_enc.name if from_enc
+    opt += ' --oc=' + to_enc.name if to_enc
 
-    case out_code
-    when ::NKF::JIS
-      opt << 'j'
-    when ::NKF::EUC
-      opt << 'e'
-    when ::NKF::SJIS
-      opt << 's'
-    when ::NKF::UTF8
-      opt << 'w'
-    when ::NKF::UTF16
-      opt << 'w16'
-    when ::NKF::UTF32
-      opt << 'w32'
-    when ::NKF::NOCONV
-      return str
-    end
-
-    opt = '' if opt == '-'
-
     ::NKF::nkf(opt, str)
   end
   module_function :kconv
@@ -146,7 +113,7 @@
   # convert halfwidth katakana to fullwidth katakana.
   # If you don't want it, use NKF.nkf('-jxm0', str).
   def tojis(str)
-    ::NKF::nkf('-jm', str).force_encoding("iso-2022-JP")
+    ::NKF::nkf('-jm', str)
   end
   module_function :tojis
 
@@ -160,7 +127,7 @@
   # convert halfwidth katakana to fullwidth katakana.
   # If you don't want it, use NKF.nkf('-exm0', str).
   def toeuc(str)
-    ::NKF::nkf('-em', str).force_encoding("EUC-JP")
+    ::NKF::nkf('-em', str)
   end
   module_function :toeuc
 
@@ -174,7 +141,7 @@
   # convert halfwidth katakana to fullwidth katakana.
   # If you don't want it, use NKF.nkf('-sxm0', str).
   def tosjis(str)
-    ::NKF::nkf('-sm', str).force_encoding("Shift_JIS")
+    ::NKF::nkf('-sm', str)
   end
   module_function :tosjis
 
@@ -188,7 +155,7 @@
   # convert halfwidth katakana to fullwidth katakana.
   # If you don't want it, use NKF.nkf('-wxm0', str).
   def toutf8(str)
-    ::NKF::nkf('-wm', str).force_encoding("UTF-8")
+    ::NKF::nkf('-wm', str)
   end
   module_function :toutf8
 
@@ -227,22 +194,13 @@
   # call-seq:
   #    Kconv.guess(str)   -> integer
   #
-  # Guess input encoding by NKF.guess2
+  # Guess input encoding by NKF.guess
   def guess(str)
     ::NKF::guess(str)
   end
   module_function :guess
 
-  # call-seq:
-  #    Kconv.guess_old(str)   -> integer
   #
-  # Guess input encoding by NKF.guess1
-  def guess_old(str)
-    ::NKF::guess1(str)
-  end
-  module_function :guess_old
-
-  #
   # isEncoding
   #
 
@@ -283,7 +241,7 @@
 
 class String
   # call-seq:
-  #    String#kconv(out_code, in_code = Kconv::AUTO)
+  #    String#kconv(to_enc, from_enc)
   #
   # Convert <code>self</code> to out_code.
   # <code>out_code</code> and <code>in_code</code> are given as constants of Kconv.
@@ -292,8 +250,8 @@
   # This method decode MIME encoded string and
   # convert halfwidth katakana to fullwidth katakana.
   # If you don't want to decode them, use NKF.nkf.
-  def kconv(out_code, in_code=Kconv::AUTO)
-    Kconv::kconv(self, out_code, in_code)
+  def kconv(to_enc, from_enc=nil)
+    Kconv::kconv(self, to_enc, from_enc)
   end
   
   #

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml

[前][次][番号順一覧][スレッド一覧]