ruby-changes:2839
From: ko1@a...
Date: 19 Dec 2007 19:20:41 +0900
Subject: [ruby-changes:2839] naruse - Ruby:r14330 (trunk): * ext/nkf/nkf-utf8/nkf.c: Updated.
naruse 2007-12-19 19:19:38 +0900 (Wed, 19 Dec 2007) New Revision: 14330 Modified files: trunk/ChangeLog trunk/ext/nkf/lib/kconv.rb trunk/ext/nkf/nkf-utf8/nkf.c trunk/ext/nkf/nkf.c Log: * ext/nkf/nkf-utf8/nkf.c: Updated. * ext/nkf/nkf.c (rb_nkf_enc_get): added. (find encoding or replicate default encoding) * ext/nkf/nkf.c (NKF::<ENCODING>): redefine encoding constant. * ext/nkf/lib/kconv.rb (Kconv::<ENCODING>): redefined as Encoding. * ext/nkf/lib/kconv.rb: refactoring. http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ChangeLog?r1=14330&r2=14329 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ext/nkf/lib/kconv.rb?r1=14330&r2=14329 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ext/nkf/nkf.c?r1=14330&r2=14329 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi/trunk/ext/nkf/nkf-utf8/nkf.c?r1=14330&r2=14329 Index: ChangeLog =================================================================== --- ChangeLog (revision 14329) +++ ChangeLog (revision 14330) @@ -1,3 +1,16 @@ +Wed Dec 19 19:18:06 2007 NARUSE, Yui <naruse@r...> + + * ext/nkf/nkf-utf8/nkf.c: Updated. + + * ext/nkf/nkf.c (rb_nkf_enc_get): added. + (find encoding or replicate default encoding) + + * ext/nkf/nkf.c (NKF::<ENCODING>): redefine encoding constant. + + * ext/nkf/lib/kconv.rb (Kconv::<ENCODING>): redefined as Encoding. + + * ext/nkf/lib/kconv.rb: refactoring. + Wed Dec 19 19:11:08 2007 Yukihiro Matsumoto <matz@r...> * bignum.c (rb_big_mul0): blocking check for bigger numbers. Index: ext/nkf/nkf.c =================================================================== --- ext/nkf/nkf.c (revision 14329) +++ ext/nkf/nkf.c (revision 14330) @@ -61,6 +61,13 @@ #include "nkf-utf8/utf8tbl.c" #include "nkf-utf8/nkf.c" +rb_encoding* rb_nkf_enc_get(const char *name) +{ + int idx = rb_enc_find_index(name); + if (idx < 0) idx = rb_enc_replicate(name, rb_default_encoding()); + return rb_enc_from_index(idx); +} + int nkf_split_options(const char *arg) { int count = 0; @@ -126,16 +133,13 @@ static VALUE rb_nkf_convert(VALUE obj, VALUE opt, VALUE src) { - char *opt_ptr, *opt_end; - volatile VALUE v; - char *encname; - int idx; + rb_encoding *to_enc; + const char *to_e; + int to_encidx; reinit(); StringValue(opt); - opt_ptr = RSTRING_PTR(opt); - opt_end = opt_ptr + RSTRING_LEN(opt); - nkf_split_options(opt_ptr); + nkf_split_options(RSTRING_PTR(opt)); incsize = INCSIZE; @@ -144,7 +148,6 @@ input = (unsigned char *)RSTRING_PTR(src); i_len = RSTRING_LEN(src); result = rb_str_new(0, i_len*3 + 10); - v = result; output_ctr = 0; output = (unsigned char *)RSTRING_PTR(result); @@ -154,15 +157,9 @@ kanji_convert(NULL); rb_str_set_len(result, output_ctr); OBJ_INFECT(result, src); - encname = nkf_enc_name(output_encoding); - fprintf(stderr, "%s\n", encname); - idx = rb_enc_find_index(encname); - fprintf(stderr, "%d\n", idx); - if (idx <= 0) { - idx = rb_enc_replicate(encname, rb_enc_find(rb_enc_name(ONIG_ENCODING_ASCII))); - fprintf(stderr, "%d\n", idx); - } - rb_enc_associate_index(result, idx); + + rb_enc_associate(result, rb_nkf_enc_get(nkf_enc_name(output_encoding))); + return result; } @@ -178,9 +175,6 @@ static VALUE rb_nkf_guess(VALUE obj, VALUE src) { - char* codename; - rb_encoding* enc; - reinit(); input_ctr = 0; @@ -192,13 +186,7 @@ kanji_convert( NULL ); guess_f = FALSE; - codename = get_guessed_code(); - enc = rb_enc_find(codename); - if (enc <= 0) { - int idx = rb_enc_replicate(codename, rb_enc_find(rb_enc_name(ONIG_ENCODING_ASCII))); - enc = rb_enc_from_index(idx); - } - return rb_enc_from_encoding(enc); + return rb_enc_from_encoding(rb_nkf_enc_get(get_guessed_code())); } @@ -483,6 +471,18 @@ rb_define_module_function(mNKF, "guess", rb_nkf_guess, 1); rb_define_alias(rb_singleton_class(mNKF), "guess", "guess"); + rb_define_const(mNKF, "AUTO", Qnil); + rb_define_const(mNKF, "NOCONV", Qnil); + rb_define_const(mNKF, "UNKNOWN", Qnil); + rb_define_const(mNKF, "BINARY", rb_enc_from_encoding(rb_nkf_enc_get("BINARY"))); + rb_define_const(mNKF, "ASCII", rb_enc_from_encoding(rb_nkf_enc_get("US_ASCII"))); + rb_define_const(mNKF, "JIS", rb_enc_from_encoding(rb_nkf_enc_get("ISO-2022-JP"))); + rb_define_const(mNKF, "EUC", rb_enc_from_encoding(rb_nkf_enc_get("EUC-JP"))); + rb_define_const(mNKF, "SJIS", rb_enc_from_encoding(rb_nkf_enc_get("Shift_JIS"))); + rb_define_const(mNKF, "UTF8", rb_enc_from_encoding(rb_nkf_enc_get("UTF-8"))); + rb_define_const(mNKF, "UTF16", rb_enc_from_encoding(rb_nkf_enc_get("UTF-16"))); + rb_define_const(mNKF, "UTF32", rb_enc_from_encoding(rb_nkf_enc_get("UTF-32"))); + /* Full version string of nkf */ rb_define_const(mNKF, "VERSION", rb_str_new2(RUBY_NKF_VERSION)); /* Version of nkf */ Index: ext/nkf/nkf-utf8/nkf.c =================================================================== --- ext/nkf/nkf-utf8/nkf.c (revision 14329) +++ ext/nkf/nkf-utf8/nkf.c (revision 14330) @@ -320,6 +320,7 @@ {"ISO-2022-JP-1", ISO_2022_JP_1}, {"ISO-2022-JP-3", ISO_2022_JP_3}, {"SHIFT_JIS", SHIFT_JIS}, + {"SJIS", SHIFT_JIS}, {"WINDOWS-31J", WINDOWS_31J}, {"CSWINDOWS31J", WINDOWS_31J}, {"CP932", WINDOWS_31J}, Index: ext/nkf/lib/kconv.rb =================================================================== --- ext/nkf/lib/kconv.rb (revision 14329) +++ ext/nkf/lib/kconv.rb (revision 14330) @@ -18,9 +18,9 @@ # # Public Constants # - + #Constant of Encoding - + # Auto-Detect AUTO = NKF::AUTO # ISO-2022-JP @@ -45,12 +45,10 @@ UNKNOWN = NKF::UNKNOWN # + # # Private Constants # - # Revision of kconv.rb - REVISION = %q$Revision$ - #Regexp of Encoding # Regexp of Shift_JIS string (private constant) @@ -83,7 +81,7 @@ # # call-seq: - # Kconv.kconv(str, out_code, in_code = Kconv::AUTO) + # Kconv.kconv(str, to_enc, from_enc=nil) # # Convert <code>str</code> to out_code. # <code>out_code</code> and <code>in_code</code> are given as constants of Kconv. @@ -92,42 +90,11 @@ # This method decode MIME encoded string and # convert halfwidth katakana to fullwidth katakana. # If you don't want to decode them, use NKF.nkf. - def kconv(str, out_code, in_code = AUTO) - opt = '-' - case in_code - when ::NKF::JIS - opt << 'J' - when ::NKF::EUC - opt << 'E' - when ::NKF::SJIS - opt << 'S' - when ::NKF::UTF8 - opt << 'W' - when ::NKF::UTF16 - opt << 'W16' - when ::NKF::UTF32 - opt << 'W32' - end + def kconv(str, to_enc, from_enc=nil) + opt = '' + opt += ' --ic=' + from_enc.name if from_enc + opt += ' --oc=' + to_enc.name if to_enc - case out_code - when ::NKF::JIS - opt << 'j' - when ::NKF::EUC - opt << 'e' - when ::NKF::SJIS - opt << 's' - when ::NKF::UTF8 - opt << 'w' - when ::NKF::UTF16 - opt << 'w16' - when ::NKF::UTF32 - opt << 'w32' - when ::NKF::NOCONV - return str - end - - opt = '' if opt == '-' - ::NKF::nkf(opt, str) end module_function :kconv @@ -146,7 +113,7 @@ # convert halfwidth katakana to fullwidth katakana. # If you don't want it, use NKF.nkf('-jxm0', str). def tojis(str) - ::NKF::nkf('-jm', str).force_encoding("iso-2022-JP") + ::NKF::nkf('-jm', str) end module_function :tojis @@ -160,7 +127,7 @@ # convert halfwidth katakana to fullwidth katakana. # If you don't want it, use NKF.nkf('-exm0', str). def toeuc(str) - ::NKF::nkf('-em', str).force_encoding("EUC-JP") + ::NKF::nkf('-em', str) end module_function :toeuc @@ -174,7 +141,7 @@ # convert halfwidth katakana to fullwidth katakana. # If you don't want it, use NKF.nkf('-sxm0', str). def tosjis(str) - ::NKF::nkf('-sm', str).force_encoding("Shift_JIS") + ::NKF::nkf('-sm', str) end module_function :tosjis @@ -188,7 +155,7 @@ # convert halfwidth katakana to fullwidth katakana. # If you don't want it, use NKF.nkf('-wxm0', str). def toutf8(str) - ::NKF::nkf('-wm', str).force_encoding("UTF-8") + ::NKF::nkf('-wm', str) end module_function :toutf8 @@ -227,22 +194,13 @@ # call-seq: # Kconv.guess(str) -> integer # - # Guess input encoding by NKF.guess2 + # Guess input encoding by NKF.guess def guess(str) ::NKF::guess(str) end module_function :guess - # call-seq: - # Kconv.guess_old(str) -> integer # - # Guess input encoding by NKF.guess1 - def guess_old(str) - ::NKF::guess1(str) - end - module_function :guess_old - - # # isEncoding # @@ -283,7 +241,7 @@ class String # call-seq: - # String#kconv(out_code, in_code = Kconv::AUTO) + # String#kconv(to_enc, from_enc) # # Convert <code>self</code> to out_code. # <code>out_code</code> and <code>in_code</code> are given as constants of Kconv. @@ -292,8 +250,8 @@ # This method decode MIME encoded string and # convert halfwidth katakana to fullwidth katakana. # If you don't want to decode them, use NKF.nkf. - def kconv(out_code, in_code=Kconv::AUTO) - Kconv::kconv(self, out_code, in_code) + def kconv(to_enc, from_enc=nil) + Kconv::kconv(self, to_enc, from_enc) end # -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml