ruby-changes:7147
From: akr <ko1@a...>
Date: Sun, 17 Aug 2008 02:06:50 +0900 (JST)
Subject: [ruby-changes:7147] Ruby:r18666 (trunk): * include/ruby/io.h (rb_io_t): new fields: readconv, crbuf, crbuf_off,
akr 2008-08-17 02:06:35 +0900 (Sun, 17 Aug 2008)

  New Revision: 18666

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=18666

  Log:
    * include/ruby/io.h (rb_io_t): new fields: readconv, crbuf, crbuf_off,
      crbuf_len, crbuf_capa.
      (MakeOpenFile): initialize them.

    * io.c (io_shift_crbuf): new function.
      (io_getc): use econv.
      (rb_io_fptr_finalize): finalize readconv and crbuf.

  Modified files:
    trunk/ChangeLog
    trunk/include/ruby/io.h
    trunk/io.c
    trunk/test/ruby/test_io_m17n.rb

Index: include/ruby/io.h
===================================================================
--- include/ruby/io.h (revision 18665)
+++ include/ruby/io.h (revision 18666)
@@ -36,17 +36,26 @@
     char *path; /* pathname for file */
     void (*finalize)(struct rb_io_t*,int); /* finalize proc */
     long refcnt;
+
     char *wbuf; /* wbuf_off + wbuf_len <= wbuf_capa */
     int wbuf_off;
     int wbuf_len;
     int wbuf_capa;
+
     char *rbuf; /* rbuf_off + rbuf_len <= rbuf_capa */
     int rbuf_off;
     int rbuf_len;
     int rbuf_capa;
+
     VALUE tied_io_for_writing;
-    rb_encoding *enc;
-    rb_encoding *enc2;
+    rb_encoding *enc; /* int_enc if enc2. ext_enc otherwise. */
+    rb_encoding *enc2; /* ext_enc if not NULL. */
+
+    rb_econv_t *readconv;
+    char *crbuf; /* crbuf_off + crbuf_len <= crbuf_capa */
+    int crbuf_off;
+    int crbuf_len;
+    int crbuf_capa;
 } rb_io_t;
 
 #define HAVE_RB_IO_T 1
@@ -89,6 +98,11 @@
     fp->rbuf_off = 0;\
     fp->rbuf_len = 0;\
     fp->rbuf_capa = 0;\
+    fp->readconv = NULL;\
+    fp->crbuf = NULL;\
+    fp->crbuf_off = 0;\
+    fp->crbuf_len = 0;\
+    fp->crbuf_capa = 0;\
     fp->tied_io_for_writing = 0;\
     fp->enc = 0;\
     fp->enc2 = 0;\
Index: ChangeLog
===================================================================
--- ChangeLog (revision 18665)
+++ ChangeLog (revision 18666)
@@ -1,3 +1,13 @@
+Sun Aug 17 01:29:46 2008 Tanaka Akira <akr@f...>
+
+        * include/ruby/io.h (rb_io_t): new fields: readconv, crbuf, crbuf_off,
+          crbuf_len, crbuf_capa.
+          (MakeOpenFile): initialize them.
+
+        * io.c (io_shift_crbuf): new function.
+          (io_getc): use econv.
+          (rb_io_fptr_finalize): finalize readconv and crbuf.
+
 Sun Aug 17 00:02:07 2008 Tanaka Akira <akr@f...>
 
         * include/ruby/encoding.h (rb_econv_check_error): declared.
Index: io.c
===================================================================
--- io.c (revision 18665)
+++ io.c (revision 18666)
@@ -2269,13 +2269,76 @@
 }
 
 static VALUE
+io_shift_crbuf(rb_io_t *fptr, int len)
+{
+    VALUE str;
+    str = rb_str_new(fptr->crbuf+fptr->crbuf_off, len);
+    fptr->crbuf_off += len;
+    fptr->crbuf_len -= len;
+    OBJ_TAINT(str);
+    rb_enc_associate(str, fptr->enc);
+    /* xxx: set coderange */
+    if (fptr->crbuf_len == 0)
+        fptr->crbuf_off = 0;
+    if (fptr->crbuf_off < fptr->crbuf_capa/2) {
+        memmove(fptr->crbuf, fptr->crbuf+fptr->crbuf_off, fptr->crbuf_len);
+        fptr->crbuf_off = 0;
+    }
+    return str;
+}
+
+static VALUE
 io_getc(rb_io_t *fptr, rb_encoding *enc)
 {
     int r, n, cr = 0;
     VALUE str;
 
-    if (rb_enc_dummy_p(enc)) {
-        rb_raise(rb_eNotImpError, "getc against dummy encoding is not currently supported");
+    if (fptr->enc2) {
+        if (!fptr->readconv) {
+            fptr->readconv = rb_econv_open(fptr->enc2->name, fptr->enc->name, 0);
+            if (!fptr->readconv)
+                rb_raise(rb_eIOError, "code converter open failed (%s to %s)", fptr->enc->name, fptr->enc2->name);
+            fptr->crbuf_off = 0;
+            fptr->crbuf_len = 0;
+            fptr->crbuf_capa = 1024;
+            fptr->crbuf = ALLOC_N(char, fptr->crbuf_capa);
+        }
+
+        while (1) {
+            const unsigned char *ss, *sp, *se;
+            unsigned char *ds, *dp, *de;
+            rb_econv_result_t res;
+            if (fptr->crbuf_len) {
+                r = rb_enc_precise_mbclen(fptr->crbuf+fptr->crbuf_off, fptr->crbuf+fptr->crbuf_off+fptr->crbuf_len, fptr->enc);
+                if (!MBCLEN_NEEDMORE_P(r))
+                    break;
+                if (fptr->crbuf_len == fptr->crbuf_capa) {
+                    rb_raise(rb_eIOError, "too long character");
+                }
+            }
+            if (fptr->rbuf_len == 0) {
+                if (io_fillbuf(fptr) == -1) {
+                    if (fptr->crbuf_len == 0)
+                        return Qnil;
+                    /* return an incomplete character just before EOF */
+                    return io_shift_crbuf(fptr, fptr->crbuf_len);
+                }
+            }
+            ss = sp = (const unsigned char *)fptr->rbuf + fptr->rbuf_off;
+            se = sp + fptr->rbuf_len;
+            ds = dp = (unsigned char *)fptr->crbuf + fptr->crbuf_off + fptr->crbuf_len;
+            de = (unsigned char *)fptr->crbuf + fptr->crbuf_capa;
+            res = rb_econv_convert(fptr->readconv, &sp, se, &dp, de, ECONV_PARTIAL_INPUT|ECONV_OUTPUT_FOLLOWED_BY_INPUT);
+            fptr->rbuf_off += sp - ss;
+            fptr->rbuf_len -= sp - ss;
+            fptr->crbuf_len += dp - ds;
+            rb_econv_check_error(fptr->readconv);
+        }
+        if (MBCLEN_INVALID_P(r)) {
+            r = rb_enc_mbclen(fptr->crbuf+fptr->crbuf_off, fptr->crbuf+fptr->crbuf_off+fptr->crbuf_len, fptr->enc);
+            return io_shift_crbuf(fptr, r);
+        }
+        return io_shift_crbuf(fptr, MBCLEN_CHARFOUND_LEN(r));
     }
 
     if (io_fillbuf(fptr) < 0) {
@@ -2766,6 +2829,14 @@
         free(fptr->wbuf);
         fptr->wbuf = 0;
     }
+    if (fptr->readconv) {
+        rb_econv_close(fptr->readconv);
+        fptr->readconv = NULL;
+    }
+    if (fptr->crbuf) {
+        free(fptr->crbuf);
+        fptr->crbuf = NULL;
+    }
     free(fptr);
     return 1;
 }
@@ -3370,6 +3441,8 @@
     char *enc2name;
     int idx, idx2;
 
+    /* parse estr as "enc" or "enc2:enc" */
+
     p0 = strrchr(estr, ':');
     if (!p0) p1 = estr;
     else p1 = p0 + 1;
Index: test/ruby/test_io_m17n.rb
===================================================================
--- test/ruby/test_io_m17n.rb (revision 18665)
+++ test/ruby/test_io_m17n.rb (revision 18666)
@@ -220,12 +220,10 @@
     with_tmpdir {
       src = "\e$B\x23\x30\x23\x31\e(B".force_encoding("iso-2022-jp")
       generate_file('tmp', src)
-      assert_raise(NotImplementedError) do
-        open("tmp", "r:iso-2022-jp:euc-jp") {|f|
-          assert_equal("\xa3\xb0".force_encoding("euc-jp"), f.getc)
-          assert_equal("\xa3\xb1".force_encoding("euc-jp"), f.getc)
-        }
-      end
+      open("tmp", "r:iso-2022-jp:euc-jp") {|f|
+        assert_equal("\xa3\xb0".force_encoding("euc-jp"), f.getc)
+        assert_equal("\xa3\xb1".force_encoding("euc-jp"), f.getc)
+      }
     }
   end
 
--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/