ruby-changes:25645
From: tadf <ko1@a...>
Date: Sun, 18 Nov 2012 00:22:47 +0900 (JST)
Subject: [ruby-changes:25645] tadf:r37702 (trunk): * complex.c (string_to_c_strict, string_to_c): rewrote without regexp.
tadf 2012-11-18 00:19:55 +0900 (Sun, 18 Nov 2012) New Revision: 37702 http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=37702 Log: * complex.c (string_to_c_strict, string_to_c): rewrote without regexp. * rational.c (string_to_r_strict, string_to_r): ditto. Modified files: trunk/ChangeLog trunk/complex.c trunk/rational.c Index: complex.c =================================================================== --- complex.c (revision 37701) +++ complex.c (revision 37702) @@ -1496,160 +1496,264 @@ return rb_complex_new1(self); } -static VALUE comp_pat0, comp_pat1, comp_pat2, a_slash, a_dot_and_an_e, - null_string, underscores_pat, an_underscore; +#include <ctype.h> -#define WS "\\s*" -#define DIGITS "(?:[0-9](?:_[0-9]|[0-9])*)" -#define NUMERATOR "(?:" DIGITS "?\\.)?" DIGITS "(?:[eE][-+]?" DIGITS ")?" -#define DENOMINATOR DIGITS -#define NUMBER "[-+]?" NUMERATOR "(?:\\/" DENOMINATOR ")?" -#define NUMBERNOS NUMERATOR "(?:\\/" DENOMINATOR ")?" -#define PATTERN0 "\\A" WS "(" NUMBER ")@(" NUMBER ")" WS -#define PATTERN1 "\\A" WS "([-+])?(" NUMBERNOS ")?[iIjJ]" WS -#define PATTERN2 "\\A" WS "(" NUMBER ")(([-+])(" NUMBERNOS ")?[iIjJ])?" WS +static int +read_sign(const char **s, + char **b) +{ + int sign = '?'; -static void -make_patterns(void) + if (**s == '-' || **s == '+') { + sign = **b = **s; + (*s)++; + (*b)++; + } + return sign; +} + +static int +read_digits(const char **s, int strict, + char **b) { - static const char comp_pat0_source[] = PATTERN0; - static const char comp_pat1_source[] = PATTERN1; - static const char comp_pat2_source[] = PATTERN2; - static const char underscores_pat_source[] = "_+"; + int us = 1; - if (comp_pat0) return; + if (!isdigit((unsigned char)**s)) + return 0; - comp_pat0 = rb_reg_new(comp_pat0_source, sizeof comp_pat0_source - 1, 0); - rb_gc_register_mark_object(comp_pat0); + while (isdigit((unsigned char)**s) || **s == '_') { + if (**s == '_') { + if (strict) { + if (us) + return 0; + } + us = 1; + } + else { + **b = **s; + (*b)++; + us = 0; + } + (*s)++; + } + if (us) + do { + (*s)--; + } while (**s == '_'); + return 1; +} - comp_pat1 = rb_reg_new(comp_pat1_source, sizeof comp_pat1_source - 1, 0); - rb_gc_register_mark_object(comp_pat1); +static int +read_num(const char **s, int strict, + char **b) +{ + if (**s != '.') { + if (!read_digits(s, strict, b)) + return 0; + } - comp_pat2 = rb_reg_new(comp_pat2_source, sizeof comp_pat2_source - 1, 0); - rb_gc_register_mark_object(comp_pat2); + if (**s == '.') { + **b = **s; + (*s)++; + (*b)++; + if (!read_digits(s, strict, b)) { + (*b)--; + return 0; + } + } - a_slash = rb_usascii_str_new2("/"); - rb_gc_register_mark_object(a_slash); + if (**s == 'e' || **s == 'E') { + **b = **s; + (*s)++; + (*b)++; + read_sign(s, b); + if (!read_digits(s, strict, b)) { + (*b)--; + return 0; + } + } + return 1; +} - a_dot_and_an_e = rb_usascii_str_new2(".eE"); - rb_gc_register_mark_object(a_dot_and_an_e); +static int +read_den(const char **s, int strict, + char **b) +{ + if (!read_digits(s, strict, b)) + return 0; + return 1; +} - null_string = rb_usascii_str_new2(""); - rb_gc_register_mark_object(null_string); +static int +read_rat_nos(const char **s, int strict, + char **b) +{ + if (!read_num(s, strict, b)) + return 0; + if (**s == '/') { + **b = **s; + (*s)++; + (*b)++; + if (!read_den(s, strict, b)) { + (*b)--; + return 0; + } + } + return 1; +} - underscores_pat = rb_reg_new(underscores_pat_source, - sizeof underscores_pat_source - 1, 0); - rb_gc_register_mark_object(underscores_pat); +static int +read_rat(const char **s, int strict, + char **b) +{ + read_sign(s, b); + if (!read_rat_nos(s, strict, b)) + return 0; + return 1; +} - an_underscore = rb_usascii_str_new2("_"); - rb_gc_register_mark_object(an_underscore); +static int +isimagunit(int c) +{ + return (c == 'i' || c == 'I' || + c == 'j' || c == 'J'); } -#define id_match rb_intern("match") -#define f_match(x,y) rb_funcall((x), id_match, 1, (y)) +VALUE rb_cstr_to_rat(const char *, int); -#define id_gsub_bang rb_intern("gsub!") -#define f_gsub_bang(x,y,z) rb_funcall((x), id_gsub_bang, 2, (y), (z)) - static VALUE -string_to_c_internal(VALUE self) +str2num(char *s) { - VALUE s; + if (strchr(s, '/')) + return rb_cstr_to_rat(s, 0); + if (strpbrk(s, ".eE")) { + double d = rb_cstr_to_dbl(s, 0); + return DBL2NUM(d); + } + return rb_cstr_to_inum(s, 10, 0); +} - s = self; +static int +read_comp(const char **s, int strict, + VALUE *ret, char **b) +{ + char *bb; + int sign; + VALUE num, num2; - if (RSTRING_LEN(s) == 0) - return rb_assoc_new(Qnil, self); + bb = *b; - { - VALUE m, sr, si, re, r, i; - int po; + sign = read_sign(s, b); - m = f_match(comp_pat0, s); - if (!NIL_P(m)) { - sr = rb_reg_nth_match(1, m); - si = rb_reg_nth_match(2, m); - re = rb_reg_match_post(m); - po = 1; - } - if (NIL_P(m)) { - m = f_match(comp_pat1, s); - if (!NIL_P(m)) { - sr = Qnil; - si = rb_reg_nth_match(1, m); - if (NIL_P(si)) - si = rb_usascii_str_new2(""); - { - VALUE t; + if (isimagunit(**s)) { + (*s)++; + num = INT2FIX((sign == '-') ? -1 : + 1); + *ret = rb_complex_raw2(ZERO, num); + return 1; /* e.g. "i" */ + } - t = rb_reg_nth_match(2, m); - if (NIL_P(t)) - t = rb_usascii_str_new2("1"); - rb_str_concat(si, t); - } - re = rb_reg_match_post(m); - po = 0; - } + if (!read_rat_nos(s, strict, b)) { + **b = '\0'; + num = str2num(bb); + *ret = rb_complex_raw2(num, ZERO); + return 0; /* e.g. "1/" */ + } + **b = '\0'; + num = str2num(bb); + + if (isimagunit(**s)) { + (*s)++; + *ret = rb_complex_raw2(ZERO, num); + return 1; /* e.g. "3i" */ + } + + if (**s == '@') { + (*s)++; + bb = *b; + if (!read_rat(s, strict, b)) { + num = rb_complex_raw2(num, ZERO); + return 0; /* e.g. "1@x" */ } - if (NIL_P(m)) { - m = f_match(comp_pat2, s); - if (NIL_P(m)) - return rb_assoc_new(Qnil, self); - sr = rb_reg_nth_match(1, m); - if (NIL_P(rb_reg_nth_match(2, m))) - si = Qnil; - else { - VALUE t; + **b = '\0'; + num2 = str2num(bb); + *ret = rb_complex_polar(num, num2); + return 1; /* e.g. "1@2" */ + } - si = rb_reg_nth_match(3, m); - t = rb_reg_nth_match(4, m); - if (NIL_P(t)) - t = rb_usascii_str_new2("1"); - rb_str_concat(si, t); + if (**s == '-' || **s == '+') { + bb = *b; + sign = read_sign(s, b); + if (isimagunit(**s)) + num2 = INT2FIX((sign == '-') ? -1 : + 1); + else { + if (!read_rat_nos(s, strict, b)) { + *ret = rb_complex_raw2(num, ZERO); + return 0; /* e.g. "1+xi" */ } - re = rb_reg_match_post(m); - po = 0; + **b = '\0'; + num2 = str2num(bb); } - r = INT2FIX(0); - i = INT2FIX(0); - if (!NIL_P(sr)) { - if (strchr(RSTRING_PTR(sr), '/')) - r = f_to_r(sr); - else if (strpbrk(RSTRING_PTR(sr), ".eE")) - r = f_to_f(sr); - else - r = f_to_i(sr); + if (!isimagunit(**s)) { + *ret = rb_complex_raw2(num, ZERO); + return 0; /* e.g. "1+3x" */ } - if (!NIL_P(si)) { - if (strchr(RSTRING_PTR(si), '/')) - i = f_to_r(si); - else if (strpbrk(RSTRING_PTR(si), ".eE")) - i = f_to_f(si); - else - i = f_to_i(si); - } - if (po) - return rb_assoc_new(rb_complex_polar(r, i), re); - else - return rb_assoc_new(rb_complex_new2(r, i), re); + (*s)++; + *ret = rb_complex_raw2(num, num2); + return 1; /* e.g. "1+2i" */ } + /* !(@, - or +) */ + { + *ret = rb_complex_raw2(num, ZERO); + return 1; /* e.g. "3" */ + } } +static int +parse_comp(const char *s, int strict, + VALUE *num) +{ + char *buf, *b; + + buf = ALLOCA_N(char, strlen(s) + 1); + b = buf; + + while (isspace((unsigned char)*s)) + s++; + + if (!read_comp(&s, strict, num, &b)) + return 0; + + while (isspace((unsigned char)*s)) + s++; + + if (strict) + if (*s != '\0') + return 0; + return 1; +} + static VALUE string_to_c_strict(VALUE self) { - VALUE a = string_to_c_internal(self); - if (NIL_P(RARRAY_PTR(a)[0]) || RSTRING_LEN(RARRAY_PTR(a)[1]) > 0) { - VALUE s = f_inspect(self); + const char *s; + VALUE num; + + rb_must_asciicompat(self); + + s = RSTRING_PTR(self); + + if (memchr(s, 0, RSTRING_LEN(self))) + rb_raise(rb_eArgError, "string contains null byte"); + + if (!parse_comp(s, 1, &num)) { + VALUE ins = f_inspect(self); rb_raise(rb_eArgError, "invalid value for convert(): %s", - StringValuePtr(s)); + StringValuePtr(ins)); } - return RARRAY_PTR(a)[0]; + + return num; } -#define id_gsub rb_intern("gsub") -#define f_gsub(x,y,z) rb_funcall((x), id_gsub, 2, (y), (z)) - /* * call-seq: * str.to_c -> complex @@ -1674,19 +1778,16 @@ static VALUE string_to_c(VALUE self) { - VALUE s, a, backref; + const char *s; + VALUE num; - backref = rb_backref_get(); - rb_match_busy(backref); + rb_must_asciicompat(self); - s = f_gsub(self, underscores_pat, an_underscore); - a = string_to_c_internal(s); + s = RSTRING_PTR(self); - rb_backref_set(backref); + (void)parse_comp(s, 0, &num); - if (!NIL_P(RARRAY_PTR(a)[0])) - return RARRAY_PTR(a)[0]; - return rb_complex_new1(INT2FIX(0)); + return num; } static VALUE @@ -2054,8 +2155,6 @@ rb_define_method(rb_cNilClass, "to_c", nilclass_to_c, 0); rb_define_method(rb_cNumeric, "to_c", numeric_to_c, 0); - make_patterns(); - rb_define_method(rb_cString, "to_c", string_to_c, 0); rb_define_private_method(CLASS_OF(rb_cComplex), "convert", nucomp_s_convert, -1); Index: ChangeLog =================================================================== --- ChangeLog (revision 37701) +++ ChangeLog (revision 37702) @@ -1,3 +1,8 @@ +Sun Nov 18 00:14:46 2012 Tadayoshi Funaba <tadf@d...> + + * complex.c (string_to_c_strict, string_to_c): rewrote without regexp. + * rational.c (string_to_r_strict, string_to_r): ditto. + Sat Nov 17 23:53:05 2012 Tadayoshi Funaba <tadf@d...> * complex.c (make_patterns): should not accept extra sign. Index: rational.c =================================================================== --- rational.c (revision 37701) +++ rational.c (revision 37702) @@ -1956,146 +1956,186 @@ return rb_rational_new2(p, q); } -static VALUE rat_pat, an_e_pat, a_dot_pat, underscores_pat, an_underscore; +#include <ctype.h> -#define WS "\\s*" -#define DIGITS "(?:[0-9](?:_[0-9]|[0-9])*)" -#define NUMERATOR "(?:" DIGITS "?\\.)?" DIGITS "(?:[eE][-+]?" DIGITS ")?" -#define DENOMINATOR DIGITS -#define PATTERN "\\A" WS "([-+])?(" NUMERATOR ")(?:\\/(" DENOMINATOR "))?" WS +static int +read_sign(const char **s) +{ + int sign = '?'; -static void -make_patterns(void) + if (**s == '-' || **s == '+') { + sign = **s; + (*s)++; + } + return sign; +} + +static int +read_digits(const char **s, int strict, + VALUE *num, int *count) { - static const char rat_pat_source[] = PATTERN; - static const char an_e_pat_source[] = "[eE]"; - static const char a_dot_pat_source[] = "\\."; - static const char underscores_pat_source[] = "_+"; + int us = 1; - if (rat_pat) return; + if (!isdigit((unsigned char)**s)) + return 0; - rat_pat = rb_reg_new(rat_pat_source, sizeof rat_pat_source - 1, 0); - rb_gc_register_mark_object(rat_pat); + *num = ZERO; - an_e_pat = rb_reg_new(an_e_pat_source, sizeof an_e_pat_source - 1, 0); - rb_gc_register_mark_object(an_e_pat); + while (isdigit((unsigned char)**s) || **s == '_') { + if (**s == '_') { + if (strict) { + if (us) + return 0; + } + us = 1; + } + else { + *num = f_mul(*num, INT2FIX(10)); + *num = f_add(*num, INT2FIX(**s - '0')); + if (count) + (*count)++; + us = 0; + } + (*s)++; + } + if (us) + do { + (*s)--; + } while (**s == '_'); + return 1; +} - a_dot_pat = rb_reg_new(a_dot_pat_source, sizeof a_dot_pat_source - 1, 0); - rb_gc_register_mark_object(a_dot_pat); +static int +read_num(const char **s, int numsign, int strict, + VALUE *num) +{ + VALUE ip, fp, exp; - underscores_pat = rb_reg_new(underscores_pat_source, - sizeof underscores_pat_source - 1, 0); - rb_gc_register_mark_object(underscores_pat); + *num = rb_rational_raw2(ZERO, ONE); + exp = Qnil; - an_underscore = rb_usascii_str_new2("_"); - rb_gc_register_mark_object(an_underscore); -} + if (**s != '.') { + if (!read_digits(s, strict, &ip, NULL)) + return 0; + *num = rb_rational_raw2(ip, ONE); + } -#define id_match rb_intern("match") -#define f_match(x,y) rb_funcall((x), id_match, 1, (y)) + if (**s == '.') { + int count = 0; -#define id_split rb_intern("split") -#define f_split(x,y) rb_funcall((x), id_split, 1, (y)) + (*s)++; + if (!read_digits(s, strict, &fp, &count)) + return 0; + { + VALUE l = f_expt10(INT2NUM(count)); + *num = f_mul(*num, l); + *num = f_add(*num, fp); + *num = f_div(*num, l); + } + } -#include <ctype.h> + if (**s == 'e' || **s == 'E') { + int expsign; -static VALUE -string_to_r_internal(VALUE self) -{ - VALUE s, m; + (*s)++; + expsign = read_sign(s); + if (!read_digits(s, strict, &exp, NULL)) + return 0; + if (expsign == '-') + exp = f_negate(exp); + } - s = self; + if (numsign == '-') + *num = f_negate(*num); + if (!NIL_P(exp)) { + VALUE l = f_expt10(exp); + *num = f_mul(*num, l); + } + return 1; +} - if (RSTRING_LEN(s) == 0) - return rb_assoc_new(Qnil, self); +static int +read_den(const char **s, int strict, + VALUE *num) +{ + if (!read_digits(s, strict, num, NULL)) + return 0; + return 1; +} - m = f_match(rat_pat, s); +static int +read_rat_nos(const char **s, int sign, int strict, + VALUE *num) +{ + VALUE den; - if (!NIL_P(m)) { - VALUE v, ifp, exp, ip, fp; - VALUE si = rb_reg_nth_match(1, m); - VALUE nu = rb_reg_nth_match(2, m); - VALUE de = rb_reg_nth_match(3, m); - VALUE re = rb_reg_match_post(m); + if (!read_num(s, sign, strict, num)) + return 0; + if (**s == '/') { + (*s)++; + if (!read_den(s, strict, &den)) + return 0; + if (!(FIXNUM_P(den) && FIX2LONG(den) == 1)) + *num = f_div(*num, den); + } + return 1; +} - { - VALUE a; +static int +read_rat(const char **s, int strict, + VALUE *num) +{ + int sign; - if (!strpbrk(RSTRING_PTR(nu), "eE")) { - ifp = nu; /* not a copy */ - exp = Qnil; - } - else { - a = f_split(nu, an_e_pat); - ifp = RARRAY_PTR(a)[0]; - if (RARRAY_LEN(a) != 2) - exp = Qnil; - else - exp = RARRAY_PTR(a)[1]; - } + sign = read_sign(s); + if (!read_rat_nos(s, sign, strict, num)) + return 0; + return 1; +} - if (!strchr(RSTRING_PTR(ifp), '.')) { - ip = ifp; /* not a copy */ - fp = Qnil; - } - else { - a = f_split(ifp, a_dot_pat); - ip = RARRAY_PTR(a)[0]; - if (RARRAY_LEN(a) != 2) - fp = Qnil; - else - fp = RARRAY_PTR(a)[1]; - } - } +static int +parse_rat(const char *s, int strict, + VALUE *num) +{ + while (isspace((unsigned char)*s)) + s++; - v = rb_rational_new1(f_to_i(ip)); + if (!read_rat(&s, strict, num)) + return 0; - if (!NIL_P(fp)) { - char *p = RSTRING_PTR(fp); - long count = 0; - VALUE l; + while (isspace((unsigned char)*s)) + s++; - while (*p) { - if (rb_isdigit(*p)) - count++; - p++; - } - l = f_expt10(LONG2NUM(count)); - v = f_mul(v, l); - v = f_add(v, f_to_i(fp)); - v = f_div(v, l); - } - if (!NIL_P(si) && *RSTRING_PTR(si) == '-') - v = f_negate(v); - if (!NIL_P(exp)) - v = f_mul(v, f_expt10(f_to_i(exp))); -#if 0 - if (!NIL_P(de) && (!NIL_P(fp) || !NIL_P(exp))) - return rb_assoc_new(v, rb_usascii_str_new2("dummy")); -#endif - if (!NIL_P(de)) - v = f_div(v, f_to_i(de)); - - return rb_assoc_new(v, re); - } - return rb_assoc_new(Qnil, self); + if (strict) + if (*s != '\0') + return 0; + return 1; } static VALUE string_to_r_strict(VALUE self) { - VALUE a = string_to_r_internal(self); - if (NIL_P(RARRAY_PTR(a)[0]) || RSTRING_LEN(RARRAY_PTR(a)[1]) > 0) { - VALUE s = f_inspect(self); + const char *s; + VALUE num; + + rb_must_asciicompat(self); + + s = RSTRING_PTR(self); + + if (memchr(s, 0, RSTRING_LEN(self))) + rb_raise(rb_eArgError, "string contains null byte"); + + if (!parse_rat(s, 1, &num)) { + VALUE ins = f_inspect(self); rb_raise(rb_eArgError, "invalid value for convert(): %s", - StringValuePtr(s)); + StringValuePtr(ins)); } - return RARRAY_PTR(a)[0]; + + if (RB_TYPE_P(num, T_FLOAT)) + rb_raise(rb_eFloatDomainError, "Infinity"); + return num; } -#define id_gsub rb_intern("gsub") -#define f_gsub(x,y,z) rb_funcall((x), id_gsub, 2, (y), (z)) - /* * call-seq: * str.to_r -> rational @@ -2120,28 +2160,32 @@ static VALUE string_to_r(VALUE self) { - VALUE s, a, a1, backref; + const char *s; + VALUE num; - backref = rb_backref_get(); - rb_match_busy(backref); + rb_must_asciicompat(self); - s = f_gsub(self, underscores_pat, an_underscore); - a = string_to_r_internal(s); + s = RSTRING_PTR(self); - rb_backref_set(backref); + (void)parse_rat(s, 0, &num); - a1 = RARRAY_PTR(a)[0]; - if (!NIL_P(a1)) { - if (RB_TYPE_P(a1, T_FLOAT)) - rb_raise(rb_eFloatDomainError, "Infinity"); - return a1; - } - return rb_rational_new1(INT2FIX(0)); + if (RB_TYPE_P(num, T_FLOAT)) + rb_raise(rb_eFloatDomainError, "Infinity"); + return num; } -#define id_to_r rb_intern("to_r") -#define f_to_r(x) rb_funcall((x), id_to_r, 0) +VALUE +rb_cstr_to_rat(const char *s, int strict) /* for complex's internal */ +{ + VALUE num; + (void)parse_rat(s, strict, &num); + + if (RB_TYPE_P(num, T_FLOAT)) + rb_raise(rb_eFloatDomainError, "Infinity"); + return num; +} + static VALUE nurat_s_convert(int argc, VALUE *argv, VALUE klass) { @@ -2369,8 +2413,6 @@ rb_define_method(rb_cFloat, "to_r", float_to_r, 0); rb_define_method(rb_cFloat, "rationalize", float_rationalize, -1); - make_patterns(); - rb_define_method(rb_cString, "to_r", string_to_r, 0); rb_define_private_method(CLASS_OF(rb_cRational), "convert", nurat_s_convert, -1); -- ML: ruby-changes@q... Info: http://www.atdot.net/~ko1/quickml/