[前][次][番号順一覧][スレッド一覧]

ruby-changes:29769

From: akr <ko1@a...>
Date: Sun, 7 Jul 2013 15:04:04 +0900 (JST)
Subject: [ruby-changes:29769] akr:r41821 (trunk): * bignum.c: (bigsub_core): Use bary_sub.

akr	2013-07-07 15:03:52 +0900 (Sun, 07 Jul 2013)

  New Revision: 41821

  http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=41821

  Log:
    * bignum.c: (bigsub_core): Use bary_sub.
      (bary_sub): Returns a borrow flag.  Use bary_subb.
      (bary_subb): New function for actually calculating subtraction with
      borrow.
      (bary_sub_one): New function.
      (bigadd_core): Use bary_add.
      (bary_add): Returns a carry flag.  Use bary_addc.
      (bary_addc): New function for actually calculating addition with
      carry.
      (bary_add_one): New function.
      (bary_muladd_1xN): Extracted from bary_mul_normal.
      (bigmul1_normal): Removed.
      (bary_mul_karatsuba): New function.
      (bary_mul1): Invoke rb_thread_check_ints after bary_mul_normal.
      (bary_mul): Remove most and least significant zeros before actual
      multiplication.  Use bary_sq_fast, bary_mul_balance,
      bary_mul_karatsuba and bigmul1_toom3 as bigmul0.
      (bigmul1_balance): Removed.
      (bigmul1_karatsuba): Removed.
      (bigsqr_fast): Removed.
      (bary_sparse_p): Extracted from big_sparse_p.
      (big_sparse_p): Removed.
      (bigmul0): Use bary_mul.

  Modified files:
    trunk/ChangeLog
    trunk/bignum.c

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 41820)
+++ ChangeLog	(revision 41821)
@@ -1,3 +1,29 @@ https://github.com/ruby/ruby/blob/trunk/ChangeLog#L1
+Sun Jul  7 14:41:57 2013  Tanaka Akira  <akr@f...>
+
+	* bignum.c: (bigsub_core): Use bary_sub.
+	  (bary_sub): Returns a borrow flag.  Use bary_subb.
+	  (bary_subb): New function for actually calculating subtraction with
+	  borrow.
+	  (bary_sub_one): New function.
+	  (bigadd_core): Use bary_add.
+	  (bary_add): Returns a carry flag.  Use bary_addc.
+	  (bary_addc): New function for actually calculating addition with
+	  carry.
+	  (bary_add_one): New function.
+	  (bary_muladd_1xN): Extracted from bary_mul_normal.
+	  (bigmul1_normal): Removed.
+	  (bary_mul_karatsuba): New function.
+	  (bary_mul1): Invoke rb_thread_check_ints after bary_mul_normal.
+	  (bary_mul): Remove most and least significant zeros before actual
+	  multiplication.  Use bary_sq_fast, bary_mul_balance,
+	  bary_mul_karatsuba and bigmul1_toom3 as bigmul0.
+	  (bigmul1_balance): Removed.
+	  (bigmul1_karatsuba): Removed.
+	  (bigsqr_fast): Removed.
+	  (bary_sparse_p): Extracted from big_sparse_p.
+	  (big_sparse_p): Removed.
+	  (bigmul0): Use bary_mul.
+
 Sun Jul  7 11:54:33 2013  Kouhei Sutou  <kou@c...>
 
 	* NEWS: Add REXML::Text#<< related updates.
Index: bignum.c
===================================================================
--- bignum.c	(revision 41820)
+++ bignum.c	(revision 41821)
@@ -101,14 +101,18 @@ static void bary_small_rshift(BDIGIT *zd https://github.com/ruby/ruby/blob/trunk/bignum.c#L101
 static void bary_unpack(BDIGIT *bdigits, size_t num_bdigits, const void *words, size_t numwords, size_t wordsize, size_t nails, int flags);
 static void bary_mul1(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl);
 static void bary_mul(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl);
-static void bary_sub(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn);
+static int bary_sub(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn);
+static int bary_subb(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn, int borrow);
 static void bary_divmod(BDIGIT *qds, size_t nq, BDIGIT *rds, size_t nr, BDIGIT *xds, size_t nx, BDIGIT *yds, size_t ny);
-static void bary_add(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn);
+static int bary_add(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn);
+static int bary_addc(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn, int carry);
 static int bary_pack(int sign, BDIGIT *ds, size_t num_bdigits, void *words, size_t numwords, size_t wordsize, size_t nails, int flags);
 static int bary_2comp(BDIGIT *ds, size_t n);
 
-static VALUE bigsqr_fast(VALUE x);
+static void bary_sq_fast(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn);
+static inline int bary_sparse_p(BDIGIT *ds, size_t n);
 static VALUE bigmul0(VALUE x, VALUE y);
+static VALUE bigmul1_toom3(VALUE x, VALUE y);
 
 #define BIGNUM_DEBUG 0
 #if BIGNUM_DEBUG
@@ -3459,38 +3463,58 @@ rb_big_neg(VALUE x) https://github.com/ruby/ruby/blob/trunk/bignum.c#L3463
 static void
 bigsub_core(BDIGIT *xds, long xn, BDIGIT *yds, long yn, BDIGIT *zds, long zn)
 {
+    bary_sub(zds, zn, xds, xn, yds, yn);
+}
+
+static int
+bary_subb(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn, int borrow)
+{
     BDIGIT_DBL_SIGNED num;
-    long i;
+    size_t i;
+
+    assert(yn <= xn);
+    assert(xn <= zn);
 
-    for (i = 0, num = 0; i < yn; i++) {
+    num = borrow ? -1 : 0;
+    for (i = 0; i < yn; i++) {
 	num += (BDIGIT_DBL_SIGNED)xds[i] - yds[i];
 	zds[i] = BIGLO(num);
 	num = BIGDN(num);
     }
-    while (num && i < xn) {
+    for (; i < xn; i++) {
+        if (num == 0) goto num_is_zero;
 	num += xds[i];
-	zds[i++] = BIGLO(num);
+	zds[i] = BIGLO(num);
 	num = BIGDN(num);
     }
+    if (num == 0) goto num_is_zero;
+    for (; i < zn; i++) {
+	zds[i] = BDIGMAX;
+    }
+    return 1;
+
+  num_is_zero:
     if (xds == zds && xn == zn)
-        return;
-    while (i < xn) {
+        return 0;
+    for (; i < xn; i++) {
 	zds[i] = xds[i];
-	i++;
     }
-    assert(i <= zn);
-    while (i < zn) {
-	zds[i++] = 0;
+    for (; i < zn; i++) {
+	zds[i] = 0;
     }
+    return 0;
 }
 
-static void
+static int
 bary_sub(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn)
 {
-    assert(yn <= xn);
-    assert(xn <= zn);
+    return bary_subb(zds, zn, xds, xn, yds, yn, 0);
+}
 
-    bigsub_core(xds, xn, yds, yn, zds, zn);
+static int
+bary_sub_one(BDIGIT *zds, size_t zn)
+{
+    return bary_subb(zds, zn, zds, zn, NULL, 0, 1);
 }
 
 static VALUE
@@ -3712,8 +3736,17 @@ bigadd_int(VALUE x, long y) https://github.com/ruby/ruby/blob/trunk/bignum.c#L3736
 static void
 bigadd_core(BDIGIT *xds, long xn, BDIGIT *yds, long yn, BDIGIT *zds, long zn)
 {
-    BDIGIT_DBL num = 0;
-    long i;
+    bary_add(zds, zn, xds, xn, yds, yn);
+}
+
+static int
+bary_addc(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn, int carry)
+{
+    BDIGIT_DBL num;
+    size_t i;
+
+    assert(xn <= zn);
+    assert(yn <= zn);
 
     if (xn > yn) {
 	BDIGIT *tds;
@@ -3721,32 +3754,47 @@ bigadd_core(BDIGIT *xds, long xn, BDIGIT https://github.com/ruby/ruby/blob/trunk/bignum.c#L3754
 	i = xn; xn = yn; yn = i;
     }
 
-    i = 0;
-    while (i < xn) {
+    num = carry ? 1 : 0;
+    for (i = 0; i < xn; i++) {
 	num += (BDIGIT_DBL)xds[i] + yds[i];
-	zds[i++] = BIGLO(num);
+	zds[i] = BIGLO(num);
 	num = BIGDN(num);
     }
-    while (num && i < yn) {
+    for (; i < yn; i++) {
+        if (num == 0) goto num_is_zero;
 	num += yds[i];
-	zds[i++] = BIGLO(num);
+	zds[i] = BIGLO(num);
 	num = BIGDN(num);
     }
-    while (i < yn) {
+    for (; i < zn; i++) {
+        if (num == 0) goto num_is_zero;
+	zds[i] = BIGLO(num);
+	num = BIGDN(num);
+    }
+    return num != 0;
+
+  num_is_zero:
+    if (yds == zds && yn == zn)
+        return 0;
+    for (; i < yn; i++) {
 	zds[i] = yds[i];
-	i++;
     }
-    if (num) zds[i++] = (BDIGIT)num;
-    assert(i <= zn);
-    while (i < zn) {
-	zds[i++] = 0;
+    for (; i < zn; i++) {
+	zds[i] = 0;
     }
+    return 0;
 }
 
-static void
+static int
 bary_add(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn)
 {
-    bigadd_core(xds, xn, yds, yn, zds, zn);
+    return bary_addc(zds, zn, xds, xn, yds, yn, 0);
+}
+
+static int
+bary_add_one(BDIGIT *zds, size_t zn)
+{
+    return bary_addc(zds, zn, NULL, 0, zds, zn, 1);
 }
 
 static VALUE
@@ -3871,65 +3919,55 @@ bary_mul_single(BDIGIT *zds, size_t zl, https://github.com/ruby/ruby/blob/trunk/bignum.c#L3919
     zds[1] = (BDIGIT)BIGDN(n);
 }
 
-static VALUE
-bigmul1_single(VALUE x, VALUE y)
+static int
+bary_muladd_1xN(BDIGIT *zds, size_t zl, BDIGIT x, BDIGIT *yds, size_t yl)
 {
-    VALUE z = bignew(2, RBIGNUM_SIGN(x)==RBIGNUM_SIGN(y));
-    BDIGIT *xds, *yds, *zds;
+    BDIGIT_DBL n;
+    BDIGIT_DBL dd;
+    size_t j;
 
-    xds = BDIGITS(x);
-    yds = BDIGITS(y);
-    zds = BDIGITS(z);
+    assert(zl > yl);
 
-    bary_mul_single(zds, 2, xds[0], yds[0]);
+    if (x == 0)
+        return 0;
+    dd = x;
+    n = 0;
+    for (j = 0; j < yl; j++) {
+        BDIGIT_DBL ee = n + dd * yds[j];
+        if (ee) {
+            n = zds[j] + ee;
+            zds[j] = BIGLO(n);
+            n = BIGDN(n);
+        }
+        else {
+            n = 0;
+        }
 
-    return z;
+    }
+    for (; j < zl; j++) {
+        if (n == 0)
+            break;
+        n += zds[j];
+        zds[j] = BIGLO(n);
+        n = BIGDN(n);
+    }
+    return n != 0;
 }
 
 static void
 bary_mul_normal(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl)
 {
     size_t i;
-    size_t j = zl;
-    BDIGIT_DBL n = 0;
 
     assert(xl + yl <= zl);
 
-    while (j--) zds[j] = 0;
+    MEMZERO(zds, BDIGIT, zl);
     for (i = 0; i < xl; i++) {
-	BDIGIT_DBL dd;
-	dd = xds[i];
-	if (dd == 0) continue;
-	n = 0;
-	for (j = 0; j < yl; j++) {
-	    BDIGIT_DBL ee = n + dd * yds[j];
-	    n = zds[i + j] + ee;
-	    if (ee) zds[i + j] = BIGLO(n);
-	    n = BIGDN(n);
-	}
-	if (n) {
-	    zds[i + j] = (BDIGIT)n;
-	}
+        bary_muladd_1xN(zds+i, zl-i, xds[i], yds, yl);
     }
 }
 
-static VALUE
-bigmul1_normal(VALUE x, VALUE y)
-{
-    size_t xl = RBIGNUM_LEN(x), yl = RBIGNUM_LEN(y), zl = xl + yl;
-    VALUE z = bignew(zl, RBIGNUM_SIGN(x)==RBIGNUM_SIGN(y));
-    BDIGIT *xds, *yds, *zds;
-
-    xds = BDIGITS(x);
-    yds = BDIGITS(y);
-    zds = BDIGITS(z);
-
-    bary_mul_normal(zds, zl, xds, xl, yds, yl);
-
-    rb_thread_check_ints();
-    return z;
-}
-
+/* balancing multiplication by slicing larger argument */
 static void
 bary_mul_balance(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl)
 {
@@ -3961,6 +3999,168 @@ bary_mul_balance(BDIGIT *zds, size_t zl, https://github.com/ruby/ruby/blob/trunk/bignum.c#L3999
         ALLOCV_END(work);
 }
 
+/* multiplication by karatsuba method */
+static void
+bary_mul_karatsuba(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl)
+{
+    VALUE work = 0;
+    BDIGIT *wds;
+    size_t wl;
+
+    size_t n;
+    int sub_p, borrow, carry1, carry2, carry3;
+
+    int odd_x = 0;
+    int odd_y = 0;
+
+    BDIGIT *xds0, *xds1, *yds0, *yds1, *zds0, *zds1, *zds2, *zds3;
+
+    assert(xl + yl <= zl);
+    assert(xl <= yl);
+    assert(yl < 2 * xl);
+
+    if (yl & 1) {
+        odd_y = 1;
+        yl--;
+        if (yl < xl) {
+            odd_x = 1;
+            xl--;
+        }
+    }
+
+    n = yl / 2;
+
+    assert(n < xl);
+
+    wl = n;
+    wds = ALLOCV_N(BDIGIT, work, wl);
+
+    /* Karatsuba algorithm:
+     *
+     * x = x0 + r*x1
+     * y = y0 + r*y1
+     * z = x*y
+     *   = (x0 + r*x1) * (y0 + r*y1)
+     *   = x0*y0 + r*(x1*y0 + x0*y1) + r*r*x1*y1
+     *   = x0*y0 + r*(x0*y0 + x1*y1 - (x1-x0)*(y1-y0)) + r*r*x1*y1
+     *   = x0*y0 + r*(x0*y0 + x1*y1 - (x0-x1)*(y0-y1)) + r*r*x1*y1
+     */
+
+    xds0 = xds;
+    xds1 = xds + n;
+    yds0 = yds;
+    yds1 = yds + n;
+    zds0 = zds;
+    zds1 = zds + n;
+    zds2 = zds + 2*n;
+    zds3 = zds + 3*n;
+
+    sub_p = 1;
+
+    /* zds0:? zds1:? zds2:? zds3:? wds:? */
+
+    if (bary_sub(zds0, n, xds, n, xds+n, xl-n)) {
+        bary_2comp(zds0, n);
+        sub_p = !sub_p;
+    }
+
+    /* zds0:|x1-x0| zds1:? zds2:? zds3:? wds:? */
+
+    if (bary_sub(wds, n, yds, n, yds+n, n)) {
+        bary_2comp(wds, n);
+        sub_p = !sub_p;
+    }
+
+    /* zds0:|x1-x0| zds1:? zds2:? zds3:? wds:|y1-y0| */
+
+    bary_mul(zds1, 2*n, zds0, n, wds, n);
+
+    /* zds0:|x1-x0| zds1,zds2:|x1-x0|*|y1-y0| zds3:? wds:|y1-y0| */
+
+    borrow = 0;
+    if (sub_p) {
+        borrow = !bary_2comp(zds1, 2*n);
+    }
+    /* zds0:|x1-x0| zds1,zds2:-?|x1-x0|*|y1-y0| zds3:? wds:|y1-y0| */
+
+    MEMCPY(wds, zds1, BDIGIT, n);
+
+    /* zds0:|x1-x0| zds1,zds2:-?|x1-x0|*|y1-y0| zds3:? wds:lo(-?|x1-x0|*|y1-y0|) */
+
+    bary_mul(zds0, 2*n, xds0, n, yds0, n);
+
+    /* zds0,zds1:x0*y0 zds2:hi(-?|x1-x0|*|y1-y0|) zds3:? wds:lo(-?|x1-x0|*|y1-y0|) */
+
+    carry1 = bary_add(wds, n, wds, n, zds0, n);
+    carry1 = bary_addc(zds2, n, zds2, n, zds1, n, carry1);
+
+    /* zds0,zds1:x0*y0 zds2:hi(x0*y0-?|x1-x0|*|y1-y0|) zds3:? wds:lo(x0*y0-?|x1-x0|*|y1-y0|) */
+
+    carry2 = bary_add(zds1, n, zds1, n, wds, n);
+
+    /* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|) zds2:hi(x0*y0-?|x1-x0|*|y1-y0|) zds3:? wds:lo(x0*y0-?|x1-x0|*|y1-y0|) */
+
+    MEMCPY(wds, zds2, BDIGIT, n);
+
+    /* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|) zds2:_ zds3:? wds:hi(x0*y0-?|x1-x0|*|y1-y0|) */
+
+    bary_mul(zds2, zl-2*n, xds1, xl-n, yds1, n);
+
+    /* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|) zds2,zds3:x1*y1 wds:hi(x0*y0-?|x1-x0|*|y1-y0|) */
+
+    carry3 = bary_add(zds1, n, zds1, n, zds2, n);
+
+    /* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|)+lo(x1*y1) zds2,zds3:x1*y1 wds:hi(x0*y0-?|x1-x0|*|y1-y0|) */
+
+    carry3 = bary_addc(zds2, n, zds2, n, zds3, (4*n < zl ? n : zl-3*n), carry3);
+
+    /* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|)+lo(x1*y1) zds2,zds3:x1*y1+hi(x1*y1) wds:hi(x0*y0-?|x1-x0|*|y1-y0|) */
+
+    bary_add(zds2, zl-2*n, zds2, zl-2*n, wds, n);
+
+    /* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|)+lo(x1*y1) zds2,zds3:x1*y1+hi(x1*y1)+hi(x0*y0-?|x1-x0|*|y1-y0|) wds:_ */
+
+    if (carry2)
+        bary_add_one(zds2, zl-2*n);
+
+    if (borrow && carry1)
+        borrow = carry1 = 0;
+    if (borrow && carry3)
+        borrow = carry3 = 0;
+
+    if (borrow)
+        bary_sub_one(zds3, zl-3*n);
+    else if (carry1 || carry3) {
+        BDIGIT c = carry1 + carry3;
+        bary_add(zds3, zl-3*n, zds3, zl-3*n, &c, 1);
+    }
+
+    /*
+    if (SIZEOF_BDIGITS * zl <= 16) {
+        uint128_t z, x, y;
+        ssize_t i;
+        for (x = 0, i = xl-1; 0 <= i; i--) { x <<= SIZEOF_BDIGITS*CHAR_BIT; x |= xds[i]; }
+        for (y = 0, i = yl-1; 0 <= i; i--) { y <<= SIZEOF_BDIGITS*CHAR_BIT; y |= yds[i]; }
+        for (z = 0, i = zl-1; 0 <= i; i--) { z <<= SIZEOF_BDIGITS*CHAR_BIT; z |= zds[i]; }
+        assert(z == x * y);
+    }
+    */
+
+    if (odd_x && odd_y) {
+        bary_muladd_1xN(zds+yl, zl-yl, yds[yl], xds, xl);
+        bary_muladd_1xN(zds+xl, zl-xl, xds[xl], yds, yl+1);
+    }
+    else if (odd_x) {
+        bary_muladd_1xN(zds+xl, zl-xl, xds[xl], yds, yl);
+    }
+    else if (odd_y) {
+        bary_muladd_1xN(zds+yl, zl-yl, yds[yl], xds, xl);
+    }
+
+    if (work)
+        ALLOCV_END(work);
+}
+
 static void
 bary_mul1(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl)
 {
@@ -3975,6 +4175,7 @@ bary_mul1(BDIGIT *zds, size_t zl, BDIGIT https://github.com/ruby/ruby/blob/trunk/bignum.c#L4175
     else {
         l = xl + yl;
         bary_mul_normal(zds, zl, xds, xl, yds, yl);
+        rb_thread_check_ints();
     }
     MEMZERO(zds + l, BDIGIT, zl - l);
 }
@@ -3982,45 +4183,92 @@ bary_mul1(BDIGIT *zds, size_t zl, BDIGIT https://github.com/ruby/ruby/blob/trunk/bignum.c#L4183
 static void
 bary_mul(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl)
 {
+    size_t nlsz; /* number of least significant zero BDIGITs */
+
     assert(xl + yl <= zl);
 
-    if ((xl < yl ? xl : yl) < KARATSUBA_MUL_DIGITS)
-        bary_mul1(zds, zl, xds, xl, yds, yl);
-    else {
+    while (0 < xl && xds[xl-1] == 0)
+        xl--;
+    while (0 < yl && yds[yl-1] == 0)
+        yl--;
+
+    nlsz = 0;
+    while (0 < xl && xds[0] == 0) {
+        xds++;
+        xl--;
+        nlsz++;
+    }
+    while (0 < yl && yds[0] == 0) {
+        yds++;
+        yl--;
+        nlsz++;
+    }
+    if (nlsz) {
+        MEMZERO(zds, BDIGIT, nlsz);
+        zds += nlsz;
+        zl -= nlsz;
+    }
+
+    /* make sure that y is longer than x */
+    if (xl > yl) {
+        BDIGIT *tds;
+        size_t tl;
+	tds = xds; xds = yds; yds = tds;
+	tl = xl; xl = yl; yl = tl;
+    }
+    assert(xl <= yl);
+
+    if (xl == 0) {
+        MEMZERO(zds, BDIGIT, zl);
+        return;
+    }
+
+    /* normal multiplication when x is small */
+    if (xl < KARATSUBA_MUL_DIGITS) {
+      normal:
+        if (xds == yds && xl == yl)
+            bary_sq_fast(zds, zl, xds, xl);
+        else
+            bary_mul1(zds, zl, xds, xl, yds, yl);
+        return;
+    }
+
+    /* normal multiplication when x or y is a sparse bignum */
+    if (bary_sparse_p(xds, xl)) goto normal;
+    if (bary_sparse_p(yds, yl)) {
+        bary_mul1(zds, zl, yds, yl, xds, xl);
+        return;
+    }
+
+    /* balance multiplication by slicing y when x is much smaller than y */
+    if (2 * xl <= yl) {
+        bary_mul_balance(zds, zl, xds, xl, yds, yl);
+        return;
+    }
+
+    if (xl < TOOM3_MUL_DIGITS) {
+        /* multiplication by karatsuba method */
+        bary_mul_karatsuba(zds, zl, xds, xl, yds, yl);
+        return;
+    }
+
+    if (3*xl <= 2*(yl + 2)) {
+        bary_mul_balance(zds, zl, xds, xl, yds, yl);
+        return;
+    }
+
+    {
         VALUE x, y, z;
         x = bignew(xl, 1);
 	MEMCPY(BDIGITS(x), xds, BDIGIT, xl);
         y = bignew(yl, 1);
 	MEMCPY(BDIGITS(y), yds, BDIGIT, yl);
-        z = bigtrunc(bigmul0(x, y));
+        z = bigtrunc(bigmul1_toom3(x, y));
         MEMCPY(zds, BDIGITS(z), BDIGIT, RBIGNUM_LEN(z));
         MEMZERO(zds + RBIGNUM_LEN(z), BDIGIT, zl - RBIGNUM_LEN(z));
     }
 }
 
-/* balancing multiplication by slicing larger argument */
-static VALUE
-bigmul1_balance(VALUE x, VALUE y)
-{
-    VALUE z;
-    long xn, yn, zn;
-    BDIGIT *xds, *yds, *zds;
-
-    xn = RBIGNUM_LEN(x);
-    yn = RBIGNUM_LEN(y);
-    assert(2 * xn <= yn || 3 * xn <= 2*(yn+2));
-
-    zn = xn + yn;
-    z = bignew(zn, RBIGNUM_SIGN(x)==RBIGNUM_SIGN(y));
-
-    xds = BDIGITS(x);
-    yds = BDIGITS(y);
-    zds = BDIGITS(z);
-
-    bary_mul_balance(zds, zn, xds, xn, yds, yn);
-
-    return z;
-}
 
 /* split a bignum into high and low bignums */
 static void
@@ -4054,102 +4302,6 @@ big_split(VALUE v, long n, volatile VALU https://github.com/ruby/ruby/blob/trunk/bignum.c#L4302
     *ph = h;
 }
 
-/* multiplication by karatsuba method */
-static VALUE
-bigmul1_karatsuba(VALUE x, VALUE y)
-{
-    long i, n, xn, yn, t1n, t2n;
-    VALUE xh, xl, yh, yl, z, t1, t2, t3;
-    BDIGIT *zds;
-
-    xn = RBIGNUM_LEN(x);
-    yn = RBIGNUM_LEN(y);
-    n = yn / 2;
-    big_split(x, n, &xh, &xl);
-    if (x == y) {
-	yh = xh; yl = xl;
-    }
-    else big_split(y, n, &yh, &yl);
-
-    /* x = xh * b + xl
-     * y = yh * b + yl
-     *
-     * Karatsuba method:
-     *   x * y = z2 * b^2 + z1 * b + z0
-     *   where
-     *     z2 = xh * yh
-     *     z0 = xl * yl
-     *     z1 = (xh + xl) * (yh + yl) - z2 - z0
-     *
-     *  ref: http://en.wikipedia.org/wiki/Karatsuba_algorithm
-     */
-
-    /* allocate a result bignum */
-    z = bignew(xn + yn, RBIGNUM_SIGN(x)==RBIGNUM_SIGN(y));
-    zds = BDIGITS(z);
-
-    /* t1 <- xh * yh */
-    t1 = bigmul0(xh, yh);
-    t1n = big_real_len(t1);
-
-    /* copy t1 into high bytes of the result (z2) */
-    MEMCPY(zds + 2 * n, BDIGITS(t1), BDIGIT, t1n);
-    for (i = 2 * n + t1n; i < xn + yn; i++) zds[i] = 0;
-
-    if (!BIGZEROP(xl) && !BIGZEROP(yl)) {
-	/* t2 <- xl * yl */
-	t2 = bigmul0(xl, yl);
-	t2n = big_real_len(t2);
-
-	/* copy t2 into low bytes of the result (z0) */
-	MEMCPY(zds, BDIGITS(t2), BDIGIT, t2n);
-	for (i = t2n; i < 2 * n; i++) zds[i] = 0;
-    }
-    else {
-	t2 = Qundef;
-	t2n = 0;
-
-	/* copy 0 into low bytes of the result (z0) */
-	for (i = 0; i < 2 * n; i++) zds[i] = 0;
-    }
-
-    /* xh <- xh + xl */
-    if (RBIGNUM_LEN(xl) > RBIGNUM_LEN(xh)) {
-	t3 = xl; xl = xh; xh = t3;
-    }
-    /* xh has a margin for carry */
-    bigadd_core(BDIGITS(xh), RBIGNUM_LEN(xh),
-		BDIGITS(xl), RBIGNUM_LEN(xl),
-		BDIGITS(xh), RBIGNUM_LEN(xh));
-
-    /* yh <- yh + yl */
-    if (x != y) {
-	if (RBIGNUM_LEN(yl) > RBIGNUM_LEN(yh)) {
-	    t3 = yl; yl = yh; yh = t3;
-	}
-	/* yh has a margin for carry */
-	bigadd_core(BDIGITS(yh), RBIGNUM_LEN(yh),
-		    BDIGITS(yl), RBIGNUM_LEN(yl),
-		    BDIGITS(yh), RBIGNUM_LEN(yh));
-    }
-    else yh = xh;
-
-    /* t3 <- xh * yh */
-    t3 = bigmul0(xh, yh);
-
-    i = xn + yn - n;
-    /* subtract t1 from t3 */
-    bigsub_core(BDIGITS(t3), big_real_len(t3), BDIGITS(t1), t1n, BDIGITS(t3), big_real_len(t3));
-
-    /* subtract t2 from t3; t3 is now the middle term of the product */
-    if (t2 != Qundef) bigsub_core(BDIGITS(t3), big_real_len(t3), BDIGITS(t2), t2n, BDIGITS(t3), big_real_len(t3));
-
-    /* add t3 to middle bytes of the result (z1) */
-    bigadd_core(zds + n, i, BDIGITS(t3), big_real_len(t3), zds + n, i);
-
-    return z;
-}
-
 static void
 biglsh_bang(BDIGIT *xds, long xn, unsigned long shift)
 {
@@ -4421,70 +4573,41 @@ bary_sq_fast(BDIGIT *zds, size_t zn, BDI https://github.com/ruby/ruby/blob/trunk/bignum.c#L4573
     }
 }
 
-static VAL (... truncated)

--
ML: ruby-changes@q...
Info: http://www.atdot.net/~ko1/quickml/

[前][次][番号順一覧][スレッド一覧]