;; Toom-Cook 3-way multiplication. Used for bignums large enough to ;; make it faster than Karatsuba (a.k.a. Toom-Cook 2-way). Ideally, ;; beyond a given size we'd then switch to FFT-based multiplication. ;; This follows the naming of Algorithm 1.4 from [MCA, 1.3.3] ;; Currently unused because the implementation is long-winded and ;; doesn't provide as much speedup to make it worth it. (define (@bignum-2-times-toom-cook-3 a b) (let* ((rs (fx* (##core#inline "C_u_i_integer_signum" a) (##core#inline "C_u_i_integer_signum" b))) (same? (eqv? a b)) (a ((##core#primitive "C_u_integer_abs") a)) (n (bignum-digit-count b)) (1/3n (fx/ (fx+ n 2) 3)) (2/3n (fxshl 1/3n 1)) (bits (fx* 1/3n (foreign-value "C_BIGNUM_DIGIT_LENGTH" int))) (a2 (%extract-digits a 2/3n #f)) (a1 (%extract-digits a 1/3n 2/3n)) (a0 (%extract-digits a 0 1/3n))) (if same? ; This looks pointless, but reduces garbage (let* ((v0 (%* a0 a0)) (a02 (%+ a0 a2)) (a02+a1 (%+ a02 a1)) (v1 (%* a02+a1 a02+a1)) (a02-a1 (%- a02 a1)) (v-1 (%* a02-a1 a02-a1)) (v2^ (%+ (%+ a0 (%* a1 2)) (%* a2 4))) (v2 (%* v2^ v2^)) (voo (%* a2 a2)) ;; MCA says this is equivalent to shifting by 1 and ;; dividing by 3. However, dividing by 6 is faster. (t1 (%- ((##core#primitive "C_u_integer_quotient") 'toom-cook-3 (%+ (%* 3 v0) (%+ (%* 2 v-1) v2)) 6) (%* 2 voo))) (t2 (arithmetic-shift (%+ v1 v-1) -1)) (c0 v0) (c1 (%- v1 t1)) (c2 (%- (%- t2 v0) voo)) (c3 (%- t1 t2)) (c4 voo)) (%+ (arithmetic-shift c4 (fx* bits 4)) (%+ (arithmetic-shift c3 (fx* bits 3)) (%+ (arithmetic-shift c2 (fx* bits 2)) (%+ (arithmetic-shift c1 (fx* bits 1)) c0))))) (let* ((b ((##core#primitive "C_u_integer_abs") b)) (b2 (%extract-digits b 2/3n #f)) (b1 (%extract-digits b 1/3n 2/3n)) (b0 (%extract-digits b 0 1/3n)) (v0 (%* a0 b0)) (a02 (%+ a0 a2)) (b02 (%+ b0 b2)) (v1 (%* (%+ a02 a1) (%+ b02 b1))) (v-1 (%* (%- a02 a1) (%- b02 b1))) (v2 (%* (%+ (%+ a0 (%* a1 2)) (%* a2 4)) (%+ (%+ b0 (%* b1 2)) (%* b2 4)))) (voo (%* a2 b2)) (t1 (%- ((##core#primitive "C_u_integer_quotient") 'toom-cook-3 (%+ (%* 3 v0) (%+ (%* 2 v-1) v2)) 6) (%* 2 voo))) (t2 (arithmetic-shift (%+ v1 v-1) -1)) (c0 v0) (c1 (%- v1 t1)) (c2 (%- (%- t2 v0) voo)) (c3 (%- t1 t2)) (c4 voo)) (%* rs (%+ (arithmetic-shift c4 (fx* bits 4)) (%+ (arithmetic-shift c3 (fx* bits 3)) (%+ (arithmetic-shift c2 (fx* bits 2)) (%+ (arithmetic-shift c1 (fx* bits 1)) c0)))))))))