Skip to content

Commit c3fe1be

Browse files
committed
lift comba limit for s_mp_sqr_comba
1 parent c421004 commit c3fe1be

13 files changed

+36
-120
lines changed

demo/test.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1929,7 +1929,7 @@ static int test_s_mp_sqr_karatsuba(void)
19291929
for (size = MP_SQR_KARATSUBA_CUTOFF; size < (MP_SQR_KARATSUBA_CUTOFF + 20); size++) {
19301930
DO(mp_rand(&a, size));
19311931
DO(s_mp_sqr_karatsuba(&a, &b));
1932-
DO(s_mp_sqr(&a, &c));
1932+
DO(s_mp_sqr_comba(&a, &c));
19331933
if (mp_cmp(&b, &c) != MP_EQ) {
19341934
fprintf(stderr, "Karatsuba squaring failed at size %d\n", size);
19351935
goto LBL_ERR;
@@ -2002,7 +2002,7 @@ static int test_s_mp_sqr_toom(void)
20022002
for (size = MP_SQR_TOOM_CUTOFF; size < (MP_SQR_TOOM_CUTOFF + 20); size++) {
20032003
DO(mp_rand(&a, size));
20042004
DO(s_mp_sqr_toom(&a, &b));
2005-
DO(s_mp_sqr(&a, &c));
2005+
DO(s_mp_sqr_comba(&a, &c));
20062006
if (mp_cmp(&b, &c) != MP_EQ) {
20072007
fprintf(stderr, "Toom-Cook 3-way squaring failed at size %d\n", size);
20082008
goto LBL_ERR;

etc/tune.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -133,7 +133,7 @@ static uint64_t s_time_sqr(int size)
133133
goto LBL_ERR;
134134
}
135135
if (s_check_result == 1) {
136-
if ((e = s_mp_sqr(&a,&c)) != MP_OKAY) {
136+
if ((e = s_mp_sqr_comba(&a,&c)) != MP_OKAY) {
137137
t1 = UINT64_MAX;
138138
goto LBL_ERR;
139139
}

libtommath_VS2008.vcproj

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -912,10 +912,6 @@
912912
RelativePath="s_mp_rand_platform.c"
913913
>
914914
</File>
915-
<File
916-
RelativePath="s_mp_sqr.c"
917-
>
918-
</File>
919915
<File
920916
RelativePath="s_mp_sqr_comba.c"
921917
>

makefile

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -48,8 +48,8 @@ s_mp_div_recursive.o s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_expt
4848
s_mp_invmod.o s_mp_invmod_odd.o s_mp_log.o s_mp_log_d.o s_mp_log_pow2.o s_mp_montgomery_reduce_comba.o \
4949
s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o s_mp_mul_karatsuba.o \
5050
s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o s_mp_radix_map.o s_mp_rand_jenkins.o \
51-
s_mp_rand_platform.o s_mp_sqr.o s_mp_sqr_comba.o s_mp_sqr_karatsuba.o s_mp_sqr_toom.o s_mp_sub.o \
52-
s_mp_zero_buf.o s_mp_zero_digs.o
51+
s_mp_rand_platform.o s_mp_sqr_comba.o s_mp_sqr_karatsuba.o s_mp_sqr_toom.o s_mp_sub.o s_mp_zero_buf.o \
52+
s_mp_zero_digs.o
5353

5454
#END_INS
5555

makefile.mingw

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -50,8 +50,8 @@ s_mp_div_recursive.o s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_expt
5050
s_mp_invmod.o s_mp_invmod_odd.o s_mp_log.o s_mp_log_d.o s_mp_log_pow2.o s_mp_montgomery_reduce_comba.o \
5151
s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o s_mp_mul_karatsuba.o \
5252
s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o s_mp_radix_map.o s_mp_rand_jenkins.o \
53-
s_mp_rand_platform.o s_mp_sqr.o s_mp_sqr_comba.o s_mp_sqr_karatsuba.o s_mp_sqr_toom.o s_mp_sub.o \
54-
s_mp_zero_buf.o s_mp_zero_digs.o
53+
s_mp_rand_platform.o s_mp_sqr_comba.o s_mp_sqr_karatsuba.o s_mp_sqr_toom.o s_mp_sub.o s_mp_zero_buf.o \
54+
s_mp_zero_digs.o
5555

5656
HEADERS_PUB=tommath.h
5757
HEADERS=tommath_private.h tommath_class.h tommath_superclass.h tommath_cutoffs.h $(HEADERS_PUB)

makefile.msvc

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -43,8 +43,8 @@ s_mp_div_recursive.obj s_mp_div_school.obj s_mp_div_small.obj s_mp_exptmod.obj s
4343
s_mp_invmod.obj s_mp_invmod_odd.obj s_mp_log.obj s_mp_log_d.obj s_mp_log_pow2.obj s_mp_montgomery_reduce_comba.obj \
4444
s_mp_mul_balance.obj s_mp_mul_comba.obj s_mp_mul_high.obj s_mp_mul_high_comba.obj s_mp_mul_karatsuba.obj \
4545
s_mp_mul_toom.obj s_mp_prime_is_divisible.obj s_mp_prime_tab.obj s_mp_radix_map.obj s_mp_rand_jenkins.obj \
46-
s_mp_rand_platform.obj s_mp_sqr.obj s_mp_sqr_comba.obj s_mp_sqr_karatsuba.obj s_mp_sqr_toom.obj s_mp_sub.obj \
47-
s_mp_zero_buf.obj s_mp_zero_digs.obj
46+
s_mp_rand_platform.obj s_mp_sqr_comba.obj s_mp_sqr_karatsuba.obj s_mp_sqr_toom.obj s_mp_sub.obj s_mp_zero_buf.obj \
47+
s_mp_zero_digs.obj
4848

4949
HEADERS_PUB=tommath.h
5050
HEADERS=tommath_private.h tommath_class.h tommath_superclass.h tommath_cutoffs.h $(HEADERS_PUB)

makefile.shared

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -45,8 +45,8 @@ s_mp_div_recursive.o s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_expt
4545
s_mp_invmod.o s_mp_invmod_odd.o s_mp_log.o s_mp_log_d.o s_mp_log_pow2.o s_mp_montgomery_reduce_comba.o \
4646
s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o s_mp_mul_karatsuba.o \
4747
s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o s_mp_radix_map.o s_mp_rand_jenkins.o \
48-
s_mp_rand_platform.o s_mp_sqr.o s_mp_sqr_comba.o s_mp_sqr_karatsuba.o s_mp_sqr_toom.o s_mp_sub.o \
49-
s_mp_zero_buf.o s_mp_zero_digs.o
48+
s_mp_rand_platform.o s_mp_sqr_comba.o s_mp_sqr_karatsuba.o s_mp_sqr_toom.o s_mp_sub.o s_mp_zero_buf.o \
49+
s_mp_zero_digs.o
5050

5151
#END_INS
5252

makefile.unix

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -51,8 +51,8 @@ s_mp_div_recursive.o s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_expt
5151
s_mp_invmod.o s_mp_invmod_odd.o s_mp_log.o s_mp_log_d.o s_mp_log_pow2.o s_mp_montgomery_reduce_comba.o \
5252
s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o s_mp_mul_karatsuba.o \
5353
s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o s_mp_radix_map.o s_mp_rand_jenkins.o \
54-
s_mp_rand_platform.o s_mp_sqr.o s_mp_sqr_comba.o s_mp_sqr_karatsuba.o s_mp_sqr_toom.o s_mp_sub.o \
55-
s_mp_zero_buf.o s_mp_zero_digs.o
54+
s_mp_rand_platform.o s_mp_sqr_comba.o s_mp_sqr_karatsuba.o s_mp_sqr_toom.o s_mp_sub.o s_mp_zero_buf.o \
55+
s_mp_zero_digs.o
5656

5757
HEADERS_PUB=tommath.h
5858
HEADERS=tommath_private.h tommath_class.h tommath_superclass.h tommath_cutoffs.h $(HEADERS_PUB)

mp_mul.c

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -21,12 +21,8 @@ mp_err mp_mul(const mp_int *a, const mp_int *b, mp_int *c)
2121
(a->used >= MP_SQR_KARATSUBA_CUTOFF)) {
2222
err = s_mp_sqr_karatsuba(a, c);
2323
} else if ((a == b) &&
24-
MP_HAS(S_MP_SQR_COMBA) &&
25-
(a->used < (MP_MAX_COMBA / 2))) {
24+
MP_HAS(S_MP_SQR_COMBA)) {
2625
err = s_mp_sqr_comba(a, c);
27-
} else if ((a == b) &&
28-
MP_HAS(S_MP_SQR)) {
29-
err = s_mp_sqr(a, c);
3026
} else if (MP_HAS(S_MP_MUL_BALANCE) &&
3127
/* Check sizes. The smaller one needs to be larger than the Karatsuba cut-off.
3228
* The bigger one needs to be at least about one MP_MUL_KARATSUBA_CUTOFF bigger

s_mp_sqr.c

Lines changed: 0 additions & 76 deletions
This file was deleted.

s_mp_sqr_comba.c

Lines changed: 21 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ After that loop you do the squares and add them in.
1616
mp_err s_mp_sqr_comba(const mp_int *a, mp_int *b)
1717
{
1818
int oldused, pa, ix;
19-
mp_word W1;
19+
mp_digit c0, c1, c2;
2020
mp_err err;
2121
mp_int tmp, *b_;
2222

@@ -31,10 +31,9 @@ mp_err s_mp_sqr_comba(const mp_int *a, mp_int *b)
3131
}
3232

3333
/* number of output digits to produce */
34-
W1 = 0;
34+
c0 = c1 = c2 = 0;
3535
for (ix = 0; ix < pa; ix++) {
3636
int tx, ty, iy, iz;
37-
mp_word W = 0;
3837

3938
/* get offsets into the two bignums */
4039
ty = MP_MIN(a->used-1, ix);
@@ -53,22 +52,33 @@ mp_err s_mp_sqr_comba(const mp_int *a, mp_int *b)
5352

5453
/* execute loop */
5554
for (iz = 0; iz < iy; iz++) {
56-
W += (mp_word)a->dp[tx + iz] * (mp_word)a->dp[ty - iz];
55+
mp_word t = (mp_word)a->dp[tx + iz] * (mp_word)a->dp[ty - iz];
56+
int j;
57+
for (j = 0; j < 2; ++j) {
58+
mp_word w = (mp_word)c0 + t;
59+
c0 = (mp_digit)(w & MP_MASK);
60+
w = (mp_word)c1 + (w >> MP_DIGIT_BIT);
61+
c1 = (mp_digit)(w & MP_MASK);
62+
c2 += (mp_digit)(w >> MP_DIGIT_BIT);
63+
}
5764
}
5865

59-
/* double the inner product and add carry */
60-
W = W + W + W1;
61-
6266
/* even columns have the square term in them */
6367
if (((unsigned)ix & 1u) == 0u) {
64-
W += (mp_word)a->dp[ix>>1] * (mp_word)a->dp[ix>>1];
68+
mp_word w = (mp_word)c0 + ((mp_word)a->dp[ix / 2] * (mp_word)a->dp[ix / 2]);
69+
c0 = (mp_digit)(w & MP_MASK);
70+
w = (mp_word)c1 + (w >> MP_DIGIT_BIT);
71+
c1 = (mp_digit)(w & MP_MASK);
72+
c2 += (mp_digit)(w >> MP_DIGIT_BIT);
6573
}
6674

67-
/* store it */
68-
b_->dp[ix] = (mp_digit)W & MP_MASK;
75+
/* store term */
76+
b_->dp[ix] = c0;
6977

7078
/* make next carry */
71-
W1 = W >> (mp_word)MP_DIGIT_BIT;
79+
c0 = c1;
80+
c1 = c2;
81+
c2 = 0;
7282
}
7383

7484
/* setup dest */

tommath_class.h

Lines changed: 0 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -161,7 +161,6 @@
161161
# define S_MP_RADIX_MAP_C
162162
# define S_MP_RAND_JENKINS_C
163163
# define S_MP_RAND_PLATFORM_C
164-
# define S_MP_SQR_C
165164
# define S_MP_SQR_COMBA_C
166165
# define S_MP_SQR_KARATSUBA_C
167166
# define S_MP_SQR_TOOM_C
@@ -544,7 +543,6 @@
544543
# define S_MP_MUL_COMBA_C
545544
# define S_MP_MUL_KARATSUBA_C
546545
# define S_MP_MUL_TOOM_C
547-
# define S_MP_SQR_C
548546
# define S_MP_SQR_COMBA_C
549547
# define S_MP_SQR_KARATSUBA_C
550548
# define S_MP_SQR_TOOM_C
@@ -1204,14 +1202,6 @@
12041202
#if defined(S_MP_RAND_PLATFORM_C)
12051203
#endif
12061204

1207-
#if defined(S_MP_SQR_C)
1208-
# define MP_CLAMP_C
1209-
# define MP_CLEAR_C
1210-
# define MP_GROW_C
1211-
# define MP_INIT_SIZE_C
1212-
# define S_MP_ZERO_DIGS_C
1213-
#endif
1214-
12151205
#if defined(S_MP_SQR_COMBA_C)
12161206
# define MP_CLAMP_C
12171207
# define MP_CLEAR_C

tommath_private.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -129,6 +129,7 @@ extern void MP_FREE(void *mem, size_t size);
129129

130130
#define MP_SIZEOF_BITS(type) ((size_t)CHAR_BIT * sizeof(type))
131131

132+
/* TODO: Remove MP_MAX_COMBA and MP_WARRAY */
132133
#define MP_MAX_COMBA (int)(1uL << (MP_SIZEOF_BITS(mp_word) - (2u * (size_t)MP_DIGIT_BIT)))
133134
#define MP_WARRAY (int)(1uL << ((MP_SIZEOF_BITS(mp_word) - (2u * (size_t)MP_DIGIT_BIT)) + 1u))
134135

@@ -183,7 +184,6 @@ MP_PRIVATE mp_err s_mp_mul_karatsuba(const mp_int *a, const mp_int *b, mp_int *c
183184
MP_PRIVATE mp_err s_mp_mul_toom(const mp_int *a, const mp_int *b, mp_int *c) MP_WUR;
184185
MP_PRIVATE mp_err s_mp_prime_is_divisible(const mp_int *a, bool *result) MP_WUR;
185186
MP_PRIVATE mp_err s_mp_rand_platform(void *p, size_t n) MP_WUR;
186-
MP_PRIVATE mp_err s_mp_sqr(const mp_int *a, mp_int *b) MP_WUR;
187187
MP_PRIVATE mp_err s_mp_sqr_comba(const mp_int *a, mp_int *b) MP_WUR;
188188
MP_PRIVATE mp_err s_mp_sqr_karatsuba(const mp_int *a, mp_int *b) MP_WUR;
189189
MP_PRIVATE mp_err s_mp_sqr_toom(const mp_int *a, mp_int *b) MP_WUR;

0 commit comments

Comments
 (0)