Skip to content

Commit 3223b87

Browse files
authored
Merge pull request #709 from MarekKnapek/sha-stack
Smaller stack usage for SHA-1, SHA-256 and SHA-512.
2 parents d0e0909 + cc53195 commit 3223b87

9 files changed

Lines changed: 242 additions & 106 deletions

File tree

.github/workflows/main.yml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,12 @@ jobs:
4747
- { BUILDNAME: 'STOCK', BUILDOPTIONS: '', BUILDSCRIPT: '.ci/run.sh' }
4848
- { BUILDNAME: 'STOCK-MPI', BUILDOPTIONS: '-ULTM_DESC -UTFM_DESC -UUSE_LTM -UUSE_TFM', BUILDSCRIPT: '.ci/run.sh' }
4949
- { BUILDNAME: 'EASY', BUILDOPTIONS: '-DLTC_EASY', BUILDSCRIPT: '.ci/run.sh' }
50-
- { BUILDNAME: 'SMALL', BUILDOPTIONS: '-DLTC_SMALL_CODE', BUILDSCRIPT: '.ci/run.sh' }
50+
- { BUILDNAME: 'SMALL_CODE', BUILDOPTIONS: '-DLTC_SMALL_CODE', BUILDSCRIPT: '.ci/run.sh' }
51+
- { BUILDNAME: 'SMALL_STACK', BUILDOPTIONS: '-DLTC_SMALL_STACK', BUILDSCRIPT: '.ci/run.sh' }
52+
- { BUILDNAME: 'SMALL', BUILDOPTIONS: '-DLTC_SMALL_CODE -DLTC_SMALL_STACK', BUILDSCRIPT: '.ci/run.sh' }
5153
- { BUILDNAME: 'NO_TABLES', BUILDOPTIONS: '-DLTC_NO_TABLES', BUILDSCRIPT: '.ci/run.sh' }
5254
- { BUILDNAME: 'NO_FAST', BUILDOPTIONS: '-DLTC_NO_FAST', BUILDSCRIPT: '.ci/run.sh' }
53-
- { BUILDNAME: 'NO_FAST+SMALL+NO_TABLES', BUILDOPTIONS: '-DLTC_NO_FAST -DLTC_SMALL_CODE -DLTC_NO_TABLES', BUILDSCRIPT: '.ci/run.sh' }
55+
- { BUILDNAME: 'NO_FAST+SMALL+NO_TABLES', BUILDOPTIONS: '-DLTC_NO_FAST -DLTC_SMALL_CODE -DLTC_SMALL_STACK -DLTC_NO_TABLES', BUILDSCRIPT: '.ci/run.sh' }
5456
- { BUILDNAME: 'NO_ASM', BUILDOPTIONS: '-DLTC_NO_ASM', BUILDSCRIPT: '.ci/run.sh' }
5557
- { BUILDNAME: 'NO_DEPRECATED_APIS', BUILDOPTIONS: '-DLTC_NO_DEPRECATED_APIS', BUILDSCRIPT: '.ci/run.sh' }
5658
- { BUILDNAME: 'NO_TIMING_RESISTANCE', BUILDOPTIONS: '-DLTC_NO_ECC_TIMING_RESISTANT -DLTC_NO_RSA_BLINDING', BUILDSCRIPT: '.ci/run.sh' }

demos/timing.c

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ static prng_state yarrow_prng;
1414
#define KTIMES 25
1515
#define TIMES 100000
1616

17+
static const char *filter_arg;
18+
1719
static struct list {
1820
int id;
1921
ulong64 spd1, spd2, avg;
@@ -56,7 +58,7 @@ static void tally_results(int type)
5658
}
5759

5860
/* RDTSC from Scott Duplichan */
59-
static ulong64 rdtsc (void)
61+
static LTC_INLINE ulong64 rdtsc (void)
6062
{
6163
#if defined __GNUC__ && !defined(LTC_NO_ASM)
6264
#if defined(__i386__) || defined(__x86_64__)
@@ -111,12 +113,12 @@ static ulong64 rdtsc (void)
111113

112114
static ulong64 timer, skew = 0;
113115

114-
static void t_start(void)
116+
static LTC_INLINE void t_start(void)
115117
{
116118
timer = rdtsc();
117119
}
118120

119-
static ulong64 t_read(void)
121+
static LTC_INLINE ulong64 t_read(void)
120122
{
121123
return rdtsc() - timer;
122124
}
@@ -470,20 +472,27 @@ static void time_cipher_lrw(void) { fprintf(stderr, "NO LRW\n"); }
470472

471473
static void time_hash(void)
472474
{
473-
unsigned long x, y1, len;
475+
unsigned long x, y1, len = 1024;
474476
ulong64 t1, t2, c1, c2;
475477
hash_state md;
476478
int (*func)(hash_state *, const unsigned char *, unsigned long), err;
477-
unsigned char pt[MAXBLOCKSIZE] = { 0 };
478-
479+
unsigned char *pt = XMALLOC(len);
480+
if (pt == NULL) {
481+
fprintf(stderr, "\n\nout of heap yo\n\n");
482+
exit(EXIT_FAILURE);
483+
}
479484

480485
fprintf(stderr, "\n\nHASH Time Trials for:\n");
481486
no_results = 0;
482487
for (x = 0; hash_descriptor[x].name != NULL; x++) {
483488

489+
if (filter_arg && strstr(hash_descriptor[x].name, filter_arg) == NULL)
490+
continue;
491+
484492
/* sanity check on hash */
485493
if ((err = hash_descriptor[x].test()) != CRYPT_OK) {
486494
fprintf(stderr, "\n\nERROR: Hash %s failed self-test %s\n", hash_descriptor[x].name, error_to_string(err));
495+
XFREE(pt);
487496
exit(EXIT_FAILURE);
488497
}
489498

@@ -493,7 +502,6 @@ static void time_hash(void)
493502
#define DO2 DO1 DO1
494503

495504
func = hash_descriptor[x].process;
496-
len = hash_descriptor[x].blocksize;
497505

498506
c1 = c2 = (ulong64)-1;
499507
for (y1 = 0; y1 < TIMES; y1++) {
@@ -515,6 +523,7 @@ static void time_hash(void)
515523
#undef DO1
516524
}
517525
tally_results(2);
526+
XFREE(pt);
518527
}
519528

520529
/*#warning you need an mp_rand!!!*/
@@ -1368,12 +1377,15 @@ static void LTC_NORETURN die(int status)
13681377
{
13691378
FILE* o = status == EXIT_SUCCESS ? stdout : stderr;
13701379
fprintf(o,
1371-
"Usage: timing [<-h|-l|alg>] [mpi]\n\n"
1380+
"Usage: timing [<-h|-l|alg>] [mpi] [filter]\n\n"
13721381
"Run timing tests of all built-in algorithms, or only the one given in <alg>.\n\n"
1373-
"\talg\tThe algorithm to test. Use the '-l' option to check for valid values.\n"
1382+
"\talg\tThe algorithms to test. Use the '-l' option to check for valid values.\n"
13741383
"\tmpi\tThe MPI provider to use.\n"
1384+
"\tfilter\tFilter within the algorithm class (currently only for 'hash'es).\n"
13751385
"\t-l\tList all built-in algorithms that can be timed.\n"
1376-
"\t-h\tThe help you're looking at.\n"
1386+
"\t-h\tThe help you're looking at.\n\n"
1387+
"Examples:\n"
1388+
"\ttiming hash sha\t\tWill run the timing demo for all hashes containing 'sha' in their name\n"
13771389
);
13781390
exit(status);
13791391
}
@@ -1440,6 +1452,9 @@ register_all_prngs();
14401452

14411453
if (crypt_mp_init(mpi_provider) != CRYPT_OK) {
14421454
fprintf(stderr, "Init of MPI provider \"%s\" failed\n", mpi_provider ? mpi_provider : "(null)");
1455+
filter_arg = mpi_provider;
1456+
} else if (argc > 3){
1457+
filter_arg = argv[3];
14431458
}
14441459

14451460
if ((err = rng_make_prng(128, find_prng("yarrow"), &yarrow_prng, NULL)) != CRYPT_OK) {

doc/crypt.tex

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9124,6 +9124,12 @@ \subsection{LTC\_SMALL\_CODE}
91249124
When this is defined some of the code such as the Rijndael and SAFER+ ciphers are replaced with smaller code variants.
91259125
These variants are slower but can save quite a bit of code space.
91269126

9127+
\subsection{LTC\_SMALL\_STACK}
9128+
When this is defined some of the code uses a variant which results in smaller stack sizes.
9129+
Depending on the architecture and other configuration options, the resulting execution speed can vary.
9130+
Therefore we try to enable this automatically where it brings an advantage in speed.
9131+
In case you always want smaller stack usage, no matter if it makes the execution slower, you should enable this.
9132+
91279133
\subsection{LTC\_PTHREAD}
91289134
When this is activated all of the descriptor table functions will use pthread locking to ensure thread safe updates to the tables. Note that
91299135
it doesn't prevent a thread that is passively using a table from being messed up by another thread that updates the table.

src/hashes/sha1.c

Lines changed: 56 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,20 @@
44

55
/**
66
@file sha1.c
7-
LTC_SHA1 code by Tom St Denis
7+
SHA1 code by Tom St Denis
88
*/
99

1010

1111
#ifdef LTC_SHA1
1212

13+
/* While implementing the SMALL STACK option in https://github.com/libtom/libtomcrypt/pull/709
14+
* we came to the conclusion that SHA1 profits from the SMALL STACK option when the SMALL CODE
15+
* option is enabled, so let's do that.
16+
*/
17+
#if defined(LTC_SMALL_STACK) || defined(LTC_SMALL_CODE)
18+
#define LTC_SMALL_STACK_SHA1
19+
#endif
20+
1321
const struct ltc_hash_descriptor sha1_desc =
1422
{
1523
"sha1",
@@ -39,7 +47,12 @@ static int ss_sha1_compress(hash_state *md, const unsigned char *buf)
3947
static int s_sha1_compress(hash_state *md, const unsigned char *buf)
4048
#endif
4149
{
42-
ulong32 a,b,c,d,e,W[80],i;
50+
ulong32 a,b,c,d,e,i;
51+
#ifdef LTC_SMALL_STACK_SHA1
52+
ulong32 W[16];
53+
#else
54+
ulong32 W[80];
55+
#endif
4356
#ifdef LTC_SMALL_CODE
4457
ulong32 t;
4558
#endif
@@ -56,78 +69,95 @@ static int s_sha1_compress(hash_state *md, const unsigned char *buf)
5669
d = md->sha1.state[3];
5770
e = md->sha1.state[4];
5871

72+
#ifdef LTC_SMALL_STACK_SHA1
73+
#define Wi(i) do { W[(i) % 16] = ROL(W[((i) - 3) % 16] ^ W[((i) - 8) % 16] ^ W[((i) - 14) % 16] ^ W[((i) - 16) % 16], 1); } while(0)
74+
#define Windex(i) ((i) % 16)
75+
#else
76+
#define Wi(i) do { } while(0)
77+
#define Windex(i) (i)
5978
/* expand it */
6079
for (i = 16; i < 80; i++) {
6180
W[i] = ROL(W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16], 1);
6281
}
82+
#endif
6383

6484
/* compress */
6585
/* round one */
66-
#define FF0(a,b,c,d,e,i) e = (ROLc(a, 5) + F0(b,c,d) + e + W[i] + 0x5a827999UL); b = ROLc(b, 30);
67-
#define FF1(a,b,c,d,e,i) e = (ROLc(a, 5) + F1(b,c,d) + e + W[i] + 0x6ed9eba1UL); b = ROLc(b, 30);
68-
#define FF2(a,b,c,d,e,i) e = (ROLc(a, 5) + F2(b,c,d) + e + W[i] + 0x8f1bbcdcUL); b = ROLc(b, 30);
69-
#define FF3(a,b,c,d,e,i) e = (ROLc(a, 5) + F3(b,c,d) + e + W[i] + 0xca62c1d6UL); b = ROLc(b, 30);
86+
#define FF0(a,b,c,d,e,i) e = (ROLc(a, 5) + F0(b,c,d) + e + W[Windex(i)] + 0x5a827999UL); b = ROLc(b, 30);
87+
#define FF1(a,b,c,d,e,i) e = (ROLc(a, 5) + F1(b,c,d) + e + W[Windex(i)] + 0x6ed9eba1UL); b = ROLc(b, 30);
88+
#define FF2(a,b,c,d,e,i) e = (ROLc(a, 5) + F2(b,c,d) + e + W[Windex(i)] + 0x8f1bbcdcUL); b = ROLc(b, 30);
89+
#define FF3(a,b,c,d,e,i) e = (ROLc(a, 5) + F3(b,c,d) + e + W[Windex(i)] + 0xca62c1d6UL); b = ROLc(b, 30);
7090

7191
#ifdef LTC_SMALL_CODE
7292

73-
for (i = 0; i < 20; ) {
93+
for (i = 0; i < 16; ) {
7494
FF0(a,b,c,d,e,i++); t = e; e = d; d = c; c = b; b = a; a = t;
7595
}
96+
for (; i < 20; ) {
97+
Wi(i); FF0(a,b,c,d,e,i++); t = e; e = d; d = c; c = b; b = a; a = t;
98+
}
7699

77100
for (; i < 40; ) {
78-
FF1(a,b,c,d,e,i++); t = e; e = d; d = c; c = b; b = a; a = t;
101+
Wi(i); FF1(a,b,c,d,e,i++); t = e; e = d; d = c; c = b; b = a; a = t;
79102
}
80103

81104
for (; i < 60; ) {
82-
FF2(a,b,c,d,e,i++); t = e; e = d; d = c; c = b; b = a; a = t;
105+
Wi(i); FF2(a,b,c,d,e,i++); t = e; e = d; d = c; c = b; b = a; a = t;
83106
}
84107

85108
for (; i < 80; ) {
86-
FF3(a,b,c,d,e,i++); t = e; e = d; d = c; c = b; b = a; a = t;
109+
Wi(i); FF3(a,b,c,d,e,i++); t = e; e = d; d = c; c = b; b = a; a = t;
87110
}
88111

89112
#else
90113

91-
for (i = 0; i < 20; ) {
114+
for (i = 0; i < 15; ) {
92115
FF0(a,b,c,d,e,i++);
93116
FF0(e,a,b,c,d,i++);
94117
FF0(d,e,a,b,c,i++);
95118
FF0(c,d,e,a,b,i++);
96119
FF0(b,c,d,e,a,i++);
97120
}
121+
FF0(a,b,c,d,e,i++);
122+
Wi(i); FF0(e,a,b,c,d,i++);
123+
Wi(i); FF0(d,e,a,b,c,i++);
124+
Wi(i); FF0(c,d,e,a,b,i++);
125+
Wi(i); FF0(b,c,d,e,a,i++);
98126

99127
/* round two */
100128
for (; i < 40; ) {
101-
FF1(a,b,c,d,e,i++);
102-
FF1(e,a,b,c,d,i++);
103-
FF1(d,e,a,b,c,i++);
104-
FF1(c,d,e,a,b,i++);
105-
FF1(b,c,d,e,a,i++);
129+
Wi(i); FF1(a,b,c,d,e,i++);
130+
Wi(i); FF1(e,a,b,c,d,i++);
131+
Wi(i); FF1(d,e,a,b,c,i++);
132+
Wi(i); FF1(c,d,e,a,b,i++);
133+
Wi(i); FF1(b,c,d,e,a,i++);
106134
}
107135

108136
/* round three */
109137
for (; i < 60; ) {
110-
FF2(a,b,c,d,e,i++);
111-
FF2(e,a,b,c,d,i++);
112-
FF2(d,e,a,b,c,i++);
113-
FF2(c,d,e,a,b,i++);
114-
FF2(b,c,d,e,a,i++);
138+
Wi(i); FF2(a,b,c,d,e,i++);
139+
Wi(i); FF2(e,a,b,c,d,i++);
140+
Wi(i); FF2(d,e,a,b,c,i++);
141+
Wi(i); FF2(c,d,e,a,b,i++);
142+
Wi(i); FF2(b,c,d,e,a,i++);
115143
}
116144

117145
/* round four */
118146
for (; i < 80; ) {
119-
FF3(a,b,c,d,e,i++);
120-
FF3(e,a,b,c,d,i++);
121-
FF3(d,e,a,b,c,i++);
122-
FF3(c,d,e,a,b,i++);
123-
FF3(b,c,d,e,a,i++);
147+
Wi(i); FF3(a,b,c,d,e,i++);
148+
Wi(i); FF3(e,a,b,c,d,i++);
149+
Wi(i); FF3(d,e,a,b,c,i++);
150+
Wi(i); FF3(c,d,e,a,b,i++);
151+
Wi(i); FF3(b,c,d,e,a,i++);
124152
}
125153
#endif
126154

127155
#undef FF0
128156
#undef FF1
129157
#undef FF2
130158
#undef FF3
159+
#undef Wi
160+
#undef Windex
131161

132162
/* store */
133163
md->sha1.state[0] = md->sha1.state[0] + a;

0 commit comments

Comments
 (0)