s390x: support CPACF sha3/shake performance improvements

On newer machines, the SHA3/SHAKE performance of the CPACF instructions KIMD and
KLMD can be improved by setting additional modifier bits. These bits allow the
application to omit initializing the ICV (initial chaining value), and they also
change how the instructions process data internally. The performance gain is
largest when processing short messages.

The new CPACF feature is backward compatible: older machines simply ignore the
new modifier bits. However, the application may skip the ICV initialization only
when the feature is actually supported, so it must detect the MSA level first.

Signed-off-by: Joerg Schmidbauer <jschmidb@de.ibm.com>

Reviewed-by: Paul Dale <ppzgs1@gmail.com>
Reviewed-by: Tomas Mraz <tomas@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/25235)
This commit is contained in:
Joerg Schmidbauer 2024-02-29 12:50:05 +01:00 committed by Tomas Mraz
parent 6772c2ab1b
commit 25f5d7b85f
4 changed files with 34 additions and 9 deletions

View File

@ -191,6 +191,9 @@ extern int OPENSSL_s390xcex;
# define S390X_KMA_LAAD 0x200
# define S390X_KMA_HS 0x400
# define S390X_KDSA_D 0x80
/*
 * Modifier bits that the s390x SHA3/SHAKE provider code ORs into the function
 * code passed to s390x_kimd() / s390x_klmd():
 *  - the NIP bits are added only on a call made while the context is still in
 *    XOF_STATE_INIT;
 *  - S390X_KLMD_DUFOP is added on the KLMD calls of the final routines.
 * NOTE(review): the architectural meaning of NIP and DUFOP is not visible
 * here — confirm the expansions against the z/Architecture documentation.
 */
# define S390X_KIMD_NIP 0x8000
# define S390X_KLMD_DUFOP 0x4000
# define S390X_KLMD_NIP 0x8000
# define S390X_KLMD_PS 0x100
# define S390X_KMAC_IKP 0x8000
# define S390X_KMAC_IIMP 0x4000

View File

@ -308,7 +308,7 @@ s390x_kimd:
llgfr %r0,$fc
lgr %r1,$param
.long 0xb93e0002 # kimd %r0,%r2
.long 0xb93e8002 # kimd %r0,%r2[,M3]
brc 1,.-4 # pay attention to "partial completion"
br $ra
@ -329,7 +329,7 @@ s390x_klmd:
llgfr %r0,$fc
l${g} %r1,$stdframe($sp)
.long 0xb93f0042 # klmd %r4,%r2
.long 0xb93f8042 # klmd %r4,%r2[,M3]
brc 1,.-4 # pay attention to "partial completion"
br $ra

View File

@ -8,13 +8,19 @@
*/
#include <string.h>
#if defined(__s390x__) && defined(OPENSSL_CPUID_OBJ)
# include "crypto/s390x_arch.h"
#endif
#include "internal/sha3.h"
void SHA3_squeeze(uint64_t A[5][5], unsigned char *out, size_t len, size_t r, int next);
/*
 * Reset a Keccak context for a new message: clear the buffered byte count and
 * put the XOF state machine back to XOF_STATE_INIT.
 *
 * NOTE(review): this span comes from a rendered diff without +/- markers, so
 * the unconditional memset directly below appears to be the pre-change line
 * and the guarded memset further down its replacement — confirm against the
 * real file.
 */
void ossl_sha3_reset(KECCAK1600_CTX *ctx)
{
memset(ctx->A, 0, sizeof(ctx->A));
/*
 * On s390x builds with OPENSSL_CPUID_OBJ the state array is zeroed only when
 * the S390X_MSA12 capability bit is NOT set in stfle[1]; on every other build
 * the preprocessor drops the condition and the memset always runs.
 */
#if defined(__s390x__) && defined(OPENSSL_CPUID_OBJ)
if (!(OPENSSL_s390xcap_P.stfle[1] & S390X_CAPBIT(S390X_MSA12)))
#endif
memset(ctx->A, 0, sizeof(ctx->A));
ctx->bufsz = 0;
ctx->xof_state = XOF_STATE_INIT;
}

View File

@ -193,26 +193,32 @@ static size_t s390x_sha3_absorb(void *vctx, const void *inp, size_t len)
{
/*
 * Absorb the whole-block prefix of inp (len minus len % block_size bytes)
 * into ctx->A via s390x_kimd() and return the number of trailing bytes that
 * were not processed. Returns 0 without calling KIMD when the context is in
 * neither XOF_STATE_INIT nor XOF_STATE_ABSORB.
 *
 * NOTE(review): rendered diff without +/- markers; of the two s390x_kimd()
 * calls below, the one passing ctx->pad appears to be the pre-change line and
 * the one passing fc its replacement — confirm against the real file.
 */
KECCAK1600_CTX *ctx = vctx;
size_t rem = len % ctx->block_size;
unsigned int fc;
if (!(ctx->xof_state == XOF_STATE_INIT ||
ctx->xof_state == XOF_STATE_ABSORB))
return 0;
/* Function code: the context's pad value, plus NIP on the first call only. */
fc = ctx->pad;
fc |= ctx->xof_state == XOF_STATE_INIT ? S390X_KIMD_NIP : 0;
/*
 * NOTE(review): the state leaves XOF_STATE_INIT even when len - rem is 0, so
 * no later call will set NIP; presumably this relies on a zero-length KIMD
 * still honouring NIP — verify, in particular together with a reset that
 * skips clearing ctx->A.
 */
ctx->xof_state = XOF_STATE_ABSORB;
s390x_kimd(inp, len - rem, ctx->pad, ctx->A);
s390x_kimd(inp, len - rem, fc, ctx->A);
return rem;
}
/*
 * Finish a SHA3 digest: run s390x_klmd() over the bytes still buffered in
 * ctx->buf (no output buffer is passed to KLMD) and then copy outlen bytes
 * from ctx->A to out.
 *
 * Returns 0 if the provider is not running or the context is in neither
 * XOF_STATE_INIT nor XOF_STATE_ABSORB; returns 1 otherwise.
 *
 * NOTE(review): rendered diff without +/- markers; of the two s390x_klmd()
 * calls below, the one passing ctx->pad appears to be the pre-change line and
 * the one passing fc its replacement — confirm against the real file.
 */
static int s390x_sha3_final(void *vctx, unsigned char *out, size_t outlen)
{
KECCAK1600_CTX *ctx = vctx;
unsigned int fc;
if (!ossl_prov_is_running())
return 0;
if (!(ctx->xof_state == XOF_STATE_INIT ||
ctx->xof_state == XOF_STATE_ABSORB))
return 0;
/* Function code: pad value with DUFOP, plus NIP if nothing was absorbed yet. */
fc = ctx->pad | S390X_KLMD_DUFOP;
fc |= ctx->xof_state == XOF_STATE_INIT ? S390X_KLMD_NIP : 0;
/* The NIP decision above must read xof_state before it is overwritten here. */
ctx->xof_state = XOF_STATE_FINAL;
s390x_klmd(ctx->buf, ctx->bufsz, NULL, 0, ctx->pad, ctx->A);
s390x_klmd(ctx->buf, ctx->bufsz, NULL, 0, fc, ctx->A);
memcpy(out, ctx->A, outlen);
return 1;
}
@ -220,14 +226,17 @@ static int s390x_sha3_final(void *vctx, unsigned char *out, size_t outlen)
/*
 * Finish a SHAKE computation: run s390x_klmd() over the bytes still buffered
 * in ctx->buf, passing out/outlen to KLMD as the output buffer.
 *
 * Returns 0 if the provider is not running or the context is in neither
 * XOF_STATE_INIT nor XOF_STATE_ABSORB; returns 1 otherwise.
 *
 * NOTE(review): rendered diff without +/- markers; of the two s390x_klmd()
 * calls below, the one passing ctx->pad appears to be the pre-change line and
 * the one passing fc its replacement — confirm against the real file.
 */
static int s390x_shake_final(void *vctx, unsigned char *out, size_t outlen)
{
KECCAK1600_CTX *ctx = vctx;
unsigned int fc;
if (!ossl_prov_is_running())
return 0;
if (!(ctx->xof_state == XOF_STATE_INIT ||
ctx->xof_state == XOF_STATE_ABSORB))
return 0;
/* Function code: pad value with DUFOP, plus NIP if nothing was absorbed yet. */
fc = ctx->pad | S390X_KLMD_DUFOP;
fc |= ctx->xof_state == XOF_STATE_INIT ? S390X_KLMD_NIP : 0;
/* The NIP decision above must read xof_state before it is overwritten here. */
ctx->xof_state = XOF_STATE_FINAL;
s390x_klmd(ctx->buf, ctx->bufsz, out, outlen, ctx->pad, ctx->A);
s390x_klmd(ctx->buf, ctx->bufsz, out, outlen, fc, ctx->A);
return 1;
}
@ -277,24 +286,28 @@ static int s390x_keccakc_final(void *vctx, unsigned char *out, size_t outlen,
size_t bsz = ctx->block_size;
size_t num = ctx->bufsz;
size_t needed = outlen;
unsigned int fc;
if (!ossl_prov_is_running())
return 0;
if (!(ctx->xof_state == XOF_STATE_INIT ||
ctx->xof_state == XOF_STATE_ABSORB))
return 0;
fc = ctx->pad;
fc |= ctx->xof_state == XOF_STATE_INIT ? S390X_KIMD_NIP : 0;
ctx->xof_state = XOF_STATE_FINAL;
if (outlen == 0)
return 1;
memset(ctx->buf + num, 0, bsz - num);
ctx->buf[num] = padding;
ctx->buf[bsz - 1] |= 0x80;
s390x_kimd(ctx->buf, bsz, ctx->pad, ctx->A);
s390x_kimd(ctx->buf, bsz, fc, ctx->A);
num = needed > bsz ? bsz : needed;
memcpy(out, ctx->A, num);
needed -= num;
if (needed > 0)
s390x_klmd(NULL, 0, out + bsz, needed, ctx->pad | S390X_KLMD_PS, ctx->A);
s390x_klmd(NULL, 0, out + bsz, needed,
ctx->pad | S390X_KLMD_PS | S390X_KLMD_DUFOP, ctx->A);
return 1;
}
@ -314,6 +327,7 @@ static int s390x_keccakc_squeeze(void *vctx, unsigned char *out, size_t outlen,
{
KECCAK1600_CTX *ctx = vctx;
size_t len;
unsigned int fc;
if (!ossl_prov_is_running())
return 0;
@ -329,7 +343,9 @@ static int s390x_keccakc_squeeze(void *vctx, unsigned char *out, size_t outlen,
memset(ctx->buf + ctx->bufsz, 0, len);
ctx->buf[ctx->bufsz] = padding;
ctx->buf[ctx->block_size - 1] |= 0x80;
s390x_kimd(ctx->buf, ctx->block_size, ctx->pad, ctx->A);
fc = ctx->pad;
fc |= ctx->xof_state == XOF_STATE_INIT ? S390X_KIMD_NIP : 0;
s390x_kimd(ctx->buf, ctx->block_size, fc, ctx->A);
ctx->bufsz = 0;
/* reuse ctx->bufsz to count bytes squeezed from current sponge */
}