AES crypto performance improvements

Stefan Metzmacher metze at samba.org
Thu Aug 13 21:22:17 UTC 2015


Hi,

While debugging the poor performance of our SMB3 crypto,
I developed the following performance improvements.

The aes_block_xor() function improved by a factor of ~10.
The aes_block_[r|l]shift() functions improved by 25%.

Please review and push.

The real solution will be to use hardware support.
I've extended Simo's work in the following branch:
https://git.samba.org/?p=metze/samba/wip.git;a=shortlog;h=refs/heads/master3-smb-crypto
(ignore the unrelated and directly reverted commits on top)

But I'm not that happy with the abstraction yet.

metze
-------------- next part --------------
From 78170df1e630d1400b9a080a10e92becf3e70c9e Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Wed, 12 Aug 2015 12:09:24 +0200
Subject: [PATCH 1/8] s3:vfs_smb_traffic_analyzer: remove samba_ prefix from
 AES_* function calls

This should be an implementation detail in lib/crypto/aes.h.

In future we may add support for other implementations.

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 source3/modules/vfs_smb_traffic_analyzer.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/source3/modules/vfs_smb_traffic_analyzer.c b/source3/modules/vfs_smb_traffic_analyzer.c
index 73ebf63..0208cde 100644
--- a/source3/modules/vfs_smb_traffic_analyzer.c
+++ b/source3/modules/vfs_smb_traffic_analyzer.c
@@ -175,7 +175,7 @@ static char *smb_traffic_analyzer_encrypt( TALLOC_CTX *ctx,
 	unsigned char filler[17]= "................";
 	char *output;
 	if (akey == NULL) return NULL;
-	samba_AES_set_encrypt_key((const unsigned char *) akey, 128, &key);
+	AES_set_encrypt_key((const unsigned char *) akey, 128, &key);
 	s1 = strlen(str) / 16;
 	s2 = strlen(str) % 16;
 	memcpy(filler, str + (s1*16), s2);
@@ -185,10 +185,10 @@ static char *smb_traffic_analyzer_encrypt( TALLOC_CTX *ctx,
 	*len = ((s1 + 1)*16);
 	output = talloc_array(ctx, char, *len);
 	for (h = 0; h < s1; h++) {
-		samba_AES_encrypt((const unsigned char *) str+(16*h), (unsigned char *)output+16*h,
+		AES_encrypt((const unsigned char *) str+(16*h), (unsigned char *)output+16*h,
 			&key);
 	}
-	samba_AES_encrypt(filler, (unsigned char *)(output+(16*h)), &key);
+	AES_encrypt(filler, (unsigned char *)(output+(16*h)), &key);
 	*len = (s1*16)+16;
 	return output;
 }
-- 
1.9.1


From f2dc577f645f06793d2530ee20d5e4f7493aa7e9 Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Tue, 11 Aug 2015 16:31:25 +0200
Subject: [PATCH 2/8] lib/crypto: add aes_cmac_128 chunked tests

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 lib/crypto/aes_cmac_128_test.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/lib/crypto/aes_cmac_128_test.c b/lib/crypto/aes_cmac_128_test.c
index 173087f..c099a0f 100644
--- a/lib/crypto/aes_cmac_128_test.c
+++ b/lib/crypto/aes_cmac_128_test.c
@@ -87,6 +87,30 @@ bool torture_local_crypto_aes_cmac_128(struct torture_context *torture)
 			ret = false;
 		}
 	}
+	for (i=0; testarray[i].cmac.length != 0; i++) {
+		struct aes_cmac_128_context ctx;
+		uint8_t cmac[AES_BLOCK_SIZE];
+		int e;
+		size_t j;
+
+		aes_cmac_128_init(&ctx, key.data);
+		for (j=0; j < testarray[i].data.length; j++) {
+			aes_cmac_128_update(&ctx,
+					    &testarray[i].data.data[j],
+					    1);
+		}
+		aes_cmac_128_final(&ctx, cmac);
+
+		e = memcmp(testarray[i].cmac.data, cmac, sizeof(cmac));
+		if (e != 0) {
+			printf("aes_cmac_128 chunked test[%u]: failed\n", i);
+			dump_data(0, key.data, key.length);
+			dump_data(0, testarray[i].data.data, testarray[i].data.length);
+			dump_data(0, testarray[i].cmac.data, testarray[i].cmac.length);
+			dump_data(0, cmac, sizeof(cmac));
+			ret = false;
+		}
+	}
 	talloc_free(tctx);
 	return ret;
 }
-- 
1.9.1


From b3936791bfbb222ae1f24dd7486246522f5cdaa5 Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Wed, 12 Aug 2015 00:59:58 +0200
Subject: [PATCH 3/8] lib/crypto: add optimized helper function
 aes_block_{xor,lshift,rshift}

These are typical operations on an AES_BLOCK used by different modes.

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 lib/crypto/aes.h | 559 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 559 insertions(+)

diff --git a/lib/crypto/aes.h b/lib/crypto/aes.h
index a2b6c07..ff2c448 100644
--- a/lib/crypto/aes.h
+++ b/lib/crypto/aes.h
@@ -80,4 +80,563 @@ void aes_cfb8_encrypt(const uint8_t *in, uint8_t *out,
 }
 #endif
 
+/*
+ * XOR two AES blocks: out = in1 ^ in2.
+ *
+ * The 64-bit fast path covers exactly 16 bytes, so this relies on
+ * AES_BLOCK_SIZE being 16 (defined outside this hunk - TODO confirm).
+ * out may be the very same buffer as in1 or in2.
+ *
+ * The uint8_t parameters give no alignment guarantee. They are only
+ * accessed through uint64_t pointers when all three addresses are
+ * 8-byte aligned: a misaligned uint64_t access is undefined behaviour
+ * in C and faults on strict-alignment CPUs. Unaligned callers get a
+ * plain byte loop instead.
+ */
+static inline void aes_block_xor(const uint8_t in1[AES_BLOCK_SIZE],
+				 const uint8_t in2[AES_BLOCK_SIZE],
+				 uint8_t out[AES_BLOCK_SIZE])
+{
+	/* OR the addresses: any low bit set means one of them is misaligned */
+	const uintptr_t addrs = (uintptr_t)in1 | (uintptr_t)in2 | (uintptr_t)out;
+
+	if ((addrs & (sizeof(uint64_t) - 1)) == 0) {
+		const uint64_t *i1 = (const uint64_t *)in1;
+		const uint64_t *i2 = (const uint64_t *)in2;
+		uint64_t *o = (uint64_t *)out;
+
+		o[0] = i1[0] ^ i2[0];
+		o[1] = i1[1] ^ i2[1];
+	} else {
+		uint8_t i;
+
+		for (i = 0; i < AES_BLOCK_SIZE; i++) {
+			out[i] = in1[i] ^ in2[i];
+		}
+	}
+}
+
+static inline void aes_block_lshift(const uint8_t in[AES_BLOCK_SIZE],
+				    uint8_t out[AES_BLOCK_SIZE])
+{
+	static const struct aes_block_lshift_entry {
+		uint8_t lshift;
+		uint8_t overflow;
+	} aes_block_lshift_table[UINT8_MAX+1] = {
+		[0x00] = { .lshift = 0x00, .overflow = 0x00 },
+		[0x01] = { .lshift = 0x02, .overflow = 0x00 },
+		[0x02] = { .lshift = 0x04, .overflow = 0x00 },
+		[0x03] = { .lshift = 0x06, .overflow = 0x00 },
+		[0x04] = { .lshift = 0x08, .overflow = 0x00 },
+		[0x05] = { .lshift = 0x0a, .overflow = 0x00 },
+		[0x06] = { .lshift = 0x0c, .overflow = 0x00 },
+		[0x07] = { .lshift = 0x0e, .overflow = 0x00 },
+		[0x08] = { .lshift = 0x10, .overflow = 0x00 },
+		[0x09] = { .lshift = 0x12, .overflow = 0x00 },
+		[0x0a] = { .lshift = 0x14, .overflow = 0x00 },
+		[0x0b] = { .lshift = 0x16, .overflow = 0x00 },
+		[0x0c] = { .lshift = 0x18, .overflow = 0x00 },
+		[0x0d] = { .lshift = 0x1a, .overflow = 0x00 },
+		[0x0e] = { .lshift = 0x1c, .overflow = 0x00 },
+		[0x0f] = { .lshift = 0x1e, .overflow = 0x00 },
+		[0x10] = { .lshift = 0x20, .overflow = 0x00 },
+		[0x11] = { .lshift = 0x22, .overflow = 0x00 },
+		[0x12] = { .lshift = 0x24, .overflow = 0x00 },
+		[0x13] = { .lshift = 0x26, .overflow = 0x00 },
+		[0x14] = { .lshift = 0x28, .overflow = 0x00 },
+		[0x15] = { .lshift = 0x2a, .overflow = 0x00 },
+		[0x16] = { .lshift = 0x2c, .overflow = 0x00 },
+		[0x17] = { .lshift = 0x2e, .overflow = 0x00 },
+		[0x18] = { .lshift = 0x30, .overflow = 0x00 },
+		[0x19] = { .lshift = 0x32, .overflow = 0x00 },
+		[0x1a] = { .lshift = 0x34, .overflow = 0x00 },
+		[0x1b] = { .lshift = 0x36, .overflow = 0x00 },
+		[0x1c] = { .lshift = 0x38, .overflow = 0x00 },
+		[0x1d] = { .lshift = 0x3a, .overflow = 0x00 },
+		[0x1e] = { .lshift = 0x3c, .overflow = 0x00 },
+		[0x1f] = { .lshift = 0x3e, .overflow = 0x00 },
+		[0x20] = { .lshift = 0x40, .overflow = 0x00 },
+		[0x21] = { .lshift = 0x42, .overflow = 0x00 },
+		[0x22] = { .lshift = 0x44, .overflow = 0x00 },
+		[0x23] = { .lshift = 0x46, .overflow = 0x00 },
+		[0x24] = { .lshift = 0x48, .overflow = 0x00 },
+		[0x25] = { .lshift = 0x4a, .overflow = 0x00 },
+		[0x26] = { .lshift = 0x4c, .overflow = 0x00 },
+		[0x27] = { .lshift = 0x4e, .overflow = 0x00 },
+		[0x28] = { .lshift = 0x50, .overflow = 0x00 },
+		[0x29] = { .lshift = 0x52, .overflow = 0x00 },
+		[0x2a] = { .lshift = 0x54, .overflow = 0x00 },
+		[0x2b] = { .lshift = 0x56, .overflow = 0x00 },
+		[0x2c] = { .lshift = 0x58, .overflow = 0x00 },
+		[0x2d] = { .lshift = 0x5a, .overflow = 0x00 },
+		[0x2e] = { .lshift = 0x5c, .overflow = 0x00 },
+		[0x2f] = { .lshift = 0x5e, .overflow = 0x00 },
+		[0x30] = { .lshift = 0x60, .overflow = 0x00 },
+		[0x31] = { .lshift = 0x62, .overflow = 0x00 },
+		[0x32] = { .lshift = 0x64, .overflow = 0x00 },
+		[0x33] = { .lshift = 0x66, .overflow = 0x00 },
+		[0x34] = { .lshift = 0x68, .overflow = 0x00 },
+		[0x35] = { .lshift = 0x6a, .overflow = 0x00 },
+		[0x36] = { .lshift = 0x6c, .overflow = 0x00 },
+		[0x37] = { .lshift = 0x6e, .overflow = 0x00 },
+		[0x38] = { .lshift = 0x70, .overflow = 0x00 },
+		[0x39] = { .lshift = 0x72, .overflow = 0x00 },
+		[0x3a] = { .lshift = 0x74, .overflow = 0x00 },
+		[0x3b] = { .lshift = 0x76, .overflow = 0x00 },
+		[0x3c] = { .lshift = 0x78, .overflow = 0x00 },
+		[0x3d] = { .lshift = 0x7a, .overflow = 0x00 },
+		[0x3e] = { .lshift = 0x7c, .overflow = 0x00 },
+		[0x3f] = { .lshift = 0x7e, .overflow = 0x00 },
+		[0x40] = { .lshift = 0x80, .overflow = 0x00 },
+		[0x41] = { .lshift = 0x82, .overflow = 0x00 },
+		[0x42] = { .lshift = 0x84, .overflow = 0x00 },
+		[0x43] = { .lshift = 0x86, .overflow = 0x00 },
+		[0x44] = { .lshift = 0x88, .overflow = 0x00 },
+		[0x45] = { .lshift = 0x8a, .overflow = 0x00 },
+		[0x46] = { .lshift = 0x8c, .overflow = 0x00 },
+		[0x47] = { .lshift = 0x8e, .overflow = 0x00 },
+		[0x48] = { .lshift = 0x90, .overflow = 0x00 },
+		[0x49] = { .lshift = 0x92, .overflow = 0x00 },
+		[0x4a] = { .lshift = 0x94, .overflow = 0x00 },
+		[0x4b] = { .lshift = 0x96, .overflow = 0x00 },
+		[0x4c] = { .lshift = 0x98, .overflow = 0x00 },
+		[0x4d] = { .lshift = 0x9a, .overflow = 0x00 },
+		[0x4e] = { .lshift = 0x9c, .overflow = 0x00 },
+		[0x4f] = { .lshift = 0x9e, .overflow = 0x00 },
+		[0x50] = { .lshift = 0xa0, .overflow = 0x00 },
+		[0x51] = { .lshift = 0xa2, .overflow = 0x00 },
+		[0x52] = { .lshift = 0xa4, .overflow = 0x00 },
+		[0x53] = { .lshift = 0xa6, .overflow = 0x00 },
+		[0x54] = { .lshift = 0xa8, .overflow = 0x00 },
+		[0x55] = { .lshift = 0xaa, .overflow = 0x00 },
+		[0x56] = { .lshift = 0xac, .overflow = 0x00 },
+		[0x57] = { .lshift = 0xae, .overflow = 0x00 },
+		[0x58] = { .lshift = 0xb0, .overflow = 0x00 },
+		[0x59] = { .lshift = 0xb2, .overflow = 0x00 },
+		[0x5a] = { .lshift = 0xb4, .overflow = 0x00 },
+		[0x5b] = { .lshift = 0xb6, .overflow = 0x00 },
+		[0x5c] = { .lshift = 0xb8, .overflow = 0x00 },
+		[0x5d] = { .lshift = 0xba, .overflow = 0x00 },
+		[0x5e] = { .lshift = 0xbc, .overflow = 0x00 },
+		[0x5f] = { .lshift = 0xbe, .overflow = 0x00 },
+		[0x60] = { .lshift = 0xc0, .overflow = 0x00 },
+		[0x61] = { .lshift = 0xc2, .overflow = 0x00 },
+		[0x62] = { .lshift = 0xc4, .overflow = 0x00 },
+		[0x63] = { .lshift = 0xc6, .overflow = 0x00 },
+		[0x64] = { .lshift = 0xc8, .overflow = 0x00 },
+		[0x65] = { .lshift = 0xca, .overflow = 0x00 },
+		[0x66] = { .lshift = 0xcc, .overflow = 0x00 },
+		[0x67] = { .lshift = 0xce, .overflow = 0x00 },
+		[0x68] = { .lshift = 0xd0, .overflow = 0x00 },
+		[0x69] = { .lshift = 0xd2, .overflow = 0x00 },
+		[0x6a] = { .lshift = 0xd4, .overflow = 0x00 },
+		[0x6b] = { .lshift = 0xd6, .overflow = 0x00 },
+		[0x6c] = { .lshift = 0xd8, .overflow = 0x00 },
+		[0x6d] = { .lshift = 0xda, .overflow = 0x00 },
+		[0x6e] = { .lshift = 0xdc, .overflow = 0x00 },
+		[0x6f] = { .lshift = 0xde, .overflow = 0x00 },
+		[0x70] = { .lshift = 0xe0, .overflow = 0x00 },
+		[0x71] = { .lshift = 0xe2, .overflow = 0x00 },
+		[0x72] = { .lshift = 0xe4, .overflow = 0x00 },
+		[0x73] = { .lshift = 0xe6, .overflow = 0x00 },
+		[0x74] = { .lshift = 0xe8, .overflow = 0x00 },
+		[0x75] = { .lshift = 0xea, .overflow = 0x00 },
+		[0x76] = { .lshift = 0xec, .overflow = 0x00 },
+		[0x77] = { .lshift = 0xee, .overflow = 0x00 },
+		[0x78] = { .lshift = 0xf0, .overflow = 0x00 },
+		[0x79] = { .lshift = 0xf2, .overflow = 0x00 },
+		[0x7a] = { .lshift = 0xf4, .overflow = 0x00 },
+		[0x7b] = { .lshift = 0xf6, .overflow = 0x00 },
+		[0x7c] = { .lshift = 0xf8, .overflow = 0x00 },
+		[0x7d] = { .lshift = 0xfa, .overflow = 0x00 },
+		[0x7e] = { .lshift = 0xfc, .overflow = 0x00 },
+		[0x7f] = { .lshift = 0xfe, .overflow = 0x00 },
+		[0x80] = { .lshift = 0x00, .overflow = 0x01 },
+		[0x81] = { .lshift = 0x02, .overflow = 0x01 },
+		[0x82] = { .lshift = 0x04, .overflow = 0x01 },
+		[0x83] = { .lshift = 0x06, .overflow = 0x01 },
+		[0x84] = { .lshift = 0x08, .overflow = 0x01 },
+		[0x85] = { .lshift = 0x0a, .overflow = 0x01 },
+		[0x86] = { .lshift = 0x0c, .overflow = 0x01 },
+		[0x87] = { .lshift = 0x0e, .overflow = 0x01 },
+		[0x88] = { .lshift = 0x10, .overflow = 0x01 },
+		[0x89] = { .lshift = 0x12, .overflow = 0x01 },
+		[0x8a] = { .lshift = 0x14, .overflow = 0x01 },
+		[0x8b] = { .lshift = 0x16, .overflow = 0x01 },
+		[0x8c] = { .lshift = 0x18, .overflow = 0x01 },
+		[0x8d] = { .lshift = 0x1a, .overflow = 0x01 },
+		[0x8e] = { .lshift = 0x1c, .overflow = 0x01 },
+		[0x8f] = { .lshift = 0x1e, .overflow = 0x01 },
+		[0x90] = { .lshift = 0x20, .overflow = 0x01 },
+		[0x91] = { .lshift = 0x22, .overflow = 0x01 },
+		[0x92] = { .lshift = 0x24, .overflow = 0x01 },
+		[0x93] = { .lshift = 0x26, .overflow = 0x01 },
+		[0x94] = { .lshift = 0x28, .overflow = 0x01 },
+		[0x95] = { .lshift = 0x2a, .overflow = 0x01 },
+		[0x96] = { .lshift = 0x2c, .overflow = 0x01 },
+		[0x97] = { .lshift = 0x2e, .overflow = 0x01 },
+		[0x98] = { .lshift = 0x30, .overflow = 0x01 },
+		[0x99] = { .lshift = 0x32, .overflow = 0x01 },
+		[0x9a] = { .lshift = 0x34, .overflow = 0x01 },
+		[0x9b] = { .lshift = 0x36, .overflow = 0x01 },
+		[0x9c] = { .lshift = 0x38, .overflow = 0x01 },
+		[0x9d] = { .lshift = 0x3a, .overflow = 0x01 },
+		[0x9e] = { .lshift = 0x3c, .overflow = 0x01 },
+		[0x9f] = { .lshift = 0x3e, .overflow = 0x01 },
+		[0xa0] = { .lshift = 0x40, .overflow = 0x01 },
+		[0xa1] = { .lshift = 0x42, .overflow = 0x01 },
+		[0xa2] = { .lshift = 0x44, .overflow = 0x01 },
+		[0xa3] = { .lshift = 0x46, .overflow = 0x01 },
+		[0xa4] = { .lshift = 0x48, .overflow = 0x01 },
+		[0xa5] = { .lshift = 0x4a, .overflow = 0x01 },
+		[0xa6] = { .lshift = 0x4c, .overflow = 0x01 },
+		[0xa7] = { .lshift = 0x4e, .overflow = 0x01 },
+		[0xa8] = { .lshift = 0x50, .overflow = 0x01 },
+		[0xa9] = { .lshift = 0x52, .overflow = 0x01 },
+		[0xaa] = { .lshift = 0x54, .overflow = 0x01 },
+		[0xab] = { .lshift = 0x56, .overflow = 0x01 },
+		[0xac] = { .lshift = 0x58, .overflow = 0x01 },
+		[0xad] = { .lshift = 0x5a, .overflow = 0x01 },
+		[0xae] = { .lshift = 0x5c, .overflow = 0x01 },
+		[0xaf] = { .lshift = 0x5e, .overflow = 0x01 },
+		[0xb0] = { .lshift = 0x60, .overflow = 0x01 },
+		[0xb1] = { .lshift = 0x62, .overflow = 0x01 },
+		[0xb2] = { .lshift = 0x64, .overflow = 0x01 },
+		[0xb3] = { .lshift = 0x66, .overflow = 0x01 },
+		[0xb4] = { .lshift = 0x68, .overflow = 0x01 },
+		[0xb5] = { .lshift = 0x6a, .overflow = 0x01 },
+		[0xb6] = { .lshift = 0x6c, .overflow = 0x01 },
+		[0xb7] = { .lshift = 0x6e, .overflow = 0x01 },
+		[0xb8] = { .lshift = 0x70, .overflow = 0x01 },
+		[0xb9] = { .lshift = 0x72, .overflow = 0x01 },
+		[0xba] = { .lshift = 0x74, .overflow = 0x01 },
+		[0xbb] = { .lshift = 0x76, .overflow = 0x01 },
+		[0xbc] = { .lshift = 0x78, .overflow = 0x01 },
+		[0xbd] = { .lshift = 0x7a, .overflow = 0x01 },
+		[0xbe] = { .lshift = 0x7c, .overflow = 0x01 },
+		[0xbf] = { .lshift = 0x7e, .overflow = 0x01 },
+		[0xc0] = { .lshift = 0x80, .overflow = 0x01 },
+		[0xc1] = { .lshift = 0x82, .overflow = 0x01 },
+		[0xc2] = { .lshift = 0x84, .overflow = 0x01 },
+		[0xc3] = { .lshift = 0x86, .overflow = 0x01 },
+		[0xc4] = { .lshift = 0x88, .overflow = 0x01 },
+		[0xc5] = { .lshift = 0x8a, .overflow = 0x01 },
+		[0xc6] = { .lshift = 0x8c, .overflow = 0x01 },
+		[0xc7] = { .lshift = 0x8e, .overflow = 0x01 },
+		[0xc8] = { .lshift = 0x90, .overflow = 0x01 },
+		[0xc9] = { .lshift = 0x92, .overflow = 0x01 },
+		[0xca] = { .lshift = 0x94, .overflow = 0x01 },
+		[0xcb] = { .lshift = 0x96, .overflow = 0x01 },
+		[0xcc] = { .lshift = 0x98, .overflow = 0x01 },
+		[0xcd] = { .lshift = 0x9a, .overflow = 0x01 },
+		[0xce] = { .lshift = 0x9c, .overflow = 0x01 },
+		[0xcf] = { .lshift = 0x9e, .overflow = 0x01 },
+		[0xd0] = { .lshift = 0xa0, .overflow = 0x01 },
+		[0xd1] = { .lshift = 0xa2, .overflow = 0x01 },
+		[0xd2] = { .lshift = 0xa4, .overflow = 0x01 },
+		[0xd3] = { .lshift = 0xa6, .overflow = 0x01 },
+		[0xd4] = { .lshift = 0xa8, .overflow = 0x01 },
+		[0xd5] = { .lshift = 0xaa, .overflow = 0x01 },
+		[0xd6] = { .lshift = 0xac, .overflow = 0x01 },
+		[0xd7] = { .lshift = 0xae, .overflow = 0x01 },
+		[0xd8] = { .lshift = 0xb0, .overflow = 0x01 },
+		[0xd9] = { .lshift = 0xb2, .overflow = 0x01 },
+		[0xda] = { .lshift = 0xb4, .overflow = 0x01 },
+		[0xdb] = { .lshift = 0xb6, .overflow = 0x01 },
+		[0xdc] = { .lshift = 0xb8, .overflow = 0x01 },
+		[0xdd] = { .lshift = 0xba, .overflow = 0x01 },
+		[0xde] = { .lshift = 0xbc, .overflow = 0x01 },
+		[0xdf] = { .lshift = 0xbe, .overflow = 0x01 },
+		[0xe0] = { .lshift = 0xc0, .overflow = 0x01 },
+		[0xe1] = { .lshift = 0xc2, .overflow = 0x01 },
+		[0xe2] = { .lshift = 0xc4, .overflow = 0x01 },
+		[0xe3] = { .lshift = 0xc6, .overflow = 0x01 },
+		[0xe4] = { .lshift = 0xc8, .overflow = 0x01 },
+		[0xe5] = { .lshift = 0xca, .overflow = 0x01 },
+		[0xe6] = { .lshift = 0xcc, .overflow = 0x01 },
+		[0xe7] = { .lshift = 0xce, .overflow = 0x01 },
+		[0xe8] = { .lshift = 0xd0, .overflow = 0x01 },
+		[0xe9] = { .lshift = 0xd2, .overflow = 0x01 },
+		[0xea] = { .lshift = 0xd4, .overflow = 0x01 },
+		[0xeb] = { .lshift = 0xd6, .overflow = 0x01 },
+		[0xec] = { .lshift = 0xd8, .overflow = 0x01 },
+		[0xed] = { .lshift = 0xda, .overflow = 0x01 },
+		[0xee] = { .lshift = 0xdc, .overflow = 0x01 },
+		[0xef] = { .lshift = 0xde, .overflow = 0x01 },
+		[0xf0] = { .lshift = 0xe0, .overflow = 0x01 },
+		[0xf1] = { .lshift = 0xe2, .overflow = 0x01 },
+		[0xf2] = { .lshift = 0xe4, .overflow = 0x01 },
+		[0xf3] = { .lshift = 0xe6, .overflow = 0x01 },
+		[0xf4] = { .lshift = 0xe8, .overflow = 0x01 },
+		[0xf5] = { .lshift = 0xea, .overflow = 0x01 },
+		[0xf6] = { .lshift = 0xec, .overflow = 0x01 },
+		[0xf7] = { .lshift = 0xee, .overflow = 0x01 },
+		[0xf8] = { .lshift = 0xf0, .overflow = 0x01 },
+		[0xf9] = { .lshift = 0xf2, .overflow = 0x01 },
+		[0xfa] = { .lshift = 0xf4, .overflow = 0x01 },
+		[0xfb] = { .lshift = 0xf6, .overflow = 0x01 },
+		[0xfc] = { .lshift = 0xf8, .overflow = 0x01 },
+		[0xfd] = { .lshift = 0xfa, .overflow = 0x01 },
+		[0xfe] = { .lshift = 0xfc, .overflow = 0x01 },
+		[0xff] = { .lshift = 0xfe, .overflow = 0x01 },
+	};
+	int8_t i;
+	uint8_t overflow = 0;
+
+	for (i = AES_BLOCK_SIZE - 1; i >= 0; i--) {
+		const struct aes_block_lshift_entry *e = &aes_block_lshift_table[in[i]];
+		out[i] = e->lshift | overflow;
+		overflow = e->overflow;
+	}
+}
+
+static inline void aes_block_rshift(const uint8_t in[AES_BLOCK_SIZE],
+				    uint8_t out[AES_BLOCK_SIZE])
+{
+	static const struct aes_block_rshift_entry {
+		uint8_t rshift;
+		uint8_t overflow;
+	} aes_block_rshift_table[UINT8_MAX+1] = {
+		[0x00] = { .rshift = 0x00, .overflow = 0x00 },
+		[0x01] = { .rshift = 0x00, .overflow = 0x80 },
+		[0x02] = { .rshift = 0x01, .overflow = 0x00 },
+		[0x03] = { .rshift = 0x01, .overflow = 0x80 },
+		[0x04] = { .rshift = 0x02, .overflow = 0x00 },
+		[0x05] = { .rshift = 0x02, .overflow = 0x80 },
+		[0x06] = { .rshift = 0x03, .overflow = 0x00 },
+		[0x07] = { .rshift = 0x03, .overflow = 0x80 },
+		[0x08] = { .rshift = 0x04, .overflow = 0x00 },
+		[0x09] = { .rshift = 0x04, .overflow = 0x80 },
+		[0x0a] = { .rshift = 0x05, .overflow = 0x00 },
+		[0x0b] = { .rshift = 0x05, .overflow = 0x80 },
+		[0x0c] = { .rshift = 0x06, .overflow = 0x00 },
+		[0x0d] = { .rshift = 0x06, .overflow = 0x80 },
+		[0x0e] = { .rshift = 0x07, .overflow = 0x00 },
+		[0x0f] = { .rshift = 0x07, .overflow = 0x80 },
+		[0x10] = { .rshift = 0x08, .overflow = 0x00 },
+		[0x11] = { .rshift = 0x08, .overflow = 0x80 },
+		[0x12] = { .rshift = 0x09, .overflow = 0x00 },
+		[0x13] = { .rshift = 0x09, .overflow = 0x80 },
+		[0x14] = { .rshift = 0x0a, .overflow = 0x00 },
+		[0x15] = { .rshift = 0x0a, .overflow = 0x80 },
+		[0x16] = { .rshift = 0x0b, .overflow = 0x00 },
+		[0x17] = { .rshift = 0x0b, .overflow = 0x80 },
+		[0x18] = { .rshift = 0x0c, .overflow = 0x00 },
+		[0x19] = { .rshift = 0x0c, .overflow = 0x80 },
+		[0x1a] = { .rshift = 0x0d, .overflow = 0x00 },
+		[0x1b] = { .rshift = 0x0d, .overflow = 0x80 },
+		[0x1c] = { .rshift = 0x0e, .overflow = 0x00 },
+		[0x1d] = { .rshift = 0x0e, .overflow = 0x80 },
+		[0x1e] = { .rshift = 0x0f, .overflow = 0x00 },
+		[0x1f] = { .rshift = 0x0f, .overflow = 0x80 },
+		[0x20] = { .rshift = 0x10, .overflow = 0x00 },
+		[0x21] = { .rshift = 0x10, .overflow = 0x80 },
+		[0x22] = { .rshift = 0x11, .overflow = 0x00 },
+		[0x23] = { .rshift = 0x11, .overflow = 0x80 },
+		[0x24] = { .rshift = 0x12, .overflow = 0x00 },
+		[0x25] = { .rshift = 0x12, .overflow = 0x80 },
+		[0x26] = { .rshift = 0x13, .overflow = 0x00 },
+		[0x27] = { .rshift = 0x13, .overflow = 0x80 },
+		[0x28] = { .rshift = 0x14, .overflow = 0x00 },
+		[0x29] = { .rshift = 0x14, .overflow = 0x80 },
+		[0x2a] = { .rshift = 0x15, .overflow = 0x00 },
+		[0x2b] = { .rshift = 0x15, .overflow = 0x80 },
+		[0x2c] = { .rshift = 0x16, .overflow = 0x00 },
+		[0x2d] = { .rshift = 0x16, .overflow = 0x80 },
+		[0x2e] = { .rshift = 0x17, .overflow = 0x00 },
+		[0x2f] = { .rshift = 0x17, .overflow = 0x80 },
+		[0x30] = { .rshift = 0x18, .overflow = 0x00 },
+		[0x31] = { .rshift = 0x18, .overflow = 0x80 },
+		[0x32] = { .rshift = 0x19, .overflow = 0x00 },
+		[0x33] = { .rshift = 0x19, .overflow = 0x80 },
+		[0x34] = { .rshift = 0x1a, .overflow = 0x00 },
+		[0x35] = { .rshift = 0x1a, .overflow = 0x80 },
+		[0x36] = { .rshift = 0x1b, .overflow = 0x00 },
+		[0x37] = { .rshift = 0x1b, .overflow = 0x80 },
+		[0x38] = { .rshift = 0x1c, .overflow = 0x00 },
+		[0x39] = { .rshift = 0x1c, .overflow = 0x80 },
+		[0x3a] = { .rshift = 0x1d, .overflow = 0x00 },
+		[0x3b] = { .rshift = 0x1d, .overflow = 0x80 },
+		[0x3c] = { .rshift = 0x1e, .overflow = 0x00 },
+		[0x3d] = { .rshift = 0x1e, .overflow = 0x80 },
+		[0x3e] = { .rshift = 0x1f, .overflow = 0x00 },
+		[0x3f] = { .rshift = 0x1f, .overflow = 0x80 },
+		[0x40] = { .rshift = 0x20, .overflow = 0x00 },
+		[0x41] = { .rshift = 0x20, .overflow = 0x80 },
+		[0x42] = { .rshift = 0x21, .overflow = 0x00 },
+		[0x43] = { .rshift = 0x21, .overflow = 0x80 },
+		[0x44] = { .rshift = 0x22, .overflow = 0x00 },
+		[0x45] = { .rshift = 0x22, .overflow = 0x80 },
+		[0x46] = { .rshift = 0x23, .overflow = 0x00 },
+		[0x47] = { .rshift = 0x23, .overflow = 0x80 },
+		[0x48] = { .rshift = 0x24, .overflow = 0x00 },
+		[0x49] = { .rshift = 0x24, .overflow = 0x80 },
+		[0x4a] = { .rshift = 0x25, .overflow = 0x00 },
+		[0x4b] = { .rshift = 0x25, .overflow = 0x80 },
+		[0x4c] = { .rshift = 0x26, .overflow = 0x00 },
+		[0x4d] = { .rshift = 0x26, .overflow = 0x80 },
+		[0x4e] = { .rshift = 0x27, .overflow = 0x00 },
+		[0x4f] = { .rshift = 0x27, .overflow = 0x80 },
+		[0x50] = { .rshift = 0x28, .overflow = 0x00 },
+		[0x51] = { .rshift = 0x28, .overflow = 0x80 },
+		[0x52] = { .rshift = 0x29, .overflow = 0x00 },
+		[0x53] = { .rshift = 0x29, .overflow = 0x80 },
+		[0x54] = { .rshift = 0x2a, .overflow = 0x00 },
+		[0x55] = { .rshift = 0x2a, .overflow = 0x80 },
+		[0x56] = { .rshift = 0x2b, .overflow = 0x00 },
+		[0x57] = { .rshift = 0x2b, .overflow = 0x80 },
+		[0x58] = { .rshift = 0x2c, .overflow = 0x00 },
+		[0x59] = { .rshift = 0x2c, .overflow = 0x80 },
+		[0x5a] = { .rshift = 0x2d, .overflow = 0x00 },
+		[0x5b] = { .rshift = 0x2d, .overflow = 0x80 },
+		[0x5c] = { .rshift = 0x2e, .overflow = 0x00 },
+		[0x5d] = { .rshift = 0x2e, .overflow = 0x80 },
+		[0x5e] = { .rshift = 0x2f, .overflow = 0x00 },
+		[0x5f] = { .rshift = 0x2f, .overflow = 0x80 },
+		[0x60] = { .rshift = 0x30, .overflow = 0x00 },
+		[0x61] = { .rshift = 0x30, .overflow = 0x80 },
+		[0x62] = { .rshift = 0x31, .overflow = 0x00 },
+		[0x63] = { .rshift = 0x31, .overflow = 0x80 },
+		[0x64] = { .rshift = 0x32, .overflow = 0x00 },
+		[0x65] = { .rshift = 0x32, .overflow = 0x80 },
+		[0x66] = { .rshift = 0x33, .overflow = 0x00 },
+		[0x67] = { .rshift = 0x33, .overflow = 0x80 },
+		[0x68] = { .rshift = 0x34, .overflow = 0x00 },
+		[0x69] = { .rshift = 0x34, .overflow = 0x80 },
+		[0x6a] = { .rshift = 0x35, .overflow = 0x00 },
+		[0x6b] = { .rshift = 0x35, .overflow = 0x80 },
+		[0x6c] = { .rshift = 0x36, .overflow = 0x00 },
+		[0x6d] = { .rshift = 0x36, .overflow = 0x80 },
+		[0x6e] = { .rshift = 0x37, .overflow = 0x00 },
+		[0x6f] = { .rshift = 0x37, .overflow = 0x80 },
+		[0x70] = { .rshift = 0x38, .overflow = 0x00 },
+		[0x71] = { .rshift = 0x38, .overflow = 0x80 },
+		[0x72] = { .rshift = 0x39, .overflow = 0x00 },
+		[0x73] = { .rshift = 0x39, .overflow = 0x80 },
+		[0x74] = { .rshift = 0x3a, .overflow = 0x00 },
+		[0x75] = { .rshift = 0x3a, .overflow = 0x80 },
+		[0x76] = { .rshift = 0x3b, .overflow = 0x00 },
+		[0x77] = { .rshift = 0x3b, .overflow = 0x80 },
+		[0x78] = { .rshift = 0x3c, .overflow = 0x00 },
+		[0x79] = { .rshift = 0x3c, .overflow = 0x80 },
+		[0x7a] = { .rshift = 0x3d, .overflow = 0x00 },
+		[0x7b] = { .rshift = 0x3d, .overflow = 0x80 },
+		[0x7c] = { .rshift = 0x3e, .overflow = 0x00 },
+		[0x7d] = { .rshift = 0x3e, .overflow = 0x80 },
+		[0x7e] = { .rshift = 0x3f, .overflow = 0x00 },
+		[0x7f] = { .rshift = 0x3f, .overflow = 0x80 },
+		[0x80] = { .rshift = 0x40, .overflow = 0x00 },
+		[0x81] = { .rshift = 0x40, .overflow = 0x80 },
+		[0x82] = { .rshift = 0x41, .overflow = 0x00 },
+		[0x83] = { .rshift = 0x41, .overflow = 0x80 },
+		[0x84] = { .rshift = 0x42, .overflow = 0x00 },
+		[0x85] = { .rshift = 0x42, .overflow = 0x80 },
+		[0x86] = { .rshift = 0x43, .overflow = 0x00 },
+		[0x87] = { .rshift = 0x43, .overflow = 0x80 },
+		[0x88] = { .rshift = 0x44, .overflow = 0x00 },
+		[0x89] = { .rshift = 0x44, .overflow = 0x80 },
+		[0x8a] = { .rshift = 0x45, .overflow = 0x00 },
+		[0x8b] = { .rshift = 0x45, .overflow = 0x80 },
+		[0x8c] = { .rshift = 0x46, .overflow = 0x00 },
+		[0x8d] = { .rshift = 0x46, .overflow = 0x80 },
+		[0x8e] = { .rshift = 0x47, .overflow = 0x00 },
+		[0x8f] = { .rshift = 0x47, .overflow = 0x80 },
+		[0x90] = { .rshift = 0x48, .overflow = 0x00 },
+		[0x91] = { .rshift = 0x48, .overflow = 0x80 },
+		[0x92] = { .rshift = 0x49, .overflow = 0x00 },
+		[0x93] = { .rshift = 0x49, .overflow = 0x80 },
+		[0x94] = { .rshift = 0x4a, .overflow = 0x00 },
+		[0x95] = { .rshift = 0x4a, .overflow = 0x80 },
+		[0x96] = { .rshift = 0x4b, .overflow = 0x00 },
+		[0x97] = { .rshift = 0x4b, .overflow = 0x80 },
+		[0x98] = { .rshift = 0x4c, .overflow = 0x00 },
+		[0x99] = { .rshift = 0x4c, .overflow = 0x80 },
+		[0x9a] = { .rshift = 0x4d, .overflow = 0x00 },
+		[0x9b] = { .rshift = 0x4d, .overflow = 0x80 },
+		[0x9c] = { .rshift = 0x4e, .overflow = 0x00 },
+		[0x9d] = { .rshift = 0x4e, .overflow = 0x80 },
+		[0x9e] = { .rshift = 0x4f, .overflow = 0x00 },
+		[0x9f] = { .rshift = 0x4f, .overflow = 0x80 },
+		[0xa0] = { .rshift = 0x50, .overflow = 0x00 },
+		[0xa1] = { .rshift = 0x50, .overflow = 0x80 },
+		[0xa2] = { .rshift = 0x51, .overflow = 0x00 },
+		[0xa3] = { .rshift = 0x51, .overflow = 0x80 },
+		[0xa4] = { .rshift = 0x52, .overflow = 0x00 },
+		[0xa5] = { .rshift = 0x52, .overflow = 0x80 },
+		[0xa6] = { .rshift = 0x53, .overflow = 0x00 },
+		[0xa7] = { .rshift = 0x53, .overflow = 0x80 },
+		[0xa8] = { .rshift = 0x54, .overflow = 0x00 },
+		[0xa9] = { .rshift = 0x54, .overflow = 0x80 },
+		[0xaa] = { .rshift = 0x55, .overflow = 0x00 },
+		[0xab] = { .rshift = 0x55, .overflow = 0x80 },
+		[0xac] = { .rshift = 0x56, .overflow = 0x00 },
+		[0xad] = { .rshift = 0x56, .overflow = 0x80 },
+		[0xae] = { .rshift = 0x57, .overflow = 0x00 },
+		[0xaf] = { .rshift = 0x57, .overflow = 0x80 },
+		[0xb0] = { .rshift = 0x58, .overflow = 0x00 },
+		[0xb1] = { .rshift = 0x58, .overflow = 0x80 },
+		[0xb2] = { .rshift = 0x59, .overflow = 0x00 },
+		[0xb3] = { .rshift = 0x59, .overflow = 0x80 },
+		[0xb4] = { .rshift = 0x5a, .overflow = 0x00 },
+		[0xb5] = { .rshift = 0x5a, .overflow = 0x80 },
+		[0xb6] = { .rshift = 0x5b, .overflow = 0x00 },
+		[0xb7] = { .rshift = 0x5b, .overflow = 0x80 },
+		[0xb8] = { .rshift = 0x5c, .overflow = 0x00 },
+		[0xb9] = { .rshift = 0x5c, .overflow = 0x80 },
+		[0xba] = { .rshift = 0x5d, .overflow = 0x00 },
+		[0xbb] = { .rshift = 0x5d, .overflow = 0x80 },
+		[0xbc] = { .rshift = 0x5e, .overflow = 0x00 },
+		[0xbd] = { .rshift = 0x5e, .overflow = 0x80 },
+		[0xbe] = { .rshift = 0x5f, .overflow = 0x00 },
+		[0xbf] = { .rshift = 0x5f, .overflow = 0x80 },
+		[0xc0] = { .rshift = 0x60, .overflow = 0x00 },
+		[0xc1] = { .rshift = 0x60, .overflow = 0x80 },
+		[0xc2] = { .rshift = 0x61, .overflow = 0x00 },
+		[0xc3] = { .rshift = 0x61, .overflow = 0x80 },
+		[0xc4] = { .rshift = 0x62, .overflow = 0x00 },
+		[0xc5] = { .rshift = 0x62, .overflow = 0x80 },
+		[0xc6] = { .rshift = 0x63, .overflow = 0x00 },
+		[0xc7] = { .rshift = 0x63, .overflow = 0x80 },
+		[0xc8] = { .rshift = 0x64, .overflow = 0x00 },
+		[0xc9] = { .rshift = 0x64, .overflow = 0x80 },
+		[0xca] = { .rshift = 0x65, .overflow = 0x00 },
+		[0xcb] = { .rshift = 0x65, .overflow = 0x80 },
+		[0xcc] = { .rshift = 0x66, .overflow = 0x00 },
+		[0xcd] = { .rshift = 0x66, .overflow = 0x80 },
+		[0xce] = { .rshift = 0x67, .overflow = 0x00 },
+		[0xcf] = { .rshift = 0x67, .overflow = 0x80 },
+		[0xd0] = { .rshift = 0x68, .overflow = 0x00 },
+		[0xd1] = { .rshift = 0x68, .overflow = 0x80 },
+		[0xd2] = { .rshift = 0x69, .overflow = 0x00 },
+		[0xd3] = { .rshift = 0x69, .overflow = 0x80 },
+		[0xd4] = { .rshift = 0x6a, .overflow = 0x00 },
+		[0xd5] = { .rshift = 0x6a, .overflow = 0x80 },
+		[0xd6] = { .rshift = 0x6b, .overflow = 0x00 },
+		[0xd7] = { .rshift = 0x6b, .overflow = 0x80 },
+		[0xd8] = { .rshift = 0x6c, .overflow = 0x00 },
+		[0xd9] = { .rshift = 0x6c, .overflow = 0x80 },
+		[0xda] = { .rshift = 0x6d, .overflow = 0x00 },
+		[0xdb] = { .rshift = 0x6d, .overflow = 0x80 },
+		[0xdc] = { .rshift = 0x6e, .overflow = 0x00 },
+		[0xdd] = { .rshift = 0x6e, .overflow = 0x80 },
+		[0xde] = { .rshift = 0x6f, .overflow = 0x00 },
+		[0xdf] = { .rshift = 0x6f, .overflow = 0x80 },
+		[0xe0] = { .rshift = 0x70, .overflow = 0x00 },
+		[0xe1] = { .rshift = 0x70, .overflow = 0x80 },
+		[0xe2] = { .rshift = 0x71, .overflow = 0x00 },
+		[0xe3] = { .rshift = 0x71, .overflow = 0x80 },
+		[0xe4] = { .rshift = 0x72, .overflow = 0x00 },
+		[0xe5] = { .rshift = 0x72, .overflow = 0x80 },
+		[0xe6] = { .rshift = 0x73, .overflow = 0x00 },
+		[0xe7] = { .rshift = 0x73, .overflow = 0x80 },
+		[0xe8] = { .rshift = 0x74, .overflow = 0x00 },
+		[0xe9] = { .rshift = 0x74, .overflow = 0x80 },
+		[0xea] = { .rshift = 0x75, .overflow = 0x00 },
+		[0xeb] = { .rshift = 0x75, .overflow = 0x80 },
+		[0xec] = { .rshift = 0x76, .overflow = 0x00 },
+		[0xed] = { .rshift = 0x76, .overflow = 0x80 },
+		[0xee] = { .rshift = 0x77, .overflow = 0x00 },
+		[0xef] = { .rshift = 0x77, .overflow = 0x80 },
+		[0xf0] = { .rshift = 0x78, .overflow = 0x00 },
+		[0xf1] = { .rshift = 0x78, .overflow = 0x80 },
+		[0xf2] = { .rshift = 0x79, .overflow = 0x00 },
+		[0xf3] = { .rshift = 0x79, .overflow = 0x80 },
+		[0xf4] = { .rshift = 0x7a, .overflow = 0x00 },
+		[0xf5] = { .rshift = 0x7a, .overflow = 0x80 },
+		[0xf6] = { .rshift = 0x7b, .overflow = 0x00 },
+		[0xf7] = { .rshift = 0x7b, .overflow = 0x80 },
+		[0xf8] = { .rshift = 0x7c, .overflow = 0x00 },
+		[0xf9] = { .rshift = 0x7c, .overflow = 0x80 },
+		[0xfa] = { .rshift = 0x7d, .overflow = 0x00 },
+		[0xfb] = { .rshift = 0x7d, .overflow = 0x80 },
+		[0xfc] = { .rshift = 0x7e, .overflow = 0x00 },
+		[0xfd] = { .rshift = 0x7e, .overflow = 0x80 },
+		[0xfe] = { .rshift = 0x7f, .overflow = 0x00 },
+		[0xff] = { .rshift = 0x7f, .overflow = 0x80 },
+	};
+	uint8_t i;
+	uint8_t overflow = 0;
+
+	for (i = 0; i < AES_BLOCK_SIZE; i++) {
+		const struct aes_block_rshift_entry *e = &aes_block_rshift_table[in[i]];
+		out[i] = e->rshift | overflow;
+		overflow = e->overflow;
+	}
+}
 #endif /* LIB_CRYPTO_AES_H */
-- 
1.9.1


From 9a27cb2b7292ecdfe6e6d15b9d5091407ba4b744 Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Wed, 12 Aug 2015 11:29:47 +0200
Subject: [PATCH 4/8] lib/crypto: optimize aes_cmac_128

- We avoid local variables by keeping scratch blocks in the context,
  so cleanup can be done lazily in aes_cmac_128_final() via
  ZERO_STRUCTP(ctx)
- We avoid unnecessary memcpy() calls
- We use the optimized aes_block_{xor,lshift}() functions

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 lib/crypto/aes_cmac_128.c | 104 +++++++++-------------------------------------
 lib/crypto/aes_cmac_128.h |   4 ++
 2 files changed, 23 insertions(+), 85 deletions(-)

diff --git a/lib/crypto/aes_cmac_128.c b/lib/crypto/aes_cmac_128.c
index b630eea..5d71e82 100644
--- a/lib/crypto/aes_cmac_128.c
+++ b/lib/crypto/aes_cmac_128.c
@@ -33,92 +33,42 @@ static const uint8_t const_Rb[] = {
 
 #define _MSB(x) (((x)[0] & 0x80)?1:0)
 
-static inline void aes_cmac_128_left_shift_1(const uint8_t in[AES_BLOCK_SIZE],
-					     uint8_t out[AES_BLOCK_SIZE])
-{
-	uint8_t overflow = 0;
-	int8_t i;
-
-	for (i = AES_BLOCK_SIZE - 1; i >= 0; i--) {
-		out[i] = in[i] << 1;
-
-		out[i] |= overflow;
-
-		overflow = _MSB(&in[i]);
-	}
-}
-
-static inline void aes_cmac_128_xor(const uint8_t in1[AES_BLOCK_SIZE],
-				    const uint8_t in2[AES_BLOCK_SIZE],
-				    uint8_t out[AES_BLOCK_SIZE])
-{
-	uint8_t i;
-
-	for (i = 0; i < AES_BLOCK_SIZE; i++) {
-		out[i] = in1[i] ^ in2[i];
-	}
-}
-
 void aes_cmac_128_init(struct aes_cmac_128_context *ctx,
 		       const uint8_t K[AES_BLOCK_SIZE])
 {
-	uint8_t L[AES_BLOCK_SIZE];
-
 	ZERO_STRUCTP(ctx);
 
 	AES_set_encrypt_key(K, 128, &ctx->aes_key);
 
 	/* step 1 - generate subkeys k1 and k2 */
 
-	AES_encrypt(const_Zero, L, &ctx->aes_key);
+	AES_encrypt(const_Zero, ctx->L, &ctx->aes_key);
 
-	if (_MSB(L) == 0) {
-		aes_cmac_128_left_shift_1(L, ctx->K1);
+	if (_MSB(ctx->L) == 0) {
+		aes_block_lshift(ctx->L, ctx->K1);
 	} else {
-		uint8_t tmp_block[AES_BLOCK_SIZE];
-
-		aes_cmac_128_left_shift_1(L, tmp_block);
-		aes_cmac_128_xor(tmp_block, const_Rb, ctx->K1);
-		ZERO_STRUCT(tmp_block);
+		aes_block_lshift(ctx->L, ctx->tmp);
+		aes_block_xor(ctx->tmp, const_Rb, ctx->K1);
 	}
 
 	if (_MSB(ctx->K1) == 0) {
-		aes_cmac_128_left_shift_1(ctx->K1, ctx->K2);
+		aes_block_lshift(ctx->K1, ctx->K2);
 	} else {
-		uint8_t tmp_block[AES_BLOCK_SIZE];
-
-		aes_cmac_128_left_shift_1(ctx->K1, tmp_block);
-		aes_cmac_128_xor(tmp_block, const_Rb, ctx->K2);
-		ZERO_STRUCT(tmp_block);
+		aes_block_lshift(ctx->K1, ctx->tmp);
+		aes_block_xor(ctx->tmp, const_Rb, ctx->K2);
 	}
-
-	ZERO_STRUCT(L);
 }
 
 void aes_cmac_128_update(struct aes_cmac_128_context *ctx,
-			 const uint8_t *_msg, size_t _msg_len)
+			 const uint8_t *msg, size_t msg_len)
 {
-	uint8_t tmp_block[AES_BLOCK_SIZE];
-	uint8_t Y[AES_BLOCK_SIZE];
-	const uint8_t *msg = _msg;
-	size_t msg_len = _msg_len;
-
-	/*
-	 * copy the remembered last block
-	 */
-	ZERO_STRUCT(tmp_block);
-	if (ctx->last_len) {
-		memcpy(tmp_block, ctx->last, ctx->last_len);
-	}
-
 	/*
 	 * check if we expand the block
 	 */
 	if (ctx->last_len < AES_BLOCK_SIZE) {
 		size_t len = MIN(AES_BLOCK_SIZE - ctx->last_len, msg_len);
 
-		memcpy(&tmp_block[ctx->last_len], msg, len);
-		memcpy(ctx->last, tmp_block, AES_BLOCK_SIZE);
+		memcpy(&ctx->last[ctx->last_len], msg, len);
 		msg += len;
 		msg_len -= len;
 		ctx->last_len += len;
@@ -126,59 +76,43 @@ void aes_cmac_128_update(struct aes_cmac_128_context *ctx,
 
 	if (msg_len == 0) {
 		/* if it is still the last block, we are done */
-		ZERO_STRUCT(tmp_block);
 		return;
 	}
 
 	/*
-	 * It is not the last block anymore
-	 */
-	ZERO_STRUCT(ctx->last);
-	ctx->last_len = 0;
-
-	/*
 	 * now checksum everything but the last block
 	 */
-	aes_cmac_128_xor(ctx->X, tmp_block, Y);
-	AES_encrypt(Y, ctx->X, &ctx->aes_key);
+	aes_block_xor(ctx->X, ctx->last, ctx->Y);
+	AES_encrypt(ctx->Y, ctx->X, &ctx->aes_key);
 
 	while (msg_len > AES_BLOCK_SIZE) {
-		memcpy(tmp_block, msg, AES_BLOCK_SIZE);
+		aes_block_xor(ctx->X, msg, ctx->Y);
+		AES_encrypt(ctx->Y, ctx->X, &ctx->aes_key);
 		msg += AES_BLOCK_SIZE;
 		msg_len -= AES_BLOCK_SIZE;
-
-		aes_cmac_128_xor(ctx->X, tmp_block, Y);
-		AES_encrypt(Y, ctx->X, &ctx->aes_key);
 	}
 
 	/*
 	 * copy the last block, it will be processed in
 	 * aes_cmac_128_final().
 	 */
+	ZERO_STRUCT(ctx->last);
 	memcpy(ctx->last, msg, msg_len);
 	ctx->last_len = msg_len;
-
-	ZERO_STRUCT(tmp_block);
-	ZERO_STRUCT(Y);
 }
 
 void aes_cmac_128_final(struct aes_cmac_128_context *ctx,
 			uint8_t T[AES_BLOCK_SIZE])
 {
-	uint8_t tmp_block[AES_BLOCK_SIZE];
-	uint8_t Y[AES_BLOCK_SIZE];
-
 	if (ctx->last_len < AES_BLOCK_SIZE) {
 		ctx->last[ctx->last_len] = 0x80;
-		aes_cmac_128_xor(ctx->last, ctx->K2, tmp_block);
+		aes_block_xor(ctx->last, ctx->K2, ctx->tmp);
 	} else {
-		aes_cmac_128_xor(ctx->last, ctx->K1, tmp_block);
+		aes_block_xor(ctx->last, ctx->K1, ctx->tmp);
 	}
 
-	aes_cmac_128_xor(tmp_block, ctx->X, Y);
-	AES_encrypt(Y, T, &ctx->aes_key);
+	aes_block_xor(ctx->tmp, ctx->X, ctx->Y);
+	AES_encrypt(ctx->Y, T, &ctx->aes_key);
 
-	ZERO_STRUCT(tmp_block);
-	ZERO_STRUCT(Y);
 	ZERO_STRUCTP(ctx);
 }
diff --git a/lib/crypto/aes_cmac_128.h b/lib/crypto/aes_cmac_128.h
index 28117a0..871f5a7 100644
--- a/lib/crypto/aes_cmac_128.h
+++ b/lib/crypto/aes_cmac_128.h
@@ -25,7 +25,11 @@ struct aes_cmac_128_context {
 	uint8_t K1[AES_BLOCK_SIZE];
 	uint8_t K2[AES_BLOCK_SIZE];
 
+	uint8_t L[AES_BLOCK_SIZE];
 	uint8_t X[AES_BLOCK_SIZE];
+	uint8_t Y[AES_BLOCK_SIZE];
+
+	uint8_t tmp[AES_BLOCK_SIZE];
 
 	uint8_t last[AES_BLOCK_SIZE];
 	size_t last_len;
-- 
1.9.1


From 22e8ba7c543db8694bb4c295389a51f2fd2f6fce Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Wed, 12 Aug 2015 11:29:47 +0200
Subject: [PATCH 5/8] lib/crypto: optimize aes_ccm_128

- We keep intermediate blocks in the context instead of in local
  variables, so that they are wiped lazily in
  aes_ccm_128_digest() via ZERO_STRUCTP(ctx)
- We use the optimized aes_block_xor() function

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 lib/crypto/aes_ccm_128.c | 55 ++++++++++++++++++++----------------------------
 lib/crypto/aes_ccm_128.h |  2 ++
 2 files changed, 25 insertions(+), 32 deletions(-)

diff --git a/lib/crypto/aes_ccm_128.c b/lib/crypto/aes_ccm_128.c
index 94b9803..8bbeee9 100644
--- a/lib/crypto/aes_ccm_128.c
+++ b/lib/crypto/aes_ccm_128.c
@@ -24,24 +24,11 @@
 #define M_ ((AES_CCM_128_M - 2) / 2)
 #define L_ (AES_CCM_128_L - 1)
 
-static inline void aes_ccm_128_xor(const uint8_t in1[AES_BLOCK_SIZE],
-				   const uint8_t in2[AES_BLOCK_SIZE],
-				   uint8_t out[AES_BLOCK_SIZE])
-{
-	uint8_t i;
-
-	for (i = 0; i < AES_BLOCK_SIZE; i++) {
-		out[i] = in1[i] ^ in2[i];
-	}
-}
-
 void aes_ccm_128_init(struct aes_ccm_128_context *ctx,
 		      const uint8_t K[AES_BLOCK_SIZE],
 		      const uint8_t N[AES_CCM_128_NONCE_SIZE],
 		      size_t a_total, size_t m_total)
 {
-	uint8_t B_0[AES_BLOCK_SIZE];
-
 	ZERO_STRUCTP(ctx);
 
 	AES_set_encrypt_key(K, 128, &ctx->aes_key);
@@ -52,18 +39,18 @@ void aes_ccm_128_init(struct aes_ccm_128_context *ctx,
 	/*
 	 * prepare B_0
 	 */
-	B_0[0]  = L_;
-	B_0[0] += 8 * M_;
+	ctx->B_i[0]  = L_;
+	ctx->B_i[0] += 8 * M_;
 	if (a_total > 0) {
-		B_0[0] += 64;
+		ctx->B_i[0] += 64;
 	}
-	memcpy(&B_0[1], ctx->nonce, AES_CCM_128_NONCE_SIZE);
-	RSIVAL(B_0, (AES_BLOCK_SIZE - AES_CCM_128_L), m_total);
+	memcpy(&ctx->B_i[1], ctx->nonce, AES_CCM_128_NONCE_SIZE);
+	RSIVAL(ctx->B_i, (AES_BLOCK_SIZE - AES_CCM_128_L), m_total);
 
 	/*
 	 * prepare X_1
 	 */
-	AES_encrypt(B_0, ctx->X_i, &ctx->aes_key);
+	AES_encrypt(ctx->B_i, ctx->X_i, &ctx->aes_key);
 
 	/*
 	 * prepare B_1
@@ -115,10 +102,9 @@ void aes_ccm_128_update(struct aes_ccm_128_context *ctx,
 			continue;
 		}
 
-		aes_ccm_128_xor(ctx->X_i, ctx->B_i, ctx->B_i);
+		aes_block_xor(ctx->X_i, ctx->B_i, ctx->B_i);
 		AES_encrypt(ctx->B_i, ctx->X_i, &ctx->aes_key);
 
-		ZERO_STRUCT(ctx->B_i);
 		ctx->B_i_ofs = 0;
 	}
 }
@@ -127,13 +113,11 @@ static void aes_ccm_128_S_i(struct aes_ccm_128_context *ctx,
 			    uint8_t S_i[AES_BLOCK_SIZE],
 			    size_t i)
 {
-	uint8_t A_i[AES_BLOCK_SIZE];
-
-	A_i[0]  = L_;
-	memcpy(&A_i[1], ctx->nonce, AES_CCM_128_NONCE_SIZE);
-	RSIVAL(A_i, (AES_BLOCK_SIZE - AES_CCM_128_L), i);
+	ctx->A_i[0]  = L_;
+	memcpy(&ctx->A_i[1], ctx->nonce, AES_CCM_128_NONCE_SIZE);
+	RSIVAL(ctx->A_i, (AES_BLOCK_SIZE - AES_CCM_128_L), i);
 
-	AES_encrypt(A_i, S_i, &ctx->aes_key);
+	AES_encrypt(ctx->A_i, S_i, &ctx->aes_key);
 }
 
 void aes_ccm_128_crypt(struct aes_ccm_128_context *ctx,
@@ -146,6 +130,15 @@ void aes_ccm_128_crypt(struct aes_ccm_128_context *ctx,
 			ctx->S_i_ofs = 0;
 		}
 
+		if (likely(ctx->S_i_ofs == 0 && m_len >= AES_BLOCK_SIZE)) {
+			aes_block_xor(m, ctx->S_i, m);
+			m += AES_BLOCK_SIZE;
+			m_len -= AES_BLOCK_SIZE;
+			ctx->S_i_ctr += 1;
+			aes_ccm_128_S_i(ctx, ctx->S_i, ctx->S_i_ctr);
+			continue;
+		}
+
 		m[0] ^= ctx->S_i[ctx->S_i_ofs];
 		m += 1;
 		m_len -= 1;
@@ -156,15 +149,13 @@ void aes_ccm_128_crypt(struct aes_ccm_128_context *ctx,
 void aes_ccm_128_digest(struct aes_ccm_128_context *ctx,
 			uint8_t digest[AES_BLOCK_SIZE])
 {
-	uint8_t S_0[AES_BLOCK_SIZE];
-
-	aes_ccm_128_S_i(ctx, S_0, 0);
+	/* prepare S_0 */
+	aes_ccm_128_S_i(ctx, ctx->S_i, 0);
 
 	/*
 	 * note X_i is T here
 	 */
-	aes_ccm_128_xor(ctx->X_i, S_0, digest);
+	aes_block_xor(ctx->X_i, ctx->S_i, digest);
 
-	ZERO_STRUCT(S_0);
 	ZERO_STRUCTP(ctx);
 }
diff --git a/lib/crypto/aes_ccm_128.h b/lib/crypto/aes_ccm_128.h
index a98c754..4f5f903 100644
--- a/lib/crypto/aes_ccm_128.h
+++ b/lib/crypto/aes_ccm_128.h
@@ -35,6 +35,8 @@ struct aes_ccm_128_context {
 	uint8_t B_i[AES_BLOCK_SIZE];
 	size_t B_i_ofs;
 
+	uint8_t A_i[AES_BLOCK_SIZE];
+
 	uint8_t S_i[AES_BLOCK_SIZE];
 	size_t S_i_ofs;
 	size_t S_i_ctr;
-- 
1.9.1


From 75d1c778a8d181a756f9d5ff3c2c3c7c171144b0 Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Wed, 12 Aug 2015 11:29:47 +0200
Subject: [PATCH 6/8] lib/crypto: optimize aes_gcm_128

- We keep intermediate blocks in the context instead of in local
  variables, so that they are wiped lazily in
  aes_gcm_128_digest() via ZERO_STRUCTP(ctx)
- We use the optimized aes_block_{xor,rshift}() functions

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 lib/crypto/aes_gcm_128.c | 46 ++++++++++++++++------------------------------
 lib/crypto/aes_gcm_128.h |  2 +-
 2 files changed, 17 insertions(+), 31 deletions(-)

diff --git a/lib/crypto/aes_gcm_128.c b/lib/crypto/aes_gcm_128.c
index f59d659..bfbf176 100644
--- a/lib/crypto/aes_gcm_128.c
+++ b/lib/crypto/aes_gcm_128.c
@@ -30,35 +30,12 @@ static inline void aes_gcm_128_inc32(uint8_t inout[AES_BLOCK_SIZE])
 	RSIVAL(inout, AES_BLOCK_SIZE - 4, v);
 }
 
-static inline void aes_gcm_128_xor(const uint8_t in1[AES_BLOCK_SIZE],
-				   const uint8_t in2[AES_BLOCK_SIZE],
-				   uint8_t out[AES_BLOCK_SIZE])
-{
-	uint8_t i;
-
-	for (i = 0; i < AES_BLOCK_SIZE; i++) {
-		out[i] = in1[i] ^ in2[i];
-	}
-}
-
-static inline void aes_gcm_128_rightshift(uint8_t x[AES_BLOCK_SIZE])
-{
-	int8_t i;
-
-	for (i = AES_BLOCK_SIZE - 1; i >=0; i--) {
-		x[i] >>= 1;
-		if (i > 0) {
-			x[i] |= (x[i-1] & 1) << 7;
-		}
-	}
-}
-
 static inline void aes_gcm_128_mul(const uint8_t x[AES_BLOCK_SIZE],
 				   const uint8_t y[AES_BLOCK_SIZE],
+				   uint8_t v[AES_BLOCK_SIZE],
 				   uint8_t z[AES_BLOCK_SIZE])
 {
 	uint8_t i;
-	uint8_t v[AES_BLOCK_SIZE];
 	/* 11100001 || 0^120 */
 	static const uint8_t r[AES_BLOCK_SIZE] = {
 		0xE1, 0x00, 0x00, 0x00,
@@ -75,12 +52,12 @@ static inline void aes_gcm_128_mul(const uint8_t x[AES_BLOCK_SIZE],
 		for (mask = 0x80; mask != 0 ; mask >>= 1) {
 			uint8_t v_lsb = v[AES_BLOCK_SIZE-1] & 1;
 			if (x[i] & mask) {
-				aes_gcm_128_xor(z, v, z);
+				aes_block_xor(z, v, z);
 			}
 
-			aes_gcm_128_rightshift(v);
+			aes_block_rshift(v, v);
 			if (v_lsb != 0) {
-				aes_gcm_128_xor(v, r, v);
+				aes_block_xor(v, r, v);
 			}
 		}
 	}
@@ -89,8 +66,8 @@ static inline void aes_gcm_128_mul(const uint8_t x[AES_BLOCK_SIZE],
 static inline void aes_gcm_128_ghash_block(struct aes_gcm_128_context *ctx,
 					   const uint8_t in[AES_BLOCK_SIZE])
 {
-	aes_gcm_128_xor(ctx->Y, in, ctx->y.block);
-	aes_gcm_128_mul(ctx->y.block, ctx->H, ctx->Y);
+	aes_block_xor(ctx->Y, in, ctx->y.block);
+	aes_gcm_128_mul(ctx->y.block, ctx->H, ctx->v.block, ctx->Y);
 }
 
 void aes_gcm_128_init(struct aes_gcm_128_context *ctx,
@@ -184,6 +161,15 @@ static inline void aes_gcm_128_crypt_tmp(struct aes_gcm_128_context *ctx,
 			tmp->ofs = 0;
 		}
 
+		if (likely(tmp->ofs == 0 && m_len >= AES_BLOCK_SIZE)) {
+			aes_block_xor(m, tmp->block, m);
+			m += AES_BLOCK_SIZE;
+			m_len -= AES_BLOCK_SIZE;
+			aes_gcm_128_inc32(ctx->CB);
+			AES_encrypt(ctx->CB, tmp->block, &ctx->aes_key);
+			continue;
+		}
+
 		m[0] ^= tmp->block[tmp->ofs];
 		m += 1;
 		m_len -= 1;
@@ -215,7 +201,7 @@ void aes_gcm_128_digest(struct aes_gcm_128_context *ctx,
 	aes_gcm_128_ghash_block(ctx, ctx->AC);
 
 	AES_encrypt(ctx->J0, ctx->c.block, &ctx->aes_key);
-	aes_gcm_128_xor(ctx->c.block, ctx->Y, T);
+	aes_block_xor(ctx->c.block, ctx->Y, T);
 
 	ZERO_STRUCTP(ctx);
 }
diff --git a/lib/crypto/aes_gcm_128.h b/lib/crypto/aes_gcm_128.h
index 278b6db..2b6cf43 100644
--- a/lib/crypto/aes_gcm_128.h
+++ b/lib/crypto/aes_gcm_128.h
@@ -34,7 +34,7 @@ struct aes_gcm_128_context {
 		uint8_t block[AES_BLOCK_SIZE];
 		size_t ofs;
 		size_t total;
-	} A, C, c, y;
+	} A, C, c, v, y;
 };
 
 void aes_gcm_128_init(struct aes_gcm_128_context *ctx,
-- 
1.9.1


From aee5f55a3df7f380000f4e67f3cfb0e9ef737ab8 Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Wed, 12 Aug 2015 12:58:49 +0200
Subject: [PATCH 7/8] lib/crypto: sync AES_cfb8_encrypt() from heimdal

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 lib/crypto/aes.c | 35 ++++++++++++++++++-----------------
 lib/crypto/aes.h | 10 +++++++---
 2 files changed, 25 insertions(+), 20 deletions(-)

diff --git a/lib/crypto/aes.c b/lib/crypto/aes.c
index a47a456..f7f9688 100644
--- a/lib/crypto/aes.c
+++ b/lib/crypto/aes.c
@@ -113,24 +113,25 @@ AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
     }
 }
 
-void aes_cfb8_encrypt(const uint8_t *in, uint8_t *out,
-		      size_t length, const AES_KEY *key,
-		      uint8_t *iv, int forward)
+void
+AES_cfb8_encrypt(const unsigned char *in, unsigned char *out,
+                 unsigned long size, const AES_KEY *key,
+                 unsigned char *iv, int forward_encrypt)
 {
-	size_t i;
+    int i;
 
-	for (i=0; i < length; i++) {
-		uint8_t tiv[AES_BLOCK_SIZE*2];
+    for (i = 0; i < size; i++) {
+        unsigned char tmp[AES_BLOCK_SIZE + 1];
 
-		memcpy(tiv, iv, AES_BLOCK_SIZE);
-		AES_encrypt(iv, iv, key);
-		if (!forward) {
-			tiv[AES_BLOCK_SIZE] = in[i];
-		}
-		out[i] = in[i] ^ iv[0];
-		if (forward) {
-			tiv[AES_BLOCK_SIZE] = out[i];
-		}
-		memcpy(iv, tiv+1, AES_BLOCK_SIZE);
-	}
+        memcpy(tmp, iv, AES_BLOCK_SIZE);
+        AES_encrypt(iv, iv, key);
+        if (!forward_encrypt) {
+            tmp[AES_BLOCK_SIZE] = in[i];
+        }
+        out[i] = in[i] ^ iv[0];
+        if (forward_encrypt) {
+            tmp[AES_BLOCK_SIZE] = out[i];
+        }
+        memcpy(iv, &tmp[1], AES_BLOCK_SIZE);
+    }
 }
diff --git a/lib/crypto/aes.h b/lib/crypto/aes.h
index ff2c448..1d0a1c0 100644
--- a/lib/crypto/aes.h
+++ b/lib/crypto/aes.h
@@ -42,6 +42,7 @@
 #define AES_encrypt samba_AES_encrypt
 #define AES_decrypt samba_AES_decrypt
 #define AES_cbc_encrypt samba_AES_cbc_encrypt
+#define AES_cfb8_encrypt samba_AES_cfb8_encrypt
 
 /*
  *
@@ -72,9 +73,12 @@ void AES_cbc_encrypt(const unsigned char *, unsigned char *,
 		     const unsigned long, const AES_KEY *,
 		     unsigned char *, int);
 
-void aes_cfb8_encrypt(const uint8_t *in, uint8_t *out,
-		      size_t length, const AES_KEY *key,
-		      uint8_t *iv, int forward);
+void AES_cfb8_encrypt(const unsigned char *in, unsigned char *out,
+		      unsigned long size, const AES_KEY *key,
+		      unsigned char *iv, int forward_encrypt);
+
+#define aes_cfb8_encrypt(in, out, size, key, iv, forward_encrypt) \
+	AES_cfb8_encrypt(in, out, size, key, iv, forward_encrypt)
 
 #ifdef  __cplusplus
 }
-- 
1.9.1


From 12f4d84941858d679a154458332b8eada9ac32d1 Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Wed, 12 Aug 2015 12:58:49 +0200
Subject: [PATCH 8/8] lib/crypto: make it possible to use only parts of
 aes.[ch]

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 lib/crypto/aes.c | 8 +++++++-
 lib/crypto/aes.h | 7 +++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/lib/crypto/aes.c b/lib/crypto/aes.c
index f7f9688..800a97e 100644
--- a/lib/crypto/aes.c
+++ b/lib/crypto/aes.c
@@ -32,9 +32,10 @@
  */
 
 #include "replace.h"
+#include "aes.h"
 
+#ifdef SAMBA_RIJNDAEL
 #include "rijndael-alg-fst.h"
-#include "aes.h"
 
 int
 AES_set_encrypt_key(const unsigned char *userkey, const int bits, AES_KEY *key)
@@ -65,7 +66,9 @@ AES_decrypt(const unsigned char *in, unsigned char *out, const AES_KEY *key)
 {
     rijndaelDecrypt(key->key, key->rounds, in, out);
 }
+#endif /* SAMBA_RIJNDAEL */
 
+#ifdef SAMBA_AES_CBC_ENCRYPT
 void
 AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
 		unsigned long size, const AES_KEY *key,
@@ -112,7 +115,9 @@ AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
 	}
     }
 }
+#endif /* SAMBA_AES_CBC_ENCRYPT */
 
+#ifdef SAMBA_AES_CFB8_ENCRYPT
 void
 AES_cfb8_encrypt(const unsigned char *in, unsigned char *out,
                  unsigned long size, const AES_KEY *key,
@@ -135,3 +140,4 @@ AES_cfb8_encrypt(const unsigned char *in, unsigned char *out,
         memcpy(iv, &tmp[1], AES_BLOCK_SIZE);
     }
 }
+#endif /* SAMBA_AES_CFB8_ENCRYPT */
diff --git a/lib/crypto/aes.h b/lib/crypto/aes.h
index 1d0a1c0..a8fb055 100644
--- a/lib/crypto/aes.h
+++ b/lib/crypto/aes.h
@@ -36,6 +36,11 @@
 #ifndef LIB_CRYPTO_AES_H
 #define LIB_CRYPTO_AES_H 1
 
+#define SAMBA_RIJNDAEL 1
+#define SAMBA_AES_CBC_ENCRYPT 1
+#define SAMBA_AES_CFB8_ENCRYPT 1
+#define SAMBA_AES_BLOCK_XOR 1
+
 /* symbol renaming */
 #define AES_set_encrypt_key samba_AES_set_encrypt_key
 #define AES_set_decrypt_key samba_AES_decrypt_key
@@ -84,6 +89,7 @@ void AES_cfb8_encrypt(const unsigned char *in, unsigned char *out,
 }
 #endif
 
+#ifdef SAMBA_AES_BLOCK_XOR
 static inline void aes_block_xor(const uint8_t in1[AES_BLOCK_SIZE],
 				 const uint8_t in2[AES_BLOCK_SIZE],
 				 uint8_t out[AES_BLOCK_SIZE])
@@ -95,6 +101,7 @@ static inline void aes_block_xor(const uint8_t in1[AES_BLOCK_SIZE],
 	o[0] = i1[0] ^ i2[0];
 	o[1] = i1[1] ^ i2[1];
 }
+#endif /* SAMBA_AES_BLOCK_XOR */
 
 static inline void aes_block_lshift(const uint8_t in[AES_BLOCK_SIZE],
 				    uint8_t out[AES_BLOCK_SIZE])
-- 
1.9.1

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 198 bytes
Desc: OpenPGP digital signature
URL: <http://lists.samba.org/pipermail/samba-technical/attachments/20150813/2323ecf9/signature.sig>


More information about the samba-technical mailing list