AES crypto performance improvements

Stefan Metzmacher metze at samba.org
Thu Aug 27 12:29:32 UTC 2015


Hi Jeremy,

here are a few more cosmetic updates to aes_ccm_128_update():
in the while() loop we don't need to set
ctx->B_i_ofs = 0; again and again, as it's already
0 before we enter the loop. We also clear v and v_len
once everything is consumed; that's not strictly needed,
as we don't currently use them at the end, but it's
clearer that way.

metze

>>>>> while debugging poor performance of our SMB3 crypto
>>>>> I developed the following performance improvements.
>>>>>
>>>>> The aes_block_xor() improved by a factor of ~10.
>>>>> The aes_block_[r|l]shift() functions improved by 25%.
>>>>>
>>>>> Please review and push.
>>>>>
>>>>> The real solution will be to use hardware support,
>>>>> I've extended the work from Simo in the following branch
>>>>> https://git.samba.org/?p=metze/samba/wip.git;a=shortlog;h=refs/heads/master3-smb-crypto
>>>>> (ignore the unrelated and directly reverted commits on top)
>>>>>
>>>>> But I'm not that happy with the abstraction yet.
>>>>
>>>> There's a regression in the aes_ccm_128 optimization because that doesn't
>>>> have test cases... I'm working on it.
>>>
>>> Here's an updated patchset, please review and push.
>>
>> OK, just FYI this patchset breaks SMB3 transport
>> level encryption somehow.
>>
>> Just do:
>>
>> smbclient //localhost/tmp -UUSER%PASS -mSMB3 -e
>>
>> and you'll find the client terminates the
>> connection on finding a bad signature after
>> decryption inside smb2_signing_decrypt_pdu() here:
>>
>>         sig_ptr = tf + SMB2_TF_SIGNATURE;
>>         if (memcmp(sig_ptr, sig, 16) != 0) {
>>                 return NT_STATUS_ACCESS_DENIED;
>>         }
>>
> 
> Thanks for finding this!
> 
> The problem was a missing if (v_len == 0) return
> in aes_ccm_128_update().
> 
> I've also updated the tests to call _update() and _crypt() functions
> with 0 length buffers.
> 
> metze
> 
-------------- next part --------------
From c2204e2defa97709db90d591cac63035b744ef17 Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Wed, 12 Aug 2015 12:09:24 +0200
Subject: [PATCH 01/12] s3:vfs_smb_traffic_analyzer: remove samba_ prefix from
 AES_* function calls

This should be an implementation detail in lib/crypto/aes.h.

In future we may add support for other implementations.

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 source3/modules/vfs_smb_traffic_analyzer.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/source3/modules/vfs_smb_traffic_analyzer.c b/source3/modules/vfs_smb_traffic_analyzer.c
index 73ebf63..0208cde 100644
--- a/source3/modules/vfs_smb_traffic_analyzer.c
+++ b/source3/modules/vfs_smb_traffic_analyzer.c
@@ -175,7 +175,7 @@ static char *smb_traffic_analyzer_encrypt( TALLOC_CTX *ctx,
 	unsigned char filler[17]= "................";
 	char *output;
 	if (akey == NULL) return NULL;
-	samba_AES_set_encrypt_key((const unsigned char *) akey, 128, &key);
+	AES_set_encrypt_key((const unsigned char *) akey, 128, &key);
 	s1 = strlen(str) / 16;
 	s2 = strlen(str) % 16;
 	memcpy(filler, str + (s1*16), s2);
@@ -185,10 +185,10 @@ static char *smb_traffic_analyzer_encrypt( TALLOC_CTX *ctx,
 	*len = ((s1 + 1)*16);
 	output = talloc_array(ctx, char, *len);
 	for (h = 0; h < s1; h++) {
-		samba_AES_encrypt((const unsigned char *) str+(16*h), (unsigned char *)output+16*h,
+		AES_encrypt((const unsigned char *) str+(16*h), (unsigned char *)output+16*h,
 			&key);
 	}
-	samba_AES_encrypt(filler, (unsigned char *)(output+(16*h)), &key);
+	AES_encrypt(filler, (unsigned char *)(output+(16*h)), &key);
 	*len = (s1*16)+16;
 	return output;
 }
-- 
1.9.1


From 113d2291261a5651ecd924d945b219095e7040cd Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Tue, 11 Aug 2015 16:31:25 +0200
Subject: [PATCH 02/12] lib/crypto: add aes_cmac_128 chunked tests

BUG: https://bugzilla.samba.org/show_bug.cgi?id=11451

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 lib/crypto/aes_cmac_128_test.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/lib/crypto/aes_cmac_128_test.c b/lib/crypto/aes_cmac_128_test.c
index 173087f..86a2fd7 100644
--- a/lib/crypto/aes_cmac_128_test.c
+++ b/lib/crypto/aes_cmac_128_test.c
@@ -87,6 +87,32 @@ bool torture_local_crypto_aes_cmac_128(struct torture_context *torture)
 			ret = false;
 		}
 	}
+	for (i=0; testarray[i].cmac.length != 0; i++) {
+		struct aes_cmac_128_context ctx;
+		uint8_t cmac[AES_BLOCK_SIZE];
+		int e;
+		size_t j;
+
+		aes_cmac_128_init(&ctx, key.data);
+		for (j=0; j < testarray[i].data.length; j++) {
+			aes_cmac_128_update(&ctx, NULL, 0);
+			aes_cmac_128_update(&ctx,
+					    &testarray[i].data.data[j],
+					    1);
+			aes_cmac_128_update(&ctx, NULL, 0);
+		}
+		aes_cmac_128_final(&ctx, cmac);
+
+		e = memcmp(testarray[i].cmac.data, cmac, sizeof(cmac));
+		if (e != 0) {
+			printf("aes_cmac_128 chunked test[%u]: failed\n", i);
+			dump_data(0, key.data, key.length);
+			dump_data(0, testarray[i].data.data, testarray[i].data.length);
+			dump_data(0, testarray[i].cmac.data, testarray[i].cmac.length);
+			dump_data(0, cmac, sizeof(cmac));
+			ret = false;
+		}
+	}
 	talloc_free(tctx);
 	return ret;
 }
-- 
1.9.1


From f1979b7d31a39a0276a829f7d5aaa054b2b9d12a Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Fri, 14 Aug 2015 13:12:13 +0200
Subject: [PATCH 03/12] lib/crypto: run all aes_gcm_128 testcases

We should not skip the first one.

BUG: https://bugzilla.samba.org/show_bug.cgi?id=11451

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 lib/crypto/aes_gcm_128_test.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/crypto/aes_gcm_128_test.c b/lib/crypto/aes_gcm_128_test.c
index 703ad86..f70d851 100644
--- a/lib/crypto/aes_gcm_128_test.c
+++ b/lib/crypto/aes_gcm_128_test.c
@@ -104,7 +104,7 @@ bool torture_local_crypto_aes_gcm_128(struct torture_context *torture)
 	testarray[3].T = strhex_to_data_blob(tctx,
 				"5bc94fbc3221a5db94fae95ae7121a47");
 
-	for (i=1; testarray[i].T.length != 0; i++) {
+	for (i=0; testarray[i].T.length != 0; i++) {
 		struct aes_gcm_128_context ctx;
 		uint8_t T[AES_BLOCK_SIZE];
 		DATA_BLOB C;
@@ -167,7 +167,7 @@ bool torture_local_crypto_aes_gcm_128(struct torture_context *torture)
 		}
 	}
 
-	for (i=1; testarray[i].T.length != 0; i++) {
+	for (i=0; testarray[i].T.length != 0; i++) {
 		struct aes_gcm_128_context ctx;
 		uint8_t T[AES_BLOCK_SIZE];
 		DATA_BLOB C;
@@ -233,7 +233,7 @@ bool torture_local_crypto_aes_gcm_128(struct torture_context *torture)
 		}
 	}
 
-	for (i=1; testarray[i].T.length != 0; i++) {
+	for (i=0; testarray[i].T.length != 0; i++) {
 		struct aes_gcm_128_context ctx;
 		uint8_t T[AES_BLOCK_SIZE];
 		DATA_BLOB P;
@@ -299,7 +299,7 @@ bool torture_local_crypto_aes_gcm_128(struct torture_context *torture)
 		}
 	}
 
-	for (i=1; testarray[i].T.length != 0; i++) {
+	for (i=0; testarray[i].T.length != 0; i++) {
 		struct aes_gcm_128_context ctx;
 		uint8_t T[AES_BLOCK_SIZE];
 		DATA_BLOB P;
-- 
1.9.1


From 7e295a524f6a7f12f42c8dbbb7d8dc20863f4dbc Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Thu, 27 Aug 2015 13:44:56 +0200
Subject: [PATCH 04/12] lib/crypto: verify 0 updates in aes_gcm_128 tests

BUG: https://bugzilla.samba.org/show_bug.cgi?id=11451

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 lib/crypto/aes_gcm_128_test.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/lib/crypto/aes_gcm_128_test.c b/lib/crypto/aes_gcm_128_test.c
index f70d851..4b53487 100644
--- a/lib/crypto/aes_gcm_128_test.c
+++ b/lib/crypto/aes_gcm_128_test.c
@@ -178,11 +178,17 @@ bool torture_local_crypto_aes_gcm_128(struct torture_context *torture)
 
 		aes_gcm_128_init(&ctx, testarray[i].K.data, testarray[i].IV.data);
 		for (j=0; j < testarray[i].A.length; j++) {
+			aes_gcm_128_updateA(&ctx, NULL, 0);
 			aes_gcm_128_updateA(&ctx, &testarray[i].A.data[j], 1);
+			aes_gcm_128_updateA(&ctx, NULL, 0);
 		}
 		for (j=0; j < C.length; j++) {
+			aes_gcm_128_crypt(&ctx, NULL, 0);
+			aes_gcm_128_updateC(&ctx, NULL, 0);
 			aes_gcm_128_crypt(&ctx, &C.data[j], 1);
 			aes_gcm_128_updateC(&ctx, &C.data[j], 1);
+			aes_gcm_128_crypt(&ctx, NULL, 0);
+			aes_gcm_128_updateC(&ctx, NULL, 0);
 		}
 		aes_gcm_128_digest(&ctx, T);
 
@@ -244,11 +250,17 @@ bool torture_local_crypto_aes_gcm_128(struct torture_context *torture)
 
 		aes_gcm_128_init(&ctx, testarray[i].K.data, testarray[i].IV.data);
 		for (j=0; j < testarray[i].A.length; j++) {
+			aes_gcm_128_updateA(&ctx, NULL, 0);
 			aes_gcm_128_updateA(&ctx, &testarray[i].A.data[j], 1);
+			aes_gcm_128_updateA(&ctx, NULL, 0);
 		}
 		for (j=0; j < P.length; j++) {
+			aes_gcm_128_updateC(&ctx, NULL, 0);
+			aes_gcm_128_crypt(&ctx, NULL, 0);
 			aes_gcm_128_updateC(&ctx, &P.data[j], 1);
 			aes_gcm_128_crypt(&ctx, &P.data[j], 1);
+			aes_gcm_128_updateC(&ctx, NULL, 0);
+			aes_gcm_128_crypt(&ctx, NULL, 0);
 		}
 		aes_gcm_128_digest(&ctx, T);
 
-- 
1.9.1


From c0af5c186ad193749eac617fd8f12f811feb54db Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Fri, 14 Aug 2015 13:13:21 +0200
Subject: [PATCH 05/12] lib/crypto: add aes_ccm_128 tests

BUG: https://bugzilla.samba.org/show_bug.cgi?id=11451

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 lib/crypto/aes_ccm_128_test.c | 376 ++++++++++++++++++++++++++++++++++++++++++
 lib/crypto/aes_test.h         |  67 ++++++++
 lib/crypto/wscript_build      |  11 +-
 source4/torture/local/local.c |   2 +
 4 files changed, 451 insertions(+), 5 deletions(-)
 create mode 100644 lib/crypto/aes_ccm_128_test.c
 create mode 100644 lib/crypto/aes_test.h

diff --git a/lib/crypto/aes_ccm_128_test.c b/lib/crypto/aes_ccm_128_test.c
new file mode 100644
index 0000000..1a9fe41
--- /dev/null
+++ b/lib/crypto/aes_ccm_128_test.c
@@ -0,0 +1,376 @@
+/*
+   AES-CCM-128 tests
+
+   Copyright (C) Stefan Metzmacher 2015
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#include "replace.h"
+#include "../lib/util/samba_util.h"
+#include "../lib/crypto/crypto.h"
+#include "../lib/crypto/aes_test.h"
+
+#ifndef AES_CCM_128_ONLY_TESTVECTORS
+struct torture_context;
+bool torture_local_crypto_aes_ccm_128(struct torture_context *torture);
+
+/*
+ This uses our own test values as we rely on an 11 byte nonce
+ and the values from RFC 3610 use a 13 byte nonce.
+*/
+bool torture_local_crypto_aes_ccm_128(struct torture_context *tctx)
+{
+	bool ret = true;
+	uint32_t i;
+	struct aes_mode_testvector testarray[] = {
+#endif /* AES_CCM_128_ONLY_TESTVECTORS */
+#define AES_CCM_128_TESTVECTOR(_k, _n, _a, _p, _c, _t) \
+	AES_MODE_TESTVECTOR(aes_ccm_128, _k, _n, _a, _p, _c, _t)
+
+	AES_CCM_128_TESTVECTOR(
+		/* K */
+		"8BF9FBC2B8149484FF11AB1F3A544FF6",
+		/* N */
+		"010000000000000077F7A8",
+		/* A */
+		"010000000000000077F7A80000000000"
+		"A8000000000001004100002C00980000",
+		/* P */
+		"FE534D4240000100000000000B00811F"
+		"00000000000000000600000000000000"
+		"00000000010000004100002C00980000"
+		"00000000000000000000000000000000"
+		"3900000094010600FFFFFFFFFFFFFFFF"
+		"FFFFFFFFFFFFFFFF7800000030000000"
+		"000000007800000000000000FFFF0000"
+		"0100000000000000"
+		"03005C003100370032002E0033003100"
+		"2E0039002E003100380033005C006E00"
+		"650074006C006F0067006F006E000000",
+		/* C */
+		"25985364BF9AF90EB0B9C8FB55B7C446"
+		"780F310F1EC4677726BFBF34E38E6408"
+		"057EE228814F11CBAAB794A79F7A1F78"
+		"2DE73B7477985360A02D35A7A347ABF7"
+		"9F18DD8687767423BB08F18642B6EFEE"
+		"8B1543D83091AF5952F58BB4BD89FF6B"
+		"0206E7170481C7BC61F06653D0CF10F7"
+		"C78380389382C276"
+		"7B8BF34D687A5C3D4F783F926F7755C0"
+		"2D44C30848C69CFDD8E54395F1881611"
+		"E5502285870A7179068923105190C837",
+		/* T */
+		"3C11F652F8EA5600C8607D2E0FEAFD42"
+	),
+	AES_CCM_128_TESTVECTOR(
+		/* K */
+		"f9fdca4ac64fe7f014de0f43039c7571",
+		/* N */
+		"5a8aa485c316e947125478",
+		/* A */
+		"3796cf51b8726652a4204733b8fbb047"
+		"cf00fb91a9837e22ec22b1a268f88e2c",
+		/* P */
+		"a265480ca88d5f536db0dc6abc40faf0"
+		"d05be7a9669777682345647586786983",
+		/* C */
+		"65F8D8422006FB77FB7CCEFDFFF93729"
+		"B3EFCB06A0FAF3A2ABAB485723373F53",
+		/* T */
+		"2C62BD82AD231887A7B326E1E045BC91"
+	),
+	AES_CCM_128_TESTVECTOR(
+		/* K */
+		"197afb02ffbd8f699dacae87094d5243",
+		/* N */
+		"5a8aa485c316e947125478",
+		/* A */
+		"",
+		/* P */
+		"3796cf51b8726652a4204733b8fbb047"
+		"cf00fb91a9837e22",
+		/* C */
+		"CA53910394115C5DAB5D7250F04D6A27"
+		"2BCFA4329528F3AC",
+		/* T */
+		"38E3A318F9BA88D4DD2FAF3521820001"
+	),
+	AES_CCM_128_TESTVECTOR(
+		/* K */
+		"90929a4b0ac65b350ad1591611fe4829",
+		/* N */
+		"5a8aa485c316e9403aff85",
+		/* A */
+		"",
+		/* P */
+		"a16a2e741f1cd9717285b6d882c1fc53"
+		"655e9773761ad697",
+		/* C */
+		"ACA5E98D2784D131AE76E3C8BF9C3988"
+		"35C0206C71893F26",
+		/* T */
+		"AE67C0EA38C5383BFDC7967F4E9D1678"
+	),
+	AES_CCM_128_TESTVECTOR(
+		/* K */
+		"f9fdca4ac64fe7f014de0f43039c7571",
+		/* N */
+		"5a8aa485c316e947125478",
+		/* A */
+		"3796cf51b8726652a4204733b8fbb047"
+		"cf00fb91a9837e22ec22b1a268f88e2c",
+		/* P */
+		"a265480ca88d5f536db0dc6abc40faf0"
+		"d05be7a966977768",
+		/* C */
+		"65F8D8422006FB77FB7CCEFDFFF93729"
+		"B3EFCB06A0FAF3A2",
+		/* T */
+		"03C6E244586AFAB9B60D9F6DBDF7EB1A"
+	),
+	AES_CCM_128_TESTVECTOR(
+		/* K */
+		"26511fb51fcfa75cb4b44da75a6e5a0e",
+		/* N */
+		"5a8aa485c316e9403aff85",
+		/* A */
+		"a16a2e741f1cd9717285b6d882c1fc53"
+		"655e9773761ad697a7ee6410184c7982",
+		/* P */
+		"8739b4bea1a099fe547499cbc6d1b13d"
+		"849b8084c9b6acc5",
+		/* C */
+		"D31F9FC23674D5272125375E0A2F5365"
+		"41B1FAF1DD68C819",
+		/* T */
+		"4F315233A76C4DD99972561C5158AB3B"
+	),
+	AES_CCM_128_TESTVECTOR(
+		/* K */
+		"f9fdca4ac64fe7f014de0f43039c7571",
+		/* N */
+		"5a8aa485c316e947125478",
+		/* A */
+		"3796cf51b8726652a4204733b8fbb047"
+		"cf00fb91a9837e22ec22b1a268",
+		/* P */
+		"a265480ca88d5f536db0dc6abc40faf0"
+		"d05be7a9669777682376345745",
+		/* C */
+		"65F8D8422006FB77FB7CCEFDFFF93729"
+		"B3EFCB06A0FAF3A2AB981875E0",
+		/* T */
+		"EA93AAEDA607226E9E79D2EE5C4B62F8"
+	),
+	AES_CCM_128_TESTVECTOR(
+		/* K */
+		"26511fb51fcfa75cb4b44da75a6e5a0e",
+		/* N */
+		"5a8aa485c316e9403aff85",
+		/* A */
+		"a16a2e741f1cd9717285b6d882c1fc53"
+		"65",
+		/* P */
+		"8739b4bea1a099fe547499cbc6d1b13d"
+		"84",
+		/* C */
+		"D31F9FC23674D5272125375E0A2F5365"
+		"41",
+		/* T */
+		"036F58DA2372B29BD0E01C58A0E7F9EE"
+	),
+	AES_CCM_128_TESTVECTOR(
+		/* K */
+		"00000000000000000000000000000000",
+		/* N */
+		"0000000000000000000000",
+		/* A */
+		"",
+		/* P */
+		"00",
+		/* C */
+		"2E",
+		/* T */
+		"61787D2C432A58293B73D01154E61B6B"
+	),
+	AES_CCM_128_TESTVECTOR(
+		/* K */
+		"00000000000000000000000000000000",
+		/* N */
+		"0000000000000000000000",
+		/* A */
+		"00",
+		/* P */
+		"00",
+		/* C */
+		"2E",
+		/* T */
+		"E4284A0E813F0FFA146CF59F9ADAFBD7"
+	),
+#ifndef AES_CCM_128_ONLY_TESTVECTORS
+	};
+
+	for (i=0; i < ARRAY_SIZE(testarray); i++) {
+		struct aes_ccm_128_context ctx;
+		uint8_t T[AES_BLOCK_SIZE];
+		DATA_BLOB _T = data_blob_const(T, sizeof(T));
+		DATA_BLOB C;
+		int e;
+
+		C = data_blob_dup_talloc(tctx, testarray[i].P);
+
+		aes_ccm_128_init(&ctx, testarray[i].K.data, testarray[i].N.data,
+				 testarray[i].A.length, testarray[i].P.length);
+		aes_ccm_128_update(&ctx,
+				   testarray[i].A.data,
+				   testarray[i].A.length);
+		aes_ccm_128_update(&ctx, C.data, C.length);
+		aes_ccm_128_crypt(&ctx, C.data, C.length);
+		aes_ccm_128_digest(&ctx, T);
+
+		e = memcmp(testarray[i].T.data, T, sizeof(T));
+		if (e != 0) {
+			aes_mode_testvector_debug(&testarray[i], NULL, &C, &_T);
+			ret = false;
+			goto fail;
+		}
+
+		e = memcmp(testarray[i].C.data, C.data, C.length);
+		if (e != 0) {
+			aes_mode_testvector_debug(&testarray[i], NULL, &C, &_T);
+			ret = false;
+			goto fail;
+		}
+	}
+
+	for (i=0; i < ARRAY_SIZE(testarray); i++) {
+		struct aes_ccm_128_context ctx;
+		uint8_t T[AES_BLOCK_SIZE];
+		DATA_BLOB _T = data_blob_const(T, sizeof(T));
+		DATA_BLOB C;
+		int e;
+		size_t j;
+
+		C = data_blob_dup_talloc(tctx, testarray[i].P);
+
+		aes_ccm_128_init(&ctx, testarray[i].K.data, testarray[i].N.data,
+				 testarray[i].A.length, testarray[i].P.length);
+		for (j=0; j < testarray[i].A.length; j++) {
+			aes_ccm_128_update(&ctx, NULL, 0);
+			aes_ccm_128_update(&ctx, &testarray[i].A.data[j], 1);
+			aes_ccm_128_update(&ctx, NULL, 0);
+		}
+		for (j=0; j < C.length; j++) {
+			aes_ccm_128_crypt(&ctx, NULL, 0);
+			aes_ccm_128_update(&ctx, NULL, 0);
+			aes_ccm_128_update(&ctx, &C.data[j], 1);
+			aes_ccm_128_crypt(&ctx, &C.data[j], 1);
+			aes_ccm_128_crypt(&ctx, NULL, 0);
+			aes_ccm_128_update(&ctx, NULL, 0);
+		}
+		aes_ccm_128_digest(&ctx, T);
+
+		e = memcmp(testarray[i].T.data, T, sizeof(T));
+		if (e != 0) {
+			aes_mode_testvector_debug(&testarray[i], NULL, &C, &_T);
+			ret = false;
+			goto fail;
+		}
+
+		e = memcmp(testarray[i].C.data, C.data, C.length);
+		if (e != 0) {
+			aes_mode_testvector_debug(&testarray[i], NULL, &C, &_T);
+			ret = false;
+			goto fail;
+		}
+	}
+
+	for (i=0; i < ARRAY_SIZE(testarray); i++) {
+		struct aes_ccm_128_context ctx;
+		uint8_t T[AES_BLOCK_SIZE];
+		DATA_BLOB _T = data_blob_const(T, sizeof(T));
+		DATA_BLOB P;
+		int e;
+		size_t j;
+
+		P = data_blob_dup_talloc(tctx, testarray[i].C);
+
+		aes_ccm_128_init(&ctx, testarray[i].K.data, testarray[i].N.data,
+				 testarray[i].A.length, testarray[i].P.length);
+		for (j=0; j < testarray[i].A.length; j++) {
+			aes_ccm_128_update(&ctx, NULL, 0);
+			aes_ccm_128_update(&ctx, &testarray[i].A.data[j], 1);
+			aes_ccm_128_update(&ctx, NULL, 0);
+		}
+		for (j=0; j < P.length; j++) {
+			aes_ccm_128_crypt(&ctx, NULL, 0);
+			aes_ccm_128_update(&ctx, NULL, 0);
+			aes_ccm_128_crypt(&ctx, &P.data[j], 1);
+			aes_ccm_128_update(&ctx, &P.data[j], 1);
+			aes_ccm_128_crypt(&ctx, NULL, 0);
+			aes_ccm_128_update(&ctx, NULL, 0);
+		}
+		aes_ccm_128_digest(&ctx, T);
+
+		e = memcmp(testarray[i].T.data, T, sizeof(T));
+		if (e != 0) {
+			aes_mode_testvector_debug(&testarray[i], &P, NULL, &_T);
+			ret = false;
+			goto fail;
+		}
+
+		e = memcmp(testarray[i].P.data, P.data, P.length);
+		if (e != 0) {
+			aes_mode_testvector_debug(&testarray[i], &P, NULL, &_T);
+			ret = false;
+			goto fail;
+		}
+	}
+
+	for (i=0; i < ARRAY_SIZE(testarray); i++) {
+		struct aes_ccm_128_context ctx;
+		uint8_t T[AES_BLOCK_SIZE];
+		DATA_BLOB _T = data_blob_const(T, sizeof(T));
+		DATA_BLOB P;
+		int e;
+
+		P = data_blob_dup_talloc(tctx, testarray[i].C);
+
+		aes_ccm_128_init(&ctx, testarray[i].K.data, testarray[i].N.data,
+				 testarray[i].A.length, testarray[i].P.length);
+		aes_ccm_128_update(&ctx, testarray[i].A.data, testarray[i].A.length);
+		aes_ccm_128_crypt(&ctx, P.data, P.length);
+		aes_ccm_128_update(&ctx, P.data, P.length);
+		aes_ccm_128_digest(&ctx, T);
+
+		e = memcmp(testarray[i].T.data, T, sizeof(T));
+		if (e != 0) {
+			aes_mode_testvector_debug(&testarray[i], &P, NULL, &_T);
+			ret = false;
+			goto fail;
+		}
+
+		e = memcmp(testarray[i].P.data, P.data, P.length);
+		if (e != 0) {
+			aes_mode_testvector_debug(&testarray[i], &P, NULL, &_T);
+			ret = false;
+			goto fail;
+		}
+	}
+
+ fail:
+	return ret;
+}
+
+#endif /* AES_CCM_128_ONLY_TESTVECTORS */
diff --git a/lib/crypto/aes_test.h b/lib/crypto/aes_test.h
new file mode 100644
index 0000000..ad89249
--- /dev/null
+++ b/lib/crypto/aes_test.h
@@ -0,0 +1,67 @@
+#ifndef LIB_CRYPTO_AES_TEST_H
+#define LIB_CRYPTO_AES_TEST_H
+
+struct aes_mode_testvector {
+	DATA_BLOB K;
+	DATA_BLOB N;
+	DATA_BLOB A;
+	DATA_BLOB P;
+	DATA_BLOB C;
+	DATA_BLOB T;
+	const char *mode;
+	bool aes_cmac_128;
+	bool aes_ccm_128;
+	bool aes_gcm_128;
+	const char *location;
+};
+
+#define AES_MODE_TESTVECTOR(_mode, _k, _n, _a, _p, _c, _t) \
+	{ \
+		.K = strhex_to_data_blob(tctx, _k), \
+		.N = strhex_to_data_blob(tctx, _n), \
+		.A = strhex_to_data_blob(tctx, _a), \
+		.P = strhex_to_data_blob(tctx, _p), \
+		.C = strhex_to_data_blob(tctx, _c), \
+		.T = strhex_to_data_blob(tctx, _t), \
+		._mode = true, \
+		.mode = #_mode, \
+		.location = __location__, \
+	}
+
+#define aes_mode_testvector_debug(tv, P, C, T) \
+	_aes_mode_testvector_debug(tv, P, C, T, __location__)
+static inline void _aes_mode_testvector_debug(const struct aes_mode_testvector *tv,
+					      const DATA_BLOB *P,
+					      const DATA_BLOB *C,
+					      const DATA_BLOB *T,
+					      const char *location)
+{
+	printf("location: %s\n", location);
+	printf("TEST: %s\n", tv->location);
+	printf("MODE: %s\n", tv->mode);
+	printf("K\n");
+	dump_data(0, tv->K.data, tv->K.length);
+	printf("N\n");
+	dump_data(0, tv->N.data, tv->N.length);
+	printf("A\n");
+	dump_data(0, tv->A.data, tv->A.length);
+	printf("P\n");
+	dump_data(0, tv->P.data, tv->P.length);
+	if (P) {
+		printf("PV\n");
+		dump_data(0, P->data, P->length);
+	}
+	printf("C\n");
+	dump_data(0, tv->C.data, tv->C.length);
+	if (C) {
+		printf("CV\n");
+		dump_data(0, C->data, C->length);
+	}
+	printf("T\n");
+	dump_data(0, tv->T.data, tv->T.length);
+	if (T) {
+		printf("TV\n");
+		dump_data(0, T->data, T->length);
+	}
+}
+#endif /* LIB_CRYPTO_AES_TEST_H */
diff --git a/lib/crypto/wscript_build b/lib/crypto/wscript_build
index f2326a2..7f94532 100644
--- a/lib/crypto/wscript_build
+++ b/lib/crypto/wscript_build
@@ -19,8 +19,9 @@ bld.SAMBA_SUBSYSTEM('LIBCRYPTO',
         )
 
 bld.SAMBA_SUBSYSTEM('TORTURE_LIBCRYPTO',
-	source='md4test.c md5test.c hmacmd5test.c aes_cmac_128_test.c aes_gcm_128_test.c',
-	autoproto='test_proto.h',
-	deps='LIBCRYPTO'
-	)
-
+        source='''md4test.c md5test.c hmacmd5test.c
+            aes_cmac_128_test.c aes_ccm_128_test.c aes_gcm_128_test.c
+        ''',
+        autoproto='test_proto.h',
+        deps='LIBCRYPTO'
+        )
diff --git a/source4/torture/local/local.c b/source4/torture/local/local.c
index 91e67f8..3988988 100644
--- a/source4/torture/local/local.c
+++ b/source4/torture/local/local.c
@@ -90,6 +90,8 @@ NTSTATUS torture_local_init(void)
 				      torture_local_crypto_hmacmd5);
 	torture_suite_add_simple_test(suite, "crypto.aes_cmac_128",
 				      torture_local_crypto_aes_cmac_128);
+	torture_suite_add_simple_test(suite, "crypto.aes_ccm_128",
+				      torture_local_crypto_aes_ccm_128);
 	torture_suite_add_simple_test(suite, "crypto.aes_gcm_128",
 				      torture_local_crypto_aes_gcm_128);
 
-- 
1.9.1


From ed7c8ecbfbee7a77897e277622c5ad4ca28c0dc6 Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Wed, 12 Aug 2015 00:59:58 +0200
Subject: [PATCH 06/12] lib/crypto: add optimized helper functions
 aes_block_{xor,lshift,rshift}()

These are typical operations on an AES_BLOCK used by different modes.

BUG: https://bugzilla.samba.org/show_bug.cgi?id=11451

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 lib/crypto/aes.h | 575 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 575 insertions(+)

diff --git a/lib/crypto/aes.h b/lib/crypto/aes.h
index a2b6c07..2cfb587 100644
--- a/lib/crypto/aes.h
+++ b/lib/crypto/aes.h
@@ -80,4 +80,579 @@ void aes_cfb8_encrypt(const uint8_t *in, uint8_t *out,
 }
 #endif
 
+static inline void aes_block_xor(const uint8_t in1[AES_BLOCK_SIZE],
+				 const uint8_t in2[AES_BLOCK_SIZE],
+				 uint8_t out[AES_BLOCK_SIZE])
+{
+#define __IS_ALIGN8(p) ((((uintptr_t)(p)) & 0x7) == 0)
+#define __IS_ALIGNED(a,b,c) __IS_ALIGN8(\
+		((uintptr_t)(a)) | \
+		((uintptr_t)(b)) | \
+		((uintptr_t)(c)))
+	/* If everything is aligned we can optimize */
+	if (likely(__IS_ALIGNED(in1, in2, out))) {
+#define __RO64(p) ((const uint64_t *)(p))
+#define __RW64(p) ((uint64_t *)(p))
+		__RW64(out)[0] = __RO64(in1)[0] ^ __RO64(in2)[0];
+		__RW64(out)[1] = __RO64(in1)[1] ^ __RO64(in2)[1];
+	} else {
+		uint64_t i1[2];
+		uint64_t i2[2];
+		uint64_t o[2];
+
+		memcpy(i1, in1, AES_BLOCK_SIZE);
+		memcpy(i2, in2, AES_BLOCK_SIZE);
+		o[0] = i1[0] ^ i2[0];
+		o[1] = i1[1] ^ i2[1];
+		memcpy(out, o, AES_BLOCK_SIZE);
+	}
+}
+
+static inline void aes_block_lshift(const uint8_t in[AES_BLOCK_SIZE],
+				    uint8_t out[AES_BLOCK_SIZE])
+{
+	static const struct aes_block_lshift_entry {
+		uint8_t lshift;
+		uint8_t overflow;
+	} aes_block_lshift_table[UINT8_MAX+1] = {
+		[0x00] = { .lshift = 0x00, .overflow = 0x00 },
+		[0x01] = { .lshift = 0x02, .overflow = 0x00 },
+		[0x02] = { .lshift = 0x04, .overflow = 0x00 },
+		[0x03] = { .lshift = 0x06, .overflow = 0x00 },
+		[0x04] = { .lshift = 0x08, .overflow = 0x00 },
+		[0x05] = { .lshift = 0x0a, .overflow = 0x00 },
+		[0x06] = { .lshift = 0x0c, .overflow = 0x00 },
+		[0x07] = { .lshift = 0x0e, .overflow = 0x00 },
+		[0x08] = { .lshift = 0x10, .overflow = 0x00 },
+		[0x09] = { .lshift = 0x12, .overflow = 0x00 },
+		[0x0a] = { .lshift = 0x14, .overflow = 0x00 },
+		[0x0b] = { .lshift = 0x16, .overflow = 0x00 },
+		[0x0c] = { .lshift = 0x18, .overflow = 0x00 },
+		[0x0d] = { .lshift = 0x1a, .overflow = 0x00 },
+		[0x0e] = { .lshift = 0x1c, .overflow = 0x00 },
+		[0x0f] = { .lshift = 0x1e, .overflow = 0x00 },
+		[0x10] = { .lshift = 0x20, .overflow = 0x00 },
+		[0x11] = { .lshift = 0x22, .overflow = 0x00 },
+		[0x12] = { .lshift = 0x24, .overflow = 0x00 },
+		[0x13] = { .lshift = 0x26, .overflow = 0x00 },
+		[0x14] = { .lshift = 0x28, .overflow = 0x00 },
+		[0x15] = { .lshift = 0x2a, .overflow = 0x00 },
+		[0x16] = { .lshift = 0x2c, .overflow = 0x00 },
+		[0x17] = { .lshift = 0x2e, .overflow = 0x00 },
+		[0x18] = { .lshift = 0x30, .overflow = 0x00 },
+		[0x19] = { .lshift = 0x32, .overflow = 0x00 },
+		[0x1a] = { .lshift = 0x34, .overflow = 0x00 },
+		[0x1b] = { .lshift = 0x36, .overflow = 0x00 },
+		[0x1c] = { .lshift = 0x38, .overflow = 0x00 },
+		[0x1d] = { .lshift = 0x3a, .overflow = 0x00 },
+		[0x1e] = { .lshift = 0x3c, .overflow = 0x00 },
+		[0x1f] = { .lshift = 0x3e, .overflow = 0x00 },
+		[0x20] = { .lshift = 0x40, .overflow = 0x00 },
+		[0x21] = { .lshift = 0x42, .overflow = 0x00 },
+		[0x22] = { .lshift = 0x44, .overflow = 0x00 },
+		[0x23] = { .lshift = 0x46, .overflow = 0x00 },
+		[0x24] = { .lshift = 0x48, .overflow = 0x00 },
+		[0x25] = { .lshift = 0x4a, .overflow = 0x00 },
+		[0x26] = { .lshift = 0x4c, .overflow = 0x00 },
+		[0x27] = { .lshift = 0x4e, .overflow = 0x00 },
+		[0x28] = { .lshift = 0x50, .overflow = 0x00 },
+		[0x29] = { .lshift = 0x52, .overflow = 0x00 },
+		[0x2a] = { .lshift = 0x54, .overflow = 0x00 },
+		[0x2b] = { .lshift = 0x56, .overflow = 0x00 },
+		[0x2c] = { .lshift = 0x58, .overflow = 0x00 },
+		[0x2d] = { .lshift = 0x5a, .overflow = 0x00 },
+		[0x2e] = { .lshift = 0x5c, .overflow = 0x00 },
+		[0x2f] = { .lshift = 0x5e, .overflow = 0x00 },
+		[0x30] = { .lshift = 0x60, .overflow = 0x00 },
+		[0x31] = { .lshift = 0x62, .overflow = 0x00 },
+		[0x32] = { .lshift = 0x64, .overflow = 0x00 },
+		[0x33] = { .lshift = 0x66, .overflow = 0x00 },
+		[0x34] = { .lshift = 0x68, .overflow = 0x00 },
+		[0x35] = { .lshift = 0x6a, .overflow = 0x00 },
+		[0x36] = { .lshift = 0x6c, .overflow = 0x00 },
+		[0x37] = { .lshift = 0x6e, .overflow = 0x00 },
+		[0x38] = { .lshift = 0x70, .overflow = 0x00 },
+		[0x39] = { .lshift = 0x72, .overflow = 0x00 },
+		[0x3a] = { .lshift = 0x74, .overflow = 0x00 },
+		[0x3b] = { .lshift = 0x76, .overflow = 0x00 },
+		[0x3c] = { .lshift = 0x78, .overflow = 0x00 },
+		[0x3d] = { .lshift = 0x7a, .overflow = 0x00 },
+		[0x3e] = { .lshift = 0x7c, .overflow = 0x00 },
+		[0x3f] = { .lshift = 0x7e, .overflow = 0x00 },
+		[0x40] = { .lshift = 0x80, .overflow = 0x00 },
+		[0x41] = { .lshift = 0x82, .overflow = 0x00 },
+		[0x42] = { .lshift = 0x84, .overflow = 0x00 },
+		[0x43] = { .lshift = 0x86, .overflow = 0x00 },
+		[0x44] = { .lshift = 0x88, .overflow = 0x00 },
+		[0x45] = { .lshift = 0x8a, .overflow = 0x00 },
+		[0x46] = { .lshift = 0x8c, .overflow = 0x00 },
+		[0x47] = { .lshift = 0x8e, .overflow = 0x00 },
+		[0x48] = { .lshift = 0x90, .overflow = 0x00 },
+		[0x49] = { .lshift = 0x92, .overflow = 0x00 },
+		[0x4a] = { .lshift = 0x94, .overflow = 0x00 },
+		[0x4b] = { .lshift = 0x96, .overflow = 0x00 },
+		[0x4c] = { .lshift = 0x98, .overflow = 0x00 },
+		[0x4d] = { .lshift = 0x9a, .overflow = 0x00 },
+		[0x4e] = { .lshift = 0x9c, .overflow = 0x00 },
+		[0x4f] = { .lshift = 0x9e, .overflow = 0x00 },
+		[0x50] = { .lshift = 0xa0, .overflow = 0x00 },
+		[0x51] = { .lshift = 0xa2, .overflow = 0x00 },
+		[0x52] = { .lshift = 0xa4, .overflow = 0x00 },
+		[0x53] = { .lshift = 0xa6, .overflow = 0x00 },
+		[0x54] = { .lshift = 0xa8, .overflow = 0x00 },
+		[0x55] = { .lshift = 0xaa, .overflow = 0x00 },
+		[0x56] = { .lshift = 0xac, .overflow = 0x00 },
+		[0x57] = { .lshift = 0xae, .overflow = 0x00 },
+		[0x58] = { .lshift = 0xb0, .overflow = 0x00 },
+		[0x59] = { .lshift = 0xb2, .overflow = 0x00 },
+		[0x5a] = { .lshift = 0xb4, .overflow = 0x00 },
+		[0x5b] = { .lshift = 0xb6, .overflow = 0x00 },
+		[0x5c] = { .lshift = 0xb8, .overflow = 0x00 },
+		[0x5d] = { .lshift = 0xba, .overflow = 0x00 },
+		[0x5e] = { .lshift = 0xbc, .overflow = 0x00 },
+		[0x5f] = { .lshift = 0xbe, .overflow = 0x00 },
+		[0x60] = { .lshift = 0xc0, .overflow = 0x00 },
+		[0x61] = { .lshift = 0xc2, .overflow = 0x00 },
+		[0x62] = { .lshift = 0xc4, .overflow = 0x00 },
+		[0x63] = { .lshift = 0xc6, .overflow = 0x00 },
+		[0x64] = { .lshift = 0xc8, .overflow = 0x00 },
+		[0x65] = { .lshift = 0xca, .overflow = 0x00 },
+		[0x66] = { .lshift = 0xcc, .overflow = 0x00 },
+		[0x67] = { .lshift = 0xce, .overflow = 0x00 },
+		[0x68] = { .lshift = 0xd0, .overflow = 0x00 },
+		[0x69] = { .lshift = 0xd2, .overflow = 0x00 },
+		[0x6a] = { .lshift = 0xd4, .overflow = 0x00 },
+		[0x6b] = { .lshift = 0xd6, .overflow = 0x00 },
+		[0x6c] = { .lshift = 0xd8, .overflow = 0x00 },
+		[0x6d] = { .lshift = 0xda, .overflow = 0x00 },
+		[0x6e] = { .lshift = 0xdc, .overflow = 0x00 },
+		[0x6f] = { .lshift = 0xde, .overflow = 0x00 },
+		[0x70] = { .lshift = 0xe0, .overflow = 0x00 },
+		[0x71] = { .lshift = 0xe2, .overflow = 0x00 },
+		[0x72] = { .lshift = 0xe4, .overflow = 0x00 },
+		[0x73] = { .lshift = 0xe6, .overflow = 0x00 },
+		[0x74] = { .lshift = 0xe8, .overflow = 0x00 },
+		[0x75] = { .lshift = 0xea, .overflow = 0x00 },
+		[0x76] = { .lshift = 0xec, .overflow = 0x00 },
+		[0x77] = { .lshift = 0xee, .overflow = 0x00 },
+		[0x78] = { .lshift = 0xf0, .overflow = 0x00 },
+		[0x79] = { .lshift = 0xf2, .overflow = 0x00 },
+		[0x7a] = { .lshift = 0xf4, .overflow = 0x00 },
+		[0x7b] = { .lshift = 0xf6, .overflow = 0x00 },
+		[0x7c] = { .lshift = 0xf8, .overflow = 0x00 },
+		[0x7d] = { .lshift = 0xfa, .overflow = 0x00 },
+		[0x7e] = { .lshift = 0xfc, .overflow = 0x00 },
+		[0x7f] = { .lshift = 0xfe, .overflow = 0x00 },
+		[0x80] = { .lshift = 0x00, .overflow = 0x01 },
+		[0x81] = { .lshift = 0x02, .overflow = 0x01 },
+		[0x82] = { .lshift = 0x04, .overflow = 0x01 },
+		[0x83] = { .lshift = 0x06, .overflow = 0x01 },
+		[0x84] = { .lshift = 0x08, .overflow = 0x01 },
+		[0x85] = { .lshift = 0x0a, .overflow = 0x01 },
+		[0x86] = { .lshift = 0x0c, .overflow = 0x01 },
+		[0x87] = { .lshift = 0x0e, .overflow = 0x01 },
+		[0x88] = { .lshift = 0x10, .overflow = 0x01 },
+		[0x89] = { .lshift = 0x12, .overflow = 0x01 },
+		[0x8a] = { .lshift = 0x14, .overflow = 0x01 },
+		[0x8b] = { .lshift = 0x16, .overflow = 0x01 },
+		[0x8c] = { .lshift = 0x18, .overflow = 0x01 },
+		[0x8d] = { .lshift = 0x1a, .overflow = 0x01 },
+		[0x8e] = { .lshift = 0x1c, .overflow = 0x01 },
+		[0x8f] = { .lshift = 0x1e, .overflow = 0x01 },
+		[0x90] = { .lshift = 0x20, .overflow = 0x01 },
+		[0x91] = { .lshift = 0x22, .overflow = 0x01 },
+		[0x92] = { .lshift = 0x24, .overflow = 0x01 },
+		[0x93] = { .lshift = 0x26, .overflow = 0x01 },
+		[0x94] = { .lshift = 0x28, .overflow = 0x01 },
+		[0x95] = { .lshift = 0x2a, .overflow = 0x01 },
+		[0x96] = { .lshift = 0x2c, .overflow = 0x01 },
+		[0x97] = { .lshift = 0x2e, .overflow = 0x01 },
+		[0x98] = { .lshift = 0x30, .overflow = 0x01 },
+		[0x99] = { .lshift = 0x32, .overflow = 0x01 },
+		[0x9a] = { .lshift = 0x34, .overflow = 0x01 },
+		[0x9b] = { .lshift = 0x36, .overflow = 0x01 },
+		[0x9c] = { .lshift = 0x38, .overflow = 0x01 },
+		[0x9d] = { .lshift = 0x3a, .overflow = 0x01 },
+		[0x9e] = { .lshift = 0x3c, .overflow = 0x01 },
+		[0x9f] = { .lshift = 0x3e, .overflow = 0x01 },
+		[0xa0] = { .lshift = 0x40, .overflow = 0x01 },
+		[0xa1] = { .lshift = 0x42, .overflow = 0x01 },
+		[0xa2] = { .lshift = 0x44, .overflow = 0x01 },
+		[0xa3] = { .lshift = 0x46, .overflow = 0x01 },
+		[0xa4] = { .lshift = 0x48, .overflow = 0x01 },
+		[0xa5] = { .lshift = 0x4a, .overflow = 0x01 },
+		[0xa6] = { .lshift = 0x4c, .overflow = 0x01 },
+		[0xa7] = { .lshift = 0x4e, .overflow = 0x01 },
+		[0xa8] = { .lshift = 0x50, .overflow = 0x01 },
+		[0xa9] = { .lshift = 0x52, .overflow = 0x01 },
+		[0xaa] = { .lshift = 0x54, .overflow = 0x01 },
+		[0xab] = { .lshift = 0x56, .overflow = 0x01 },
+		[0xac] = { .lshift = 0x58, .overflow = 0x01 },
+		[0xad] = { .lshift = 0x5a, .overflow = 0x01 },
+		[0xae] = { .lshift = 0x5c, .overflow = 0x01 },
+		[0xaf] = { .lshift = 0x5e, .overflow = 0x01 },
+		[0xb0] = { .lshift = 0x60, .overflow = 0x01 },
+		[0xb1] = { .lshift = 0x62, .overflow = 0x01 },
+		[0xb2] = { .lshift = 0x64, .overflow = 0x01 },
+		[0xb3] = { .lshift = 0x66, .overflow = 0x01 },
+		[0xb4] = { .lshift = 0x68, .overflow = 0x01 },
+		[0xb5] = { .lshift = 0x6a, .overflow = 0x01 },
+		[0xb6] = { .lshift = 0x6c, .overflow = 0x01 },
+		[0xb7] = { .lshift = 0x6e, .overflow = 0x01 },
+		[0xb8] = { .lshift = 0x70, .overflow = 0x01 },
+		[0xb9] = { .lshift = 0x72, .overflow = 0x01 },
+		[0xba] = { .lshift = 0x74, .overflow = 0x01 },
+		[0xbb] = { .lshift = 0x76, .overflow = 0x01 },
+		[0xbc] = { .lshift = 0x78, .overflow = 0x01 },
+		[0xbd] = { .lshift = 0x7a, .overflow = 0x01 },
+		[0xbe] = { .lshift = 0x7c, .overflow = 0x01 },
+		[0xbf] = { .lshift = 0x7e, .overflow = 0x01 },
+		[0xc0] = { .lshift = 0x80, .overflow = 0x01 },
+		[0xc1] = { .lshift = 0x82, .overflow = 0x01 },
+		[0xc2] = { .lshift = 0x84, .overflow = 0x01 },
+		[0xc3] = { .lshift = 0x86, .overflow = 0x01 },
+		[0xc4] = { .lshift = 0x88, .overflow = 0x01 },
+		[0xc5] = { .lshift = 0x8a, .overflow = 0x01 },
+		[0xc6] = { .lshift = 0x8c, .overflow = 0x01 },
+		[0xc7] = { .lshift = 0x8e, .overflow = 0x01 },
+		[0xc8] = { .lshift = 0x90, .overflow = 0x01 },
+		[0xc9] = { .lshift = 0x92, .overflow = 0x01 },
+		[0xca] = { .lshift = 0x94, .overflow = 0x01 },
+		[0xcb] = { .lshift = 0x96, .overflow = 0x01 },
+		[0xcc] = { .lshift = 0x98, .overflow = 0x01 },
+		[0xcd] = { .lshift = 0x9a, .overflow = 0x01 },
+		[0xce] = { .lshift = 0x9c, .overflow = 0x01 },
+		[0xcf] = { .lshift = 0x9e, .overflow = 0x01 },
+		[0xd0] = { .lshift = 0xa0, .overflow = 0x01 },
+		[0xd1] = { .lshift = 0xa2, .overflow = 0x01 },
+		[0xd2] = { .lshift = 0xa4, .overflow = 0x01 },
+		[0xd3] = { .lshift = 0xa6, .overflow = 0x01 },
+		[0xd4] = { .lshift = 0xa8, .overflow = 0x01 },
+		[0xd5] = { .lshift = 0xaa, .overflow = 0x01 },
+		[0xd6] = { .lshift = 0xac, .overflow = 0x01 },
+		[0xd7] = { .lshift = 0xae, .overflow = 0x01 },
+		[0xd8] = { .lshift = 0xb0, .overflow = 0x01 },
+		[0xd9] = { .lshift = 0xb2, .overflow = 0x01 },
+		[0xda] = { .lshift = 0xb4, .overflow = 0x01 },
+		[0xdb] = { .lshift = 0xb6, .overflow = 0x01 },
+		[0xdc] = { .lshift = 0xb8, .overflow = 0x01 },
+		[0xdd] = { .lshift = 0xba, .overflow = 0x01 },
+		[0xde] = { .lshift = 0xbc, .overflow = 0x01 },
+		[0xdf] = { .lshift = 0xbe, .overflow = 0x01 },
+		[0xe0] = { .lshift = 0xc0, .overflow = 0x01 },
+		[0xe1] = { .lshift = 0xc2, .overflow = 0x01 },
+		[0xe2] = { .lshift = 0xc4, .overflow = 0x01 },
+		[0xe3] = { .lshift = 0xc6, .overflow = 0x01 },
+		[0xe4] = { .lshift = 0xc8, .overflow = 0x01 },
+		[0xe5] = { .lshift = 0xca, .overflow = 0x01 },
+		[0xe6] = { .lshift = 0xcc, .overflow = 0x01 },
+		[0xe7] = { .lshift = 0xce, .overflow = 0x01 },
+		[0xe8] = { .lshift = 0xd0, .overflow = 0x01 },
+		[0xe9] = { .lshift = 0xd2, .overflow = 0x01 },
+		[0xea] = { .lshift = 0xd4, .overflow = 0x01 },
+		[0xeb] = { .lshift = 0xd6, .overflow = 0x01 },
+		[0xec] = { .lshift = 0xd8, .overflow = 0x01 },
+		[0xed] = { .lshift = 0xda, .overflow = 0x01 },
+		[0xee] = { .lshift = 0xdc, .overflow = 0x01 },
+		[0xef] = { .lshift = 0xde, .overflow = 0x01 },
+		[0xf0] = { .lshift = 0xe0, .overflow = 0x01 },
+		[0xf1] = { .lshift = 0xe2, .overflow = 0x01 },
+		[0xf2] = { .lshift = 0xe4, .overflow = 0x01 },
+		[0xf3] = { .lshift = 0xe6, .overflow = 0x01 },
+		[0xf4] = { .lshift = 0xe8, .overflow = 0x01 },
+		[0xf5] = { .lshift = 0xea, .overflow = 0x01 },
+		[0xf6] = { .lshift = 0xec, .overflow = 0x01 },
+		[0xf7] = { .lshift = 0xee, .overflow = 0x01 },
+		[0xf8] = { .lshift = 0xf0, .overflow = 0x01 },
+		[0xf9] = { .lshift = 0xf2, .overflow = 0x01 },
+		[0xfa] = { .lshift = 0xf4, .overflow = 0x01 },
+		[0xfb] = { .lshift = 0xf6, .overflow = 0x01 },
+		[0xfc] = { .lshift = 0xf8, .overflow = 0x01 },
+		[0xfd] = { .lshift = 0xfa, .overflow = 0x01 },
+		[0xfe] = { .lshift = 0xfc, .overflow = 0x01 },
+		[0xff] = { .lshift = 0xfe, .overflow = 0x01 },
+	};
+	int8_t i;
+	uint8_t overflow = 0;
+
+	for (i = AES_BLOCK_SIZE - 1; i >= 0; i--) {
+		const struct aes_block_lshift_entry *e = &aes_block_lshift_table[in[i]];
+		out[i] = e->lshift | overflow;
+		overflow = e->overflow;
+	}
+}
+
+static inline void aes_block_rshift(const uint8_t in[AES_BLOCK_SIZE],
+				    uint8_t out[AES_BLOCK_SIZE])
+{
+	static const struct aes_block_rshift_entry {
+		uint8_t rshift;
+		uint8_t overflow;
+	} aes_block_rshift_table[UINT8_MAX+1] = {
+		[0x00] = { .rshift = 0x00, .overflow = 0x00 },
+		[0x01] = { .rshift = 0x00, .overflow = 0x80 },
+		[0x02] = { .rshift = 0x01, .overflow = 0x00 },
+		[0x03] = { .rshift = 0x01, .overflow = 0x80 },
+		[0x04] = { .rshift = 0x02, .overflow = 0x00 },
+		[0x05] = { .rshift = 0x02, .overflow = 0x80 },
+		[0x06] = { .rshift = 0x03, .overflow = 0x00 },
+		[0x07] = { .rshift = 0x03, .overflow = 0x80 },
+		[0x08] = { .rshift = 0x04, .overflow = 0x00 },
+		[0x09] = { .rshift = 0x04, .overflow = 0x80 },
+		[0x0a] = { .rshift = 0x05, .overflow = 0x00 },
+		[0x0b] = { .rshift = 0x05, .overflow = 0x80 },
+		[0x0c] = { .rshift = 0x06, .overflow = 0x00 },
+		[0x0d] = { .rshift = 0x06, .overflow = 0x80 },
+		[0x0e] = { .rshift = 0x07, .overflow = 0x00 },
+		[0x0f] = { .rshift = 0x07, .overflow = 0x80 },
+		[0x10] = { .rshift = 0x08, .overflow = 0x00 },
+		[0x11] = { .rshift = 0x08, .overflow = 0x80 },
+		[0x12] = { .rshift = 0x09, .overflow = 0x00 },
+		[0x13] = { .rshift = 0x09, .overflow = 0x80 },
+		[0x14] = { .rshift = 0x0a, .overflow = 0x00 },
+		[0x15] = { .rshift = 0x0a, .overflow = 0x80 },
+		[0x16] = { .rshift = 0x0b, .overflow = 0x00 },
+		[0x17] = { .rshift = 0x0b, .overflow = 0x80 },
+		[0x18] = { .rshift = 0x0c, .overflow = 0x00 },
+		[0x19] = { .rshift = 0x0c, .overflow = 0x80 },
+		[0x1a] = { .rshift = 0x0d, .overflow = 0x00 },
+		[0x1b] = { .rshift = 0x0d, .overflow = 0x80 },
+		[0x1c] = { .rshift = 0x0e, .overflow = 0x00 },
+		[0x1d] = { .rshift = 0x0e, .overflow = 0x80 },
+		[0x1e] = { .rshift = 0x0f, .overflow = 0x00 },
+		[0x1f] = { .rshift = 0x0f, .overflow = 0x80 },
+		[0x20] = { .rshift = 0x10, .overflow = 0x00 },
+		[0x21] = { .rshift = 0x10, .overflow = 0x80 },
+		[0x22] = { .rshift = 0x11, .overflow = 0x00 },
+		[0x23] = { .rshift = 0x11, .overflow = 0x80 },
+		[0x24] = { .rshift = 0x12, .overflow = 0x00 },
+		[0x25] = { .rshift = 0x12, .overflow = 0x80 },
+		[0x26] = { .rshift = 0x13, .overflow = 0x00 },
+		[0x27] = { .rshift = 0x13, .overflow = 0x80 },
+		[0x28] = { .rshift = 0x14, .overflow = 0x00 },
+		[0x29] = { .rshift = 0x14, .overflow = 0x80 },
+		[0x2a] = { .rshift = 0x15, .overflow = 0x00 },
+		[0x2b] = { .rshift = 0x15, .overflow = 0x80 },
+		[0x2c] = { .rshift = 0x16, .overflow = 0x00 },
+		[0x2d] = { .rshift = 0x16, .overflow = 0x80 },
+		[0x2e] = { .rshift = 0x17, .overflow = 0x00 },
+		[0x2f] = { .rshift = 0x17, .overflow = 0x80 },
+		[0x30] = { .rshift = 0x18, .overflow = 0x00 },
+		[0x31] = { .rshift = 0x18, .overflow = 0x80 },
+		[0x32] = { .rshift = 0x19, .overflow = 0x00 },
+		[0x33] = { .rshift = 0x19, .overflow = 0x80 },
+		[0x34] = { .rshift = 0x1a, .overflow = 0x00 },
+		[0x35] = { .rshift = 0x1a, .overflow = 0x80 },
+		[0x36] = { .rshift = 0x1b, .overflow = 0x00 },
+		[0x37] = { .rshift = 0x1b, .overflow = 0x80 },
+		[0x38] = { .rshift = 0x1c, .overflow = 0x00 },
+		[0x39] = { .rshift = 0x1c, .overflow = 0x80 },
+		[0x3a] = { .rshift = 0x1d, .overflow = 0x00 },
+		[0x3b] = { .rshift = 0x1d, .overflow = 0x80 },
+		[0x3c] = { .rshift = 0x1e, .overflow = 0x00 },
+		[0x3d] = { .rshift = 0x1e, .overflow = 0x80 },
+		[0x3e] = { .rshift = 0x1f, .overflow = 0x00 },
+		[0x3f] = { .rshift = 0x1f, .overflow = 0x80 },
+		[0x40] = { .rshift = 0x20, .overflow = 0x00 },
+		[0x41] = { .rshift = 0x20, .overflow = 0x80 },
+		[0x42] = { .rshift = 0x21, .overflow = 0x00 },
+		[0x43] = { .rshift = 0x21, .overflow = 0x80 },
+		[0x44] = { .rshift = 0x22, .overflow = 0x00 },
+		[0x45] = { .rshift = 0x22, .overflow = 0x80 },
+		[0x46] = { .rshift = 0x23, .overflow = 0x00 },
+		[0x47] = { .rshift = 0x23, .overflow = 0x80 },
+		[0x48] = { .rshift = 0x24, .overflow = 0x00 },
+		[0x49] = { .rshift = 0x24, .overflow = 0x80 },
+		[0x4a] = { .rshift = 0x25, .overflow = 0x00 },
+		[0x4b] = { .rshift = 0x25, .overflow = 0x80 },
+		[0x4c] = { .rshift = 0x26, .overflow = 0x00 },
+		[0x4d] = { .rshift = 0x26, .overflow = 0x80 },
+		[0x4e] = { .rshift = 0x27, .overflow = 0x00 },
+		[0x4f] = { .rshift = 0x27, .overflow = 0x80 },
+		[0x50] = { .rshift = 0x28, .overflow = 0x00 },
+		[0x51] = { .rshift = 0x28, .overflow = 0x80 },
+		[0x52] = { .rshift = 0x29, .overflow = 0x00 },
+		[0x53] = { .rshift = 0x29, .overflow = 0x80 },
+		[0x54] = { .rshift = 0x2a, .overflow = 0x00 },
+		[0x55] = { .rshift = 0x2a, .overflow = 0x80 },
+		[0x56] = { .rshift = 0x2b, .overflow = 0x00 },
+		[0x57] = { .rshift = 0x2b, .overflow = 0x80 },
+		[0x58] = { .rshift = 0x2c, .overflow = 0x00 },
+		[0x59] = { .rshift = 0x2c, .overflow = 0x80 },
+		[0x5a] = { .rshift = 0x2d, .overflow = 0x00 },
+		[0x5b] = { .rshift = 0x2d, .overflow = 0x80 },
+		[0x5c] = { .rshift = 0x2e, .overflow = 0x00 },
+		[0x5d] = { .rshift = 0x2e, .overflow = 0x80 },
+		[0x5e] = { .rshift = 0x2f, .overflow = 0x00 },
+		[0x5f] = { .rshift = 0x2f, .overflow = 0x80 },
+		[0x60] = { .rshift = 0x30, .overflow = 0x00 },
+		[0x61] = { .rshift = 0x30, .overflow = 0x80 },
+		[0x62] = { .rshift = 0x31, .overflow = 0x00 },
+		[0x63] = { .rshift = 0x31, .overflow = 0x80 },
+		[0x64] = { .rshift = 0x32, .overflow = 0x00 },
+		[0x65] = { .rshift = 0x32, .overflow = 0x80 },
+		[0x66] = { .rshift = 0x33, .overflow = 0x00 },
+		[0x67] = { .rshift = 0x33, .overflow = 0x80 },
+		[0x68] = { .rshift = 0x34, .overflow = 0x00 },
+		[0x69] = { .rshift = 0x34, .overflow = 0x80 },
+		[0x6a] = { .rshift = 0x35, .overflow = 0x00 },
+		[0x6b] = { .rshift = 0x35, .overflow = 0x80 },
+		[0x6c] = { .rshift = 0x36, .overflow = 0x00 },
+		[0x6d] = { .rshift = 0x36, .overflow = 0x80 },
+		[0x6e] = { .rshift = 0x37, .overflow = 0x00 },
+		[0x6f] = { .rshift = 0x37, .overflow = 0x80 },
+		[0x70] = { .rshift = 0x38, .overflow = 0x00 },
+		[0x71] = { .rshift = 0x38, .overflow = 0x80 },
+		[0x72] = { .rshift = 0x39, .overflow = 0x00 },
+		[0x73] = { .rshift = 0x39, .overflow = 0x80 },
+		[0x74] = { .rshift = 0x3a, .overflow = 0x00 },
+		[0x75] = { .rshift = 0x3a, .overflow = 0x80 },
+		[0x76] = { .rshift = 0x3b, .overflow = 0x00 },
+		[0x77] = { .rshift = 0x3b, .overflow = 0x80 },
+		[0x78] = { .rshift = 0x3c, .overflow = 0x00 },
+		[0x79] = { .rshift = 0x3c, .overflow = 0x80 },
+		[0x7a] = { .rshift = 0x3d, .overflow = 0x00 },
+		[0x7b] = { .rshift = 0x3d, .overflow = 0x80 },
+		[0x7c] = { .rshift = 0x3e, .overflow = 0x00 },
+		[0x7d] = { .rshift = 0x3e, .overflow = 0x80 },
+		[0x7e] = { .rshift = 0x3f, .overflow = 0x00 },
+		[0x7f] = { .rshift = 0x3f, .overflow = 0x80 },
+		[0x80] = { .rshift = 0x40, .overflow = 0x00 },
+		[0x81] = { .rshift = 0x40, .overflow = 0x80 },
+		[0x82] = { .rshift = 0x41, .overflow = 0x00 },
+		[0x83] = { .rshift = 0x41, .overflow = 0x80 },
+		[0x84] = { .rshift = 0x42, .overflow = 0x00 },
+		[0x85] = { .rshift = 0x42, .overflow = 0x80 },
+		[0x86] = { .rshift = 0x43, .overflow = 0x00 },
+		[0x87] = { .rshift = 0x43, .overflow = 0x80 },
+		[0x88] = { .rshift = 0x44, .overflow = 0x00 },
+		[0x89] = { .rshift = 0x44, .overflow = 0x80 },
+		[0x8a] = { .rshift = 0x45, .overflow = 0x00 },
+		[0x8b] = { .rshift = 0x45, .overflow = 0x80 },
+		[0x8c] = { .rshift = 0x46, .overflow = 0x00 },
+		[0x8d] = { .rshift = 0x46, .overflow = 0x80 },
+		[0x8e] = { .rshift = 0x47, .overflow = 0x00 },
+		[0x8f] = { .rshift = 0x47, .overflow = 0x80 },
+		[0x90] = { .rshift = 0x48, .overflow = 0x00 },
+		[0x91] = { .rshift = 0x48, .overflow = 0x80 },
+		[0x92] = { .rshift = 0x49, .overflow = 0x00 },
+		[0x93] = { .rshift = 0x49, .overflow = 0x80 },
+		[0x94] = { .rshift = 0x4a, .overflow = 0x00 },
+		[0x95] = { .rshift = 0x4a, .overflow = 0x80 },
+		[0x96] = { .rshift = 0x4b, .overflow = 0x00 },
+		[0x97] = { .rshift = 0x4b, .overflow = 0x80 },
+		[0x98] = { .rshift = 0x4c, .overflow = 0x00 },
+		[0x99] = { .rshift = 0x4c, .overflow = 0x80 },
+		[0x9a] = { .rshift = 0x4d, .overflow = 0x00 },
+		[0x9b] = { .rshift = 0x4d, .overflow = 0x80 },
+		[0x9c] = { .rshift = 0x4e, .overflow = 0x00 },
+		[0x9d] = { .rshift = 0x4e, .overflow = 0x80 },
+		[0x9e] = { .rshift = 0x4f, .overflow = 0x00 },
+		[0x9f] = { .rshift = 0x4f, .overflow = 0x80 },
+		[0xa0] = { .rshift = 0x50, .overflow = 0x00 },
+		[0xa1] = { .rshift = 0x50, .overflow = 0x80 },
+		[0xa2] = { .rshift = 0x51, .overflow = 0x00 },
+		[0xa3] = { .rshift = 0x51, .overflow = 0x80 },
+		[0xa4] = { .rshift = 0x52, .overflow = 0x00 },
+		[0xa5] = { .rshift = 0x52, .overflow = 0x80 },
+		[0xa6] = { .rshift = 0x53, .overflow = 0x00 },
+		[0xa7] = { .rshift = 0x53, .overflow = 0x80 },
+		[0xa8] = { .rshift = 0x54, .overflow = 0x00 },
+		[0xa9] = { .rshift = 0x54, .overflow = 0x80 },
+		[0xaa] = { .rshift = 0x55, .overflow = 0x00 },
+		[0xab] = { .rshift = 0x55, .overflow = 0x80 },
+		[0xac] = { .rshift = 0x56, .overflow = 0x00 },
+		[0xad] = { .rshift = 0x56, .overflow = 0x80 },
+		[0xae] = { .rshift = 0x57, .overflow = 0x00 },
+		[0xaf] = { .rshift = 0x57, .overflow = 0x80 },
+		[0xb0] = { .rshift = 0x58, .overflow = 0x00 },
+		[0xb1] = { .rshift = 0x58, .overflow = 0x80 },
+		[0xb2] = { .rshift = 0x59, .overflow = 0x00 },
+		[0xb3] = { .rshift = 0x59, .overflow = 0x80 },
+		[0xb4] = { .rshift = 0x5a, .overflow = 0x00 },
+		[0xb5] = { .rshift = 0x5a, .overflow = 0x80 },
+		[0xb6] = { .rshift = 0x5b, .overflow = 0x00 },
+		[0xb7] = { .rshift = 0x5b, .overflow = 0x80 },
+		[0xb8] = { .rshift = 0x5c, .overflow = 0x00 },
+		[0xb9] = { .rshift = 0x5c, .overflow = 0x80 },
+		[0xba] = { .rshift = 0x5d, .overflow = 0x00 },
+		[0xbb] = { .rshift = 0x5d, .overflow = 0x80 },
+		[0xbc] = { .rshift = 0x5e, .overflow = 0x00 },
+		[0xbd] = { .rshift = 0x5e, .overflow = 0x80 },
+		[0xbe] = { .rshift = 0x5f, .overflow = 0x00 },
+		[0xbf] = { .rshift = 0x5f, .overflow = 0x80 },
+		[0xc0] = { .rshift = 0x60, .overflow = 0x00 },
+		[0xc1] = { .rshift = 0x60, .overflow = 0x80 },
+		[0xc2] = { .rshift = 0x61, .overflow = 0x00 },
+		[0xc3] = { .rshift = 0x61, .overflow = 0x80 },
+		[0xc4] = { .rshift = 0x62, .overflow = 0x00 },
+		[0xc5] = { .rshift = 0x62, .overflow = 0x80 },
+		[0xc6] = { .rshift = 0x63, .overflow = 0x00 },
+		[0xc7] = { .rshift = 0x63, .overflow = 0x80 },
+		[0xc8] = { .rshift = 0x64, .overflow = 0x00 },
+		[0xc9] = { .rshift = 0x64, .overflow = 0x80 },
+		[0xca] = { .rshift = 0x65, .overflow = 0x00 },
+		[0xcb] = { .rshift = 0x65, .overflow = 0x80 },
+		[0xcc] = { .rshift = 0x66, .overflow = 0x00 },
+		[0xcd] = { .rshift = 0x66, .overflow = 0x80 },
+		[0xce] = { .rshift = 0x67, .overflow = 0x00 },
+		[0xcf] = { .rshift = 0x67, .overflow = 0x80 },
+		[0xd0] = { .rshift = 0x68, .overflow = 0x00 },
+		[0xd1] = { .rshift = 0x68, .overflow = 0x80 },
+		[0xd2] = { .rshift = 0x69, .overflow = 0x00 },
+		[0xd3] = { .rshift = 0x69, .overflow = 0x80 },
+		[0xd4] = { .rshift = 0x6a, .overflow = 0x00 },
+		[0xd5] = { .rshift = 0x6a, .overflow = 0x80 },
+		[0xd6] = { .rshift = 0x6b, .overflow = 0x00 },
+		[0xd7] = { .rshift = 0x6b, .overflow = 0x80 },
+		[0xd8] = { .rshift = 0x6c, .overflow = 0x00 },
+		[0xd9] = { .rshift = 0x6c, .overflow = 0x80 },
+		[0xda] = { .rshift = 0x6d, .overflow = 0x00 },
+		[0xdb] = { .rshift = 0x6d, .overflow = 0x80 },
+		[0xdc] = { .rshift = 0x6e, .overflow = 0x00 },
+		[0xdd] = { .rshift = 0x6e, .overflow = 0x80 },
+		[0xde] = { .rshift = 0x6f, .overflow = 0x00 },
+		[0xdf] = { .rshift = 0x6f, .overflow = 0x80 },
+		[0xe0] = { .rshift = 0x70, .overflow = 0x00 },
+		[0xe1] = { .rshift = 0x70, .overflow = 0x80 },
+		[0xe2] = { .rshift = 0x71, .overflow = 0x00 },
+		[0xe3] = { .rshift = 0x71, .overflow = 0x80 },
+		[0xe4] = { .rshift = 0x72, .overflow = 0x00 },
+		[0xe5] = { .rshift = 0x72, .overflow = 0x80 },
+		[0xe6] = { .rshift = 0x73, .overflow = 0x00 },
+		[0xe7] = { .rshift = 0x73, .overflow = 0x80 },
+		[0xe8] = { .rshift = 0x74, .overflow = 0x00 },
+		[0xe9] = { .rshift = 0x74, .overflow = 0x80 },
+		[0xea] = { .rshift = 0x75, .overflow = 0x00 },
+		[0xeb] = { .rshift = 0x75, .overflow = 0x80 },
+		[0xec] = { .rshift = 0x76, .overflow = 0x00 },
+		[0xed] = { .rshift = 0x76, .overflow = 0x80 },
+		[0xee] = { .rshift = 0x77, .overflow = 0x00 },
+		[0xef] = { .rshift = 0x77, .overflow = 0x80 },
+		[0xf0] = { .rshift = 0x78, .overflow = 0x00 },
+		[0xf1] = { .rshift = 0x78, .overflow = 0x80 },
+		[0xf2] = { .rshift = 0x79, .overflow = 0x00 },
+		[0xf3] = { .rshift = 0x79, .overflow = 0x80 },
+		[0xf4] = { .rshift = 0x7a, .overflow = 0x00 },
+		[0xf5] = { .rshift = 0x7a, .overflow = 0x80 },
+		[0xf6] = { .rshift = 0x7b, .overflow = 0x00 },
+		[0xf7] = { .rshift = 0x7b, .overflow = 0x80 },
+		[0xf8] = { .rshift = 0x7c, .overflow = 0x00 },
+		[0xf9] = { .rshift = 0x7c, .overflow = 0x80 },
+		[0xfa] = { .rshift = 0x7d, .overflow = 0x00 },
+		[0xfb] = { .rshift = 0x7d, .overflow = 0x80 },
+		[0xfc] = { .rshift = 0x7e, .overflow = 0x00 },
+		[0xfd] = { .rshift = 0x7e, .overflow = 0x80 },
+		[0xfe] = { .rshift = 0x7f, .overflow = 0x00 },
+		[0xff] = { .rshift = 0x7f, .overflow = 0x80 },
+	};
+	uint8_t i;
+	uint8_t overflow = 0;
+
+	for (i = 0; i < AES_BLOCK_SIZE; i++) {
+		const struct aes_block_rshift_entry *e = &aes_block_rshift_table[in[i]];
+		out[i] = e->rshift | overflow;
+		overflow = e->overflow;
+	}
+}
 #endif /* LIB_CRYPTO_AES_H */
-- 
1.9.1


From c714515147799791ca82310093eb44cdb2ddd596 Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Wed, 12 Aug 2015 11:29:47 +0200
Subject: [PATCH 07/12] lib/crypto: optimize aes_cmac_128

- We avoid local variables so that cleanup can be done lazily
  in aes_cmac_128_final() via ZERO_STRUCTP(ctx)
- We avoid unnecessary memcpy() calls
- We use the optimized aes_block_{xor,lshift}() functions
- Align AES_BLOCK_SIZE arrays to 8 bytes

BUG: https://bugzilla.samba.org/show_bug.cgi?id=11451

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 lib/crypto/aes_cmac_128.c | 104 +++++++++-------------------------------------
 lib/crypto/aes_cmac_128.h |   6 +++
 2 files changed, 25 insertions(+), 85 deletions(-)

diff --git a/lib/crypto/aes_cmac_128.c b/lib/crypto/aes_cmac_128.c
index b630eea..5d71e82 100644
--- a/lib/crypto/aes_cmac_128.c
+++ b/lib/crypto/aes_cmac_128.c
@@ -33,92 +33,42 @@ static const uint8_t const_Rb[] = {
 
 #define _MSB(x) (((x)[0] & 0x80)?1:0)
 
-static inline void aes_cmac_128_left_shift_1(const uint8_t in[AES_BLOCK_SIZE],
-					     uint8_t out[AES_BLOCK_SIZE])
-{
-	uint8_t overflow = 0;
-	int8_t i;
-
-	for (i = AES_BLOCK_SIZE - 1; i >= 0; i--) {
-		out[i] = in[i] << 1;
-
-		out[i] |= overflow;
-
-		overflow = _MSB(&in[i]);
-	}
-}
-
-static inline void aes_cmac_128_xor(const uint8_t in1[AES_BLOCK_SIZE],
-				    const uint8_t in2[AES_BLOCK_SIZE],
-				    uint8_t out[AES_BLOCK_SIZE])
-{
-	uint8_t i;
-
-	for (i = 0; i < AES_BLOCK_SIZE; i++) {
-		out[i] = in1[i] ^ in2[i];
-	}
-}
-
 void aes_cmac_128_init(struct aes_cmac_128_context *ctx,
 		       const uint8_t K[AES_BLOCK_SIZE])
 {
-	uint8_t L[AES_BLOCK_SIZE];
-
 	ZERO_STRUCTP(ctx);
 
 	AES_set_encrypt_key(K, 128, &ctx->aes_key);
 
 	/* step 1 - generate subkeys k1 and k2 */
 
-	AES_encrypt(const_Zero, L, &ctx->aes_key);
+	AES_encrypt(const_Zero, ctx->L, &ctx->aes_key);
 
-	if (_MSB(L) == 0) {
-		aes_cmac_128_left_shift_1(L, ctx->K1);
+	if (_MSB(ctx->L) == 0) {
+		aes_block_lshift(ctx->L, ctx->K1);
 	} else {
-		uint8_t tmp_block[AES_BLOCK_SIZE];
-
-		aes_cmac_128_left_shift_1(L, tmp_block);
-		aes_cmac_128_xor(tmp_block, const_Rb, ctx->K1);
-		ZERO_STRUCT(tmp_block);
+		aes_block_lshift(ctx->L, ctx->tmp);
+		aes_block_xor(ctx->tmp, const_Rb, ctx->K1);
 	}
 
 	if (_MSB(ctx->K1) == 0) {
-		aes_cmac_128_left_shift_1(ctx->K1, ctx->K2);
+		aes_block_lshift(ctx->K1, ctx->K2);
 	} else {
-		uint8_t tmp_block[AES_BLOCK_SIZE];
-
-		aes_cmac_128_left_shift_1(ctx->K1, tmp_block);
-		aes_cmac_128_xor(tmp_block, const_Rb, ctx->K2);
-		ZERO_STRUCT(tmp_block);
+		aes_block_lshift(ctx->K1, ctx->tmp);
+		aes_block_xor(ctx->tmp, const_Rb, ctx->K2);
 	}
-
-	ZERO_STRUCT(L);
 }
 
 void aes_cmac_128_update(struct aes_cmac_128_context *ctx,
-			 const uint8_t *_msg, size_t _msg_len)
+			 const uint8_t *msg, size_t msg_len)
 {
-	uint8_t tmp_block[AES_BLOCK_SIZE];
-	uint8_t Y[AES_BLOCK_SIZE];
-	const uint8_t *msg = _msg;
-	size_t msg_len = _msg_len;
-
-	/*
-	 * copy the remembered last block
-	 */
-	ZERO_STRUCT(tmp_block);
-	if (ctx->last_len) {
-		memcpy(tmp_block, ctx->last, ctx->last_len);
-	}
-
 	/*
 	 * check if we expand the block
 	 */
 	if (ctx->last_len < AES_BLOCK_SIZE) {
 		size_t len = MIN(AES_BLOCK_SIZE - ctx->last_len, msg_len);
 
-		memcpy(&tmp_block[ctx->last_len], msg, len);
-		memcpy(ctx->last, tmp_block, AES_BLOCK_SIZE);
+		memcpy(&ctx->last[ctx->last_len], msg, len);
 		msg += len;
 		msg_len -= len;
 		ctx->last_len += len;
@@ -126,59 +76,43 @@ void aes_cmac_128_update(struct aes_cmac_128_context *ctx,
 
 	if (msg_len == 0) {
 		/* if it is still the last block, we are done */
-		ZERO_STRUCT(tmp_block);
 		return;
 	}
 
 	/*
-	 * It is not the last block anymore
-	 */
-	ZERO_STRUCT(ctx->last);
-	ctx->last_len = 0;
-
-	/*
 	 * now checksum everything but the last block
 	 */
-	aes_cmac_128_xor(ctx->X, tmp_block, Y);
-	AES_encrypt(Y, ctx->X, &ctx->aes_key);
+	aes_block_xor(ctx->X, ctx->last, ctx->Y);
+	AES_encrypt(ctx->Y, ctx->X, &ctx->aes_key);
 
 	while (msg_len > AES_BLOCK_SIZE) {
-		memcpy(tmp_block, msg, AES_BLOCK_SIZE);
+		aes_block_xor(ctx->X, msg, ctx->Y);
+		AES_encrypt(ctx->Y, ctx->X, &ctx->aes_key);
 		msg += AES_BLOCK_SIZE;
 		msg_len -= AES_BLOCK_SIZE;
-
-		aes_cmac_128_xor(ctx->X, tmp_block, Y);
-		AES_encrypt(Y, ctx->X, &ctx->aes_key);
 	}
 
 	/*
 	 * copy the last block, it will be processed in
 	 * aes_cmac_128_final().
 	 */
+	ZERO_STRUCT(ctx->last);
 	memcpy(ctx->last, msg, msg_len);
 	ctx->last_len = msg_len;
-
-	ZERO_STRUCT(tmp_block);
-	ZERO_STRUCT(Y);
 }
 
 void aes_cmac_128_final(struct aes_cmac_128_context *ctx,
 			uint8_t T[AES_BLOCK_SIZE])
 {
-	uint8_t tmp_block[AES_BLOCK_SIZE];
-	uint8_t Y[AES_BLOCK_SIZE];
-
 	if (ctx->last_len < AES_BLOCK_SIZE) {
 		ctx->last[ctx->last_len] = 0x80;
-		aes_cmac_128_xor(ctx->last, ctx->K2, tmp_block);
+		aes_block_xor(ctx->last, ctx->K2, ctx->tmp);
 	} else {
-		aes_cmac_128_xor(ctx->last, ctx->K1, tmp_block);
+		aes_block_xor(ctx->last, ctx->K1, ctx->tmp);
 	}
 
-	aes_cmac_128_xor(tmp_block, ctx->X, Y);
-	AES_encrypt(Y, T, &ctx->aes_key);
+	aes_block_xor(ctx->tmp, ctx->X, ctx->Y);
+	AES_encrypt(ctx->Y, T, &ctx->aes_key);
 
-	ZERO_STRUCT(tmp_block);
-	ZERO_STRUCT(Y);
 	ZERO_STRUCTP(ctx);
 }
diff --git a/lib/crypto/aes_cmac_128.h b/lib/crypto/aes_cmac_128.h
index 28117a0..9bcf951 100644
--- a/lib/crypto/aes_cmac_128.h
+++ b/lib/crypto/aes_cmac_128.h
@@ -22,10 +22,16 @@
 struct aes_cmac_128_context {
 	AES_KEY aes_key;
 
+	uint64_t __align;
+
 	uint8_t K1[AES_BLOCK_SIZE];
 	uint8_t K2[AES_BLOCK_SIZE];
 
+	uint8_t L[AES_BLOCK_SIZE];
 	uint8_t X[AES_BLOCK_SIZE];
+	uint8_t Y[AES_BLOCK_SIZE];
+
+	uint8_t tmp[AES_BLOCK_SIZE];
 
 	uint8_t last[AES_BLOCK_SIZE];
 	size_t last_len;
-- 
1.9.1


From 227ca6a4ae641426ca5597c52e1f544ed2afc150 Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Wed, 12 Aug 2015 11:29:47 +0200
Subject: [PATCH 08/12] lib/crypto: optimize aes_ccm_128

- We avoid local variables in order to do a lazy cleanup
  in aes_ccm_128_digest() via ZERO_STRUCTP(ctx)
- We use the optimized aes_block_xor() function
- We reuse A_i instead of rebuilding it completely every time.
- Align AES_BLOCK_SIZE arrays to 8 bytes

BUG: https://bugzilla.samba.org/show_bug.cgi?id=11451

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 lib/crypto/aes_ccm_128.c | 117 +++++++++++++++++++++++++++++------------------
 lib/crypto/aes_ccm_128.h |   8 +++-
 2 files changed, 78 insertions(+), 47 deletions(-)

diff --git a/lib/crypto/aes_ccm_128.c b/lib/crypto/aes_ccm_128.c
index 94b9803..a821e8d 100644
--- a/lib/crypto/aes_ccm_128.c
+++ b/lib/crypto/aes_ccm_128.c
@@ -24,24 +24,11 @@
 #define M_ ((AES_CCM_128_M - 2) / 2)
 #define L_ (AES_CCM_128_L - 1)
 
-static inline void aes_ccm_128_xor(const uint8_t in1[AES_BLOCK_SIZE],
-				   const uint8_t in2[AES_BLOCK_SIZE],
-				   uint8_t out[AES_BLOCK_SIZE])
-{
-	uint8_t i;
-
-	for (i = 0; i < AES_BLOCK_SIZE; i++) {
-		out[i] = in1[i] ^ in2[i];
-	}
-}
-
 void aes_ccm_128_init(struct aes_ccm_128_context *ctx,
 		      const uint8_t K[AES_BLOCK_SIZE],
 		      const uint8_t N[AES_CCM_128_NONCE_SIZE],
 		      size_t a_total, size_t m_total)
 {
-	uint8_t B_0[AES_BLOCK_SIZE];
-
 	ZERO_STRUCTP(ctx);
 
 	AES_set_encrypt_key(K, 128, &ctx->aes_key);
@@ -52,22 +39,23 @@ void aes_ccm_128_init(struct aes_ccm_128_context *ctx,
 	/*
 	 * prepare B_0
 	 */
-	B_0[0]  = L_;
-	B_0[0] += 8 * M_;
+	ctx->B_i[0]  = L_;
+	ctx->B_i[0] += 8 * M_;
 	if (a_total > 0) {
-		B_0[0] += 64;
+		ctx->B_i[0] += 64;
 	}
-	memcpy(&B_0[1], ctx->nonce, AES_CCM_128_NONCE_SIZE);
-	RSIVAL(B_0, (AES_BLOCK_SIZE - AES_CCM_128_L), m_total);
+	memcpy(&ctx->B_i[1], ctx->nonce, AES_CCM_128_NONCE_SIZE);
+	RSIVAL(ctx->B_i, (AES_BLOCK_SIZE - AES_CCM_128_L), m_total);
 
 	/*
 	 * prepare X_1
 	 */
-	AES_encrypt(B_0, ctx->X_i, &ctx->aes_key);
+	AES_encrypt(ctx->B_i, ctx->X_i, &ctx->aes_key);
 
 	/*
 	 * prepare B_1
 	 */
+	ZERO_STRUCT(ctx->B_i);
 	if (a_total >= UINT32_MAX) {
 		RSSVAL(ctx->B_i, 0, 0xFFFF);
 		RSBVAL(ctx->B_i, 2, (uint64_t)a_total);
@@ -81,6 +69,12 @@ void aes_ccm_128_init(struct aes_ccm_128_context *ctx,
 		ctx->B_i_ofs = 2;
 	}
 
+	/*
+	 * prepare A_i
+	 */
+	ctx->A_i[0]  = L_;
+	memcpy(&ctx->A_i[1], ctx->nonce, AES_CCM_128_NONCE_SIZE);
+
 	ctx->S_i_ofs = AES_BLOCK_SIZE;
 }
 
@@ -89,51 +83,70 @@ void aes_ccm_128_update(struct aes_ccm_128_context *ctx,
 {
 	size_t *remain;
 
+	if (v_len == 0) {
+		return;
+	}
+
 	if (ctx->a_remain > 0) {
 		remain = &ctx->a_remain;
 	} else {
 		remain = &ctx->m_remain;
 	}
 
-	while (v_len > 0) {
+	if (unlikely(v_len > *remain)) {
+		abort();
+	}
+
+	if (ctx->B_i_ofs > 0) {
 		size_t n = MIN(AES_BLOCK_SIZE - ctx->B_i_ofs, v_len);
-		bool more = true;
 
 		memcpy(&ctx->B_i[ctx->B_i_ofs], v, n);
 		v += n;
 		v_len -= n;
 		ctx->B_i_ofs += n;
 		*remain -= n;
+	}
 
-		if (ctx->B_i_ofs == AES_BLOCK_SIZE) {
-			more = false;
-		} else if (*remain == 0) {
-			more = false;
-		}
-
-		if (more) {
-			continue;
-		}
+	if ((ctx->B_i_ofs == AES_BLOCK_SIZE) || (*remain == 0)) {
+		aes_block_xor(ctx->X_i, ctx->B_i, ctx->B_i);
+		AES_encrypt(ctx->B_i, ctx->X_i, &ctx->aes_key);
+		ctx->B_i_ofs = 0;
+	}
 
-		aes_ccm_128_xor(ctx->X_i, ctx->B_i, ctx->B_i);
+	while (v_len >= AES_BLOCK_SIZE) {
+		aes_block_xor(ctx->X_i, v, ctx->B_i);
 		AES_encrypt(ctx->B_i, ctx->X_i, &ctx->aes_key);
+		v += AES_BLOCK_SIZE;
+		v_len -= AES_BLOCK_SIZE;
+		*remain -= AES_BLOCK_SIZE;
+	}
 
+	if (v_len > 0) {
 		ZERO_STRUCT(ctx->B_i);
+		memcpy(ctx->B_i, v, v_len);
+		ctx->B_i_ofs += v_len;
+		*remain -= v_len;
+		v = NULL;
+		v_len = 0;
+	}
+
+	if (*remain > 0) {
+		return;
+	}
+
+	if (ctx->B_i_ofs > 0) {
+		aes_block_xor(ctx->X_i, ctx->B_i, ctx->B_i);
+		AES_encrypt(ctx->B_i, ctx->X_i, &ctx->aes_key);
 		ctx->B_i_ofs = 0;
 	}
 }
 
-static void aes_ccm_128_S_i(struct aes_ccm_128_context *ctx,
-			    uint8_t S_i[AES_BLOCK_SIZE],
-			    size_t i)
+static inline void aes_ccm_128_S_i(struct aes_ccm_128_context *ctx,
+				   uint8_t S_i[AES_BLOCK_SIZE],
+				   size_t i)
 {
-	uint8_t A_i[AES_BLOCK_SIZE];
-
-	A_i[0]  = L_;
-	memcpy(&A_i[1], ctx->nonce, AES_CCM_128_NONCE_SIZE);
-	RSIVAL(A_i, (AES_BLOCK_SIZE - AES_CCM_128_L), i);
-
-	AES_encrypt(A_i, S_i, &ctx->aes_key);
+	RSIVAL(ctx->A_i, (AES_BLOCK_SIZE - AES_CCM_128_L), i);
+	AES_encrypt(ctx->A_i, S_i, &ctx->aes_key);
 }
 
 void aes_ccm_128_crypt(struct aes_ccm_128_context *ctx,
@@ -146,6 +159,15 @@ void aes_ccm_128_crypt(struct aes_ccm_128_context *ctx,
 			ctx->S_i_ofs = 0;
 		}
 
+		if (likely(ctx->S_i_ofs == 0 && m_len >= AES_BLOCK_SIZE)) {
+			aes_block_xor(m, ctx->S_i, m);
+			m += AES_BLOCK_SIZE;
+			m_len -= AES_BLOCK_SIZE;
+			ctx->S_i_ctr += 1;
+			aes_ccm_128_S_i(ctx, ctx->S_i, ctx->S_i_ctr);
+			continue;
+		}
+
 		m[0] ^= ctx->S_i[ctx->S_i_ofs];
 		m += 1;
 		m_len -= 1;
@@ -156,15 +178,20 @@ void aes_ccm_128_crypt(struct aes_ccm_128_context *ctx,
 void aes_ccm_128_digest(struct aes_ccm_128_context *ctx,
 			uint8_t digest[AES_BLOCK_SIZE])
 {
-	uint8_t S_0[AES_BLOCK_SIZE];
+	if (unlikely(ctx->a_remain != 0)) {
+		abort();
+	}
+	if (unlikely(ctx->m_remain != 0)) {
+		abort();
+	}
 
-	aes_ccm_128_S_i(ctx, S_0, 0);
+	/* prepare S_0 */
+	aes_ccm_128_S_i(ctx, ctx->S_i, 0);
 
 	/*
 	 * note X_i is T here
 	 */
-	aes_ccm_128_xor(ctx->X_i, S_0, digest);
+	aes_block_xor(ctx->X_i, ctx->S_i, digest);
 
-	ZERO_STRUCT(S_0);
 	ZERO_STRUCTP(ctx);
 }
diff --git a/lib/crypto/aes_ccm_128.h b/lib/crypto/aes_ccm_128.h
index a98c754..1382ee7 100644
--- a/lib/crypto/aes_ccm_128.h
+++ b/lib/crypto/aes_ccm_128.h
@@ -26,16 +26,20 @@
 
 struct aes_ccm_128_context {
 	AES_KEY aes_key;
+
 	uint8_t nonce[AES_CCM_128_NONCE_SIZE];
 
 	size_t a_remain;
 	size_t m_remain;
 
+	uint64_t __align;
+
 	uint8_t X_i[AES_BLOCK_SIZE];
 	uint8_t B_i[AES_BLOCK_SIZE];
-	size_t B_i_ofs;
-
+	uint8_t A_i[AES_BLOCK_SIZE];
 	uint8_t S_i[AES_BLOCK_SIZE];
+
+	size_t B_i_ofs;
 	size_t S_i_ofs;
 	size_t S_i_ctr;
 };
-- 
1.9.1


From 199921e8371e5d7f1e3a8081ac811215488e19d2 Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Wed, 12 Aug 2015 11:29:47 +0200
Subject: [PATCH 09/12] lib/crypto: optimize aes_gcm_128

- We avoid local variables in order to do a lazy cleanup
  in aes_gcm_128_digest() via ZERO_STRUCTP(ctx)
- We use the optimized aes_block_{xor,rshift}() functions
- Align AES_BLOCK_SIZE arrays to 8 bytes

BUG: https://bugzilla.samba.org/show_bug.cgi?id=11451

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 lib/crypto/aes_gcm_128.c | 46 ++++++++++++++++------------------------------
 lib/crypto/aes_gcm_128.h | 15 +++++++++------
 2 files changed, 25 insertions(+), 36 deletions(-)

diff --git a/lib/crypto/aes_gcm_128.c b/lib/crypto/aes_gcm_128.c
index f59d659..bfbf176 100644
--- a/lib/crypto/aes_gcm_128.c
+++ b/lib/crypto/aes_gcm_128.c
@@ -30,35 +30,12 @@ static inline void aes_gcm_128_inc32(uint8_t inout[AES_BLOCK_SIZE])
 	RSIVAL(inout, AES_BLOCK_SIZE - 4, v);
 }
 
-static inline void aes_gcm_128_xor(const uint8_t in1[AES_BLOCK_SIZE],
-				   const uint8_t in2[AES_BLOCK_SIZE],
-				   uint8_t out[AES_BLOCK_SIZE])
-{
-	uint8_t i;
-
-	for (i = 0; i < AES_BLOCK_SIZE; i++) {
-		out[i] = in1[i] ^ in2[i];
-	}
-}
-
-static inline void aes_gcm_128_rightshift(uint8_t x[AES_BLOCK_SIZE])
-{
-	int8_t i;
-
-	for (i = AES_BLOCK_SIZE - 1; i >=0; i--) {
-		x[i] >>= 1;
-		if (i > 0) {
-			x[i] |= (x[i-1] & 1) << 7;
-		}
-	}
-}
-
 static inline void aes_gcm_128_mul(const uint8_t x[AES_BLOCK_SIZE],
 				   const uint8_t y[AES_BLOCK_SIZE],
+				   uint8_t v[AES_BLOCK_SIZE],
 				   uint8_t z[AES_BLOCK_SIZE])
 {
 	uint8_t i;
-	uint8_t v[AES_BLOCK_SIZE];
 	/* 11100001 || 0^120 */
 	static const uint8_t r[AES_BLOCK_SIZE] = {
 		0xE1, 0x00, 0x00, 0x00,
@@ -75,12 +52,12 @@ static inline void aes_gcm_128_mul(const uint8_t x[AES_BLOCK_SIZE],
 		for (mask = 0x80; mask != 0 ; mask >>= 1) {
 			uint8_t v_lsb = v[AES_BLOCK_SIZE-1] & 1;
 			if (x[i] & mask) {
-				aes_gcm_128_xor(z, v, z);
+				aes_block_xor(z, v, z);
 			}
 
-			aes_gcm_128_rightshift(v);
+			aes_block_rshift(v, v);
 			if (v_lsb != 0) {
-				aes_gcm_128_xor(v, r, v);
+				aes_block_xor(v, r, v);
 			}
 		}
 	}
@@ -89,8 +66,8 @@ static inline void aes_gcm_128_mul(const uint8_t x[AES_BLOCK_SIZE],
 static inline void aes_gcm_128_ghash_block(struct aes_gcm_128_context *ctx,
 					   const uint8_t in[AES_BLOCK_SIZE])
 {
-	aes_gcm_128_xor(ctx->Y, in, ctx->y.block);
-	aes_gcm_128_mul(ctx->y.block, ctx->H, ctx->Y);
+	aes_block_xor(ctx->Y, in, ctx->y.block);
+	aes_gcm_128_mul(ctx->y.block, ctx->H, ctx->v.block, ctx->Y);
 }
 
 void aes_gcm_128_init(struct aes_gcm_128_context *ctx,
@@ -184,6 +161,15 @@ static inline void aes_gcm_128_crypt_tmp(struct aes_gcm_128_context *ctx,
 			tmp->ofs = 0;
 		}
 
+		if (likely(tmp->ofs == 0 && m_len >= AES_BLOCK_SIZE)) {
+			aes_block_xor(m, tmp->block, m);
+			m += AES_BLOCK_SIZE;
+			m_len -= AES_BLOCK_SIZE;
+			aes_gcm_128_inc32(ctx->CB);
+			AES_encrypt(ctx->CB, tmp->block, &ctx->aes_key);
+			continue;
+		}
+
 		m[0] ^= tmp->block[tmp->ofs];
 		m += 1;
 		m_len -= 1;
@@ -215,7 +201,7 @@ void aes_gcm_128_digest(struct aes_gcm_128_context *ctx,
 	aes_gcm_128_ghash_block(ctx, ctx->AC);
 
 	AES_encrypt(ctx->J0, ctx->c.block, &ctx->aes_key);
-	aes_gcm_128_xor(ctx->c.block, ctx->Y, T);
+	aes_block_xor(ctx->c.block, ctx->Y, T);
 
 	ZERO_STRUCTP(ctx);
 }
diff --git a/lib/crypto/aes_gcm_128.h b/lib/crypto/aes_gcm_128.h
index 278b6db..8df11c2 100644
--- a/lib/crypto/aes_gcm_128.h
+++ b/lib/crypto/aes_gcm_128.h
@@ -24,17 +24,20 @@
 
 struct aes_gcm_128_context {
 	AES_KEY aes_key;
+
+	uint64_t __align;
+
+	struct aes_gcm_128_tmp {
+		size_t ofs;
+		size_t total;
+		uint8_t block[AES_BLOCK_SIZE];
+	} A, C, c, v, y;
+
 	uint8_t H[AES_BLOCK_SIZE];
 	uint8_t J0[AES_BLOCK_SIZE];
 	uint8_t CB[AES_BLOCK_SIZE];
 	uint8_t Y[AES_BLOCK_SIZE];
 	uint8_t AC[AES_BLOCK_SIZE];
-
-	struct aes_gcm_128_tmp {
-		uint8_t block[AES_BLOCK_SIZE];
-		size_t ofs;
-		size_t total;
-	} A, C, c, y;
 };
 
 void aes_gcm_128_init(struct aes_gcm_128_context *ctx,
-- 
1.9.1


From b05089801eade6a47ccdf16ab70ed626eb5b4d1f Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Fri, 14 Aug 2015 23:45:07 +0200
Subject: [PATCH 10/12] lib/crypto: make use of aes_test.h in
 aes_gcm_128_test.c

BUG: https://bugzilla.samba.org/show_bug.cgi?id=11451

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 lib/crypto/aes_gcm_128_test.c | 358 ++++++++++++++++--------------------------
 1 file changed, 137 insertions(+), 221 deletions(-)

diff --git a/lib/crypto/aes_gcm_128_test.c b/lib/crypto/aes_gcm_128_test.c
index 4b53487..6d2cbf2 100644
--- a/lib/crypto/aes_gcm_128_test.c
+++ b/lib/crypto/aes_gcm_128_test.c
@@ -19,100 +19,141 @@
 #include "replace.h"
 #include "../lib/util/samba_util.h"
 #include "../lib/crypto/crypto.h"
+#include "../lib/crypto/aes_test.h"
 
+#ifndef AES_GCM_128_ONLY_TESTVECTORS
 struct torture_context;
-bool torture_local_crypto_aes_gcm_128(struct torture_context *torture);
+bool torture_local_crypto_aes_gcm_128(struct torture_context *tctx);
 
 /*
  This uses the test values from ...
 */
-bool torture_local_crypto_aes_gcm_128(struct torture_context *torture)
+bool torture_local_crypto_aes_gcm_128(struct torture_context *tctx)
 {
 	bool ret = true;
 	uint32_t i;
-	struct {
-		DATA_BLOB K;
-		DATA_BLOB IV;
-		DATA_BLOB A;
-		DATA_BLOB P;
-		DATA_BLOB C;
-		DATA_BLOB T;
-	} testarray[5];
-
-	TALLOC_CTX *tctx = talloc_new(torture);
-	if (!tctx) { return false; };
-
-	ZERO_STRUCT(testarray);
-
-	testarray[0].K = strhex_to_data_blob(tctx,
-				"00000000000000000000000000000000");
-	testarray[0].IV = strhex_to_data_blob(tctx,
-				"000000000000000000000000");
-	testarray[0].A = data_blob_null;
-	testarray[0].P = data_blob_null;
-	testarray[0].C = data_blob_null;
-	testarray[0].T = strhex_to_data_blob(tctx,
-				"58e2fccefa7e3061367f1d57a4e7455a");
-
-	testarray[1].K = strhex_to_data_blob(tctx,
-				"00000000000000000000000000000000");
-	testarray[1].IV = strhex_to_data_blob(tctx,
-				"000000000000000000000000");
-	testarray[1].A = data_blob_null;
-	testarray[1].P = strhex_to_data_blob(tctx,
-				"00000000000000000000000000000000");
-	testarray[1].C = strhex_to_data_blob(tctx,
-				"0388dace60b6a392f328c2b971b2fe78");
-	testarray[1].T = strhex_to_data_blob(tctx,
-				"ab6e47d42cec13bdf53a67b21257bddf");
-
-	testarray[2].K = strhex_to_data_blob(tctx,
-				"feffe9928665731c6d6a8f9467308308");
-	testarray[2].IV = strhex_to_data_blob(tctx,
-				"cafebabefacedbaddecaf888");
-	testarray[2].A = data_blob_null;
-	testarray[2].P = strhex_to_data_blob(tctx,
-				"d9313225f88406e5a55909c5aff5269a"
-				"86a7a9531534f7da2e4c303d8a318a72"
-				"1c3c0c95956809532fcf0e2449a6b525"
-				"b16aedf5aa0de657ba637b391aafd255");
-	testarray[2].C = strhex_to_data_blob(tctx,
-				"42831ec2217774244b7221b784d0d49c"
-				"e3aa212f2c02a4e035c17e2329aca12e"
-				"21d514b25466931c7d8f6a5aac84aa05"
-				"1ba30b396a0aac973d58e091473f5985");
-	testarray[2].T = strhex_to_data_blob(tctx,
-				"4d5c2af327cd64a62cf35abd2ba6fab4");
-
-	testarray[3].K = strhex_to_data_blob(tctx,
-				"feffe9928665731c6d6a8f9467308308");
-	testarray[3].IV = strhex_to_data_blob(tctx,
-				"cafebabefacedbaddecaf888");
-	testarray[3].A = strhex_to_data_blob(tctx,
-				"feedfacedeadbeeffeedfacedeadbeef"
-				"abaddad2");
-	testarray[3].P = strhex_to_data_blob(tctx,
-				"d9313225f88406e5a55909c5aff5269a"
-				"86a7a9531534f7da2e4c303d8a318a72"
-				"1c3c0c95956809532fcf0e2449a6b525"
-				"b16aedf5aa0de657ba637b39");
-	testarray[3].C = strhex_to_data_blob(tctx,
-				"42831ec2217774244b7221b784d0d49c"
-				"e3aa212f2c02a4e035c17e2329aca12e"
-				"21d514b25466931c7d8f6a5aac84aa05"
-				"1ba30b396a0aac973d58e091");
-	testarray[3].T = strhex_to_data_blob(tctx,
-				"5bc94fbc3221a5db94fae95ae7121a47");
-
-	for (i=0; testarray[i].T.length != 0; i++) {
+	struct aes_mode_testvector testarray[] = {
+#endif /* AES_GCM_128_ONLY_TESTVECTORS */
+#define AES_GCM_128_TESTVECTOR(_k, _n, _a, _p, _c, _t) \
+	AES_MODE_TESTVECTOR(aes_gcm_128, _k, _n, _a, _p, _c, _t)
+
+	AES_GCM_128_TESTVECTOR(
+		/* K */
+		"8BF9FBC2B8149484FF11AB1F3A544FF6",
+		/* N */
+		"010000000000000077F7A8FF",
+		/* A */
+		"010000000000000077F7A80000000000"
+		"A8000000000001004100002C00980000",
+		/* P */
+		"FE534D4240000100000000000B00811F"
+		"00000000000000000600000000000000"
+		"00000000010000004100002C00980000"
+		"00000000000000000000000000000000"
+		"3900000094010600FFFFFFFFFFFFFFFF"
+		"FFFFFFFFFFFFFFFF7800000030000000"
+		"000000007800000000000000FFFF0000"
+		"0100000000000000"
+		"03005C003100370032002E0033003100"
+		"2E0039002E003100380033005C006E00"
+		"650074006C006F0067006F006E000000",
+		/* C */
+		"863C07C1FBFA82D741A080C97DF52CFF"
+		"432A63A37E5ACFA3865AE4E6E422D502"
+		"FA7C6FBB9A7418F28C43F00A3869F687"
+		"257CA665E25E62A0F458C42AA9E95DC4"
+		"6CB351A0A497FABB7DCE58FEE5B20B08"
+		"522E0E701B112FB93B36E7A0FB084D35"
+		"62C0F3FDF0421079DD96BBCCA40949B3"
+		"A7FC1AA635A72384"
+		"2037DE3CA6385465D1884B29D7140790"
+		"88AD3E770E2528D527B302536B7E5B1B"
+		"430E048230AFE785DB89F4D87FC1F816",
+		/* T */
+		"BC9B5871EBFA89ADE21439ACDCD65D22"
+	),
+	AES_GCM_128_TESTVECTOR(
+		/* K */
+		"00000000000000000000000000000000",
+		/* N */
+		"000000000000000000000000",
+		/* A */
+		"",
+		/* P */
+		"",
+		/* C */
+		"",
+		/* T */
+		"58e2fccefa7e3061367f1d57a4e7455a"
+	),
+	AES_GCM_128_TESTVECTOR(
+		/* K */
+		"00000000000000000000000000000000",
+		/* N */
+		"000000000000000000000000",
+		/* A */
+		"",
+		/* P */
+		"00000000000000000000000000000000",
+		/* C */
+		"0388dace60b6a392f328c2b971b2fe78",
+		/* T */
+		"ab6e47d42cec13bdf53a67b21257bddf"
+	),
+	AES_GCM_128_TESTVECTOR(
+		/* K */
+		"feffe9928665731c6d6a8f9467308308",
+		/* N */
+		"cafebabefacedbaddecaf888",
+		/* A */
+		"",
+		/* P */
+		"d9313225f88406e5a55909c5aff5269a"
+		"86a7a9531534f7da2e4c303d8a318a72"
+		"1c3c0c95956809532fcf0e2449a6b525"
+		"b16aedf5aa0de657ba637b391aafd255",
+		/* C */
+		"42831ec2217774244b7221b784d0d49c"
+		"e3aa212f2c02a4e035c17e2329aca12e"
+		"21d514b25466931c7d8f6a5aac84aa05"
+		"1ba30b396a0aac973d58e091473f5985",
+		/* T */
+		"4d5c2af327cd64a62cf35abd2ba6fab4"
+	),
+	AES_GCM_128_TESTVECTOR(
+		/* K */
+		"feffe9928665731c6d6a8f9467308308",
+		/* N */
+		"cafebabefacedbaddecaf888",
+		/* A */
+		"feedfacedeadbeeffeedfacedeadbeef"
+		"abaddad2",
+		/* P */
+		"d9313225f88406e5a55909c5aff5269a"
+		"86a7a9531534f7da2e4c303d8a318a72"
+		"1c3c0c95956809532fcf0e2449a6b525"
+		"b16aedf5aa0de657ba637b39",
+		/* C */
+		"42831ec2217774244b7221b784d0d49c"
+		"e3aa212f2c02a4e035c17e2329aca12e"
+		"21d514b25466931c7d8f6a5aac84aa05"
+		"1ba30b396a0aac973d58e091",
+		/* T */
+		"5bc94fbc3221a5db94fae95ae7121a47"
+	),
+#ifndef AES_GCM_128_ONLY_TESTVECTORS
+	};
+
+	for (i=0; i < ARRAY_SIZE(testarray); i++) {
 		struct aes_gcm_128_context ctx;
 		uint8_t T[AES_BLOCK_SIZE];
+		DATA_BLOB _T = data_blob_const(T, sizeof(T));
 		DATA_BLOB C;
 		int e;
 
 		C = data_blob_dup_talloc(tctx, testarray[i].P);
 
-		aes_gcm_128_init(&ctx, testarray[i].K.data, testarray[i].IV.data);
+		aes_gcm_128_init(&ctx, testarray[i].K.data, testarray[i].N.data);
 		aes_gcm_128_updateA(&ctx,
 				    testarray[i].A.data,
 				    testarray[i].A.length);
@@ -122,61 +163,30 @@ bool torture_local_crypto_aes_gcm_128(struct torture_context *torture)
 
 		e = memcmp(testarray[i].T.data, T, sizeof(T));
 		if (e != 0) {
-			printf("%s: aes_gcm_128 test[%u]: failed\n", __location__, i);
-			printf("K\n");
-			dump_data(0, testarray[i].K.data, testarray[i].K.length);
-			printf("IV\n");
-			dump_data(0, testarray[i].IV.data, testarray[i].IV.length);
-			printf("A\n");
-			dump_data(0, testarray[i].A.data, testarray[i].A.length);
-			printf("P\n");
-			dump_data(0, testarray[i].P.data, testarray[i].P.length);
-			printf("C1\n");
-			dump_data(0, testarray[i].C.data, testarray[i].C.length);
-			printf("C2\n");
-			dump_data(0, C.data, C.length);
-			printf("T1\n");
-			dump_data(0, testarray[i].T.data, testarray[i].T.length);
-			printf("T2\n");
-			dump_data(0, T, sizeof(T));
+			aes_mode_testvector_debug(&testarray[i], NULL, &C, &_T);
 			ret = false;
 			goto fail;
 		}
 
 		e = memcmp(testarray[i].C.data, C.data, C.length);
 		if (e != 0) {
-			printf("%s: aes_gcm_128 test[%u]: failed\n", __location__, i);
-			printf("K\n");
-			dump_data(0, testarray[i].K.data, testarray[i].K.length);
-			printf("IV\n");
-			dump_data(0, testarray[i].IV.data, testarray[i].IV.length);
-			printf("A\n");
-			dump_data(0, testarray[i].A.data, testarray[i].A.length);
-			printf("P\n");
-			dump_data(0, testarray[i].P.data, testarray[i].P.length);
-			printf("C1\n");
-			dump_data(0, testarray[i].C.data, testarray[i].C.length);
-			printf("C2\n");
-			dump_data(0, C.data, C.length);
-			printf("T1\n");
-			dump_data(0, testarray[i].T.data, testarray[i].T.length);
-			printf("T2\n");
-			dump_data(0, T, sizeof(T));
+			aes_mode_testvector_debug(&testarray[i], NULL, &C, &_T);
 			ret = false;
 			goto fail;
 		}
 	}
 
-	for (i=0; testarray[i].T.length != 0; i++) {
+	for (i=0; i < ARRAY_SIZE(testarray); i++) {
 		struct aes_gcm_128_context ctx;
 		uint8_t T[AES_BLOCK_SIZE];
+		DATA_BLOB _T = data_blob_const(T, sizeof(T));
 		DATA_BLOB C;
 		int e;
 		size_t j;
 
 		C = data_blob_dup_talloc(tctx, testarray[i].P);
 
-		aes_gcm_128_init(&ctx, testarray[i].K.data, testarray[i].IV.data);
+		aes_gcm_128_init(&ctx, testarray[i].K.data, testarray[i].N.data);
 		for (j=0; j < testarray[i].A.length; j++) {
 			aes_gcm_128_updateA(&ctx, NULL, 0);
 			aes_gcm_128_updateA(&ctx, &testarray[i].A.data[j], 1);
@@ -194,61 +204,30 @@ bool torture_local_crypto_aes_gcm_128(struct torture_context *torture)
 
 		e = memcmp(testarray[i].T.data, T, sizeof(T));
 		if (e != 0) {
-			printf("%s: aes_gcm_128 test[%u]: failed\n", __location__, i);
-			printf("K\n");
-			dump_data(0, testarray[i].K.data, testarray[i].K.length);
-			printf("IV\n");
-			dump_data(0, testarray[i].IV.data, testarray[i].IV.length);
-			printf("A\n");
-			dump_data(0, testarray[i].A.data, testarray[i].A.length);
-			printf("P\n");
-			dump_data(0, testarray[i].P.data, testarray[i].P.length);
-			printf("C1\n");
-			dump_data(0, testarray[i].C.data, testarray[i].C.length);
-			printf("C2\n");
-			dump_data(0, C.data, C.length);
-			printf("T1\n");
-			dump_data(0, testarray[i].T.data, testarray[i].T.length);
-			printf("T2\n");
-			dump_data(0, T, sizeof(T));
+			aes_mode_testvector_debug(&testarray[i], NULL, &C, &_T);
 			ret = false;
 			goto fail;
 		}
 
 		e = memcmp(testarray[i].C.data, C.data, C.length);
 		if (e != 0) {
-			printf("%s: aes_gcm_128 test[%u]: failed\n", __location__, i);
-			printf("K\n");
-			dump_data(0, testarray[i].K.data, testarray[i].K.length);
-			printf("IV\n");
-			dump_data(0, testarray[i].IV.data, testarray[i].IV.length);
-			printf("A\n");
-			dump_data(0, testarray[i].A.data, testarray[i].A.length);
-			printf("P\n");
-			dump_data(0, testarray[i].P.data, testarray[i].P.length);
-			printf("C1\n");
-			dump_data(0, testarray[i].C.data, testarray[i].C.length);
-			printf("C2\n");
-			dump_data(0, C.data, C.length);
-			printf("T1\n");
-			dump_data(0, testarray[i].T.data, testarray[i].T.length);
-			printf("T2\n");
-			dump_data(0, T, sizeof(T));
+			aes_mode_testvector_debug(&testarray[i], NULL, &C, &_T);
 			ret = false;
 			goto fail;
 		}
 	}
 
-	for (i=0; testarray[i].T.length != 0; i++) {
+	for (i=0; i < ARRAY_SIZE(testarray); i++) {
 		struct aes_gcm_128_context ctx;
 		uint8_t T[AES_BLOCK_SIZE];
+		DATA_BLOB _T = data_blob_const(T, sizeof(T));
 		DATA_BLOB P;
 		int e;
 		size_t j;
 
 		P = data_blob_dup_talloc(tctx, testarray[i].C);
 
-		aes_gcm_128_init(&ctx, testarray[i].K.data, testarray[i].IV.data);
+		aes_gcm_128_init(&ctx, testarray[i].K.data, testarray[i].N.data);
 		for (j=0; j < testarray[i].A.length; j++) {
 			aes_gcm_128_updateA(&ctx, NULL, 0);
 			aes_gcm_128_updateA(&ctx, &testarray[i].A.data[j], 1);
@@ -266,60 +245,29 @@ bool torture_local_crypto_aes_gcm_128(struct torture_context *torture)
 
 		e = memcmp(testarray[i].T.data, T, sizeof(T));
 		if (e != 0) {
-			printf("%s: aes_gcm_128 test[%u]: failed\n", __location__, i);
-			printf("K\n");
-			dump_data(0, testarray[i].K.data, testarray[i].K.length);
-			printf("IV\n");
-			dump_data(0, testarray[i].IV.data, testarray[i].IV.length);
-			printf("A\n");
-			dump_data(0, testarray[i].A.data, testarray[i].A.length);
-			printf("P1\n");
-			dump_data(0, testarray[i].P.data, testarray[i].P.length);
-			printf("P2\n");
-			dump_data(0, P.data, P.length);
-			printf("C\n");
-			dump_data(0, testarray[i].C.data, testarray[i].C.length);
-			printf("T1\n");
-			dump_data(0, testarray[i].T.data, testarray[i].T.length);
-			printf("T2\n");
-			dump_data(0, T, sizeof(T));
+			aes_mode_testvector_debug(&testarray[i], &P, NULL, &_T);
 			ret = false;
 			goto fail;
 		}
 
 		e = memcmp(testarray[i].P.data, P.data, P.length);
 		if (e != 0) {
-			printf("%s: aes_gcm_128 test[%u]: failed\n", __location__, i);
-			printf("K\n");
-			dump_data(0, testarray[i].K.data, testarray[i].K.length);
-			printf("IV\n");
-			dump_data(0, testarray[i].IV.data, testarray[i].IV.length);
-			printf("A\n");
-			dump_data(0, testarray[i].A.data, testarray[i].A.length);
-			printf("P1\n");
-			dump_data(0, testarray[i].P.data, testarray[i].P.length);
-			printf("P2\n");
-			dump_data(0, P.data, P.length);
-			printf("C\n");
-			dump_data(0, testarray[i].C.data, testarray[i].C.length);
-			printf("T1\n");
-			dump_data(0, testarray[i].T.data, testarray[i].T.length);
-			printf("T2\n");
-			dump_data(0, T, sizeof(T));
+			aes_mode_testvector_debug(&testarray[i], &P, NULL, &_T);
 			ret = false;
 			goto fail;
 		}
 	}
 
-	for (i=0; testarray[i].T.length != 0; i++) {
+	for (i=0; i < ARRAY_SIZE(testarray); i++) {
 		struct aes_gcm_128_context ctx;
 		uint8_t T[AES_BLOCK_SIZE];
+		DATA_BLOB _T = data_blob_const(T, sizeof(T));
 		DATA_BLOB P;
 		int e;
 
 		P = data_blob_dup_talloc(tctx, testarray[i].C);
 
-		aes_gcm_128_init(&ctx, testarray[i].K.data, testarray[i].IV.data);
+		aes_gcm_128_init(&ctx, testarray[i].K.data, testarray[i].N.data);
 		aes_gcm_128_updateA(&ctx, testarray[i].A.data, testarray[i].A.length);
 		aes_gcm_128_updateC(&ctx, P.data, P.length);
 		aes_gcm_128_crypt(&ctx, P.data, P.length);
@@ -327,52 +275,20 @@ bool torture_local_crypto_aes_gcm_128(struct torture_context *torture)
 
 		e = memcmp(testarray[i].T.data, T, sizeof(T));
 		if (e != 0) {
-			printf("%s: aes_gcm_128 test[%u]: failed\n", __location__, i);
-			printf("K\n");
-			dump_data(0, testarray[i].K.data, testarray[i].K.length);
-			printf("IV\n");
-			dump_data(0, testarray[i].IV.data, testarray[i].IV.length);
-			printf("A\n");
-			dump_data(0, testarray[i].A.data, testarray[i].A.length);
-			printf("P1\n");
-			dump_data(0, testarray[i].P.data, testarray[i].P.length);
-			printf("P2\n");
-			dump_data(0, P.data, P.length);
-			printf("C\n");
-			dump_data(0, testarray[i].C.data, testarray[i].C.length);
-			printf("T1\n");
-			dump_data(0, testarray[i].T.data, testarray[i].T.length);
-			printf("T2\n");
-			dump_data(0, T, sizeof(T));
+			aes_mode_testvector_debug(&testarray[i], &P, NULL, &_T);
 			ret = false;
 			goto fail;
 		}
 
 		e = memcmp(testarray[i].P.data, P.data, P.length);
 		if (e != 0) {
-			printf("%s: aes_gcm_128 test[%u]: failed\n", __location__, i);
-			printf("K\n");
-			dump_data(0, testarray[i].K.data, testarray[i].K.length);
-			printf("IV\n");
-			dump_data(0, testarray[i].IV.data, testarray[i].IV.length);
-			printf("A\n");
-			dump_data(0, testarray[i].A.data, testarray[i].A.length);
-			printf("P1\n");
-			dump_data(0, testarray[i].P.data, testarray[i].P.length);
-			printf("P2\n");
-			dump_data(0, P.data, P.length);
-			printf("C\n");
-			dump_data(0, testarray[i].C.data, testarray[i].C.length);
-			printf("T1\n");
-			dump_data(0, testarray[i].T.data, testarray[i].T.length);
-			printf("T2\n");
-			dump_data(0, T, sizeof(T));
+			aes_mode_testvector_debug(&testarray[i], &P, NULL, &_T);
 			ret = false;
 			goto fail;
 		}
 	}
 
  fail:
-	talloc_free(tctx);
 	return ret;
 }
+#endif /* AES_GCM_128_ONLY_TESTVECTORS */
-- 
1.9.1


From f0ffd863907ec38425a70496e39d86e6a6e85f2e Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Wed, 12 Aug 2015 12:58:49 +0200
Subject: [PATCH 11/12] lib/crypto: sync AES_cfb8_encrypt() from heimdal

BUG: https://bugzilla.samba.org/show_bug.cgi?id=11451

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 lib/crypto/aes.c | 35 ++++++++++++++++++-----------------
 lib/crypto/aes.h | 10 +++++++---
 2 files changed, 25 insertions(+), 20 deletions(-)

diff --git a/lib/crypto/aes.c b/lib/crypto/aes.c
index a47a456..f7f9688 100644
--- a/lib/crypto/aes.c
+++ b/lib/crypto/aes.c
@@ -113,24 +113,25 @@ AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
     }
 }
 
-void aes_cfb8_encrypt(const uint8_t *in, uint8_t *out,
-		      size_t length, const AES_KEY *key,
-		      uint8_t *iv, int forward)
+void
+AES_cfb8_encrypt(const unsigned char *in, unsigned char *out,
+                 unsigned long size, const AES_KEY *key,
+                 unsigned char *iv, int forward_encrypt)
 {
-	size_t i;
+    int i;
 
-	for (i=0; i < length; i++) {
-		uint8_t tiv[AES_BLOCK_SIZE*2];
+    for (i = 0; i < size; i++) {
+        unsigned char tmp[AES_BLOCK_SIZE + 1];
 
-		memcpy(tiv, iv, AES_BLOCK_SIZE);
-		AES_encrypt(iv, iv, key);
-		if (!forward) {
-			tiv[AES_BLOCK_SIZE] = in[i];
-		}
-		out[i] = in[i] ^ iv[0];
-		if (forward) {
-			tiv[AES_BLOCK_SIZE] = out[i];
-		}
-		memcpy(iv, tiv+1, AES_BLOCK_SIZE);
-	}
+        memcpy(tmp, iv, AES_BLOCK_SIZE);
+        AES_encrypt(iv, iv, key);
+        if (!forward_encrypt) {
+            tmp[AES_BLOCK_SIZE] = in[i];
+        }
+        out[i] = in[i] ^ iv[0];
+        if (forward_encrypt) {
+            tmp[AES_BLOCK_SIZE] = out[i];
+        }
+        memcpy(iv, &tmp[1], AES_BLOCK_SIZE);
+    }
 }
diff --git a/lib/crypto/aes.h b/lib/crypto/aes.h
index 2cfb587..7487486 100644
--- a/lib/crypto/aes.h
+++ b/lib/crypto/aes.h
@@ -42,6 +42,7 @@
 #define AES_encrypt samba_AES_encrypt
 #define AES_decrypt samba_AES_decrypt
 #define AES_cbc_encrypt samba_AES_cbc_encrypt
+#define AES_cfb8_encrypt samba_AES_cfb8_encrypt
 
 /*
  *
@@ -72,9 +73,12 @@ void AES_cbc_encrypt(const unsigned char *, unsigned char *,
 		     const unsigned long, const AES_KEY *,
 		     unsigned char *, int);
 
-void aes_cfb8_encrypt(const uint8_t *in, uint8_t *out,
-		      size_t length, const AES_KEY *key,
-		      uint8_t *iv, int forward);
+void AES_cfb8_encrypt(const unsigned char *in, unsigned char *out,
+		      unsigned long size, const AES_KEY *key,
+		      unsigned char *iv, int forward_encrypt);
+
+#define aes_cfb8_encrypt(in, out, size, key, iv, forward_encrypt) \
+	AES_cfb8_encrypt(in, out, size, key, iv, forward_encrypt)
 
 #ifdef  __cplusplus
 }
-- 
1.9.1


From f7c4d538f3e291fc4096bc376e087883ccbceb67 Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Wed, 12 Aug 2015 12:58:49 +0200
Subject: [PATCH 12/12] lib/crypto: make it possible to use only parts of
 aes.[ch]

This can be used in order to optimize some parts later.

BUG: https://bugzilla.samba.org/show_bug.cgi?id=11451

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 lib/crypto/aes.c | 8 +++++++-
 lib/crypto/aes.h | 7 +++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/lib/crypto/aes.c b/lib/crypto/aes.c
index f7f9688..800a97e 100644
--- a/lib/crypto/aes.c
+++ b/lib/crypto/aes.c
@@ -32,9 +32,10 @@
  */
 
 #include "replace.h"
+#include "aes.h"
 
+#ifdef SAMBA_RIJNDAEL
 #include "rijndael-alg-fst.h"
-#include "aes.h"
 
 int
 AES_set_encrypt_key(const unsigned char *userkey, const int bits, AES_KEY *key)
@@ -65,7 +66,9 @@ AES_decrypt(const unsigned char *in, unsigned char *out, const AES_KEY *key)
 {
     rijndaelDecrypt(key->key, key->rounds, in, out);
 }
+#endif /* SAMBA_RIJNDAEL */
 
+#ifdef SAMBA_AES_CBC_ENCRYPT
 void
 AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
 		unsigned long size, const AES_KEY *key,
@@ -112,7 +115,9 @@ AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
 	}
     }
 }
+#endif /* SAMBA_AES_CBC_ENCRYPT */
 
+#ifdef SAMBA_AES_CFB8_ENCRYPT
 void
 AES_cfb8_encrypt(const unsigned char *in, unsigned char *out,
                  unsigned long size, const AES_KEY *key,
@@ -135,3 +140,4 @@ AES_cfb8_encrypt(const unsigned char *in, unsigned char *out,
         memcpy(iv, &tmp[1], AES_BLOCK_SIZE);
     }
 }
+#endif /* SAMBA_AES_CFB8_ENCRYPT */
diff --git a/lib/crypto/aes.h b/lib/crypto/aes.h
index 7487486..48ea764 100644
--- a/lib/crypto/aes.h
+++ b/lib/crypto/aes.h
@@ -36,6 +36,11 @@
 #ifndef LIB_CRYPTO_AES_H
 #define LIB_CRYPTO_AES_H 1
 
+#define SAMBA_RIJNDAEL 1
+#define SAMBA_AES_CBC_ENCRYPT 1
+#define SAMBA_AES_CFB8_ENCRYPT 1
+#define SAMBA_AES_BLOCK_XOR 1
+
 /* symbol renaming */
 #define AES_set_encrypt_key samba_AES_set_encrypt_key
 #define AES_set_decrypt_key samba_AES_decrypt_key
@@ -84,6 +89,7 @@ void AES_cfb8_encrypt(const unsigned char *in, unsigned char *out,
 }
 #endif
 
+#ifdef SAMBA_AES_BLOCK_XOR
 static inline void aes_block_xor(const uint8_t in1[AES_BLOCK_SIZE],
 				 const uint8_t in2[AES_BLOCK_SIZE],
 				 uint8_t out[AES_BLOCK_SIZE])
@@ -111,6 +117,7 @@ static inline void aes_block_xor(const uint8_t in1[AES_BLOCK_SIZE],
 		memcpy(out, o, AES_BLOCK_SIZE);
 	}
 }
+#endif /* SAMBA_AES_BLOCK_XOR */
 
 static inline void aes_block_lshift(const uint8_t in[AES_BLOCK_SIZE],
 				    uint8_t out[AES_BLOCK_SIZE])
-- 
1.9.1

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 198 bytes
Desc: OpenPGP digital signature
URL: <http://lists.samba.org/pipermail/samba-technical/attachments/20150827/ea663867/signature.sig>


More information about the samba-technical mailing list