AES crypto performance improvements

Jeremy Allison jra at samba.org
Tue Aug 25 22:57:45 UTC 2015


On Mon, Aug 17, 2015 at 10:51:49PM +0200, Stefan Metzmacher wrote:
> Am 14.08.2015 um 15:28 schrieb Stefan Metzmacher:
> > Am 13.08.2015 um 23:22 schrieb Stefan Metzmacher:
> >> Hi,
> >>
> >> while debugging poor performance of our SMB3 crypto
> >> I developed the following performance improvements.
> >>
> >> The aes_block_xor() improved by a factor of ~10.
> >> The aes_block_[r|l]shift() functions improved by 25%.
> >>
> >> Please review and push.
> >>
> >> The real solution will be to use hardware support,
> >> I've extended the work from Simo in the following branch
> >> https://git.samba.org/?p=metze/samba/wip.git;a=shortlog;h=refs/heads/master3-smb-crypto
> >> (ignore the unrelated and directly reverted commits on top)
> >>
> >> But I'm not that happy with the abstraction yet.
> > 
> > There's a regression in the aes_ccm_128 optimization because that doesn't
> > have test cases... I'm working on it.
> 
> Here's an updated patchset, please review and push.

FYI. Reviewing this.... Taking a while as it's
complex code :-).


> From c3e7a29f4d42eb4fef192ba5dce14ce9ceb06319 Mon Sep 17 00:00:00 2001
> From: Stefan Metzmacher <metze at samba.org>
> Date: Wed, 12 Aug 2015 12:09:24 +0200
> Subject: [PATCH 01/11] s3:vfs_smb_traffic_analyzer: remove samba_ prefix from
>  AES_* function calls
> 
> This should be an implementation detail in lib/crypto/aes.h.
> 
> In future we may add support for other implementations.
> 
> Signed-off-by: Stefan Metzmacher <metze at samba.org>
> ---
>  source3/modules/vfs_smb_traffic_analyzer.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/source3/modules/vfs_smb_traffic_analyzer.c b/source3/modules/vfs_smb_traffic_analyzer.c
> index 73ebf63..0208cde 100644
> --- a/source3/modules/vfs_smb_traffic_analyzer.c
> +++ b/source3/modules/vfs_smb_traffic_analyzer.c
> @@ -175,7 +175,7 @@ static char *smb_traffic_analyzer_encrypt( TALLOC_CTX *ctx,
>  	unsigned char filler[17]= "................";
>  	char *output;
>  	if (akey == NULL) return NULL;
> -	samba_AES_set_encrypt_key((const unsigned char *) akey, 128, &key);
> +	AES_set_encrypt_key((const unsigned char *) akey, 128, &key);
>  	s1 = strlen(str) / 16;
>  	s2 = strlen(str) % 16;
>  	memcpy(filler, str + (s1*16), s2);
> @@ -185,10 +185,10 @@ static char *smb_traffic_analyzer_encrypt( TALLOC_CTX *ctx,
>  	*len = ((s1 + 1)*16);
>  	output = talloc_array(ctx, char, *len);
>  	for (h = 0; h < s1; h++) {
> -		samba_AES_encrypt((const unsigned char *) str+(16*h), (unsigned char *)output+16*h,
> +		AES_encrypt((const unsigned char *) str+(16*h), (unsigned char *)output+16*h,
>  			&key);
>  	}
> -	samba_AES_encrypt(filler, (unsigned char *)(output+(16*h)), &key);
> +	AES_encrypt(filler, (unsigned char *)(output+(16*h)), &key);
>  	*len = (s1*16)+16;
>  	return output;
>  }
> -- 
> 1.9.1
> 
> 
> From 38568865e3c9c4259230bc04fdd4eaa2787b04aa Mon Sep 17 00:00:00 2001
> From: Stefan Metzmacher <metze at samba.org>
> Date: Tue, 11 Aug 2015 16:31:25 +0200
> Subject: [PATCH 02/11] lib/crypto: add aes_cmac_128 chunked tests
> 
> BUG: https://bugzilla.samba.org/show_bug.cgi?id=11451
> 
> Signed-off-by: Stefan Metzmacher <metze at samba.org>
> ---
>  lib/crypto/aes_cmac_128_test.c | 24 ++++++++++++++++++++++++
>  1 file changed, 24 insertions(+)
> 
> diff --git a/lib/crypto/aes_cmac_128_test.c b/lib/crypto/aes_cmac_128_test.c
> index 173087f..c099a0f 100644
> --- a/lib/crypto/aes_cmac_128_test.c
> +++ b/lib/crypto/aes_cmac_128_test.c
> @@ -87,6 +87,30 @@ bool torture_local_crypto_aes_cmac_128(struct torture_context *torture)
>  			ret = false;
>  		}
>  	}
> +	for (i=0; testarray[i].cmac.length != 0; i++) {
> +		struct aes_cmac_128_context ctx;
> +		uint8_t cmac[AES_BLOCK_SIZE];
> +		int e;
> +		size_t j;
> +
> +		aes_cmac_128_init(&ctx, key.data);
> +		for (j=0; j < testarray[i].data.length; j++) {
> +			aes_cmac_128_update(&ctx,
> +					    &testarray[i].data.data[j],
> +					    1);
> +		}
> +		aes_cmac_128_final(&ctx, cmac);
> +
> +		e = memcmp(testarray[i].cmac.data, cmac, sizeof(cmac));
> +		if (e != 0) {
> +			printf("aes_cmac_128 chunked test[%u]: failed\n", i);
> +			dump_data(0, key.data, key.length);
> +			dump_data(0, testarray[i].data.data, testarray[i].data.length);
> +			dump_data(0, testarray[i].cmac.data, testarray[i].cmac.length);
> +			dump_data(0, cmac, sizeof(cmac));
> +			ret = false;
> +		}
> +	}
>  	talloc_free(tctx);
>  	return ret;
>  }
> -- 
> 1.9.1
> 
> 
> From 72857d8ba56630924da1241e21630ecd6a262394 Mon Sep 17 00:00:00 2001
> From: Stefan Metzmacher <metze at samba.org>
> Date: Fri, 14 Aug 2015 13:12:13 +0200
> Subject: [PATCH 03/11] lib/crypto: run all aes_gcm_128 testcases
> 
> We should not skip the first one.
> 
> BUG: https://bugzilla.samba.org/show_bug.cgi?id=11451
> 
> Signed-off-by: Stefan Metzmacher <metze at samba.org>
> ---
>  lib/crypto/aes_gcm_128_test.c | 8 ++++----
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/lib/crypto/aes_gcm_128_test.c b/lib/crypto/aes_gcm_128_test.c
> index 703ad86..f70d851 100644
> --- a/lib/crypto/aes_gcm_128_test.c
> +++ b/lib/crypto/aes_gcm_128_test.c
> @@ -104,7 +104,7 @@ bool torture_local_crypto_aes_gcm_128(struct torture_context *torture)
>  	testarray[3].T = strhex_to_data_blob(tctx,
>  				"5bc94fbc3221a5db94fae95ae7121a47");
>  
> -	for (i=1; testarray[i].T.length != 0; i++) {
> +	for (i=0; testarray[i].T.length != 0; i++) {
>  		struct aes_gcm_128_context ctx;
>  		uint8_t T[AES_BLOCK_SIZE];
>  		DATA_BLOB C;
> @@ -167,7 +167,7 @@ bool torture_local_crypto_aes_gcm_128(struct torture_context *torture)
>  		}
>  	}
>  
> -	for (i=1; testarray[i].T.length != 0; i++) {
> +	for (i=0; testarray[i].T.length != 0; i++) {
>  		struct aes_gcm_128_context ctx;
>  		uint8_t T[AES_BLOCK_SIZE];
>  		DATA_BLOB C;
> @@ -233,7 +233,7 @@ bool torture_local_crypto_aes_gcm_128(struct torture_context *torture)
>  		}
>  	}
>  
> -	for (i=1; testarray[i].T.length != 0; i++) {
> +	for (i=0; testarray[i].T.length != 0; i++) {
>  		struct aes_gcm_128_context ctx;
>  		uint8_t T[AES_BLOCK_SIZE];
>  		DATA_BLOB P;
> @@ -299,7 +299,7 @@ bool torture_local_crypto_aes_gcm_128(struct torture_context *torture)
>  		}
>  	}
>  
> -	for (i=1; testarray[i].T.length != 0; i++) {
> +	for (i=0; testarray[i].T.length != 0; i++) {
>  		struct aes_gcm_128_context ctx;
>  		uint8_t T[AES_BLOCK_SIZE];
>  		DATA_BLOB P;
> -- 
> 1.9.1
> 
> 
> From 15531f85b29dc97c9ae1aff4dad1f5b280f4d45d Mon Sep 17 00:00:00 2001
> From: Stefan Metzmacher <metze at samba.org>
> Date: Fri, 14 Aug 2015 13:13:21 +0200
> Subject: [PATCH 04/11] lib/crypto: add aes_ccm_128 tests
> 
> BUG: https://bugzilla.samba.org/show_bug.cgi?id=11451
> 
> Signed-off-by: Stefan Metzmacher <metze at samba.org>
> ---
>  lib/crypto/aes_ccm_128_test.c | 364 ++++++++++++++++++++++++++++++++++++++++++
>  lib/crypto/aes_test.h         |  67 ++++++++
>  lib/crypto/wscript_build      |  11 +-
>  source4/torture/local/local.c |   2 +
>  4 files changed, 439 insertions(+), 5 deletions(-)
>  create mode 100644 lib/crypto/aes_ccm_128_test.c
>  create mode 100644 lib/crypto/aes_test.h
> 
> diff --git a/lib/crypto/aes_ccm_128_test.c b/lib/crypto/aes_ccm_128_test.c
> new file mode 100644
> index 0000000..cc5d3db
> --- /dev/null
> +++ b/lib/crypto/aes_ccm_128_test.c
> @@ -0,0 +1,364 @@
> +/*
> +   AES-CCM-128 tests
> +
> +   Copyright (C) Stefan Metzmacher 2015
> +
> +   This program is free software; you can redistribute it and/or modify
> +   it under the terms of the GNU General Public License as published by
> +   the Free Software Foundation; either version 3 of the License, or
> +   (at your option) any later version.
> +
> +   This program is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +   GNU General Public License for more details.
> +
> +   You should have received a copy of the GNU General Public License
> +   along with this program.  If not, see <http://www.gnu.org/licenses/>.
> +*/
> +#include "replace.h"
> +#include "../lib/util/samba_util.h"
> +#include "../lib/crypto/crypto.h"
> +#include "../lib/crypto/aes_test.h"
> +
> +#ifndef AES_CCM_128_ONLY_TESTVECTORS
> +struct torture_context;
> +bool torture_local_crypto_aes_ccm_128(struct torture_context *torture);
> +
> +/*
> + This uses our own test values as we rely on an 11 byte nonce
> + and the values from RFC 3610 use a 13 byte nonce.
> +*/
> +bool torture_local_crypto_aes_ccm_128(struct torture_context *tctx)
> +{
> +	bool ret = true;
> +	uint32_t i;
> +	struct aes_mode_testvector testarray[] = {
> +#endif /* AES_CCM_128_ONLY_TESTVECTORS */
> +#define AES_CCM_128_TESTVECTOR(_k, _n, _a, _p, _c, _t) \
> +	AES_MODE_TESTVECTOR(aes_ccm_128, _k, _n, _a, _p, _c, _t)
> +
> +	AES_CCM_128_TESTVECTOR(
> +		/* K */
> +		"8BF9FBC2B8149484FF11AB1F3A544FF6",
> +		/* N */
> +		"010000000000000077F7A8",
> +		/* A */
> +		"010000000000000077F7A80000000000"
> +		"A8000000000001004100002C00980000",
> +		/* P */
> +		"FE534D4240000100000000000B00811F"
> +		"00000000000000000600000000000000"
> +		"00000000010000004100002C00980000"
> +		"00000000000000000000000000000000"
> +		"3900000094010600FFFFFFFFFFFFFFFF"
> +		"FFFFFFFFFFFFFFFF7800000030000000"
> +		"000000007800000000000000FFFF0000"
> +		"0100000000000000"
> +		"03005C003100370032002E0033003100"
> +		"2E0039002E003100380033005C006E00"
> +		"650074006C006F0067006F006E000000",
> +		/* C */
> +		"25985364BF9AF90EB0B9C8FB55B7C446"
> +		"780F310F1EC4677726BFBF34E38E6408"
> +		"057EE228814F11CBAAB794A79F7A1F78"
> +		"2DE73B7477985360A02D35A7A347ABF7"
> +		"9F18DD8687767423BB08F18642B6EFEE"
> +		"8B1543D83091AF5952F58BB4BD89FF6B"
> +		"0206E7170481C7BC61F06653D0CF10F7"
> +		"C78380389382C276"
> +		"7B8BF34D687A5C3D4F783F926F7755C0"
> +		"2D44C30848C69CFDD8E54395F1881611"
> +		"E5502285870A7179068923105190C837",
> +		/* T */
> +		"3C11F652F8EA5600C8607D2E0FEAFD42"
> +	),
> +	AES_CCM_128_TESTVECTOR(
> +		/* K */
> +		"f9fdca4ac64fe7f014de0f43039c7571",
> +		/* N */
> +		"5a8aa485c316e947125478",
> +		/* A */
> +		"3796cf51b8726652a4204733b8fbb047"
> +		"cf00fb91a9837e22ec22b1a268f88e2c",
> +		/* P */
> +		"a265480ca88d5f536db0dc6abc40faf0"
> +		"d05be7a9669777682345647586786983",
> +		/* C */
> +		"65F8D8422006FB77FB7CCEFDFFF93729"
> +		"B3EFCB06A0FAF3A2ABAB485723373F53",
> +		/* T */
> +		"2C62BD82AD231887A7B326E1E045BC91"
> +	),
> +	AES_CCM_128_TESTVECTOR(
> +		/* K */
> +		"197afb02ffbd8f699dacae87094d5243",
> +		/* N */
> +		"5a8aa485c316e947125478",
> +		/* A */
> +		"",
> +		/* P */
> +		"3796cf51b8726652a4204733b8fbb047"
> +		"cf00fb91a9837e22",
> +		/* C */
> +		"CA53910394115C5DAB5D7250F04D6A27"
> +		"2BCFA4329528F3AC",
> +		/* T */
> +		"38E3A318F9BA88D4DD2FAF3521820001"
> +	),
> +	AES_CCM_128_TESTVECTOR(
> +		/* K */
> +		"90929a4b0ac65b350ad1591611fe4829",
> +		/* N */
> +		"5a8aa485c316e9403aff85",
> +		/* A */
> +		"",
> +		/* P */
> +		"a16a2e741f1cd9717285b6d882c1fc53"
> +		"655e9773761ad697",
> +		/* C */
> +		"ACA5E98D2784D131AE76E3C8BF9C3988"
> +		"35C0206C71893F26",
> +		/* T */
> +		"AE67C0EA38C5383BFDC7967F4E9D1678"
> +	),
> +	AES_CCM_128_TESTVECTOR(
> +		/* K */
> +		"f9fdca4ac64fe7f014de0f43039c7571",
> +		/* N */
> +		"5a8aa485c316e947125478",
> +		/* A */
> +		"3796cf51b8726652a4204733b8fbb047"
> +		"cf00fb91a9837e22ec22b1a268f88e2c",
> +		/* P */
> +		"a265480ca88d5f536db0dc6abc40faf0"
> +		"d05be7a966977768",
> +		/* C */
> +		"65F8D8422006FB77FB7CCEFDFFF93729"
> +		"B3EFCB06A0FAF3A2",
> +		/* T */
> +		"03C6E244586AFAB9B60D9F6DBDF7EB1A"
> +	),
> +	AES_CCM_128_TESTVECTOR(
> +		/* K */
> +		"26511fb51fcfa75cb4b44da75a6e5a0e",
> +		/* N */
> +		"5a8aa485c316e9403aff85",
> +		/* A */
> +		"a16a2e741f1cd9717285b6d882c1fc53"
> +		"655e9773761ad697a7ee6410184c7982",
> +		/* P */
> +		"8739b4bea1a099fe547499cbc6d1b13d"
> +		"849b8084c9b6acc5",
> +		/* C */
> +		"D31F9FC23674D5272125375E0A2F5365"
> +		"41B1FAF1DD68C819",
> +		/* T */
> +		"4F315233A76C4DD99972561C5158AB3B"
> +	),
> +	AES_CCM_128_TESTVECTOR(
> +		/* K */
> +		"f9fdca4ac64fe7f014de0f43039c7571",
> +		/* N */
> +		"5a8aa485c316e947125478",
> +		/* A */
> +		"3796cf51b8726652a4204733b8fbb047"
> +		"cf00fb91a9837e22ec22b1a268",
> +		/* P */
> +		"a265480ca88d5f536db0dc6abc40faf0"
> +		"d05be7a9669777682376345745",
> +		/* C */
> +		"65F8D8422006FB77FB7CCEFDFFF93729"
> +		"B3EFCB06A0FAF3A2AB981875E0",
> +		/* T */
> +		"EA93AAEDA607226E9E79D2EE5C4B62F8"
> +	),
> +	AES_CCM_128_TESTVECTOR(
> +		/* K */
> +		"26511fb51fcfa75cb4b44da75a6e5a0e",
> +		/* N */
> +		"5a8aa485c316e9403aff85",
> +		/* A */
> +		"a16a2e741f1cd9717285b6d882c1fc53"
> +		"65",
> +		/* P */
> +		"8739b4bea1a099fe547499cbc6d1b13d"
> +		"84",
> +		/* C */
> +		"D31F9FC23674D5272125375E0A2F5365"
> +		"41",
> +		/* T */
> +		"036F58DA2372B29BD0E01C58A0E7F9EE"
> +	),
> +	AES_CCM_128_TESTVECTOR(
> +		/* K */
> +		"00000000000000000000000000000000",
> +		/* N */
> +		"0000000000000000000000",
> +		/* A */
> +		"",
> +		/* P */
> +		"00",
> +		/* C */
> +		"2E",
> +		/* T */
> +		"61787D2C432A58293B73D01154E61B6B"
> +	),
> +	AES_CCM_128_TESTVECTOR(
> +		/* K */
> +		"00000000000000000000000000000000",
> +		/* N */
> +		"0000000000000000000000",
> +		/* A */
> +		"00",
> +		/* P */
> +		"00",
> +		/* C */
> +		"2E",
> +		/* T */
> +		"E4284A0E813F0FFA146CF59F9ADAFBD7"
> +	),
> +#ifndef AES_CCM_128_ONLY_TESTVECTORS
> +	};
> +
> +	for (i=0; i < ARRAY_SIZE(testarray); i++) {
> +		struct aes_ccm_128_context ctx;
> +		uint8_t T[AES_BLOCK_SIZE];
> +		DATA_BLOB _T = data_blob_const(T, sizeof(T));
> +		DATA_BLOB C;
> +		int e;
> +
> +		C = data_blob_dup_talloc(tctx, testarray[i].P);
> +
> +		aes_ccm_128_init(&ctx, testarray[i].K.data, testarray[i].N.data,
> +				 testarray[i].A.length, testarray[i].P.length);
> +		aes_ccm_128_update(&ctx,
> +				   testarray[i].A.data,
> +				   testarray[i].A.length);
> +		aes_ccm_128_update(&ctx, C.data, C.length);
> +		aes_ccm_128_crypt(&ctx, C.data, C.length);
> +		aes_ccm_128_digest(&ctx, T);
> +
> +		e = memcmp(testarray[i].T.data, T, sizeof(T));
> +		if (e != 0) {
> +			aes_mode_testvector_debug(&testarray[i], NULL, &C, &_T);
> +			ret = false;
> +			goto fail;
> +		}
> +
> +		e = memcmp(testarray[i].C.data, C.data, C.length);
> +		if (e != 0) {
> +			aes_mode_testvector_debug(&testarray[i], NULL, &C, &_T);
> +			ret = false;
> +			goto fail;
> +		}
> +	}
> +
> +	for (i=0; i < ARRAY_SIZE(testarray); i++) {
> +		struct aes_ccm_128_context ctx;
> +		uint8_t T[AES_BLOCK_SIZE];
> +		DATA_BLOB _T = data_blob_const(T, sizeof(T));
> +		DATA_BLOB C;
> +		int e;
> +		size_t j;
> +
> +		C = data_blob_dup_talloc(tctx, testarray[i].P);
> +
> +		aes_ccm_128_init(&ctx, testarray[i].K.data, testarray[i].N.data,
> +				 testarray[i].A.length, testarray[i].P.length);
> +		for (j=0; j < testarray[i].A.length; j++) {
> +			aes_ccm_128_update(&ctx, &testarray[i].A.data[j], 1);
> +		}
> +		for (j=0; j < C.length; j++) {
> +			aes_ccm_128_update(&ctx, &C.data[j], 1);
> +			aes_ccm_128_crypt(&ctx, &C.data[j], 1);
> +		}
> +		aes_ccm_128_digest(&ctx, T);
> +
> +		e = memcmp(testarray[i].T.data, T, sizeof(T));
> +		if (e != 0) {
> +			aes_mode_testvector_debug(&testarray[i], NULL, &C, &_T);
> +			ret = false;
> +			goto fail;
> +		}
> +
> +		e = memcmp(testarray[i].C.data, C.data, C.length);
> +		if (e != 0) {
> +			aes_mode_testvector_debug(&testarray[i], NULL, &C, &_T);
> +			ret = false;
> +			goto fail;
> +		}
> +	}
> +
> +	for (i=0; i < ARRAY_SIZE(testarray); i++) {
> +		struct aes_ccm_128_context ctx;
> +		uint8_t T[AES_BLOCK_SIZE];
> +		DATA_BLOB _T = data_blob_const(T, sizeof(T));
> +		DATA_BLOB P;
> +		int e;
> +		size_t j;
> +
> +		P = data_blob_dup_talloc(tctx, testarray[i].C);
> +
> +		aes_ccm_128_init(&ctx, testarray[i].K.data, testarray[i].N.data,
> +				 testarray[i].A.length, testarray[i].P.length);
> +		for (j=0; j < testarray[i].A.length; j++) {
> +			aes_ccm_128_update(&ctx, &testarray[i].A.data[j], 1);
> +		}
> +		for (j=0; j < P.length; j++) {
> +			aes_ccm_128_crypt(&ctx, &P.data[j], 1);
> +			aes_ccm_128_update(&ctx, &P.data[j], 1);
> +		}
> +		aes_ccm_128_digest(&ctx, T);
> +
> +		e = memcmp(testarray[i].T.data, T, sizeof(T));
> +		if (e != 0) {
> +			aes_mode_testvector_debug(&testarray[i], &P, NULL, &_T);
> +			ret = false;
> +			goto fail;
> +		}
> +
> +		e = memcmp(testarray[i].P.data, P.data, P.length);
> +		if (e != 0) {
> +			aes_mode_testvector_debug(&testarray[i], &P, NULL, &_T);
> +			ret = false;
> +			goto fail;
> +		}
> +	}
> +
> +	for (i=0; i < ARRAY_SIZE(testarray); i++) {
> +		struct aes_ccm_128_context ctx;
> +		uint8_t T[AES_BLOCK_SIZE];
> +		DATA_BLOB _T = data_blob_const(T, sizeof(T));
> +		DATA_BLOB P;
> +		int e;
> +
> +		P = data_blob_dup_talloc(tctx, testarray[i].C);
> +
> +		aes_ccm_128_init(&ctx, testarray[i].K.data, testarray[i].N.data,
> +				 testarray[i].A.length, testarray[i].P.length);
> +		aes_ccm_128_update(&ctx, testarray[i].A.data, testarray[i].A.length);
> +		aes_ccm_128_crypt(&ctx, P.data, P.length);
> +		aes_ccm_128_update(&ctx, P.data, P.length);
> +		aes_ccm_128_digest(&ctx, T);
> +
> +		e = memcmp(testarray[i].T.data, T, sizeof(T));
> +		if (e != 0) {
> +			aes_mode_testvector_debug(&testarray[i], &P, NULL, &_T);
> +			ret = false;
> +			goto fail;
> +		}
> +
> +		e = memcmp(testarray[i].P.data, P.data, P.length);
> +		if (e != 0) {
> +			aes_mode_testvector_debug(&testarray[i], &P, NULL, &_T);
> +			ret = false;
> +			goto fail;
> +		}
> +	}
> +
> + fail:
> +	return ret;
> +}
> +
> +#endif /* AES_CCM_128_ONLY_TESTVECTORS */
> diff --git a/lib/crypto/aes_test.h b/lib/crypto/aes_test.h
> new file mode 100644
> index 0000000..ad89249
> --- /dev/null
> +++ b/lib/crypto/aes_test.h
> @@ -0,0 +1,67 @@
> +#ifndef LIB_CRYPTO_AES_TEST_H
> +#define LIB_CRYPTO_AES_TEST_H
> +
> +struct aes_mode_testvector {
> +	DATA_BLOB K;
> +	DATA_BLOB N;
> +	DATA_BLOB A;
> +	DATA_BLOB P;
> +	DATA_BLOB C;
> +	DATA_BLOB T;
> +	const char *mode;
> +	bool aes_cmac_128;
> +	bool aes_ccm_128;
> +	bool aes_gcm_128;
> +	const char *location;
> +};
> +
> +#define AES_MODE_TESTVECTOR(_mode, _k, _n, _a, _p, _c, _t) \
> +	{ \
> +		.K = strhex_to_data_blob(tctx, _k), \
> +		.N = strhex_to_data_blob(tctx, _n), \
> +		.A = strhex_to_data_blob(tctx, _a), \
> +		.P = strhex_to_data_blob(tctx, _p), \
> +		.C = strhex_to_data_blob(tctx, _c), \
> +		.T = strhex_to_data_blob(tctx, _t), \
> +		._mode = true, \
> +		.mode = #_mode, \
> +		.location = __location__, \
> +	}
> +
> +#define aes_mode_testvector_debug(tv, P, C, T) \
> +	_aes_mode_testvector_debug(tv, P, C, T, __location__)
> +static inline void _aes_mode_testvector_debug(const struct aes_mode_testvector *tv,
> +					      const DATA_BLOB *P,
> +					      const DATA_BLOB *C,
> +					      const DATA_BLOB *T,
> +					      const char *location)
> +{
> +	printf("location: %s\n", location);
> +	printf("TEST: %s\n", tv->location);
> +	printf("MODE: %s\n", tv->mode);
> +	printf("K\n");
> +	dump_data(0, tv->K.data, tv->K.length);
> +	printf("N\n");
> +	dump_data(0, tv->N.data, tv->N.length);
> +	printf("A\n");
> +	dump_data(0, tv->A.data, tv->A.length);
> +	printf("P\n");
> +	dump_data(0, tv->P.data, tv->P.length);
> +	if (P) {
> +		printf("PV\n");
> +		dump_data(0, P->data, P->length);
> +	}
> +	printf("C\n");
> +	dump_data(0, tv->C.data, tv->C.length);
> +	if (C) {
> +		printf("CV\n");
> +		dump_data(0, C->data, C->length);
> +	}
> +	printf("T\n");
> +	dump_data(0, tv->T.data, tv->T.length);
> +	if (T) {
> +		printf("TV\n");
> +		dump_data(0, T->data, T->length);
> +	}
> +}
> +#endif /* LIB_CRYPTO_AES_TEST_H */
> diff --git a/lib/crypto/wscript_build b/lib/crypto/wscript_build
> index f2326a2..7f94532 100644
> --- a/lib/crypto/wscript_build
> +++ b/lib/crypto/wscript_build
> @@ -19,8 +19,9 @@ bld.SAMBA_SUBSYSTEM('LIBCRYPTO',
>          )
>  
>  bld.SAMBA_SUBSYSTEM('TORTURE_LIBCRYPTO',
> -	source='md4test.c md5test.c hmacmd5test.c aes_cmac_128_test.c aes_gcm_128_test.c',
> -	autoproto='test_proto.h',
> -	deps='LIBCRYPTO'
> -	)
> -
> +        source='''md4test.c md5test.c hmacmd5test.c
> +            aes_cmac_128_test.c aes_ccm_128_test.c aes_gcm_128_test.c
> +        ''',
> +        autoproto='test_proto.h',
> +        deps='LIBCRYPTO'
> +        )
> diff --git a/source4/torture/local/local.c b/source4/torture/local/local.c
> index 5d3b4e1..bad7b65 100644
> --- a/source4/torture/local/local.c
> +++ b/source4/torture/local/local.c
> @@ -91,6 +91,8 @@ NTSTATUS torture_local_init(void)
>  				      torture_local_crypto_hmacmd5);
>  	torture_suite_add_simple_test(suite, "crypto.aes_cmac_128",
>  				      torture_local_crypto_aes_cmac_128);
> +	torture_suite_add_simple_test(suite, "crypto.aes_ccm_128",
> +				      torture_local_crypto_aes_ccm_128);
>  	torture_suite_add_simple_test(suite, "crypto.aes_gcm_128",
>  				      torture_local_crypto_aes_gcm_128);
>  
> -- 
> 1.9.1
> 
> 
> From 0bb37be94e56da81b28f4c1560b53b4ceccc0b04 Mon Sep 17 00:00:00 2001
> From: Stefan Metzmacher <metze at samba.org>
> Date: Wed, 12 Aug 2015 00:59:58 +0200
> Subject: [PATCH 05/11] lib/crypto: add optimized helper functions
>  aes_block_{xor,lshift,rshift}()
> 
> These are typical operations on an AES_BLOCK used by different modes.
> 
> BUG: https://bugzilla.samba.org/show_bug.cgi?id=11451
> 
> Signed-off-by: Stefan Metzmacher <metze at samba.org>
> ---
>  lib/crypto/aes.h | 575 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 575 insertions(+)
> 
> diff --git a/lib/crypto/aes.h b/lib/crypto/aes.h
> index a2b6c07..2cfb587 100644
> --- a/lib/crypto/aes.h
> +++ b/lib/crypto/aes.h
> @@ -80,4 +80,579 @@ void aes_cfb8_encrypt(const uint8_t *in, uint8_t *out,
>  }
>  #endif
>  
> +static inline void aes_block_xor(const uint8_t in1[AES_BLOCK_SIZE],
> +				 const uint8_t in2[AES_BLOCK_SIZE],
> +				 uint8_t out[AES_BLOCK_SIZE])
> +{
> +#define __IS_ALIGN8(p) ((((uintptr_t)(p)) & 0x7) == 0)
> +#define __IS_ALIGNED(a,b,c) __IS_ALIGN8(\
> +		((uintptr_t)(a)) | \
> +		((uintptr_t)(b)) | \
> +		((uintptr_t)(c)))
> +	/* If everything is aligned we can optimize */
> +	if (likely(__IS_ALIGNED(in1, in2, out))) {
> +#define __RO64(p) ((const uint64_t *)(p))
> +#define __RW64(p) ((uint64_t *)(p))
> +		__RW64(out)[0] = __RO64(in1)[0] ^ __RO64(in2)[0];
> +		__RW64(out)[1] = __RO64(in1)[1] ^ __RO64(in2)[1];
> +	} else {
> +		uint64_t i1[2];
> +		uint64_t i2[2];
> +		uint64_t o[2];
> +
> +		memcpy(i1, in1, AES_BLOCK_SIZE);
> +		memcpy(i2, in2, AES_BLOCK_SIZE);
> +		o[0] = i1[0] ^ i2[0];
> +		o[1] = i1[1] ^ i2[1];
> +		memcpy(out, o, AES_BLOCK_SIZE);
> +	}
> +}
> +
> +static inline void aes_block_lshift(const uint8_t in[AES_BLOCK_SIZE],
> +				    uint8_t out[AES_BLOCK_SIZE])
> +{
> +	static const struct aes_block_lshift_entry {
> +		uint8_t lshift;
> +		uint8_t overflow;
> +	} aes_block_lshift_table[UINT8_MAX+1] = {
> +		[0x00] = { .lshift = 0x00, .overflow = 0x00 },
> +		[0x01] = { .lshift = 0x02, .overflow = 0x00 },
> +		[0x02] = { .lshift = 0x04, .overflow = 0x00 },
> +		[0x03] = { .lshift = 0x06, .overflow = 0x00 },
> +		[0x04] = { .lshift = 0x08, .overflow = 0x00 },
> +		[0x05] = { .lshift = 0x0a, .overflow = 0x00 },
> +		[0x06] = { .lshift = 0x0c, .overflow = 0x00 },
> +		[0x07] = { .lshift = 0x0e, .overflow = 0x00 },
> +		[0x08] = { .lshift = 0x10, .overflow = 0x00 },
> +		[0x09] = { .lshift = 0x12, .overflow = 0x00 },
> +		[0x0a] = { .lshift = 0x14, .overflow = 0x00 },
> +		[0x0b] = { .lshift = 0x16, .overflow = 0x00 },
> +		[0x0c] = { .lshift = 0x18, .overflow = 0x00 },
> +		[0x0d] = { .lshift = 0x1a, .overflow = 0x00 },
> +		[0x0e] = { .lshift = 0x1c, .overflow = 0x00 },
> +		[0x0f] = { .lshift = 0x1e, .overflow = 0x00 },
> +		[0x10] = { .lshift = 0x20, .overflow = 0x00 },
> +		[0x11] = { .lshift = 0x22, .overflow = 0x00 },
> +		[0x12] = { .lshift = 0x24, .overflow = 0x00 },
> +		[0x13] = { .lshift = 0x26, .overflow = 0x00 },
> +		[0x14] = { .lshift = 0x28, .overflow = 0x00 },
> +		[0x15] = { .lshift = 0x2a, .overflow = 0x00 },
> +		[0x16] = { .lshift = 0x2c, .overflow = 0x00 },
> +		[0x17] = { .lshift = 0x2e, .overflow = 0x00 },
> +		[0x18] = { .lshift = 0x30, .overflow = 0x00 },
> +		[0x19] = { .lshift = 0x32, .overflow = 0x00 },
> +		[0x1a] = { .lshift = 0x34, .overflow = 0x00 },
> +		[0x1b] = { .lshift = 0x36, .overflow = 0x00 },
> +		[0x1c] = { .lshift = 0x38, .overflow = 0x00 },
> +		[0x1d] = { .lshift = 0x3a, .overflow = 0x00 },
> +		[0x1e] = { .lshift = 0x3c, .overflow = 0x00 },
> +		[0x1f] = { .lshift = 0x3e, .overflow = 0x00 },
> +		[0x20] = { .lshift = 0x40, .overflow = 0x00 },
> +		[0x21] = { .lshift = 0x42, .overflow = 0x00 },
> +		[0x22] = { .lshift = 0x44, .overflow = 0x00 },
> +		[0x23] = { .lshift = 0x46, .overflow = 0x00 },
> +		[0x24] = { .lshift = 0x48, .overflow = 0x00 },
> +		[0x25] = { .lshift = 0x4a, .overflow = 0x00 },
> +		[0x26] = { .lshift = 0x4c, .overflow = 0x00 },
> +		[0x27] = { .lshift = 0x4e, .overflow = 0x00 },
> +		[0x28] = { .lshift = 0x50, .overflow = 0x00 },
> +		[0x29] = { .lshift = 0x52, .overflow = 0x00 },
> +		[0x2a] = { .lshift = 0x54, .overflow = 0x00 },
> +		[0x2b] = { .lshift = 0x56, .overflow = 0x00 },
> +		[0x2c] = { .lshift = 0x58, .overflow = 0x00 },
> +		[0x2d] = { .lshift = 0x5a, .overflow = 0x00 },
> +		[0x2e] = { .lshift = 0x5c, .overflow = 0x00 },
> +		[0x2f] = { .lshift = 0x5e, .overflow = 0x00 },
> +		[0x30] = { .lshift = 0x60, .overflow = 0x00 },
> +		[0x31] = { .lshift = 0x62, .overflow = 0x00 },
> +		[0x32] = { .lshift = 0x64, .overflow = 0x00 },
> +		[0x33] = { .lshift = 0x66, .overflow = 0x00 },
> +		[0x34] = { .lshift = 0x68, .overflow = 0x00 },
> +		[0x35] = { .lshift = 0x6a, .overflow = 0x00 },
> +		[0x36] = { .lshift = 0x6c, .overflow = 0x00 },
> +		[0x37] = { .lshift = 0x6e, .overflow = 0x00 },
> +		[0x38] = { .lshift = 0x70, .overflow = 0x00 },
> +		[0x39] = { .lshift = 0x72, .overflow = 0x00 },
> +		[0x3a] = { .lshift = 0x74, .overflow = 0x00 },
> +		[0x3b] = { .lshift = 0x76, .overflow = 0x00 },
> +		[0x3c] = { .lshift = 0x78, .overflow = 0x00 },
> +		[0x3d] = { .lshift = 0x7a, .overflow = 0x00 },
> +		[0x3e] = { .lshift = 0x7c, .overflow = 0x00 },
> +		[0x3f] = { .lshift = 0x7e, .overflow = 0x00 },
> +		[0x40] = { .lshift = 0x80, .overflow = 0x00 },
> +		[0x41] = { .lshift = 0x82, .overflow = 0x00 },
> +		[0x42] = { .lshift = 0x84, .overflow = 0x00 },
> +		[0x43] = { .lshift = 0x86, .overflow = 0x00 },
> +		[0x44] = { .lshift = 0x88, .overflow = 0x00 },
> +		[0x45] = { .lshift = 0x8a, .overflow = 0x00 },
> +		[0x46] = { .lshift = 0x8c, .overflow = 0x00 },
> +		[0x47] = { .lshift = 0x8e, .overflow = 0x00 },
> +		[0x48] = { .lshift = 0x90, .overflow = 0x00 },
> +		[0x49] = { .lshift = 0x92, .overflow = 0x00 },
> +		[0x4a] = { .lshift = 0x94, .overflow = 0x00 },
> +		[0x4b] = { .lshift = 0x96, .overflow = 0x00 },
> +		[0x4c] = { .lshift = 0x98, .overflow = 0x00 },
> +		[0x4d] = { .lshift = 0x9a, .overflow = 0x00 },
> +		[0x4e] = { .lshift = 0x9c, .overflow = 0x00 },
> +		[0x4f] = { .lshift = 0x9e, .overflow = 0x00 },
> +		[0x50] = { .lshift = 0xa0, .overflow = 0x00 },
> +		[0x51] = { .lshift = 0xa2, .overflow = 0x00 },
> +		[0x52] = { .lshift = 0xa4, .overflow = 0x00 },
> +		[0x53] = { .lshift = 0xa6, .overflow = 0x00 },
> +		[0x54] = { .lshift = 0xa8, .overflow = 0x00 },
> +		[0x55] = { .lshift = 0xaa, .overflow = 0x00 },
> +		[0x56] = { .lshift = 0xac, .overflow = 0x00 },
> +		[0x57] = { .lshift = 0xae, .overflow = 0x00 },
> +		[0x58] = { .lshift = 0xb0, .overflow = 0x00 },
> +		[0x59] = { .lshift = 0xb2, .overflow = 0x00 },
> +		[0x5a] = { .lshift = 0xb4, .overflow = 0x00 },
> +		[0x5b] = { .lshift = 0xb6, .overflow = 0x00 },
> +		[0x5c] = { .lshift = 0xb8, .overflow = 0x00 },
> +		[0x5d] = { .lshift = 0xba, .overflow = 0x00 },
> +		[0x5e] = { .lshift = 0xbc, .overflow = 0x00 },
> +		[0x5f] = { .lshift = 0xbe, .overflow = 0x00 },
> +		[0x60] = { .lshift = 0xc0, .overflow = 0x00 },
> +		[0x61] = { .lshift = 0xc2, .overflow = 0x00 },
> +		[0x62] = { .lshift = 0xc4, .overflow = 0x00 },
> +		[0x63] = { .lshift = 0xc6, .overflow = 0x00 },
> +		[0x64] = { .lshift = 0xc8, .overflow = 0x00 },
> +		[0x65] = { .lshift = 0xca, .overflow = 0x00 },
> +		[0x66] = { .lshift = 0xcc, .overflow = 0x00 },
> +		[0x67] = { .lshift = 0xce, .overflow = 0x00 },
> +		[0x68] = { .lshift = 0xd0, .overflow = 0x00 },
> +		[0x69] = { .lshift = 0xd2, .overflow = 0x00 },
> +		[0x6a] = { .lshift = 0xd4, .overflow = 0x00 },
> +		[0x6b] = { .lshift = 0xd6, .overflow = 0x00 },
> +		[0x6c] = { .lshift = 0xd8, .overflow = 0x00 },
> +		[0x6d] = { .lshift = 0xda, .overflow = 0x00 },
> +		[0x6e] = { .lshift = 0xdc, .overflow = 0x00 },
> +		[0x6f] = { .lshift = 0xde, .overflow = 0x00 },
> +		[0x70] = { .lshift = 0xe0, .overflow = 0x00 },
> +		[0x71] = { .lshift = 0xe2, .overflow = 0x00 },
> +		[0x72] = { .lshift = 0xe4, .overflow = 0x00 },
> +		[0x73] = { .lshift = 0xe6, .overflow = 0x00 },
> +		[0x74] = { .lshift = 0xe8, .overflow = 0x00 },
> +		[0x75] = { .lshift = 0xea, .overflow = 0x00 },
> +		[0x76] = { .lshift = 0xec, .overflow = 0x00 },
> +		[0x77] = { .lshift = 0xee, .overflow = 0x00 },
> +		[0x78] = { .lshift = 0xf0, .overflow = 0x00 },
> +		[0x79] = { .lshift = 0xf2, .overflow = 0x00 },
> +		[0x7a] = { .lshift = 0xf4, .overflow = 0x00 },
> +		[0x7b] = { .lshift = 0xf6, .overflow = 0x00 },
> +		[0x7c] = { .lshift = 0xf8, .overflow = 0x00 },
> +		[0x7d] = { .lshift = 0xfa, .overflow = 0x00 },
> +		[0x7e] = { .lshift = 0xfc, .overflow = 0x00 },
> +		[0x7f] = { .lshift = 0xfe, .overflow = 0x00 },
> +		[0x80] = { .lshift = 0x00, .overflow = 0x01 },
> +		[0x81] = { .lshift = 0x02, .overflow = 0x01 },
> +		[0x82] = { .lshift = 0x04, .overflow = 0x01 },
> +		[0x83] = { .lshift = 0x06, .overflow = 0x01 },
> +		[0x84] = { .lshift = 0x08, .overflow = 0x01 },
> +		[0x85] = { .lshift = 0x0a, .overflow = 0x01 },
> +		[0x86] = { .lshift = 0x0c, .overflow = 0x01 },
> +		[0x87] = { .lshift = 0x0e, .overflow = 0x01 },
> +		[0x88] = { .lshift = 0x10, .overflow = 0x01 },
> +		[0x89] = { .lshift = 0x12, .overflow = 0x01 },
> +		[0x8a] = { .lshift = 0x14, .overflow = 0x01 },
> +		[0x8b] = { .lshift = 0x16, .overflow = 0x01 },
> +		[0x8c] = { .lshift = 0x18, .overflow = 0x01 },
> +		[0x8d] = { .lshift = 0x1a, .overflow = 0x01 },
> +		[0x8e] = { .lshift = 0x1c, .overflow = 0x01 },
> +		[0x8f] = { .lshift = 0x1e, .overflow = 0x01 },
> +		[0x90] = { .lshift = 0x20, .overflow = 0x01 },
> +		[0x91] = { .lshift = 0x22, .overflow = 0x01 },
> +		[0x92] = { .lshift = 0x24, .overflow = 0x01 },
> +		[0x93] = { .lshift = 0x26, .overflow = 0x01 },
> +		[0x94] = { .lshift = 0x28, .overflow = 0x01 },
> +		[0x95] = { .lshift = 0x2a, .overflow = 0x01 },
> +		[0x96] = { .lshift = 0x2c, .overflow = 0x01 },
> +		[0x97] = { .lshift = 0x2e, .overflow = 0x01 },
> +		[0x98] = { .lshift = 0x30, .overflow = 0x01 },
> +		[0x99] = { .lshift = 0x32, .overflow = 0x01 },
> +		[0x9a] = { .lshift = 0x34, .overflow = 0x01 },
> +		[0x9b] = { .lshift = 0x36, .overflow = 0x01 },
> +		[0x9c] = { .lshift = 0x38, .overflow = 0x01 },
> +		[0x9d] = { .lshift = 0x3a, .overflow = 0x01 },
> +		[0x9e] = { .lshift = 0x3c, .overflow = 0x01 },
> +		[0x9f] = { .lshift = 0x3e, .overflow = 0x01 },
> +		[0xa0] = { .lshift = 0x40, .overflow = 0x01 },
> +		[0xa1] = { .lshift = 0x42, .overflow = 0x01 },
> +		[0xa2] = { .lshift = 0x44, .overflow = 0x01 },
> +		[0xa3] = { .lshift = 0x46, .overflow = 0x01 },
> +		[0xa4] = { .lshift = 0x48, .overflow = 0x01 },
> +		[0xa5] = { .lshift = 0x4a, .overflow = 0x01 },
> +		[0xa6] = { .lshift = 0x4c, .overflow = 0x01 },
> +		[0xa7] = { .lshift = 0x4e, .overflow = 0x01 },
> +		[0xa8] = { .lshift = 0x50, .overflow = 0x01 },
> +		[0xa9] = { .lshift = 0x52, .overflow = 0x01 },
> +		[0xaa] = { .lshift = 0x54, .overflow = 0x01 },
> +		[0xab] = { .lshift = 0x56, .overflow = 0x01 },
> +		[0xac] = { .lshift = 0x58, .overflow = 0x01 },
> +		[0xad] = { .lshift = 0x5a, .overflow = 0x01 },
> +		[0xae] = { .lshift = 0x5c, .overflow = 0x01 },
> +		[0xaf] = { .lshift = 0x5e, .overflow = 0x01 },
> +		[0xb0] = { .lshift = 0x60, .overflow = 0x01 },
> +		[0xb1] = { .lshift = 0x62, .overflow = 0x01 },
> +		[0xb2] = { .lshift = 0x64, .overflow = 0x01 },
> +		[0xb3] = { .lshift = 0x66, .overflow = 0x01 },
> +		[0xb4] = { .lshift = 0x68, .overflow = 0x01 },
> +		[0xb5] = { .lshift = 0x6a, .overflow = 0x01 },
> +		[0xb6] = { .lshift = 0x6c, .overflow = 0x01 },
> +		[0xb7] = { .lshift = 0x6e, .overflow = 0x01 },
> +		[0xb8] = { .lshift = 0x70, .overflow = 0x01 },
> +		[0xb9] = { .lshift = 0x72, .overflow = 0x01 },
> +		[0xba] = { .lshift = 0x74, .overflow = 0x01 },
> +		[0xbb] = { .lshift = 0x76, .overflow = 0x01 },
> +		[0xbc] = { .lshift = 0x78, .overflow = 0x01 },
> +		[0xbd] = { .lshift = 0x7a, .overflow = 0x01 },
> +		[0xbe] = { .lshift = 0x7c, .overflow = 0x01 },
> +		[0xbf] = { .lshift = 0x7e, .overflow = 0x01 },
> +		[0xc0] = { .lshift = 0x80, .overflow = 0x01 },
> +		[0xc1] = { .lshift = 0x82, .overflow = 0x01 },
> +		[0xc2] = { .lshift = 0x84, .overflow = 0x01 },
> +		[0xc3] = { .lshift = 0x86, .overflow = 0x01 },
> +		[0xc4] = { .lshift = 0x88, .overflow = 0x01 },
> +		[0xc5] = { .lshift = 0x8a, .overflow = 0x01 },
> +		[0xc6] = { .lshift = 0x8c, .overflow = 0x01 },
> +		[0xc7] = { .lshift = 0x8e, .overflow = 0x01 },
> +		[0xc8] = { .lshift = 0x90, .overflow = 0x01 },
> +		[0xc9] = { .lshift = 0x92, .overflow = 0x01 },
> +		[0xca] = { .lshift = 0x94, .overflow = 0x01 },
> +		[0xcb] = { .lshift = 0x96, .overflow = 0x01 },
> +		[0xcc] = { .lshift = 0x98, .overflow = 0x01 },
> +		[0xcd] = { .lshift = 0x9a, .overflow = 0x01 },
> +		[0xce] = { .lshift = 0x9c, .overflow = 0x01 },
> +		[0xcf] = { .lshift = 0x9e, .overflow = 0x01 },
> +		[0xd0] = { .lshift = 0xa0, .overflow = 0x01 },
> +		[0xd1] = { .lshift = 0xa2, .overflow = 0x01 },
> +		[0xd2] = { .lshift = 0xa4, .overflow = 0x01 },
> +		[0xd3] = { .lshift = 0xa6, .overflow = 0x01 },
> +		[0xd4] = { .lshift = 0xa8, .overflow = 0x01 },
> +		[0xd5] = { .lshift = 0xaa, .overflow = 0x01 },
> +		[0xd6] = { .lshift = 0xac, .overflow = 0x01 },
> +		[0xd7] = { .lshift = 0xae, .overflow = 0x01 },
> +		[0xd8] = { .lshift = 0xb0, .overflow = 0x01 },
> +		[0xd9] = { .lshift = 0xb2, .overflow = 0x01 },
> +		[0xda] = { .lshift = 0xb4, .overflow = 0x01 },
> +		[0xdb] = { .lshift = 0xb6, .overflow = 0x01 },
> +		[0xdc] = { .lshift = 0xb8, .overflow = 0x01 },
> +		[0xdd] = { .lshift = 0xba, .overflow = 0x01 },
> +		[0xde] = { .lshift = 0xbc, .overflow = 0x01 },
> +		[0xdf] = { .lshift = 0xbe, .overflow = 0x01 },
> +		[0xe0] = { .lshift = 0xc0, .overflow = 0x01 },
> +		[0xe1] = { .lshift = 0xc2, .overflow = 0x01 },
> +		[0xe2] = { .lshift = 0xc4, .overflow = 0x01 },
> +		[0xe3] = { .lshift = 0xc6, .overflow = 0x01 },
> +		[0xe4] = { .lshift = 0xc8, .overflow = 0x01 },
> +		[0xe5] = { .lshift = 0xca, .overflow = 0x01 },
> +		[0xe6] = { .lshift = 0xcc, .overflow = 0x01 },
> +		[0xe7] = { .lshift = 0xce, .overflow = 0x01 },
> +		[0xe8] = { .lshift = 0xd0, .overflow = 0x01 },
> +		[0xe9] = { .lshift = 0xd2, .overflow = 0x01 },
> +		[0xea] = { .lshift = 0xd4, .overflow = 0x01 },
> +		[0xeb] = { .lshift = 0xd6, .overflow = 0x01 },
> +		[0xec] = { .lshift = 0xd8, .overflow = 0x01 },
> +		[0xed] = { .lshift = 0xda, .overflow = 0x01 },
> +		[0xee] = { .lshift = 0xdc, .overflow = 0x01 },
> +		[0xef] = { .lshift = 0xde, .overflow = 0x01 },
> +		[0xf0] = { .lshift = 0xe0, .overflow = 0x01 },
> +		[0xf1] = { .lshift = 0xe2, .overflow = 0x01 },
> +		[0xf2] = { .lshift = 0xe4, .overflow = 0x01 },
> +		[0xf3] = { .lshift = 0xe6, .overflow = 0x01 },
> +		[0xf4] = { .lshift = 0xe8, .overflow = 0x01 },
> +		[0xf5] = { .lshift = 0xea, .overflow = 0x01 },
> +		[0xf6] = { .lshift = 0xec, .overflow = 0x01 },
> +		[0xf7] = { .lshift = 0xee, .overflow = 0x01 },
> +		[0xf8] = { .lshift = 0xf0, .overflow = 0x01 },
> +		[0xf9] = { .lshift = 0xf2, .overflow = 0x01 },
> +		[0xfa] = { .lshift = 0xf4, .overflow = 0x01 },
> +		[0xfb] = { .lshift = 0xf6, .overflow = 0x01 },
> +		[0xfc] = { .lshift = 0xf8, .overflow = 0x01 },
> +		[0xfd] = { .lshift = 0xfa, .overflow = 0x01 },
> +		[0xfe] = { .lshift = 0xfc, .overflow = 0x01 },
> +		[0xff] = { .lshift = 0xfe, .overflow = 0x01 },
> +	};
> +	int8_t i;
> +	uint8_t overflow = 0;
> +
> +	for (i = AES_BLOCK_SIZE - 1; i >= 0; i--) {
> +		const struct aes_block_lshift_entry *e = &aes_block_lshift_table[in[i]];
> +		out[i] = e->lshift | overflow;
> +		overflow = e->overflow;
> +	}
> +}
> +
> +static inline void aes_block_rshift(const uint8_t in[AES_BLOCK_SIZE],
> +				    uint8_t out[AES_BLOCK_SIZE])
> +{ /* out = in shifted right by one bit across the whole block; in[0] is the most significant byte */
> +	static const struct aes_block_rshift_entry { /* lookup table indexed by the input byte value */
> +		uint8_t rshift; /* the input byte shifted right by one bit */
> +		uint8_t overflow; /* 0x80 if the input byte's lowest bit was set, otherwise 0x00 */
> +	} aes_block_rshift_table[UINT8_MAX+1] = {
> +		[0x00] = { .rshift = 0x00, .overflow = 0x00 },
> +		[0x01] = { .rshift = 0x00, .overflow = 0x80 },
> +		[0x02] = { .rshift = 0x01, .overflow = 0x00 },
> +		[0x03] = { .rshift = 0x01, .overflow = 0x80 },
> +		[0x04] = { .rshift = 0x02, .overflow = 0x00 },
> +		[0x05] = { .rshift = 0x02, .overflow = 0x80 },
> +		[0x06] = { .rshift = 0x03, .overflow = 0x00 },
> +		[0x07] = { .rshift = 0x03, .overflow = 0x80 },
> +		[0x08] = { .rshift = 0x04, .overflow = 0x00 },
> +		[0x09] = { .rshift = 0x04, .overflow = 0x80 },
> +		[0x0a] = { .rshift = 0x05, .overflow = 0x00 },
> +		[0x0b] = { .rshift = 0x05, .overflow = 0x80 },
> +		[0x0c] = { .rshift = 0x06, .overflow = 0x00 },
> +		[0x0d] = { .rshift = 0x06, .overflow = 0x80 },
> +		[0x0e] = { .rshift = 0x07, .overflow = 0x00 },
> +		[0x0f] = { .rshift = 0x07, .overflow = 0x80 },
> +		[0x10] = { .rshift = 0x08, .overflow = 0x00 },
> +		[0x11] = { .rshift = 0x08, .overflow = 0x80 },
> +		[0x12] = { .rshift = 0x09, .overflow = 0x00 },
> +		[0x13] = { .rshift = 0x09, .overflow = 0x80 },
> +		[0x14] = { .rshift = 0x0a, .overflow = 0x00 },
> +		[0x15] = { .rshift = 0x0a, .overflow = 0x80 },
> +		[0x16] = { .rshift = 0x0b, .overflow = 0x00 },
> +		[0x17] = { .rshift = 0x0b, .overflow = 0x80 },
> +		[0x18] = { .rshift = 0x0c, .overflow = 0x00 },
> +		[0x19] = { .rshift = 0x0c, .overflow = 0x80 },
> +		[0x1a] = { .rshift = 0x0d, .overflow = 0x00 },
> +		[0x1b] = { .rshift = 0x0d, .overflow = 0x80 },
> +		[0x1c] = { .rshift = 0x0e, .overflow = 0x00 },
> +		[0x1d] = { .rshift = 0x0e, .overflow = 0x80 },
> +		[0x1e] = { .rshift = 0x0f, .overflow = 0x00 },
> +		[0x1f] = { .rshift = 0x0f, .overflow = 0x80 },
> +		[0x20] = { .rshift = 0x10, .overflow = 0x00 },
> +		[0x21] = { .rshift = 0x10, .overflow = 0x80 },
> +		[0x22] = { .rshift = 0x11, .overflow = 0x00 },
> +		[0x23] = { .rshift = 0x11, .overflow = 0x80 },
> +		[0x24] = { .rshift = 0x12, .overflow = 0x00 },
> +		[0x25] = { .rshift = 0x12, .overflow = 0x80 },
> +		[0x26] = { .rshift = 0x13, .overflow = 0x00 },
> +		[0x27] = { .rshift = 0x13, .overflow = 0x80 },
> +		[0x28] = { .rshift = 0x14, .overflow = 0x00 },
> +		[0x29] = { .rshift = 0x14, .overflow = 0x80 },
> +		[0x2a] = { .rshift = 0x15, .overflow = 0x00 },
> +		[0x2b] = { .rshift = 0x15, .overflow = 0x80 },
> +		[0x2c] = { .rshift = 0x16, .overflow = 0x00 },
> +		[0x2d] = { .rshift = 0x16, .overflow = 0x80 },
> +		[0x2e] = { .rshift = 0x17, .overflow = 0x00 },
> +		[0x2f] = { .rshift = 0x17, .overflow = 0x80 },
> +		[0x30] = { .rshift = 0x18, .overflow = 0x00 },
> +		[0x31] = { .rshift = 0x18, .overflow = 0x80 },
> +		[0x32] = { .rshift = 0x19, .overflow = 0x00 },
> +		[0x33] = { .rshift = 0x19, .overflow = 0x80 },
> +		[0x34] = { .rshift = 0x1a, .overflow = 0x00 },
> +		[0x35] = { .rshift = 0x1a, .overflow = 0x80 },
> +		[0x36] = { .rshift = 0x1b, .overflow = 0x00 },
> +		[0x37] = { .rshift = 0x1b, .overflow = 0x80 },
> +		[0x38] = { .rshift = 0x1c, .overflow = 0x00 },
> +		[0x39] = { .rshift = 0x1c, .overflow = 0x80 },
> +		[0x3a] = { .rshift = 0x1d, .overflow = 0x00 },
> +		[0x3b] = { .rshift = 0x1d, .overflow = 0x80 },
> +		[0x3c] = { .rshift = 0x1e, .overflow = 0x00 },
> +		[0x3d] = { .rshift = 0x1e, .overflow = 0x80 },
> +		[0x3e] = { .rshift = 0x1f, .overflow = 0x00 },
> +		[0x3f] = { .rshift = 0x1f, .overflow = 0x80 },
> +		[0x40] = { .rshift = 0x20, .overflow = 0x00 },
> +		[0x41] = { .rshift = 0x20, .overflow = 0x80 },
> +		[0x42] = { .rshift = 0x21, .overflow = 0x00 },
> +		[0x43] = { .rshift = 0x21, .overflow = 0x80 },
> +		[0x44] = { .rshift = 0x22, .overflow = 0x00 },
> +		[0x45] = { .rshift = 0x22, .overflow = 0x80 },
> +		[0x46] = { .rshift = 0x23, .overflow = 0x00 },
> +		[0x47] = { .rshift = 0x23, .overflow = 0x80 },
> +		[0x48] = { .rshift = 0x24, .overflow = 0x00 },
> +		[0x49] = { .rshift = 0x24, .overflow = 0x80 },
> +		[0x4a] = { .rshift = 0x25, .overflow = 0x00 },
> +		[0x4b] = { .rshift = 0x25, .overflow = 0x80 },
> +		[0x4c] = { .rshift = 0x26, .overflow = 0x00 },
> +		[0x4d] = { .rshift = 0x26, .overflow = 0x80 },
> +		[0x4e] = { .rshift = 0x27, .overflow = 0x00 },
> +		[0x4f] = { .rshift = 0x27, .overflow = 0x80 },
> +		[0x50] = { .rshift = 0x28, .overflow = 0x00 },
> +		[0x51] = { .rshift = 0x28, .overflow = 0x80 },
> +		[0x52] = { .rshift = 0x29, .overflow = 0x00 },
> +		[0x53] = { .rshift = 0x29, .overflow = 0x80 },
> +		[0x54] = { .rshift = 0x2a, .overflow = 0x00 },
> +		[0x55] = { .rshift = 0x2a, .overflow = 0x80 },
> +		[0x56] = { .rshift = 0x2b, .overflow = 0x00 },
> +		[0x57] = { .rshift = 0x2b, .overflow = 0x80 },
> +		[0x58] = { .rshift = 0x2c, .overflow = 0x00 },
> +		[0x59] = { .rshift = 0x2c, .overflow = 0x80 },
> +		[0x5a] = { .rshift = 0x2d, .overflow = 0x00 },
> +		[0x5b] = { .rshift = 0x2d, .overflow = 0x80 },
> +		[0x5c] = { .rshift = 0x2e, .overflow = 0x00 },
> +		[0x5d] = { .rshift = 0x2e, .overflow = 0x80 },
> +		[0x5e] = { .rshift = 0x2f, .overflow = 0x00 },
> +		[0x5f] = { .rshift = 0x2f, .overflow = 0x80 },
> +		[0x60] = { .rshift = 0x30, .overflow = 0x00 },
> +		[0x61] = { .rshift = 0x30, .overflow = 0x80 },
> +		[0x62] = { .rshift = 0x31, .overflow = 0x00 },
> +		[0x63] = { .rshift = 0x31, .overflow = 0x80 },
> +		[0x64] = { .rshift = 0x32, .overflow = 0x00 },
> +		[0x65] = { .rshift = 0x32, .overflow = 0x80 },
> +		[0x66] = { .rshift = 0x33, .overflow = 0x00 },
> +		[0x67] = { .rshift = 0x33, .overflow = 0x80 },
> +		[0x68] = { .rshift = 0x34, .overflow = 0x00 },
> +		[0x69] = { .rshift = 0x34, .overflow = 0x80 },
> +		[0x6a] = { .rshift = 0x35, .overflow = 0x00 },
> +		[0x6b] = { .rshift = 0x35, .overflow = 0x80 },
> +		[0x6c] = { .rshift = 0x36, .overflow = 0x00 },
> +		[0x6d] = { .rshift = 0x36, .overflow = 0x80 },
> +		[0x6e] = { .rshift = 0x37, .overflow = 0x00 },
> +		[0x6f] = { .rshift = 0x37, .overflow = 0x80 },
> +		[0x70] = { .rshift = 0x38, .overflow = 0x00 },
> +		[0x71] = { .rshift = 0x38, .overflow = 0x80 },
> +		[0x72] = { .rshift = 0x39, .overflow = 0x00 },
> +		[0x73] = { .rshift = 0x39, .overflow = 0x80 },
> +		[0x74] = { .rshift = 0x3a, .overflow = 0x00 },
> +		[0x75] = { .rshift = 0x3a, .overflow = 0x80 },
> +		[0x76] = { .rshift = 0x3b, .overflow = 0x00 },
> +		[0x77] = { .rshift = 0x3b, .overflow = 0x80 },
> +		[0x78] = { .rshift = 0x3c, .overflow = 0x00 },
> +		[0x79] = { .rshift = 0x3c, .overflow = 0x80 },
> +		[0x7a] = { .rshift = 0x3d, .overflow = 0x00 },
> +		[0x7b] = { .rshift = 0x3d, .overflow = 0x80 },
> +		[0x7c] = { .rshift = 0x3e, .overflow = 0x00 },
> +		[0x7d] = { .rshift = 0x3e, .overflow = 0x80 },
> +		[0x7e] = { .rshift = 0x3f, .overflow = 0x00 },
> +		[0x7f] = { .rshift = 0x3f, .overflow = 0x80 },
> +		[0x80] = { .rshift = 0x40, .overflow = 0x00 },
> +		[0x81] = { .rshift = 0x40, .overflow = 0x80 },
> +		[0x82] = { .rshift = 0x41, .overflow = 0x00 },
> +		[0x83] = { .rshift = 0x41, .overflow = 0x80 },
> +		[0x84] = { .rshift = 0x42, .overflow = 0x00 },
> +		[0x85] = { .rshift = 0x42, .overflow = 0x80 },
> +		[0x86] = { .rshift = 0x43, .overflow = 0x00 },
> +		[0x87] = { .rshift = 0x43, .overflow = 0x80 },
> +		[0x88] = { .rshift = 0x44, .overflow = 0x00 },
> +		[0x89] = { .rshift = 0x44, .overflow = 0x80 },
> +		[0x8a] = { .rshift = 0x45, .overflow = 0x00 },
> +		[0x8b] = { .rshift = 0x45, .overflow = 0x80 },
> +		[0x8c] = { .rshift = 0x46, .overflow = 0x00 },
> +		[0x8d] = { .rshift = 0x46, .overflow = 0x80 },
> +		[0x8e] = { .rshift = 0x47, .overflow = 0x00 },
> +		[0x8f] = { .rshift = 0x47, .overflow = 0x80 },
> +		[0x90] = { .rshift = 0x48, .overflow = 0x00 },
> +		[0x91] = { .rshift = 0x48, .overflow = 0x80 },
> +		[0x92] = { .rshift = 0x49, .overflow = 0x00 },
> +		[0x93] = { .rshift = 0x49, .overflow = 0x80 },
> +		[0x94] = { .rshift = 0x4a, .overflow = 0x00 },
> +		[0x95] = { .rshift = 0x4a, .overflow = 0x80 },
> +		[0x96] = { .rshift = 0x4b, .overflow = 0x00 },
> +		[0x97] = { .rshift = 0x4b, .overflow = 0x80 },
> +		[0x98] = { .rshift = 0x4c, .overflow = 0x00 },
> +		[0x99] = { .rshift = 0x4c, .overflow = 0x80 },
> +		[0x9a] = { .rshift = 0x4d, .overflow = 0x00 },
> +		[0x9b] = { .rshift = 0x4d, .overflow = 0x80 },
> +		[0x9c] = { .rshift = 0x4e, .overflow = 0x00 },
> +		[0x9d] = { .rshift = 0x4e, .overflow = 0x80 },
> +		[0x9e] = { .rshift = 0x4f, .overflow = 0x00 },
> +		[0x9f] = { .rshift = 0x4f, .overflow = 0x80 },
> +		[0xa0] = { .rshift = 0x50, .overflow = 0x00 },
> +		[0xa1] = { .rshift = 0x50, .overflow = 0x80 },
> +		[0xa2] = { .rshift = 0x51, .overflow = 0x00 },
> +		[0xa3] = { .rshift = 0x51, .overflow = 0x80 },
> +		[0xa4] = { .rshift = 0x52, .overflow = 0x00 },
> +		[0xa5] = { .rshift = 0x52, .overflow = 0x80 },
> +		[0xa6] = { .rshift = 0x53, .overflow = 0x00 },
> +		[0xa7] = { .rshift = 0x53, .overflow = 0x80 },
> +		[0xa8] = { .rshift = 0x54, .overflow = 0x00 },
> +		[0xa9] = { .rshift = 0x54, .overflow = 0x80 },
> +		[0xaa] = { .rshift = 0x55, .overflow = 0x00 },
> +		[0xab] = { .rshift = 0x55, .overflow = 0x80 },
> +		[0xac] = { .rshift = 0x56, .overflow = 0x00 },
> +		[0xad] = { .rshift = 0x56, .overflow = 0x80 },
> +		[0xae] = { .rshift = 0x57, .overflow = 0x00 },
> +		[0xaf] = { .rshift = 0x57, .overflow = 0x80 },
> +		[0xb0] = { .rshift = 0x58, .overflow = 0x00 },
> +		[0xb1] = { .rshift = 0x58, .overflow = 0x80 },
> +		[0xb2] = { .rshift = 0x59, .overflow = 0x00 },
> +		[0xb3] = { .rshift = 0x59, .overflow = 0x80 },
> +		[0xb4] = { .rshift = 0x5a, .overflow = 0x00 },
> +		[0xb5] = { .rshift = 0x5a, .overflow = 0x80 },
> +		[0xb6] = { .rshift = 0x5b, .overflow = 0x00 },
> +		[0xb7] = { .rshift = 0x5b, .overflow = 0x80 },
> +		[0xb8] = { .rshift = 0x5c, .overflow = 0x00 },
> +		[0xb9] = { .rshift = 0x5c, .overflow = 0x80 },
> +		[0xba] = { .rshift = 0x5d, .overflow = 0x00 },
> +		[0xbb] = { .rshift = 0x5d, .overflow = 0x80 },
> +		[0xbc] = { .rshift = 0x5e, .overflow = 0x00 },
> +		[0xbd] = { .rshift = 0x5e, .overflow = 0x80 },
> +		[0xbe] = { .rshift = 0x5f, .overflow = 0x00 },
> +		[0xbf] = { .rshift = 0x5f, .overflow = 0x80 },
> +		[0xc0] = { .rshift = 0x60, .overflow = 0x00 },
> +		[0xc1] = { .rshift = 0x60, .overflow = 0x80 },
> +		[0xc2] = { .rshift = 0x61, .overflow = 0x00 },
> +		[0xc3] = { .rshift = 0x61, .overflow = 0x80 },
> +		[0xc4] = { .rshift = 0x62, .overflow = 0x00 },
> +		[0xc5] = { .rshift = 0x62, .overflow = 0x80 },
> +		[0xc6] = { .rshift = 0x63, .overflow = 0x00 },
> +		[0xc7] = { .rshift = 0x63, .overflow = 0x80 },
> +		[0xc8] = { .rshift = 0x64, .overflow = 0x00 },
> +		[0xc9] = { .rshift = 0x64, .overflow = 0x80 },
> +		[0xca] = { .rshift = 0x65, .overflow = 0x00 },
> +		[0xcb] = { .rshift = 0x65, .overflow = 0x80 },
> +		[0xcc] = { .rshift = 0x66, .overflow = 0x00 },
> +		[0xcd] = { .rshift = 0x66, .overflow = 0x80 },
> +		[0xce] = { .rshift = 0x67, .overflow = 0x00 },
> +		[0xcf] = { .rshift = 0x67, .overflow = 0x80 },
> +		[0xd0] = { .rshift = 0x68, .overflow = 0x00 },
> +		[0xd1] = { .rshift = 0x68, .overflow = 0x80 },
> +		[0xd2] = { .rshift = 0x69, .overflow = 0x00 },
> +		[0xd3] = { .rshift = 0x69, .overflow = 0x80 },
> +		[0xd4] = { .rshift = 0x6a, .overflow = 0x00 },
> +		[0xd5] = { .rshift = 0x6a, .overflow = 0x80 },
> +		[0xd6] = { .rshift = 0x6b, .overflow = 0x00 },
> +		[0xd7] = { .rshift = 0x6b, .overflow = 0x80 },
> +		[0xd8] = { .rshift = 0x6c, .overflow = 0x00 },
> +		[0xd9] = { .rshift = 0x6c, .overflow = 0x80 },
> +		[0xda] = { .rshift = 0x6d, .overflow = 0x00 },
> +		[0xdb] = { .rshift = 0x6d, .overflow = 0x80 },
> +		[0xdc] = { .rshift = 0x6e, .overflow = 0x00 },
> +		[0xdd] = { .rshift = 0x6e, .overflow = 0x80 },
> +		[0xde] = { .rshift = 0x6f, .overflow = 0x00 },
> +		[0xdf] = { .rshift = 0x6f, .overflow = 0x80 },
> +		[0xe0] = { .rshift = 0x70, .overflow = 0x00 },
> +		[0xe1] = { .rshift = 0x70, .overflow = 0x80 },
> +		[0xe2] = { .rshift = 0x71, .overflow = 0x00 },
> +		[0xe3] = { .rshift = 0x71, .overflow = 0x80 },
> +		[0xe4] = { .rshift = 0x72, .overflow = 0x00 },
> +		[0xe5] = { .rshift = 0x72, .overflow = 0x80 },
> +		[0xe6] = { .rshift = 0x73, .overflow = 0x00 },
> +		[0xe7] = { .rshift = 0x73, .overflow = 0x80 },
> +		[0xe8] = { .rshift = 0x74, .overflow = 0x00 },
> +		[0xe9] = { .rshift = 0x74, .overflow = 0x80 },
> +		[0xea] = { .rshift = 0x75, .overflow = 0x00 },
> +		[0xeb] = { .rshift = 0x75, .overflow = 0x80 },
> +		[0xec] = { .rshift = 0x76, .overflow = 0x00 },
> +		[0xed] = { .rshift = 0x76, .overflow = 0x80 },
> +		[0xee] = { .rshift = 0x77, .overflow = 0x00 },
> +		[0xef] = { .rshift = 0x77, .overflow = 0x80 },
> +		[0xf0] = { .rshift = 0x78, .overflow = 0x00 },
> +		[0xf1] = { .rshift = 0x78, .overflow = 0x80 },
> +		[0xf2] = { .rshift = 0x79, .overflow = 0x00 },
> +		[0xf3] = { .rshift = 0x79, .overflow = 0x80 },
> +		[0xf4] = { .rshift = 0x7a, .overflow = 0x00 },
> +		[0xf5] = { .rshift = 0x7a, .overflow = 0x80 },
> +		[0xf6] = { .rshift = 0x7b, .overflow = 0x00 },
> +		[0xf7] = { .rshift = 0x7b, .overflow = 0x80 },
> +		[0xf8] = { .rshift = 0x7c, .overflow = 0x00 },
> +		[0xf9] = { .rshift = 0x7c, .overflow = 0x80 },
> +		[0xfa] = { .rshift = 0x7d, .overflow = 0x00 },
> +		[0xfb] = { .rshift = 0x7d, .overflow = 0x80 },
> +		[0xfc] = { .rshift = 0x7e, .overflow = 0x00 },
> +		[0xfd] = { .rshift = 0x7e, .overflow = 0x80 },
> +		[0xfe] = { .rshift = 0x7f, .overflow = 0x00 },
> +		[0xff] = { .rshift = 0x7f, .overflow = 0x80 },
> +	};
> +	uint8_t i;
> +	uint8_t overflow = 0; /* bit carried out of the previous byte; nothing is carried into byte 0 */
> +
> +	for (i = 0; i < AES_BLOCK_SIZE; i++) { /* walk from the first byte to the last so the carry moves downwards */
> +		const struct aes_block_rshift_entry *e = &aes_block_rshift_table[in[i]];
> +		out[i] = e->rshift | overflow; /* in[i] was looked up before this store, so out may alias in */
> +		overflow = e->overflow; /* becomes the top bit of the next byte; the last byte's carry is dropped */
> +	}
> +}
>  #endif /* LIB_CRYPTO_AES_H */
> -- 
> 1.9.1
> 
> 
> From 229affe4d8d97015aa6f88d45747099bbde948bc Mon Sep 17 00:00:00 2001
> From: Stefan Metzmacher <metze at samba.org>
> Date: Wed, 12 Aug 2015 11:29:47 +0200
> Subject: [PATCH 06/11] lib/crypto: optimize aes_cmac_128
> 
> - We avoid variables in order to do a lazy cleanup
>   in aes_cmac_128_final() via ZERO_STRUCTP(ctx)
> - We avoid unnecessary memcpy() calls
> - We use the optimized aes_block_{xor,lshift}() functions
> - Align AES_BLOCK_SIZE arrays to 8 bytes
> 
> BUG: https://bugzilla.samba.org/show_bug.cgi?id=11451
> 
> Signed-off-by: Stefan Metzmacher <metze at samba.org>
> ---
>  lib/crypto/aes_cmac_128.c | 104 +++++++++-------------------------------------
>  lib/crypto/aes_cmac_128.h |   6 +++
>  2 files changed, 25 insertions(+), 85 deletions(-)
> 
> diff --git a/lib/crypto/aes_cmac_128.c b/lib/crypto/aes_cmac_128.c
> index b630eea..5d71e82 100644
> --- a/lib/crypto/aes_cmac_128.c
> +++ b/lib/crypto/aes_cmac_128.c
> @@ -33,92 +33,42 @@ static const uint8_t const_Rb[] = {
>  
>  #define _MSB(x) (((x)[0] & 0x80)?1:0)
>  
> -static inline void aes_cmac_128_left_shift_1(const uint8_t in[AES_BLOCK_SIZE],
> -					     uint8_t out[AES_BLOCK_SIZE])
> -{
> -	uint8_t overflow = 0;
> -	int8_t i;
> -
> -	for (i = AES_BLOCK_SIZE - 1; i >= 0; i--) {
> -		out[i] = in[i] << 1;
> -
> -		out[i] |= overflow;
> -
> -		overflow = _MSB(&in[i]);
> -	}
> -}
> -
> -static inline void aes_cmac_128_xor(const uint8_t in1[AES_BLOCK_SIZE],
> -				    const uint8_t in2[AES_BLOCK_SIZE],
> -				    uint8_t out[AES_BLOCK_SIZE])
> -{
> -	uint8_t i;
> -
> -	for (i = 0; i < AES_BLOCK_SIZE; i++) {
> -		out[i] = in1[i] ^ in2[i];
> -	}
> -}
> -
>  void aes_cmac_128_init(struct aes_cmac_128_context *ctx,
>  		       const uint8_t K[AES_BLOCK_SIZE])
>  {
> -	uint8_t L[AES_BLOCK_SIZE];
> -
>  	ZERO_STRUCTP(ctx);
>  
>  	AES_set_encrypt_key(K, 128, &ctx->aes_key);
>  
>  	/* step 1 - generate subkeys k1 and k2 */
>  
> -	AES_encrypt(const_Zero, L, &ctx->aes_key);
> +	AES_encrypt(const_Zero, ctx->L, &ctx->aes_key);
>  
> -	if (_MSB(L) == 0) {
> -		aes_cmac_128_left_shift_1(L, ctx->K1);
> +	if (_MSB(ctx->L) == 0) {
> +		aes_block_lshift(ctx->L, ctx->K1);
>  	} else {
> -		uint8_t tmp_block[AES_BLOCK_SIZE];
> -
> -		aes_cmac_128_left_shift_1(L, tmp_block);
> -		aes_cmac_128_xor(tmp_block, const_Rb, ctx->K1);
> -		ZERO_STRUCT(tmp_block);
> +		aes_block_lshift(ctx->L, ctx->tmp);
> +		aes_block_xor(ctx->tmp, const_Rb, ctx->K1);
>  	}
>  
>  	if (_MSB(ctx->K1) == 0) {
> -		aes_cmac_128_left_shift_1(ctx->K1, ctx->K2);
> +		aes_block_lshift(ctx->K1, ctx->K2);
>  	} else {
> -		uint8_t tmp_block[AES_BLOCK_SIZE];
> -
> -		aes_cmac_128_left_shift_1(ctx->K1, tmp_block);
> -		aes_cmac_128_xor(tmp_block, const_Rb, ctx->K2);
> -		ZERO_STRUCT(tmp_block);
> +		aes_block_lshift(ctx->K1, ctx->tmp);
> +		aes_block_xor(ctx->tmp, const_Rb, ctx->K2);
>  	}
> -
> -	ZERO_STRUCT(L);
>  }
>  
>  void aes_cmac_128_update(struct aes_cmac_128_context *ctx,
> -			 const uint8_t *_msg, size_t _msg_len)
> +			 const uint8_t *msg, size_t msg_len)
>  {
> -	uint8_t tmp_block[AES_BLOCK_SIZE];
> -	uint8_t Y[AES_BLOCK_SIZE];
> -	const uint8_t *msg = _msg;
> -	size_t msg_len = _msg_len;
> -
> -	/*
> -	 * copy the remembered last block
> -	 */
> -	ZERO_STRUCT(tmp_block);
> -	if (ctx->last_len) {
> -		memcpy(tmp_block, ctx->last, ctx->last_len);
> -	}
> -
>  	/*
>  	 * check if we expand the block
>  	 */
>  	if (ctx->last_len < AES_BLOCK_SIZE) {
>  		size_t len = MIN(AES_BLOCK_SIZE - ctx->last_len, msg_len);
>  
> -		memcpy(&tmp_block[ctx->last_len], msg, len);
> -		memcpy(ctx->last, tmp_block, AES_BLOCK_SIZE);
> +		memcpy(&ctx->last[ctx->last_len], msg, len);
>  		msg += len;
>  		msg_len -= len;
>  		ctx->last_len += len;
> @@ -126,59 +76,43 @@ void aes_cmac_128_update(struct aes_cmac_128_context *ctx,
>  
>  	if (msg_len == 0) {
>  		/* if it is still the last block, we are done */
> -		ZERO_STRUCT(tmp_block);
>  		return;
>  	}
>  
>  	/*
> -	 * It is not the last block anymore
> -	 */
> -	ZERO_STRUCT(ctx->last);
> -	ctx->last_len = 0;
> -
> -	/*
>  	 * now checksum everything but the last block
>  	 */
> -	aes_cmac_128_xor(ctx->X, tmp_block, Y);
> -	AES_encrypt(Y, ctx->X, &ctx->aes_key);
> +	aes_block_xor(ctx->X, ctx->last, ctx->Y);
> +	AES_encrypt(ctx->Y, ctx->X, &ctx->aes_key);
>  
>  	while (msg_len > AES_BLOCK_SIZE) {
> -		memcpy(tmp_block, msg, AES_BLOCK_SIZE);
> +		aes_block_xor(ctx->X, msg, ctx->Y);
> +		AES_encrypt(ctx->Y, ctx->X, &ctx->aes_key);
>  		msg += AES_BLOCK_SIZE;
>  		msg_len -= AES_BLOCK_SIZE;
> -
> -		aes_cmac_128_xor(ctx->X, tmp_block, Y);
> -		AES_encrypt(Y, ctx->X, &ctx->aes_key);
>  	}
>  
>  	/*
>  	 * copy the last block, it will be processed in
>  	 * aes_cmac_128_final().
>  	 */
> +	ZERO_STRUCT(ctx->last);
>  	memcpy(ctx->last, msg, msg_len);
>  	ctx->last_len = msg_len;
> -
> -	ZERO_STRUCT(tmp_block);
> -	ZERO_STRUCT(Y);
>  }
>  
>  void aes_cmac_128_final(struct aes_cmac_128_context *ctx,
>  			uint8_t T[AES_BLOCK_SIZE])
>  {
> -	uint8_t tmp_block[AES_BLOCK_SIZE];
> -	uint8_t Y[AES_BLOCK_SIZE];
> -
>  	if (ctx->last_len < AES_BLOCK_SIZE) {
>  		ctx->last[ctx->last_len] = 0x80;
> -		aes_cmac_128_xor(ctx->last, ctx->K2, tmp_block);
> +		aes_block_xor(ctx->last, ctx->K2, ctx->tmp);
>  	} else {
> -		aes_cmac_128_xor(ctx->last, ctx->K1, tmp_block);
> +		aes_block_xor(ctx->last, ctx->K1, ctx->tmp);
>  	}
>  
> -	aes_cmac_128_xor(tmp_block, ctx->X, Y);
> -	AES_encrypt(Y, T, &ctx->aes_key);
> +	aes_block_xor(ctx->tmp, ctx->X, ctx->Y);
> +	AES_encrypt(ctx->Y, T, &ctx->aes_key);
>  
> -	ZERO_STRUCT(tmp_block);
> -	ZERO_STRUCT(Y);
>  	ZERO_STRUCTP(ctx);
>  }
> diff --git a/lib/crypto/aes_cmac_128.h b/lib/crypto/aes_cmac_128.h
> index 28117a0..9bcf951 100644
> --- a/lib/crypto/aes_cmac_128.h
> +++ b/lib/crypto/aes_cmac_128.h
> @@ -22,10 +22,16 @@
>  struct aes_cmac_128_context {
>  	AES_KEY aes_key;
>  
> +	uint64_t __align;
> +
>  	uint8_t K1[AES_BLOCK_SIZE];
>  	uint8_t K2[AES_BLOCK_SIZE];
>  
> +	uint8_t L[AES_BLOCK_SIZE];
>  	uint8_t X[AES_BLOCK_SIZE];
> +	uint8_t Y[AES_BLOCK_SIZE];
> +
> +	uint8_t tmp[AES_BLOCK_SIZE];
>  
>  	uint8_t last[AES_BLOCK_SIZE];
>  	size_t last_len;
> -- 
> 1.9.1
> 
> 
> From 479b414c880c96fc31e69f0b269016b1748970d9 Mon Sep 17 00:00:00 2001
> From: Stefan Metzmacher <metze at samba.org>
> Date: Wed, 12 Aug 2015 11:29:47 +0200
> Subject: [PATCH 07/11] lib/crypto: optimize aes_ccm_128
> 
> - We avoid variables in order to do a lazy cleanup
>   in aes_ccm_128_digest() via ZERO_STRUCTP(ctx)
> - We use the optimized aes_block_xor() function
> - We reuse A_i instead of rebuilding it completely every time.
> - Align AES_BLOCK_SIZE arrays to 8 bytes
> 
> BUG: https://bugzilla.samba.org/show_bug.cgi?id=11451
> 
> Signed-off-by: Stefan Metzmacher <metze at samba.org>
> ---
>  lib/crypto/aes_ccm_128.c | 107 +++++++++++++++++++++++++++--------------------
>  lib/crypto/aes_ccm_128.h |   8 +++-
>  2 files changed, 67 insertions(+), 48 deletions(-)
> 
> diff --git a/lib/crypto/aes_ccm_128.c b/lib/crypto/aes_ccm_128.c
> index 94b9803..04ae565 100644
> --- a/lib/crypto/aes_ccm_128.c
> +++ b/lib/crypto/aes_ccm_128.c
> @@ -24,24 +24,11 @@
>  #define M_ ((AES_CCM_128_M - 2) / 2)
>  #define L_ (AES_CCM_128_L - 1)
>  
> -static inline void aes_ccm_128_xor(const uint8_t in1[AES_BLOCK_SIZE],
> -				   const uint8_t in2[AES_BLOCK_SIZE],
> -				   uint8_t out[AES_BLOCK_SIZE])
> -{
> -	uint8_t i;
> -
> -	for (i = 0; i < AES_BLOCK_SIZE; i++) {
> -		out[i] = in1[i] ^ in2[i];
> -	}
> -}
> -
>  void aes_ccm_128_init(struct aes_ccm_128_context *ctx,
>  		      const uint8_t K[AES_BLOCK_SIZE],
>  		      const uint8_t N[AES_CCM_128_NONCE_SIZE],
>  		      size_t a_total, size_t m_total)
>  {
> -	uint8_t B_0[AES_BLOCK_SIZE];
> -
>  	ZERO_STRUCTP(ctx);
>  
>  	AES_set_encrypt_key(K, 128, &ctx->aes_key);
> @@ -52,22 +39,23 @@ void aes_ccm_128_init(struct aes_ccm_128_context *ctx,
>  	/*
>  	 * prepare B_0
>  	 */
> -	B_0[0]  = L_;
> -	B_0[0] += 8 * M_;
> +	ctx->B_i[0]  = L_;
> +	ctx->B_i[0] += 8 * M_;
>  	if (a_total > 0) {
> -		B_0[0] += 64;
> +		ctx->B_i[0] += 64;
>  	}
> -	memcpy(&B_0[1], ctx->nonce, AES_CCM_128_NONCE_SIZE);
> -	RSIVAL(B_0, (AES_BLOCK_SIZE - AES_CCM_128_L), m_total);
> +	memcpy(&ctx->B_i[1], ctx->nonce, AES_CCM_128_NONCE_SIZE);
> +	RSIVAL(ctx->B_i, (AES_BLOCK_SIZE - AES_CCM_128_L), m_total);
>  
>  	/*
>  	 * prepare X_1
>  	 */
> -	AES_encrypt(B_0, ctx->X_i, &ctx->aes_key);
> +	AES_encrypt(ctx->B_i, ctx->X_i, &ctx->aes_key);
>  
>  	/*
>  	 * prepare B_1
>  	 */
> +	ZERO_STRUCT(ctx->B_i);
>  	if (a_total >= UINT32_MAX) {
>  		RSSVAL(ctx->B_i, 0, 0xFFFF);
>  		RSBVAL(ctx->B_i, 2, (uint64_t)a_total);
> @@ -81,6 +69,12 @@ void aes_ccm_128_init(struct aes_ccm_128_context *ctx,
>  		ctx->B_i_ofs = 2;
>  	}
>  
> +	/*
> +	 * prepare A_i
> +	 */
> +	ctx->A_i[0]  = L_;
> +	memcpy(&ctx->A_i[1], ctx->nonce, AES_CCM_128_NONCE_SIZE);
> +
>  	ctx->S_i_ofs = AES_BLOCK_SIZE;
>  }
>  
> @@ -95,45 +89,59 @@ void aes_ccm_128_update(struct aes_ccm_128_context *ctx,
>  		remain = &ctx->m_remain;
>  	}
>  
> -	while (v_len > 0) {
> +	if (unlikely(v_len > *remain)) {
> +		abort();
> +	}
> +
> +	if (ctx->B_i_ofs > 0) {
>  		size_t n = MIN(AES_BLOCK_SIZE - ctx->B_i_ofs, v_len);
> -		bool more = true;
>  
>  		memcpy(&ctx->B_i[ctx->B_i_ofs], v, n);
>  		v += n;
>  		v_len -= n;
>  		ctx->B_i_ofs += n;
>  		*remain -= n;
> +	}
>  
> -		if (ctx->B_i_ofs == AES_BLOCK_SIZE) {
> -			more = false;
> -		} else if (*remain == 0) {
> -			more = false;
> -		}
> -
> -		if (more) {
> -			continue;
> -		}
> +	if ((ctx->B_i_ofs == AES_BLOCK_SIZE) || (*remain == 0)) {
> +		aes_block_xor(ctx->X_i, ctx->B_i, ctx->B_i);
> +		AES_encrypt(ctx->B_i, ctx->X_i, &ctx->aes_key);
> +		ctx->B_i_ofs = 0;
> +	}
>  
> -		aes_ccm_128_xor(ctx->X_i, ctx->B_i, ctx->B_i);
> +	while (v_len >= AES_BLOCK_SIZE) {
> +		aes_block_xor(ctx->X_i, v, ctx->B_i);
>  		AES_encrypt(ctx->B_i, ctx->X_i, &ctx->aes_key);
> +		ctx->B_i_ofs = 0;
> +		v += AES_BLOCK_SIZE;
> +		v_len -= AES_BLOCK_SIZE;
> +		*remain -= AES_BLOCK_SIZE;
> +	}
>  
> +	if (v_len > 0) {
>  		ZERO_STRUCT(ctx->B_i);
> +		memcpy(ctx->B_i, v, v_len);
> +		ctx->B_i_ofs += v_len;
> +		*remain -= v_len;
> +	}
> +
> +	if (*remain > 0) {
> +		return;
> +	}
> +
> +	if (ctx->B_i_ofs > 0) {
> +		aes_block_xor(ctx->X_i, ctx->B_i, ctx->B_i);
> +		AES_encrypt(ctx->B_i, ctx->X_i, &ctx->aes_key);
>  		ctx->B_i_ofs = 0;
>  	}
>  }
>  
> -static void aes_ccm_128_S_i(struct aes_ccm_128_context *ctx,
> -			    uint8_t S_i[AES_BLOCK_SIZE],
> -			    size_t i)
> +static inline void aes_ccm_128_S_i(struct aes_ccm_128_context *ctx,
> +				   uint8_t S_i[AES_BLOCK_SIZE],
> +				   size_t i)
>  {
> -	uint8_t A_i[AES_BLOCK_SIZE];
> -
> -	A_i[0]  = L_;
> -	memcpy(&A_i[1], ctx->nonce, AES_CCM_128_NONCE_SIZE);
> -	RSIVAL(A_i, (AES_BLOCK_SIZE - AES_CCM_128_L), i);
> -
> -	AES_encrypt(A_i, S_i, &ctx->aes_key);
> +	RSIVAL(ctx->A_i, (AES_BLOCK_SIZE - AES_CCM_128_L), i);
> +	AES_encrypt(ctx->A_i, S_i, &ctx->aes_key);
>  }
>  
>  void aes_ccm_128_crypt(struct aes_ccm_128_context *ctx,
> @@ -146,6 +154,15 @@ void aes_ccm_128_crypt(struct aes_ccm_128_context *ctx,
>  			ctx->S_i_ofs = 0;
>  		}
>  
> +		if (likely(ctx->S_i_ofs == 0 && m_len >= AES_BLOCK_SIZE)) {
> +			aes_block_xor(m, ctx->S_i, m);
> +			m += AES_BLOCK_SIZE;
> +			m_len -= AES_BLOCK_SIZE;
> +			ctx->S_i_ctr += 1;
> +			aes_ccm_128_S_i(ctx, ctx->S_i, ctx->S_i_ctr);
> +			continue;
> +		}
> +
>  		m[0] ^= ctx->S_i[ctx->S_i_ofs];
>  		m += 1;
>  		m_len -= 1;
> @@ -156,15 +173,13 @@ void aes_ccm_128_crypt(struct aes_ccm_128_context *ctx,
>  void aes_ccm_128_digest(struct aes_ccm_128_context *ctx,
>  			uint8_t digest[AES_BLOCK_SIZE])
>  {
> -	uint8_t S_0[AES_BLOCK_SIZE];
> -
> -	aes_ccm_128_S_i(ctx, S_0, 0);
> +	/* prepare S_0 */
> +	aes_ccm_128_S_i(ctx, ctx->S_i, 0);
>  
>  	/*
>  	 * note X_i is T here
>  	 */
> -	aes_ccm_128_xor(ctx->X_i, S_0, digest);
> +	aes_block_xor(ctx->X_i, ctx->S_i, digest);
>  
> -	ZERO_STRUCT(S_0);
>  	ZERO_STRUCTP(ctx);
>  }
> diff --git a/lib/crypto/aes_ccm_128.h b/lib/crypto/aes_ccm_128.h
> index a98c754..1382ee7 100644
> --- a/lib/crypto/aes_ccm_128.h
> +++ b/lib/crypto/aes_ccm_128.h
> @@ -26,16 +26,20 @@
>  
>  struct aes_ccm_128_context {
>  	AES_KEY aes_key;
> +
>  	uint8_t nonce[AES_CCM_128_NONCE_SIZE];
>  
>  	size_t a_remain;
>  	size_t m_remain;
>  
> +	uint64_t __align;
> +
>  	uint8_t X_i[AES_BLOCK_SIZE];
>  	uint8_t B_i[AES_BLOCK_SIZE];
> -	size_t B_i_ofs;
> -
> +	uint8_t A_i[AES_BLOCK_SIZE];
>  	uint8_t S_i[AES_BLOCK_SIZE];
> +
> +	size_t B_i_ofs;
>  	size_t S_i_ofs;
>  	size_t S_i_ctr;
>  };
> -- 
> 1.9.1
> 
> 
> From cfe9e7843c6abdf86118fc851f4c6283a1878b1c Mon Sep 17 00:00:00 2001
> From: Stefan Metzmacher <metze at samba.org>
> Date: Wed, 12 Aug 2015 11:29:47 +0200
> Subject: [PATCH 08/11] lib/crypto: optimize aes_gcm_128
> 
> - We avoid variables in order to do a lazy cleanup
>   in aes_gcm_128_digest() via ZERO_STRUCTP(ctx)
> - We use the optimized aes_block_{xor,rshift}() functions
> - Align AES_BLOCK_SIZE arrays to 8 bytes
> 
> BUG: https://bugzilla.samba.org/show_bug.cgi?id=11451
> 
> Signed-off-by: Stefan Metzmacher <metze at samba.org>
> ---
>  lib/crypto/aes_gcm_128.c | 46 ++++++++++++++++------------------------------
>  lib/crypto/aes_gcm_128.h | 15 +++++++++------
>  2 files changed, 25 insertions(+), 36 deletions(-)
> 
> diff --git a/lib/crypto/aes_gcm_128.c b/lib/crypto/aes_gcm_128.c
> index f59d659..bfbf176 100644
> --- a/lib/crypto/aes_gcm_128.c
> +++ b/lib/crypto/aes_gcm_128.c
> @@ -30,35 +30,12 @@ static inline void aes_gcm_128_inc32(uint8_t inout[AES_BLOCK_SIZE])
>  	RSIVAL(inout, AES_BLOCK_SIZE - 4, v);
>  }
>  
> -static inline void aes_gcm_128_xor(const uint8_t in1[AES_BLOCK_SIZE],
> -				   const uint8_t in2[AES_BLOCK_SIZE],
> -				   uint8_t out[AES_BLOCK_SIZE])
> -{
> -	uint8_t i;
> -
> -	for (i = 0; i < AES_BLOCK_SIZE; i++) {
> -		out[i] = in1[i] ^ in2[i];
> -	}
> -}
> -
> -static inline void aes_gcm_128_rightshift(uint8_t x[AES_BLOCK_SIZE])
> -{
> -	int8_t i;
> -
> -	for (i = AES_BLOCK_SIZE - 1; i >=0; i--) {
> -		x[i] >>= 1;
> -		if (i > 0) {
> -			x[i] |= (x[i-1] & 1) << 7;
> -		}
> -	}
> -}
> -
>  static inline void aes_gcm_128_mul(const uint8_t x[AES_BLOCK_SIZE],
>  				   const uint8_t y[AES_BLOCK_SIZE],
> +				   uint8_t v[AES_BLOCK_SIZE],
>  				   uint8_t z[AES_BLOCK_SIZE])
>  {
>  	uint8_t i;
> -	uint8_t v[AES_BLOCK_SIZE];
>  	/* 11100001 || 0^120 */
>  	static const uint8_t r[AES_BLOCK_SIZE] = {
>  		0xE1, 0x00, 0x00, 0x00,
> @@ -75,12 +52,12 @@ static inline void aes_gcm_128_mul(const uint8_t x[AES_BLOCK_SIZE],
>  		for (mask = 0x80; mask != 0 ; mask >>= 1) {
>  			uint8_t v_lsb = v[AES_BLOCK_SIZE-1] & 1;
>  			if (x[i] & mask) {
> -				aes_gcm_128_xor(z, v, z);
> +				aes_block_xor(z, v, z);
>  			}
>  
> -			aes_gcm_128_rightshift(v);
> +			aes_block_rshift(v, v);
>  			if (v_lsb != 0) {
> -				aes_gcm_128_xor(v, r, v);
> +				aes_block_xor(v, r, v);
>  			}
>  		}
>  	}
> @@ -89,8 +66,8 @@ static inline void aes_gcm_128_mul(const uint8_t x[AES_BLOCK_SIZE],
>  static inline void aes_gcm_128_ghash_block(struct aes_gcm_128_context *ctx,
>  					   const uint8_t in[AES_BLOCK_SIZE])
>  {
> -	aes_gcm_128_xor(ctx->Y, in, ctx->y.block);
> -	aes_gcm_128_mul(ctx->y.block, ctx->H, ctx->Y);
> +	aes_block_xor(ctx->Y, in, ctx->y.block);
> +	aes_gcm_128_mul(ctx->y.block, ctx->H, ctx->v.block, ctx->Y);
>  }
>  
>  void aes_gcm_128_init(struct aes_gcm_128_context *ctx,
> @@ -184,6 +161,15 @@ static inline void aes_gcm_128_crypt_tmp(struct aes_gcm_128_context *ctx,
>  			tmp->ofs = 0;
>  		}
>  
> +		if (likely(tmp->ofs == 0 && m_len >= AES_BLOCK_SIZE)) {
> +			aes_block_xor(m, tmp->block, m);
> +			m += AES_BLOCK_SIZE;
> +			m_len -= AES_BLOCK_SIZE;
> +			aes_gcm_128_inc32(ctx->CB);
> +			AES_encrypt(ctx->CB, tmp->block, &ctx->aes_key);
> +			continue;
> +		}
> +
>  		m[0] ^= tmp->block[tmp->ofs];
>  		m += 1;
>  		m_len -= 1;
> @@ -215,7 +201,7 @@ void aes_gcm_128_digest(struct aes_gcm_128_context *ctx,
>  	aes_gcm_128_ghash_block(ctx, ctx->AC);
>  
>  	AES_encrypt(ctx->J0, ctx->c.block, &ctx->aes_key);
> -	aes_gcm_128_xor(ctx->c.block, ctx->Y, T);
> +	aes_block_xor(ctx->c.block, ctx->Y, T);
>  
>  	ZERO_STRUCTP(ctx);
>  }
> diff --git a/lib/crypto/aes_gcm_128.h b/lib/crypto/aes_gcm_128.h
> index 278b6db..8df11c2 100644
> --- a/lib/crypto/aes_gcm_128.h
> +++ b/lib/crypto/aes_gcm_128.h
> @@ -24,17 +24,20 @@
>  
>  struct aes_gcm_128_context {
>  	AES_KEY aes_key;
> +
> +	uint64_t __align;
> +
> +	struct aes_gcm_128_tmp {
> +		size_t ofs;
> +		size_t total;
> +		uint8_t block[AES_BLOCK_SIZE];
> +	} A, C, c, v, y;
> +
>  	uint8_t H[AES_BLOCK_SIZE];
>  	uint8_t J0[AES_BLOCK_SIZE];
>  	uint8_t CB[AES_BLOCK_SIZE];
>  	uint8_t Y[AES_BLOCK_SIZE];
>  	uint8_t AC[AES_BLOCK_SIZE];
> -
> -	struct aes_gcm_128_tmp {
> -		uint8_t block[AES_BLOCK_SIZE];
> -		size_t ofs;
> -		size_t total;
> -	} A, C, c, y;
>  };
>  
>  void aes_gcm_128_init(struct aes_gcm_128_context *ctx,
> -- 
> 1.9.1
> 
> 
> From ab4823f29187d2dcdf8cc4f5e5ec66cfb99efc13 Mon Sep 17 00:00:00 2001
> From: Stefan Metzmacher <metze at samba.org>
> Date: Fri, 14 Aug 2015 23:45:07 +0200
> Subject: [PATCH 09/11] lib/crypto: make use of aes_test.h in
>  aes_gcm_128_test.c
> 
> BUG: https://bugzilla.samba.org/show_bug.cgi?id=11451
> 
> Signed-off-by: Stefan Metzmacher <metze at samba.org>
> ---
>  lib/crypto/aes_gcm_128_test.c | 358 ++++++++++++++++--------------------------
>  1 file changed, 137 insertions(+), 221 deletions(-)
> 
> diff --git a/lib/crypto/aes_gcm_128_test.c b/lib/crypto/aes_gcm_128_test.c
> index f70d851..cb60cd0 100644
> --- a/lib/crypto/aes_gcm_128_test.c
> +++ b/lib/crypto/aes_gcm_128_test.c
> @@ -19,100 +19,141 @@
>  #include "replace.h"
>  #include "../lib/util/samba_util.h"
>  #include "../lib/crypto/crypto.h"
> +#include "../lib/crypto/aes_test.h"
>  
> +#ifndef AES_GCM_128_ONLY_TESTVECTORS
>  struct torture_context;
> -bool torture_local_crypto_aes_gcm_128(struct torture_context *torture);
> +bool torture_local_crypto_aes_gcm_128(struct torture_context *tctx);
>  
>  /*
>   This uses the test values from ...
>  */
> -bool torture_local_crypto_aes_gcm_128(struct torture_context *torture)
> +bool torture_local_crypto_aes_gcm_128(struct torture_context *tctx)
>  {
>  	bool ret = true;
>  	uint32_t i;
> -	struct {
> -		DATA_BLOB K;
> -		DATA_BLOB IV;
> -		DATA_BLOB A;
> -		DATA_BLOB P;
> -		DATA_BLOB C;
> -		DATA_BLOB T;
> -	} testarray[5];
> -
> -	TALLOC_CTX *tctx = talloc_new(torture);
> -	if (!tctx) { return false; };
> -
> -	ZERO_STRUCT(testarray);
> -
> -	testarray[0].K = strhex_to_data_blob(tctx,
> -				"00000000000000000000000000000000");
> -	testarray[0].IV = strhex_to_data_blob(tctx,
> -				"000000000000000000000000");
> -	testarray[0].A = data_blob_null;
> -	testarray[0].P = data_blob_null;
> -	testarray[0].C = data_blob_null;
> -	testarray[0].T = strhex_to_data_blob(tctx,
> -				"58e2fccefa7e3061367f1d57a4e7455a");
> -
> -	testarray[1].K = strhex_to_data_blob(tctx,
> -				"00000000000000000000000000000000");
> -	testarray[1].IV = strhex_to_data_blob(tctx,
> -				"000000000000000000000000");
> -	testarray[1].A = data_blob_null;
> -	testarray[1].P = strhex_to_data_blob(tctx,
> -				"00000000000000000000000000000000");
> -	testarray[1].C = strhex_to_data_blob(tctx,
> -				"0388dace60b6a392f328c2b971b2fe78");
> -	testarray[1].T = strhex_to_data_blob(tctx,
> -				"ab6e47d42cec13bdf53a67b21257bddf");
> -
> -	testarray[2].K = strhex_to_data_blob(tctx,
> -				"feffe9928665731c6d6a8f9467308308");
> -	testarray[2].IV = strhex_to_data_blob(tctx,
> -				"cafebabefacedbaddecaf888");
> -	testarray[2].A = data_blob_null;
> -	testarray[2].P = strhex_to_data_blob(tctx,
> -				"d9313225f88406e5a55909c5aff5269a"
> -				"86a7a9531534f7da2e4c303d8a318a72"
> -				"1c3c0c95956809532fcf0e2449a6b525"
> -				"b16aedf5aa0de657ba637b391aafd255");
> -	testarray[2].C = strhex_to_data_blob(tctx,
> -				"42831ec2217774244b7221b784d0d49c"
> -				"e3aa212f2c02a4e035c17e2329aca12e"
> -				"21d514b25466931c7d8f6a5aac84aa05"
> -				"1ba30b396a0aac973d58e091473f5985");
> -	testarray[2].T = strhex_to_data_blob(tctx,
> -				"4d5c2af327cd64a62cf35abd2ba6fab4");
> -
> -	testarray[3].K = strhex_to_data_blob(tctx,
> -				"feffe9928665731c6d6a8f9467308308");
> -	testarray[3].IV = strhex_to_data_blob(tctx,
> -				"cafebabefacedbaddecaf888");
> -	testarray[3].A = strhex_to_data_blob(tctx,
> -				"feedfacedeadbeeffeedfacedeadbeef"
> -				"abaddad2");
> -	testarray[3].P = strhex_to_data_blob(tctx,
> -				"d9313225f88406e5a55909c5aff5269a"
> -				"86a7a9531534f7da2e4c303d8a318a72"
> -				"1c3c0c95956809532fcf0e2449a6b525"
> -				"b16aedf5aa0de657ba637b39");
> -	testarray[3].C = strhex_to_data_blob(tctx,
> -				"42831ec2217774244b7221b784d0d49c"
> -				"e3aa212f2c02a4e035c17e2329aca12e"
> -				"21d514b25466931c7d8f6a5aac84aa05"
> -				"1ba30b396a0aac973d58e091");
> -	testarray[3].T = strhex_to_data_blob(tctx,
> -				"5bc94fbc3221a5db94fae95ae7121a47");
> -
> -	for (i=0; testarray[i].T.length != 0; i++) {
> +	struct aes_mode_testvector testarray[] = {
> +#endif /* AES_GCM_128_ONLY_TESTVECTORS */
> +#define AES_GCM_128_TESTVECTOR(_k, _n, _a, _p, _c, _t) \
> +	AES_MODE_TESTVECTOR(aes_gcm_128, _k, _n, _a, _p, _c, _t)
> +
> +	AES_GCM_128_TESTVECTOR(
> +		/* K */
> +		"8BF9FBC2B8149484FF11AB1F3A544FF6",
> +		/* N */
> +		"010000000000000077F7A8FF",
> +		/* A */
> +		"010000000000000077F7A80000000000"
> +		"A8000000000001004100002C00980000",
> +		/* P */
> +		"FE534D4240000100000000000B00811F"
> +		"00000000000000000600000000000000"
> +		"00000000010000004100002C00980000"
> +		"00000000000000000000000000000000"
> +		"3900000094010600FFFFFFFFFFFFFFFF"
> +		"FFFFFFFFFFFFFFFF7800000030000000"
> +		"000000007800000000000000FFFF0000"
> +		"0100000000000000"
> +		"03005C003100370032002E0033003100"
> +		"2E0039002E003100380033005C006E00"
> +		"650074006C006F0067006F006E000000",
> +		/* C */
> +		"863C07C1FBFA82D741A080C97DF52CFF"
> +		"432A63A37E5ACFA3865AE4E6E422D502"
> +		"FA7C6FBB9A7418F28C43F00A3869F687"
> +		"257CA665E25E62A0F458C42AA9E95DC4"
> +		"6CB351A0A497FABB7DCE58FEE5B20B08"
> +		"522E0E701B112FB93B36E7A0FB084D35"
> +		"62C0F3FDF0421079DD96BBCCA40949B3"
> +		"A7FC1AA635A72384"
> +		"2037DE3CA6385465D1884B29D7140790"
> +		"88AD3E770E2528D527B302536B7E5B1B"
> +		"430E048230AFE785DB89F4D87FC1F816",
> +		/* T */
> +		"BC9B5871EBFA89ADE21439ACDCD65D22"
> +	),
> +	AES_GCM_128_TESTVECTOR(
> +		/* K */
> +		"00000000000000000000000000000000",
> +		/* N */
> +		"000000000000000000000000",
> +		/* A */
> +		"",
> +		/* P */
> +		"",
> +		/* C */
> +		"",
> +		/* T */
> +		"58e2fccefa7e3061367f1d57a4e7455a"
> +	),
> +	AES_GCM_128_TESTVECTOR(
> +		/* K */
> +		"00000000000000000000000000000000",
> +		/* N */
> +		"000000000000000000000000",
> +		/* A */
> +		"",
> +		/* P */
> +		"00000000000000000000000000000000",
> +		/* C */
> +		"0388dace60b6a392f328c2b971b2fe78",
> +		/* T */
> +		"ab6e47d42cec13bdf53a67b21257bddf"
> +	),
> +	AES_GCM_128_TESTVECTOR(
> +		/* K */
> +		"feffe9928665731c6d6a8f9467308308",
> +		/* N */
> +		"cafebabefacedbaddecaf888",
> +		/* A */
> +		"",
> +		/* P */
> +		"d9313225f88406e5a55909c5aff5269a"
> +		"86a7a9531534f7da2e4c303d8a318a72"
> +		"1c3c0c95956809532fcf0e2449a6b525"
> +		"b16aedf5aa0de657ba637b391aafd255",
> +		/* C */
> +		"42831ec2217774244b7221b784d0d49c"
> +		"e3aa212f2c02a4e035c17e2329aca12e"
> +		"21d514b25466931c7d8f6a5aac84aa05"
> +		"1ba30b396a0aac973d58e091473f5985",
> +		/* T */
> +		"4d5c2af327cd64a62cf35abd2ba6fab4"
> +	),
> +	AES_GCM_128_TESTVECTOR(
> +		/* K */
> +		"feffe9928665731c6d6a8f9467308308",
> +		/* N */
> +		"cafebabefacedbaddecaf888",
> +		/* A */
> +		"feedfacedeadbeeffeedfacedeadbeef"
> +		"abaddad2",
> +		/* P */
> +		"d9313225f88406e5a55909c5aff5269a"
> +		"86a7a9531534f7da2e4c303d8a318a72"
> +		"1c3c0c95956809532fcf0e2449a6b525"
> +		"b16aedf5aa0de657ba637b39",
> +		/* C */
> +		"42831ec2217774244b7221b784d0d49c"
> +		"e3aa212f2c02a4e035c17e2329aca12e"
> +		"21d514b25466931c7d8f6a5aac84aa05"
> +		"1ba30b396a0aac973d58e091",
> +		/* T */
> +		"5bc94fbc3221a5db94fae95ae7121a47"
> +	),
> +#ifndef AES_GCM_128_ONLY_TESTVECTORS
> +	};
> +
> +	for (i=0; i < ARRAY_SIZE(testarray); i++) {
>  		struct aes_gcm_128_context ctx;
>  		uint8_t T[AES_BLOCK_SIZE];
> +		DATA_BLOB _T = data_blob_const(T, sizeof(T));
>  		DATA_BLOB C;
>  		int e;
>  
>  		C = data_blob_dup_talloc(tctx, testarray[i].P);
>  
> -		aes_gcm_128_init(&ctx, testarray[i].K.data, testarray[i].IV.data);
> +		aes_gcm_128_init(&ctx, testarray[i].K.data, testarray[i].N.data);
>  		aes_gcm_128_updateA(&ctx,
>  				    testarray[i].A.data,
>  				    testarray[i].A.length);
> @@ -122,61 +163,30 @@ bool torture_local_crypto_aes_gcm_128(struct torture_context *torture)
>  
>  		e = memcmp(testarray[i].T.data, T, sizeof(T));
>  		if (e != 0) {
> -			printf("%s: aes_gcm_128 test[%u]: failed\n", __location__, i);
> -			printf("K\n");
> -			dump_data(0, testarray[i].K.data, testarray[i].K.length);
> -			printf("IV\n");
> -			dump_data(0, testarray[i].IV.data, testarray[i].IV.length);
> -			printf("A\n");
> -			dump_data(0, testarray[i].A.data, testarray[i].A.length);
> -			printf("P\n");
> -			dump_data(0, testarray[i].P.data, testarray[i].P.length);
> -			printf("C1\n");
> -			dump_data(0, testarray[i].C.data, testarray[i].C.length);
> -			printf("C2\n");
> -			dump_data(0, C.data, C.length);
> -			printf("T1\n");
> -			dump_data(0, testarray[i].T.data, testarray[i].T.length);
> -			printf("T2\n");
> -			dump_data(0, T, sizeof(T));
> +			aes_mode_testvector_debug(&testarray[i], NULL, &C, &_T);
>  			ret = false;
>  			goto fail;
>  		}
>  
>  		e = memcmp(testarray[i].C.data, C.data, C.length);
>  		if (e != 0) {
> -			printf("%s: aes_gcm_128 test[%u]: failed\n", __location__, i);
> -			printf("K\n");
> -			dump_data(0, testarray[i].K.data, testarray[i].K.length);
> -			printf("IV\n");
> -			dump_data(0, testarray[i].IV.data, testarray[i].IV.length);
> -			printf("A\n");
> -			dump_data(0, testarray[i].A.data, testarray[i].A.length);
> -			printf("P\n");
> -			dump_data(0, testarray[i].P.data, testarray[i].P.length);
> -			printf("C1\n");
> -			dump_data(0, testarray[i].C.data, testarray[i].C.length);
> -			printf("C2\n");
> -			dump_data(0, C.data, C.length);
> -			printf("T1\n");
> -			dump_data(0, testarray[i].T.data, testarray[i].T.length);
> -			printf("T2\n");
> -			dump_data(0, T, sizeof(T));
> +			aes_mode_testvector_debug(&testarray[i], NULL, &C, &_T);
>  			ret = false;
>  			goto fail;
>  		}
>  	}
>  
> -	for (i=0; testarray[i].T.length != 0; i++) {
> +	for (i=0; i < ARRAY_SIZE(testarray); i++) {
>  		struct aes_gcm_128_context ctx;
>  		uint8_t T[AES_BLOCK_SIZE];
> +		DATA_BLOB _T = data_blob_const(T, sizeof(T));
>  		DATA_BLOB C;
>  		int e;
>  		size_t j;
>  
>  		C = data_blob_dup_talloc(tctx, testarray[i].P);
>  
> -		aes_gcm_128_init(&ctx, testarray[i].K.data, testarray[i].IV.data);
> +		aes_gcm_128_init(&ctx, testarray[i].K.data, testarray[i].N.data);
>  		for (j=0; j < testarray[i].A.length; j++) {
>  			aes_gcm_128_updateA(&ctx, &testarray[i].A.data[j], 1);
>  		}
> @@ -188,61 +198,30 @@ bool torture_local_crypto_aes_gcm_128(struct torture_context *torture)
>  
>  		e = memcmp(testarray[i].T.data, T, sizeof(T));
>  		if (e != 0) {
> -			printf("%s: aes_gcm_128 test[%u]: failed\n", __location__, i);
> -			printf("K\n");
> -			dump_data(0, testarray[i].K.data, testarray[i].K.length);
> -			printf("IV\n");
> -			dump_data(0, testarray[i].IV.data, testarray[i].IV.length);
> -			printf("A\n");
> -			dump_data(0, testarray[i].A.data, testarray[i].A.length);
> -			printf("P\n");
> -			dump_data(0, testarray[i].P.data, testarray[i].P.length);
> -			printf("C1\n");
> -			dump_data(0, testarray[i].C.data, testarray[i].C.length);
> -			printf("C2\n");
> -			dump_data(0, C.data, C.length);
> -			printf("T1\n");
> -			dump_data(0, testarray[i].T.data, testarray[i].T.length);
> -			printf("T2\n");
> -			dump_data(0, T, sizeof(T));
> +			aes_mode_testvector_debug(&testarray[i], NULL, &C, &_T);
>  			ret = false;
>  			goto fail;
>  		}
>  
>  		e = memcmp(testarray[i].C.data, C.data, C.length);
>  		if (e != 0) {
> -			printf("%s: aes_gcm_128 test[%u]: failed\n", __location__, i);
> -			printf("K\n");
> -			dump_data(0, testarray[i].K.data, testarray[i].K.length);
> -			printf("IV\n");
> -			dump_data(0, testarray[i].IV.data, testarray[i].IV.length);
> -			printf("A\n");
> -			dump_data(0, testarray[i].A.data, testarray[i].A.length);
> -			printf("P\n");
> -			dump_data(0, testarray[i].P.data, testarray[i].P.length);
> -			printf("C1\n");
> -			dump_data(0, testarray[i].C.data, testarray[i].C.length);
> -			printf("C2\n");
> -			dump_data(0, C.data, C.length);
> -			printf("T1\n");
> -			dump_data(0, testarray[i].T.data, testarray[i].T.length);
> -			printf("T2\n");
> -			dump_data(0, T, sizeof(T));
> +			aes_mode_testvector_debug(&testarray[i], NULL, &C, &_T);
>  			ret = false;
>  			goto fail;
>  		}
>  	}
>  
> -	for (i=0; testarray[i].T.length != 0; i++) {
> +	for (i=0; i < ARRAY_SIZE(testarray); i++) {
>  		struct aes_gcm_128_context ctx;
>  		uint8_t T[AES_BLOCK_SIZE];
> +		DATA_BLOB _T = data_blob_const(T, sizeof(T));
>  		DATA_BLOB P;
>  		int e;
>  		size_t j;
>  
>  		P = data_blob_dup_talloc(tctx, testarray[i].C);
>  
> -		aes_gcm_128_init(&ctx, testarray[i].K.data, testarray[i].IV.data);
> +		aes_gcm_128_init(&ctx, testarray[i].K.data, testarray[i].N.data);
>  		for (j=0; j < testarray[i].A.length; j++) {
>  			aes_gcm_128_updateA(&ctx, &testarray[i].A.data[j], 1);
>  		}
> @@ -254,60 +233,29 @@ bool torture_local_crypto_aes_gcm_128(struct torture_context *torture)
>  
>  		e = memcmp(testarray[i].T.data, T, sizeof(T));
>  		if (e != 0) {
> -			printf("%s: aes_gcm_128 test[%u]: failed\n", __location__, i);
> -			printf("K\n");
> -			dump_data(0, testarray[i].K.data, testarray[i].K.length);
> -			printf("IV\n");
> -			dump_data(0, testarray[i].IV.data, testarray[i].IV.length);
> -			printf("A\n");
> -			dump_data(0, testarray[i].A.data, testarray[i].A.length);
> -			printf("P1\n");
> -			dump_data(0, testarray[i].P.data, testarray[i].P.length);
> -			printf("P2\n");
> -			dump_data(0, P.data, P.length);
> -			printf("C\n");
> -			dump_data(0, testarray[i].C.data, testarray[i].C.length);
> -			printf("T1\n");
> -			dump_data(0, testarray[i].T.data, testarray[i].T.length);
> -			printf("T2\n");
> -			dump_data(0, T, sizeof(T));
> +			aes_mode_testvector_debug(&testarray[i], &P, NULL, &_T);
>  			ret = false;
>  			goto fail;
>  		}
>  
>  		e = memcmp(testarray[i].P.data, P.data, P.length);
>  		if (e != 0) {
> -			printf("%s: aes_gcm_128 test[%u]: failed\n", __location__, i);
> -			printf("K\n");
> -			dump_data(0, testarray[i].K.data, testarray[i].K.length);
> -			printf("IV\n");
> -			dump_data(0, testarray[i].IV.data, testarray[i].IV.length);
> -			printf("A\n");
> -			dump_data(0, testarray[i].A.data, testarray[i].A.length);
> -			printf("P1\n");
> -			dump_data(0, testarray[i].P.data, testarray[i].P.length);
> -			printf("P2\n");
> -			dump_data(0, P.data, P.length);
> -			printf("C\n");
> -			dump_data(0, testarray[i].C.data, testarray[i].C.length);
> -			printf("T1\n");
> -			dump_data(0, testarray[i].T.data, testarray[i].T.length);
> -			printf("T2\n");
> -			dump_data(0, T, sizeof(T));
> +			aes_mode_testvector_debug(&testarray[i], &P, NULL, &_T);
>  			ret = false;
>  			goto fail;
>  		}
>  	}
>  
> -	for (i=0; testarray[i].T.length != 0; i++) {
> +	for (i=0; i < ARRAY_SIZE(testarray); i++) {
>  		struct aes_gcm_128_context ctx;
>  		uint8_t T[AES_BLOCK_SIZE];
> +		DATA_BLOB _T = data_blob_const(T, sizeof(T));
>  		DATA_BLOB P;
>  		int e;
>  
>  		P = data_blob_dup_talloc(tctx, testarray[i].C);
>  
> -		aes_gcm_128_init(&ctx, testarray[i].K.data, testarray[i].IV.data);
> +		aes_gcm_128_init(&ctx, testarray[i].K.data, testarray[i].N.data);
>  		aes_gcm_128_updateA(&ctx, testarray[i].A.data, testarray[i].A.length);
>  		aes_gcm_128_updateC(&ctx, P.data, P.length);
>  		aes_gcm_128_crypt(&ctx, P.data, P.length);
> @@ -315,52 +263,20 @@ bool torture_local_crypto_aes_gcm_128(struct torture_context *torture)
>  
>  		e = memcmp(testarray[i].T.data, T, sizeof(T));
>  		if (e != 0) {
> -			printf("%s: aes_gcm_128 test[%u]: failed\n", __location__, i);
> -			printf("K\n");
> -			dump_data(0, testarray[i].K.data, testarray[i].K.length);
> -			printf("IV\n");
> -			dump_data(0, testarray[i].IV.data, testarray[i].IV.length);
> -			printf("A\n");
> -			dump_data(0, testarray[i].A.data, testarray[i].A.length);
> -			printf("P1\n");
> -			dump_data(0, testarray[i].P.data, testarray[i].P.length);
> -			printf("P2\n");
> -			dump_data(0, P.data, P.length);
> -			printf("C\n");
> -			dump_data(0, testarray[i].C.data, testarray[i].C.length);
> -			printf("T1\n");
> -			dump_data(0, testarray[i].T.data, testarray[i].T.length);
> -			printf("T2\n");
> -			dump_data(0, T, sizeof(T));
> +			aes_mode_testvector_debug(&testarray[i], &P, NULL, &_T);
>  			ret = false;
>  			goto fail;
>  		}
>  
>  		e = memcmp(testarray[i].P.data, P.data, P.length);
>  		if (e != 0) {
> -			printf("%s: aes_gcm_128 test[%u]: failed\n", __location__, i);
> -			printf("K\n");
> -			dump_data(0, testarray[i].K.data, testarray[i].K.length);
> -			printf("IV\n");
> -			dump_data(0, testarray[i].IV.data, testarray[i].IV.length);
> -			printf("A\n");
> -			dump_data(0, testarray[i].A.data, testarray[i].A.length);
> -			printf("P1\n");
> -			dump_data(0, testarray[i].P.data, testarray[i].P.length);
> -			printf("P2\n");
> -			dump_data(0, P.data, P.length);
> -			printf("C\n");
> -			dump_data(0, testarray[i].C.data, testarray[i].C.length);
> -			printf("T1\n");
> -			dump_data(0, testarray[i].T.data, testarray[i].T.length);
> -			printf("T2\n");
> -			dump_data(0, T, sizeof(T));
> +			aes_mode_testvector_debug(&testarray[i], &P, NULL, &_T);
>  			ret = false;
>  			goto fail;
>  		}
>  	}
>  
>   fail:
> -	talloc_free(tctx);
>  	return ret;
>  }
> +#endif /* AES_GCM_128_ONLY_TESTVECTORS */
> -- 
> 1.9.1
> 
> 
> From 75b2ce38d6ec5f8492ae7a81cd0fd378a0e30441 Mon Sep 17 00:00:00 2001
> From: Stefan Metzmacher <metze at samba.org>
> Date: Wed, 12 Aug 2015 12:58:49 +0200
> Subject: [PATCH 10/11] lib/crypto: sync AES_cfb8_encrypt() from heimdal
> 
> BUG: https://bugzilla.samba.org/show_bug.cgi?id=11451
> 
> Signed-off-by: Stefan Metzmacher <metze at samba.org>
> ---
>  lib/crypto/aes.c | 35 ++++++++++++++++++-----------------
>  lib/crypto/aes.h | 10 +++++++---
>  2 files changed, 25 insertions(+), 20 deletions(-)
> 
> diff --git a/lib/crypto/aes.c b/lib/crypto/aes.c
> index a47a456..f7f9688 100644
> --- a/lib/crypto/aes.c
> +++ b/lib/crypto/aes.c
> @@ -113,24 +113,25 @@ AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
>      }
>  }
>  
> -void aes_cfb8_encrypt(const uint8_t *in, uint8_t *out,
> -		      size_t length, const AES_KEY *key,
> -		      uint8_t *iv, int forward)
> +void
> +AES_cfb8_encrypt(const unsigned char *in, unsigned char *out,
> +                 unsigned long size, const AES_KEY *key,
> +                 unsigned char *iv, int forward_encrypt)
>  {
> -	size_t i;
> +    int i;
>  
> -	for (i=0; i < length; i++) {
> -		uint8_t tiv[AES_BLOCK_SIZE*2];
> +    for (i = 0; i < size; i++) {
> +        unsigned char tmp[AES_BLOCK_SIZE + 1];
>  
> -		memcpy(tiv, iv, AES_BLOCK_SIZE);
> -		AES_encrypt(iv, iv, key);
> -		if (!forward) {
> -			tiv[AES_BLOCK_SIZE] = in[i];
> -		}
> -		out[i] = in[i] ^ iv[0];
> -		if (forward) {
> -			tiv[AES_BLOCK_SIZE] = out[i];
> -		}
> -		memcpy(iv, tiv+1, AES_BLOCK_SIZE);
> -	}
> +        memcpy(tmp, iv, AES_BLOCK_SIZE);
> +        AES_encrypt(iv, iv, key);
> +        if (!forward_encrypt) {
> +            tmp[AES_BLOCK_SIZE] = in[i];
> +        }
> +        out[i] = in[i] ^ iv[0];
> +        if (forward_encrypt) {
> +            tmp[AES_BLOCK_SIZE] = out[i];
> +        }
> +        memcpy(iv, &tmp[1], AES_BLOCK_SIZE);
> +    }
>  }
> diff --git a/lib/crypto/aes.h b/lib/crypto/aes.h
> index 2cfb587..7487486 100644
> --- a/lib/crypto/aes.h
> +++ b/lib/crypto/aes.h
> @@ -42,6 +42,7 @@
>  #define AES_encrypt samba_AES_encrypt
>  #define AES_decrypt samba_AES_decrypt
>  #define AES_cbc_encrypt samba_AES_cbc_encrypt
> +#define AES_cfb8_encrypt samba_AES_cfb8_encrypt
>  
>  /*
>   *
> @@ -72,9 +73,12 @@ void AES_cbc_encrypt(const unsigned char *, unsigned char *,
>  		     const unsigned long, const AES_KEY *,
>  		     unsigned char *, int);
>  
> -void aes_cfb8_encrypt(const uint8_t *in, uint8_t *out,
> -		      size_t length, const AES_KEY *key,
> -		      uint8_t *iv, int forward);
> +void AES_cfb8_encrypt(const unsigned char *in, unsigned char *out,
> +		      unsigned long size, const AES_KEY *key,
> +		      unsigned char *iv, int forward_encrypt);
> +
> +#define aes_cfb8_encrypt(in, out, size, key, iv, forward_encrypt) \
> +	AES_cfb8_encrypt(in, out, size, key, iv, forward_encrypt)
>  
>  #ifdef  __cplusplus
>  }
> -- 
> 1.9.1
> 
> 
> From d8b06cf16e51e37ddfce2d1d7abe7fb886b729d2 Mon Sep 17 00:00:00 2001
> From: Stefan Metzmacher <metze at samba.org>
> Date: Wed, 12 Aug 2015 12:58:49 +0200
> Subject: [PATCH 11/11] lib/crypto: make it possible to use only parts of
>  aes.[ch]
> 
> This can be used in order to optimize some parts later.
> 
> BUG: https://bugzilla.samba.org/show_bug.cgi?id=11451
> 
> Signed-off-by: Stefan Metzmacher <metze at samba.org>
> ---
>  lib/crypto/aes.c | 8 +++++++-
>  lib/crypto/aes.h | 7 +++++++
>  2 files changed, 14 insertions(+), 1 deletion(-)
> 
> diff --git a/lib/crypto/aes.c b/lib/crypto/aes.c
> index f7f9688..800a97e 100644
> --- a/lib/crypto/aes.c
> +++ b/lib/crypto/aes.c
> @@ -32,9 +32,10 @@
>   */
>  
>  #include "replace.h"
> +#include "aes.h"
>  
> +#ifdef SAMBA_RIJNDAEL
>  #include "rijndael-alg-fst.h"
> -#include "aes.h"
>  
>  int
>  AES_set_encrypt_key(const unsigned char *userkey, const int bits, AES_KEY *key)
> @@ -65,7 +66,9 @@ AES_decrypt(const unsigned char *in, unsigned char *out, const AES_KEY *key)
>  {
>      rijndaelDecrypt(key->key, key->rounds, in, out);
>  }
> +#endif /* SAMBA_RIJNDAEL */
>  
> +#ifdef SAMBA_AES_CBC_ENCRYPT
>  void
>  AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
>  		unsigned long size, const AES_KEY *key,
> @@ -112,7 +115,9 @@ AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
>  	}
>      }
>  }
> +#endif /* SAMBA_AES_CBC_ENCRYPT */
>  
> +#ifdef SAMBA_AES_CFB8_ENCRYPT
>  void
>  AES_cfb8_encrypt(const unsigned char *in, unsigned char *out,
>                   unsigned long size, const AES_KEY *key,
> @@ -135,3 +140,4 @@ AES_cfb8_encrypt(const unsigned char *in, unsigned char *out,
>          memcpy(iv, &tmp[1], AES_BLOCK_SIZE);
>      }
>  }
> +#endif /* SAMBA_AES_CFB8_ENCRYPT */
> diff --git a/lib/crypto/aes.h b/lib/crypto/aes.h
> index 7487486..48ea764 100644
> --- a/lib/crypto/aes.h
> +++ b/lib/crypto/aes.h
> @@ -36,6 +36,11 @@
>  #ifndef LIB_CRYPTO_AES_H
>  #define LIB_CRYPTO_AES_H 1
>  
> +#define SAMBA_RIJNDAEL 1
> +#define SAMBA_AES_CBC_ENCRYPT 1
> +#define SAMBA_AES_CFB8_ENCRYPT 1
> +#define SAMBA_AES_BLOCK_XOR 1
> +
>  /* symbol renaming */
>  #define AES_set_encrypt_key samba_AES_set_encrypt_key
>  #define AES_set_decrypt_key samba_AES_decrypt_key
> @@ -84,6 +89,7 @@ void AES_cfb8_encrypt(const unsigned char *in, unsigned char *out,
>  }
>  #endif
>  
> +#ifdef SAMBA_AES_BLOCK_XOR
>  static inline void aes_block_xor(const uint8_t in1[AES_BLOCK_SIZE],
>  				 const uint8_t in2[AES_BLOCK_SIZE],
>  				 uint8_t out[AES_BLOCK_SIZE])
> @@ -111,6 +117,7 @@ static inline void aes_block_xor(const uint8_t in1[AES_BLOCK_SIZE],
>  		memcpy(out, o, AES_BLOCK_SIZE);
>  	}
>  }
> +#endif /* SAMBA_AES_BLOCK_XOR */
>  
>  static inline void aes_block_lshift(const uint8_t in[AES_BLOCK_SIZE],
>  				    uint8_t out[AES_BLOCK_SIZE])
> -- 
> 1.9.1
> 






More information about the samba-technical mailing list