/*
 * Sandboxie/SandboxieTools/ImBox/dc/crypto_fast/xts_fast.c
 *
 * 436 lines
 * 18 KiB
 * C
 */

/*
*
* Copyright (c) 2010-2012
* ntldr <ntldr@diskcryptor.net> PGP key ID - 0x1B6A24550F33E44A
*
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License version 3 as
published by the Free Software Foundation.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <intrin.h>
#include <emmintrin.h>
//#include <excpt.h>
#include "xts_fast.h"
#include "aes_asm.h"
#include "aes_padlock.h"
#include "xts_aes_ni.h"
#include "xts_serpent_sse2.h"
#include "xts_serpent_avx.h"
/*
 * 128-bit value that can be viewed as four 32-bit words or two 64-bit words.
 * In this file it holds the XTS tweak and the data unit index that is fed
 * to the tweak cipher.
 */
typedef __declspec(align(1)) union _m128 {
unsigned long v32[4]; /* 32-bit view (relies on unsigned long being 32 bits wide, as with MSVC) */
unsigned __int64 v64[2]; /* 64-bit view of the same 16 bytes */
} m128;
/*
 * Implementations picked at run time by xts_init(). The AES and Serpent
 * entry points in this file dispatch through these pointers, so they hold
 * no usable routine until xts_init() has assigned them.
 */
static xts_proc aes_selected_encrypt;
static xts_proc aes_selected_decrypt;
static xts_proc serpent_selected_encrypt;
static xts_proc serpent_selected_decrypt;
/*
 * Tweak helper macros used by the XTS routine generators below.
 *
 * They are not self-contained: they expect the expanding scope to declare
 * the tweak through `def_tweak t;` and to provide a variable named `cf`
 * that receives the carry term.
 *
 *   def_tweak        - declaration prefix for the tweak variable
 *   load_tweak()     - make the freshly derived tweak in `t` current
 *   tweak_xor(i, o)  - o[0..15] = i[0..15] XOR current tweak
 *   next_tweak()     - shift the 128-bit tweak left by one bit and, when the
 *                      bit shifted out of the top was set, XOR 135 (0x87)
 *                      into the low word
 *   copy_tweak(buf)  - store the current 16-byte tweak at buf
 */
#ifdef _M_X64
/* x64: the working tweak lives in two 64-bit locals, t0 (low) and t1 (high). */
/* def_tweak also declares t0/t1; the trailing `m128` types the name that follows. */
#define def_tweak \
unsigned __int64 t0, t1; m128
/* Copy the tweak produced in `t` into the t0/t1 working pair. */
#define load_tweak() do { \
t0 = t.v64[0]; t1 = t.v64[1]; \
} while (0)
/* XOR one 16-byte block with the tweak; _in and _out may be the same buffer. */
#define tweak_xor(_in, _out) do { \
((unsigned __int64*)(_out))[0] = ((unsigned __int64*)(_in))[0] ^ t0; \
((unsigned __int64*)(_out))[1] = ((unsigned __int64*)(_in))[1] ^ t1; \
} while (0)
/* cf is 135 when the top bit of t1 is set, otherwise 0; it is read before the shift. */
#define next_tweak() do { \
cf = (t1 >> 63) * 135; \
t1 = (t1 << 1) | (t0 >> 63); \
t0 = (t0 << 1) ^ cf; \
} while (0)
/* Write t0 then t1 to the 16 bytes at _buf. */
#define copy_tweak(_buf) do { \
((unsigned __int64*)(_buf))[0] = t0; \
((unsigned __int64*)(_buf))[1] = t1; \
} while (0)
#else
/* Other targets: the tweak is updated in place inside the m128 variable `t`. */
#define def_tweak m128
/* Nothing to load: `t` itself is the working tweak. */
#define load_tweak()
/* XOR one 16-byte block with the tweak; _in and _out may be the same buffer. */
#define tweak_xor(_in, _out) do { \
((unsigned __int64*)(_out))[0] = ((unsigned __int64*)(_in))[0] ^ t.v64[0]; \
((unsigned __int64*)(_out))[1] = ((unsigned __int64*)(_in))[1] ^ t.v64[1]; \
} while (0)
/* Same doubling as the x64 form; the bit carried between the two 64-bit halves */
/* is taken from v32[1] after the high half has been shifted and before the low half is. */
#define next_tweak() do { \
cf = (t.v32[3] >> 31) * 135; \
t.v64[1] <<= 1; \
t.v32[2] |= t.v32[1] >> 31; \
t.v64[0] <<= 1; \
t.v32[0] ^= cf; \
} while (0)
/* Copy the whole 16-byte tweak to _buf. */
#define copy_tweak(_buf) do { \
memcpy(_buf, &t, sizeof(t)); \
} while (0)
#endif
/*
 * DEF_XTS_PROC(func_name, tweak_name, crypt_name, key_field)
 *
 * Generates a static _stdcall function `func_name(in, out, len, offset, key)`
 * that processes `len` bytes from `in` into `out`, one XTS_SECTOR_SIZE unit
 * at a time and one XTS_BLOCK_SIZE block at a time within each unit.
 *
 *   tweak_name - block routine applied to the unit index, keyed with
 *                key->tweak_k.key_field, to produce the unit's first tweak
 *   crypt_name - block routine applied to every tweak-masked block, keyed
 *                with key->crypt_k.key_field
 *   key_field  - member selected inside key->crypt_k and key->tweak_k
 *
 * Per block: out = crypt(in XOR tweak) XOR tweak, then the tweak is advanced
 * with next_tweak(). The first XOR writes into `out` and the cipher then runs
 * in place on `out`.
 *
 * The unit index starts at offset / XTS_SECTOR_SIZE and is incremented before
 * its first use, so the first unit is keyed with that quotient plus one.
 *
 * NOTE: the outer loop is a do/while that stops when `len -= XTS_SECTOR_SIZE`
 * reaches zero. len must therefore be a non-zero multiple of XTS_SECTOR_SIZE;
 * for any other value the unsigned subtraction wraps instead of reaching zero.
 */
#define DEF_XTS_PROC(func_name, tweak_name, crypt_name, key_field) \
\
static void _stdcall func_name(const unsigned char *in, unsigned char *out, size_t len, unsigned __int64 offset, xts_key *key) \
{ \
def_tweak t; \
m128 idx; \
size_t cf; \
unsigned long i; \
\
idx.v64[0] = offset / XTS_SECTOR_SIZE; \
idx.v64[1] = 0; \
do \
{ \
/* update tweak unit index */ \
idx.v64[0]++; \
/* derive first tweak value */ \
tweak_name((unsigned char*)&idx, (unsigned char*)&t, &key->tweak_k.key_field); \
load_tweak(); \
\
for (i = 0; i < XTS_BLOCKS_IN_SECTOR; i++) \
{ \
tweak_xor(in, out); \
crypt_name(out, out, &key->crypt_k.key_field); \
tweak_xor(out, out); \
\
/* update pointers */ \
in += XTS_BLOCK_SIZE; out += XTS_BLOCK_SIZE; \
\
/* derive next tweak value */ \
next_tweak(); \
} \
} while (len -= XTS_SECTOR_SIZE); \
}
/*
 * DEF_XTS_AES_PADLOCK(func_name, crypt_name)
 *
 * Generates a static _stdcall XTS function `func_name(in, out, len, offset, key)`
 * for the PadLock AES routines. Unlike DEF_XTS_PROC it handles a whole
 * XTS_SECTOR_SIZE unit per cipher call:
 *   1. encrypt the unit index with key->tweak_k.aes to get the first tweak;
 *   2. expand the tweak for every block of the unit into `tweak`;
 *   3. XOR the input with those tweaks into the 16-byte-aligned scratch `buff`;
 *   4. run crypt_name over all XTS_BLOCKS_IN_SECTOR blocks of `buff` in place;
 *   5. XOR `buff` with the tweaks again into `out`.
 *
 * aes256_padlock_rekey() is called before each switch between the tweak key
 * and the data key. Presumably it makes the hardware reload the key; confirm
 * against aes_padlock.h.
 *
 * The unit index starts at offset / XTS_SECTOR_SIZE and is incremented before
 * its first use.
 *
 * NOTE: as in DEF_XTS_PROC, the do/while ends when `len -= XTS_SECTOR_SIZE`
 * reaches zero, so len must be a non-zero multiple of XTS_SECTOR_SIZE.
 * NOTE(review): `buff` and `tweak` are left on the stack without being
 * cleared when the function returns.
 */
#define DEF_XTS_AES_PADLOCK(func_name, crypt_name) \
\
static void _stdcall func_name(const unsigned char *in, unsigned char *out, size_t len, unsigned __int64 offset, xts_key *key) \
{ \
def_tweak __declspec(align(16)) t; \
m128 __declspec(align(16)) idx; \
unsigned char __declspec(align(16)) buff[XTS_SECTOR_SIZE], tweak[XTS_SECTOR_SIZE]; \
size_t cf, i; \
\
idx.v64[0] = offset / XTS_SECTOR_SIZE; \
idx.v64[1] = 0; \
do \
{ \
/* update tweak unit index */ \
idx.v64[0]++; \
/* derive first tweak value */ \
aes256_padlock_rekey(); \
aes256_padlock_encrypt((unsigned char*)&idx, (unsigned char*)&t, 1, &key->tweak_k.aes); \
load_tweak(); \
\
/* derive all tweak values for sector */ \
for (i = 0; i < XTS_BLOCKS_IN_SECTOR; i++) { \
copy_tweak(tweak + i*XTS_BLOCK_SIZE); \
next_tweak(); \
} \
for (i = 0; i < XTS_SECTOR_SIZE / sizeof(unsigned __int64); i++) { \
((unsigned __int64*)buff)[i] = ((unsigned __int64*)in)[i] ^ ((unsigned __int64*)tweak)[i]; \
} \
aes256_padlock_rekey(); \
crypt_name(buff, buff, XTS_BLOCKS_IN_SECTOR, &key->crypt_k.aes); \
\
for (i = 0; i < XTS_SECTOR_SIZE / sizeof(unsigned __int64); i++) { \
((unsigned __int64*)out)[i] = ((unsigned __int64*)buff)[i] ^ ((unsigned __int64*)tweak)[i]; \
} \
/* update pointers */ \
in += XTS_SECTOR_SIZE; out += XTS_SECTOR_SIZE; \
} while (len -= XTS_SECTOR_SIZE); \
}
/*
 * Concrete XTS routines generated from the macros above.
 * In every pair the tweak is derived with the cipher's *encrypt* primitive;
 * only the data-block primitive differs between encrypt and decrypt.
 */
/* AES via the assembler block routines */
DEF_XTS_PROC(xts_aes_basic_encrypt, aes256_asm_encrypt, aes256_asm_encrypt, aes);
DEF_XTS_PROC(xts_aes_basic_decrypt, aes256_asm_encrypt, aes256_asm_decrypt, aes);
/* Twofish */
DEF_XTS_PROC(xts_twofish_encrypt, twofish256_encrypt, twofish256_encrypt, twofish);
DEF_XTS_PROC(xts_twofish_decrypt, twofish256_encrypt, twofish256_decrypt, twofish);
/* Serpent built from the block routines: only generated for 32-bit x86 builds */
#ifdef _M_IX86
DEF_XTS_PROC(xts_serpent_basic_encrypt, serpent256_encrypt, serpent256_encrypt, serpent);
DEF_XTS_PROC(xts_serpent_basic_decrypt, serpent256_encrypt, serpent256_decrypt, serpent);
#endif
/* AES via the PadLock routines, one whole sector per cipher call */
DEF_XTS_AES_PADLOCK(xts_aes_padlock_encrypt, aes256_padlock_encrypt);
DEF_XTS_AES_PADLOCK(xts_aes_padlock_decrypt, aes256_padlock_decrypt);
#ifdef _M_IX86
/*
 * AES-XTS encryption entry point for 32-bit builds.
 *
 * Dispatches to the routine chosen by xts_init(). When that routine is the
 * AES-NI one, the FPU state is saved around the call; if the save reports
 * failure (negative result) the basic AES routine is used instead.
 */
static void _stdcall xts_aes_encrypt(const unsigned char *in, unsigned char *out, size_t len, unsigned __int64 offset, xts_key *key)
{
    unsigned char fpustate[32];
    xts_proc      selected = aes_selected_encrypt;

    /* Anything other than AES-NI needs no FPU bookkeeping. */
    if (selected != xts_aes_ni_encrypt) {
        selected(in, out, len, offset, key);
        return;
    }

    /* The FPU state could not be saved: fall back to the basic routine. */
    if (save_fpu_state(fpustate) < 0) {
        xts_aes_basic_encrypt(in, out, len, offset, key);
        return;
    }

    xts_aes_ni_encrypt(in, out, len, offset, key);
    load_fpu_state(fpustate);
}
/*
 * AES-XTS decryption entry point for 32-bit builds.
 *
 * Dispatches to the routine chosen by xts_init(). When that routine is the
 * AES-NI one, the FPU state is saved around the call; if the save reports
 * failure (negative result) the basic AES routine is used instead.
 */
static void _stdcall xts_aes_decrypt(const unsigned char *in, unsigned char *out, size_t len, unsigned __int64 offset, xts_key *key)
{
    unsigned char fpustate[32];
    xts_proc      selected = aes_selected_decrypt;

    /* Anything other than AES-NI needs no FPU bookkeeping. */
    if (selected != xts_aes_ni_decrypt) {
        selected(in, out, len, offset, key);
        return;
    }

    /* The FPU state could not be saved: fall back to the basic routine. */
    if (save_fpu_state(fpustate) < 0) {
        xts_aes_basic_decrypt(in, out, len, offset, key);
        return;
    }

    xts_aes_ni_decrypt(in, out, len, offset, key);
    load_fpu_state(fpustate);
}
/*
 * Serpent-XTS encryption entry point for 32-bit builds.
 *
 * The basic routine is called directly. Any other selected routine runs
 * between save_fpu_state()/load_fpu_state(); if the save reports failure
 * (negative result) the basic routine is used instead.
 */
static void _stdcall xts_serpent_encrypt(const unsigned char *in, unsigned char *out, size_t len, unsigned __int64 offset, xts_key *key)
{
    unsigned char fpustate[32];
    xts_proc      selected = serpent_selected_encrypt;

    /* save_fpu_state() is only attempted when a non-basic routine is selected. */
    if (selected == xts_serpent_basic_encrypt || save_fpu_state(fpustate) < 0) {
        xts_serpent_basic_encrypt(in, out, len, offset, key);
        return;
    }

    selected(in, out, len, offset, key);
    load_fpu_state(fpustate);
}
/*
 * Serpent-XTS decryption entry point for 32-bit builds.
 *
 * The basic routine is called directly. Any other selected routine runs
 * between save_fpu_state()/load_fpu_state(); if the save reports failure
 * (negative result) the basic routine is used instead.
 */
static void _stdcall xts_serpent_decrypt(const unsigned char *in, unsigned char *out, size_t len, unsigned __int64 offset, xts_key *key)
{
    unsigned char fpustate[32];
    xts_proc      selected = serpent_selected_decrypt;

    /* save_fpu_state() is only attempted when a non-basic routine is selected. */
    if (selected == xts_serpent_basic_decrypt || save_fpu_state(fpustate) < 0) {
        xts_serpent_basic_decrypt(in, out, len, offset, key);
        return;
    }

    selected(in, out, len, offset, key);
    load_fpu_state(fpustate);
}
#else
/*
 * Builds that are not 32-bit x86 have no FPU-state wrappers: the entry-point
 * names are aliases for the function pointers set by xts_init(), so a call
 * through them goes straight to the selected routine.
 */
#define xts_aes_encrypt aes_selected_encrypt
#define xts_aes_decrypt aes_selected_decrypt
#define xts_serpent_encrypt serpent_selected_encrypt
#define xts_serpent_decrypt serpent_selected_decrypt
#endif
/*
 * AES-Twofish cascade, encryption: a Twofish pass from `in` to `out`,
 * followed by an AES pass in place over `out`.
 */
static void _stdcall xts_aes_twofish_encrypt(
    const unsigned char *in,
    unsigned char       *out,
    size_t               len,
    unsigned __int64     offset,
    xts_key             *key)
{
    /* pass 1: Twofish, in -> out */
    xts_twofish_encrypt(in, out, len, offset, key);
    /* pass 2: AES, in place */
    xts_aes_encrypt(out, out, len, offset, key);
}
/*
 * AES-Twofish cascade, decryption: an AES pass from `in` to `out`,
 * followed by a Twofish pass in place over `out` (the reverse of the
 * encryption order).
 */
static void _stdcall xts_aes_twofish_decrypt(
    const unsigned char *in,
    unsigned char       *out,
    size_t               len,
    unsigned __int64     offset,
    xts_key             *key)
{
    /* pass 1: AES, in -> out */
    xts_aes_decrypt(in, out, len, offset, key);
    /* pass 2: Twofish, in place */
    xts_twofish_decrypt(out, out, len, offset, key);
}
/*
 * Twofish-Serpent cascade, encryption: a Serpent pass from `in` to `out`,
 * followed by a Twofish pass in place over `out`.
 */
static void _stdcall xts_twofish_serpent_encrypt(
    const unsigned char *in,
    unsigned char       *out,
    size_t               len,
    unsigned __int64     offset,
    xts_key             *key)
{
    /* pass 1: Serpent, in -> out */
    xts_serpent_encrypt(in, out, len, offset, key);
    /* pass 2: Twofish, in place */
    xts_twofish_encrypt(out, out, len, offset, key);
}
/*
 * Twofish-Serpent cascade, decryption: a Twofish pass from `in` to `out`,
 * followed by a Serpent pass in place over `out` (the reverse of the
 * encryption order).
 */
static void _stdcall xts_twofish_serpent_decrypt(
    const unsigned char *in,
    unsigned char       *out,
    size_t               len,
    unsigned __int64     offset,
    xts_key             *key)
{
    /* pass 1: Twofish, in -> out */
    xts_twofish_decrypt(in, out, len, offset, key);
    /* pass 2: Serpent, in place */
    xts_serpent_decrypt(out, out, len, offset, key);
}
/*
 * Serpent-AES cascade, encryption: an AES pass from `in` to `out`,
 * followed by a Serpent pass in place over `out`.
 */
static void _stdcall xts_serpent_aes_encrypt(
    const unsigned char *in,
    unsigned char       *out,
    size_t               len,
    unsigned __int64     offset,
    xts_key             *key)
{
    /* pass 1: AES, in -> out */
    xts_aes_encrypt(in, out, len, offset, key);
    /* pass 2: Serpent, in place */
    xts_serpent_encrypt(out, out, len, offset, key);
}
/*
 * Serpent-AES cascade, decryption: a Serpent pass from `in` to `out`,
 * followed by an AES pass in place over `out` (the reverse of the
 * encryption order).
 */
static void _stdcall xts_serpent_aes_decrypt(
    const unsigned char *in,
    unsigned char       *out,
    size_t               len,
    unsigned __int64     offset,
    xts_key             *key)
{
    /* pass 1: Serpent, in -> out */
    xts_serpent_decrypt(in, out, len, offset, key);
    /* pass 2: AES, in place */
    xts_aes_decrypt(out, out, len, offset, key);
}
/*
 * AES-Twofish-Serpent cascade, encryption: a Serpent pass from `in` to
 * `out`, then Twofish and finally AES, both in place over `out`.
 */
static void _stdcall xts_aes_twofish_serpent_encrypt(
    const unsigned char *in,
    unsigned char       *out,
    size_t               len,
    unsigned __int64     offset,
    xts_key             *key)
{
    /* pass 1: Serpent, in -> out */
    xts_serpent_encrypt(in, out, len, offset, key);
    /* pass 2: Twofish, in place */
    xts_twofish_encrypt(out, out, len, offset, key);
    /* pass 3: AES, in place */
    xts_aes_encrypt(out, out, len, offset, key);
}
/*
 * AES-Twofish-Serpent cascade, decryption: an AES pass from `in` to `out`,
 * then Twofish and finally Serpent, both in place over `out` (the reverse
 * of the encryption order).
 */
static void _stdcall xts_aes_twofish_serpent_decrypt(
    const unsigned char *in,
    unsigned char       *out,
    size_t               len,
    unsigned __int64     offset,
    xts_key             *key)
{
    /* pass 1: AES, in -> out */
    xts_aes_decrypt(in, out, len, offset, key);
    /* pass 2: Twofish, in place */
    xts_twofish_decrypt(out, out, len, offset, key);
    /* pass 3: Serpent, in place */
    xts_serpent_decrypt(out, out, len, offset, key);
}
/*
 * Expand raw key material into `skey` and bind the matching XTS routines.
 *
 *   key  - concatenated XTS_KEY_SIZE-byte sub-keys: first the data keys of
 *          every cipher in the chosen mode, then the tweak keys in the same
 *          cipher order (2, 4 or 6 sub-keys depending on `alg`)
 *   alg  - one of the CF_* mode identifiers handled below
 *   skey - receives the key schedules in crypt_k / tweak_k and the
 *          encrypt / decrypt function pointers
 *
 * An `alg` value that matches no case leaves `skey` completely untouched.
 */
void _stdcall xts_set_key(const unsigned char *key, int alg, xts_key *skey)
{
    switch (alg)
    {
    /* ---- single ciphers: [data key][tweak key] ---- */
    case CF_AES:
        aes256_asm_set_key(key,                &skey->crypt_k.aes);
        aes256_asm_set_key(key + XTS_KEY_SIZE, &skey->tweak_k.aes);
        skey->encrypt = xts_aes_encrypt;
        skey->decrypt = xts_aes_decrypt;
        break;

    case CF_TWOFISH:
        twofish256_set_key(key,                &skey->crypt_k.twofish);
        twofish256_set_key(key + XTS_KEY_SIZE, &skey->tweak_k.twofish);
        skey->encrypt = xts_twofish_encrypt;
        skey->decrypt = xts_twofish_decrypt;
        break;

    case CF_SERPENT:
        serpent256_set_key(key,                &skey->crypt_k.serpent);
        serpent256_set_key(key + XTS_KEY_SIZE, &skey->tweak_k.serpent);
        skey->encrypt = xts_serpent_encrypt;
        skey->decrypt = xts_serpent_decrypt;
        break;

    /* ---- two-cipher cascades: [data A][data B][tweak A][tweak B] ---- */
    case CF_AES_TWOFISH:
        twofish256_set_key(key,                  &skey->crypt_k.twofish);
        aes256_asm_set_key(key + XTS_KEY_SIZE,   &skey->crypt_k.aes);
        twofish256_set_key(key + XTS_KEY_SIZE*2, &skey->tweak_k.twofish);
        aes256_asm_set_key(key + XTS_KEY_SIZE*3, &skey->tweak_k.aes);
        skey->encrypt = xts_aes_twofish_encrypt;
        skey->decrypt = xts_aes_twofish_decrypt;
        break;

    case CF_TWOFISH_SERPENT:
        serpent256_set_key(key,                  &skey->crypt_k.serpent);
        twofish256_set_key(key + XTS_KEY_SIZE,   &skey->crypt_k.twofish);
        serpent256_set_key(key + XTS_KEY_SIZE*2, &skey->tweak_k.serpent);
        twofish256_set_key(key + XTS_KEY_SIZE*3, &skey->tweak_k.twofish);
        skey->encrypt = xts_twofish_serpent_encrypt;
        skey->decrypt = xts_twofish_serpent_decrypt;
        break;

    case CF_SERPENT_AES:
        aes256_asm_set_key(key,                  &skey->crypt_k.aes);
        serpent256_set_key(key + XTS_KEY_SIZE,   &skey->crypt_k.serpent);
        aes256_asm_set_key(key + XTS_KEY_SIZE*2, &skey->tweak_k.aes);
        serpent256_set_key(key + XTS_KEY_SIZE*3, &skey->tweak_k.serpent);
        skey->encrypt = xts_serpent_aes_encrypt;
        skey->decrypt = xts_serpent_aes_decrypt;
        break;

    /* ---- three-cipher cascade: three data keys, then three tweak keys ---- */
    case CF_AES_TWOFISH_SERPENT:
        serpent256_set_key(key,                  &skey->crypt_k.serpent);
        twofish256_set_key(key + XTS_KEY_SIZE,   &skey->crypt_k.twofish);
        aes256_asm_set_key(key + XTS_KEY_SIZE*2, &skey->crypt_k.aes);
        serpent256_set_key(key + XTS_KEY_SIZE*3, &skey->tweak_k.serpent);
        twofish256_set_key(key + XTS_KEY_SIZE*4, &skey->tweak_k.twofish);
        aes256_asm_set_key(key + XTS_KEY_SIZE*5, &skey->tweak_k.aes);
        skey->encrypt = xts_aes_twofish_serpent_encrypt;
        skey->decrypt = xts_aes_twofish_serpent_decrypt;
        break;

    default:
        /* unrecognised mode: nothing is written to skey */
        break;
    }
}
#ifdef _M_IX86
/*
 * Stand-in for saving the floating-point state.
 *
 * The kernel-mode implementation (an IRQL check followed by
 * KeSaveFloatingPointState) is disabled in this build, so `state` is left
 * untouched and the function always returns 1. Callers in this file treat
 * any non-negative result as success.
 */
long save_fpu_state(unsigned char state[32])
{
    (void)state; /* intentionally unused */
    return 1;
}
/*
 * Stand-in for restoring the floating-point state.
 *
 * The kernel-mode implementation (KeRestoreFloatingPointState) is disabled
 * in this build, so the function does nothing and leaves `state` untouched.
 */
void load_fpu_state(unsigned char state[32])
{
    (void)state; /* intentionally unused */
}
#endif
/*
 * Report whether the AES-NI instructions can be used.
 *
 * Returns 1 when CPUID leaf 1 sets ECX bit 25. Otherwise, if ECX bit 31 is
 * set and CPUID leaf 0x40000000 returns the signature "Microsoft Hv", an
 * AESENC instruction is executed and its result is compared with hard-coded
 * expected values; the function returns 1 if they match. Returns 0 in every
 * other case.
 *
 * NOTE(review): the __try/__except guard around the probe is commented out,
 * so nothing in this function catches a fault raised by the AESENC
 * instruction on the Hyper-V path. Confirm this is acceptable for the build.
 */
int _declspec(noinline) _stdcall xts_aes_ni_available()
{
int CPUInfo[4], res = 0;
__m128i enc;
#ifdef _M_IX86
unsigned char fpustate[32];
#endif
// check for AES-NI support via CPUID.01H:ECX.AES[bit 25]
__cpuid(CPUInfo, 1);
if ( CPUInfo[2] & 0x02000000 ) return 1;
// Special workaround for AES-NI on Hyper-V server and virtual machines
// ECX bit 31 clear: stop here and report no AES-NI
if ( (CPUInfo[2] & 0x80000000) == 0 ) return 0;
__cpuid(CPUInfo, 0x40000000);
// the multi-character constants spell "Micr" "osof" "t Hv" in reversed byte order
if ( CPUInfo[1] != 'rciM' || CPUInfo[2] != 'foso' || CPUInfo[3] != 'vH t' ) return 0;
#ifdef _M_IX86
// 32-bit builds run the probe only if the FPU state save succeeds; res stays 0 otherwise
if (save_fpu_state(fpustate) >= 0)
{
#endif
//__try {
// run one AES round on fixed inputs and compare with the expected output
enc = _mm_aesenc_si128(_mm_set_epi32(0,1,2,3), _mm_set_epi32(4,5,6,7));
res = enc.m128i_u64[0] == 0x5f77774d4b7b7b54 && enc.m128i_u64[1] == 0x63636367427c7c58;
//}
//__except(/*EXCEPTION_EXECUTE_HANDLER*/ 1) {
// res = 0;
//}
#ifdef _M_IX86
load_fpu_state(fpustate);
}
#endif
return res;
}
/*
 * Select the Serpent and AES XTS implementations used by this module.
 *
 *   hw_crypt - non-zero allows the hardware AES paths (AES-NI, then PadLock)
 *              to be probed; zero always selects the basic AES routine
 *
 * Returns 1 when AES-NI was selected, 2 when PadLock was selected and 0 when
 * the basic AES routine is in use.
 */
int _stdcall xts_init(int hw_crypt)
{
    /*
     * Serpent: 32-bit builds probe for SSE2 and fall back to the basic
     * routine; other builds take the SSE2 routine unconditionally.
     */
#ifdef _M_IX86
    if (xts_serpent_sse2_available() == 0) {
        serpent_selected_encrypt = xts_serpent_basic_encrypt;
        serpent_selected_decrypt = xts_serpent_basic_decrypt;
    } else {
        serpent_selected_encrypt = xts_serpent_sse2_encrypt;
        serpent_selected_decrypt = xts_serpent_sse2_decrypt;
    }
#else
    serpent_selected_encrypt = xts_serpent_sse2_encrypt;
    serpent_selected_decrypt = xts_serpent_sse2_decrypt;
#endif

    /* AVX, when reported available, replaces whichever routine was chosen above. */
    if (xts_serpent_avx_available() != 0) {
        serpent_selected_encrypt = xts_serpent_avx_encrypt;
        serpent_selected_decrypt = xts_serpent_avx_decrypt;
    }

    /* AES: hardware paths are probed only on request, AES-NI before PadLock. */
    if (hw_crypt != 0) {
        if (xts_aes_ni_available() != 0) {
            aes_selected_encrypt = xts_aes_ni_encrypt;
            aes_selected_decrypt = xts_aes_ni_decrypt;
            return 1;
        }
        if (aes256_padlock_available() != 0) {
            aes_selected_encrypt = xts_aes_padlock_encrypt;
            aes_selected_decrypt = xts_aes_padlock_decrypt;
            return 2;
        }
    }

    /* No hardware AES requested or found: use the basic routine. */
    aes_selected_encrypt = xts_aes_basic_encrypt;
    aes_selected_decrypt = xts_aes_basic_decrypt;
    return 0;
}