Sandboxie/SandboxieTools/ImBox/dc/crypto_fast/amd64/twofish_amd64.asm

;***************************************************************************
;*   Copyright (C) 2006 by Joachim Fritschi, <jfritschi@freenet.de>        *
;*   adapted for DiskCryptor by ntldr <ntldr@diskcryptor.net>              *
;*       PGP key ID - 0x1B6A24550F33E44A                                   *
;*                                                                         *
;*   This program is free software; you can redistribute it and/or modify  *
;*   it under the terms of the GNU General Public License as published by  *
;*   the Free Software Foundation; either version 2 of the License, or     *
;*   (at your option) any later version.                                   *
;*                                                                         *
;*   This program is distributed in the hope that it will be useful,       *
;*   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
;*   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
;*   GNU General Public License for more details.                          *
;*                                                                         *
;*   You should have received a copy of the GNU General Public License     *
;*   along with this program; if not, write to the                         *
;*   Free Software Foundation, Inc.,                                       *
;*   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
;***************************************************************************

; Byte offsets of the four 32-bit words a, b, c, d inside a 16-byte block.
; Only a_offset and c_offset are used below: whitening is applied 64 bits at
; a time, so each access covers the pair (a,b) or (c,d).
%define a_offset 0
%define b_offset 4
%define c_offset 8
%define d_offset 12

; Structure of the crypto context struct.
; All values are byte offsets from the context base address (held in r8).
%define s0  0	    ; S0 array, 256 words; S0 lookups below use displacement 0 without naming s0
%define s1  1024    ; S1 array
%define s2  2048    ; S2 array
%define s3  3072    ; S3 array
%define w   4096    ; 8 whitening keys (word): input whitening at w+0, output whitening at w+16
%define k   4128    ; round keys 1-32 (word); each round consumes two consecutive words

; Register aliases that allow macro substitution.
; The round macros receive a base name Rn and append a suffix to select the
; view they need: Q = 64-bit, D = 32-bit, B = low byte, H = bits 8..15.
%define R0Q rax
%define R0D eax
%define R0B al
%define R0H ah

%define R1Q rbx
%define R1D ebx
%define R1B bl
%define R1H bh

%define R2Q rcx
%define R2D ecx
%define R2B cl
%define R2H ch

%define R3Q rdx
%define R3D edx
%define R3B dl
%define R3H dh


; performs input whitening
;   %1 = register to whiten (callers in this file pass a 64-bit register)
;   %2 = register holding the context base address
;   %3 = byte offset of the word pair inside the block (a_offset or c_offset)
; XORs %1 with the memory operand at w + %3 from the context base.
%macro input_whitening 3
  xor %1, [w+(%2)+%3]
%endmacro

; performs output whitening
;   %1 = register to whiten (callers in this file pass a 64-bit register)
;   %2 = register holding the context base address
;   %3 = byte offset of the word pair inside the block (a_offset or c_offset)
; XORs %1 with the memory operand at w + 16 + %3 from the context base,
; i.e. 16 bytes past the keys used by input_whitening.
%macro output_whitening 3
  xor %1, [w+16+(%2)+%3]
%endmacro


; One encryption round.
; Arguments are register base names (R0..R3) plus the round-key byte offset:
; * %1  a: input register containing a (rotated 16)
; * %2  b: input register containing b
; * %3  c: input register containing c
; * %4  d: input register containing d (already rotated left by 1)
; * %5  byte offset of this round's key pair relative to k
; * operations on a and b are interleaved to increase performance
; Expects r8 = context base address.
; Clobbers: rdi (table index), r9d (a-side lookup sum), r11d (b-side lookup
; sum), flags.
; On exit: %1 has been rotated right by 16 once, %2 by 16 + 15 = 31 (i.e. left
; by 1), %3 holds (c xor r9d) rotated left by 15, %4 holds d xor r11d.
%macro encrypt_round 5
	; byte-wise S-box lookups: b accumulates in r11d, a in r9d
	movzx	edi,  %2B
	mov	r11d, [r8+rdi*4+s1]
	movzx	edi,  %1B
	mov	r9d,  [r8+rdi*4+s2]
	movzx	edi,  %2H
	ror	%2D,  16
	xor	r11d, [r8+rdi*4+s2]
	movzx	edi,  %1H
	ror	%1D,  16
	xor	r9d,  [r8+rdi*4+s3]
	movzx	edi,  %2B
	xor	r11d, [r8+rdi*4+s3]
	movzx	edi,  %1B
	xor	r9d,  [r8+rdi*4]
	movzx	edi,  %2H
	; index byte was captured above, so b can already be rotated for later use
	ror	%2D,  15
	xor	r11d, [r8+rdi*4]
	movzx	edi,  %1H
	xor	r9d,  [r8+rdi*4+s1]
	; combine the two sums: r9d = ta + tb, r11d = ta + 2*tb
	add	r9d,  r11d
	add	r11d, r9d
	; add the round keys and fold the results into c and d
	add	r9d,  [r8+k+%5]
	xor	%3D,  r9d
	rol	%3D,  15
	add	r11d, [r8+k+4+%5]
	xor	%4D,  r11d
%endmacro

; Final encryption round.
; Arguments are register base names (R0..R3) plus the round-key byte offset:
; * %1  a: input register containing a (rotated 16)
; * %2  b: input register containing b
; * %3  c: input register containing c
; * %4  d: input register containing d (already rotated left by 1)
; * %5  byte offset of this round's key pair relative to k
; * operations on a and b are interleaved to increase performance
; * during the round a and b are prepared for the output whitening:
;   r10 receives b in its upper 32 bits and a (after it has been rotated
;   right by 16) in its lower 32 bits
; Expects r8 = context base address.
; Clobbers: rdi, r9d, r10, r11d, flags.
; On exit: %1 has been rotated right by 16, %2 is left rotated right by 16
; (its original value lives on only in r10), %3 holds (c xor r9d) rotated
; right by 1, %4 holds d xor r11d.
%macro encrypt_last_round 5
	; capture b for the output before it gets rotated
	mov	r10d, %2D
	shl	r10,  32
	movzx	edi,  %2B
	mov	r11d, [r8+rdi*4+s1]
	movzx	edi,  %1B
	mov	r9d,  [r8+rdi*4+s2]
	movzx	edi,  %2H
	ror	%2D,  16
	xor	r11d, [r8+rdi*4+s2]
	movzx	edi,  %1H
	ror	%1D,  16
	xor	r9d,  [r8+rdi*4+s3]
	movzx	edi,  %2B
	xor	r11d, [r8+rdi*4+s3]
	movzx	edi,  %1B
	xor	r9d,  [r8+rdi*4]
	; a is back in its unrotated form here; merge it into the low half of r10
	xor	r10,  %1Q
	movzx	edi,  %2H
	xor	r11d, [r8+rdi*4]
	movzx	edi,  %1H
	xor	r9d,  [r8+rdi*4+s1]
	; combine the two sums: r9d = ta + tb, r11d = ta + 2*tb
	add	r9d,  r11d
	add	r11d, r9d
	add	r9d,  [r8+k+%5]
	xor	%3D,  r9d
	ror	%3D,  1
	add	r11d, [r8+k+4+%5]
	xor	%4D,  r11d
%endmacro

; One decryption round.
; Arguments are register base names (R0..R3) plus the round-key byte offset:
; * %1  a: input register containing a
; * %2  b: input register containing b (rotated 16)
; * %3  c: input register containing c (already rotated left by 1)
; * %4  d: input register containing d
; * %5  byte offset of this round's key pair relative to k
; * operations on a and b are interleaved to increase performance
; Expects r8 = context base address.
; Clobbers: rdi (table index), r9d (a-side lookup sum), r11d (b-side lookup
; sum), flags.
; On exit: %1 has been rotated right by 16 + 15 = 31 (i.e. left by 1), %2 has
; been rotated right by 16 once, %3 holds c xor r9d, %4 holds (d xor r11d)
; rotated left by 15.
%macro decrypt_round 5
	; byte-wise S-box lookups: a accumulates in r9d, b in r11d
	movzx	edi,  %1B
	mov	r9d,  [r8+rdi*4]
	movzx	edi,  %2B
	mov	r11d, [r8+rdi*4+s3]
	movzx	edi,  %1H
	ror	%1D,  16
	xor	r9d,  [r8+rdi*4+s1]
	movzx	edi,  %2H
	ror	%2D,  16
	xor	r11d, [r8+rdi*4]
	movzx	edi,  %1B
	xor	r9d,  [r8+rdi*4+s2]
	movzx	edi,  %2B
	xor	r11d, [r8+rdi*4+s1]
	movzx	edi,  %1H
	; index byte was captured above, so a can already be rotated for later use
	ror	%1D,  15
	xor	r9d,  [r8+rdi*4+s3]
	movzx	edi,  %2H
	xor	r11d, [r8+rdi*4+s2]
	; combine the two sums: r9d = ta + tb, r11d = ta + 2*tb
	add	r9d,  r11d
	add	r11d, r9d
	; add the round keys and fold the results into c and d
	add	r9d,  [r8+k+%5]
	xor	%3D,  r9d
	add	r11d, [r8+k+4+%5]
	xor	%4D,  r11d
	rol	%4D,  15
%endmacro

; Final decryption round.
; Arguments are register base names (R0..R3) plus the round-key byte offset:
; * %1  a: input register containing a
; * %2  b: input register containing b; its bytes are indexed in the same
;          order as in decrypt_round (low byte -> S3 first), so it is consumed
;          in the same rotated-16 form that the preceding decrypt_round leaves
;          in its d register
; * %3  c: input register containing c (already rotated left by 1)
; * %4  d: input register containing d
; * %5  byte offset of this round's key pair relative to k
; * operations on a and b are interleaved to increase performance
; * during the round a and b are prepared for the output whitening:
;   r10 receives b (after it has been rotated right by 16) in its upper
;   32 bits and the entry value of a in its lower 32 bits
; Expects r8 = context base address.
; Clobbers: rdi, r9d, r10, r11d, flags.
; On exit: %1 and %2 have each been rotated right by 16, %3 holds c xor r9d,
; %4 holds (d xor r11d) rotated right by 1.
%macro decrypt_last_round 5
	movzx	edi,  %1B
	mov	r9d,  [r8+rdi*4]
	movzx	edi,  %2B
	mov	r11d, [r8+rdi*4+s3]
	movzx	edi,  %2H
	ror	%2D,  16
	xor	r11d, [r8+rdi*4]
	movzx	edi,  %1H
	; pack the output pair while a is still unrotated: r10 = (b << 32) ^ a
	mov	r10d, %2D
	shl	r10,  32
	xor	r10,  %1Q
	ror	%1D,  16
	xor	r9d,  [r8+rdi*4+s1]
	movzx	edi,  %2B
	xor	r11d, [r8+rdi*4+s1]
	movzx	edi,  %1B
	xor	r9d,  [r8+rdi*4+s2]
	movzx	edi,  %2H
	xor	r11d, [r8+rdi*4+s2]
	movzx	edi,  %1H
	xor	r9d,  [r8+rdi*4+s3]
	; combine the two sums: r9d = ta + tb, r11d = ta + 2*tb
	add	r9d,  r11d
	add	r11d, r9d
	add	r9d,  [r8+k+%5]
	xor	%3D,  r9d
	add	r11d, [r8+k+4+%5]
	xor	%4D,  r11d
	ror	%4D,  1
%endmacro

; Entry points exported to the linker; both are defined below.
global twofish256_encrypt
global twofish256_decrypt

align 16
;-----------------------------------------------------------------------
; twofish256_encrypt - encrypt one 16-byte block
; In:    rcx = input block address
;        rdx = output block address
;        r8  = crypto context address (S-boxes, whitening keys, round keys)
; Out:   16 bytes written to the output address
; Saved: rbx, rsi, rdi (pushed on entry, popped before return)
; Clobb: rax, rcx, rdx, r9, r10, r11, flags
; The four state words live in R0..R3 = eax, ebx, ecx, edx; rdi is the
; table-index scratch register of the round macros.
;-----------------------------------------------------------------------
twofish256_encrypt:
	push	rbx
	push	rsi
	push	rdi
	; rdx is about to become a state register, so park the output pointer
	mov	rsi, rdx

	; first 8 input bytes: whiten, then split into R0 (low word, rotated
	; left by 16 as encrypt_round expects) and R1 (high word)
	mov	rbx, [rcx]
	input_whitening rbx, r8, a_offset
	mov	eax, ebx
	rol	eax, 16
	shr	rbx, 32

	; second 8 input bytes: whiten, then split into R2 (low word) and
	; R3 (high word, pre-rotated left by 1); rcx stops being the input
	; pointer here
	mov	rdx, [rcx+8]
	input_whitening rdx, r8, c_offset
	mov	ecx, edx
	shr	rdx, 32
	rol	edx, 1

	; rounds 0..13 as seven pairs; the register roles swap every round
	; and the key offset advances by 8 bytes per round
%assign tf_enc_koff 0
%rep 7
	encrypt_round R0,R1,R2,R3,tf_enc_koff
	encrypt_round R2,R3,R0,R1,tf_enc_koff+8
%assign tf_enc_koff tf_enc_koff+16
%endrep
	; round 14 (offset 14*8) and the final round 15 (offset 15*8)
	encrypt_round R0,R1,R2,R3,tf_enc_koff
	encrypt_last_round R2,R3,R0,R1,tf_enc_koff+8

	; r10 was packed by encrypt_last_round; it forms the first output half
	output_whitening r10, r8, a_offset
	mov	[rsi], r10

	; second output half: R1 in the upper 32 bits, R0 in the lower 32 bits
	shl	rbx, 32
	xor	rbx, rax
	output_whitening rbx, r8, c_offset
	mov	[rsi+8], rbx

	pop	rdi
	pop	rsi
	pop	rbx
	ret

align 16
;-----------------------------------------------------------------------
; twofish256_decrypt - decrypt one 16-byte block
; In:    rcx = input block address
;        rdx = output block address
;        r8  = crypto context address (S-boxes, whitening keys, round keys)
; Out:   16 bytes written to the output address
; Saved: rbx, rsi, rdi (pushed on entry, popped before return)
; Clobb: rax, rcx, rdx, r9, r10, r11, flags
; The four state words live in R0..R3 = eax, ebx, ecx, edx; rdi is the
; table-index scratch register of the round macros.
;-----------------------------------------------------------------------
twofish256_decrypt:
	push	rbx
	push	rsi
	push	rdi
	; rdx is about to become a state register, so park the output pointer
	mov	rsi, rdx

	; first 8 input bytes: undo the output whitening, then split into
	; R0 (low word) and R1 (high word, rotated left by 16 as
	; decrypt_round expects)
	mov	rbx, [rcx]
	output_whitening rbx, r8, a_offset
	mov	eax, ebx
	shr	rbx, 32
	rol	ebx, 16

	; second 8 input bytes: undo the output whitening, then split into
	; R2 (low word, pre-rotated left by 1) and R3 (high word); rcx stops
	; being the input pointer here
	mov	rdx, [rcx+8]
	output_whitening rdx, r8, c_offset
	mov	ecx, edx
	shr	rdx, 32
	rol	ecx, 1

	; key offsets 15*8 down to 2*8 as seven pairs; the register roles
	; swap every round and the key offset drops by 8 bytes per round
%assign tf_dec_koff 15*8
%rep 7
	decrypt_round R0,R1,R2,R3,tf_dec_koff
	decrypt_round R2,R3,R0,R1,tf_dec_koff-8
%assign tf_dec_koff tf_dec_koff-16
%endrep
	; key offset 1*8, then the final round with key offset 0
	decrypt_round R0,R1,R2,R3,tf_dec_koff
	decrypt_last_round R2,R3,R0,R1,0

	; r10 was packed by decrypt_last_round; it forms the first output half
	input_whitening r10, r8, a_offset
	mov	[rsi], r10

	; second output half: R1 in the upper 32 bits, R0 in the lower 32 bits
	shl	rbx, 32
	xor	rbx, rax
	input_whitening rbx, r8, c_offset
	mov	[rsi+8], rbx

	pop	rdi
	pop	rsi
	pop	rbx
	ret
ImBox 2023-08-24 17:39:00 +01:00			`;***************************************************************************`
			`;* Copyright (C) 2006 by Joachim Fritschi, <jfritschi@freenet.de> *`
			`;* adapted for DiskCryptor by ntldr <ntldr@diskcryptor.net> *`
			`;* PGP key ID - 0x1B6A24550F33E44A *`
			`;* *`
			`;* This program is free software; you can redistribute it and/or modify *`
			`;* it under the terms of the GNU General Public License as published by *`
			`;* the Free Software Foundation; either version 2 of the License, or *`
			`;* (at your option) any later version. *`
			`;* *`
			`;* This program is distributed in the hope that it will be useful, *`
			`;* but WITHOUT ANY WARRANTY; without even the implied warranty of *`
			`;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *`
			`;* GNU General Public License for more details. *`
			`;* *`
			`;* You should have received a copy of the GNU General Public License *`
			`;* along with this program; if not, write to the *`
			`;* Free Software Foundation, Inc., *`
			`;* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *`
			`;***************************************************************************`

			`%define a_offset 0`
			`%define b_offset 4`
			`%define c_offset 8`
			`%define d_offset 12`

			`; Structure of the crypto context struct`
			`%define s0 0 ; S0 Array 256 Words each`
			`%define s1 1024 ; S1 Array`
			`%define s2 2048 ; S2 Array`
			`%define s3 3072 ; S3 Array`
			`%define w 4096 ; 8 whitening keys (word)`
			`%define k 4128 ; key 1-32 ( word )`

			`; define a few register aliases to allow macro substitution`
			`%define R0Q rax`
			`%define R0D eax`
			`%define R0B al`
			`%define R0H ah`

			`%define R1Q rbx`
			`%define R1D ebx`
			`%define R1B bl`
			`%define R1H bh`

			`%define R2Q rcx`
			`%define R2D ecx`
			`%define R2B cl`
			`%define R2H ch`

			`%define R3Q rdx`
			`%define R3D edx`
			`%define R3B dl`
			`%define R3H dh`


			`; performs input whitening`
			`%macro input_whitening 3`
			`xor %1, [w+(%2)+%3]`
			`%endmacro`

			`; performs input whitening`
			`%macro output_whitening 3`
			`xor %1, [w+16+(%2)+%3]`
			`%endmacro`


			`; * a input register containing a (rotated 16)`
			`; * b input register containing b`
			`; * c input register containing c`
			`; * d input register containing d (already rol $1)`
			`; * operations on a and b are interleaved to increase performance`
			`%macro encrypt_round 5`
			`movzx edi, %2B`
			`mov r11d, [r8+rdi*4+s1]`
			`movzx edi, %1B`
			`mov r9d, [r8+rdi*4+s2]`
			`movzx edi, %2H`
			`ror %2D, 16`
			`xor r11d, [r8+rdi*4+s2]`
			`movzx edi, %1H`
			`ror %1D, 16`
			`xor r9d, [r8+rdi*4+s3]`
			`movzx edi, %2B`
			`xor r11d, [r8+rdi*4+s3]`
			`movzx edi, %1B`
			`xor r9d, [r8+rdi*4]`
			`movzx edi, %2H`
			`ror %2D, 15`
			`xor r11d, [r8+rdi*4]`
			`movzx edi, %1H`
			`xor r9d, [r8+rdi*4+s1]`
			`add r9d, r11d`
			`add r11d, r9d`
			`add r9d, [r8+k+%5]`
			`xor %3D, r9d`
			`rol %3D, 15`
			`add r11d, [r8+k+4+%5]`
			`xor %4D, r11d`
			`%endmacro`

			`; * a input register containing a(rotated 16)`
			`; * b input register containing b`
			`; * c input register containing c`
			`; * d input register containing d (already rol $1)`
			`; * operations on a and b are interleaved to increase performance`
			`; * during the %5 a and b are prepared for the output whitening`
			`%macro encrypt_last_round 5`
			`mov r10d, %2D`
			`shl r10, 32`
			`movzx edi, %2B`
			`mov r11d, [r8+rdi*4+s1]`
			`movzx edi, %1B`
			`mov r9d, [r8+rdi*4+s2]`
			`movzx edi, %2H`
			`ror %2D, 16`
			`xor r11d, [r8+rdi*4+s2]`
			`movzx edi, %1H`
			`ror %1D, 16`
			`xor r9d, [r8+rdi*4+s3]`
			`movzx edi, %2B`
			`xor r11d, [r8+rdi*4+s3]`
			`movzx edi, %1B`
			`xor r9d, [r8+rdi*4]`
			`xor r10, %1Q`
			`movzx edi, %2H`
			`xor r11d, [r8+rdi*4]`
			`movzx edi, %1H`
			`xor r9d, [r8+rdi*4+s1]`
			`add r9d, r11d`
			`add r11d, r9d`
			`add r9d, [r8+k+%5]`
			`xor %3D, r9d`
			`ror %3D, 1`
			`add r11d, [r8+k+4+%5]`
			`xor %4D, r11d`
			`%endmacro`

			`; * a input register containing a`
			`; * b input register containing b (rotated 16)`
			`; * c input register containing c (already rol $1)`
			`; * d input register containing d`
			`; * operations on a and b are interleaved to increase performance`
			`%macro decrypt_round 5`
			`movzx edi, %1B`
			`mov r9d, [r8+rdi*4]`
			`movzx edi, %2B`
			`mov r11d, [r8+rdi*4+s3]`
			`movzx edi, %1H`
			`ror %1D, 16`
			`xor r9d, [r8+rdi*4+s1]`
			`movzx edi, %2H`
			`ror %2D, 16`
			`xor r11d, [r8+rdi*4]`
			`movzx edi, %1B`
			`xor r9d, [r8+rdi*4+s2]`
			`movzx edi, %2B`
			`xor r11d, [r8+rdi*4+s1]`
			`movzx edi, %1H`
			`ror %1D, 15`
			`xor r9d, [r8+rdi*4+s3]`
			`movzx edi, %2H`
			`xor r11d, [r8+rdi*4+s2]`
			`add r9d, r11d`
			`add r11d, r9d`
			`add r9d, [r8+k+%5]`
			`xor %3D, r9d`
			`add r11d, [r8+k+4+%5]`
			`xor %4D, r11d`
			`rol %4D, 15`
			`%endmacro`

			`; * a input register containing a`
			`; * b input register containing b`
			`; * c input register containing c (already rol $1)`
			`; * d input register containing d`
			`; * operations on a and b are interleaved to increase performance`
			`; * during the %5 a and b are prepared for the output whitening`
			`%macro decrypt_last_round 5`
			`movzx edi, %1B`
			`mov r9d, [r8+rdi*4]`
			`movzx edi, %2B`
			`mov r11d, [r8+rdi*4+s3]`
			`movzx edi, %2H`
			`ror %2D, 16`
			`xor r11d, [r8+rdi*4]`
			`movzx edi, %1H`
			`mov r10d, %2D`
			`shl r10, 32`
			`xor r10, %1Q`
			`ror %1D, 16`
			`xor r9d, [r8+rdi*4+s1]`
			`movzx edi, %2B`
			`xor r11d, [r8+rdi*4+s1]`
			`movzx edi, %1B`
			`xor r9d, [r8+rdi*4+s2]`
			`movzx edi, %2H`
			`xor r11d, [r8+rdi*4+s2]`
			`movzx edi, %1H`
			`xor r9d, [r8+rdi*4+s3]`
			`add r9d, r11d`
			`add r11d, r9d`
			`add r9d, [r8+k+%5]`
			`xor %3D, r9d`
			`add r11d, [r8+k+4+%5]`
			`xor %4D, r11d`
			`ror %4D, 1`
			`%endmacro`

			`global twofish256_encrypt`
			`global twofish256_decrypt`

			`align 16`
			`twofish256_encrypt:`
			`push R1Q`
			`push rsi`
			`push rdi`
			`; r8 contains the crypto tfm adress`
			`; rdx contains the output adress`
			`; rcx contains the input adress`
			`mov rsi, rdx`

			`mov R1Q, [rcx]`
			`mov R3Q, [rcx+8]`

			`input_whitening R1Q, r8, a_offset`
			`input_whitening R3Q, r8, c_offset`
			`mov R0D, R1D`
			`rol R0D, 16`
			`shr R1Q, 32`
			`mov R2D, R3D`
			`shr R3Q, 32`
			`rol R3D, 1`

			`encrypt_round R0,R1,R2,R3,0`
			`encrypt_round R2,R3,R0,R1,8`
			`encrypt_round R0,R1,R2,R3,2*8`
			`encrypt_round R2,R3,R0,R1,3*8`
			`encrypt_round R0,R1,R2,R3,4*8`
			`encrypt_round R2,R3,R0,R1,5*8`
			`encrypt_round R0,R1,R2,R3,6*8`
			`encrypt_round R2,R3,R0,R1,7*8`

			`encrypt_round R0,R1,R2,R3,8*8`
			`encrypt_round R2,R3,R0,R1,9*8`
			`encrypt_round R0,R1,R2,R3,10*8`
			`encrypt_round R2,R3,R0,R1,11*8`
			`encrypt_round R0,R1,R2,R3,12*8`
			`encrypt_round R2,R3,R0,R1,13*8`
			`encrypt_round R0,R1,R2,R3,14*8`
			`encrypt_last_round R2,R3,R0,R1,15*8`

			`output_whitening r10, r8, a_offset`
			`mov [rsi], r10`

			`shl R1Q, 32`
			`xor R1Q, R0Q`

			`output_whitening R1Q, r8, c_offset`
			`mov [rsi+8], R1Q`

			`pop rdi`
			`pop rsi`

			`pop R1Q`
			`ret`

			`align 16`
			`twofish256_decrypt:`
			`push R1Q`
			`push rsi`
			`push rdi`
			`; r8 contains the crypto tfm adress`
			`; rdx contains the output adress`
			`; rcx contains the input adress`
			`mov rsi, rdx`

			`mov R1Q, [rcx]`
			`mov R3Q, [rcx+8]`

			`output_whitening R1Q, r8, a_offset`
			`output_whitening R3Q, r8, c_offset`
			`mov R0D, R1D`
			`shr R1Q, 32`
			`rol R1D, 16`
			`mov R2D, R3D`
			`shr R3Q, 32`
			`rol R2D, 1`

			`decrypt_round R0,R1,R2,R3,15*8`
			`decrypt_round R2,R3,R0,R1,14*8`
			`decrypt_round R0,R1,R2,R3,13*8`
			`decrypt_round R2,R3,R0,R1,12*8`
			`decrypt_round R0,R1,R2,R3,11*8`
			`decrypt_round R2,R3,R0,R1,10*8`
			`decrypt_round R0,R1,R2,R3,9*8`
			`decrypt_round R2,R3,R0,R1,8*8`
			`decrypt_round R0,R1,R2,R3,7*8`
			`decrypt_round R2,R3,R0,R1,6*8`
			`decrypt_round R0,R1,R2,R3,5*8`
			`decrypt_round R2,R3,R0,R1,4*8`
			`decrypt_round R0,R1,R2,R3,3*8`
			`decrypt_round R2,R3,R0,R1,2*8`
			`decrypt_round R0,R1,R2,R3,1*8`
			`decrypt_last_round R2,R3,R0,R1,0`

			`input_whitening r10, r8, a_offset`
			`mov [rsi], r10`

			`shl R1Q, 32`
			`xor R1Q, R0Q`

			`input_whitening R1Q, r8, c_offset`
			`mov [rsi+8], R1Q`

			`pop rdi`
			`pop rsi`

			`pop R1Q`
			`ret`