head	1.2;
access;
symbols
	RPM_4_2:1.1.1.1
	RPM_4_1_1:1.1.1.1
	RPM_4_1:1.1.1.1
	RPM:1.1.1;
locks; strict;
comment	@;; @;


1.2
date	2003.08.24.12.57.36;	author rse;	state dead;
branches;
next	1.1;

1.1
date	2002.06.14.20.31.06;	author rse;	state Exp;
branches
	1.1.1.1;
next	;

1.1.1.1
date	2002.06.14.20.31.06;	author rse;	state Exp;
branches;
next	;


desc
@@


1.2
log
@Resolve: RPM 4.2 -> RPM 4.2.1
@
text
@;
; aesopt.i586.asm
;
; Assembler optimized AES routines for Intel Pentium processors
;
; Compile target is Microsoft Macro Assembler
;
; Copyright (c) 2002 Bob Deblier <bob@@virtualunlimited.com>
;
; This library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public
; License as published by the Free Software Foundation; either
; version 2.1 of the License, or (at your option) any later version.
;
; This library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with this library; if not, write to the Free Software
; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
;

	; enable the Pentium (.586) instruction set; the code below uses bswap
	.586
	; flat memory model, C language naming/calling conventions
	.model flat,C

; Lookup tables supplied by another module. Every use below has the form
; [byte*4+table], i.e. each table is treated as an array of dwords indexed
; by a byte value.
; _ae0.._ae3 are read by the etfs/esft round macros, _ae4 by elr.
EXTRN _ae0:DWORD
EXTRN _ae1:DWORD
EXTRN _ae2:DWORD
EXTRN _ae3:DWORD
EXTRN _ae4:DWORD

; _ad0.._ad3 are read by the dtfs/dsft round macros, _ad4 by dlr.
EXTRN _ad0:DWORD
EXTRN _ad1:DWORD
EXTRN _ad2:DWORD
EXTRN _ad3:DWORD
EXTRN _ad4:DWORD

	.code

; esp points to s and t (on stack; 32 bytes altogether)
; ebp points to rk
; edi points to dst
; esi points to src

; sxrk: build the initial state from the input block.
; Reads the four dwords at [esi], byte-swaps each one, xors them with the
; four round-key dwords at [ebp+0..15] and stores the results as the state
; words s0..s3 at [esp+0..15].
; Clobbers eax, ebx, ecx, edx and the flags.
sxrk	macro
	; compute swap(src) xor rk
	; fetch the four input dwords
	mov eax,dword ptr [esi   ]
	mov ebx,dword ptr [esi+ 4]
	mov ecx,dword ptr [esi+ 8]
	mov edx,dword ptr [esi+12]
	; reverse the byte order of each dword
	bswap eax
	bswap ebx
	bswap ecx
	bswap edx
	; mix in the first four round-key dwords
	xor eax,dword ptr [ebp   ]
	xor ebx,dword ptr [ebp+ 4]
	xor ecx,dword ptr [ebp+ 8]
	xor edx,dword ptr [ebp+12]
	; store as s0..s3
	mov dword ptr [esp   ],eax
	mov dword ptr [esp+ 4],ebx
	mov dword ptr [esp+ 8],ecx
	mov dword ptr [esp+12],edx
	endm

; etfs: one encryption round, taking the state from s ([esp+0..15]) to
; t ([esp+16..31]).
; The macro argument is the byte displacement from ebp of the four round-key
; dwords used by this round.
; Output word i starts as round-key dword i and is xored with four table
; entries: _ae0 indexed by the top byte of s[i], _ae1 by byte 2 of s[i+1],
; _ae2 by byte 1 of s[i+2] and _ae3 by the low byte of s[i+3] (word indices
; taken modulo 4). Two output words are computed side by side, one in ecx
; and one in edx.
; Clobbers eax, ebx, ecx, edx and the flags.
etfs	macro	offset
	; compute t0 and t1
	mov ecx,[ebp+offset  ]
	mov edx,[ebp+offset+4]

	movzx eax,byte ptr [esp+ 3]
	movzx ebx,byte ptr [esp+ 7]
	xor ecx,dword ptr [eax*4+_ae0]
	xor edx,dword ptr [ebx*4+_ae0]

	movzx eax,byte ptr [esp+ 6]
	movzx ebx,byte ptr [esp+10]
	xor ecx,dword ptr [eax*4+_ae1]
	xor edx,dword ptr [ebx*4+_ae1]

	movzx eax,byte ptr [esp+ 9]
	movzx ebx,byte ptr [esp+13]
	xor ecx,dword ptr [eax*4+_ae2]
	xor edx,dword ptr [ebx*4+_ae2]

	movzx eax,byte ptr [esp+12]
	movzx ebx,byte ptr [esp   ]
	xor ecx,dword ptr [eax*4+_ae3]
	xor edx,dword ptr [ebx*4+_ae3]

	mov dword ptr [esp+16],ecx
	mov dword ptr [esp+20],edx

	; compute t2 and t3
	mov ecx,dword ptr [ebp+offset+ 8]
	mov edx,dword ptr [ebp+offset+12]

	movzx eax,byte ptr [esp+11]
	movzx ebx,byte ptr [esp+15]
	xor ecx,dword ptr [eax*4+_ae0]
	xor edx,dword ptr [ebx*4+_ae0]

	movzx eax,byte ptr [esp+14]
	movzx ebx,byte ptr [esp+ 2]
	xor ecx,dword ptr [eax*4+_ae1]
	xor edx,dword ptr [ebx*4+_ae1]

	movzx eax,byte ptr [esp+ 1]
	movzx ebx,byte ptr [esp+ 5]
	xor ecx,dword ptr [eax*4+_ae2]
	xor edx,dword ptr [ebx*4+_ae2]

	movzx eax,byte ptr [esp+ 4]
	movzx ebx,byte ptr [esp+ 8]
	xor ecx,dword ptr [eax*4+_ae3]
	xor edx,dword ptr [ebx*4+_ae3]

	mov dword ptr [esp+24],ecx
	mov dword ptr [esp+28],edx
	endm

; esft: one encryption round, taking the state from t ([esp+16..31]) back
; to s ([esp+0..15]). It mirrors etfs with the source and destination
; halves of the scratch area exchanged.
; The macro argument is the byte displacement from ebp of the four round-key
; dwords used by this round.
; Output word i starts as round-key dword i and is xored with _ae0 indexed
; by the top byte of t[i], _ae1 by byte 2 of t[i+1], _ae2 by byte 1 of
; t[i+2] and _ae3 by the low byte of t[i+3] (word indices modulo 4).
; Clobbers eax, ebx, ecx, edx and the flags.
esft	macro	offset
	; compute s0 and s1
	mov ecx,[ebp+offset  ]
	mov edx,[ebp+offset+4]

	movzx eax,byte ptr [esp+19]
	movzx ebx,byte ptr [esp+23]
	xor ecx,dword ptr [eax*4+_ae0]
	xor edx,dword ptr [ebx*4+_ae0]

	movzx eax,byte ptr [esp+22]
	movzx ebx,byte ptr [esp+26]
	xor ecx,dword ptr [eax*4+_ae1]
	xor edx,dword ptr [ebx*4+_ae1]

	movzx eax,byte ptr [esp+25]
	movzx ebx,byte ptr [esp+29]
	xor ecx,dword ptr [eax*4+_ae2]
	xor edx,dword ptr [ebx*4+_ae2]

	movzx eax,byte ptr [esp+28]
	movzx ebx,byte ptr [esp+16]
	xor ecx,dword ptr [eax*4+_ae3]
	xor edx,dword ptr [ebx*4+_ae3]

	mov dword ptr [esp   ],ecx
	mov dword ptr [esp+ 4],edx

	; compute s2 and s3
	mov ecx,dword ptr [ebp+offset+ 8]
	mov edx,dword ptr [ebp+offset+12]

	movzx eax,byte ptr [esp+27]
	movzx ebx,byte ptr [esp+31]
	xor ecx,dword ptr [eax*4+_ae0]
	xor edx,dword ptr [ebx*4+_ae0]

	movzx eax,byte ptr [esp+30]
	movzx ebx,byte ptr [esp+18]
	xor ecx,dword ptr [eax*4+_ae1]
	xor edx,dword ptr [ebx*4+_ae1]

	movzx eax,byte ptr [esp+17]
	movzx ebx,byte ptr [esp+21]
	xor ecx,dword ptr [eax*4+_ae2]
	xor edx,dword ptr [ebx*4+_ae2]

	movzx eax,byte ptr [esp+20]
	movzx ebx,byte ptr [esp+24]
	xor ecx,dword ptr [eax*4+_ae3]
	xor edx,dword ptr [ebx*4+_ae3]

	mov dword ptr [esp+ 8],ecx
	mov dword ptr [esp+12],edx
	endm

; elr: last encryption round, taking the state from t ([esp+16..31]) to
; s ([esp+0..15]).
; Uses the round-key dwords at [ebp+0..15]; eblock and eblockc advance ebp
; by 16 times the round count before invoking this macro.
; Output word i starts as round-key dword i. For each of the four selected
; input bytes the _ae4 entry is fetched and masked down to a single byte
; lane before being xored in: bits 31-24 for the top byte of t[i], bits
; 23-16 for byte 2 of t[i+1], bits 15-8 for byte 1 of t[i+2] and bits 7-0
; for the low byte of t[i+3] (word indices modulo 4).
; Clobbers eax, ebx, ecx, edx and the flags.
elr	macro
	; compute s0 and s1
	mov ecx,dword ptr [ebp+ 0]
	mov edx,dword ptr [ebp+ 4]

	; lane 31-24
	movzx eax,byte ptr [esp+19]
	movzx ebx,byte ptr [esp+23]
	mov eax,dword ptr [eax*4+_ae4]
	mov ebx,dword ptr [ebx*4+_ae4]
	and eax,0ff000000h
	and ebx,0ff000000h
	xor ecx,eax
	xor edx,ebx

	; lane 23-16
	movzx eax,byte ptr [esp+22]
	movzx ebx,byte ptr [esp+26]
	mov eax,dword ptr [eax*4+_ae4]
	mov ebx,dword ptr [ebx*4+_ae4]
	and eax,0ff0000h
	and ebx,0ff0000h
	xor ecx,eax
	xor edx,ebx

	; lane 15-8
	movzx eax,byte ptr [esp+25]
	movzx ebx,byte ptr [esp+29]
	mov eax,dword ptr [eax*4+_ae4]
	mov ebx,dword ptr [ebx*4+_ae4]
	and eax,0ff00h
	and ebx,0ff00h
	xor ecx,eax
	xor edx,ebx

	; lane 7-0
	movzx eax,byte ptr [esp+28]
	movzx ebx,byte ptr [esp+16]
	mov eax,dword ptr [eax*4+_ae4]
	mov ebx,dword ptr [ebx*4+_ae4]
	and eax,0ffh
	and ebx,0ffh
	xor ecx,eax
	xor edx,ebx

	mov dword ptr [esp+ 0],ecx
	mov dword ptr [esp+ 4],edx

	; compute s2 and s3
	mov ecx,dword ptr [ebp+ 8]
	mov edx,dword ptr [ebp+12]

	; lane 31-24
	movzx eax,byte ptr [esp+27]
	movzx ebx,byte ptr [esp+31]
	mov eax,dword ptr [eax*4+_ae4]
	mov ebx,dword ptr [ebx*4+_ae4]
	and eax,0ff000000h
	and ebx,0ff000000h
	xor ecx,eax
	xor edx,ebx

	; lane 23-16
	movzx eax,byte ptr [esp+30]
	movzx ebx,byte ptr [esp+18]
	mov eax,dword ptr [eax*4+_ae4]
	mov ebx,dword ptr [ebx*4+_ae4]
	and eax,0ff0000h
	and ebx,0ff0000h
	xor ecx,eax
	xor edx,ebx

	; lane 15-8
	movzx eax,byte ptr [esp+17]
	movzx ebx,byte ptr [esp+21]
	mov eax,dword ptr [eax*4+_ae4]
	mov ebx,dword ptr [ebx*4+_ae4]
	and eax,0ff00h
	and ebx,0ff00h
	xor ecx,eax
	xor edx,ebx

	; lane 7-0
	movzx eax,byte ptr [esp+20]
	movzx ebx,byte ptr [esp+24]
	mov eax,dword ptr [eax*4+_ae4]
	mov ebx,dword ptr [ebx*4+_ae4]
	and eax,0ffh
	and ebx,0ffh
	xor ecx,eax
	xor edx,ebx

	mov dword ptr [esp+ 8],ecx
	mov dword ptr [esp+12],edx
	endm

; eblock: encrypt the block at [esi] with the key schedule at ebp, leaving
; the result (not yet byte-swapped) in s0..s3 at [esp+0..15].
; Expects the register setup described above the sxrk macro, with 32 bytes
; of scratch reserved at esp.
; The round count is read from the dword at [ebp+256]; 10, 12 and any other
; value (treated as 14) select how many rounds run before the last round.
; The macro argument names the "last round" jump target. It is pasted onto
; the fixed label prefix with the & substitution operator, so every
; expansion can be given its own label. Without the operator the prefix and
; the parameter name form one identifier and the argument is ignored.
; Do not pass F or B as the argument: the resulting names are MASM's
; anonymous-label references.
; On exit ebp has been advanced by 16 times the round count.
; Clobbers eax, ebx, ecx, edx and the flags.
eblock	macro	label
	; load initial values for s0 thru s3
	sxrk

	; do 9 rounds
	etfs 16
	esft 32
	etfs 48
	esft 64
	etfs 80
	esft 96
	etfs 112
	esft 128
	etfs 144
	; test if we had to do 10 rounds, if yes jump to last round
	; (the round macros clobber eax, so the count is fetched here)
	mov eax,dword ptr [ebp+256]
	cmp eax,10
	je @@&label
	; do two more rounds
	esft 160
	etfs 176
	; test if we had to do 12 rounds, if yes jump to last round
	; (eax was clobbered again, so the count is re-read)
	mov eax,dword ptr [ebp+256]
	cmp eax,12
	je @@&label
	; do two more rounds
	esft 192
	etfs 208
	; prepare for last round
	mov eax,dword ptr [ebp+256]
@@&label:
	; add 16 times the number of rounds to ebp
	; so that [ebp+0..15] holds the last four round-key dwords
	sal eax,4
	add ebp,eax
	; do last round
	elr
	endm

; eblockc: CBC-mode variant of eblock. It is identical to eblock except
; that the initial state is built by the sxrfxrk macro instead of sxrk.
; NOTE(review): no macro named sxrfxrk is defined in the visible source and
; nothing here invokes eblockc, so this macro cannot currently be expanded.
; Presumably sxrfxrk computes swap(src) xor feedback xor rk; confirm and
; define it before using eblockc.
; The macro argument names the "last round" jump target. It is pasted onto
; the fixed label prefix with the & substitution operator, so every
; expansion can be given its own label. Without the operator the prefix and
; the parameter name form one identifier and the argument is ignored.
; Do not pass F or B as the argument: the resulting names are MASM's
; anonymous-label references.
; On exit ebp has been advanced by 16 times the round count.
; Clobbers eax, ebx, ecx, edx and the flags.
eblockc	macro	label
	; encrypt block in cbc mode
	sxrfxrk

	; do 9 rounds
	etfs 16
	esft 32
	etfs 48
	esft 64
	etfs 80
	esft 96
	etfs 112
	esft 128
	etfs 144
	; test if we had to do 10 rounds, if yes jump to last round
	; (the round macros clobber eax, so the count is fetched here)
	mov eax,dword ptr [ebp+256]
	cmp eax,10
	je @@&label
	; do two more rounds
	esft 160
	etfs 176
	; test if we had to do 12 rounds, if yes jump to last round
	; (eax was clobbered again, so the count is re-read)
	mov eax,dword ptr [ebp+256]
	cmp eax,12
	je @@&label
	; do two more rounds
	esft 192
	etfs 208
	; prepare for last round
	mov eax,dword ptr [ebp+256]
@@&label:
	; add 16 times the number of rounds to ebp
	; so that [ebp+0..15] holds the last four round-key dwords
	sal eax,4
	add ebp,eax
	; do last round
	elr
	endm

; dtfs: one decryption round, taking the state from s ([esp+0..15]) to
; t ([esp+16..31]).
; The macro argument is the byte displacement from ebp of the four round-key
; dwords used by this round.
; Output word i starts as round-key dword i and is xored with four table
; entries: _ad0 indexed by the top byte of s[i], _ad1 by byte 2 of s[i+3],
; _ad2 by byte 1 of s[i+2] and _ad3 by the low byte of s[i+1] (word indices
; taken modulo 4). Two output words are computed side by side, one in ecx
; and one in edx.
; Clobbers eax, ebx, ecx, edx and the flags.
dtfs	macro	offset
	; compute t0 and t1
	mov ecx,[ebp+offset  ]
	mov edx,[ebp+offset+4]

	movzx eax,byte ptr [esp+ 3]
	movzx ebx,byte ptr [esp+ 7]
	xor ecx,dword ptr [eax*4+_ad0]
	xor edx,dword ptr [ebx*4+_ad0]

	movzx eax,byte ptr [esp+14]
	movzx ebx,byte ptr [esp+ 2]
	xor ecx,dword ptr [eax*4+_ad1]
	xor edx,dword ptr [ebx*4+_ad1]

	movzx eax,byte ptr [esp+ 9]
	movzx ebx,byte ptr [esp+13]
	xor ecx,dword ptr [eax*4+_ad2]
	xor edx,dword ptr [ebx*4+_ad2]

	movzx eax,byte ptr [esp+ 4]
	movzx ebx,byte ptr [esp+ 8]
	xor ecx,dword ptr [eax*4+_ad3]
	xor edx,dword ptr [ebx*4+_ad3]

	mov dword ptr [esp+16],ecx
	mov dword ptr [esp+20],edx

	; compute t2 and t3
	mov ecx,dword ptr [ebp+offset+ 8]
	mov edx,dword ptr [ebp+offset+12]

	movzx eax,byte ptr [esp+11]
	movzx ebx,byte ptr [esp+15]
	xor ecx,dword ptr [eax*4+_ad0]
	xor edx,dword ptr [ebx*4+_ad0]

	movzx eax,byte ptr [esp+ 6]
	movzx ebx,byte ptr [esp+10]
	xor ecx,dword ptr [eax*4+_ad1]
	xor edx,dword ptr [ebx*4+_ad1]

	movzx eax,byte ptr [esp+ 1]
	movzx ebx,byte ptr [esp+ 5]
	xor ecx,dword ptr [eax*4+_ad2]
	xor edx,dword ptr [ebx*4+_ad2]

	movzx eax,byte ptr [esp+12]
	movzx ebx,byte ptr [esp   ]
	xor ecx,dword ptr [eax*4+_ad3]
	xor edx,dword ptr [ebx*4+_ad3]

	mov dword ptr [esp+24],ecx
	mov dword ptr [esp+28],edx
	endm

; dsft: one decryption round, taking the state from t ([esp+16..31]) back
; to s ([esp+0..15]). It mirrors dtfs with the source and destination
; halves of the scratch area exchanged.
; The macro argument is the byte displacement from ebp of the four round-key
; dwords used by this round.
; Output word i starts as round-key dword i and is xored with _ad0 indexed
; by the top byte of t[i], _ad1 by byte 2 of t[i+3], _ad2 by byte 1 of
; t[i+2] and _ad3 by the low byte of t[i+1] (word indices modulo 4).
; Clobbers eax, ebx, ecx, edx and the flags.
dsft	macro	offset
	; compute s0 and s1
	mov ecx,[ebp+offset  ]
	mov edx,[ebp+offset+4]

	movzx eax,byte ptr [esp+19]
	movzx ebx,byte ptr [esp+23]
	xor ecx,dword ptr [eax*4+_ad0]
	xor edx,dword ptr [ebx*4+_ad0]

	movzx eax,byte ptr [esp+30]
	movzx ebx,byte ptr [esp+18]
	xor ecx,dword ptr [eax*4+_ad1]
	xor edx,dword ptr [ebx*4+_ad1]

	movzx eax,byte ptr [esp+25]
	movzx ebx,byte ptr [esp+29]
	xor ecx,dword ptr [eax*4+_ad2]
	xor edx,dword ptr [ebx*4+_ad2]

	movzx eax,byte ptr [esp+20]
	movzx ebx,byte ptr [esp+24]
	xor ecx,dword ptr [eax*4+_ad3]
	xor edx,dword ptr [ebx*4+_ad3]

	mov dword ptr [esp   ],ecx
	mov dword ptr [esp+ 4],edx

	; compute s2 and s3
	mov ecx,dword ptr [ebp+offset+ 8]
	mov edx,dword ptr [ebp+offset+12]

	movzx eax,byte ptr [esp+27]
	movzx ebx,byte ptr [esp+31]
	xor ecx,dword ptr [eax*4+_ad0]
	xor edx,dword ptr [ebx*4+_ad0]

	movzx eax,byte ptr [esp+22]
	movzx ebx,byte ptr [esp+26]
	xor ecx,dword ptr [eax*4+_ad1]
	xor edx,dword ptr [ebx*4+_ad1]

	movzx eax,byte ptr [esp+17]
	movzx ebx,byte ptr [esp+21]
	xor ecx,dword ptr [eax*4+_ad2]
	xor edx,dword ptr [ebx*4+_ad2]

	movzx eax,byte ptr [esp+28]
	movzx ebx,byte ptr [esp+16]
	xor ecx,dword ptr [eax*4+_ad3]
	xor edx,dword ptr [ebx*4+_ad3]

	mov dword ptr [esp+ 8],ecx
	mov dword ptr [esp+12],edx
	endm

; dlr: last decryption round, taking the state from t ([esp+16..31]) to
; s ([esp+0..15]).
; Uses the round-key dwords at [ebp+0..15]; dblock advances ebp by 16 times
; the round count before invoking this macro.
; Output word i starts as round-key dword i. For each of the four selected
; input bytes the _ad4 entry is fetched and masked down to a single byte
; lane before being xored in: bits 31-24 for the top byte of t[i], bits
; 23-16 for byte 2 of t[i+3], bits 15-8 for byte 1 of t[i+2] and bits 7-0
; for the low byte of t[i+1] (word indices modulo 4).
; Clobbers eax, ebx, ecx, edx and the flags.
dlr	macro
	; compute s0 and s1
	mov ecx,dword ptr [ebp+ 0]
	mov edx,dword ptr [ebp+ 4]

	; lane 31-24
	movzx eax,byte ptr [esp+19]
	movzx ebx,byte ptr [esp+23]
	mov eax,dword ptr [eax*4+_ad4]
	mov ebx,dword ptr [ebx*4+_ad4]
	and eax,0ff000000h
	and ebx,0ff000000h
	xor ecx,eax
	xor edx,ebx

	; lane 23-16
	movzx eax,byte ptr [esp+30]
	movzx ebx,byte ptr [esp+18]
	mov eax,dword ptr [eax*4+_ad4]
	mov ebx,dword ptr [ebx*4+_ad4]
	and eax,0ff0000h
	and ebx,0ff0000h
	xor ecx,eax
	xor edx,ebx

	; lane 15-8
	movzx eax,byte ptr [esp+25]
	movzx ebx,byte ptr [esp+29]
	mov eax,dword ptr [eax*4+_ad4]
	mov ebx,dword ptr [ebx*4+_ad4]
	and eax,0ff00h
	and ebx,0ff00h
	xor ecx,eax
	xor edx,ebx

	; lane 7-0
	movzx eax,byte ptr [esp+20]
	movzx ebx,byte ptr [esp+24]
	mov eax,dword ptr [eax*4+_ad4]
	mov ebx,dword ptr [ebx*4+_ad4]
	and eax,0ffh
	and ebx,0ffh
	xor ecx,eax
	xor edx,ebx

	mov dword ptr [esp+ 0],ecx
	mov dword ptr [esp+ 4],edx

	; compute s2 and s3
	mov ecx,dword ptr [ebp+ 8]
	mov edx,dword ptr [ebp+12]

	; lane 31-24
	movzx eax,byte ptr [esp+27]
	movzx ebx,byte ptr [esp+31]
	mov eax,dword ptr [eax*4+_ad4]
	mov ebx,dword ptr [ebx*4+_ad4]
	and eax,0ff000000h
	and ebx,0ff000000h
	xor ecx,eax
	xor edx,ebx

	; lane 23-16
	movzx eax,byte ptr [esp+22]
	movzx ebx,byte ptr [esp+26]
	mov eax,dword ptr [eax*4+_ad4]
	mov ebx,dword ptr [ebx*4+_ad4]
	and eax,0ff0000h
	and ebx,0ff0000h
	xor ecx,eax
	xor edx,ebx

	; lane 15-8
	movzx eax,byte ptr [esp+17]
	movzx ebx,byte ptr [esp+21]
	mov eax,dword ptr [eax*4+_ad4]
	mov ebx,dword ptr [ebx*4+_ad4]
	and eax,0ff00h
	and ebx,0ff00h
	xor ecx,eax
	xor edx,ebx

	; lane 7-0
	movzx eax,byte ptr [esp+28]
	movzx ebx,byte ptr [esp+16]
	mov eax,dword ptr [eax*4+_ad4]
	mov ebx,dword ptr [ebx*4+_ad4]
	and eax,0ffh
	and ebx,0ffh
	xor ecx,eax
	xor edx,ebx

	mov dword ptr [esp+ 8],ecx
	mov dword ptr [esp+12],edx
	endm

; dblock: decrypt the block at [esi] with the key schedule at ebp, leaving
; the result (not yet byte-swapped) in s0..s3 at [esp+0..15].
; Expects the register setup described above the sxrk macro, with 32 bytes
; of scratch reserved at esp.
; The round count is read from the dword at [ebp+256]; 10, 12 and any other
; value (treated as 14) select how many rounds run before the last round.
; The macro argument names the "last round" jump target. It is pasted onto
; the fixed label prefix with the & substitution operator, so every
; expansion can be given its own label. Without the operator the prefix and
; the parameter name form one identifier and the argument is ignored.
; Do not pass F or B as the argument: the resulting names are MASM's
; anonymous-label references.
; On exit ebp has been advanced by 16 times the round count.
; Clobbers eax, ebx, ecx, edx and the flags.
dblock	macro	label
	; load initial values for s0 thru s3
	sxrk

	; do 9 rounds
	dtfs 16
	dsft 32
	dtfs 48
	dsft 64
	dtfs 80
	dsft 96
	dtfs 112
	dsft 128
	dtfs 144
	; test if we had to do 10 rounds, if yes jump to last round
	; (the round macros clobber eax, so the count is fetched here)
	mov eax,dword ptr [ebp+256]
	cmp eax,10
	je @@&label
	; do two more rounds
	dsft 160
	dtfs 176
	; test if we had to do 12 rounds, if yes jump to last round
	; (eax was clobbered again, so the count is re-read)
	mov eax,dword ptr [ebp+256]
	cmp eax,12
	je @@&label
	; do two more rounds
	dsft 192
	dtfs 208
	; prepare for last round
	mov eax,dword ptr [ebp+256]
@@&label:
	; add 16 times the number of rounds to ebp
	; so that [ebp+0..15] holds the last four round-key dwords
	sal eax,4
	add ebp,eax
	; do last round
	dlr
	endm

; aesEncrypt: encrypt one 16-byte block.
; Stack arguments, in order: rk (key schedule), dst (output block),
; src (input block).
; Saves and restores edi, esi, ebp and ebx; ecx and edx are overwritten.
; Always returns 0 in eax.
aesEncrypt proc
	; preserve the registers that are repurposed as pointers/scratch
	push edi
	push esi
	push ebp
	push ebx

	; four saved registers plus the return address sit below the
	; arguments, so the first argument is at esp+20
	mov esi,[esp+28]	; src
	mov edi,[esp+24]	; dst
	mov ebp,[esp+20]	; rk

	; reserve 32 bytes of scratch: s at [esp+0..15], t at [esp+16..31]
	sub esp,32

	; run all rounds; the final state ends up in s
	eblock e

	; fetch the final state words
	mov edx,[esp+12]
	mov ecx,[esp+ 8]
	mov ebx,[esp+ 4]
	mov eax,[esp+ 0]
	; undo the byte swap applied when the block was loaded
	bswap edx
	bswap ecx
	bswap ebx
	bswap eax
	; write the result block to dst
	mov [edi   ],eax
	mov [edi+ 4],ebx
	mov [edi+ 8],ecx
	mov [edi+12],edx

	; release the scratch area
	add esp,32

	; return value 0
	xor eax,eax

	; restore saved registers in reverse push order
	pop ebx
	pop ebp
	pop esi
	pop edi
	ret
aesEncrypt endp

; aesDecrypt: decrypt one 16-byte block.
; Stack arguments, in order: rk (key schedule), dst (output block),
; src (input block).
; Saves and restores edi, esi, ebp and ebx; ecx and edx are overwritten.
; Always returns 0 in eax.
aesDecrypt proc
	; preserve the registers that are repurposed as pointers/scratch
	push edi
	push esi
	push ebp
	push ebx

	; four saved registers plus the return address sit below the
	; arguments, so the first argument is at esp+20
	mov esi,[esp+28]	; src
	mov edi,[esp+24]	; dst
	mov ebp,[esp+20]	; rk

	; reserve 32 bytes of scratch: s at [esp+0..15], t at [esp+16..31]
	sub esp,32

	; run all rounds; the final state ends up in s
	dblock d

	; fetch the final state words
	mov edx,[esp+12]
	mov ecx,[esp+ 8]
	mov ebx,[esp+ 4]
	mov eax,[esp+ 0]
	; undo the byte swap applied when the block was loaded
	bswap edx
	bswap ecx
	bswap ebx
	bswap eax
	; write the result block to dst
	mov [edi   ],eax
	mov [edi+ 4],ebx
	mov [edi+ 8],ecx
	mov [edi+12],edx

	; release the scratch area
	add esp,32

	; return value 0
	xor eax,eax

	; restore saved registers in reverse push order
	pop ebx
	pop ebp
	pop esi
	pop edi
	ret
aesDecrypt endp

	end
@


1.1
log
@Initial revision
@
text
@@


1.1.1.1
log
@Import: RPM 4.1
@
text
@@
