/* This file is part of the ATMEL AVR32-UC3-SoftwareFramework-1.6.0 Release */

	.global	aes_crypt_ecb
	
	
/* One full encryption round expressed in C (reference for the round macros below):
t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 4];
t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 5];
t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 6];
t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 7] ;  */

/* Base-address registers for the four word-wide lookup tables.
   The encryption (TE*) and decryption (TD*) names alias the same four
   registers, r4-r7; aes_crypt_ecb loads one set or the other per call. */
#define		TE0	r4
#define		TE1	r5
#define		TE2	r6
#define		TE3	r7
#define		TD0	r4
#define		TD1	r5
#define		TD2	r6
#define		TD3	r7
/* Base register of the byte-wide table used by the last-round macros.
   It is the same register as TE0/TD0 (r4), so it must be reloaded with the
   byte table only after the last full round has finished using TE0/TD0. */
#define		TE4	r4
#define		TD4	r4
	
/* Scratch register used inside the round macros (lr is saved and restored
   by aes_crypt_ecb). */
#define		tmp	lr
/* Pointer from which the round macros read their round-key words. */
#define		RK	r12	
		
	/*
	 * RoundOddEncrypt - one full table-lookup encryption round.
	 *
	 * Notation: sN:t, sN:u, sN:l, sN:b are the byte selectors used in the
	 * indexed loads below; they correspond to (sN >> 24), (sN >> 16) & 0xff,
	 * (sN >> 8) & 0xff and sN & 0xff in the C reference formula in the
	 * comment near the top of this file.
	 *
	 * Computes:
	 *   t0 = TE0[s0:t] ^ TE1[s1:u] ^ TE2[s2:l] ^ TE3[s3:b] ^ RK[16]
	 *   t1 = TE0[s1:t] ^ TE1[s2:u] ^ TE2[s3:l] ^ TE3[s0:b] ^ RK[20]
	 *   t2 = TE0[s2:t] ^ TE1[s3:u] ^ TE2[s0:l] ^ TE3[s1:b] ^ RK[24]
	 *   t3 = TE0[s3:t] ^ TE1[s0:u] ^ TE2[s1:l] ^ TE3[s2:b] ^ RK[28]
	 * where RK[n] is the word at byte offset n from RK.
	 *
	 * On exit RK has been advanced by 32 bytes, so the round that follows
	 * finds its key words at offsets 0..12.
	 *
	 * Clobbers: tmp, s0, s1, s2 (each is recycled as scratch once all of its
	 * bytes have been consumed). s3 is only read. The t registers must be
	 * distinct from the s registers.
	 * Loads and eors are interleaved; presumably this is to hide load
	 * latency, so keep the instruction order when editing.
	 */
	.macro	RoundOddEncrypt s0, s1, s2, s3, t0, t1, t2, t3
	
	/* Seed all four outputs with the lookups indexed by the bytes of s0. */
	ld.w		\t0, TE0[\s0:t << 2]
	ld.w		\t3, TE1[\s0:u << 2]
	ld.w		\t2, TE2[\s0:l << 2]
	ld.w		tmp, RK[16]
	ld.w		\t1, TE3[\s0:b << 2]

	/* s0 is fully consumed: from here on it is a second scratch register.
	   Fold in the lookups indexed by s1 and the four round-key words. */
	ld.w		\s0, TE1[\s1:u << 2]
	eor		\t0, tmp		/* t0 ^= RK[16] */
	ld.w		tmp, TE2[\s1:l << 2]
	eor		\t0, \s0		/* t0 ^= TE1[s1:u] */
	ld.w		\s0, RK[28]
	eor		\t3, tmp		/* t3 ^= TE2[s1:l] */
	ld.w		tmp, TE3[\s1:b << 2]
	eor		\t3, \s0		/* t3 ^= RK[28] */
	ld.w		\s0, RK[24]
	eor		\t2, tmp		/* t2 ^= TE3[s1:b] */
	ld.w		tmp, TE0[\s1:t << 2]
	eor		\t2, \s0		/* t2 ^= RK[24] */
	ld.w		\s0, RK[20]
	eor		\t1, tmp		/* t1 ^= TE0[s1:t] */
	ld.w		tmp, TE2[\s2:l << 2]	
	eor		\t1, \s0		/* t1 ^= RK[20] */
	 			
	/* s1 is fully consumed as well: fold in the lookups indexed by s2. */
	ld.w		\s0, TE3[\s2:b << 2]	
	ld.w		\s1, TE0[\s2:t << 2]
	eor		\t0, tmp		/* t0 ^= TE2[s2:l] */
	ld.w		tmp, TE1[\s2:u << 2]		
	eor		\t3, \s0		/* t3 ^= TE3[s2:b] */
	eor		\t2, \s1		/* t2 ^= TE0[s2:t] */
	ld.w		\s0, TE3[\s3:b << 2]	
	ld.w		\s1, TE0[\s3:t << 2]		
	eor		\t1, tmp		/* t1 ^= TE1[s2:u] */

	/* s2 is fully consumed: fold in the lookups indexed by s3. */
	ld.w		\s2, TE1[\s3:u << 2]		
	eor		\t0, \s0		/* t0 ^= TE3[s3:b] */
	ld.w		tmp, TE2[\s3:l << 2]			
	eor		\t3, \s1		/* t3 ^= TE0[s3:t] */
	eor		\t2, \s2		/* t2 ^= TE1[s3:u] */
	eor		\t1, tmp		/* t1 ^= TE2[s3:l] */
	sub		RK, -32			/* RK += 32 (subtracting a negative immediate) */
	.endm
		
	/*
	 * RoundEvenEncrypt - one full table-lookup encryption round that reads
	 * its round-key words at byte offsets 0..12 of RK and leaves RK unchanged.
	 *
	 * Notation: sN:t, sN:u, sN:l, sN:b are the byte selectors used in the
	 * indexed loads below; they correspond to (sN >> 24), (sN >> 16) & 0xff,
	 * (sN >> 8) & 0xff and sN & 0xff in the C reference formula in the
	 * comment near the top of this file.
	 *
	 * Computes:
	 *   t0 = TE0[s0:t] ^ TE1[s1:u] ^ TE2[s2:l] ^ TE3[s3:b] ^ RK[0]
	 *   t1 = TE0[s1:t] ^ TE1[s2:u] ^ TE2[s3:l] ^ TE3[s0:b] ^ RK[4]
	 *   t2 = TE0[s2:t] ^ TE1[s3:u] ^ TE2[s0:l] ^ TE3[s1:b] ^ RK[8]
	 *   t3 = TE0[s3:t] ^ TE1[s0:u] ^ TE2[s1:l] ^ TE3[s2:b] ^ RK[12]
	 * where RK[n] is the word at byte offset n from RK.
	 *
	 * Clobbers: tmp, s0, s1, s2 (each is recycled as scratch once all of its
	 * bytes have been consumed). s3 is only read. The t registers must be
	 * distinct from the s registers.
	 */
	.macro	RoundEvenEncrypt s0, s1, s2, s3, t0, t1, t2, t3
		
	/* Seed all four outputs with the lookups indexed by the bytes of s0. */
	ld.w		\t0, TE0[\s0:t << 2]
	ld.w		\t3, TE1[\s0:u << 2]
	ld.w		\t2, TE2[\s0:l << 2]
	ld.w		tmp, RK[0]
	ld.w		\t1, TE3[\s0:b << 2]

	/* s0 is fully consumed: from here on it is a second scratch register.
	   Fold in the lookups indexed by s1 and the four round-key words. */
	ld.w		\s0, TE1[\s1:u << 2]
	eor		\t0, tmp		/* t0 ^= RK[0] */
	ld.w		tmp, TE2[\s1:l << 2]
	eor		\t0, \s0		/* t0 ^= TE1[s1:u] */
	ld.w		\s0, RK[12]
	eor		\t3, tmp		/* t3 ^= TE2[s1:l] */
	ld.w		tmp, TE3[\s1:b << 2]
	eor		\t3, \s0		/* t3 ^= RK[12] */
	ld.w		\s0, RK[8]
	eor		\t2, tmp		/* t2 ^= TE3[s1:b] */
	ld.w		tmp, TE0[\s1:t << 2]
	eor		\t2, \s0		/* t2 ^= RK[8] */
	ld.w		\s0, RK[4]
	eor		\t1, tmp		/* t1 ^= TE0[s1:t] */
	ld.w		tmp, TE2[\s2:l << 2]	
	eor		\t1, \s0		/* t1 ^= RK[4] */
	 			
	/* s1 is fully consumed as well: fold in the lookups indexed by s2. */
	ld.w		\s0, TE3[\s2:b << 2]	
	ld.w		\s1, TE0[\s2:t << 2]
	eor		\t0, tmp		/* t0 ^= TE2[s2:l] */
	ld.w		tmp, TE1[\s2:u << 2]		
	eor		\t3, \s0		/* t3 ^= TE3[s2:b] */
	eor		\t2, \s1		/* t2 ^= TE0[s2:t] */
	ld.w		\s0, TE3[\s3:b << 2]	
	ld.w		\s1, TE0[\s3:t << 2]		
	eor		\t1, tmp		/* t1 ^= TE1[s2:u] */

	/* s2 is fully consumed: fold in the lookups indexed by s3. */
	ld.w		\s2, TE1[\s3:u << 2]		
	eor		\t0, \s0		/* t0 ^= TE3[s3:b] */
	ld.w		tmp, TE2[\s3:l << 2]			
	eor		\t3, \s1		/* t3 ^= TE0[s3:t] */
	eor		\t2, \s2		/* t2 ^= TE1[s3:u] */
	eor		\t1, tmp		/* t1 ^= TE2[s3:l] */
	.endm	
	
	/*
	 * RoundLastEncrypt - final encryption round using the byte-wide table
	 * whose base is in TE4.
	 *
	 * Each input byte is extracted with bfextu (8-bit field at bit offset
	 * 24, 16, 8 or 0, written below as sN:t, sN:u, sN:l, sN:b), looked up
	 * with ld.ub, and merged into its output word at the bit position shown:
	 *   t0 = TE4[s0:t]<<24 ^ TE4[s1:u]<<16 ^ TE4[s2:l]<<8 ^ TE4[s3:b] ^ RK[0]
	 *   t1 = TE4[s1:t]<<24 ^ TE4[s2:u]<<16 ^ TE4[s3:l]<<8 ^ TE4[s0:b] ^ RK[4]
	 *   t2 = TE4[s2:t]<<24 ^ TE4[s3:u]<<16 ^ TE4[s0:l]<<8 ^ TE4[s1:b] ^ RK[8]
	 *   t3 = TE4[s3:t]<<24 ^ TE4[s0:u]<<16 ^ TE4[s1:l]<<8 ^ TE4[s2:b] ^ RK[12]
	 * where RK[n] is the word at byte offset n from RK. RK is not modified.
	 *
	 * Clobbers: tmp, s0, s1, s2 (recycled as scratch once consumed). s3 is
	 * only read. The t registers must be distinct from the s registers.
	 */
	.macro	RoundLastEncrypt s0, s1, s2, s3, t0, t1, t2, t3
	/* Look up the four bytes of s0; each result is a zero-extended byte. */
	bfextu		\t0, \s0, 24, 8
	ld.ub		\t0, TE4[\t0]		
	bfextu		\t3, \s0, 16, 8
	ld.ub		\t3, TE4[\t3]		
	bfextu		\t2, \s0, 8, 8
	ld.ub		\t2, TE4[\t2]		
	ld.w		tmp, RK[0]
	bfextu		\t1, \s0, 0, 8
	ld.ub		\t1, TE4[\t1]		
	/* s0 is fully consumed: reuse it as scratch while processing s1. */
	bfextu		\s0, \s1, 16, 8
	ld.ub		\s0, TE4[\s0]		
	eor		\t0, tmp, \t0 << 24	/* t0 = RK[0] ^ TE4[s0:t] << 24 */
	bfextu		tmp, \s1, 8, 8
	ld.ub		tmp, TE4[tmp]		
	eor		\t0, \t0, \s0 << 16	/* t0 ^= TE4[s1:u] << 16 */
	ld.w		\s0, RK[12]
	eor		\t3, tmp, \t3 << 8	/* t3 = TE4[s1:l] ^ TE4[s0:u] << 8 (shifted again below) */
	bfextu		tmp, \s1, 0, 8
	ld.ub		tmp, TE4[tmp]		
	eor		\t3, \s0, \t3 << 8	/* t3 = RK[12] ^ t3 << 8: s0:u byte ends at bit 16, s1:l byte at bit 8 */
	ld.w		\s0, RK[8]
	eor		\t2, tmp, \t2 << 8 	/* t2 = TE4[s1:b] ^ TE4[s0:l] << 8 */
	bfextu		tmp, \s1, 24, 8
	ld.ub		tmp, TE4[tmp]		
	eor		\t2, \s0		/* t2 ^= RK[8] */
	ld.w		\s0, RK[4]
	eor		\t1, \t1, tmp << 24	/* t1 = TE4[s0:b] ^ TE4[s1:t] << 24 */
	/* s1 is fully consumed: process the bytes of s2. */
	bfextu		tmp, \s2, 8, 8
	ld.ub		tmp, TE4[tmp]		
	eor		\t1, \s0		/* t1 ^= RK[4] */
	bfextu		\s0, \s2, 0, 8
	ld.ub		\s0, TE4[\s0]		
	bfextu		\s1, \s2, 24, 8
	ld.ub		\s1, TE4[\s1]		
	eor		\t0, \t0, tmp << 8	/* t0 ^= TE4[s2:l] << 8 */
	bfextu		tmp, \s2, 16, 8
	ld.ub		tmp, TE4[tmp]		
	eor		\t3, \s0		/* t3 ^= TE4[s2:b] */
	eor		\t2, \t2, \s1 << 24	/* t2 ^= TE4[s2:t] << 24 */
	/* s2 is fully consumed: process the bytes of s3. */
	bfextu		\s0, \s3, 0, 8
	ld.ub		\s0, TE4[\s0]		
	bfextu		\s1, \s3, 24, 8
	ld.ub		\s1, TE4[\s1]		
	eor		\t1, \t1, tmp << 16		/* t1 ^= TE4[s2:u] << 16 */
	bfextu		\s2, \s3, 16, 8
	ld.ub		\s2, TE4[\s2]		
	eor		\t0, \s0		/* t0 ^= TE4[s3:b] */
	bfextu		tmp, \s3, 8, 8
	ld.ub		tmp, TE4[tmp]		
	eor		\t3, \t3, \s1 << 24	/* t3 ^= TE4[s3:t] << 24 */
	eor		\t2, \t2, \s2 << 16	/* t2 ^= TE4[s3:u] << 16 */
	eor		\t1, \t1, tmp << 8		/* t1 ^= TE4[s3:l] << 8 */
	.endm	

	/*
	 * RoundOddDecrypt - one full table-lookup decryption round.
	 *
	 * Notation: sN:t, sN:u, sN:l, sN:b are the byte selectors used in the
	 * indexed loads below (bits 31..24, 23..16, 15..8 and 7..0 of sN, going
	 * by the C reference formula in the comment near the top of this file).
	 *
	 * Computes:
	 *   t0 = TD0[s0:t] ^ TD1[s3:u] ^ TD2[s2:l] ^ TD3[s1:b] ^ RK[16]
	 *   t1 = TD0[s1:t] ^ TD1[s0:u] ^ TD2[s3:l] ^ TD3[s2:b] ^ RK[20]
	 *   t2 = TD0[s2:t] ^ TD1[s1:u] ^ TD2[s0:l] ^ TD3[s3:b] ^ RK[24]
	 *   t3 = TD0[s3:t] ^ TD1[s2:u] ^ TD2[s1:l] ^ TD3[s0:b] ^ RK[28]
	 * where RK[n] is the word at byte offset n from RK.
	 *
	 * On exit RK has been advanced by 32 bytes, so the round that follows
	 * finds its key words at offsets 0..12.
	 *
	 * Clobbers: tmp, s0, s1, s2 (each is recycled as scratch once all of its
	 * bytes have been consumed). s3 is only read. The t registers must be
	 * distinct from the s registers.
	 */
	.macro	RoundOddDecrypt	s0, s1, s2, s3, t0, t1, t2, t3
		
	/* Seed all four outputs with the lookups indexed by the bytes of s0. */
	ld.w		\t0, TD0[\s0:t << 2]
	ld.w		\t3, TD3[\s0:b << 2]
	ld.w		\t2, TD2[\s0:l << 2]
	ld.w		tmp, RK[16]
	ld.w		\t1, TD1[\s0:u << 2]

	/* s0 is fully consumed: from here on it is a second scratch register.
	   Fold in the lookups indexed by s1 and the four round-key words. */
	ld.w		\s0, TD3[\s1:b << 2]
	eor		\t0, tmp		/* t0 ^= RK[16] */
	ld.w		tmp, TD2[\s1:l << 2]
	eor		\t0, \s0		/* t0 ^= TD3[s1:b] */
	ld.w		\s0, RK[28]
	eor		\t3, tmp		/* t3 ^= TD2[s1:l] */
	ld.w		tmp, TD1[\s1:u << 2]
	eor		\t3, \s0		/* t3 ^= RK[28] */
	ld.w		\s0, RK[24]
	eor		\t2, tmp		/* t2 ^= TD1[s1:u] */
	ld.w		tmp, TD0[\s1:t << 2]
	eor		\t2, \s0		/* t2 ^= RK[24] */
	ld.w		\s0, RK[20]
	eor		\t1, tmp		/* t1 ^= TD0[s1:t] */
	ld.w		tmp, TD2[\s2:l << 2]	
	eor		\t1, \s0		/* t1 ^= RK[20] */
	 			
	/* s1 is fully consumed as well: fold in the lookups indexed by s2. */
	ld.w		\s0, TD1[\s2:u << 2]	
	ld.w		\s1, TD0[\s2:t << 2]
	eor		\t0, tmp		/* t0 ^= TD2[s2:l] */
	ld.w		tmp, TD3[\s2:b << 2]		
	eor		\t3, \s0		/* t3 ^= TD1[s2:u] */
	eor		\t2, \s1		/* t2 ^= TD0[s2:t] */
	ld.w		\s0, TD1[\s3:u << 2]	
	ld.w		\s1, TD0[\s3:t << 2]		
	eor		\t1, tmp		/* t1 ^= TD3[s2:b] */

	/* s2 is fully consumed: fold in the lookups indexed by s3. */
	ld.w		\s2, TD3[\s3:b << 2]		
	eor		\t0, \s0		/* t0 ^= TD1[s3:u] */
	ld.w		tmp, TD2[\s3:l << 2]			
	eor		\t3, \s1		/* t3 ^= TD0[s3:t] */
	eor		\t2, \s2		/* t2 ^= TD3[s3:b] */
	eor		\t1, tmp		/* t1 ^= TD2[s3:l] */
	sub		RK, -32			/* RK += 32 (subtracting a negative immediate) */
	.endm	
		
	/*
	 * RoundEvenDecrypt - one full table-lookup decryption round that reads
	 * its round-key words at byte offsets 0..12 of RK and leaves RK unchanged.
	 *
	 * Notation: sN:t, sN:u, sN:l, sN:b are the byte selectors used in the
	 * indexed loads below (bits 31..24, 23..16, 15..8 and 7..0 of sN, going
	 * by the C reference formula in the comment near the top of this file).
	 *
	 * Computes:
	 *   t0 = TD0[s0:t] ^ TD1[s3:u] ^ TD2[s2:l] ^ TD3[s1:b] ^ RK[0]
	 *   t1 = TD0[s1:t] ^ TD1[s0:u] ^ TD2[s3:l] ^ TD3[s2:b] ^ RK[4]
	 *   t2 = TD0[s2:t] ^ TD1[s1:u] ^ TD2[s0:l] ^ TD3[s3:b] ^ RK[8]
	 *   t3 = TD0[s3:t] ^ TD1[s2:u] ^ TD2[s1:l] ^ TD3[s0:b] ^ RK[12]
	 * where RK[n] is the word at byte offset n from RK.
	 *
	 * Clobbers: tmp, s0, s1, s2 (each is recycled as scratch once all of its
	 * bytes have been consumed). s3 is only read. The t registers must be
	 * distinct from the s registers.
	 */
	.macro	RoundEvenDecrypt s0, s1, s2, s3, t0, t1, t2, t3
		
	/* Seed all four outputs with the lookups indexed by the bytes of s0. */
	ld.w		\t0, TD0[\s0:t << 2]
	ld.w		\t3, TD3[\s0:b << 2]
	ld.w		\t2, TD2[\s0:l << 2]
	ld.w		tmp, RK[0]
	ld.w		\t1, TD1[\s0:u << 2]

	/* s0 is fully consumed: from here on it is a second scratch register.
	   Fold in the lookups indexed by s1 and the four round-key words. */
	ld.w		\s0, TD3[\s1:b << 2]
	eor		\t0, tmp		/* t0 ^= RK[0] */
	ld.w		tmp, TD2[\s1:l << 2]
	eor		\t0, \s0		/* t0 ^= TD3[s1:b] */
	ld.w		\s0, RK[12]
	eor		\t3, tmp		/* t3 ^= TD2[s1:l] */
	ld.w		tmp, TD1[\s1:u << 2]
	eor		\t3, \s0		/* t3 ^= RK[12] */
	ld.w		\s0, RK[8]
	eor		\t2, tmp		/* t2 ^= TD1[s1:u] */
	ld.w		tmp, TD0[\s1:t << 2]
	eor		\t2, \s0		/* t2 ^= RK[8] */
	ld.w		\s0, RK[4]
	eor		\t1, tmp		/* t1 ^= TD0[s1:t] */
	ld.w		tmp, TD2[\s2:l << 2]	
	eor		\t1, \s0		/* t1 ^= RK[4] */
	 			
	/* s1 is fully consumed as well: fold in the lookups indexed by s2. */
	ld.w		\s0, TD1[\s2:u << 2]	
	ld.w		\s1, TD0[\s2:t << 2]
	eor		\t0, tmp		/* t0 ^= TD2[s2:l] */
	ld.w		tmp, TD3[\s2:b << 2]		
	eor		\t3, \s0		/* t3 ^= TD1[s2:u] */
	eor		\t2, \s1		/* t2 ^= TD0[s2:t] */
	ld.w		\s0, TD1[\s3:u << 2]	
	ld.w		\s1, TD0[\s3:t << 2]		
	eor		\t1, tmp		/* t1 ^= TD3[s2:b] */

	/* s2 is fully consumed: fold in the lookups indexed by s3. */
	ld.w		\s2, TD3[\s3:b << 2]		
	eor		\t0, \s0		/* t0 ^= TD1[s3:u] */
	ld.w		tmp, TD2[\s3:l << 2]			
	eor		\t3, \s1		/* t3 ^= TD0[s3:t] */
	eor		\t2, \s2		/* t2 ^= TD3[s3:b] */
	eor		\t1, tmp		/* t1 ^= TD2[s3:l] */
	.endm
		
	/*
	 * RoundLastDecrypt - final decryption round using the byte-wide table
	 * whose base is in TD4.
	 *
	 * Each input byte is extracted with bfextu (8-bit field at bit offset
	 * 24, 16, 8 or 0, written below as sN:t, sN:u, sN:l, sN:b), looked up
	 * with ld.ub, and merged into its output word at the bit position shown:
	 *   t0 = TD4[s0:t]<<24 ^ TD4[s3:u]<<16 ^ TD4[s2:l]<<8 ^ TD4[s1:b] ^ RK[0]
	 *   t1 = TD4[s1:t]<<24 ^ TD4[s0:u]<<16 ^ TD4[s3:l]<<8 ^ TD4[s2:b] ^ RK[4]
	 *   t2 = TD4[s2:t]<<24 ^ TD4[s1:u]<<16 ^ TD4[s0:l]<<8 ^ TD4[s3:b] ^ RK[8]
	 *   t3 = TD4[s3:t]<<24 ^ TD4[s2:u]<<16 ^ TD4[s1:l]<<8 ^ TD4[s0:b] ^ RK[12]
	 * where RK[n] is the word at byte offset n from RK. RK is not modified.
	 *
	 * Clobbers: tmp, s0, s1, s2 (recycled as scratch once consumed). s3 is
	 * only read. The t registers must be distinct from the s registers.
	 */
	.macro	RoundLastDecrypt s0, s1, s2, s3, t0, t1, t2, t3
	/* Look up the four bytes of s0; each result is a zero-extended byte. */
	bfextu		\t0, \s0, 24, 8
	ld.ub		\t0, TD4[\t0]
	bfextu		\t3, \s0, 0, 8
	ld.ub		\t3, TD4[\t3]
	bfextu		\t2, \s0, 8, 8
	ld.ub		\t2, TD4[\t2]
	ld.w		tmp, RK[0]
	bfextu		\t1, \s0, 16, 8
	ld.ub		\t1, TD4[\t1]
	/* s0 is fully consumed: reuse it as scratch while processing s1. */
	bfextu		\s0, \s1, 0, 8
	ld.ub		\s0, TD4[\s0]
	eor		\t0, tmp, \t0 << 24	/* t0 = RK[0] ^ TD4[s0:t] << 24 */
	bfextu		tmp, \s1, 8, 8
	ld.ub		tmp, TD4[tmp]
	eor		\t0, \s0		/* t0 ^= TD4[s1:b] */
 	ld.w		\s0, RK[12]
	eor		\t3, \t3, tmp << 8	/* t3 = TD4[s0:b] ^ TD4[s1:l] << 8 */
	bfextu		tmp, \s1, 16, 8
	ld.ub		tmp, TD4[tmp]
	eor		\t3, \s0		/* t3 ^= RK[12] */
	ld.w		\s0, RK[8]
	eor		\t2, \t2, tmp << 8	/* t2 = TD4[s0:l] ^ TD4[s1:u] << 8 (shifted again below) */
	bfextu		tmp, \s1, 24, 8
	ld.ub		tmp, TD4[tmp]
	eor		\t2, \s0, \t2 << 8	/* t2 = RK[8] ^ t2 << 8: s0:l byte ends at bit 8, s1:u byte at bit 16 */
	ld.w		\s0, RK[4]
	eor		\t1, \t1, tmp << 8	/* t1 = TD4[s0:u] ^ TD4[s1:t] << 8 (shifted again below) */
	/* s1 is fully consumed: process the bytes of s2. */
	bfextu		tmp, \s2, 8, 8
	ld.ub		tmp, TD4[tmp]
	eor		\t1, \s0, \t1 << 16	/* t1 = RK[4] ^ t1 << 16: s0:u byte ends at bit 16, s1:t byte at bit 24 */
	bfextu		\s0, \s2, 16, 8
	ld.ub		\s0, TD4[\s0]
	bfextu		\s1, \s2, 24, 8
	ld.ub		\s1, TD4[\s1]
	eor		\t0, \t0, tmp << 8	/* t0 ^= TD4[s2:l] << 8 */
	bfextu		tmp, \s2, 0, 8
	ld.ub		tmp, TD4[tmp]
	eor		\t3, \t3, \s0 << 16	/* t3 ^= TD4[s2:u] << 16 */
	eor		\t2, \t2, \s1 << 24	/* t2 ^= TD4[s2:t] << 24 */
	/* s2 is fully consumed: process the bytes of s3. */
	bfextu		\s0, \s3, 16, 8
	ld.ub		\s0, TD4[\s0]
	bfextu		\s1, \s3, 24, 8
	ld.ub		\s1, TD4[\s1]
	eor		\t1, tmp		/* t1 ^= TD4[s2:b] */
	bfextu		\s2, \s3, 0, 8
	ld.ub		\s2, TD4[\s2]
	eor		\t0, \t0, \s0 << 16	/* t0 ^= TD4[s3:u] << 16 */
	bfextu		tmp, \s3, 8, 8
	ld.ub		tmp, TD4[tmp]
	eor		\t3, \t3, \s1 << 24	/* t3 ^= TD4[s3:t] << 24 */
	eor		\t2, \s2		/* t2 ^= TD4[s3:b] */
	eor		\t1, \t1, tmp << 8		/* t1 ^= TD4[s3:l] << 8 */
	.endm	


	.text
	.align 2
/*
 * void aes_crypt_ecb( aes_context *ctx,
 *                     int mode,
 *                     unsigned char input[16],
 *                     unsigned char output[16] )
 *
 * Processes one 16-byte block: encryption when mode is non-zero,
 * decryption when mode is zero.
 *
 * Registers as this code reads them on entry:
 *   r12 = ctx     word at offset 0 is halved and used as the loop counter
 *                 (presumably the round count - confirm against the
 *                 aes_context layout); word at offset 4 becomes RK, the
 *                 round-key pointer
 *   r11 = mode    compared against 0 to select the decryption path
 *   r10 = input   read as four words at offsets 0, 4, 8, 12
 *   r9  = output  saved on the stack, reloaded into lr for the final stm
 *
 * Stack while the round loop runs (after both st.w --sp pushes):
 *   sp[0] = remaining loop count, sp[4] = output pointer,
 *   above that the registers saved by pushm.
 *
 * r0-r7 and lr are saved on entry and restored on exit; r8-r12 are used
 * freely. The symbols FT0-FT3, FSb, RT0-RT3 and RSb are referenced but not
 * defined in this file.
 *
 * NOTE(review): input is read with word loads, so this presumably relies on
 * the buffers being suitably aligned - confirm against callers.
 * NOTE(review): the loop decrements the counter before testing it for zero,
 * so a context whose first word is below 2 would not terminate as intended.
 */
aes_crypt_ecb:	
	pushm		r0-r3, r4-r7, lr

/*	s0 = GETU32(pt     ) ^ rk[0];
	s1 = GETU32(pt +  4) ^ rk[1];
	s2 = GETU32(pt +  8) ^ rk[2];
	s3 = GETU32(pt + 12) ^ rk[3]; */
	st.w		--sp, r9	// keep the output pointer; r9 is reused below
	ld.w		r9, r12[0]	// r9 = first word of ctx (loop count source)
	ld.w		RK, r12[4]	// RK = second word of ctx; overwrites the ctx pointer in r12
	ldm		RK, r0-r3	// first four round-key words; per the comments below r3 pairs with rk[0] ... r0 with rk[3]
	cp.w		r11, 0
	breq		aes_decrypt	// mode == 0 -> decryption path
	// Encryption: load the four table bases while XOR-ing the input block
	// into the initial round-key words.
	lda.w		TE0, FT0
	ld.w		lr, r10[0]
	lda.w		TE1, FT1
	eor		r3, lr		// r3 = *pt ^ rk[0] 
	ld.w		lr, r10[4]
	lda.w		TE2, FT2
	eor		r2, lr		// r2 = *(pt + 4) ^ rk[1]
	ld.w		lr, r10[8]
	lda.w		TE3, FT3
	eor		r1, lr		// r1 = *(pt + 8) ^ rk[2]
	ld.w		lr, r10[12]

	eor		r0, lr		// r0 = *(pt + 12) ^ rk[3]
	lsr		r9, 1		// each loop pass below performs two rounds
	st.w		--sp, r9	// sp[0] = loop counter, sp[4] = output pointer

	// Each pass: one odd round (state r3..r0 -> r8..r11, RK += 32), then
	// either leave for the last round or do one even round (r8..r11 -> r3..r0).
encryption_rounds:
	RoundOddEncrypt	r3, r2, r1, r0, r8, r9, r10, r11
	ld.w		lr, sp[0]
	sub		lr, 1
	st.w		sp[0], lr
	breq		last_encryption_round	// taken when the counter written back is zero (flags from the sub above)
	RoundEvenEncrypt r8, r9, r10, r11, r3, r2, r1, r0
	rjmp		encryption_rounds
	
last_encryption_round:	
	lda.w		TE4, FSb	// TE4 is r4: the TE0 base is replaced by the byte table
	RoundLastEncrypt r8, r9, r10, r11, r3, r2, r1, r0
	ld.w		lr, sp[4] // lr = out
	sub		sp, -8		// drop the counter and the saved output pointer
	stm		lr, r0-r3	// write the four result words to out
	popm		r0-r3, r4-r7, pc	// restore saved registers; the saved lr is popped into pc (return)
	
	// Decryption: same structure as above with the RT* tables.
aes_decrypt:	
	lda.w		TD0, RT0
	ld.w		lr, r10[0]
	lda.w		TD1, RT1
	eor		r3, lr		// r3 = *ct ^ rk[0] 
	ld.w		lr, r10[4]
	lda.w		TD2, RT2
	eor		r2, lr		// r2 = *(ct + 4) ^ rk[1]
	ld.w		lr, r10[8]
	lda.w		TD3, RT3
	// Three-operand form with a zero shift: computes the same value as "eor r1, lr".
	// NOTE(review): it is unclear whether this encoding was chosen on purpose; left as is.
	eor		r1, r1, lr << 0	// r1 = *(ct + 8) ^ rk[2]
	ld.w		lr, r10[12]

	eor		r0, lr		// r0 = *(ct + 12) ^ rk[3]
	lsr		r9, 1		// each loop pass below performs two rounds
	st.w		--sp, r9	// sp[0] = loop counter, sp[4] = output pointer

decryption_rounds:
	RoundOddDecrypt	r3, r2, r1, r0, r8, r9, r10, r11
	ld.w		lr, sp[0]
	sub		lr, 1
	st.w		sp[0], lr
	breq		last_decryption_round	// taken when the counter written back is zero (flags from the sub above)
	RoundEvenDecrypt r8, r9, r10, r11, r3, r2, r1, r0
	rjmp		decryption_rounds
	
last_decryption_round:	
	lda.w		TD4, RSb	// TD4 is r4: the TD0 base is replaced by the byte table
	RoundLastDecrypt r8, r9, r10, r11, r3, r2, r1, r0
	ld.w		lr, sp[4] // lr = out
	sub		sp, -8		// drop the counter and the saved output pointer
	stm		lr, r0-r3	// write the four result words to out
	popm		r0-r3, r4-r7, pc	// restore saved registers; the saved lr is popped into pc (return)
			
