
src/pkg/crypto/aes/asm_arm64.s

// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "textflag.h"
// rotInvSRows is a TBL index pattern; feeding its output through AESE with
// an all-zeroes round key leaves SubWord(RotWord(x)) in lane S[0] (see below).
DATA rotInvSRows<>+0x00(SB)/8, $0x080f0205040b0e01
DATA rotInvSRows<>+0x08(SB)/8, $0x00070a0d0c030609
GLOBL rotInvSRows<>(SB), (NOPTR+RODATA), $16
// invSRows is the same pattern without the rotation; it yields SubWord(x)
// and is used only by the 256-bit key schedule.
DATA invSRows<>+0x00(SB)/8, $0x0b0e0104070a0d00
DATA invSRows<>+0x08(SB)/8, $0x0306090c0f020508
GLOBL invSRows<>(SB), (NOPTR+RODATA), $16
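// How the SBOX trick works (a sketch, not normative): AESE computes
// ShiftRows(SubBytes(state ^ key)). With a zero key that is just
// ShiftRows(SubBytes(state)), so pre-permuting the input with the *inverse*
// ShiftRows pattern cancels the ShiftRows and leaves pure SubBytes in the
// lanes that are read back. rotInvSRows additionally rotates the four source
// bytes, so a single AESE yields the key schedule's SubWord(RotWord(w)).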
// func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)
TEXT ·encryptBlockAsm(SB),NOSPLIT,$0
	MOVD	nr+0(FP), R9	// number of rounds: 10, 12 or 14
	MOVD	xk+8(FP), R10	// expanded key schedule, nr+1 round keys of 16 bytes
	MOVD	dst+16(FP), R11
	MOVD	src+24(FP), R12

	VLD1	(R12), [V0.B16]	// load the 16-byte plaintext block

	CMP	$12, R9
	BLT	enc128
	BEQ	enc192
enc256:	// rounds 1-2 run only for 256-bit keys
	VLD1.P	32(R10), [V1.B16, V2.B16]
	AESE	V1.B16, V0.B16	// AddRoundKey, SubBytes, ShiftRows
	AESMC	V0.B16, V0.B16	// MixColumns
	AESE	V2.B16, V0.B16
	AESMC	V0.B16, V0.B16
enc192:	// rounds 3-4 run for 192- and 256-bit keys
	VLD1.P	32(R10), [V3.B16, V4.B16]
	AESE	V3.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V4.B16, V0.B16
	AESMC	V0.B16, V0.B16
enc128:	// the last 10 rounds, common to all key sizes
	VLD1.P	64(R10), [V5.B16, V6.B16, V7.B16, V8.B16]
	VLD1.P	64(R10), [V9.B16, V10.B16, V11.B16, V12.B16]
	VLD1.P	48(R10), [V13.B16, V14.B16, V15.B16]
	AESE	V5.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V6.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V7.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V8.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V9.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V10.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V11.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V12.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V13.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V14.B16, V0.B16	// the final round has no MixColumns
	VEOR	V0.B16, V15.B16, V0.B16	// XOR in the last round key
	VST1	[V0.B16], (R11)	// store the ciphertext block
	RET
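// For reference, a Go-level sketch of the dataflow above. The helper names
// (subBytes, shiftRows, mixColumns, xor16) are hypothetical, used only to
// document the instruction pairing; AESE(k, v) computes
// shiftRows(subBytes(xor16(v, k))):
//
//	v := src
//	for i := 0; i < nr-1; i++ {
//		v = mixColumns(shiftRows(subBytes(xor16(v, rk[i])))) // AESE+AESMC
//	}
//	dst = xor16(shiftRows(subBytes(xor16(v, rk[nr-1]))), rk[nr]) // AESE+VEOR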

// func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)
TEXT ·decryptBlockAsm(SB),NOSPLIT,$0
	MOVD	nr+0(FP), R9	// number of rounds: 10, 12 or 14
	MOVD	xk+8(FP), R10	// decryption key schedule produced by expandKeyAsm
	MOVD	dst+16(FP), R11
	MOVD	src+24(FP), R12

	VLD1	(R12), [V0.B16]	// load the 16-byte ciphertext block

	CMP	$12, R9
	BLT	dec128
	BEQ	dec192
dec256:	// rounds 1-2 run only for 256-bit keys
	VLD1.P	32(R10), [V1.B16, V2.B16]
	AESD	V1.B16, V0.B16	// AddRoundKey, InvSubBytes, InvShiftRows
	AESIMC	V0.B16, V0.B16	// InvMixColumns
	AESD	V2.B16, V0.B16
	AESIMC	V0.B16, V0.B16
dec192:	// rounds 3-4 run for 192- and 256-bit keys
	VLD1.P	32(R10), [V3.B16, V4.B16]
	AESD	V3.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V4.B16, V0.B16
	AESIMC	V0.B16, V0.B16
dec128:	// the last 10 rounds, common to all key sizes
	VLD1.P	64(R10), [V5.B16, V6.B16, V7.B16, V8.B16]
	VLD1.P	64(R10), [V9.B16, V10.B16, V11.B16, V12.B16]
	VLD1.P	48(R10), [V13.B16, V14.B16, V15.B16]
	AESD	V5.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V6.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V7.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V8.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V9.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V10.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V11.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V12.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V13.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V14.B16, V0.B16	// the final round has no InvMixColumns
	VEOR	V0.B16, V15.B16, V0.B16
	VST1	[V0.B16], (R11)	// store the plaintext block
	RET
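// Decryption uses the standard "equivalent inverse cipher" construction:
// AESD/AESIMC mirror AESE/AESMC, but because InvMixColumns runs after the
// AddRoundKey here, the middle round keys must have InvMixColumns
// pre-applied and the key order reversed. expandKeyAsm below builds exactly
// that dec schedule, which lets this routine walk its keys forwards like
// the encryption path.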

// func expandKeyAsm(nr int, key *byte, enc, dec *uint32)
// Note that round keys are stored in uint128 format, not uint32:
// enc and dec each hold nr+1 round keys of 16 bytes.
TEXT ·expandKeyAsm(SB),NOSPLIT,$0
	MOVD	nr+0(FP), R8	// number of rounds: 10, 12 or 14
	MOVD	key+8(FP), R9	// user key: 16, 24 or 32 bytes
	MOVD	enc+16(FP), R10
	MOVD	dec+24(FP), R11	// may be nil when only encryption is needed
	LDP	rotInvSRows<>(SB), (R0, R1)
	VMOV	R0, V3.D[0]	// V3 = rotInvSRows, for SubWord(RotWord(.))
	VMOV	R1, V3.D[1]
	VEOR	V0.B16, V0.B16, V0.B16 // All zeroes
	MOVW	$1, R13	// Rcon, doubled in GF(2^8) every round
	TBZ	$1, R8, ks192	// nr == 12
	TBNZ	$2, R8, ks256	// nr == 14 (nr == 12 was handled above)
	LDPW	(R9), (R4, R5)	// load the 4 words of the 128-bit user key
	LDPW	8(R9), (R6, R7)
	STPW.P	(R4, R5), 8(R10)	// round key 0 is the user key itself
	STPW.P	(R6, R7), 8(R10)
	MOVW	$0x1b, R14	// low byte of the AES polynomial 0x11b
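// ks128Loop emits one round key (4 words) per iteration, following the
// standard recurrence for i = 4, 8, ..., 40:
//	w[i]   = w[i-4] ^ SubWord(RotWord(w[i-1])) ^ Rcon
//	w[i+1] = w[i-3] ^ w[i]
//	w[i+2] = w[i-2] ^ w[i+1]
//	w[i+3] = w[i-1] ^ w[i+2]
// Rcon runs 0x01, 0x02, ..., 0x80, 0x1b, 0x36: once the shift carries into
// bit 8, the value is reduced by the AES polynomial, 0x100 ^ 0x11b = 0x1b.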
ks128Loop:
		VMOV	R7, V2.S[0]       // w[i-1], the previous word
		WORD	$0x4E030042       // TBL V3.B16, [V2.B16], V2.B16
		AESE	V0.B16, V2.B16    // Use AES to compute the SBOX
		EORW	R13, R4           // fold in Rcon
		LSLW	$1, R13           // Compute next Rcon
		ANDSW	$0x100, R13, ZR
		CSELW	NE, R14, R13, R13 // Fake modulo: 0x100 -> 0x1b
		SUBS	$1, R8
		VMOV	V2.S[0], R0       // R0 = SubWord(RotWord(w[i-1]))
		EORW	R0, R4
		EORW	R4, R5
		EORW	R5, R6
		EORW	R6, R7
		STPW.P	(R4, R5), 8(R10)
		STPW.P	(R6, R7), 8(R10)
	BNE	ks128Loop
	CBZ	R11, ksDone       // If dec is nil we are done
	SUB	$176, R10         // rewind to enc[0] (11 round keys * 16 bytes)
	// Decryption keys are the encryption keys in reverse order, with
	// InverseMixColumns applied to every key except the first and last.
	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	VMOV	V0.B16, V7.B16    // enc[0] is copied unchanged
	AESIMC	V1.B16, V6.B16
	AESIMC	V2.B16, V5.B16
	AESIMC	V3.B16, V4.B16
	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	AESIMC	V0.B16, V11.B16
	AESIMC	V1.B16, V10.B16
	AESIMC	V2.B16, V9.B16
	AESIMC	V3.B16, V8.B16
	VLD1	(R10), [V0.B16, V1.B16, V2.B16]
	AESIMC	V0.B16, V14.B16
	AESIMC	V1.B16, V13.B16
	VMOV	V2.B16, V12.B16   // enc[10] becomes dec[0] unchanged
	VST1.P	[V12.B16, V13.B16, V14.B16], 48(R11)
	VST1.P	[V8.B16, V9.B16, V10.B16, V11.B16], 64(R11)
	VST1	[V4.B16, V5.B16, V6.B16, V7.B16], (R11)
	B	ksDone
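// In Go-like terms (invMixColumns here is a hypothetical helper standing in
// for AESIMC), the stores above implement, for an nr-round schedule:
//	dec[0]  = enc[nr]
//	dec[i]  = invMixColumns(enc[nr-i])   // 0 < i < nr
//	dec[nr] = enc[0]
// The same pattern is repeated below for the 192- and 256-bit schedules.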
ks192:
	LDPW	(R9), (R2, R3)	// load the 6 words of the 192-bit user key
	LDPW	8(R9), (R4, R5)
	LDPW	16(R9), (R6, R7)
	STPW.P	(R2, R3), 8(R10)	// the first 6 words of the schedule are the key
	STPW.P	(R4, R5), 8(R10)
	SUB	$4, R8	// 8 iterations of 6 words each
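// ks192Loop advances the schedule 6 words per iteration: the recurrence is
// the same as for ks128, except that SubWord(RotWord(.)) and Rcon apply at
// every 6th word. Only 8 Rcon values (0x01 ... 0x80) are consumed, so the
// reduction step needed by ks128Loop can be omitted here.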
ks192Loop:
		STPW.P	(R6, R7), 8(R10)
		VMOV	R7, V2.S[0]
		WORD	$0x4E030042 //TBL	V3.B16, [V2.B16], V2.B16
		AESE	V0.B16, V2.B16
		EORW	R13, R2
		LSLW	$1, R13	// next Rcon; never overflows for 192-bit keys
		SUBS	$1, R8
		VMOV	V2.S[0], R0
		EORW	R0, R2
		EORW	R2, R3
		EORW	R3, R4
		EORW	R4, R5
		EORW	R5, R6
		EORW	R6, R7
		STPW.P	(R2, R3), 8(R10)
		STPW.P	(R4, R5), 8(R10)
	BNE	ks192Loop
	CBZ	R11, ksDone
	SUB	$208, R10	// rewind to enc[0] (13 round keys * 16 bytes)
	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	VMOV	V0.B16, V7.B16
	AESIMC	V1.B16, V6.B16
	AESIMC	V2.B16, V5.B16
	AESIMC	V3.B16, V4.B16
	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	AESIMC	V0.B16, V11.B16
	AESIMC	V1.B16, V10.B16
	AESIMC	V2.B16, V9.B16
	AESIMC	V3.B16, V8.B16
	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	AESIMC	V0.B16, V15.B16
	AESIMC	V1.B16, V14.B16
	AESIMC	V2.B16, V13.B16
	AESIMC	V3.B16, V12.B16
	VLD1	(R10), [V0.B16]
	VST1.P	[V0.B16], 16(R11)	// enc[12] becomes dec[0] unchanged
	VST1.P	[V12.B16, V13.B16, V14.B16, V15.B16], 64(R11)
	VST1.P	[V8.B16, V9.B16, V10.B16, V11.B16], 64(R11)
	VST1	[V4.B16, V5.B16, V6.B16, V7.B16], (R11)
	B	ksDone
ks256:
	LDP	invSRows<>(SB), (R0, R1)
	VMOV	R0, V4.D[0]	// V4 = invSRows, for the extra plain SubWord step
	VMOV	R1, V4.D[1]
	LDPW	(R9), (R0, R1)	// load the 8 words of the 256-bit user key
	LDPW	8(R9), (R2, R3)
	LDPW	16(R9), (R4, R5)
	LDPW	24(R9), (R6, R7)
	STPW.P	(R0, R1), 8(R10)	// the first 8 words of the schedule are the key
	STPW.P	(R2, R3), 8(R10)
	SUB	$7, R8	// 7 iterations of 8 words each
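// ks256Loop advances the schedule 8 words per iteration. The first word of
// each 8-word block gets SubWord(RotWord(.)) ^ Rcon as usual; the fifth
// word additionally gets a plain SubWord (the second TBL/AESE pair below,
// indexed through invSRows in V4, with no rotation and no Rcon). Only 7
// Rcon values are consumed, so again no reduction is needed.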
ks256Loop:
		STPW.P	(R4, R5), 8(R10)
		STPW.P	(R6, R7), 8(R10)
		VMOV	R7, V2.S[0]
		WORD	$0x4E030042 //TBL	V3.B16, [V2.B16], V2.B16
		AESE	V0.B16, V2.B16
		EORW	R13, R0
		LSLW	$1, R13
		SUBS	$1, R8
		VMOV	V2.S[0], R9	// R9 is free as scratch: the user key is fully consumed
		EORW	R9, R0
		EORW	R0, R1
		EORW	R1, R2
		EORW	R2, R3
		VMOV	R3, V2.S[0]
		WORD	$0x4E040042 //TBL	V4.B16, [V2.B16], V2.B16
		AESE	V0.B16, V2.B16
		VMOV	V2.S[0], R9
		EORW	R9, R4
		EORW	R4, R5
		EORW	R5, R6
		EORW	R6, R7
		STPW.P	(R0, R1), 8(R10)
		STPW.P	(R2, R3), 8(R10)
	BNE	ks256Loop
	CBZ	R11, ksDone
	SUB	$240, R10	// rewind to enc[0] (15 round keys * 16 bytes)
	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	VMOV	V0.B16, V7.B16
	AESIMC	V1.B16, V6.B16
	AESIMC	V2.B16, V5.B16
	AESIMC	V3.B16, V4.B16
	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	AESIMC	V0.B16, V11.B16
	AESIMC	V1.B16, V10.B16
	AESIMC	V2.B16, V9.B16
	AESIMC	V3.B16, V8.B16
	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	AESIMC	V0.B16, V15.B16
	AESIMC	V1.B16, V14.B16
	AESIMC	V2.B16, V13.B16
	AESIMC	V3.B16, V12.B16
	VLD1	(R10), [V0.B16, V1.B16, V2.B16]
	AESIMC	V0.B16, V18.B16
	AESIMC	V1.B16, V17.B16
	VMOV	V2.B16, V16.B16   // enc[14] becomes dec[0] unchanged
	VST1.P	[V16.B16, V17.B16, V18.B16], 48(R11)
	VST1.P	[V12.B16, V13.B16, V14.B16, V15.B16], 64(R11)
	VST1.P	[V8.B16, V9.B16, V10.B16, V11.B16], 64(R11)
	VST1	[V4.B16, V5.B16, V6.B16, V7.B16], (R11)
ksDone:
	RET
