...

Text file src/pkg/crypto/aes/asm_amd64.s

     1	// Copyright 2012 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	#include "textflag.h"
     6	
     7	// func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)
      	//
      	// Encrypts the 16 bytes at src using the round keys at xk and stores the
      	// 16-byte result at dst. The number of rounds is selected from nr:
      	// nr < 12 runs 10 rounds, nr == 12 runs 12 rounds, and any larger value
      	// runs 14 rounds; nr is not validated beyond that comparison.
      	// The routine reads 16*(rounds+1) bytes starting at xk.
      	// Every 16-byte memory access uses MOVUPS (the unaligned move form).
     8	TEXT ·encryptBlockAsm(SB),NOSPLIT,$0
     9		MOVQ nr+0(FP), CX	// CX = nr
    10		MOVQ xk+8(FP), AX	// AX = round-key pointer
    11		MOVQ dst+16(FP), DX	// DX = output pointer
    12		MOVQ src+24(FP), BX	// BX = input pointer
    13		MOVUPS 0(AX), X1	// X1 = round key 0
    14		MOVUPS 0(BX), X0	// X0 = input block (the running state)
    15		ADDQ $16, AX	// AX -> round key 1
    16		PXOR X1, X0	// state ^= round key 0
    17		SUBQ $12, CX	// CX = nr - 12; the flags drive the dispatch below
    18		JE Lenc196	// nr == 12: 12-round path (despite the "196" in the label)
    19		JB Lenc128	// nr < 12: 10-round path
    20	Lenc256:	// reached by fall-through when nr > 12: two extra rounds
    21		MOVUPS 0(AX), X1
    22		AESENC X1, X0
    23		MOVUPS 16(AX), X1
    24		AESENC X1, X0
    25		ADDQ $32, AX	// skip the two keys just used
    26	Lenc196:	// two more rounds, then fall through to the common tail
    27		MOVUPS 0(AX), X1
    28		AESENC X1, X0
    29		MOVUPS 16(AX), X1
    30		AESENC X1, X0
    31		ADDQ $32, AX	// skip the two keys just used
    32	Lenc128:	// common tail: nine AESENC rounds, then AESENCLAST
    33		MOVUPS 0(AX), X1
    34		AESENC X1, X0
    35		MOVUPS 16(AX), X1
    36		AESENC X1, X0
    37		MOVUPS 32(AX), X1
    38		AESENC X1, X0
    39		MOVUPS 48(AX), X1
    40		AESENC X1, X0
    41		MOVUPS 64(AX), X1
    42		AESENC X1, X0
    43		MOVUPS 80(AX), X1
    44		AESENC X1, X0
    45		MOVUPS 96(AX), X1
    46		AESENC X1, X0
    47		MOVUPS 112(AX), X1
    48		AESENC X1, X0
    49		MOVUPS 128(AX), X1
    50		AESENC X1, X0
    51		MOVUPS 144(AX), X1	// final round key
    52		AESENCLAST X1, X0	// last round uses the dedicated "last" instruction
    53		MOVUPS X0, 0(DX)	// write the 16-byte result to dst
    54		RET
    55	
    56	// func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)
      	//
      	// Decrypts the 16 bytes at src using the round keys at xk and stores the
      	// 16-byte result at dst. Structure mirrors encryptBlockAsm, with
      	// AESDEC/AESDECLAST in place of AESENC/AESENCLAST: nr < 12 runs 10 rounds,
      	// nr == 12 runs 12 rounds, any larger value runs 14 rounds.
      	// The keys are consumed in ascending address order starting at xk, so xk
      	// is presumably the schedule that expandKeyAsm writes through its dec
      	// argument -- confirm against the Go caller.
    57	TEXT ·decryptBlockAsm(SB),NOSPLIT,$0
    58		MOVQ nr+0(FP), CX	// CX = nr
    59		MOVQ xk+8(FP), AX	// AX = round-key pointer
    60		MOVQ dst+16(FP), DX	// DX = output pointer
    61		MOVQ src+24(FP), BX	// BX = input pointer
    62		MOVUPS 0(AX), X1	// X1 = first round key in the schedule
    63		MOVUPS 0(BX), X0	// X0 = input block (the running state)
    64		ADDQ $16, AX	// AX -> second round key
    65		PXOR X1, X0	// state ^= first round key
    66		SUBQ $12, CX	// CX = nr - 12; the flags drive the dispatch below
    67		JE Ldec196	// nr == 12: 12-round path (despite the "196" in the label)
    68		JB Ldec128	// nr < 12: 10-round path
    69	Ldec256:	// reached by fall-through when nr > 12: two extra rounds
    70		MOVUPS 0(AX), X1
    71		AESDEC X1, X0
    72		MOVUPS 16(AX), X1
    73		AESDEC X1, X0
    74		ADDQ $32, AX	// skip the two keys just used
    75	Ldec196:	// two more rounds, then fall through to the common tail
    76		MOVUPS 0(AX), X1
    77		AESDEC X1, X0
    78		MOVUPS 16(AX), X1
    79		AESDEC X1, X0
    80		ADDQ $32, AX	// skip the two keys just used
    81	Ldec128:	// common tail: nine AESDEC rounds, then AESDECLAST
    82		MOVUPS 0(AX), X1
    83		AESDEC X1, X0
    84		MOVUPS 16(AX), X1
    85		AESDEC X1, X0
    86		MOVUPS 32(AX), X1
    87		AESDEC X1, X0
    88		MOVUPS 48(AX), X1
    89		AESDEC X1, X0
    90		MOVUPS 64(AX), X1
    91		AESDEC X1, X0
    92		MOVUPS 80(AX), X1
    93		AESDEC X1, X0
    94		MOVUPS 96(AX), X1
    95		AESDEC X1, X0
    96		MOVUPS 112(AX), X1
    97		AESDEC X1, X0
    98		MOVUPS 128(AX), X1
    99		AESDEC X1, X0
   100		MOVUPS 144(AX), X1	// final round key
   101		AESDECLAST X1, X0	// last round uses the dedicated "last" instruction
   102		MOVUPS X0, 0(DX)	// write the 16-byte result to dst
   103		RET
   104	
   105	// func expandKeyAsm(nr int, key *byte, enc, dec *uint32)
   106	// Note that round keys are stored in uint128 format, not uint32
      	//
      	// Expands the key at key into an encryption schedule written at enc, then
      	// derives a decryption schedule from it and writes that at dec.
      	// The path is chosen by comparing nr with 12:
      	//   nr <  12: reads 16 key bytes, writes 11 round keys (176 bytes) to enc
      	//   nr == 12: reads 24 key bytes, writes 13 round keys (208 bytes) to enc
      	//   nr >  12: reads 32 key bytes, writes 15 round keys (240 bytes) to enc
      	// The dec pass writes 16*(nr+1) bytes and assumes nr matches the path
      	// taken (10, 12 or 14); nr is not validated.
      	//
      	// Register roles: CX = nr, AX = key, BX = enc write cursor, DX = dec write
      	// cursor, X0/X2 = key material carried between helper calls,
      	// X1 = AESKEYGENASSIST output, X4 = helper scratch.
   107	TEXT ·expandKeyAsm(SB),NOSPLIT,$0
   108		MOVQ nr+0(FP), CX
   109		MOVQ key+8(FP), AX
   110		MOVQ enc+16(FP), BX
   111		MOVQ dec+24(FP), DX
   112		MOVUPS (AX), X0	// X0 = first 16 bytes of the key
   113		// enc
   114		MOVUPS X0, (BX)	// round key 0 is the first 16 key bytes, unchanged
   115		ADDQ $16, BX
   116		PXOR X4, X4 // _expand_key_* expect X4 to be zero
   117		CMPL CX, $12	// 32-bit compare: only the low half of nr is examined
   118		JE Lexp_enc196	// nr == 12 (24-byte key path, despite the "196" in the label)
   119		JB Lexp_enc128	// nr < 12 (16-byte key path)
   120	Lexp_enc256:	// fall-through: 32-byte key path
   121		MOVUPS 16(AX), X2	// X2 = key bytes 16..31
   122		MOVUPS X2, (BX)	// round key 1 is the second 16 key bytes, unchanged
   123		ADDQ $16, BX
      		// Alternate the two helpers: 256a derives the next key from X0 using
      		// AESKEYGENASSIST of X2, and 256b derives the next key from X2 using
      		// AESKEYGENASSIST of X0. Each call stores 16 bytes at BX.
      		// 7 + 6 calls add 13 keys to the 2 already stored.
   124		AESKEYGENASSIST $0x01, X2, X1
   125		CALL _expand_key_256a<>(SB)
   126		AESKEYGENASSIST $0x01, X0, X1
   127		CALL _expand_key_256b<>(SB)
   128		AESKEYGENASSIST $0x02, X2, X1
   129		CALL _expand_key_256a<>(SB)
   130		AESKEYGENASSIST $0x02, X0, X1
   131		CALL _expand_key_256b<>(SB)
   132		AESKEYGENASSIST $0x04, X2, X1
   133		CALL _expand_key_256a<>(SB)
   134		AESKEYGENASSIST $0x04, X0, X1
   135		CALL _expand_key_256b<>(SB)
   136		AESKEYGENASSIST $0x08, X2, X1
   137		CALL _expand_key_256a<>(SB)
   138		AESKEYGENASSIST $0x08, X0, X1
   139		CALL _expand_key_256b<>(SB)
   140		AESKEYGENASSIST $0x10, X2, X1
   141		CALL _expand_key_256a<>(SB)
   142		AESKEYGENASSIST $0x10, X0, X1
   143		CALL _expand_key_256b<>(SB)
   144		AESKEYGENASSIST $0x20, X2, X1
   145		CALL _expand_key_256a<>(SB)
   146		AESKEYGENASSIST $0x20, X0, X1
   147		CALL _expand_key_256b<>(SB)
   148		AESKEYGENASSIST $0x40, X2, X1
   149		CALL _expand_key_256a<>(SB)
   150		JMP Lexp_dec
   151	Lexp_enc196:
   152		MOVQ 16(AX), X2	// X2 = key bytes 16..23 (8-byte load)
      		// Alternate the two helpers: 192a stores 32 bytes at BX and 192b
      		// stores 16 bytes, so the four pairs below add 192 bytes to the
      		// 16 already stored.
   153		AESKEYGENASSIST $0x01, X2, X1
   154		CALL _expand_key_192a<>(SB)
   155		AESKEYGENASSIST $0x02, X2, X1
   156		CALL _expand_key_192b<>(SB)
   157		AESKEYGENASSIST $0x04, X2, X1
   158		CALL _expand_key_192a<>(SB)
   159		AESKEYGENASSIST $0x08, X2, X1
   160		CALL _expand_key_192b<>(SB)
   161		AESKEYGENASSIST $0x10, X2, X1
   162		CALL _expand_key_192a<>(SB)
   163		AESKEYGENASSIST $0x20, X2, X1
   164		CALL _expand_key_192b<>(SB)
   165		AESKEYGENASSIST $0x40, X2, X1
   166		CALL _expand_key_192a<>(SB)
   167		AESKEYGENASSIST $0x80, X2, X1
   168		CALL _expand_key_192b<>(SB)
   169		JMP Lexp_dec
   170	Lexp_enc128:
      		// Ten helper calls, one per round constant; each derives the next
      		// key from X0 and stores 16 bytes at BX.
   171		AESKEYGENASSIST $0x01, X0, X1
   172		CALL _expand_key_128<>(SB)
   173		AESKEYGENASSIST $0x02, X0, X1
   174		CALL _expand_key_128<>(SB)
   175		AESKEYGENASSIST $0x04, X0, X1
   176		CALL _expand_key_128<>(SB)
   177		AESKEYGENASSIST $0x08, X0, X1
   178		CALL _expand_key_128<>(SB)
   179		AESKEYGENASSIST $0x10, X0, X1
   180		CALL _expand_key_128<>(SB)
   181		AESKEYGENASSIST $0x20, X0, X1
   182		CALL _expand_key_128<>(SB)
   183		AESKEYGENASSIST $0x40, X0, X1
   184		CALL _expand_key_128<>(SB)
   185		AESKEYGENASSIST $0x80, X0, X1
   186		CALL _expand_key_128<>(SB)
   187		AESKEYGENASSIST $0x1b, X0, X1
   188		CALL _expand_key_128<>(SB)
   189		AESKEYGENASSIST $0x36, X0, X1
   190		CALL _expand_key_128<>(SB)
   191	Lexp_dec:
   192		// dec
      		// BX points just past the last encryption round key. The decryption
      		// schedule is the encryption schedule in reverse order, with AESIMC
      		// applied to every key except the first and the last one written.
   193		SUBQ $16, BX	// BX -> last encryption round key
   194		MOVUPS (BX), X1
   195		MOVUPS X1, (DX)	// first dec key = last enc key, copied unchanged
   196		DECQ CX	// CX = nr - 1 = number of AESIMC-transformed keys
   197	Lexp_dec_loop:
   198		MOVUPS -16(BX), X1	// next enc key, walking backwards
   199		AESIMC X1, X0	// X0 = AESIMC(X1)
   200		MOVUPS X0, 16(DX)
   201		SUBQ $16, BX
   202		ADDQ $16, DX
   203		DECQ CX
   204		JNZ Lexp_dec_loop
   205		MOVUPS -16(BX), X0	// remaining enc key (enc key 0 when nr matches the path taken)
   206		MOVUPS X0, 16(DX)	// copied unchanged as the final dec key
   207		RET
   208	
   209	TEXT _expand_key_128<>(SB),NOSPLIT,$0
      		// Helper for expandKeyAsm: derives the next 16-byte round key from X0.
      		// In:  X0 = previous round key, X1 = AESKEYGENASSIST output,
      		//      X4 = scratch (the caller zeroes it before the first call),
      		//      BX = where to store the new key.
      		// Out: X0 = new round key, also stored at (BX); BX advanced by 16.
      		// Overwrites X1 and X4.
   210		PSHUFD $0xff, X1, X1	// broadcast dword 3 of X1 to all four lanes
      		// The two SHUFPS/PXOR pairs leave each dword of X0 equal to the XOR of
      		// itself and all lower dwords of the original X0. The low dword of X4
      		// is never written, so it stays zero across calls.
   211		SHUFPS $0x10, X0, X4
   212		PXOR X4, X0
   213		SHUFPS $0x8c, X0, X4
   214		PXOR X4, X0
   215		PXOR X1, X0	// mix in the broadcast AESKEYGENASSIST dword
   216		MOVUPS X0, (BX)
   217		ADDQ $16, BX
   218		RET
   219	
   220	TEXT _expand_key_192a<>(SB),NOSPLIT,$0
      		// Helper for expandKeyAsm (nr == 12 path): advances the key material
      		// held in X0 (16 bytes) and X2, and stores 32 bytes at BX.
      		// In:  X0, X2 = current key material, X1 = AESKEYGENASSIST output,
      		//      X4 = scratch (zeroed by the caller), BX = output cursor.
      		// Out: X0, X2 updated; 32 bytes stored at (BX); BX advanced by 32.
      		// Overwrites X1, X3, X4, X5 and X6.
   221		PSHUFD $0x55, X1, X1	// broadcast dword 1 of X1 to all four lanes
      		// Same X0 update as _expand_key_128: running XOR of X0's dwords,
      		// then XOR with the broadcast value.
   222		SHUFPS $0x10, X0, X4
   223		PXOR X4, X0
   224		SHUFPS $0x8c, X0, X4
   225		PXOR X4, X0
   226		PXOR X1, X0
   227	
   228		MOVAPS X2, X5	// X5 = copy of X2, shifted below
   229		MOVAPS X2, X6	// X6 = X2 as it was on entry, used for the first store
   230		PSLLDQ $0x4, X5	// shift X5 left by 4 bytes
   231		PSHUFD $0xff, X0, X3	// broadcast dword 3 of the updated X0
   232		PXOR X3, X2
   233		PXOR X5, X2	// X2 = X2 ^ broadcast ^ (X2 << 4 bytes)
   234	
   235		MOVAPS X0, X1
   236		SHUFPS $0x44, X0, X6	// X6 = low 8 bytes of entry X2, then low 8 bytes of new X0
   237		MOVUPS X6, (BX)
   238		SHUFPS $0x4e, X2, X1	// X1 = high 8 bytes of new X0, then low 8 bytes of new X2
   239		MOVUPS X1, 16(BX)
   240		ADDQ $32, BX
   241		RET
   242	
   243	TEXT _expand_key_192b<>(SB),NOSPLIT,$0
      		// Helper for expandKeyAsm (nr == 12 path): performs the same X0 and X2
      		// update as _expand_key_192a but stores only the updated X0.
      		// In:  X0, X2 = current key material, X1 = AESKEYGENASSIST output,
      		//      X4 = scratch (zeroed by the caller), BX = output cursor.
      		// Out: X0, X2 updated; X0 stored at (BX); BX advanced by 16.
      		// Overwrites X1, X3, X4 and X5.
   244		PSHUFD $0x55, X1, X1	// broadcast dword 1 of X1 to all four lanes
   245		SHUFPS $0x10, X0, X4
   246		PXOR X4, X0
   247		SHUFPS $0x8c, X0, X4
   248		PXOR X4, X0
   249		PXOR X1, X0	// X0 = running XOR of its dwords ^ broadcast value
   250	
   251		MOVAPS X2, X5
   252		PSLLDQ $0x4, X5	// shift X5 left by 4 bytes
   253		PSHUFD $0xff, X0, X3	// broadcast dword 3 of the updated X0
   254		PXOR X3, X2
   255		PXOR X5, X2	// X2 = X2 ^ broadcast ^ (X2 << 4 bytes)
   256	
   257		MOVUPS X0, (BX)
   258		ADDQ $16, BX
   259		RET
   260	
   261	TEXT _expand_key_256a<>(SB),NOSPLIT,$0
      		// Same operation as _expand_key_128: jump straight to it, so its RET
      		// returns to this helper's caller.
   262		JMP _expand_key_128<>(SB)
   263	
   264	TEXT _expand_key_256b<>(SB),NOSPLIT,$0
      		// Helper for expandKeyAsm (nr > 12 path): derives the next round key
      		// from X2, the counterpart of _expand_key_256a which works on X0.
      		// In:  X2 = round key from two steps back, X1 = AESKEYGENASSIST output,
      		//      X4 = scratch (zeroed by the caller), BX = output cursor.
      		// Out: X2 = new round key, also stored at (BX); BX advanced by 16.
      		// Overwrites X1 and X4.
   265		PSHUFD $0xaa, X1, X1	// broadcast dword 2 of X1 to all four lanes
   266		SHUFPS $0x10, X2, X4
   267		PXOR X4, X2
   268		SHUFPS $0x8c, X2, X4
   269		PXOR X4, X2	// X2 = running XOR of its dwords
   270		PXOR X1, X2	// mix in the broadcast AESKEYGENASSIST dword
   271	
   272		MOVUPS X2, (BX)
   273		ADDQ $16, BX
   274		RET

View as plain text