...

Text file src/pkg/crypto/aes/asm_ppc64le.s

     1	// Copyright 2016 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// Based on CRYPTOGAMS code with the following comment:
     6	// # ====================================================================
     7	// # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
     8	// # project. The module is, however, dual licensed under OpenSSL and
     9	// # CRYPTOGAMS licenses depending on where you obtain it. For further
    10	// # details see http://www.openssl.org/~appro/cryptogams/.
    11	// # ====================================================================
    12	
    13	// Original code can be found at the link below:
    14	// https://github.com/dot-asm/cryptogams/blob/master/ppc/aesp8-ppc.pl
    15	
    16	// I changed some function names to follow Go naming conventions more closely.
    17	// For instance, the functions aes_p8_set_{en,de}crypt_key became
    18	// set{En,De}cryptKeyAsm. I also split setEncryptKeyAsm in two parts
    19	// and a new section was created (doEncryptKeyAsm). This was necessary to
    20	// avoid overwriting the arguments when setDecryptKeyAsm calls setEncryptKeyAsm.
    21	// There were other modifications as well, but the functionality was kept the same.
    22	
    23	#include "textflag.h"
    24	
    25	// Register aliases for set{En,De}cryptKeyAsm and the shared doEncryptKeyAsm body.
	// INP/BITS/OUT carry the key pointer, the key length in bits and the output
	// (key schedule) pointer. PTR is used first as the exit code (-1/-2) and then
	// as the pointer into the ·rcon table. CNT is the value moved into CTR for the
	// key-schedule loops, and ROUNDS receives the round count (10, 12 or 14).
	// TEMP is a scratch register. The V* aliases are vector registers.
    26	#define INP     R3
    27	#define BITS    R4
    28	#define OUT     R5
    29	#define PTR     R6
    30	#define CNT     R7
    31	#define ROUNDS  R8
    32	#define TEMP    R19
    33	#define ZERO    V0
    34	#define IN0     V1
    35	#define IN1     V2
    36	#define KEY     V3
    37	#define RCON    V4
    38	#define MASK    V5
    39	#define TMP     V6
    40	#define STAGE   V7
    41	#define OUTPERM V8
    42	#define OUTMASK V9
    43	#define OUTHEAD V10
    44	#define OUTTAIL V11
    45	
    46	// Register aliases for {en,de}cryptBlockAsm.
	// BLK_INP/BLK_OUT/BLK_KEY are the src, dst and key schedule pointers.
	// BLK_ROUNDS is loaded from offset 240 of the key schedule, and BLK_IDX is the
	// byte index register used with the indexed LVX/STVX accesses.
    47	#define BLK_INP    R3
    48	#define BLK_OUT    R4
    49	#define BLK_KEY    R5
    50	#define BLK_ROUNDS R6
    51	#define BLK_IDX    R7
    52	
    53	DATA  ·rcon+0x00(SB)/8, $0x0100000001000000 // RCON: first 16 bytes, loaded into RCON before the key-schedule loops
    54	DATA  ·rcon+0x08(SB)/8, $0x0100000001000000 // RCON
    55	DATA  ·rcon+0x10(SB)/8, $0x1b0000001b000000 // RCON reloaded from here after loop128 ("last two round keys")
    56	DATA  ·rcon+0x18(SB)/8, $0x1b0000001b000000 // RCON (continued)
    57	DATA  ·rcon+0x20(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK: VPERM control loaded at offset 0x20 ("rotate-n-splat")
    58	DATA  ·rcon+0x28(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK
    59	DATA  ·rcon+0x30(SB)/8, $0x0000000000000000 // all zero; not referenced by the code in this listing
    60	DATA  ·rcon+0x38(SB)/8, $0x0000000000000000 // all zero; not referenced by the code in this listing
    61	GLOBL ·rcon(SB), RODATA, $64                // 64-byte read-only table
    62	
    63	// func setEncryptKeyAsm(key *byte, keylen int, enc *uint32) int
	//
	// Go entry point for building the encryption key schedule. It only moves the
	// three arguments into INP/BITS/OUT and tail-jumps into doEncryptKeyAsm, which
	// stores the int result into ret+24(FP) and returns to the Go caller.
	// keylen is checked there as a bit count (128..256, multiple of 64).
	// Result: 0 on success, -1 if key or enc is nil, -2 if keylen is rejected.
    64	TEXT ·setEncryptKeyAsm(SB),NOSPLIT|NOFRAME,$0
    65		// Load the arguments into the registers expected by doEncryptKeyAsm
    66		MOVD key+0(FP), INP
    67		MOVD keylen+8(FP), BITS
    68		MOVD enc+16(FP), OUT
    69		JMP ·doEncryptKeyAsm(SB)    // tail jump: doEncryptKeyAsm executes the RET
    70	
    71	// This text is used by both setEncryptKeyAsm and setDecryptKeyAsm.
	//
	// Shared key-expansion body. It is entered with registers already loaded:
	//   INP (R3)  = pointer to the user key
	//   BITS (R4) = key length in bits
	//   OUT (R5)  = pointer to the output key schedule
	// On exit R3 holds the exit code, which is also stored to ret+24(FP):
	//   0 on success, -1 if INP or OUT is zero, -2 if BITS is < 128, > 256 or
	//   not a multiple of 64.
	// On success the round keys are stored from the original OUT onwards and the
	// round count (10, 12 or 14) is stored as a 32-bit word at offset 240; OUT is
	// left pointing at that word and ROUNDS (R8) still holds the count, which is
	// what setDecryptKeyAsm relies on after calling this text.
	// Round keys are written with STVX through OUTPERM/OUTMASK/OUTHEAD/OUTTAIL so
	// that OUT does not have to be 16-byte aligned; the final pending bytes are
	// merged with memory at the "done" label.
    72	TEXT ·doEncryptKeyAsm(SB),NOSPLIT|NOFRAME,$0
    73		// Do not change R10 since it's storing the LR value in setDecryptKeyAsm
    74	
    75		// Check arguments
    76		MOVD $-1, PTR                  // li    6,-1       exit code to -1 (255)
    77		CMPU INP, $0                   // cmpldi r3,0      input key pointer set?
    78		BC 0x0E, 2, enc_key_abort      // beq-  .Lenc_key_abort
    79		CMPU OUT, $0                   // cmpldi r5,0      output key pointer set?
    80		BC 0x0E, 2, enc_key_abort      // beq-  .Lenc_key_abort
    81		MOVD $-2, PTR                  // li    6,-2       exit code to -2 (254)
    82		CMPW BITS, $128                // cmpwi 4,128      greater or equal to 128
    83		BC 0x0E, 0, enc_key_abort      // blt-  .Lenc_key_abort
    84		CMPW BITS, $256                // cmpwi 4,256      lesser or equal to 256
    85		BC 0x0E, 1, enc_key_abort      // bgt-  .Lenc_key_abort
    86		ANDCC $0x3f, BITS, TEMP        // andi. 0,4,0x3f   multiple of 64
    87		BC 0x06, 2, enc_key_abort      // bne-  .Lenc_key_abort
    88	
    89		MOVD $·rcon(SB), PTR           // PTR point to rcon addr
    90	
    91		// Get key from memory and write aligned into VR
    92		NEG INP, R9                    // neg   9,3        R9 is ~INP + 1
    93		LVX (INP)(R0), IN0             // lvx   1,0,3      Load key inside IN0
    94		ADD $15, INP, INP              // addi  3,3,15     Add 15B to INP addr
    95		LVSR (R9)(R0), KEY             // lvsr  3,0,9
    96		MOVD $0x20, R8                 // li    8,0x20     R8 = 32
    97		CMPW BITS, $192                // cmpwi 4,192      Key size == 192? (result consumed by BLT/BEQ below)
    98		LVX (INP)(R0), IN1             // lvx   2,0,3
    99		VSPLTISB $0x0f, MASK           // vspltisb 5,0x0f  0x0f0f0f0f... mask
   100		LVX (PTR)(R0), RCON            // lvx   4,0,6      Load first 16 bytes into RCON
   101		VXOR KEY, MASK, KEY            // vxor  3,3,5      Adjust for byte swap
   102		LVX (PTR)(R8), MASK            // lvx   5,8,6      Load rcon+0x20 into MASK
   103		ADD $0x10, PTR, PTR            // addi  6,6,0x10   PTR to next 16 bytes of RCON
   104		VPERM IN0, IN1, KEY, IN0       // vperm 1,1,2,3    Align
   105		MOVD $8, CNT                   // li    7,8        CNT = 8
   106		VXOR ZERO, ZERO, ZERO          // vxor  0,0,0      Zero to be zero :)
   107		MOVD CNT, CTR                  // mtctr 7          Set the counter to 8 (rounds)
   108	
   109		LVSL (OUT)(R0), OUTPERM        // lvsl  8,0,5
   110		VSPLTISB $-1, OUTMASK          // vspltisb      9,-1
   111		LVX (OUT)(R0), OUTHEAD         // lvx   10,0,5
   112		VPERM OUTMASK, ZERO, OUTPERM, OUTMASK  // vperm 9,9,0,8
   113	
   114		BLT loop128                    // blt   .Loop128   BITS < 192: 128-bit key
   115		ADD $8, INP, INP               // addi  3,3,8
   116		BEQ l192                       // beq   .L192      BITS == 192
   117		ADD $8, INP, INP               // addi  3,3,8
   118		JMP l256                       // b     .L256      otherwise: 256-bit key
   119	
   120	loop128:
   121		// Key schedule (Round 1 to 8)
   122		VPERM IN0, IN0, MASK, KEY      // vperm 3,1,1,5         Rotate-n-splat
   123		VSLDOI $12, ZERO, IN0, TMP     // vsldoi 6,0,1,12
   124		VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8    Rotate
   125		VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
   126		VOR OUTTAIL, OUTTAIL, OUTHEAD  // vor 10,11,11
   127		VCIPHERLAST KEY, RCON, KEY     // vcipherlast 3,3,4
   128		STVX STAGE, (OUT+R0)           // stvx 7,0,5        Write to output
   129		ADD $16, OUT, OUT              // addi 5,5,16       Point to the next round
   130	
   131		VXOR IN0, TMP, IN0             // vxor 1,1,6
   132		VSLDOI $12, ZERO, TMP, TMP     // vsldoi 6,0,6,12
   133		VXOR IN0, TMP, IN0             // vxor 1,1,6
   134		VSLDOI $12, ZERO, TMP, TMP     // vsldoi 6,0,6,12
   135		VXOR IN0, TMP, IN0             // vxor 1,1,6
   136		VADDUWM RCON, RCON, RCON       // vadduwm 4,4,4
   137		VXOR IN0, KEY, IN0             // vxor 1,1,3
   138		BC 0x10, 0, loop128            // bdnz .Loop128
   139	
   140		LVX (PTR)(R0), RCON            // lvx 4,0,6     Last two round keys
   141	
   142		// Key schedule (Round 9)
   143		VPERM IN0, IN0, MASK, KEY      // vperm 3,1,1,5   Rotate-n-splat
   144		VSLDOI $12, ZERO, IN0, TMP     // vsldoi 6,0,1,12
   145		VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8  Rotate
   146		VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
   147		VOR OUTTAIL, OUTTAIL, OUTHEAD  // vor 10,11,11
   148		VCIPHERLAST KEY, RCON, KEY     // vcipherlast 3,3,4
   149		STVX STAGE, (OUT+R0)           // stvx 7,0,5   Round 9
   150		ADD $16, OUT, OUT              // addi 5,5,16
   151	
   152		// Key schedule (Round 10)
   153		VXOR IN0, TMP, IN0             // vxor 1,1,6
   154		VSLDOI $12, ZERO, TMP, TMP     // vsldoi 6,0,6,12
   155		VXOR IN0, TMP, IN0             // vxor 1,1,6
   156		VSLDOI $12, ZERO, TMP, TMP     // vsldoi 6,0,6,12
   157		VXOR IN0, TMP, IN0             // vxor 1,1,6
   158		VADDUWM RCON, RCON, RCON       // vadduwm 4,4,4
   159		VXOR IN0, KEY, IN0             // vxor 1,1,3
   160	
   161		VPERM IN0, IN0, MASK, KEY      // vperm 3,1,1,5   Rotate-n-splat
   162		VSLDOI $12, ZERO, IN0, TMP     // vsldoi 6,0,1,12
   163		VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8  Rotate
   164		VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
   165		VOR OUTTAIL, OUTTAIL, OUTHEAD  // vor 10,11,11
   166		VCIPHERLAST KEY, RCON, KEY     // vcipherlast 3,3,4
   167		STVX STAGE, (OUT+R0)           // stvx 7,0,5    Round 10
   168		ADD $16, OUT, OUT              // addi 5,5,16
   169	
   170		// Key schedule (Round 11)
   171		VXOR IN0, TMP, IN0             // vxor 1,1,6
   172		VSLDOI $12, ZERO, TMP, TMP     // vsldoi 6,0,6,12
   173		VXOR IN0, TMP, IN0             // vxor 1,1,6
   174		VSLDOI $12, ZERO, TMP, TMP     // vsldoi 6,0,6,12
   175		VXOR IN0, TMP, IN0             // vxor 1,1,6
   176		VXOR IN0, KEY, IN0             // vxor 1,1,3
   177		VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8
   178		VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
   179		VOR OUTTAIL, OUTTAIL, OUTHEAD  // vor 10,11,11
   180		STVX STAGE, (OUT+R0)           // stvx 7,0,5  Round 11
   181	
   182		ADD $15, OUT, INP              // addi  3,5,15     INP = address used at "done" to flush OUTHEAD
   183		ADD $0x50, OUT, OUT            // addi  5,5,0x50   OUT = schedule base + 240 (where ROUNDS is stored)
   184	
   185		MOVD $10, ROUNDS               // li    8,10
   186		JMP done                       // b     .Ldone
   187	
   188	l192:
   189		LVX (INP)(R0), TMP             // lvx 6,0,3
   190		MOVD $4, CNT                   // li 7,4
   191		VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8
   192		VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
   193		VOR OUTTAIL, OUTTAIL, OUTHEAD  // vor 10,11,11
   194		STVX STAGE, (OUT+R0)           // stvx 7,0,5
   195		ADD $16, OUT, OUT              // addi 5,5,16
   196		VPERM IN1, TMP, KEY, IN1       // vperm 2,2,6,3
   197		VSPLTISB $8, KEY               // vspltisb 3,8
   198		MOVD CNT, CTR                  // mtctr 7
   199		VSUBUBM MASK, KEY, MASK        // vsububm 5,5,3
   200	
   201	loop192:
   202		VPERM IN1, IN1, MASK, KEY      // vperm 3,2,2,5
   203		VSLDOI $12, ZERO, IN0, TMP     // vsldoi 6,0,1,12
   204		VCIPHERLAST KEY, RCON, KEY     // vcipherlast 3,3,4
   205	
   206		VXOR IN0, TMP, IN0             // vxor 1,1,6
   207		VSLDOI $12, ZERO, TMP, TMP     // vsldoi 6,0,6,12
   208		VXOR IN0, TMP, IN0             // vxor 1,1,6
   209		VSLDOI $12, ZERO, TMP, TMP     // vsldoi 6,0,6,12
   210		VXOR IN0, TMP, IN0             // vxor 1,1,6
   211	
   212		VSLDOI $8, ZERO, IN1, STAGE    // vsldoi 7,0,2,8
   213		VSPLTW $3, IN0, TMP            // vspltw 6,1,3
   214		VXOR TMP, IN1, TMP             // vxor 6,6,2
   215		VSLDOI $12, ZERO, IN1, IN1     // vsldoi 2,0,2,12
   216		VADDUWM RCON, RCON, RCON       // vadduwm 4,4,4
   217		VXOR IN1, TMP, IN1             // vxor 2,2,6
   218		VXOR IN0, KEY, IN0             // vxor 1,1,3
   219		VXOR IN1, KEY, IN1             // vxor 2,2,3
   220		VSLDOI $8, STAGE, IN0, STAGE   // vsldoi 7,7,1,8
   221	
   222		VPERM IN1, IN1, MASK, KEY      // vperm 3,2,2,5
   223		VSLDOI $12, ZERO, IN0, TMP     // vsldoi 6,0,1,12
   224		VPERM STAGE, STAGE, OUTPERM, OUTTAIL // vperm 11,7,7,8
   225		VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
   226		VOR OUTTAIL, OUTTAIL, OUTHEAD  // vor 10,11,11
   227		VCIPHERLAST KEY, RCON, KEY     // vcipherlast 3,3,4
   228		STVX STAGE, (OUT+R0)           // stvx 7,0,5
   229		ADD $16, OUT, OUT              // addi 5,5,16
   230	
   231		VSLDOI $8, IN0, IN1, STAGE     // vsldoi 7,1,2,8
   232		VXOR IN0, TMP, IN0             // vxor 1,1,6
   233		VSLDOI $12, ZERO, TMP, TMP     // vsldoi 6,0,6,12
   234		VPERM STAGE, STAGE, OUTPERM, OUTTAIL // vperm 11,7,7,8
   235		VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
   236		VOR OUTTAIL, OUTTAIL, OUTHEAD  // vor 10,11,11
   237		VXOR IN0, TMP, IN0             // vxor 1,1,6
   238		VSLDOI $12, ZERO, TMP, TMP     // vsldoi 6,0,6,12
   239		VXOR IN0, TMP, IN0             // vxor 1,1,6
   240		STVX STAGE, (OUT+R0)           // stvx 7,0,5
   241		ADD $16, OUT, OUT              // addi 5,5,16
   242	
   243		VSPLTW $3, IN0, TMP            // vspltw 6,1,3
   244		VXOR TMP, IN1, TMP             // vxor 6,6,2
   245		VSLDOI $12, ZERO, IN1, IN1     // vsldoi 2,0,2,12
   246		VADDUWM RCON, RCON, RCON       // vadduwm 4,4,4
   247		VXOR IN1, TMP, IN1             // vxor 2,2,6
   248		VXOR IN0, KEY, IN0             // vxor 1,1,3
   249		VXOR IN1, KEY, IN1             // vxor 2,2,3
   250		VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8
   251		VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
   252		VOR OUTTAIL, OUTTAIL, OUTHEAD  // vor 10,11,11
   253		STVX STAGE, (OUT+R0)           // stvx 7,0,5
   254		ADD $15, OUT, INP              // addi 3,5,15
   255		ADD $16, OUT, OUT              // addi 5,5,16
   256		BC 0x10, 0, loop192           // bdnz .Loop192
   257	
   258		MOVD $12, ROUNDS               // li 8,12
   259		ADD $0x20, OUT, OUT            // addi 5,5,0x20
   260		JMP done                       // b .Ldone
   261	
   262	l256:
   263		LVX (INP)(R0), TMP             // lvx 6,0,3
   264		MOVD $7, CNT                   // li 7,7
   265		MOVD $14, ROUNDS               // li 8,14
   266		VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8
   267		VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
   268		VOR OUTTAIL, OUTTAIL, OUTHEAD  // vor 10,11,11
   269		STVX STAGE, (OUT+R0)           // stvx 7,0,5
   270		ADD $16, OUT, OUT              // addi 5,5,16
   271		VPERM IN1, TMP, KEY, IN1       // vperm 2,2,6,3
   272		MOVD CNT, CTR                  // mtctr 7
   273	
   274	loop256:
   275		VPERM IN1, IN1, MASK, KEY      // vperm 3,2,2,5
   276		VSLDOI $12, ZERO, IN0, TMP     // vsldoi 6,0,1,12
   277		VPERM IN1, IN1, OUTPERM, OUTTAIL // vperm 11,2,2,8
   278		VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
   279		VOR OUTTAIL, OUTTAIL, OUTHEAD  // vor 10,11,11
   280		VCIPHERLAST KEY, RCON, KEY     // vcipherlast 3,3,4
   281		STVX STAGE, (OUT+R0)           // stvx 7,0,5
   282		ADD $16, OUT, OUT              // addi 5,5,16
   283	
   284		VXOR IN0, TMP, IN0             // vxor 1,1,6
   285		VSLDOI $12, ZERO, TMP, TMP     // vsldoi 6,0,6,12
   286		VXOR IN0, TMP, IN0             // vxor 1,1,6
   287		VSLDOI $12, ZERO, TMP, TMP     // vsldoi 6,0,6,12
   288		VXOR IN0, TMP, IN0             // vxor 1,1,6
   289		VADDUWM RCON, RCON, RCON       // vadduwm 4,4,4
   290		VXOR IN0, KEY, IN0             // vxor 1,1,3
   291		VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8
   292		VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
   293		VOR OUTTAIL, OUTTAIL, OUTHEAD  // vor 10,11,11
   294		STVX STAGE, (OUT+R0)           // stvx 7,0,5
   295		ADD $15, OUT, INP              // addi 3,5,15
   296		ADD $16, OUT, OUT              // addi 5,5,16
   297		BC 0x12, 0, done               // bdz .Ldone       leave the loop once CTR reaches zero
   298	
   299		VSPLTW $3, IN0, KEY            // vspltw 3,1,3
   300		VSLDOI $12, ZERO, IN1, TMP     // vsldoi 6,0,2,12
   301		VSBOX KEY, KEY                 // vsbox 3,3
   302	
   303		VXOR IN1, TMP, IN1             // vxor 2,2,6
   304		VSLDOI $12, ZERO, TMP, TMP     // vsldoi 6,0,6,12
   305		VXOR IN1, TMP, IN1             // vxor 2,2,6
   306		VSLDOI $12, ZERO, TMP, TMP     // vsldoi 6,0,6,12
   307		VXOR IN1, TMP, IN1             // vxor 2,2,6
   308	
   309		VXOR IN1, KEY, IN1             // vxor 2,2,3
   310		JMP loop256                    // b .Loop256
   311	
   312	done:
   313		LVX (INP)(R0), IN1             // lvx   2,0,3  Reload the last quadword touched by the output
   314		VSEL OUTHEAD, IN1, OUTMASK, IN1 // vsel 2,10,2,9  Merge the pending OUTHEAD bytes into it
   315		STVX IN1, (INP+R0)             // stvx  2,0,3  Write the merged quadword back
   316		MOVD $0, PTR                   // li    6,0    set PTR to 0 (exit code 0)
   317		MOVW ROUNDS, 0(OUT)            // stw   8,0(5) store the round count after the round keys
   318	
   319	enc_key_abort:
   320		MOVD PTR, INP                  // mr    3,6    set exit code with PTR value
   321		MOVD INP, ret+24(FP)           // Put return value into the FP
   322		RET                            // blr
   323	
   324	// func setDecryptKeyAsm(key *byte, keylen int, dec *uint32) int
	//
	// Builds the decryption key schedule in dec. It first runs doEncryptKeyAsm to
	// produce the encryption schedule, then reverses the order of the 16-byte
	// round keys in place by swapping them pairwise from both ends (ROUNDS/2
	// swaps). The result code of doEncryptKeyAsm is returned unchanged when it
	// is non-zero; otherwise 0 is returned.
	// This function is NOFRAME, so LR is preserved in R10 around the CALL;
	// doEncryptKeyAsm must therefore not modify R10.
   325	TEXT ·setDecryptKeyAsm(SB),NOSPLIT|NOFRAME,$0
   326		// Load the arguments inside the registers
   327		MOVD key+0(FP), INP
   328		MOVD keylen+8(FP), BITS
   329		MOVD dec+16(FP), OUT
   330	
   331		MOVD LR, R10                   // mflr 10     save the return address across the CALL
   332		CALL ·doEncryptKeyAsm(SB)
   333		MOVD R10, LR                   // mtlr 10     restore it; R10 is free to reuse from here on
   334	
   335		CMPW INP, $0                   // cmpwi 3,0  exit 0 = ok
   336		BC 0x06, 2, dec_key_abort      // bne- .Ldec_key_abort
   337	
   338		// doEncryptKeyAsm set ROUNDS (R8) with the proper value for each mode
   339		SLW $4, ROUNDS, CNT            // slwi 7,8,4   CNT = ROUNDS * 16 (byte offset of the last round key)
   340		SUB $240, OUT, INP             // subi 3,5,240 INP = first round key (OUT was left at base+240)
   341		SRW $1, ROUNDS, ROUNDS         // srwi 8,8,1   number of swaps = ROUNDS / 2
   342		ADD R7, INP, OUT               // add 5,3,7    OUT = last round key (R7 is CNT)
   343		MOVD ROUNDS, CTR               // mtctr 8
   344	
   345	// dec_key will invert the key sequence in order to be used for decrypt
   346	dec_key:
   347		MOVWZ 0(INP), TEMP             // lwz 0, 0(3)   (TEMP is used here in place of r0)
   348		MOVWZ 4(INP), R6               // lwz 6, 4(3)
   349		MOVWZ 8(INP), R7               // lwz 7, 8(3)
   350		MOVWZ 12(INP), R8              // lwz 8, 12(3)
   351		ADD $16, INP, INP              // addi 3,3,16
   352		MOVWZ 0(OUT), R9               // lwz 9, 0(5)
   353		MOVWZ 4(OUT), R10              // lwz 10,4(5)
   354		MOVWZ 8(OUT), R11              // lwz 11,8(5)
   355		MOVWZ 12(OUT), R12             // lwz 12,12(5)
   356		MOVW TEMP, 0(OUT)              // stw 0, 0(5)
   357		MOVW R6, 4(OUT)                // stw 6, 4(5)
   358		MOVW R7, 8(OUT)                // stw 7, 8(5)
   359		MOVW R8, 12(OUT)               // stw 8, 12(5)
   360		SUB $16, OUT, OUT              // subi 5,5,16
   361		MOVW R9, -16(INP)              // stw 9, -16(3)
   362		MOVW R10, -12(INP)             // stw 10,-12(3)
   363		MOVW R11, -8(INP)              // stw 11,-8(3)
   364		MOVW R12, -4(INP)              // stw 12,-4(3)
   365		BC 0x10, 0, dec_key            // bdnz .Ldeckey
   366	
   367		XOR R3, R3, R3                 // xor 3,3,3      Clean R3 (return value 0)
   368	
   369	dec_key_abort:
   370		MOVD R3, ret+24(FP)            // Put return value into the FP
   371		RET                            // blr
   372	
   373	
   374	// func encryptBlockAsm(dst, src *byte, enc *uint32)
	//
	// Encrypts one block read from src with the key schedule enc and writes the
	// result to dst. The round count is read from offset 240 of enc.
	// V0 (aliased ZERO) holds the block state in this function; IN0/IN1
	// alternate as the current round key and MASK is the key alignment permute.
	// src, dst and enc are each accessed through LVX/LVSL/LVSR + VPERM so that
	// they need not be 16-byte aligned. The result is written by loading the
	// quadwords at dst and dst+15, merging the new bytes in with VSEL and
	// storing both quadwords back.
   375	TEXT ·encryptBlockAsm(SB),NOSPLIT|NOFRAME,$0
   376		// Load the arguments inside the registers
   377		MOVD dst+0(FP), BLK_OUT
   378		MOVD src+8(FP), BLK_INP
   379		MOVD enc+16(FP), BLK_KEY
   380	
   381		MOVWZ 240(BLK_KEY), BLK_ROUNDS // lwz 6,240(5)     round count
   382		MOVD $15, BLK_IDX              // li 7,15
   383	
   384		LVX (BLK_INP)(R0), ZERO        // lvx 0,0,3
   385		NEG BLK_OUT, R11               // neg 11,4
   386		LVX (BLK_INP)(BLK_IDX), IN0    // lvx 1,7,3
   387		LVSL (BLK_INP)(R0), IN1        // lvsl 2,0,3
   388		VSPLTISB $0x0f, RCON           // vspltisb 4,0x0f
   389		LVSR (R11)(R0), KEY            // lvsr 3,0,11      output permute, used after the last round
   390		VXOR IN1, RCON, IN1            // vxor 2,2,4
   391		MOVD $16, BLK_IDX              // li 7,16
   392		VPERM ZERO, IN0, IN1, ZERO     // vperm 0,0,1,2    assemble the input block from the two loads
   393		LVX (BLK_KEY)(R0), IN0         // lvx 1,0,5
   394		LVSR (BLK_KEY)(R0), MASK       // lvsr 5,0,5       key alignment permute
   395		SRW $1, BLK_ROUNDS, BLK_ROUNDS // srwi 6,6,1       two rounds per loop iteration
   396		LVX (BLK_KEY)(BLK_IDX), IN1    // lvx 2,7,5
   397		ADD $16, BLK_IDX, BLK_IDX      // addi 7,7,16
   398		SUB $1, BLK_ROUNDS, BLK_ROUNDS // subi 6,6,1       the last two rounds run after the loop
   399		VPERM IN1, IN0, MASK, IN0      // vperm 1,2,1,5
   400	
   401		VXOR ZERO, IN0, ZERO           // vxor 0,0,1       XOR the state with the first round key
   402		LVX (BLK_KEY)(BLK_IDX), IN0    // lvx 1,7,5
   403		ADD $16, BLK_IDX, BLK_IDX      // addi 7,7,16
   404		MOVD BLK_ROUNDS, CTR           // mtctr 6
   405	
   406	loop_enc:
   407		VPERM IN0, IN1, MASK, IN1      // vperm 2,1,2,5
   408		VCIPHER ZERO, IN1, ZERO        // vcipher 0,0,2
   409		LVX (BLK_KEY)(BLK_IDX), IN1    // lvx 2,7,5
   410		ADD $16, BLK_IDX, BLK_IDX      // addi 7,7,16
   411		VPERM IN1, IN0, MASK, IN0      // vperm 1,2,1,5
   412		VCIPHER ZERO, IN0, ZERO        // vcipher 0,0,1
   413		LVX (BLK_KEY)(BLK_IDX), IN0    // lvx 1,7,5
   414		ADD $16, BLK_IDX, BLK_IDX      // addi 7,7,16
   415		BC 0x10, 0, loop_enc           // bdnz .Loop_enc
   416	
   417		VPERM IN0, IN1, MASK, IN1      // vperm 2,1,2,5
   418		VCIPHER ZERO, IN1, ZERO        // vcipher 0,0,2
   419		LVX (BLK_KEY)(BLK_IDX), IN1    // lvx 2,7,5
   420		VPERM IN1, IN0, MASK, IN0      // vperm 1,2,1,5
   421		VCIPHERLAST ZERO, IN0, ZERO    // vcipherlast 0,0,1   final round
   422	
   423		VSPLTISB $-1, IN1              // vspltisb 2,-1
   424		VXOR IN0, IN0, IN0             // vxor 1,1,1
   425		MOVD $15, BLK_IDX              // li 7,15
   426		VPERM IN1, IN0, KEY, IN1       // vperm 2,2,1,3    IN1 = select mask for the output merge
   427		VXOR KEY, RCON, KEY            // vxor 3,3,4
   428		LVX (BLK_OUT)(R0), IN0         // lvx 1,0,4        existing first output quadword
   429		VPERM ZERO, ZERO, KEY, ZERO    // vperm 0,0,0,3    rotate the result for the output
   430		VSEL IN0, ZERO, IN1, IN0       // vsel 1,1,0,2
   431		LVX (BLK_OUT)(BLK_IDX), RCON   // lvx 4,7,4        existing second output quadword
   432		STVX IN0, (BLK_OUT+R0)         // stvx 1,0,4
   433		VSEL ZERO, RCON, IN1, ZERO     // vsel 0,0,4,2
   434		STVX ZERO, (BLK_OUT+BLK_IDX)   // stvx 0,7,4
   435	
   436		RET                            // blr
   437	
   438	
   439	// func decryptBlockAsm(dst, src *byte, dec *uint32)
	//
	// Decrypts one block read from src with the key schedule dec and writes the
	// result to dst. The round count is read from offset 240 of dec.
	// The instruction sequence is the same as encryptBlockAsm with
	// VNCIPHER/VNCIPHERLAST in place of VCIPHER/VCIPHERLAST.
	// V0 (aliased ZERO) holds the block state in this function; IN0/IN1
	// alternate as the current round key and MASK is the key alignment permute.
	// The result is written by loading the quadwords at dst and dst+15, merging
	// the new bytes in with VSEL and storing both quadwords back.
   440	TEXT ·decryptBlockAsm(SB),NOSPLIT|NOFRAME,$0
   441		// Load the arguments inside the registers
   442		MOVD dst+0(FP), BLK_OUT
   443		MOVD src+8(FP), BLK_INP
   444		MOVD dec+16(FP), BLK_KEY
   445	
   446		MOVWZ 240(BLK_KEY), BLK_ROUNDS // lwz 6,240(5)     round count
   447		MOVD $15, BLK_IDX              // li 7,15
   448	
   449		LVX (BLK_INP)(R0), ZERO        // lvx 0,0,3
   450		NEG BLK_OUT, R11               // neg 11,4
   451		LVX (BLK_INP)(BLK_IDX), IN0    // lvx 1,7,3
   452		LVSL (BLK_INP)(R0), IN1        // lvsl 2,0,3
   453		VSPLTISB $0x0f, RCON           // vspltisb 4,0x0f
   454		LVSR (R11)(R0), KEY            // lvsr 3,0,11      output permute, used after the last round
   455		VXOR IN1, RCON, IN1            // vxor 2,2,4
   456		MOVD $16, BLK_IDX              // li 7,16
   457		VPERM ZERO, IN0, IN1, ZERO     // vperm 0,0,1,2    assemble the input block from the two loads
   458		LVX (BLK_KEY)(R0), IN0         // lvx 1,0,5
   459		LVSR (BLK_KEY)(R0), MASK       // lvsr 5,0,5       key alignment permute
   460		SRW $1, BLK_ROUNDS, BLK_ROUNDS // srwi 6,6,1       two rounds per loop iteration
   461		LVX (BLK_KEY)(BLK_IDX), IN1    // lvx 2,7,5
   462		ADD $16, BLK_IDX, BLK_IDX      // addi 7,7,16
   463		SUB $1, BLK_ROUNDS, BLK_ROUNDS // subi 6,6,1       the last two rounds run after the loop
   464		VPERM IN1, IN0, MASK, IN0      // vperm 1,2,1,5
   465	
   466		VXOR ZERO, IN0, ZERO           // vxor 0,0,1       XOR the state with the first round key
   467		LVX (BLK_KEY)(BLK_IDX), IN0    // lvx 1,7,5
   468		ADD $16, BLK_IDX, BLK_IDX      // addi 7,7,16
   469		MOVD BLK_ROUNDS, CTR           // mtctr 6
   470	
   471	loop_dec:
   472		VPERM IN0, IN1, MASK, IN1      // vperm 2,1,2,5
   473		VNCIPHER ZERO, IN1, ZERO       // vncipher 0,0,2
   474		LVX (BLK_KEY)(BLK_IDX), IN1    // lvx 2,7,5
   475		ADD $16, BLK_IDX, BLK_IDX      // addi 7,7,16
   476		VPERM IN1, IN0, MASK, IN0      // vperm 1,2,1,5
   477		VNCIPHER ZERO, IN0, ZERO       // vncipher 0,0,1
   478		LVX (BLK_KEY)(BLK_IDX), IN0    // lvx 1,7,5
   479		ADD $16, BLK_IDX, BLK_IDX      // addi 7,7,16
   480		BC 0x10, 0, loop_dec           // bdnz .Loop_dec
   481	
   482		VPERM IN0, IN1, MASK, IN1      // vperm 2,1,2,5
   483		VNCIPHER ZERO, IN1, ZERO       // vncipher 0,0,2
   484		LVX (BLK_KEY)(BLK_IDX), IN1    // lvx 2,7,5
   485		VPERM IN1, IN0, MASK, IN0      // vperm 1,2,1,5
   486		VNCIPHERLAST ZERO, IN0, ZERO   // vncipherlast 0,0,1   final round
   487	
   488		VSPLTISB $-1, IN1              // vspltisb 2,-1
   489		VXOR IN0, IN0, IN0             // vxor 1,1,1
   490		MOVD $15, BLK_IDX              // li 7,15
   491		VPERM IN1, IN0, KEY, IN1       // vperm 2,2,1,3    IN1 = select mask for the output merge
   492		VXOR KEY, RCON, KEY            // vxor 3,3,4
   493		LVX (BLK_OUT)(R0), IN0         // lvx 1,0,4        existing first output quadword
   494		VPERM ZERO, ZERO, KEY, ZERO    // vperm 0,0,0,3    rotate the result for the output
   495		VSEL IN0, ZERO, IN1, IN0       // vsel 1,1,0,2
   496		LVX (BLK_OUT)(BLK_IDX), RCON   // lvx 4,7,4        existing second output quadword
   497		STVX IN0, (BLK_OUT+R0)         // stvx 1,0,4
   498		VSEL ZERO, RCON, IN1, ZERO     // vsel 0,0,4,2
   499		STVX ZERO, (BLK_OUT+BLK_IDX)   // stvx 0,7,4
   500	
   501		RET                            // blr

View as plain text