...

Text file src/crypto/sha256/sha256block_ppc64le.s

     1	// Copyright 2016 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// Based on CRYPTOGAMS code with the following comment:
     6	// # ====================================================================
     7	// # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
     8	// # project. The module is, however, dual licensed under OpenSSL and
     9	// # CRYPTOGAMS licenses depending on where you obtain it. For further
    10	// # details see http://www.openssl.org/~appro/cryptogams/.
    11	// # ====================================================================
    12	
    13	#include "textflag.h"
    14	
    15	// SHA256 block routine. See sha256block.go for Go equivalent.
    16	//
    17	// The algorithm is detailed in FIPS 180-4:
    18	//
    19	//  https://csrc.nist.gov/publications/fips/fips180-4/fips-180-4.pdf
    20	//
    21	// Wt = Mt; for 0 <= t <= 15
    22	// Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 63
    23	//
    24	// a = H0
    25	// b = H1
    26	// c = H2
    27	// d = H3
    28	// e = H4
    29	// f = H5
    30	// g = H6
    31	// h = H7
    32	//
    33	// for t = 0 to 63 {
    34	//    T1 = h + BIGSIGMA1(e) + Ch(e,f,g) + Kt + Wt
    35	//    T2 = BIGSIGMA0(a) + Maj(a,b,c)
    36	//    h = g
    37	//    g = f
    38	//    f = e
    39	//    e = d + T1
    40	//    d = c
    41	//    c = b
    42	//    b = a
    43	//    a = T1 + T2
    44	// }
    45	//
    46	// H0 = a + H0
    47	// H1 = b + H1
    48	// H2 = c + H2
    49	// H3 = d + H3
    50	// H4 = e + H4
    51	// H5 = f + H5
    52	// H6 = g + H6
    53	// H7 = h + H7
    54	
    55	#define CTX	R3	// pointer to the digest state (the dig argument)
    56	#define INP	R4	// current read position inside p
    57	#define END	R5	// INP + LEN at entry: first byte past the last whole 64-byte chunk
    58	#define TBL	R6	// base address of the kcon table
    59	#define IDX	R7	// byte offset into kcon used by the next LVX of KI
    60	#define CNT	R8	// copied from R0 in block; not read anywhere in the visible code
    61	#define LEN	R9	// length of p rounded down to a multiple of 64
    62	#define OFFLOAD	R11	// copy of R1; base of the stack area where a-h are saved per chunk
    63	#define TEMP	R12	// scratch; carries the loop count into CTR
    64	
    65	#define HEX00	R0	// index 0x00; R0 is never written in this file, presumably always zero - confirm
    66	#define HEX10	R10	// HEX10..HEX70 are loaded with 0x10..0x70 in block and used as index registers
    67	#define HEX20	R25
    68	#define HEX30	R26
    69	#define HEX40	R27
    70	#define HEX50	R28
    71	#define HEX60	R29
    72	#define HEX70	R31
    73	
    74	// V0-V7 are A-H (the working variables a-h of the round loop)
    75	// V8-V23 are used for the message schedule (16 registers, one per pending Wt)
    76	#define KI	V24	// round constant for the upcoming round, reloaded from kcon every round
    77	#define FUNC	V25	// scratch: Ch(e,f,g), then a^b, then Maj(a,b,c)
    78	#define S0	V26	// BIGSIGMA0(a), then T2 = BIGSIGMA0(a) + Maj(a,b,c)
    79	#define S1	V27	// BIGSIGMA1(e)
    80	#define s0	V28	// message-schedule sigma0 term (distinct from S0: names are case-sensitive)
    81	#define s1	V29	// message-schedule sigma1 term
    82	#define LEMASK	V31	// Permutation control register for little endian
    83	
    84	// Round constants K0..K63: 4 copies of each Kt, to fill all 4 words of a vector register (16 bytes per entry)
    85	DATA  ·kcon+0x000(SB)/8, $0x428a2f98428a2f98
    86	DATA  ·kcon+0x008(SB)/8, $0x428a2f98428a2f98
    87	DATA  ·kcon+0x010(SB)/8, $0x7137449171374491
    88	DATA  ·kcon+0x018(SB)/8, $0x7137449171374491
    89	DATA  ·kcon+0x020(SB)/8, $0xb5c0fbcfb5c0fbcf
    90	DATA  ·kcon+0x028(SB)/8, $0xb5c0fbcfb5c0fbcf
    91	DATA  ·kcon+0x030(SB)/8, $0xe9b5dba5e9b5dba5
    92	DATA  ·kcon+0x038(SB)/8, $0xe9b5dba5e9b5dba5
    93	DATA  ·kcon+0x040(SB)/8, $0x3956c25b3956c25b
    94	DATA  ·kcon+0x048(SB)/8, $0x3956c25b3956c25b
    95	DATA  ·kcon+0x050(SB)/8, $0x59f111f159f111f1
    96	DATA  ·kcon+0x058(SB)/8, $0x59f111f159f111f1
    97	DATA  ·kcon+0x060(SB)/8, $0x923f82a4923f82a4
    98	DATA  ·kcon+0x068(SB)/8, $0x923f82a4923f82a4
    99	DATA  ·kcon+0x070(SB)/8, $0xab1c5ed5ab1c5ed5
   100	DATA  ·kcon+0x078(SB)/8, $0xab1c5ed5ab1c5ed5
   101	DATA  ·kcon+0x080(SB)/8, $0xd807aa98d807aa98
   102	DATA  ·kcon+0x088(SB)/8, $0xd807aa98d807aa98
   103	DATA  ·kcon+0x090(SB)/8, $0x12835b0112835b01
   104	DATA  ·kcon+0x098(SB)/8, $0x12835b0112835b01
   105	DATA  ·kcon+0x0A0(SB)/8, $0x243185be243185be
   106	DATA  ·kcon+0x0A8(SB)/8, $0x243185be243185be
   107	DATA  ·kcon+0x0B0(SB)/8, $0x550c7dc3550c7dc3
   108	DATA  ·kcon+0x0B8(SB)/8, $0x550c7dc3550c7dc3
   109	DATA  ·kcon+0x0C0(SB)/8, $0x72be5d7472be5d74
   110	DATA  ·kcon+0x0C8(SB)/8, $0x72be5d7472be5d74
   111	DATA  ·kcon+0x0D0(SB)/8, $0x80deb1fe80deb1fe
   112	DATA  ·kcon+0x0D8(SB)/8, $0x80deb1fe80deb1fe
   113	DATA  ·kcon+0x0E0(SB)/8, $0x9bdc06a79bdc06a7
   114	DATA  ·kcon+0x0E8(SB)/8, $0x9bdc06a79bdc06a7
   115	DATA  ·kcon+0x0F0(SB)/8, $0xc19bf174c19bf174
   116	DATA  ·kcon+0x0F8(SB)/8, $0xc19bf174c19bf174
   117	DATA  ·kcon+0x100(SB)/8, $0xe49b69c1e49b69c1
   118	DATA  ·kcon+0x108(SB)/8, $0xe49b69c1e49b69c1
   119	DATA  ·kcon+0x110(SB)/8, $0xefbe4786efbe4786
   120	DATA  ·kcon+0x118(SB)/8, $0xefbe4786efbe4786
   121	DATA  ·kcon+0x120(SB)/8, $0x0fc19dc60fc19dc6
   122	DATA  ·kcon+0x128(SB)/8, $0x0fc19dc60fc19dc6
   123	DATA  ·kcon+0x130(SB)/8, $0x240ca1cc240ca1cc
   124	DATA  ·kcon+0x138(SB)/8, $0x240ca1cc240ca1cc
   125	DATA  ·kcon+0x140(SB)/8, $0x2de92c6f2de92c6f
   126	DATA  ·kcon+0x148(SB)/8, $0x2de92c6f2de92c6f
   127	DATA  ·kcon+0x150(SB)/8, $0x4a7484aa4a7484aa
   128	DATA  ·kcon+0x158(SB)/8, $0x4a7484aa4a7484aa
   129	DATA  ·kcon+0x160(SB)/8, $0x5cb0a9dc5cb0a9dc
   130	DATA  ·kcon+0x168(SB)/8, $0x5cb0a9dc5cb0a9dc
   131	DATA  ·kcon+0x170(SB)/8, $0x76f988da76f988da
   132	DATA  ·kcon+0x178(SB)/8, $0x76f988da76f988da
   133	DATA  ·kcon+0x180(SB)/8, $0x983e5152983e5152
   134	DATA  ·kcon+0x188(SB)/8, $0x983e5152983e5152
   135	DATA  ·kcon+0x190(SB)/8, $0xa831c66da831c66d
   136	DATA  ·kcon+0x198(SB)/8, $0xa831c66da831c66d
   137	DATA  ·kcon+0x1A0(SB)/8, $0xb00327c8b00327c8
   138	DATA  ·kcon+0x1A8(SB)/8, $0xb00327c8b00327c8
   139	DATA  ·kcon+0x1B0(SB)/8, $0xbf597fc7bf597fc7
   140	DATA  ·kcon+0x1B8(SB)/8, $0xbf597fc7bf597fc7
   141	DATA  ·kcon+0x1C0(SB)/8, $0xc6e00bf3c6e00bf3
   142	DATA  ·kcon+0x1C8(SB)/8, $0xc6e00bf3c6e00bf3
   143	DATA  ·kcon+0x1D0(SB)/8, $0xd5a79147d5a79147
   144	DATA  ·kcon+0x1D8(SB)/8, $0xd5a79147d5a79147
   145	DATA  ·kcon+0x1E0(SB)/8, $0x06ca635106ca6351
   146	DATA  ·kcon+0x1E8(SB)/8, $0x06ca635106ca6351
   147	DATA  ·kcon+0x1F0(SB)/8, $0x1429296714292967
   148	DATA  ·kcon+0x1F8(SB)/8, $0x1429296714292967
   149	DATA  ·kcon+0x200(SB)/8, $0x27b70a8527b70a85
   150	DATA  ·kcon+0x208(SB)/8, $0x27b70a8527b70a85
   151	DATA  ·kcon+0x210(SB)/8, $0x2e1b21382e1b2138
   152	DATA  ·kcon+0x218(SB)/8, $0x2e1b21382e1b2138
   153	DATA  ·kcon+0x220(SB)/8, $0x4d2c6dfc4d2c6dfc
   154	DATA  ·kcon+0x228(SB)/8, $0x4d2c6dfc4d2c6dfc
   155	DATA  ·kcon+0x230(SB)/8, $0x53380d1353380d13
   156	DATA  ·kcon+0x238(SB)/8, $0x53380d1353380d13
   157	DATA  ·kcon+0x240(SB)/8, $0x650a7354650a7354
   158	DATA  ·kcon+0x248(SB)/8, $0x650a7354650a7354
   159	DATA  ·kcon+0x250(SB)/8, $0x766a0abb766a0abb
   160	DATA  ·kcon+0x258(SB)/8, $0x766a0abb766a0abb
   161	DATA  ·kcon+0x260(SB)/8, $0x81c2c92e81c2c92e
   162	DATA  ·kcon+0x268(SB)/8, $0x81c2c92e81c2c92e
   163	DATA  ·kcon+0x270(SB)/8, $0x92722c8592722c85
   164	DATA  ·kcon+0x278(SB)/8, $0x92722c8592722c85
   165	DATA  ·kcon+0x280(SB)/8, $0xa2bfe8a1a2bfe8a1
   166	DATA  ·kcon+0x288(SB)/8, $0xa2bfe8a1a2bfe8a1
   167	DATA  ·kcon+0x290(SB)/8, $0xa81a664ba81a664b
   168	DATA  ·kcon+0x298(SB)/8, $0xa81a664ba81a664b
   169	DATA  ·kcon+0x2A0(SB)/8, $0xc24b8b70c24b8b70
   170	DATA  ·kcon+0x2A8(SB)/8, $0xc24b8b70c24b8b70
   171	DATA  ·kcon+0x2B0(SB)/8, $0xc76c51a3c76c51a3
   172	DATA  ·kcon+0x2B8(SB)/8, $0xc76c51a3c76c51a3
   173	DATA  ·kcon+0x2C0(SB)/8, $0xd192e819d192e819
   174	DATA  ·kcon+0x2C8(SB)/8, $0xd192e819d192e819
   175	DATA  ·kcon+0x2D0(SB)/8, $0xd6990624d6990624
   176	DATA  ·kcon+0x2D8(SB)/8, $0xd6990624d6990624
   177	DATA  ·kcon+0x2E0(SB)/8, $0xf40e3585f40e3585
   178	DATA  ·kcon+0x2E8(SB)/8, $0xf40e3585f40e3585
   179	DATA  ·kcon+0x2F0(SB)/8, $0x106aa070106aa070
   180	DATA  ·kcon+0x2F8(SB)/8, $0x106aa070106aa070
   181	DATA  ·kcon+0x300(SB)/8, $0x19a4c11619a4c116
   182	DATA  ·kcon+0x308(SB)/8, $0x19a4c11619a4c116
   183	DATA  ·kcon+0x310(SB)/8, $0x1e376c081e376c08
   184	DATA  ·kcon+0x318(SB)/8, $0x1e376c081e376c08
   185	DATA  ·kcon+0x320(SB)/8, $0x2748774c2748774c
   186	DATA  ·kcon+0x328(SB)/8, $0x2748774c2748774c
   187	DATA  ·kcon+0x330(SB)/8, $0x34b0bcb534b0bcb5
   188	DATA  ·kcon+0x338(SB)/8, $0x34b0bcb534b0bcb5
   189	DATA  ·kcon+0x340(SB)/8, $0x391c0cb3391c0cb3
   190	DATA  ·kcon+0x348(SB)/8, $0x391c0cb3391c0cb3
   191	DATA  ·kcon+0x350(SB)/8, $0x4ed8aa4a4ed8aa4a
   192	DATA  ·kcon+0x358(SB)/8, $0x4ed8aa4a4ed8aa4a
   193	DATA  ·kcon+0x360(SB)/8, $0x5b9cca4f5b9cca4f
   194	DATA  ·kcon+0x368(SB)/8, $0x5b9cca4f5b9cca4f
   195	DATA  ·kcon+0x370(SB)/8, $0x682e6ff3682e6ff3
   196	DATA  ·kcon+0x378(SB)/8, $0x682e6ff3682e6ff3
   197	DATA  ·kcon+0x380(SB)/8, $0x748f82ee748f82ee
   198	DATA  ·kcon+0x388(SB)/8, $0x748f82ee748f82ee
   199	DATA  ·kcon+0x390(SB)/8, $0x78a5636f78a5636f
   200	DATA  ·kcon+0x398(SB)/8, $0x78a5636f78a5636f
   201	DATA  ·kcon+0x3A0(SB)/8, $0x84c8781484c87814
   202	DATA  ·kcon+0x3A8(SB)/8, $0x84c8781484c87814
   203	DATA  ·kcon+0x3B0(SB)/8, $0x8cc702088cc70208
   204	DATA  ·kcon+0x3B8(SB)/8, $0x8cc702088cc70208
   205	DATA  ·kcon+0x3C0(SB)/8, $0x90befffa90befffa
   206	DATA  ·kcon+0x3C8(SB)/8, $0x90befffa90befffa
   207	DATA  ·kcon+0x3D0(SB)/8, $0xa4506ceba4506ceb
   208	DATA  ·kcon+0x3D8(SB)/8, $0xa4506ceba4506ceb
   209	DATA  ·kcon+0x3E0(SB)/8, $0xbef9a3f7bef9a3f7
   210	DATA  ·kcon+0x3E8(SB)/8, $0xbef9a3f7bef9a3f7
   211	DATA  ·kcon+0x3F0(SB)/8, $0xc67178f2c67178f2
   212	DATA  ·kcon+0x3F8(SB)/8, $0xc67178f2c67178f2
   213	DATA  ·kcon+0x400(SB)/8, $0x0000000000000000	// zero entry after K63: the last round still pre-adds KI, so it must add nothing
   214	DATA  ·kcon+0x408(SB)/8, $0x0000000000000000
   215	DATA  ·kcon+0x410(SB)/8, $0x1011121310111213	// permutation control vectors: three 16-byte VPERM controls read at the end of block
   216	DATA  ·kcon+0x418(SB)/8, $0x1011121300010203
   217	DATA  ·kcon+0x420(SB)/8, $0x1011121310111213
   218	DATA  ·kcon+0x428(SB)/8, $0x0405060700010203
   219	DATA  ·kcon+0x430(SB)/8, $0x1011121308090a0b
   220	DATA  ·kcon+0x438(SB)/8, $0x0405060700010203
   221	GLOBL ·kcon(SB), RODATA, $1088	// 0x440 bytes = 64 constants * 16 + 16 (zero entry) + 3 * 16 (permute vectors)
   222	
      	// SHA256ROUND0 performs one compression round with no message-schedule
      	// expansion. a-h name the vector registers that currently hold the working
      	// variables and xi holds this round's Wt. The macro never adds the current
      	// Kt itself: h must already include it (the previous round, or the code
      	// before the first round, adds it in advance).
      	//
      	//   FUNC = Ch(e,f,g)            (VSEL g, f with e as the select mask)
      	//   S1   = BIGSIGMA1(e), S0 = BIGSIGMA0(a)
      	//   h    = h + xi + FUNC + S1   (this is T1)
      	//   FUNC = Maj(a,b,c)           (VSEL b, c with a^b as the select mask)
      	//   g    = g + KI               (pre-adds the next round's K into the next h)
      	//   d    = d + T1               (next round's e)
      	//   h    = T1 + S0 + FUNC       (next round's a)
      	//
      	// Callers rotate the register arguments between rounds instead of moving
      	// values. KI is then reloaded from (TBL)(IDX) and IDX advances by 16.
      	// Clobbers FUNC, S0, S1, KI and IDX.
   223	#define SHA256ROUND0(a, b, c, d, e, f, g, h, xi) \
   224		VSEL		g, f, e, FUNC; \
   225		VSHASIGMAW	$15, e, $1, S1; \
   226		VADDUWM		xi, h, h; \
   227		VSHASIGMAW	$0, a, $1, S0; \
   228		VADDUWM		FUNC, h, h; \
   229		VXOR		b, a, FUNC; \
   230		VADDUWM		S1, h, h; \
   231		VSEL		b, c, FUNC, FUNC; \
   232		VADDUWM		KI, g, g; \
   233		VADDUWM		h, d, d; \
   234		VADDUWM		FUNC, S0, S0; \
   235		LVX		(TBL)(IDX), KI; \
   236		ADD		$16, IDX; \
   237		VADDUWM		S0, h, h
   238	
      	// SHA256ROUND1 performs the same compression round as SHA256ROUND0 (a-h
      	// and xi have the same meaning, and h must already include Kt) and, in the
      	// same instruction stream, extends the message schedule by one word:
      	//
      	//   s0 = sigma0(xj_1), s1 = sigma1(xj_14)   (VSHASIGMAW with the $0 form)
      	//   xj = xj + xj_9 + s0 + s1
      	//
      	// With xj holding W[t], xj_1 = W[t+1], xj_9 = W[t+9] and xj_14 = W[t+14],
      	// xj is overwritten with W[t+16]. At every call site in this file xj is
      	// the register after xi in the V8-V23 ring, so the word produced here is
      	// consumed sixteen rounds later.
      	// Clobbers FUNC, S0, S1, s0, s1, KI and IDX.
   239	#define SHA256ROUND1(a, b, c, d, e, f, g, h, xi, xj, xj_1, xj_9, xj_14) \
   240		VSHASIGMAW	$0, xj_1, $0, s0; \
   241		VSEL		g, f, e, FUNC; \
   242		VSHASIGMAW	$15, e, $1, S1; \
   243		VADDUWM		xi, h, h; \
   244		VSHASIGMAW	$0, a, $1, S0; \
   245		VSHASIGMAW	$15, xj_14, $0, s1; \
   246		VADDUWM		FUNC, h, h; \
   247		VXOR		b, a, FUNC; \
   248		VADDUWM		xj_9, xj, xj; \
   249		VADDUWM		S1, h, h; \
   250		VSEL		b, c, FUNC, FUNC; \
   251		VADDUWM		KI, g, g; \
   252		VADDUWM		h, d, d; \
   253		VADDUWM		FUNC, S0, S0; \
   254		VADDUWM		s0, xj, xj; \
   255		LVX		(TBL)(IDX), KI; \
   256		ADD		$16, IDX; \
   257		VADDUWM		S0, h, h; \
   258		VADDUWM		s1, xj, xj
   259	
   260	// func block(dig *digest, p []byte)
      	//
      	// block runs the 64-round SHA-256 compression over every complete 64-byte
      	// chunk of p and stores the updated eight state words back through dig.
      	// The length is rounded down to a multiple of 64, so trailing bytes are
      	// ignored; if no complete chunk remains the function returns without
      	// reading or writing dig.
      	//
      	// Per chunk: a-h are saved to the stack, rounds 0-14 use SHA256ROUND0,
      	// round 15 plus 3 x 16 further rounds use SHA256ROUND1, and the saved
      	// values are then added back into a-h.
   261	TEXT ·block(SB),0,$128-32
   262		MOVD	dig+0(FP), CTX
   263		MOVD	p_base+8(FP), INP
   264		MOVD	p_len+16(FP), LEN
   265	
   266		SRD	$6, LEN	// LEN = (LEN >> 6) << 6: drop any partial trailing chunk
   267		SLD	$6, LEN
   268	
   269		ADD	INP, LEN, END	// END = first byte past the last whole chunk
   270	
   271		CMP	INP, END
   272		BEQ	end	// nothing to hash
   273	
   274		MOVD	$·kcon(SB), TBL
   275		MOVD	R1, OFFLOAD	// NOTE(review): the a-h save area is 0x00..0x7F(R1), i.e. it starts at the stack pointer itself; confirm this does not overlap anything the ABI keeps at 0(R1), and that R1 is 16-byte aligned as STVX/LVX need
   276	
   277		MOVD	R0, CNT	// CNT is not read again in the visible code
   278		MOVWZ	$0x10, HEX10	// constant index registers for the indexed vector loads/stores
   279		MOVWZ	$0x20, HEX20
   280		MOVWZ	$0x30, HEX30
   281		MOVWZ	$0x40, HEX40
   282		MOVWZ	$0x50, HEX50
   283		MOVWZ	$0x60, HEX60
   284		MOVWZ	$0x70, HEX70
   285	
   286		MOVWZ	$8, IDX
   287		LVSL	(IDX)(R0), LEMASK	// build LEMASK: LVSL pattern for address IDX+R0 ...
   288		VSPLTISB	$0x0F, KI	// ... with every byte XORed with 0x0F (KI is only a temporary here)
   289		VXOR	KI, LEMASK, LEMASK
   290	
   291		LXVW4X	(CTX)(HEX00), VS32	// v0 = vs32; first 16 bytes at CTX (presumably state words 0-3 - confirm digest layout)
   292		LXVW4X	(CTX)(HEX10), VS36	// v4 = vs36; next 16 bytes at CTX (presumably state words 4-7)
   293	
   294		// unpack the input values into vector registers: V1-V3 and V5-V7 are
      		// V0 and V4 rotated by 4, 8 and 12 bytes, giving one register per state word
   295		VSLDOI	$4, V0, V0, V1
   296		VSLDOI	$8, V0, V0, V2
   297		VSLDOI	$12, V0, V0, V3
   298		VSLDOI	$4, V4, V4, V5
   299		VSLDOI	$8, V4, V4, V6
   300		VSLDOI	$12, V4, V4, V7
   301	
   302	loop:
   303		LVX	(TBL)(HEX00), KI	// KI = K0
   304		MOVWZ	$16, IDX	// IDX now addresses K1
   305	
   306		LXVD2X	(INP)(R0), VS40	// load v8 (=vs40) in advance
   307		ADD	$16, INP
   308	
   309		STVX	V0, (OFFLOAD+HEX00)	// save a-h so they can be added back after round 63
   310		STVX	V1, (OFFLOAD+HEX10)
   311		STVX	V2, (OFFLOAD+HEX20)
   312		STVX	V3, (OFFLOAD+HEX30)
   313		STVX	V4, (OFFLOAD+HEX40)
   314		STVX	V5, (OFFLOAD+HEX50)
   315		STVX	V6, (OFFLOAD+HEX60)
   316		STVX	V7, (OFFLOAD+HEX70)
   317	
   318		VADDUWM	KI, V7, V7	// h+K[i]; the round macros expect h to already include Kt
   319		LVX	(TBL)(IDX), KI	// KI = K1, which round 0 pre-adds for round 1
   320		ADD	$16, IDX
   321	
   322		VPERM	V8, V8, LEMASK, V8	// reorder the loaded message bytes with LEMASK
   323		SHA256ROUND0(V0, V1, V2, V3, V4, V5, V6, V7, V8)	// round 0
   324		VSLDOI	$4, V8, V8, V9	// rotate by one word so the next Wt sits in the same lane
   325		SHA256ROUND0(V7, V0, V1, V2, V3, V4, V5, V6, V9)
   326		VSLDOI	$4, V9, V9, V10
   327		SHA256ROUND0(V6, V7, V0, V1, V2, V3, V4, V5, V10)
   328		LXVD2X	(INP)(R0), VS44	// load v12 (=vs44) in advance
   329		ADD	$16, INP, INP
   330		VSLDOI	$4, V10, V10, V11
   331		SHA256ROUND0(V5, V6, V7, V0, V1, V2, V3, V4, V11)
   332		VPERM	V12, V12, LEMASK, V12
   333		SHA256ROUND0(V4, V5, V6, V7, V0, V1, V2, V3, V12)
   334		VSLDOI	$4, V12, V12, V13
   335		SHA256ROUND0(V3, V4, V5, V6, V7, V0, V1, V2, V13)
   336		VSLDOI	$4, V13, V13, V14
   337		SHA256ROUND0(V2, V3, V4, V5, V6, V7, V0, V1, V14)
   338		LXVD2X	(INP)(R0), VS48	// load v16 (=vs48) in advance
   339		ADD	$16, INP, INP
   340		VSLDOI	$4, V14, V14, V15
   341		SHA256ROUND0(V1, V2, V3, V4, V5, V6, V7, V0, V15)
   342		VPERM	V16, V16, LEMASK, V16
   343		SHA256ROUND0(V0, V1, V2, V3, V4, V5, V6, V7, V16)
   344		VSLDOI	$4, V16, V16, V17
   345		SHA256ROUND0(V7, V0, V1, V2, V3, V4, V5, V6, V17)
   346		VSLDOI	$4, V17, V17, V18
   347		SHA256ROUND0(V6, V7, V0, V1, V2, V3, V4, V5, V18)
   348		VSLDOI	$4, V18, V18, V19
   349		LXVD2X	(INP)(R0), VS52	// load v20 (=vs52) in advance
   350		ADD	$16, INP, INP	// INP has now advanced by 64: one whole chunk consumed
   351		SHA256ROUND0(V5, V6, V7, V0, V1, V2, V3, V4, V19)
   352		VPERM	V20, V20, LEMASK, V20
   353		SHA256ROUND0(V4, V5, V6, V7, V0, V1, V2, V3, V20)
   354		VSLDOI	$4, V20, V20, V21
   355		SHA256ROUND0(V3, V4, V5, V6, V7, V0, V1, V2, V21)
   356		VSLDOI	$4, V21, V21, V22
   357		SHA256ROUND0(V2, V3, V4, V5, V6, V7, V0, V1, V22)	// round 14
   358		VSLDOI	$4, V22, V22, V23
   359		SHA256ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V23, V8, V9, V17, V22)	// round 15; also starts extending the schedule (V8 becomes W16)
   360	
   361		MOVWZ	$3, TEMP	// 3 passes of 16 rounds = rounds 16..63
   362		MOVWZ	TEMP, CTR
   363	
   364	L16_xx:
   365		SHA256ROUND1(V0, V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V18, V23)
   366		SHA256ROUND1(V7, V0, V1, V2, V3, V4, V5, V6, V9, V10, V11, V19, V8)
   367		SHA256ROUND1(V6, V7, V0, V1, V2, V3, V4, V5, V10, V11, V12, V20, V9)
   368		SHA256ROUND1(V5, V6, V7, V0, V1, V2, V3, V4, V11, V12, V13, V21, V10)
   369		SHA256ROUND1(V4, V5, V6, V7, V0, V1, V2, V3, V12, V13, V14, V22, V11)
   370		SHA256ROUND1(V3, V4, V5, V6, V7, V0, V1, V2, V13, V14, V15, V23, V12)
   371		SHA256ROUND1(V2, V3, V4, V5, V6, V7, V0, V1, V14, V15, V16, V8, V13)
   372		SHA256ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V15, V16, V17, V9, V14)
   373		SHA256ROUND1(V0, V1, V2, V3, V4, V5, V6, V7, V16, V17, V18, V10, V15)
   374		SHA256ROUND1(V7, V0, V1, V2, V3, V4, V5, V6, V17, V18, V19, V11, V16)
   375		SHA256ROUND1(V6, V7, V0, V1, V2, V3, V4, V5, V18, V19, V20, V12, V17)
   376		SHA256ROUND1(V5, V6, V7, V0, V1, V2, V3, V4, V19, V20, V21, V13, V18)
   377		SHA256ROUND1(V4, V5, V6, V7, V0, V1, V2, V3, V20, V21, V22, V14, V19)
   378		SHA256ROUND1(V3, V4, V5, V6, V7, V0, V1, V2, V21, V22, V23, V15, V20)
   379		SHA256ROUND1(V2, V3, V4, V5, V6, V7, V0, V1, V22, V23, V8, V16, V21)
   380		SHA256ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V23, V8, V9, V17, V22)
   381	
   382		BC	0x10, 0, L16_xx		// bdnz
   383	
   384		LVX	(OFFLOAD)(HEX00), V10	// reload the saved a-h (V10-V17 are free again) ...
   385	
   386		LVX	(OFFLOAD)(HEX10), V11
   387		VADDUWM	V10, V0, V0	// ... and add them into the new a-h, interleaved with the loads
   388		LVX	(OFFLOAD)(HEX20), V12
   389		VADDUWM	V11, V1, V1
   390		LVX	(OFFLOAD)(HEX30), V13
   391		VADDUWM	V12, V2, V2
   392		LVX	(OFFLOAD)(HEX40), V14
   393		VADDUWM	V13, V3, V3
   394		LVX	(OFFLOAD)(HEX50), V15
   395		VADDUWM	V14, V4, V4
   396		LVX	(OFFLOAD)(HEX60), V16
   397		VADDUWM	V15, V5, V5
   398		LVX	(OFFLOAD)(HEX70), V17
   399		VADDUWM	V16, V6, V6
   400		VADDUWM	V17, V7, V7
   401	
   402		CMPU	INP, END
   403		BLT	loop	// more whole chunks remain
   404	
   405		LVX	(TBL)(IDX), V8	// IDX is 0x420 here: second permutation control vector
   406		ADD	$16, IDX
   407		VPERM	V0, V1, KI, V0	// KI holds the vector at kcon+0x410, loaded by the final round
   408		LVX	(TBL)(IDX), V9	// third permutation control vector (kcon+0x430)
   409		VPERM	V4, V5, KI, V4
   410		VPERM	V0, V2, V8, V0	// successively merge V1, V2, V3 into V0 (and V5, V6, V7 into V4)
   411		VPERM	V4, V6, V8, V4
   412		VPERM	V0, V3, V9, V0
   413		VPERM	V4, V7, V9, V4
   414		STXVD2X	VS32, (CTX+HEX00)	// v0 = vs32; write the first 16 bytes of state back
   415		STXVD2X	VS36, (CTX+HEX10)	// v4 = vs36; write the next 16 bytes of state back
   416	
   417	end:
   418		RET
   419	

View as plain text