...

Text file src/pkg/vendor/golang.org/x/crypto/poly1305/sum_ppc64le.s

     1	// Copyright 2019 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// +build ppc64le,!gccgo,!appengine
     6	
     7	#include "textflag.h"
     8	
     9	// This was ported from the amd64 implementation.
    10	
      	// POLY1305_ADD(msg, h0, h1, h2, t0, t1, t2)
      	//
      	// Loads the 64-bit words at (msg) and 8(msg) into t0 and t1, sets t2 to 1,
      	// and adds t0, t1, t2 into h0, h1, h2 as a single three-word addition:
      	// ADDC starts the carry at h0 and the two ADDEs pass it on to h1 and h2.
      	// msg is then advanced by 16.
      	//
      	// msg, h0, h1, h2, t0, t1 and t2 are all overwritten.
    11	#define POLY1305_ADD(msg, h0, h1, h2, t0, t1, t2) \
    12		MOVD (msg), t0;  \
    13		MOVD 8(msg), t1; \
    14		MOVD $1, t2;     \
    15		ADDC t0, h0, h0; \
    16		ADDE t1, h1, h1; \
    17		ADDE t2, h2;     \
    18		ADD  $16, msg
    19	
      	// POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3, t4, t5)
      	//
      	// Multiplies the three-word value h0, h1, h2 by the two-word value r0, r1
      	// and leaves the folded result in h0, h1, h2.
      	//
      	// The partial products are formed with MULLD (low 64 bits) and MULHDU
      	// (high 64 bits) and summed into t0, t1, t2, t3, least significant word
      	// first. The sum is then folded back to three words: h2 receives only the
      	// low two bits of t2, while everything above those two bits (t2 with its
      	// low two bits cleared, and t3) is added into h0, h1 twice, once as is and
      	// once shifted right by two bits, with the carries rippling into h2.
      	// NOTE(review): that fold looks like reduction modulo 2^130 - 5 (adding
      	// 4c + c for the part c at and above bit 130); confirm against the
      	// reference implementation.
      	//
      	// r0 and r1 are only read. h0, h1, h2 and t0 through t5 are overwritten.
    20	#define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3, t4, t5) \
    21		MULLD  r0, h0, t0;  \
    22		MULLD  r0, h1, t4;  \
    23		MULHDU r0, h0, t1;  \
    24		MULHDU r0, h1, t5;  \
    25		ADDC   t4, t1, t1;  \
    26		MULLD  r0, h2, t2;  \
    27		ADDZE  t5;          \
    28		MULHDU r1, h0, t4;  \
    29		MULLD  r1, h0, h0;  \
    30		ADD    t5, t2, t2;  \
    31		ADDC   h0, t1, t1;  \
    32		MULLD  h2, r1, t3;  \
    33		ADDZE  t4, h0;      \
    34		MULHDU r1, h1, t5;  \
    35		MULLD  r1, h1, t4;  \
    36		ADDC   t4, t2, t2;  \
    37		ADDE   t5, t3, t3;  \
    38		ADDC   h0, t2, t2;  \
    39		MOVD   $-4, t4;     \
    40		MOVD   t0, h0;      \
    41		MOVD   t1, h1;      \
    42		ADDZE  t3;          \
    43		ANDCC  $3, t2, h2;  \
    44		AND    t2, t4, t0;  \
    45		ADDC   t0, h0, h0;  \
    46		ADDE   t3, h1, h1;  \
    47		SLD    $62, t3, t4; \
    48		SRD    $2, t2;      \
    49		ADDZE  h2;          \
    50		OR     t4, t2, t2;  \
    51		SRD    $2, t3;      \
    52		ADDC   t2, h0, h0;  \
    53		ADDE   t3, h1, h1;  \
    54		ADDZE  h2
    55	
      	// poly1305Mask<> is a file-local, read-only 16-byte constant made of two
      	// 64-bit words. initialize ANDs these two words with the first two 64-bit
      	// words of the key and stores the result in state[3] and state[4], which
      	// update later loads as r0 and r1.
    56	DATA ·poly1305Mask<>+0x00(SB)/8, $0x0FFFFFFC0FFFFFFF
    57	DATA ·poly1305Mask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC
    58	GLOBL ·poly1305Mask<>(SB), RODATA, $16
    59	
    60	// func update(state *[7]uint64, msg []byte)
      	//
      	// update absorbs msg into the three-word accumulator h0, h1, h2 kept in
      	// state[0..2], using the multiplier words r0, r1 read from state[3..4].
      	//
      	// Every full 16-byte block is added to h with POLY1305_ADD and then
      	// multiplied by r with POLY1305_MUL. A trailing block of 1 to 15 bytes is
      	// assembled in R16 (low word) and R17 (high word) with a single 1 bit
      	// placed immediately above its last byte, added to h, and sent through the
      	// same multiply. Only state[0..2] are written back.
      	//
      	// Register use:
      	//   R3  state pointer        R8, R9, R10   h0, h1, h2
      	//   R4  msg read pointer     R11, R12      r0, r1
      	//   R5  bytes remaining      R14, R16-R18, R20-R23  scratch
    61	
    62	TEXT ·update(SB), $0-32
    63		MOVD state+0(FP), R3
    64		MOVD msg_base+8(FP), R4
    65		MOVD msg_len+16(FP), R5
    66	
    67		MOVD 0(R3), R8   // h0 = state[0]
    68		MOVD 8(R3), R9   // h1 = state[1]
    69		MOVD 16(R3), R10 // h2 = state[2]
    70		MOVD 24(R3), R11 // r0 = state[3]
    71		MOVD 32(R3), R12 // r1 = state[4]
    72	
      		// Fewer than 16 bytes in total: go straight to the partial-block path.
    73		CMP R5, $16
    74		BLT bytes_between_0_and_15
    75	
    76	loop:
      		// h += next 16 bytes of msg (plus a 1 in the third word); R4 += 16.
    77		POLY1305_ADD(R4, R8, R9, R10, R20, R21, R22)
    78	
    79	multiply:
      		// h = h * r, folded back into h0, h1, h2. The partial-block path
      		// below also branches here after adding its padded block.
    80		POLY1305_MUL(R8, R9, R10, R11, R12, R16, R17, R18, R14, R20, R21)
    81		ADD $-16, R5
    82		CMP R5, $16
    83		BGE loop
    84	
    85	bytes_between_0_and_15:
      		// 0 to 15 bytes remain. Nothing left means we are finished.
    86		CMP  $0, R5
    87		BEQ  done
    88		MOVD $0, R16 // low word of the final block
    89		MOVD $0, R17 // high word of the final block
    90	
    91	flush_buffer:
    92		CMP R5, $8
    93		BLE just1
    94	
      		// R21 = R5 - 8 = number of bytes that belong in the high word.
    95		MOVD $8, R21
    96		SUB  R21, R5, R21
    97	
    98		// Greater than 8 -- load the rightmost remaining bytes in msg
    99		// and put into R17 (h1)
      		// The load covers the last 8 bytes of msg, so it never reads past
      		// the end of the slice.
   100		MOVD (R4)(R21), R17
   101		MOVD $16, R22
   102	
   103		// Find the offset to those bytes
      		// R22 = (16 - R5) * 8 = bit count of the bytes in the loaded word
      		// that were already part of the low word.
   104		SUB R5, R22, R22
   105		SLD $3, R22
   106	
   107		// Shift to get only the bytes in msg
   108		SRD R22, R17, R17
   109	
   110		// Put 1 at high end
      		// R23 = 1 << ((R5 - 8) * 8), i.e. the bit just above the last byte.
   111		MOVD $1, R23
   112		SLD  $3, R21
   113		SLD  R21, R23, R23
   114		OR   R23, R17, R17
   115	
   116		// Remainder is 8
   117		MOVD $8, R5
   118	
   119	just1:
   120		CMP R5, $8
   121		BLT less8
   122	
   123		// Exactly 8
   124		MOVD (R4), R16
   125	
   126		CMP $0, R17
   127	
   128		// Check if we've already set R17; if not
   129		// set 1 to indicate end of msg.
   130		BNE  carry
   131		MOVD $1, R17
   132		BR   carry
   133	
   134	less8:
      		// Fewer than 8 bytes: build the low word from a 4-byte, a 2-byte and
      		// a 1-byte load as needed. R22 tracks the bit position at which the
      		// next piece (and finally the 1 bit) is inserted.
   135		MOVD  $0, R16   // low word of the final block
   136		MOVD  $0, R22   // shift count
   137		CMP   R5, $4
   138		BLT   less4
   139		MOVWZ (R4), R16
   140		ADD   $4, R4
   141		ADD   $-4, R5
   142		MOVD  $32, R22
   143	
   144	less4:
   145		CMP   R5, $2
   146		BLT   less2
   147		MOVHZ (R4), R21
   148		SLD   R22, R21, R21
   149		OR    R16, R21, R16
   150		ADD   $16, R22
   151		ADD   $-2, R5
   152		ADD   $2, R4
   153	
   154	less2:
   155		CMP   $0, R5
   156		BEQ   insert1
   157		MOVBZ (R4), R21
   158		SLD   R22, R21, R21
   159		OR    R16, R21, R16
   160		ADD   $8, R22
   161	
   162	insert1:
   163		// Insert 1 at end of msg
   164		MOVD $1, R21
   165		SLD  R22, R21, R21
   166		OR   R16, R21, R16
   167	
   168	carry:
   169		// Add new values to h0, h1, h2
      		// h2 only has to absorb the carry out of h1. ADDZE is the dedicated
      		// "add carry" form and is what POLY1305_MUL uses for the same step,
      		// so no literal-zero operand is needed.
   170		ADDC R16, R8
   171		ADDE R17, R9
   172		ADDZE R10
      		// Set the remaining length to 16 so that the ADD $-16 after the
      		// multiply leaves 0 and control falls through to done. R4 is advanced
      		// as well but is not read again on this path.
   173		MOVD $16, R5
   174		ADD  R5, R4
   175		BR   multiply
   176	
   177	done:
   178		// Save h0, h1, h2 in state
   179		MOVD R8, 0(R3)
   180		MOVD R9, 8(R3)
   181		MOVD R10, 16(R3)
   182		RET
   183	
   184	// func initialize(state *[7]uint64, key *[32]byte)
   185	TEXT ·initialize(SB), $0-16
   186		MOVD key+8(FP), R4
   187		MOVD state+0(FP), R3
   188	
   189		// Only state[3..6] are written below; state[0..2] are left
   190		// exactly as the caller supplied them.
   191		MOVD $·poly1305Mask<>(SB), R9
   192	
   193		// Read all four 64-bit key words before anything is stored.
   194		MOVD 0(R4), R5
   195		MOVD 8(R4), R6
   196		MOVD 16(R4), R7
   197		MOVD 24(R4), R8
   198	
   199		// Fetch the two mask words.
   200		MOVD 0(R9), R10
   201		MOVD 8(R9), R11
   202	
   203		// Mask key words 0 and 1.
   204		AND R10, R5, R5
   205		AND R11, R6, R6
   206	
   207		// state[3], state[4] = masked key words 0 and 1
   208		MOVD R5, 24(R3)
   209		MOVD R6, 32(R3)
   210	
   211		// state[5], state[6] = unmodified key words 2 and 3
   212		MOVD R7, 40(R3)
   213		MOVD R8, 48(R3)
   214		RET
   215	
   216	// func finalize(tag *[TagSize]byte, state *[7]uint64)
      	//
      	// finalize reads h0, h1, h2 from state[0..2], runs a three-word
      	// subtract-with-carry chain over them, and depending on the outcome keeps
      	// either the adjusted or the original h0, h1. It then adds state[5] and
      	// state[6] to that pair, with the carry passed from the low word to the
      	// high word, and stores the two resulting 64-bit words to tag.
      	// state is only read, never written.
   217	TEXT ·finalize(SB), $0-16
   218		MOVD tag+0(FP), R3
   219		MOVD state+8(FP), R4
   220	
   221		// Get h0, h1, h2 from state
   222		MOVD 0(R4), R5
   223		MOVD 8(R4), R6
   224		MOVD 16(R4), R7
   225	
   226		// Save h0, h1
      		// R8, R9 keep the untouched h0, h1 for the selection further down.
      		// NOTE(review): the SUBC/SUBE/SUBE chain appears to compute
      		// h - (2^130 - 5) across the three words; confirm how the assembler
      		// encodes SUBC with a negative immediate.
   227		MOVD  R5, R8
   228		MOVD  R6, R9
   229		MOVD  $3, R20
   230		MOVD  $-1, R21
   231		SUBC  $-5, R5
   232		SUBE  R21, R6
   233		SUBE  R20, R7
      		// Turn the carry left by the chain into a value in R21 that the CMP
      		// below can test against zero.
   234		MOVD  $0, R21
   235		SUBZE R21
   236	
   237		// Check for carry
      		// ISEL $2 selects on the EQ bit of the CMP: when R21 is zero the
      		// adjusted R5, R6 are kept, otherwise the saved h0, h1 in R8, R9
      		// are restored.
   238		CMP  $0, R21
   239		ISEL $2, R5, R8, R5
   240		ISEL $2, R6, R9, R6
   241		MOVD 40(R4), R8 // state[5]
   242		MOVD 48(R4), R9 // state[6]
      		// Two-word add; any carry out of the high word is discarded.
   243		ADDC R8, R5
   244		ADDE R9, R6
   245		MOVD R5, 0(R3)
   246		MOVD R6, 8(R3)
   247		RET

View as plain text