...

Text file src/crypto/cipher/xor_ppc64x.s

     1	// Copyright 2018 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// +build ppc64 ppc64le
     6	
     7	#include "textflag.h"
     8	
     9	// func xorBytesVSX(dst, a, b *byte, n int)
    10	TEXT ·xorBytesVSX(SB), NOSPLIT, $0
    11		MOVD	dst+0(FP), R3	// R3 = dst
    12		MOVD	a+8(FP), R4	// R4 = a
    13		MOVD	b+16(FP), R5	// R5 = b
    14		MOVD	n+24(FP), R6	// R6 = n
    15	
    16		CMPU	R6, $16, CR7	// Check if n ≥ 16 bytes
    17		MOVD	R0, R8		// R8 = index
    18		CMPU	R6, $8, CR6	// Check if 8 ≤ n < 16 bytes
    19		BGE	CR7, preloop16
    20		BLT	CR6, small
    21	
    22		// Case for 8 ≤ n < 16 bytes
    23		MOVD	(R4)(R8), R14	// R14 = a[i,...,i+7]
    24		MOVD	(R5)(R8), R15	// R15 = b[i,...,i+7]
    25		XOR	R14, R15, R16	// R16 = a[] ^ b[]
    26		SUB	$8, R6		// n = n - 8
    27		MOVD	R16, (R3)(R8)	// Store to dst
    28		ADD	$8, R8
    29	
    30		// Check if we're finished
    31		CMP	R6, R0
    32		BGT	small
    33		JMP	done
    34	
    35		// Case for n ≥ 16 bytes
    36	preloop16:
    37		SRD	$4, R6, R7	// Setup loop counter
    38		MOVD	R7, CTR
    39		ANDCC	$15, R6, R9	// Check for tailing bytes for later
    40	loop16:
    41		LXVD2X		(R4)(R8), VS32		// VS32 = a[i,...,i+15]
    42		LXVD2X		(R5)(R8), VS33		// VS33 = b[i,...,i+15]
    43		XXLXOR		VS32, VS33, VS34	// VS34 = a[] ^ b[]
    44		STXVD2X		VS34, (R3)(R8)		// Store to dst
    45		ADD		$16, R8			// Update index
    46		BC		16, 0, loop16		// bdnz loop16
    47	
    48		BEQ		CR0, done
    49		SLD		$4, R7
    50		SUB		R7, R6			// R6 = n - (R7 * 16)
    51	
    52		// Case for n < 8 bytes and tailing bytes from the
    53		// previous cases.
    54	small:
    55		MOVD	R6, CTR		// Setup loop counter
    56	
    57	loop:
    58		MOVBZ	(R4)(R8), R14	// R14 = a[i]
    59		MOVBZ	(R5)(R8), R15	// R15 = b[i]
    60		XOR	R14, R15, R16	// R16 = a[i] ^ b[i]
    61		MOVB	R16, (R3)(R8)	// Store to dst
    62		ADD	$1, R8
    63		BC	16, 0, loop	// bdnz loop
    64	
    65	done:
    66		RET

View as plain text