...

Text file src/crypto/cipher/xor_amd64.s

     1	// Copyright 2018 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	#include "textflag.h"
     6	
     7	// func xorBytesSSE2(dst, a, b *byte, n int): stores a[i] XOR b[i] into dst[i] for i in [0, n), trimming the tail down to a multiple of 16 before the 16-byte loop.
     8	TEXT ·xorBytesSSE2(SB), NOSPLIT, $0 // $0: no local stack frame.
     9		MOVQ  dst+0(FP), BX      // BX = dst
    10		MOVQ  a+8(FP), SI        // SI = a
    11		MOVQ  b+16(FP), CX       // CX = b
    12		MOVQ  n+24(FP), DX       // DX = n; on the tail paths it doubles as the end index of the unprocessed prefix.
    13		TESTQ $15, DX            // n & 15 != 0 means n is not a multiple of 16: trim the tail first.
    14		JNZ   not_aligned
    15	
    16	aligned: // DX is a multiple of 16 here. NOTE(review): DX == 0 would miss the exit compare below and run past the buffers; presumably callers pass n > 0 -- confirm.
    17		MOVQ $0, AX // AX = byte offset into dst/a/b, counting up from 0.
    18	
    19	loop16b:
    20		MOVOU (SI)(AX*1), X0   // X0 = 16 bytes of a at offset AX (unaligned load).
    21		MOVOU (CX)(AX*1), X1   // X1 = 16 bytes of b at offset AX.
    22		PXOR  X1, X0           // X0 ^= X1
    23		MOVOU X0, (BX)(AX*1)   // store the 16 result bytes to dst at offset AX.
    24		ADDQ  $16, AX
    25		CMPQ  DX, AX           // done once the offset reaches the remaining length.
    26		JNE   loop16b
    27		RET
    28	
    29	loop_1b: // peels one byte off the end of the unprocessed range per iteration.
    30		SUBQ  $1, DX           // step DX back to the last unprocessed byte.
    31		MOVB  (SI)(DX*1), DI   // DI = a[DX]
    32		MOVB  (CX)(DX*1), AX   // AX = b[DX]
    33		XORB  AX, DI           // DI ^= AX
    34		MOVB  DI, (BX)(DX*1)   // dst[DX] = DI
    35		TESTQ $7, DX           // keep peeling single bytes until DX is a multiple of 8.
    36		JNZ   loop_1b
    37		CMPQ  DX, $0           // nothing left to process: return.
    38		JE    ret
    39		TESTQ $15, DX          // DX a multiple of 16: go to the 16-byte loop; otherwise fall through to not_aligned.
    40		JZ    aligned
    41	
    42	not_aligned: // reached only with DX & 15 != 0 (from the entry check or the fall-through above).
    43		TESTQ $7, DX           // DX not a multiple of 8: peel single bytes first.
    44		JNE   loop_1b
    45		SUBQ  $8, DX           // DX is 8 mod 16 here: process the trailing 8 bytes as one quadword.
    46		MOVQ  (SI)(DX*1), DI   // DI = 8 bytes of a at offset DX
    47		MOVQ  (CX)(DX*1), AX   // AX = 8 bytes of b at offset DX
    48		XORQ  AX, DI           // DI ^= AX
    49		MOVQ  DI, (BX)(DX*1)   // store the 8 result bytes to dst at offset DX
    50		CMPQ  DX, $16          // DX is now a multiple of 16; if at least one 16-byte block remains, run the 16-byte loop.
    51		JGE   aligned
    52	
    53	ret: // shared exit for the paths that finish with DX == 0.
    54		RET

View as plain text