...

Text file src/internal/bytealg/equal_amd64p32.s

     1	// Copyright 2018 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	#include "go_asm.h"
     6	#include "textflag.h"
     7	
     8	// memequal(a, b unsafe.Pointer, size uintptr) bool -- reports whether the size bytes at a and the size bytes at b are equal.
     9	TEXT runtime·memequal(SB),NOSPLIT,$0-17	// frame size 0; 17 bytes of arguments and result (a at 0, b at 4, size at 8, ret at 16)
    10		MOVL	a+0(FP), SI	// SI = a
    11		MOVL	b+4(FP), DI	// DI = b
    12		CMPL	SI, DI	// same address?
    13		JEQ	eq	// yes: equal without reading any memory
    14		MOVL	size+8(FP), BX	// BX = number of bytes to compare
    15		CALL	memeqbody<>(SB)	// takes a in SI, b in DI, count in BX; leaves the result in AX
    16		MOVB	AX, ret+16(FP)	// store the low byte of AX as the bool result
    17		RET
    18	eq:
    19		MOVB    $1, ret+16(FP)	// identical pointers: result is true
    20		RET
    21	
    22	// memequal_varlen(a, b unsafe.Pointer) bool -- like memequal, but the byte count is read from 4(DX) instead of being passed as an argument.
    23	TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9	// frame size 0; 9 bytes of arguments and result (a at 0, b at 4, ret at 8)
    24		MOVL    a+0(FP), SI	// SI = a
    25		MOVL    b+4(FP), DI	// DI = b
    26		CMPL    SI, DI	// same address?
    27		JEQ     eq	// yes: equal without reading any memory (the size is never loaded on this path)
    28		MOVL    4(DX), BX    // compiler stores size at offset 4 in the closure
    29		CALL    memeqbody<>(SB)	// takes a in SI, b in DI, count in BX; leaves the result in AX
    30		MOVB    AX, ret+8(FP)	// store the low byte of AX as the bool result
    31		RET
    32	eq:
    33		MOVB    $1, ret+8(FP)	// identical pointers: result is true
    34		RET
    35	
    36	// a in SI
    37	// b in DI
    38	// count in BX; the result is left in AX (1 if all count bytes match, 0 otherwise). May overwrite BX, CX, DX, SI, DI, X0-X7 and the flags.
    39	TEXT memeqbody<>(SB),NOSPLIT,$0-0	// frame size 0, no stack arguments: inputs and result travel in registers
    40		XORQ	AX, AX	// AX = 0, so the early RETs taken on a mismatch return false
    41	
    42		CMPQ	BX, $8
    43		JB	small	// count < 8: use the short-input path
    44	
    45		// 64 bytes at a time using xmm registers
    46	hugeloop:
    47		CMPQ	BX, $64
    48		JB	bigloop	// fewer than 64 bytes remain
    49		MOVOU	(SI), X0	// unaligned 16-byte loads: X0, X2, X4, X6 take 64 bytes of a
    50		MOVOU	(DI), X1	// X1, X3, X5, X7 take the corresponding 64 bytes of b
    51		MOVOU	16(SI), X2
    52		MOVOU	16(DI), X3
    53		MOVOU	32(SI), X4
    54		MOVOU	32(DI), X5
    55		MOVOU	48(SI), X6
    56		MOVOU	48(DI), X7
    57		PCMPEQB	X1, X0	// each byte of X0 becomes 0xff where a and b match, 0 where they differ
    58		PCMPEQB	X3, X2
    59		PCMPEQB	X5, X4
    60		PCMPEQB	X7, X6
    61		PAND	X2, X0	// AND the four compare results together
    62		PAND	X6, X4
    63		PAND	X4, X0	// a byte of X0 stays 0xff only if that position matched in all four 16-byte chunks
    64		PMOVMSKB X0, DX	// DX = 16-bit mask, one bit per byte of X0 (the byte's top bit)
    65		ADDQ	$64, SI	// advance both pointers and shrink the count before the mask is tested
    66		ADDQ	$64, DI
    67		SUBQ	$64, BX
    68		CMPL	DX, $0xffff	// all 16 mask bits set means all 64 bytes matched
    69		JEQ	hugeloop
    70		RET	// mismatch: AX is still 0
    71	
    72		// 8 bytes at a time using 64-bit register
    73	bigloop:
    74		CMPQ	BX, $8
    75		JBE	leftover	// 8 or fewer bytes remain: finish with one load at the tail
    76		MOVQ	(SI), CX	// CX = next 8 bytes of a
    77		MOVQ	(DI), DX	// DX = next 8 bytes of b
    78		ADDQ	$8, SI
    79		ADDQ	$8, DI
    80		SUBQ	$8, BX
    81		CMPQ	CX, DX
    82		JEQ	bigloop
    83		RET	// mismatch: AX is still 0
    84	
    85		// remaining 0-8 bytes
    86	leftover:
    87		ADDQ	BX, SI	// SI = one past the last byte of a
    88		ADDQ	BX, DI	// DI = one past the last byte of b
    89		MOVQ	-8(SI), CX	// final 8 bytes of a; the count was >= 8 on entry to this path, so this may re-read compared bytes but does not start before a
    90		MOVQ	-8(DI), DX	// final 8 bytes of b
    91		CMPQ	CX, DX
    92		SETEQ	AX	// low byte of AX = 1 if the tails match, else 0
    93		RET
    94	
    95	small:
    96		CMPQ	BX, $0
    97		JEQ	equal	// count == 0: ZF from this compare makes the SETEQ at equal produce 1
    98	
    99		LEAQ	0(BX*8), CX	// CX = 8*count, the number of bits to compare
   100		NEGQ	CX	// CX = -8*count; 64-bit shifts use the count modulo 64, i.e. 64 - 8*count
   101	
   102		CMPB	SI, $0xf8	// look only at the low byte of the address of a
   103		JA	si_high	// low byte above 0xf8: an 8-byte load starting here might run into the next page
   104	
   105		// load at SI won't cross a page boundary.
   106		MOVQ	(SI), SI	// SI = 8 bytes starting at a; bytes past count are discarded by the final SHLQ
   107		JMP	si_finish
   108	si_high:
   109		// address ends in 11111xxx. Load up to bytes we want, move to correct position.
   110		MOVQ	BX, DX
   111		ADDQ	SI, DX	// DX = a + count
   112		MOVQ	-8(DX), SI	// SI = the 8 bytes ending at a + count
   113		SHRQ	CX, SI	// shift the wanted count bytes down to the low end of SI
   114	si_finish:
   115	
   116		// same for DI.
   117		CMPB	DI, $0xf8
   118		JA	di_high
   119		MOVQ	(DI), DI	// DI = 8 bytes starting at b
   120		JMP	di_finish
   121	di_high:
   122		MOVQ	BX, DX
   123		ADDQ	DI, DX	// DX = b + count
   124		MOVQ	-8(DX), DI	// DI = the 8 bytes ending at b + count
   125		SHRQ	CX, DI	// shift the wanted count bytes down to the low end of DI
   126	di_finish:
   127	
   128		SUBQ	SI, DI	// DI = difference of the two loaded words
   129		SHLQ	CX, DI	// shift out everything above the low count bytes; ZF is set iff those bytes were identical
   130	equal:
   131		SETEQ	AX	// low byte of AX = 1 if ZF is set (from the SHLQ above or from the count == 0 check)
   132		RET

View as plain text