...

Text file src/internal/bytealg/equal_386.s

     1	// Copyright 2018 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	#include "go_asm.h"
     6	#include "textflag.h"
     7	
     8	// memequal(a, b unsafe.Pointer, size uintptr) bool -- stores 1 in ret if the size bytes at a and at b are equal, 0 otherwise.
     9	TEXT runtime·memequal(SB),NOSPLIT,$0-13	// frame size 0; 13 bytes of args+result: a, b, size (4 bytes each) and the 1-byte ret
    10		MOVL	a+0(FP), SI	// SI = a
    11		MOVL	b+4(FP), DI	// DI = b
    12		CMPL	SI, DI	// identical pointers need no byte comparison
    13		JEQ	eq
    14		MOVL	size+8(FP), BX	// BX = number of bytes to compare
    15		LEAL	ret+12(FP), AX	// AX = address of the result byte; memeqbody writes the answer through it
    16		JMP	memeqbody<>(SB)	// jump, not call: memeqbody stores the result and its RET returns to our caller
    17	eq:
    18		MOVB    $1, ret+12(FP)	// a == b: report equal without reading either buffer
    19		RET
    20	
    21	// memequal_varlen(a, b unsafe.Pointer) bool -- same comparison as memequal, but the byte count is read from offset 4 of the closure addressed by DX.
    22	TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9	// frame size 0; 9 bytes of args+result: a, b (4 bytes each) and the 1-byte ret
    23		MOVL    a+0(FP), SI	// SI = a
    24		MOVL    b+4(FP), DI	// DI = b
    25		CMPL    SI, DI	// identical pointers need no byte comparison
    26		JEQ     eq
    27		MOVL    4(DX), BX    // compiler stores size at offset 4 in the closure; BX = that size
    28		LEAL	ret+8(FP), AX	// AX = address of the result byte; memeqbody writes the answer through it
    29		JMP	memeqbody<>(SB)	// jump, not call: memeqbody stores the result and its RET returns to our caller
    30	eq:
    31		MOVB    $1, ret+8(FP)	// a == b: report equal without reading either buffer
    32		RET
    33	
    34	// a in SI (advanced or overwritten while comparing)
    35	// b in DI (advanced or overwritten while comparing)
    36	// count in BX (decremented by the loops)
    37	// address of result byte in AX; 1 is stored there if the two ranges are equal, 0 otherwise. CX, DX and X0-X7 are used as scratch.
    38	TEXT memeqbody<>(SB),NOSPLIT,$0-0	// file-local helper entered by JMP with its inputs in registers: no frame, no stack arguments
    39		CMPL	BX, $4
    40		JB	small	// 0-3 bytes: handled by one masked 32-bit comparison
    41	
    42		// 64 bytes at a time using xmm registers
    43	hugeloop:
    44		CMPL	BX, $64
    45		JB	bigloop	// fewer than 64 bytes left
    46		CMPB	internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1	// take the XMM path only when the HasSSE2 flag is set
    47		JNE	bigloop
    48		MOVOU	(SI), X0	// unaligned 16-byte loads: a -> X0,X2,X4,X6 and b -> X1,X3,X5,X7
    49		MOVOU	(DI), X1
    50		MOVOU	16(SI), X2
    51		MOVOU	16(DI), X3
    52		MOVOU	32(SI), X4
    53		MOVOU	32(DI), X5
    54		MOVOU	48(SI), X6
    55		MOVOU	48(DI), X7
    56		PCMPEQB	X1, X0	// per byte: 0xff where a and b match, 0x00 where they differ
    57		PCMPEQB	X3, X2
    58		PCMPEQB	X5, X4
    59		PCMPEQB	X7, X6
    60		PAND	X2, X0	// combine the four results: a byte lane stays 0xff only if it matched in every 16-byte group
    61		PAND	X6, X4
    62		PAND	X4, X0
    63		PMOVMSKB X0, DX	// DX = 16-bit mask built from the top bit of each byte of X0
    64		ADDL	$64, SI	// step past the 64 bytes just examined
    65		ADDL	$64, DI
    66		SUBL	$64, BX
    67		CMPL	DX, $0xffff	// all 16 bits set <=> all 64 bytes were equal
    68		JEQ	hugeloop
    69		MOVB	$0, (AX)	// mismatch found: result = false
    70		RET
    71	
    72		// 4 bytes at a time using 32-bit register
    73	bigloop:
    74		CMPL	BX, $4
    75		JBE	leftover	// 4 or fewer bytes left: finish with one load that ends at the end of the data
    76		MOVL	(SI), CX
    77		MOVL	(DI), DX
    78		ADDL	$4, SI
    79		ADDL	$4, DI
    80		SUBL	$4, BX
    81		CMPL	CX, DX	// flags come from this compare, after the pointer/count updates above
    82		JEQ	bigloop
    83		MOVB	$0, (AX)	// mismatch found: result = false
    84		RET
    85	
    86		// remaining 0-4 bytes
    87	leftover:
    88		MOVL	-4(SI)(BX*1), CX	// last 4 bytes of a (SI+BX is its end); may overlap bytes already compared, which is harmless. Total count was >= 4 on this path.
    89		MOVL	-4(DI)(BX*1), DX	// last 4 bytes of b
    90		CMPL	CX, DX
    91		SETEQ	(AX)	// result = (final words equal)
    92		RET
    93	
    94	small:
    95		CMPL	BX, $0
    96		JEQ	equal	// empty ranges are equal: ZF from this CMPL makes the SETEQ at equal store 1
    97	
    98		LEAL	0(BX*8), CX	// CX = 8*count (count is 1..3 here)
    99		NEGL	CX	// CX = -8*count; 32-bit shifts use the count modulo 32, so this shifts by 32-8*count
   100	
   101		MOVL	SI, DX
   102		CMPB	DX, $0xfc	// inspect the low byte of the address
   103		JA	si_high	// low byte 0xfd-0xff: a 4-byte load starting at SI could cross into the next page
   104	
   105		// load at SI won't cross a page boundary.
   106		MOVL	(SI), SI	// low count bytes of SI are the data; the higher bytes are don't-care
   107		JMP	si_finish
   108	si_high:
   109		// address ends in 111111xx (low byte 0xfd-0xff). Load up to bytes we want, move to correct position.
   110		MOVL	-4(SI)(BX*1), SI	// 4-byte load ending at the last wanted byte, so it never reads past the data
   111		SHRL	CX, SI	// move the wanted bytes down to the low end, as in the direct-load case
   112	si_finish:
   113	
   114		// same for DI.
   115		MOVL	DI, DX
   116		CMPB	DX, $0xfc
   117		JA	di_high
   118		MOVL	(DI), DI
   119		JMP	di_finish
   120	di_high:
   121		MOVL	-4(DI)(BX*1), DI
   122		SHRL	CX, DI
   123	di_finish:
   124	
   125		SUBL	SI, DI	// low count bytes of the difference are zero iff the wanted bytes match
   126		SHLL	CX, DI	// shift out the don't-care high bytes; ZF is set iff the wanted bytes were equal
   127	equal:
   128		SETEQ	(AX)	// store ZF as the boolean result (reached from the zero-length check or from the SHLL above)
   129		RET

View as plain text