...

Text file src/internal/bytealg/equal_arm64.s

     1	// Copyright 2018 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	#include "go_asm.h"
     6	#include "textflag.h"
     7	
     8	// memequal(a, b unsafe.Pointer, size uintptr) bool - reports whether the size bytes at a and at b are identical
     9	TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25	// size 0 is answered here; any other size is handed to memeqbody<>
    10		MOVD	size+16(FP), R1	// R1 = size
    11		// short path to handle 0-byte case
    12		CBZ	R1, equal
    13		MOVD	a+0(FP), R0	// R0 = a
    14		MOVD	b+8(FP), R2	// R2 = b
    15		MOVD	$ret+24(FP), R8	// R8 = address of the result slot, which memeqbody<> writes through
    16		B	memeqbody<>(SB)	// plain branch, not a call: memeqbody<> stores the result and executes RET itself
    17	equal:
    18		MOVD	$1, R0
    19		MOVB	R0, ret+24(FP)	// zero bytes always compare equal: ret = 1
    20		RET
    21	
    22	// memequal_varlen(a, b unsafe.Pointer) bool - like memequal, but the byte count is read from the closure in R26
    23	TEXT runtime·memequal_varlen(SB),NOSPLIT,$40-17	// returns 1 for identical pointers or zero size, otherwise calls memequal
    24		MOVD	a+0(FP), R3
    25		MOVD	b+8(FP), R4
    26		CMP	R3, R4
    27		BEQ	eq	// same pointer: equal without comparing any bytes
    28		MOVD	8(R26), R5    // compiler stores size at offset 8 in the closure
    29		CBZ	R5, eq	// zero size: equal
    30		MOVD	R3, 8(RSP)	// outgoing argument a for memequal
    31		MOVD	R4, 16(RSP)	// outgoing argument b
    32		MOVD	R5, 24(RSP)	// outgoing argument size
    33		BL	runtime·memequal(SB)
    34		MOVBU	32(RSP), R3	// result byte, in the slot following the three outgoing arguments
    35		MOVB	R3, ret+16(FP)	// forward it as this function's result
    36		RET
    37	eq:
    38		MOVD	$1, R3
    39		MOVB	R3, ret+16(FP)	// ret = 1
    40		RET
    41	
    42	// input:
    43	// R0: pointer a
    44	// R1: data len (in bytes)
    45	// R2: pointer b
    46	// R8: address to put result (one byte: 1 if equal, 0 if not)
    47	TEXT memeqbody<>(SB),NOSPLIT,$0	// compares R1 bytes at R0 and R2; overwrites R0-R7, R9, V0-V11 and the flags, leaves R8 intact
    48		CMP	$1, R1
    49		// handle 1-byte special case for better performance
    50		BEQ	one
    51		CMP	$16, R1
    52		// handle specially if length < 16
    53		BLO	tail
    54		BIC	$0x3f, R1, R3	// R3 = length rounded down to a multiple of 64
    55		CBZ	R3, chunk16	// fewer than 64 bytes: skip the vector loop
    56		// work with 64-byte chunks
    57		ADD	R3, R0, R6	// end of chunks
    58	chunk64_loop:
    59		VLD1.P	(R0), [V0.D2, V1.D2, V2.D2, V3.D2]	// load 64 bytes of a, post-incrementing R0
    60		VLD1.P	(R2), [V4.D2, V5.D2, V6.D2, V7.D2]	// load 64 bytes of b, post-incrementing R2
    61		VCMEQ	V0.D2, V4.D2, V8.D2	// per 64-bit lane: all ones if equal, zero otherwise
    62		VCMEQ	V1.D2, V5.D2, V9.D2
    63		VCMEQ	V2.D2, V6.D2, V10.D2
    64		VCMEQ	V3.D2, V7.D2, V11.D2
    65		VAND	V8.B16, V9.B16, V8.B16	// fold the four masks into V8: a lane stays nonzero only if it matched in all four
    66		VAND	V8.B16, V10.B16, V8.B16
    67		VAND	V8.B16, V11.B16, V8.B16
    68		CMP	R0, R6	// flags are consumed by the BNE below; the VMOV and CBZ in between do not change them
    69		VMOV	V8.D[0], R4	// low lane of the folded mask
    70		VMOV	V8.D[1], R5	// high lane of the folded mask
    71		CBZ	R4, not_equal	// a zero lane means some 8-byte word in this chunk differed
    72		CBZ	R5, not_equal
    73		BNE	chunk64_loop	// R0 has not reached the end of the 64-byte chunks yet
    74		AND	$0x3f, R1, R1	// R1 = bytes left after the 64-byte chunks
    75		CBZ	R1, equal
    76	chunk16:
    77		// work with 16-byte chunks
    78		BIC	$0xf, R1, R3	// R3 = remaining length rounded down to a multiple of 16
    79		CBZ	R3, tail	// fewer than 16 bytes left
    80		ADD	R3, R0, R6	// end of chunks
    81	chunk16_loop:
    82		LDP.P	16(R0), (R4, R5)	// 16 bytes of a, post-incrementing R0
    83		LDP.P	16(R2), (R7, R9)	// 16 bytes of b, post-incrementing R2
    84		EOR	R4, R7	// nonzero if the first 8 bytes differ
    85		CBNZ	R7, not_equal
    86		EOR	R5, R9	// nonzero if the second 8 bytes differ
    87		CBNZ	R9, not_equal
    88		CMP	R0, R6
    89		BNE	chunk16_loop
    90		AND	$0xf, R1, R1	// R1 = bytes left after the 16-byte chunks
    91		CBZ	R1, equal
    92	tail:
    93		// special compare of tail with length < 16
    94		TBZ	$3, R1, lt_8	// bit 3 clear: fewer than 8 bytes left
    95		MOVD	(R0), R4	// first 8 bytes of the tail
    96		MOVD	(R2), R5
    97		EOR	R4, R5
    98		CBNZ	R5, not_equal
    99		SUB	$8, R1, R6	// offset of the last 8 bytes
   100		MOVD	(R0)(R6), R4	// last 8 bytes; may overlap the first load, together they cover the whole tail
   101		MOVD	(R2)(R6), R5
   102		EOR	R4, R5
   103		CBNZ	R5, not_equal
   104		B	equal
   105	lt_8:
   106		TBZ	$2, R1, lt_4	// bit 2 clear: fewer than 4 bytes left
   107		MOVWU	(R0), R4	// first 4 bytes of the tail
   108		MOVWU	(R2), R5
   109		EOR	R4, R5
   110		CBNZ	R5, not_equal
   111		SUB	$4, R1, R6	// offset of the last 4 bytes
   112		MOVWU	(R0)(R6), R4	// last 4 bytes; may overlap the first load
   113		MOVWU	(R2)(R6), R5
   114		EOR	R4, R5
   115		CBNZ	R5, not_equal
   116		B	equal
   117	lt_4:
   118		TBZ	$1, R1, lt_2	// bit 1 clear: at most one byte left
   119		MOVHU.P	2(R0), R4	// compare 2 bytes and advance R0 so the code at one reads the byte after them
   120		MOVHU.P	2(R2), R5
   121		CMP	R4, R5
   122		BNE	not_equal
   123	lt_2:
   124		TBZ	$0, R1, equal	// bit 0 clear: nothing left to compare
   125	one:
   126		MOVBU	(R0), R4	// final single byte; also the direct entry for length 1
   127		MOVBU	(R2), R5
   128		CMP	R4, R5
   129		BNE	not_equal
   130	equal:
   131		MOVD	$1, R0
   132		MOVB	R0, (R8)	// result = 1
   133		RET
   134	not_equal:
   135		MOVB	ZR, (R8)	// result = 0
   136		RET

View as plain text