Text file src/internal/bytealg/equal_arm64.s
1 // Copyright 2018 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "go_asm.h"
6 #include "textflag.h"
7
// memequal(a, b unsafe.Pointer, size uintptr) bool
//
// Reports whether the size bytes at a and the size bytes at b are identical.
// A zero size is answered immediately with true; every other size is handed
// to memeqbody<> by a tail branch, so memeqbody<> writes the result and
// returns directly to our caller.
TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
	MOVD	size+16(FP), R1
	CBNZ	R1, compare
	// size == 0: nothing to compare, so the answer is true.
	MOVD	$1, R0
	MOVB	R0, ret+24(FP)
	RET
compare:
	// Set up the register contract memeqbody<> expects:
	// R0 = a, R1 = size, R2 = b, R8 = address of the result slot.
	MOVD	$ret+24(FP), R8
	MOVD	b+8(FP), R2
	MOVD	a+0(FP), R0
	B	memeqbody<>(SB)
21
// memequal_varlen(a, b unsafe.Pointer) bool
//
// Like memequal, but the length is not an argument: it is read from offset 8
// of the closure addressed by R26. Identical pointers and a zero length both
// produce true without calling memequal.
TEXT runtime·memequal_varlen(SB),NOSPLIT,$40-17
	MOVD	a+0(FP), R3
	MOVD	b+8(FP), R4
	CMP	R3, R4
	BNE	load_size
report_true:
	// Reached when a == b or when the length is zero.
	MOVD	$1, R3
	MOVB	R3, ret+16(FP)
	RET
load_size:
	MOVD	8(R26), R5		// compiler stores size at offset 8 in the closure
	CBZ	R5, report_true
	// Place memequal's arguments (a, b, size) at 8, 16 and 24(RSP).
	MOVD	R5, 24(RSP)
	MOVD	R4, 16(RSP)
	MOVD	R3, 8(RSP)
	BL	runtime·memequal(SB)
	// memequal leaves its bool result at 32(RSP); forward it to our caller.
	MOVBU	32(RSP), R3
	MOVB	R3, ret+16(FP)
	RET
41
// memeqbody<> compares R1 bytes starting at R0 with R1 bytes starting at R2
// and stores the outcome as one byte through R8 (1 = equal, 0 = different).
//
// Registers on entry:
//	R0: pointer a
//	R1: data len
//	R2: pointer b
//	R8: address to put result
//
// Modified: R0-R7, R9, V0-V11 and the condition flags. R8 is only read.
TEXT memeqbody<>(SB),NOSPLIT,$0
	// A single byte gets a dedicated path for better performance.
	CMP	$1, R1
	BEQ	single
	// Fewer than 16 bytes: go straight to the short-length code.
	CMP	$16, R1
	BLO	under16
	// R3 = len rounded down to a multiple of 64; zero means no 64-byte block.
	BIC	$63, R1, R3
	CBZ	R3, blocks16
	ADD	R3, R0, R6		// R6 = address where the 64-byte blocks end
loop64:
	// Load 64 bytes from each side, advancing both pointers.
	VLD1.P	(R0), [V0.D2, V1.D2, V2.D2, V3.D2]
	VLD1.P	(R2), [V4.D2, V5.D2, V6.D2, V7.D2]
	// Every 64-bit lane becomes all ones when equal and zero when not.
	VCMEQ	V0.D2, V4.D2, V8.D2
	VCMEQ	V1.D2, V5.D2, V9.D2
	VCMEQ	V2.D2, V6.D2, V10.D2
	VCMEQ	V3.D2, V7.D2, V11.D2
	// Fold the four comparison masks into V8.
	VAND	V9.B16, V8.B16, V8.B16
	VAND	V10.B16, V8.B16, V8.B16
	VAND	V11.B16, V8.B16, V8.B16
	VMOV	V8.D[0], R4
	VMOV	V8.D[1], R5
	// A zero lane in the folded mask means some lane differed.
	CBZ	R4, differ
	CBZ	R5, differ
	CMP	R0, R6
	BNE	loop64
	// Keep only the bytes left after the 64-byte blocks.
	AND	$63, R1, R1
	CBZ	R1, same
blocks16:
	// R3 = remaining len rounded down to a multiple of 16.
	BIC	$15, R1, R3
	CBZ	R3, under16
	ADD	R3, R0, R6		// R6 = address where the 16-byte blocks end
loop16:
	LDP.P	16(R0), (R4, R5)
	LDP.P	16(R2), (R7, R9)
	EOR	R4, R7, R7
	CBNZ	R7, differ
	EOR	R5, R9, R9
	CBNZ	R9, differ
	CMP	R0, R6
	BNE	loop16
	// Keep only the bytes left after the 16-byte blocks.
	AND	$15, R1, R1
	CBZ	R1, same
under16:
	// Fewer than 16 bytes remain. Bit 3 set means 8..15 bytes: compare the
	// first 8 and the last 8 bytes; the two loads may overlap.
	TBZ	$3, R1, under8
	MOVD	(R0), R4
	MOVD	(R2), R5
	EOR	R4, R5, R5
	CBNZ	R5, differ
	SUB	$8, R1, R6		// R6 = offset of the last 8 bytes
	MOVD	(R0)(R6), R4
	MOVD	(R2)(R6), R5
	EOR	R4, R5, R5
	CBNZ	R5, differ
	B	same
under8:
	// Bit 2 set means 4..7 bytes: compare the first 4 and the last 4 bytes.
	TBZ	$2, R1, under4
	MOVWU	(R0), R4
	MOVWU	(R2), R5
	EOR	R4, R5, R5
	CBNZ	R5, differ
	SUB	$4, R1, R6		// R6 = offset of the last 4 bytes
	MOVWU	(R0)(R6), R4
	MOVWU	(R2)(R6), R5
	EOR	R4, R5, R5
	CBNZ	R5, differ
	B	same
under4:
	// Bit 1 set means 2..3 bytes: compare one halfword and step past it.
	TBZ	$1, R1, under2
	MOVHU.P	2(R0), R4
	MOVHU.P	2(R2), R5
	CMP	R4, R5
	BNE	differ
under2:
	// Bit 0 clear means no trailing byte is left to compare.
	TBZ	$0, R1, same
single:
	MOVBU	(R0), R4
	MOVBU	(R2), R5
	CMP	R4, R5
	BEQ	same
differ:
	MOVB	ZR, (R8)
	RET
same:
	MOVD	$1, R0
	MOVB	R0, (R8)
	RET
View as plain text