Text file src/pkg/internal/bytealg/equal_amd64p32.s
1 // Copyright 2018 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "go_asm.h"
6 #include "textflag.h"
7
8 // memequal(a, b unsafe.Pointer, size uintptr) bool -- reports whether the size bytes at a and at b are identical.
9 TEXT runtime·memequal(SB),NOSPLIT,$0-17 // no local frame; 17 bytes of args+result: a at 0, b at 4, size at 8, ret at 16
10 MOVL a+0(FP), SI // SI = a
11 MOVL b+4(FP), DI // DI = b
12 CMPL SI, DI
13 JEQ eq // same pointer: the regions are trivially equal, so no memory is read
14 MOVL size+8(FP), BX // BX = byte count, the register memeqbody expects it in
15 CALL memeqbody<>(SB) // leaves 1 in AX if the regions match, 0 otherwise
16 MOVB AX, ret+16(FP) // the low byte of AX is the bool result
17 RET
18 eq:
19 MOVB $1, ret+16(FP) // return true
20 RET
21
22 // memequal_varlen(a, b unsafe.Pointer) bool -- like memequal, but the byte count is read from 4(DX) instead of being passed as an argument.
23 TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9 // no local frame; 9 bytes of args+result: a at 0, b at 4, ret at 8
24 MOVL a+0(FP), SI // SI = a
25 MOVL b+4(FP), DI // DI = b
26 CMPL SI, DI
27 JEQ eq // same pointer: equal without reading memory (the size is not even loaded)
28 MOVL 4(DX), BX // compiler stores size at offset 4 in the closure; DX is presumably the closure context pointer -- confirm against the Go ABI
29 CALL memeqbody<>(SB) // leaves 1 in AX if the regions match, 0 otherwise
30 MOVB AX, ret+8(FP) // the low byte of AX is the bool result
31 RET
32 eq:
33 MOVB $1, ret+8(FP) // return true
34 RET
35
36 // a in SI
37 // b in DI
38 // count in BX; result in AX: 1 if the count bytes at a and b are equal, 0 otherwise
39 TEXT memeqbody<>(SB),NOSPLIT,$0-0 // no frame, no stack arguments; may overwrite BX, CX, DX, SI, DI, X0-X7 and the flags
40 XORQ AX, AX // result defaults to 0 (not equal); also clears the upper bits, since SETEQ below writes only the low byte
41
42 CMPQ BX, $8
43 JB small // fewer than 8 bytes: a plain 8-byte load would read past the region, handle separately
44
45 // 64 bytes at a time using xmm registers
46 hugeloop:
47 CMPQ BX, $64
48 JB bigloop // fewer than 64 bytes left: continue 8 bytes at a time
49 MOVOU (SI), X0 // unaligned 16-byte loads: four chunks of a (X0, X2, X4, X6) ...
50 MOVOU (DI), X1 // ... paired with the matching four chunks of b (X1, X3, X5, X7)
51 MOVOU 16(SI), X2
52 MOVOU 16(DI), X3
53 MOVOU 32(SI), X4
54 MOVOU 32(DI), X5
55 MOVOU 48(SI), X6
56 MOVOU 48(DI), X7
57 PCMPEQB X1, X0 // bytewise compare: each result byte is 0xff where a and b agree, 0x00 where they differ
58 PCMPEQB X3, X2
59 PCMPEQB X5, X4
60 PCMPEQB X7, X6
61 PAND X2, X0 // AND the four masks together: a byte stays 0xff only if it matched in every chunk
62 PAND X6, X4
63 PAND X4, X0
64 PMOVMSKB X0, DX // DX = top bit of each of the 16 mask bytes
65 ADDQ $64, SI // advance both pointers and shrink the count before testing the mask
66 ADDQ $64, DI
67 SUBQ $64, BX
68 CMPL DX, $0xffff // all 16 bits set means all 64 bytes matched
69 JEQ hugeloop
70 RET // mismatch: AX is still 0
71
72 // 8 bytes at a time using 64-bit register
73 bigloop:
74 CMPQ BX, $8
75 JBE leftover // 8 or fewer bytes left: finish with a single load of the final 8 bytes
76 MOVQ (SI), CX // next 8 bytes of a
77 MOVQ (DI), DX // next 8 bytes of b
78 ADDQ $8, SI
79 ADDQ $8, DI
80 SUBQ $8, BX
81 CMPQ CX, DX
82 JEQ bigloop
83 RET // mismatch: AX is still 0
84
85 // remaining 0-8 bytes: compare the last 8 bytes of both regions, which may overlap bytes already compared
86 leftover:
87 ADDQ BX, SI // SI and DI now point just past the end of their regions
88 ADDQ BX, DI
89 MOVQ -8(SI), CX // the load stays inside the region: this path is only reached when the original count was >= 8
90 MOVQ -8(DI), DX
91 CMPQ CX, DX
92 SETEQ AX // AX = 1 if the final 8 bytes match
93 RET
94
95 small:
96 CMPQ BX, $0
97 JEQ equal // zero-length regions are equal; ZF from this compare makes SETEQ at equal produce 1
98
99 LEAQ 0(BX*8), CX // CX = 8*count, the bit width of the bytes we care about
100 NEGQ CX // CX = -8*count; 64-bit shifts use only the low 6 bits of CL, so shifting by CX shifts by 64-8*count
101
102 CMPB SI, $0xf8 // inspect only the low byte of the address
103 JA si_high // low byte above 0xf8: an 8-byte load at SI would run past the end of its 256-byte block
104
105 // load at SI won't cross a page boundary.
106 MOVQ (SI), SI // the wanted bytes land in the low 8*count bits; the bits above them are junk
107 JMP si_finish
108 si_high:
109 // address ends in 11111xxx. Load the 8 bytes that end at a+count, then shift the wanted bytes down to the low end.
110 MOVQ BX, DX
111 ADDQ SI, DX // DX = a + count (one past the last wanted byte)
112 MOVQ -8(DX), SI // the wanted bytes are now the top 8*count bits
113 SHRQ CX, SI // shift right by 64-8*count: wanted bytes move to the low end, upper bits become zero
114 si_finish:
115
116 // same for DI.
117 CMPB DI, $0xf8
118 JA di_high
119 MOVQ (DI), DI // wanted bytes of b in the low 8*count bits, junk above
120 JMP di_finish
121 di_high:
122 MOVQ BX, DX
123 ADDQ DI, DX // DX = b + count
124 MOVQ -8(DX), DI
125 SHRQ CX, DI // wanted bytes of b moved to the low end
126 di_finish:
127
128 SUBQ SI, DI // the low 8*count bits of the difference are zero iff the wanted bytes match
129 SHLQ CX, DI // shift the junk upper bits out; the result is zero (ZF set) iff the wanted bytes match
130 equal: // reached with flags from CMPQ BX, $0 (count == 0) or from the SHLQ above
131 SETEQ AX // AX = 1 if ZF is set, otherwise it keeps the 0 from the XORQ at entry
132 RET
View as plain text