Text file src/internal/bytealg/compare_amd64p32.s
1 // Copyright 2018 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "go_asm.h"
6 #include "textflag.h"
7
8 TEXT ·Compare(SB),NOSPLIT,$0-28 // no local frame; 28 bytes of arguments + result
9 MOVL a_base+0(FP), SI // SI = address of a's bytes (32-bit load, upper half of SI zeroed)
10 MOVL b_base+12(FP), DI // DI = address of b's bytes
11 MOVL a_len+4(FP), BX // BX = length of a
12 MOVL b_len+16(FP), DX // DX = length of b
13 CALL cmpbody<>(SB) // three-way compare; result comes back in AX
14 MOVL AX, ret+24(FP) // store the 32-bit result (1/0/-1) in the return slot
15 RET
16
17 TEXT runtime·cmpstring(SB),NOSPLIT,$0-20 // no local frame; 20 bytes of arguments + result
18 MOVL a_base+0(FP), SI // SI = address of a's bytes (32-bit load, upper half of SI zeroed)
19 MOVL b_base+8(FP), DI // DI = address of b's bytes
20 MOVL a_len+4(FP), BX // BX = length of a
21 MOVL b_len+12(FP), DX // DX = length of b
22 CALL cmpbody<>(SB) // three-way compare; result comes back in AX
23 MOVL AX, ret+16(FP) // store the 32-bit result (1/0/-1) in the return slot
24 RET
25
26 // cmpbody: three-way comparison of the byte ranges a and b. input:
27 // SI = a (address of first byte)
28 // DI = b (address of first byte)
29 // BX = alen (byte count)
30 // DX = blen (byte count)
31 // output (BX, CX, R8, SI, DI, X0, X1 and flags may be clobbered; DX is never written):
32 // AX = 1/0/-1 for a > b, a == b, a < b; first differing byte decides (unsigned), else the lengths
33 TEXT cmpbody<>(SB),NOSPLIT,$0-0
34 CMPQ SI, DI // same start address: contents cannot differ
35 JEQ allsame // so only the lengths decide
36 CMPQ BX, DX // flags for alen vs blen, consumed by CMOVQLT below
37 MOVQ DX, R8 // R8 = blen for now (MOVQ leaves the flags alone)
38 CMOVQLT BX, R8 // R8 = min(alen, blen) = # of bytes to compare
39 CMPQ R8, $8
40 JB small // fewer than 8 bytes: handled with one masked 8-byte load per side
41
42 loop: // compare 16 bytes per iteration while more than 16 remain
43 CMPQ R8, $16
44 JBE _0through16 // 16 or fewer left: finish with 8-byte loads
45 MOVOU (SI), X0 // unaligned load of 16 bytes of a
46 MOVOU (DI), X1 // unaligned load of 16 bytes of b
47 PCMPEQB X0, X1 // each byte of X1 = 0xff where a and b match, else 0
48 PMOVMSKB X1, AX // AX = 16-bit mask, one bit per byte, set where equal
49 XORQ $0xffff, AX // convert EQ to NE
50 JNE diff16 // branch if at least one byte is not equal
51 ADDQ $16, SI // all 16 matched: advance both pointers
52 ADDQ $16, DI
53 SUBQ $16, R8 // and reduce the remaining count
54 JMP loop
55
56 // AX = bit mask of differences (bit i set => byte i of this 16-byte chunk differs)
57 diff16:
58 BSFQ AX, BX // index of first byte that differs
59 XORQ AX, AX // zero AX first: SETHI below writes only its low byte
60 ADDQ BX, SI // SI -> differing byte of a
61 MOVB (SI), CX
62 ADDQ BX, DI // DI -> differing byte of b
63 CMPB CX, (DI) // a's byte vs b's byte
64 SETHI AX // AX = 1 if a's byte is higher (unsigned), else 0
65 LEAQ -1(AX*2), AX // convert 1/0 to +1/-1
66 RET
67
68 // 0 through 16 bytes left, alen>=8, blen>=8 (so the 8 bytes ending at the compared range are readable)
69 _0through16:
70 CMPQ R8, $8
71 JBE _0through8 // 8 or fewer left: a single tail load covers them
72 MOVQ (SI), AX // compare the next 8 bytes of each side
73 MOVQ (DI), CX
74 CMPQ AX, CX
75 JNE diff8
76 _0through8: // compare the 8 bytes that end exactly at the end of the compared range
77 ADDQ R8, SI // SI -> one past the last byte to compare in a
78 ADDQ R8, DI // DI -> one past the last byte to compare in b
79 MOVQ -8(SI), AX // may overlap bytes already compared; those were equal, so the outcome is unaffected
80 MOVQ -8(DI), CX
81 CMPQ AX, CX
82 JEQ allsame // whole compared range matches: lengths decide
83
84 // AX and CX contain parts of a and b that differ.
85 diff8:
86 BSWAPQ AX // reverse order of bytes
87 BSWAPQ CX // now the byte at the lowest address is the most significant in both
88 XORQ AX, CX // CX = bits where a and b differ
89 BSRQ CX, CX // index of highest bit difference
90 SHRQ CX, AX // move a's bit to bottom
91 ANDQ $1, AX // mask bit
92 LEAQ -1(AX*2), AX // 1/0 => +1/-1
93 RET
94
95 // R8 < 8 here: only 0-7 bytes to compare
96 small:
97 LEAQ (R8*8), CX // bytes left -> bits left
98 NEGQ CX // - bits left (== 64 - bits left mod 64)
99 JEQ allsame // ZF from NEGQ: R8 == 0, nothing to compare, lengths decide
100
101 // load bytes of a into high bytes of SI (low bytes zeroed)
102 CMPB SI, $0xf8 // low address byte above 0xf8: an 8-byte load at (SI) would cross a 256-byte boundary
103 JA si_high // presumably avoids reading into the next page - confirm
104 MOVQ (SI), SI // a's bytes land in the low bytes of SI
105 JMP si_finish
106 si_high:
107 ADDQ R8, SI // load the 8 bytes that end at a's last compared byte instead
108 MOVQ -8(SI), SI // a's bytes land in the high bytes of SI
109 SHRQ CX, SI // shift them down to the low bytes, dropping the bytes that precede a
110 si_finish:
111 SHLQ CX, SI // shift a's bytes to the top; everything below becomes zero
112
113 // load bytes of b into high bytes of DI (same scheme as for a above)
114 CMPB DI, $0xf8
115 JA di_high
116 MOVQ (DI), DI // b's bytes land in the low bytes of DI
117 JMP di_finish
118 di_high:
119 ADDQ R8, DI // load the 8 bytes that end at b's last compared byte instead
120 MOVQ -8(DI), DI // b's bytes land in the high bytes of DI
121 SHRQ CX, DI // shift them down to the low bytes, dropping the bytes that precede b
122 di_finish:
123 SHLQ CX, DI // shift b's bytes to the top; everything below becomes zero
124
125 BSWAPQ SI // reverse order of bytes
126 BSWAPQ DI
127 XORQ SI, DI // find bit differences
128 JEQ allsame // no differing bit in the compared bytes: lengths decide
129 BSRQ DI, CX // index of highest bit difference
130 SHRQ CX, SI // move a's bit to bottom
131 ANDQ $1, SI // mask bit
132 LEAQ -1(SI*2), AX // 1/0 => +1/-1
133 RET
134
135 allsame: // compared bytes are identical; order by length (BX = alen, DX = blen)
136 XORQ AX, AX // zero before CMPQ: XORQ changes flags and SETcc writes only the low byte
137 XORQ CX, CX
138 CMPQ BX, DX
139 SETGT AX // 1 if alen > blen
140 SETEQ CX // 1 if alen == blen
141 LEAQ -1(CX)(AX*2), AX // 1,0,-1 result
142 RET
View as plain text