Text file src/pkg/internal/bytealg/indexbyte_amd64p32.s
1 // Copyright 2018 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "go_asm.h"
6 #include "textflag.h"
7
8 TEXT ·IndexByte(SB),NOSPLIT,$0-20 // byte-slice entry point: $0 = no local frame, 20 = bytes of args + result
9 MOVL b_base+0(FP), SI // SI = pointer to the first byte of b
10 MOVL b_len+4(FP), BX // BX = number of bytes in b
11 MOVB c+12(FP), AL // AL = byte to look for (offset 12 presumably skips the slice cap word at +8 - confirm against the Go declaration)
12 CALL indexbytebody<>(SB) // shared search routine: takes SI/BX/AL, leaves the result in AX
13 MOVL AX, ret+16(FP) // store AX (match index, or -1 when not found) as the result
14 RET
15
16 TEXT ·IndexByteString(SB),NOSPLIT,$0-20 // string entry point: $0 = no local frame, 20 = bytes of args + result
17 MOVL s_base+0(FP), SI // SI = pointer to the first byte of s
18 MOVL s_len+4(FP), BX // BX = number of bytes in s
19 MOVB c+8(FP), AL // AL = byte to look for (sits directly after the pointer/length pair)
20 CALL indexbytebody<>(SB) // shared search routine: takes SI/BX/AL, leaves the result in AX
21 MOVL AX, ret+16(FP) // store AX (match index, or -1 when not found); the gap after c is presumably alignment padding - confirm
22 RET
23
24 // indexbytebody: find the first occurrence of AL in the BX bytes starting at SI.
25 // input: SI = data pointer
26 //        BX = data length in bytes
27 //        AL = byte sought
28 // output: AX = index of the first match relative to SI, or -1 if there is none
29 // Overwrites CX, DX, DI, R11, X0, X1 and the flags; SI and BX are only read.
30 TEXT indexbytebody<>(SB),NOSPLIT,$0
31 MOVL SI, DI // DI = scan cursor; SCASB compares AL with the byte at DI and advances DI
32
33 CMPL BX, $16
34 JLT small // fewer than 16 bytes: use the plain byte scan only
35
36 // round up to the first 16-byte boundary at or above SI
37 TESTL $15, SI
38 JZ aligned // SI is already 16-byte aligned: no head bytes to scan
39 MOVL SI, CX
40 ANDL $~15, CX
41 ADDL $16, CX // CX = first 16-byte boundary above SI
42
43 // search the unaligned head byte by byte
44 SUBL SI, CX // CX = number of bytes before that boundary (1..15 here)
45 REPN; SCASB // scan up to CX bytes; stops early with ZF=1 when a byte equals AL
46 JZ success
47
48 // DI is 16-byte aligned; get ready to search using SSE instructions
49 aligned:
50 // round the end address (SI+BX) down to the last 16-byte boundary
51 MOVL BX, R11
52 ADDL SI, R11
53 ANDL $~15, R11 // R11 = end of the region that is scanned in whole 16-byte chunks
54
55 // shuffle X0 around so that each of its 16 bytes contains c
56 MOVD AX, X0 // only the low byte (AL) matters; the unpacks below replicate it
57 PUNPCKLBW X0, X0 // c now fills bytes 0-1
58 PUNPCKLBW X0, X0 // c now fills bytes 0-3
59 PSHUFL $0, X0, X0 // copy the low 32 bits into all four 32-bit lanes
60 JMP condition // check the loop bound before loading the first chunk
61
62 sse:
63 // move the next 16-byte chunk of the buffer into X1
64 MOVO (DI), X1
65 // compare bytes in X0 to X1; each equal byte in X1 becomes 0xFF, the rest 0x00
66 PCMPEQB X0, X1
67 // take the top bit of each byte in X1 and put the result in DX
68 PMOVMSKB X1, DX
69 TESTL DX, DX
70 JNZ ssesuccess // some byte in this chunk matched
71 ADDL $16, DI
72
73 condition:
74 CMPL DI, R11
75 JNE sse // aligned chunks remain before R11
76
77 // search the unaligned tail byte by byte
78 MOVL SI, CX
79 ADDL BX, CX
80 SUBL R11, CX // CX = bytes left after the last whole chunk (0..15)
81 // if CX == 0, REPN SCASB would run zero iterations and leave ZF=1 from the
82 // SUBL above, so the JZ success below would report a false match; bail out first
83 JZ failure
84 REPN; SCASB
85 JZ success
86
87 failure:
88 MOVL $-1, AX
89 RET
90
91 // handle lengths < 16 with a plain byte scan
92 small:
93 MOVL BX, CX
94 REPN; SCASB // when BX == 0 nothing is scanned and ZF is still clear from CMPL BX, $16 above
95 JZ success
96 MOVL $-1, AX
97 RET
98
99 // we've found the chunk containing the byte
100 // now just figure out which specific byte it is
101 ssesuccess:
102 // get the index of the least significant set bit, i.e. the first matching byte in the chunk
103 BSFW DX, DX
104 SUBL SI, DI // DI = offset of this chunk from the start of the data
105 ADDL DI, DX // DX = chunk offset + position inside the chunk
106 MOVL DX, AX
107 RET
108
109 success:
110 SUBL SI, DI
111 SUBL $1, DI // SCASB left DI one past the matching byte, so step back by one
112 MOVL DI, AX
113 RET
View as plain text