Text file src/pkg/internal/bytealg/count_ppc64x.s
1 // Copyright 2018 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // +build ppc64le ppc64
6
7 #include "go_asm.h"
8 #include "textflag.h"
9
// Count entry point for a byte slice argument.
//
// Argument frame as read below: slice base at +0, slice length at +8,
// the byte to look for at +24, and the result slot at +32 (presumably
// func Count(b []byte, c byte) int -- the Go declaration is not in this file).
//
// The arguments are placed in the registers countbytebody<> expects and
// control is transferred there with a plain branch, so countbytebody<>
// stores the result and returns directly to the Go caller.
TEXT ·Count(SB), NOSPLIT|NOFRAME, $0-40
	MOVD	$ret+32(FP), R14	// R14 = address of the result slot
	MOVBZ	c+24(FP), R5		// R5 = target byte, zero-extended
	MOVD	b_len+8(FP), R4		// R4 = number of bytes in the slice
	MOVD	b_base+0(FP), R3	// R3 = pointer to the slice data
	BR	countbytebody<>(SB)	// tail branch; does not come back here
16
// CountString entry point for a string argument.
//
// Argument frame as read below: string data pointer at +0, string length
// at +8, the byte to look for at +16, and the result slot at +24
// (presumably func CountString(s string, c byte) int -- the Go declaration
// is not in this file).
//
// The arguments are placed in the registers countbytebody<> expects and
// control is transferred there with a plain branch, so countbytebody<>
// stores the result and returns directly to the Go caller.
TEXT ·CountString(SB), NOSPLIT|NOFRAME, $0-32
	MOVD	$ret+24(FP), R14	// R14 = address of the result slot
	MOVBZ	c+16(FP), R5		// R5 = target byte, zero-extended
	MOVD	s_len+8(FP), R4		// R4 = number of bytes in the string
	MOVD	s_base+0(FP), R3	// R3 = pointer to the string data
	BR	countbytebody<>(SB)	// tail branch; does not come back here
23
// countbytebody<> counts the bytes equal to R5 among the R4 bytes starting
// at R3 and stores the total (8 bytes) through R14.
//
// Inputs:
//   R3:  address of the first byte to examine
//   R4:  number of bytes to examine
//   R5:  byte to count (the callers in this file load it zero-extended)
//   R14: address the result is stored to
//
// Registers written: R3, R6, R9, R10, R12, R15, R16, R17, R18,
// V0-V6 (V0 is addressed as VS32, V5 as VS37), and the condition
// register through the compare instructions.
//
// Work is done in three stages, each handing its leftover bytes to the next:
//   1. 16 bytes per iteration with vector compares (only when R4 >= 32),
//   2. 8 bytes per iteration with CMPB on a general-purpose register,
//   3. one byte at a time.
//
// Endianness does not matter: matches are only counted, so the order of
// the bytes inside a register is irrelevant.
TEXT countbytebody<>(SB), NOSPLIT|NOFRAME, $0-0
	DCBT	(R3)		// Prepare cache line.
	MOVD	R0, R18		// R18 = running match count (R0 is zero by Go's ppc64 convention)
	MOVD	R4, R16		// R16 = bytes remaining

	// Replicate the target byte into all 8 bytes of R6.
	MOVD	R5, R6
	RLDIMI	$8, R6, $48, R6		// low 2 bytes hold the target byte
	RLDIMI	$16, R6, $32, R6	// low 4 bytes hold the target byte
	RLDIMI	$32, R6, $0, R6		// all 8 bytes hold the target byte

	VSPLTISW	$3, V4		// shift amount 3, used below to turn bit counts into byte counts
	MTVRD	R6, V1			// move compare byte
	VSPLTB	$7, V1, V1		// replicate byte across V1

	CMPU	R4, $32			// Check if it's a small string (<32 bytes)
	BLT	tail			// Jump to the small string case
	XXLXOR	VS37, VS37, VS37	// clear V5 (aka VS37) to use as accumulator

cmploop:
	LXVW4X	(R3), VS32	// load 16 bytes from the string into V0 (aka VS32)

	// when the bytes match, the corresponding byte contains all 1s
	VCMPEQUB	V1, V0, V2	// compare bytes
	VPOPCNTD	V2, V3		// each double word contains its bit count (8 per match)
	VADDUDM	V3, V5, V5		// accumulate bit count in each double word
	ADD	$16, R3, R3		// increment pointer
	SUB	$16, R16, R16		// remaining bytes
	CMP	R16, $16		// at least 16 remaining?
	BGE	cmploop

	// Fold the two double word accumulators into the scalar count.
	VSRD	V5, V4, V5		// shift by 3 to convert bits to bytes
	VSLDOI	$8, V5, V5, V6		// get the double word values from vector
	MFVSRD	V5, R9
	MFVSRD	V6, R10
	ADD	R9, R10, R9
	ADD	R9, R18, R18

tail:
	CMP	R16, $8		// 8 bytes left?
	BLT	small

	MOVD	(R3), R12	// load 8 bytes
	CMPB	R12, R6, R17	// all 1s in each byte of R17 where R12 and R6 agree
	POPCNTD	R17, R15	// bit count
	SRD	$3, R15, R15	// byte count
	ADD	R15, R18, R18	// add to byte count

	ADD	$8, R3, R3	// advance pointer
	SUB	$8, R16, R16	// remaining bytes
	BR	tail

small:
	CMP	R16, $0		// any remaining
	BEQ	done
	MOVBZ	(R3), R12	// check each remaining byte
	CMP	R12, R5
	BNE	next2
	ADD	$1, R18

next2:
	SUB	$1, R16
	ADD	$1, R3		// inc address
	BR	small

done:
	MOVD	R18, (R14)	// return count
	RET
View as plain text