...

Text file src/pkg/internal/bytealg/count_ppc64x.s

     1	// Copyright 2018 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// +build ppc64le ppc64
     6	
     7	#include "go_asm.h"
     8	#include "textflag.h"
     9	
    10	TEXT ·Count(SB), NOSPLIT|NOFRAME, $0-40 // frame layout matches func Count(b []byte, c byte) int - confirm against the Go declaration
    11		MOVBZ c+24(FP), R5        // R5 = byte value to look for, zero-extended
    12		MOVD  b_len+8(FP), R4     // R4 = number of bytes in the slice
    13		MOVD  b_base+0(FP), R3    // R3 = address of the first byte of the slice
    14		MOVD  $ret+32(FP), R14    // R14 = address of the result slot; the shared body stores through it
    15		BR    countbytebody<>(SB) // branch, not call: the shared body counts and executes the RET
    16	
    17	TEXT ·CountString(SB), NOSPLIT|NOFRAME, $0-32 // frame layout matches func CountString(s string, c byte) int - confirm against the Go declaration
    18		MOVBZ c+16(FP), R5        // R5 = byte value to look for, zero-extended
    19		MOVD  s_len+8(FP), R4     // R4 = number of bytes in the string
    20		MOVD  s_base+0(FP), R3    // R3 = address of the string's first byte
    21		MOVD  $ret+24(FP), R14    // R14 = address of the result slot; the shared body stores through it
    22		BR    countbytebody<>(SB) // branch, not call: the shared body counts and executes the RET
    23	
    24	// countbytebody counts the bytes equal to R5 in the R4 bytes starting at
    25	// R3 and stores the total through R14. Count and CountString reach it with
    26	// a branch rather than a call, so the RET below returns to their caller.
    27	//
    28	// Inputs:
    29	//   R3:  addr of string
    30	//   R4:  len of string
    31	//   R5:  byte to count
    32	//   R14: addr for return value
    33	//
    34	// Inputs of 32 bytes or more are first consumed 16 bytes at a time with
    35	// vector compares; what is left is handled 8 bytes at a time with CMPB and
    36	// finally one byte at a time. Shorter inputs skip the vector loop.
    37	//
    38	// Registers written: R3, R6, R9, R10, R12, R15-R18, V0-V6.
    39	//
    40	// endianness shouldn't matter since we are just counting and order
    41	// is irrelevant
    42	TEXT countbytebody<>(SB), NOSPLIT|NOFRAME, $0-0
    43		DCBT (R3)    // Prepare cache line.
    44		MOVD R0, R18 // R18 = running match count; relies on R0 holding zero
    45		MOVD R4, R16 // R16 = bytes remaining
    46	
    47		MOVD   R5, R6
    48		RLDIMI $8, R6, $48, R6  // low 16 bits = byte repeated twice
    49		RLDIMI $16, R6, $32, R6 // low 32 bits = byte repeated four times
    50		RLDIMI $32, R6, $0, R6  // fill reg with the byte to count
    51	
    52		VSPLTISW $3, V4     // used for shift
    53		MTVRD    R6, V1     // move compare byte
    54		VSPLTB   $7, V1, V1 // replicate byte across V1
    55	
    56		CMPU   R4, $32          // Check if it's a small string (<32 bytes)
    57		BLT    tail             // Jump to the small string case
    58		XXLXOR VS37, VS37, VS37 // clear V5 (aka VS37) to use as accumulator
    59	
    60	cmploop:
    61		LXVW4X (R3), VS32 // load 16 bytes from string into V0 (aka VS32)
    62	
    63		// when the bytes match, the corresponding byte contains all 1s,
    64		// so every match contributes 8 set bits to the population count
    65		VCMPEQUB V1, V0, V2     // compare bytes
    66		VPOPCNTD V2, V3         // each double word contains its count
    67		VADDUDM  V3, V5, V5     // accumulate bit count in each double word
    68		ADD      $16, R3, R3    // increment pointer
    69		SUB      $16, R16, R16  // remaining bytes
    70		CMP      R16, $16       // at least 16 remaining?
    71		BGE      cmploop
    72		VSRD     V5, V4, V5     // shift by 3 to convert bits to bytes
    73		VSLDOI   $8, V5, V5, V6 // get the double word values from vector
    74		MFVSRD   V5, R9         // R9 = count from one double word
    75		MFVSRD   V6, R10        // R10 = count from the other double word
    76		ADD      R9, R10, R9
    77		ADD      R9, R18, R18   // fold the vector total into the running count
    78	
    79	tail:
    80		CMP R16, $8 // 8 bytes left?
    81		BLT small
    82	
    83		MOVD    (R3), R12     // load 8 bytes
    84		CMPB    R12, R6, R17  // compare bytes: matching bytes become 0xFF
    85		POPCNTD R17, R15      // bit count
    86		SRD     $3, R15, R15  // byte count
    87		ADD     R15, R18, R18 // add to byte count
    88		ADD     $8, R3, R3    // increment pointer
    89		SUB     $8, R16, R16  // remaining bytes
    90		BR      tail
    91	
    92	small:
    93		CMP   $0, R16   // any remaining
    94		BEQ   done
    95		MOVBZ (R3), R12 // check each remaining byte
    96		CMP   R12, R5
    97		BNE   next2
    98		ADD   $1, R18
    99	
   100	next2:
   101		SUB $1, R16
   102		ADD $1, R3  // inc address
   103		BR  small
   104	
   105	done:
   106		MOVD R18, (R14) // return count
   107		RET

View as plain text