...

Text file src/internal/bytealg/indexbyte_amd64p32.s

     1	// Copyright 2018 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	#include "go_asm.h"
     6	#include "textflag.h"
     7	
     8	TEXT ·IndexByte(SB),NOSPLIT,$0-20	// entry point for the b/c arguments: load registers, call indexbytebody, store ret
     9		MOVB c+12(FP), AL	// AL = byte sought
    10		MOVL b_len+4(FP), BX	// BX = length of b
    11		MOVL b_base+0(FP), SI	// SI = address of b's first byte
    12		CALL indexbytebody<>(SB)	// result comes back in AX
    13		MOVL AX, ret+16(FP)	// copy the result into the return slot
    14		RET
    15	
    16	TEXT ·IndexByteString(SB),NOSPLIT,$0-20	// entry point for the s/c arguments: load registers, call indexbytebody, store ret
    17		MOVB c+8(FP), AL	// AL = byte sought
    18		MOVL s_len+4(FP), BX	// BX = length of s
    19		MOVL s_base+0(FP), SI	// SI = address of s's first byte
    20		CALL indexbytebody<>(SB)	// result comes back in AX
    21		MOVL AX, ret+16(FP)	// copy the result into the return slot
    22		RET
    23	
    24	// indexbytebody: search routine called by IndexByte and IndexByteString. Input:
    25	//   SI: address of the first byte of the data
    26	//   BX: length of the data in bytes
    27	//   AL: byte sought
    28	// Output:
    29	//   AX: index of the first byte equal to AL, or -1 if there is none
    30	TEXT indexbytebody<>(SB),NOSPLIT,$0
    31		MOVL SI, DI	// DI is the scan cursor; SI keeps the start address for the final index computation
    32	
    33		CMPL BX, $16
    34		JLT small	// fewer than 16 bytes: scan byte by byte only
    35	
    36		// if SI is not already 16-byte aligned, compute the next 16-byte boundary in CX
    37		TESTL $15, SI
    38		JZ aligned
    39		MOVL SI, CX
    40		ANDL $~15, CX
    41		ADDL $16, CX
    42	
    43		// search the beginning: the CX = boundary - SI bytes that precede the boundary
    44		SUBL SI, CX
    45		REPN; SCASB	// compare AL with the byte at DI, advancing DI, until a match or CX bytes are done
    46		JZ success	// ZF set: the last byte compared matched
    47	
    48	// DI is 16-byte aligned; get ready to search using SSE instructions
    49	aligned:
    50		// R11 = end of the data (SI+BX) rounded down to a 16-byte boundary; the SSE loop stops there
    51		MOVL BX, R11
    52		ADDL SI, R11
    53		ANDL $~15, R11
    54	
    55		// shuffle X0 around so that each byte contains c
    56		MOVD AX, X0
    57		PUNPCKLBW X0, X0
    58		PUNPCKLBW X0, X0	// after two self-interleaves the low four bytes of X0 all equal AL
    59		PSHUFL $0, X0, X0	// copy the low 32 bits into all four 32-bit lanes
    60		JMP condition	// check the loop bound before loading the first chunk
    61	
    62	sse:
    63		// move the next 16-byte chunk of the buffer into X1 (DI is 16-byte aligned here)
    64		MOVO (DI), X1
    65		// compare bytes in X0 to X1; byte positions that match become 0xFF in X1
    66		PCMPEQB X0, X1
    67		// take the top bit of each byte in X1 and put the result in DX
    68		PMOVMSKB X1, DX
    69		TESTL DX, DX
    70		JNZ ssesuccess	// a set bit means some byte of this chunk equals c
    71		ADDL $16, DI
    72	
    73	condition:
    74		CMPL DI, R11
    75		JNE sse	// keep going until the cursor reaches the rounded-down end
    76	
    77		// search the end: CX = (SI+BX) - R11 bytes remain after the last full chunk
    78		MOVL SI, CX
    79		ADDL BX, CX
    80		SUBL R11, CX
    81		// if CX == 0, SUBL leaves the zero flag set and the scan below compares nothing,
    82		// so the JZ success after it would report a false match; bail out first
    83		JZ failure
    84		REPN; SCASB
    85		JZ success
    86	
    87	failure:
    88		MOVL $-1, AX	// not found
    89		RET
    90	
    91	// handle lengths < 16: scan all BX bytes one at a time
    92	small:
    93		MOVL BX, CX
    94		REPN; SCASB
    95		JZ success	// for BX == 0 nothing is compared and ZF is still clear from CMPL BX, $16 (BX != 16)
    96		MOVL $-1, AX	// not found
    97		RET
    98	
    99	// we've found the chunk containing the byte
   100	// now just figure out which specific byte it is
   101	ssesuccess:
   102		// get the index of the least significant set bit, i.e. the first match within the chunk
   103		BSFW DX, DX
   104		SUBL SI, DI	// DI = offset of the chunk from the start of the data
   105		ADDL DI, DX	// DX = chunk offset + position within the chunk
   106		MOVL DX, AX
   107		RET
   108	
   109	success:
   110		SUBL SI, DI
   111		SUBL $1, DI	// SCASB left DI one past the matching byte
   112		MOVL DI, AX
   113		RET

View as plain text