...

Text file src/internal/bytealg/index_arm64.s

     1	// Copyright 2018 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	#include "go_asm.h"
     6	#include "textflag.h"
     7	
     8	TEXT ·Index(SB),NOSPLIT,$0-56	// entry stub: frame size 0, 56 bytes of arguments + result
     9		MOVD	a_base+0(FP), R0	// R0 = a_base, used by indexbody as the haystack pointer
    10		MOVD	a_len+8(FP), R1	// R1 = a_len, the haystack length
    11		MOVD	b_base+24(FP), R2	// R2 = b_base, the needle pointer; the word at 16(FP) is not read (presumably a's cap)
    12		MOVD	b_len+32(FP), R3	// R3 = b_len, the needle length
    13		MOVD	$ret+48(FP), R9	// R9 = address of the result slot (the $ takes the address, not the value)
    14		B	indexbody<>(SB)	// plain branch, not a call: indexbody stores through R9 and its RET returns to our caller
    15	
    16	TEXT ·IndexString(SB),NOSPLIT,$0-40	// entry stub: frame size 0, 40 bytes of arguments + result
    17		MOVD	a_base+0(FP), R0	// R0 = a_base, used by indexbody as the haystack pointer
    18		MOVD	a_len+8(FP), R1	// R1 = a_len, the haystack length
    19		MOVD	b_base+16(FP), R2	// R2 = b_base, the needle pointer; b starts right after a's two words
    20		MOVD	b_len+24(FP), R3	// R3 = b_len, the needle length
    21		MOVD	$ret+32(FP), R9	// R9 = address of the result slot (the $ takes the address, not the value)
    22		B	indexbody<>(SB)	// plain branch, not a call: indexbody stores through R9 and its RET returns to our caller
    23	
    24	// indexbody stores at (R9) the index of the first match of needle in haystack, or -1. Input:
    25	//   R0: haystack (base address; advanced by one byte per candidate position)
    26	//   R1: length of haystack
    27	//   R2: needle (base address; advanced while loading needles longer than 8 bytes)
    28	//   R3: length of needle (2 <= len <= 32); reused as scratch in the 3/5/6/7-byte loops
    29	//   R9: address to put result (match index, or -1 when there is no match)
    30	TEXT indexbody<>(SB),NOSPLIT,$0-56
    31		// main idea is to load 'sep' (the needle) into separate register(s)
    32		// to avoid re-loading it again and again
    33		// for subsequent substring comparisons
    34		SUB	R3, R1, R4	// R4 = len(haystack) - len(needle)
    35		// after the ADD below, R4 contains the start of last substring for comparison
    36		ADD	R0, R4, R4
    37		ADD	$1, R0, R8	// R8 = haystack + 1; every loop leaves R0 at match+1, so found computes R0 - R8
    38	
    39		CMP	$8, R3
    40		BHI	greater_8	// needle longer than 8 bytes
    41		TBZ	$3, R3, len_2_7	// R3 <= 8 here, so bit 3 is set only for len == 8
    42	len_8:
    43		// R5 contains 8-byte of sep
    44		MOVD	(R2), R5
    45	loop_8:
    46		// R6 contains substring for comparison
    47		CMP	R4, R0
    48		BHI	not_found	// R0 is past the last possible start (unsigned compare)
    49		MOVD.P	1(R0), R6	// load 8 bytes at the candidate, then R0 += 1
    50		CMP	R5, R6
    51		BNE	loop_8
    52		B	found
    53	len_2_7:
    54		TBZ	$2, R3, len_2_3	// bit 2 clear: len is 2 or 3
    55		TBZ	$1, R3, len_4_5	// bit 2 set, bit 1 clear: len is 4 or 5
    56		TBZ	$0, R3, len_6	// bits 2 and 1 set, bit 0 clear: len is 6; otherwise fall through for 7
    57	len_7:
    58		// R5 and R6 contain 7-byte of sep
    59		MOVWU	(R2), R5	// needle bytes 0..3
    60		// 1-byte overlap with R5
    61		MOVWU	3(R2), R6	// needle bytes 3..6
    62	loop_7:
    63		CMP	R4, R0
    64		BHI	not_found
    65		MOVWU.P	1(R0), R3	// candidate bytes 0..3, then R0 += 1 (R3 is scratch from here on)
    66		CMP	R5, R3
    67		BNE	loop_7
    68		MOVWU	2(R0), R3	// R0 is already candidate+1, so this reads candidate bytes 3..6
    69		CMP	R6, R3
    70		BNE	loop_7
    71		B	found
    72	len_6:
    73		// R5 and R6 contain 6-byte of sep
    74		MOVWU	(R2), R5	// needle bytes 0..3
    75		MOVHU	4(R2), R6	// needle bytes 4..5
    76	loop_6:
    77		CMP	R4, R0
    78		BHI	not_found
    79		MOVWU.P	1(R0), R3	// candidate bytes 0..3, then R0 += 1
    80		CMP	R5, R3
    81		BNE	loop_6
    82		MOVHU	3(R0), R3	// R0 is candidate+1, so this reads candidate bytes 4..5
    83		CMP	R6, R3
    84		BNE	loop_6
    85		B	found
    86	len_4_5:
    87		TBZ	$0, R3, len_4	// bit 0 clear: len is 4; otherwise fall through for 5
    88	len_5:
    89		// R5 and R7 contain 5-byte of sep
    90		MOVWU	(R2), R5	// needle bytes 0..3
    91		MOVBU	4(R2), R7	// needle byte 4
    92	loop_5:
    93		CMP	R4, R0
    94		BHI	not_found
    95		MOVWU.P	1(R0), R3	// candidate bytes 0..3, then R0 += 1
    96		CMP	R5, R3
    97		BNE	loop_5
    98		MOVBU	3(R0), R3	// R0 is candidate+1, so this reads candidate byte 4
    99		CMP	R7, R3
   100		BNE	loop_5
   101		B	found
   102	len_4:
   103		// R5 contains 4-byte of sep
   104		MOVWU	(R2), R5
   105	loop_4:
   106		CMP	R4, R0
   107		BHI	not_found
   108		MOVWU.P	1(R0), R6	// load 4 bytes at the candidate, then R0 += 1
   109		CMP	R5, R6
   110		BNE	loop_4
   111		B	found
   112	len_2_3:
   113		TBZ	$0, R3, len_2	// bit 0 clear: len is 2; otherwise fall through for 3
   114	len_3:
   115		// R6 and R7 contain 3-byte of sep
   116		MOVHU	(R2), R6	// needle bytes 0..1
   117		MOVBU	2(R2), R7	// needle byte 2
   118	loop_3:
   119		CMP	R4, R0
   120		BHI	not_found
   121		MOVHU.P	1(R0), R3	// candidate bytes 0..1, then R0 += 1
   122		CMP	R6, R3
   123		BNE	loop_3
   124		MOVBU	1(R0), R3	// R0 is candidate+1, so this reads candidate byte 2
   125		CMP	R7, R3
   126		BNE	loop_3
   127		B	found
   128	len_2:
   129		// R5 contains 2-byte of sep
   130		MOVHU	(R2), R5
   131	loop_2:
   132		CMP	R4, R0
   133		BHI	not_found
   134		MOVHU.P	1(R0), R6	// load 2 bytes at the candidate, then R0 += 1
   135		CMP	R5, R6
   136		BNE	loop_2	// on a match, fall through into found
   137	found:
   138		SUB	R8, R0, R0	// R0 = (match + 1) - (haystack + 1) = index of the match
   139		MOVD	R0, (R9)	// store the index through the result pointer
   140		RET
   141	not_found:
   142		MOVD	$-1, R0
   143		MOVD	R0, (R9)	// store -1 through the result pointer
   144		RET
   145	greater_8:
   146		SUB	$9, R3, R11	// len(sep) - 9: offset from the post-incremented R0 (candidate+1) to the candidate's last 8 bytes
   147		CMP	$16, R3
   148		BHI	greater_16
   149	len_9_16:
   150		MOVD.P	8(R2), R5	// R5 contains the first 8-byte of sep; R2 += 8
   151		SUB	$16, R3, R7	// len(sep) - 16, offset from the advanced R2 to the last 8 bytes (<= 0)
   152		MOVD	(R2)(R7), R6	// R6 contains the last 8-byte of sep (overlaps R5 when len < 16)
   153	loop_9_16:
   154		// search the first 8 bytes first
   155		CMP	R4, R0
   156		BHI	not_found
   157		MOVD.P	1(R0), R7	// candidate bytes 0..7, then R0 += 1 (R7 is scratch from here on)
   158		CMP	R5, R7
   159		BNE	loop_9_16
   160		MOVD	(R0)(R11), R7	// last 8 bytes of the candidate window
   161		CMP	R6, R7		// compare the last 8 bytes
   162		BNE	loop_9_16
   163		B	found
   164	greater_16:
   165		CMP	$24, R3
   166		BHI	len_25_32
   167	len_17_24:
   168		LDP.P	16(R2), (R5, R6)	// R5 and R6 contain the first 16-byte of sep; R2 += 16
   169		SUB	$24, R3, R10		// len(sep) - 24, offset from the advanced R2 to the last 8 bytes (<= 0)
   170		MOVD	(R2)(R10), R7		// R7 contains the last 8-byte of sep (overlaps R6 when len < 24)
   171	loop_17_24:
   172		// search the first 16 bytes first
   173		CMP	R4, R0
   174		BHI	not_found
   175		MOVD.P	1(R0), R10	// candidate bytes 0..7, then R0 += 1 (R10 is scratch from here on)
   176		CMP	R5, R10
   177		BNE	loop_17_24
   178		MOVD	7(R0), R10	// R0 is candidate+1, so this reads candidate bytes 8..15
   179		CMP	R6, R10
   180		BNE	loop_17_24
   181		MOVD	(R0)(R11), R10	// last 8 bytes of the candidate window
   182		CMP	R7, R10		// compare the last 8 bytes
   183		BNE	loop_17_24
   184		B	found
   185	len_25_32:
   186		LDP.P	16(R2), (R5, R6)	// first 16 bytes of sep; R2 += 16
   187		MOVD.P	8(R2), R7	// R5, R6 and R7 contain the first 24-byte of sep; R2 += 8
   188		SUB	$32, R3, R12	// len(sep) - 32, offset from the advanced R2 to the last 8 bytes (<= 0)
   189		MOVD	(R2)(R12), R10	// R10 contains the last 8-byte of sep (overlaps R7 when len < 32)
   190	loop_25_32:
   191		// search the first 24 bytes first
   192		CMP	R4, R0
   193		BHI	not_found
   194		MOVD.P	1(R0), R12	// candidate bytes 0..7, then R0 += 1 (R12 is scratch from here on)
   195		CMP	R5, R12
   196		BNE	loop_25_32
   197		MOVD	7(R0), R12	// R0 is candidate+1, so this reads candidate bytes 8..15
   198		CMP	R6, R12
   199		BNE	loop_25_32
   200		MOVD	15(R0), R12	// candidate bytes 16..23
   201		CMP	R7, R12
   202		BNE	loop_25_32
   203		MOVD	(R0)(R11), R12	// last 8 bytes of the candidate window
   204		CMP	R10, R12	// compare the last 8 bytes
   205		BNE	loop_25_32
   206		B	found

View as plain text