...

Text file src/pkg/internal/bytealg/index_s390x.s

     1	// Copyright 2018 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	#include "go_asm.h"
     6	#include "textflag.h"
     7	
     8	// Index loads the (base, len) pairs found at 0(FP) and 24(FP) and the address of the result slot at 48(FP), then tail-branches to indexbody. Caller must confirm availability of vx facility before calling.
     9	TEXT ·Index(SB),NOSPLIT|NOFRAME,$0-56
    10		LMG	a_base+0(FP), R1, R2  // R1=&s[0],   R2=len(s)
    11		LMG	b_base+24(FP), R3, R4 // R3=&sep[0], R4=len(sep)
    12		MOVD	$ret+48(FP), R5       // R5=&ret; indexbody stores the result through this pointer
    13		BR	indexbody<>(SB)       // tail branch: indexbody writes the result and executes the RET
    14	
    15	// IndexString loads the (base, len) pairs found at 0(FP) and 16(FP) and the address of the result slot at 32(FP), then tail-branches to indexbody. Caller must confirm availability of vx facility before calling.
    16	TEXT ·IndexString(SB),NOSPLIT|NOFRAME,$0-40
    17		LMG	a_base+0(FP), R1, R2  // R1=&s[0],   R2=len(s)
    18		LMG	b_base+16(FP), R3, R4 // R3=&sep[0], R4=len(sep)
    19		MOVD	$ret+32(FP), R5       // R5=&ret; indexbody stores the result through this pointer
    20		BR	indexbody<>(SB)       // tail branch: indexbody writes the result and executes the RET
    21	
    22	// indexbody finds the first occurrence of sep in s and stores its byte offset through R5, or -1 if there is none (also -1 when len(sep)==0 or len(sep)>len(s)).
    23	// s: string we are searching; sep: string to search for
    24	// R1=&s[0], R2=len(s) (R2 is turned into the address of the last valid start position)
    25	// R3=&sep[0], R4=len(sep) (R4 is turned into len(sep)-1)
    26	// R5=&ret (int); R0, R7, R8, R9 and vector registers are used as scratch. len(sep) > 64 is not implemented.
    27	// Caller must confirm availability of vx facility before calling.
    28	TEXT indexbody<>(SB),NOSPLIT|NOFRAME,$0
    29		CMPBGT	R4, R2, notfound // sep is longer than s: no match possible
    30		ADD	R1, R2 // R2=&s[len(s)]
    31		SUB	R4, R2 // R2=&s[len(s)-len(sep)] (last valid index)
    32		CMPBEQ	R4, $0, notfound // an empty sep is reported as -1 by this routine
    33		SUB	$1, R4 // R4=len(sep)-1 for use as VLL index
    34		VLL	R4, (R3), V0 // contains first 16 bytes of sep (fewer if sep is shorter; VLL zeroes the unloaded bytes)
    35		MOVD	R1, R7 // R7=address of the current candidate position in s
    36	index2plus:
    37		CMPBNE	R4, $1, index3plus // R4==1 means len(sep)==2
    38		MOVD	$15(R7), R9 // R9=R7+15
    39		CMPBGE	R9, R2, index2to16 // at most 16 candidate positions: use the byte-stepping path
    40		VGBM	$0xaaaa, V31       // 0xff00ff00ff00ff00...
    41		VONE	V16 // all ones: the value VFEEBS searches for
    42		VREPH	$0, V0, V1 // V1=the 2 bytes of sep replicated into every halfword
    43		CMPBGE	R9, R2, index2to16 // NOTE(review): same test as four lines up with R9 and R2 unchanged, so this branch is never taken
    44	index2loop:
    45		VL	0(R7), V2          // 16 bytes, even indices
    46		VL	1(R7), V4          // 16 bytes, odd indices
    47		VCEQH	V1, V2, V5         // compare even indices
    48		VCEQH	V1, V4, V6         // compare odd indices
    49		VSEL	V5, V6, V31, V7    // merge even and odd indices
    50		VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
    51		BLT	foundV17 // taken when VFEEBS reported a match in this block
    52		MOVD	$16(R7), R7        // R7+=16
    53		ADD	$15, R7, R9
    54		CMPBLE	R9, R2, index2loop // continue if (R7+15) <= R2 (last index to search)
    55		CMPBLE	R7, R2, index2to16 // fewer than 16 positions remain: finish on the byte-stepping path
    56		BR	notfound
    57	
    58	index3plus:
    59		CMPBNE	R4, $2, index4plus // R4==2 means len(sep)==3
    60		ADD	$15, R7, R9
    61		CMPBGE	R9, R2, index2to16 // at most 16 candidate positions: use the byte-stepping path
    62		MOVD	$1, R0 // VLL index 1, i.e. load 2 bytes
    63		VGBM	$0xaaaa, V31       // 0xff00ff00ff00ff00...
    64		VONE	V16 // all ones: the value VFEEBS searches for
    65		VREPH	$0, V0, V1 // V1=first 2 bytes of sep replicated into every halfword
    66		VREPB	$2, V0, V8 // V8=third byte of sep replicated into every byte
    67	index3loop:
    68		VL	(R7), V2           // load 16-bytes into V2
    69		VLL	R0, 16(R7), V3     // load 2-bytes into V3
    70		VSLDB	$1, V2, V3, V4     // V4=(V2:V3)<<1
    71		VSLDB	$2, V2, V3, V9     // V9=(V2:V3)<<2
    72		VCEQH	V1, V2, V5         // compare 2-byte even indices
    73		VCEQH	V1, V4, V6         // compare 2-byte odd indices
    74		VCEQB	V8, V9, V10        // compare last bytes
    75		VSEL	V5, V6, V31, V7    // merge even and odd indices
    76		VN	V7, V10, V7        // AND indices with last byte
    77		VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
    78		BLT	foundV17 // taken when VFEEBS reported a match in this block
    79		MOVD	$16(R7), R7        // R7+=16
    80		ADD	$15, R7, R9
    81		CMPBLE	R9, R2, index3loop // continue if (R7+15) <= R2 (last index to search)
    82		CMPBLE	R7, R2, index2to16 // fewer than 16 positions remain: finish on the byte-stepping path
    83		BR	notfound
    84	
    85	index4plus:
    86		CMPBNE	R4, $3, index5plus // R4==3 means len(sep)==4
    87		ADD	$15, R7, R9
    88		CMPBGE	R9, R2, index2to16 // at most 16 candidate positions: use the byte-stepping path
    89		MOVD	$2, R0 // VLL index 2, i.e. load 3 bytes
    90		VGBM	$0x8888, V29       // 0xff000000ff000000...
    91		VGBM	$0x2222, V30       // 0x0000ff000000ff00...
    92		VGBM	$0xcccc, V31       // 0xffff0000ffff0000...
    93		VONE	V16 // all ones: the value VFEEBS searches for
    94		VREPF	$0, V0, V1 // V1=the 4 bytes of sep replicated into every word
    95	index4loop:
    96		VL	(R7), V2           // load 16-bytes into V2
    97		VLL	R0, 16(R7), V3     // load 3-bytes into V3
    98		VSLDB	$1, V2, V3, V4     // V4=(V2:V3)<<1
    99		VSLDB	$2, V2, V3, V9     // V9=(V2:V3)<<2
   100		VSLDB	$3, V2, V3, V10    // V10=(V2:V3)<<3
   101		VCEQF	V1, V2, V5         // compare index 0, 4, ...
   102		VCEQF	V1, V4, V6         // compare index 1, 5, ...
   103		VCEQF	V1, V9, V11        // compare index 2, 6, ...
   104		VCEQF	V1, V10, V12       // compare index 3, 7, ...
   105		VSEL	V5, V6, V29, V13   // merge index 0, 1, 4, 5, ...
   106		VSEL	V11, V12, V30, V14 // merge index 2, 3, 6, 7, ...
   107		VSEL	V13, V14, V31, V7  // final merge
   108		VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
   109		BLT	foundV17 // taken when VFEEBS reported a match in this block
   110		MOVD	$16(R7), R7        // R7+=16
   111		ADD	$15, R7, R9
   112		CMPBLE	R9, R2, index4loop // continue if (R7+15) <= R2 (last index to search)
   113		CMPBLE	R7, R2, index2to16 // fewer than 16 positions remain: finish on the byte-stepping path
   114		BR	notfound
   115	
   116	index5plus:
   117		CMPBGT	R4, $15, index17plus // len(sep) > 16 needs more than one vector register
   118	index2to16: // byte-stepping path for len(sep) <= 16; also the tail of the block loops above
   119		CMPBGT	R7, R2, notfound // already past the last valid start position
   120		MOVD	$1(R7), R8
   121		CMPBGT	R8, R2, index2to16tail // exactly one position left: skip the unrolled loop
   122	index2to16loop:
   123		// unrolled 2x
   124		VLL	R4, (R7), V1 // candidate at R7, len(sep) bytes
   125		VLL	R4, 1(R7), V2 // candidate at R7+1, len(sep) bytes
   126		VCEQGS	V0, V1, V3 // compare with sep and set the condition code
   127		BEQ	found // all elements equal: match at R7
   128		MOVD	$1(R7), R7 // R7++
   129		VCEQGS	V0, V2, V4 // second candidate (loaded from the old R7+1)
   130		BEQ	found // all elements equal: match at the incremented R7
   131		MOVD	$1(R7), R7 // R7++
   132		CMPBLT	R7, R2, index2to16loop // at least two positions left
   133		CMPBGT	R7, R2, notfound // none left; fall through when R7==R2
   134	index2to16tail:
   135		VLL	R4, (R7), V1 // last candidate position
   136		VCEQGS	V0, V1, V2
   137		BEQ	found
   138		BR	notfound
   139	
   140	index17plus:
   141		CMPBGT	R4, $31, index33plus // len(sep) > 32
   142		SUB	$16, R4, R0 // R0=VLL index for the bytes of sep after the first 16
   143		VLL	R0, 16(R3), V1 // V1=sep[16:]
   144		VONE	V7 // all ones: reference for the combined comparison result
   145	index17to32loop:
   146		VL	(R7), V2 // first 16 bytes of the candidate
   147		VLL	R0, 16(R7), V3 // remaining bytes of the candidate
   148		VCEQG	V0, V2, V4
   149		VCEQG	V1, V3, V5
   150		VN	V4, V5, V6 // AND the per-doubleword comparison results
   151		VCEQGS	V6, V7, V8 // test whether every doubleword matched, setting the condition code
   152		BEQ	found
   153		MOVD	$1(R7), R7 // R7++
   154		CMPBLE  R7, R2, index17to32loop // retry while R7 is a valid start position
   155		BR	notfound
   156	
   157	index33plus:
   158		CMPBGT	R4, $47, index49plus // len(sep) > 48
   159		SUB	$32, R4, R0 // R0=VLL index for the bytes of sep after the first 32
   160		VL	16(R3), V1 // V1=sep[16:32]
   161		VLL	R0, 32(R3), V2 // V2=sep[32:]
   162		VONE	V11 // all ones: reference for the combined comparison result
   163	index33to48loop:
   164		VL	(R7), V3
   165		VL	16(R7), V4
   166		VLL	R0, 32(R7), V5 // remaining bytes of the candidate
   167		VCEQG	V0, V3, V6
   168		VCEQG	V1, V4, V7
   169		VCEQG	V2, V5, V8
   170		VN	V6, V7, V9 // AND the per-doubleword comparison results
   171		VN	V8, V9, V10
   172		VCEQGS	V10, V11, V12 // test whether every doubleword matched, setting the condition code
   173		BEQ	found
   174		MOVD	$1(R7), R7 // R7++
   175		CMPBLE  R7, R2, index33to48loop // retry while R7 is a valid start position
   176		BR	notfound
   177	
   178	index49plus:
   179		CMPBGT	R4, $63, index65plus // len(sep) > 64
   180		SUB	$48, R4, R0 // R0=VLL index for the bytes of sep after the first 48
   181		VL	16(R3), V1 // V1=sep[16:32]
   182		VL	32(R3), V2 // V2=sep[32:48]
   183		VLL	R0, 48(R3), V3 // V3=sep[48:]
   184		VONE	V15 // all ones: reference for the combined comparison result
   185	index49to64loop:
   186		VL	(R7), V4
   187		VL	16(R7), V5
   188		VL	32(R7), V6
   189		VLL	R0, 48(R7), V7 // remaining bytes of the candidate
   190		VCEQG	V0, V4, V8
   191		VCEQG	V1, V5, V9
   192		VCEQG	V2, V6, V10
   193		VCEQG	V3, V7, V11
   194		VN	V8, V9, V12 // AND the per-doubleword comparison results
   195		VN	V10, V11, V13
   196		VN	V12, V13, V14
   197		VCEQGS	V14, V15, V16 // test whether every doubleword matched, setting the condition code
   198		BEQ	found
   199		MOVD	$1(R7), R7 // R7++
   200		CMPBLE  R7, R2, index49to64loop // retry while R7 is a valid start position; otherwise fall through to notfound
   201	notfound:
   202		MOVD	$-1, (R5) // *ret = -1
   203		RET
   204	
   205	index65plus:
   206		// not implemented
   207		MOVD	$0, (R0) // NOTE(review): store through R0, which this routine has not set on this path; looks like a deliberate fault for len(sep) > 64. Confirm callers cap len(sep) at 64.
   208		RET
   209	
   210	foundV17: // index is in doubleword V17[0]
   211		VLGVG	$0, V17, R8 // R8=byte offset of the match within the 16-byte block
   212		ADD	R8, R7 // R7=address of the match
   213	found:
   214		SUB	R1, R7 // R7=match address - &s[0] = index into s
   215		MOVD	R7, (R5) // *ret = index
   216		RET

View as plain text