...

Text file src/pkg/internal/bytealg/compare_ppc64x.s

     1	// Copyright 2018 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// +build ppc64 ppc64le
     6	
     7	#include "go_asm.h"
     8	#include "textflag.h"
     9	
    10	TEXT ·Compare(SB),NOSPLIT|NOFRAME,$0-56
    11		// Three-way compare of byte sequences a and b; -1, 0 or +1 is stored at ret+48(FP).
    12		MOVD	a_base+0(FP), R5
    13		MOVD	a_len+8(FP), R3
    14		MOVD	b_base+24(FP), R6
    15		MOVD	b_len+32(FP), R4
    16		MOVD	$ret+48(FP), R7	// R7 = address of the result slot
    17		CMP	R3,R4,CR6	// CR6 = a_len vs b_len, consulted only when the bases match
    18		CMP	R5,R6,CR7	// CR7 = a_base vs b_base
    19		BEQ	CR7,samebase	// identical start address: only the lengths can decide
    20	#ifdef	GOARCH_ppc64le
    21		BR	cmpbodyLE<>(SB)	// tail-branch with R3-R7 loaded; the body stores the result
    22	#else
    23		BR	cmpbodyBE<>(SB)
    24	#endif
    25	
    26	samebase:
    27		BEQ	CR6,samelen	// same base and same length: equal
    28		BGT	CR6,longer
    29		MOVD	$-1, R8		// a_len < b_len
    30		MOVD	R8, (R7)
    31		RET
    32	longer:
    33		MOVD	$1, R8		// a_len > b_len
    34		MOVD	R8, (R7)
    35		RET
    36	samelen:
    37		MOVD	$0, (R7)
    38		RET
    39	
    40	TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40
    41		// Three-way compare of a and b given as (base, len) pairs; -1, 0 or +1 is stored at ret+32(FP).
    42		MOVD	a_base+0(FP), R5
    43		MOVD	a_len+8(FP), R3
    44		MOVD	b_base+16(FP), R6
    45		MOVD	b_len+24(FP), R4
    46		MOVD	$ret+32(FP), R7	// R7 = address of the result slot
    47		CMP	R3,R4,CR6	// CR6 = a_len vs b_len, consulted only when the bases match
    48		CMP	R5,R6,CR7	// CR7 = a_base vs b_base
    49		BEQ	CR7,samebase	// identical start address: only the lengths can decide
    50	#ifdef	GOARCH_ppc64le
    51		BR	cmpbodyLE<>(SB)	// tail-branch with R3-R7 loaded; the body stores the result
    52	#else
    53		BR	cmpbodyBE<>(SB)
    54	#endif
    55	
    56	samebase:
    57		BEQ	CR6,samelen	// same base and same length: equal
    58		BGT	CR6,longer
    59		MOVD	$-1, R8		// a_len < b_len
    60		MOVD	R8, (R7)
    61		RET
    62	longer:
    63		MOVD	$1, R8		// a_len > b_len
    64		MOVD	R8, (R7)
    65		RET
    66	samelen:
    67		MOVD	$0, (R7)
    68		RET
    69	
    70	// Do an efficient memcmp for ppc64le. Stores -1, 0 or +1 at (R7) and returns.
    71	// R3 = a len (reused to hold the result when it is -1 or +1)
    72	// R4 = b len
    73	// R5 = a addr (advanced as data is consumed)
    74	// R6 = b addr (advanced as data is consumed)
    75	// R7 = addr of return value. Scratch: R8-R10, R14-R16, CTR, CR0, CR2.
    76	TEXT cmpbodyLE<>(SB),NOSPLIT|NOFRAME,$0-0
    77		MOVD	R3,R8		// R8 = compare length, provisionally a len
    78		CMP	R3,R4,CR2	// CR2 = a len vs b len; stays live for the final tie-break
    79		BC	12,8,setuplen	// BLT CR2: a is shorter, keep R8 = a len
    80		MOVD	R4,R8		// otherwise compare b len bytes: R8 = min(a len, b len)
    81	setuplen:
    82		MOVD	R8,CTR		// byte count drives the byte loop on the short path
    83		CMP	R8,$8		// only optimize >=8
    84		BLT	simplecheck
    85		DCBT	(R5)		// cache hint
    86		DCBT	(R6)
    87		CMP	R8,$32		// optimize >= 32
    88		MOVD	R8,R9		// setup8a takes its byte count from R9
    89		BLT	setup8a		// 8 byte moves only
    90	setup32a:
    91		SRADCC	$5,R8,R9	// number of 32 byte chunks
    92		MOVD	R9,CTR		// CTR = 32 byte chunk count
    93	
    94		// Special processing for 32 bytes or longer.
    95		// Loading with plain MOVD is faster and correct as long as the
    96		// doublewords being compared are equal. Once they
    97		// are found unequal, reload them in proper byte order
    98		// to determine greater or less than.
    99	loop32a:
   100		MOVD	0(R5),R9	// doublewords to compare
   101		MOVD	0(R6),R10	// get 4 doublewords
   102		MOVD	8(R5),R14	// next pair is loaded before the previous one is tested
   103		MOVD	8(R6),R15
   104		CMPU	R9,R10		// bytes equal?
   105		MOVD	$0,R16		// R16 = offset of this pair from R5/R6, used by cmpne
   106		BNE	cmpne		// further compare for LT or GT
   107		MOVD	16(R5),R9	// get next pair of doublewords
   108		MOVD	16(R6),R10
   109		CMPU	R14,R15		// bytes match?
   110		MOVD	$8,R16		// offset of the 2nd pair for cmpne
   111		BNE	cmpne		// further compare for LT or GT
   112		MOVD	24(R5),R14	// get next pair of doublewords
   113		MOVD    24(R6),R15
   114		CMPU	R9,R10		// bytes match?
   115		MOVD	$16,R16		// offset of the 3rd pair for cmpne
   116		BNE	cmpne		// further compare for LT or GT
   117		MOVD	$-8,R16		// 4th pair sits at -8 once R5,R6 are bumped by 32 below
   118		ADD	$32,R5		// bump up to next 32
   119		ADD	$32,R6
   120		CMPU    R14,R15		// bytes match?
   121		BC	8,2,loop32a	// decrement CTR; loop while CTR != 0 and CR0 EQ
   122		BNE	cmpne		// 4th pair differed
   123		ANDCC	$24,R8,R9	// Any 8 byte chunks?
   124		BEQ	leftover	// and result is 0
   125	setup8a:
   126		SRADCC	$3,R9,R9	// get the 8 byte count
   127		BEQ	leftover	// shifted value is 0
   128		MOVD	R9,CTR		// loop count for doublewords
   129	loop8:
   130		MOVDBR	(R5+R0),R9	// byte-reversed doubleword load; NOTE(review): assumes R0 reads as zero - confirm
   131		MOVDBR	(R6+R0),R10	// LE compare order
   132		ADD	$8,R5
   133		ADD	$8,R6
   134		CMPU	R9,R10		// match?
   135		BC	8,2,loop8	// decrement CTR; loop while CTR != 0 and CR0 EQ
   136		BGT	greater
   137		BLT	less
   138	leftover:
   139		ANDCC	$7,R8,R9	// check for leftover bytes
   140		MOVD	R9,CTR		// CTR = leftover byte count; BNE below still tests the ANDCC result
   141		BNE	simple		// leftover bytes
   142		BC	12,10,equal	// CR2 EQ: all compared bytes match and lengths are equal
   143		BC	12,8,less	// CR2 LT: 1st len < 2nd len, result less
   144		BR	greater		// 1st len > 2nd len must be greater
   145	simplecheck:
   146		CMP	R8,$0		// remaining compare length 0
   147		BNE	simple		// do simple compare
   148		BC	12,10,equal	// test CR2 for length comparison
   149		BC	12,8,less	// 1st len < 2nd len, result less
   150		BR	greater		// 1st len > 2nd len must be greater
   151	simple:
   152		MOVBZ	0(R5), R9	// get byte from 1st operand
   153		ADD	$1,R5
   154		MOVBZ	0(R6), R10	// get byte from 2nd operand
   155		ADD	$1,R6
   156		CMPU	R9, R10
   157		BC	8,2,simple	// decrement CTR; loop while CTR != 0 and CR0 EQ
   158		BGT	greater		// 1st > 2nd
   159		BLT	less		// 1st < 2nd
   160		BC	12,10,equal	// test CR2 for length comparison
   161		BC	12,9,greater	// CR2 GT: 1st len > 2nd len
   162		BR	less		// must be less
   163	cmpne:				// only reached when a doubleword pair differs
   164		MOVDBR	(R5+R16),R8	// reload in reverse order
   165		MOVDBR	(R6+R16),R9
   166		CMPU	R8,R9		// compare correct endianness
   167		BGT	greater		// here only if NE
   168	less:
   169		MOVD	$-1,R3
   170		MOVD	R3,(R7)		// return value if A < B
   171		RET
   172	equal:
   173		MOVD	$0,(R7)		// return value if A == B
   174		RET
   175	greater:
   176		MOVD	$1,R3
   177		MOVD	R3,(R7)		// return value if A > B
   178		RET
   179	
   180	// Do an efficient memcmp for ppc64 (BE). Stores -1, 0 or +1 at (R7) and returns.
   181	// R3 = a len (reused to hold the result when it is -1 or +1)
   182	// R4 = b len
   183	// R5 = a addr (advanced as data is consumed)
   184	// R6 = b addr (advanced as data is consumed)
   185	// R7 = addr of return value. Scratch: R8-R10, R14, R15, CTR, CR0, CR2.
   186	TEXT cmpbodyBE<>(SB),NOSPLIT|NOFRAME,$0-0
   187		MOVD	R3,R8		// R8 = compare length, provisionally a len
   188		CMP	R3,R4,CR2	// CR2 = a len vs b len; stays live for the final tie-break
   189		BC	12,8,setuplen	// BLT CR2: a is shorter, keep R8 = a len
   190		MOVD	R4,R8		// otherwise compare b len bytes: R8 = min(a len, b len)
   191	setuplen:
   192		MOVD	R8,CTR		// byte count drives the byte loop on the short path
   193		CMP	R8,$8		// only optimize >=8
   194		BLT	simplecheck
   195		DCBT	(R5)		// cache hint
   196		DCBT	(R6)
   197		CMP	R8,$32		// optimize >= 32
   198		MOVD	R8,R9		// setup8a takes its byte count from R9
   199		BLT	setup8a		// 8 byte moves only
   200		// Unlike the LE body, a mismatch branches straight to less/greater: no reload is done.
   201	setup32a:
   202		SRADCC	$5,R8,R9	// number of 32 byte chunks
   203		MOVD	R9,CTR		// CTR = 32 byte chunk count
   204	loop32a:
   205		MOVD	0(R5),R9	// doublewords to compare
   206		MOVD	0(R6),R10	// get 4 doublewords
   207		MOVD	8(R5),R14	// next pair is loaded before the previous one is tested
   208		MOVD	8(R6),R15
   209		CMPU	R9,R10		// bytes equal?
   210		BLT	less		// found to be less
   211		BGT	greater		// found to be greater
   212		MOVD	16(R5),R9	// get next pair of doublewords
   213		MOVD	16(R6),R10
   214		CMPU	R14,R15		// bytes match?
   215		BLT	less		// found less
   216		BGT	greater		// found greater
   217		MOVD	24(R5),R14	// get next pair of doublewords
   218		MOVD	24(R6),R15
   219		CMPU	R9,R10		// bytes match?
   220		BLT	less		// found to be less
   221		BGT	greater		// found to be greater
   222		ADD	$32,R5		// bump up to next 32
   223		ADD	$32,R6
   224		CMPU	R14,R15		// bytes match?
   225		BC	8,2,loop32a	// decrement CTR; loop while CTR != 0 and CR0 EQ
   226		BLT	less		// with BE, byte ordering is
   227		BGT	greater		// good for compare
   228		ANDCC	$24,R8,R9	// Any 8 byte chunks?
   229		BEQ	leftover	// and result is 0
   230	setup8a:
   231		SRADCC	$3,R9,R9	// get the 8 byte count
   232		BEQ	leftover	// shifted value is 0
   233		MOVD	R9,CTR		// loop count for doublewords
   234	loop8:
   235		MOVD	(R5),R9		// doublewords to compare
   236		MOVD	(R6),R10
   237		ADD	$8,R5
   238		ADD	$8,R6
   239		CMPU	R9,R10		// match?
   240		BC	8,2,loop8	// decrement CTR; loop while CTR != 0 and CR0 EQ
   241		BGT	greater
   242		BLT	less
   243	leftover:
   244		ANDCC	$7,R8,R9	// check for leftover bytes
   245		MOVD	R9,CTR		// CTR = leftover byte count; BNE below still tests the ANDCC result
   246		BNE	simple		// leftover bytes
   247		BC	12,10,equal	// CR2 EQ: all compared bytes match and lengths are equal
   248		BC	12,8,less	// CR2 LT: 1st len < 2nd len, result less
   249		BR	greater		// 1st len > 2nd len must be greater
   250	simplecheck:
   251		CMP	R8,$0		// remaining compare length 0
   252		BNE	simple		// do simple compare
   253		BC	12,10,equal	// test CR2 for length comparison
   254		BC 	12,8,less	// 1st len < 2nd len, result less
   255		BR	greater		// 1st len > 2nd len must be greater
   256	simple:
   257		MOVBZ	0(R5),R9	// get byte from 1st operand
   258		ADD	$1,R5
   259		MOVBZ	0(R6),R10	// get byte from 2nd operand
   260		ADD	$1,R6
   261		CMPU	R9,R10
   262		BC	8,2,simple	// decrement CTR; loop while CTR != 0 and CR0 EQ
   263		BGT	greater		// 1st > 2nd
   264		BLT	less		// 1st < 2nd
   265		BC	12,10,equal	// test CR2 for length comparison
   266		BC	12,9,greater	// CR2 GT: 1st len > 2nd len
   267	less:				// also reached by fall-through when 1st len < 2nd len
   268		MOVD	$-1,R3
   269		MOVD    R3,(R7)		// return value if A < B
   270		RET
   271	equal:
   272		MOVD    $0,(R7)		// return value if A == B
   273		RET
   274	greater:
   275		MOVD	$1,R3
   276		MOVD	R3,(R7)		// return value if A > B
   277		RET

View as plain text