...

Text file src/runtime/memclr_ppc64x.s

     1	// Copyright 2014 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// +build ppc64 ppc64le
     6	
     7	#include "textflag.h"
     8	
     9	// func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr)
      	//
      	// Zeroes the n bytes starting at ptr.
      	//
      	// Registers as used below:
      	//   R3    current store address; advanced as memory is cleared
      	//   R4    bytes left to clear; kept in step with R3 everywhere
      	//         except the final byte loop (zerotailloop)
      	//   R0    stored wherever zero bytes are wanted, so it must hold 0
      	//   VS32  cleared with XXLXOR and used for the 16-byte stores
      	//   CTR   counter for the BC 16, 0 loops
      	//   R5-R11, R14, R15 are scratch
      	//
      	// Paths:
      	//   n < 8          byte loop only
      	//   8 <= n < 512   32-byte chunks, then doublewords, then tail bytes
      	//   n >= 512       bring ptr to a 128-byte boundary, clear with DCBZ
      	//                  (treated here as zeroing 128 bytes per instruction),
      	//                  then finish what is left like the small case
    10	TEXT runtime·memclrNoHeapPointers(SB), NOSPLIT|NOFRAME, $0-16
    11		MOVD ptr+0(FP), R3 // R3 = ptr
    12		MOVD n+8(FP), R4   // R4 = n
    13	
    14		// Determine if there are doublewords to clear
    15	check:
    16		ANDCC $7, R4, R5  // R5: leftover bytes to clear
    17		SRD   $3, R4, R6  // R6: double words to clear
    18		CMP   R6, $0, CR1 // CR1[EQ] set if no double words
    19	
    20		BC    12, 6, nozerolarge // CR1[EQ] set: n < 8, only single bytes
    21		CMP   R4, $512
    22		BLT   under512           // special case for < 512
    23		ANDCC $127, R3, R8       // check for 128 alignment of address
    24		BEQ   zero512setup       // already 128 byte aligned, go straight to DCBZ setup
    25	
    26		ANDCC $7, R3, R15
    27		BEQ   zero512xsetup // at least 8 byte aligned
    28	
    29		// zero bytes up to 8 byte alignment, using at most one 1, 2 and 4 byte store
    30	
    31		ANDCC $1, R3, R15 // is the address odd?
    32		BEQ   byte2
    33		MOVB  R0, 0(R3)   // zero 1 byte
    34		ADD   $1, R3      // bump ptr by 1
    35		ADD   $-1, R4     // one byte fewer to clear
    36	
    37	byte2:
    38		ANDCC $2, R3, R15 // check for 2 byte alignment
    39		BEQ   byte4
    40		MOVH  R0, 0(R3)   // zero 2 bytes
    41		ADD   $2, R3      // bump ptr by 2
    42		ADD   $-2, R4     // two bytes fewer to clear
    43	
    44	byte4:
    45		ANDCC $4, R3, R15   // check for 4 byte alignment
    46		BEQ   zero512xsetup
    47		MOVW  R0, 0(R3)     // zero 4 bytes
    48		ADD   $4, R3        // bump ptr by 4
    49		ADD   $-4, R4       // four bytes fewer to clear
    50		BR    zero512xsetup // ptr should now be 8 byte aligned
    51	
      		// Fewer than 512 bytes; R6 still holds the doubleword count from check.
    52	under512:
    53		MOVD  R6, CTR     // R6 = number of double words
    54		SRDCC $2, R6, R7  // R7 = number of 32 byte chunks; CR0[EQ] if none
    55		BNE   zero32setup // at least one 32 byte chunk
    56	
    57		// Clear double words; CTR must already hold the doubleword count
    58	
    59	zero8:
    60		MOVD R0, 0(R3)    // double word
    61		ADD  $8, R3
    62		ADD  $-8, R4
    63		BC   16, 0, zero8 // dec ctr, br zero8 if ctr not 0
    64		BR   nozerolarge  // handle leftovers
    65	
    66		// Prepare to clear 32 bytes at a time; R7 must hold the chunk count.
    67	
    68	zero32setup:
    69		DCBTST (R3)             // prepare data cache
    70		XXLXOR VS32, VS32, VS32 // clear VS32 (V0)
    71		MOVD   R7, CTR          // number of 32 byte chunks
    72		MOVD   $16, R8          // index for the second 16 byte store
    73	
    74	zero32:
    75		STXVD2X VS32, (R3+R0)   // store 16 bytes
    76		STXVD2X VS32, (R3+R8)   // store the next 16 bytes
    77		ADD     $32, R3
    78		ADD     $-32, R4
    79		BC      16, 0, zero32   // dec ctr, br zero32 if ctr not 0
    80		RLDCLCC $61, R4, $3, R6 // remaining doublewords; CR0[EQ] if none
    81		BEQ     nozerolarge
    82		MOVD    R6, CTR         // set up the CTR for doublewords
    83		BR      zero8
    84	
      		// Fewer than 8 bytes remain in R4 (possibly none).
    85	nozerolarge:
    86		ANDCC $7, R4, R5 // any remaining bytes
    87		BC    4, 1, LR   // ble lr: return when no tail bytes are left
    88	
    89	zerotail:
    90		MOVD R5, CTR // set up to clear tail bytes
    91	
    92	zerotailloop:
    93		MOVB R0, 0(R3)           // clear single bytes
    94		ADD  $1, R3
    95		BC   16, 0, zerotailloop // dec ctr, br zerotailloop if ctr not 0
    96		RET
    97	
      		// n >= 512 and ptr is 8 byte aligned but not yet 128 byte aligned.
    98	zero512xsetup:  // 512 chunk with extra needed
    99		ANDCC $8, R3, R11    // is address bit 3 set (8 but not 16 byte aligned)?
   100		BEQ   zero512setup16
   101		MOVD  R0, 0(R3)      // clear 8 bytes
   102		ADD   $8, R3         // update ptr to next 8
   103		ADD   $-8, R4        // dec count by 8
   104	
   105	zero512setup16:
   106		ANDCC $127, R3, R14 // R14 = offset of ptr within its 128 byte block
   107		BEQ   zero512setup  // handle 128 byte alignment
   108		MOVD  $128, R15
   109		SUB   R14, R15, R14 // find increment to 128 alignment
   110		SRD   $4, R14, R15  // number of 16 byte chunks
   111	
   112	zero512presetup:
   113		MOVD   R15, CTR         // loop counter of 16 bytes
   114		XXLXOR VS32, VS32, VS32 // clear VS32 (V0)
   115	
   116	zero512preloop:  // clear up to 128 alignment
   117		STXVD2X VS32, (R3+R0)         // clear 16 bytes
   118		ADD     $16, R3               // update ptr
   119		ADD     $-16, R4              // dec count
   120		BC      16, 0, zero512preloop // dec ctr, loop until 128 byte aligned
   121	
      		// ptr is 128 byte aligned from here through the DCBZ loops.
   122	zero512setup:  // setup for dcbz loop
   123		CMP  R4, $512   // check if at least 512
   124		BLT  remain     // alignment stores may have left fewer than 512
   125		SRD  $9, R4, R8 // loop count for 512 chunks
   126		MOVD R8, CTR    // set up counter
   127		MOVD $128, R9   // index regs for 128 bytes
   128		MOVD $256, R10
   129		MOVD $384, R11
   130	
   131	zero512:
   132		DCBZ (R3+R0)        // clear first chunk
   133		DCBZ (R3+R9)        // clear second chunk
   134		DCBZ (R3+R10)       // clear third chunk
   135		DCBZ (R3+R11)       // clear fourth chunk
   136		ADD  $512, R3
   137		ADD  $-512, R4
   138		BC   16, 0, zero512 // dec ctr, br zero512 if ctr not 0
   139	
      		// Fewer than 512 bytes left; clear whole 128 byte blocks one at a time.
   140	remain:
   141		CMP  R4, $128  // check if 128 byte chunks left
   142		BLT  smaller
   143		DCBZ (R3+R0)   // clear 128
   144		ADD  $128, R3
   145		ADD  $-128, R4
   146		BR   remain
   147	
      		// Fewer than 128 bytes left; reuse the small-size loops above.
   148	smaller:
   149		ANDCC $127, R4, R7 // find leftovers
   150		BEQ   done
   151		CMP   R7, $64      // 64 or more, do 32 at a time
   152		BLT   zero8setup   // less than 64, do 8 at a time
   153		SRD   $5, R7, R7   // set up counter for 32
   154		BR    zero32setup
   155	
   156	zero8setup:
   157		SRDCC $3, R7, R7  // R7 = doublewords left; CR0[EQ] if less than 8 bytes
   158		BEQ   nozerolarge
   159		MOVD  R7, CTR     // doubleword count for zero8
   160		BR    zero8
   161	
   162	done:
   163		RET

View as plain text