...

Text file src/pkg/runtime/memmove_arm64.s

     1	// Copyright 2014 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	#include "textflag.h"
     6	
     7	// func memmove(to, from unsafe.Pointer, n uintptr)
     8	//
     9	// memmove copies n bytes from "from" to "to". The regions may overlap.
    10	// For more than 16 bytes, when from is below to the copy runs from the
    11	// high addresses down, otherwise from the low addresses up, so no source
    12	// byte is overwritten before it has been read.
    13	//
    14	// Register use for copies of more than 16 bytes:
    15	//	R3, R4	destination and source cursors
    16	//	R5	n
    17	//	R6	n&31, the tail left over after the 32-byte blocks
    18	//	R7	n&~31, the bytes moved as 32-byte blocks
    19	//	R9	destination address at which a loop stops
    20	// R8, R10, R11 and R12 are scratch. The small-copy path at copy16
    21	// reuses R6-R9 for data and end pointers.
    22	TEXT runtime·memmove(SB), NOSPLIT|NOFRAME, $0-24
    23		MOVD	to+0(FP), R3
    24		MOVD	from+8(FP), R4
    25		MOVD	n+16(FP), R5
    26		CBNZ	R5, check
    27		RET
    28	
    29	check:
    30		// n is a uintptr, so it is compared as an unsigned value.
    31		CMP	$16, R5
    32		BLS	copy16
    33	
    34		AND	$~31, R5, R7	// R7 is N&~31
    35		SUB	R7, R5, R6	// R6 is N&31
    36	
    37		// Addresses are unsigned too: go backward only if from is below to.
    38		CMP	R3, R4
    39		BLO	backward
    40	
    41		// Copying forward proceeds by copying R7/32 32-byte blocks then R6 <= 31 tail bytes.
    42		// R3 and R4 are advanced as we copy.
    43	
    44		// (There may be implementations of armv8 where copying by bytes until
    45		// at least one of source or dest is word aligned is a worthwhile
    46		// optimization, but on the one tested so far (xgene) it did not
    47		// make a significant difference.)
    48	
    49		CBZ	R7, noforwardlarge	// Do we need to copy any 32-byte blocks?
    50	
    51	forwardlargeloop:
    52		// Copy 32 bytes at a time; R7 holds the bytes still to be copied this way.
    53		LDP.P	32(R4), (R8, R10)
    54		STP.P	(R8, R10), 32(R3)
    55		LDP	-16(R4), (R11, R12)
    56		STP	(R11, R12), -16(R3)
    57		SUB	$32, R7, R7
    58		CBNZ	R7, forwardlargeloop
    59	
    60	noforwardlarge:
    61		CBNZ	R6, forwardtail		// Do we need to copy any tail bytes?
    62		RET
    63	
    64	forwardtail:
    65		// There are R6 <= 31 bytes remaining to copy.
    66		// This is large enough to still contain pointers,
    67		// which must be copied atomically.
    68		// Copy the next 16 bytes, then 8 bytes, then any remaining bytes.
    69		TBZ	$4, R6, forwardtail8	// write 16 bytes if R6&16 != 0
    70		LDP.P	16(R4), (R8, R10)
    71		STP.P	(R8, R10), 16(R3)
    72	
    73	forwardtail8:
    74		TBZ	$3, R6, forwardtailbytes	// write 8 bytes if R6&8 != 0
    75		MOVD.P	8(R4), R8
    76		MOVD.P	R8, 8(R3)
    77	
    78	forwardtailbytes:
    79		AND	$7, R6
    80		CBZ	R6, forwarddone
    81	
    82		ADD	R3, R6, R9	// R9 points just past the destination memory
    83	
    84	forwardtailloop:
    85		MOVBU.P	1(R4), R8
    86		MOVBU.P	R8, 1(R3)
    87		CMP	R3, R9
    88		BNE	forwardtailloop
    89	
    90	forwarddone:
    91		RET
    92	
    93		// Small copies: 1..16 bytes.
    94		// The 2..16 byte cases load the first and last chunk of the source before
    95		// storing either, so the chunks may overlap each other and the destination.
    96	copy16:
    97		ADD	R4, R5, R8	// R8 points just past the last source byte
    98		ADD	R3, R5, R9	// R9 points just past the last destination byte
    99		CMP	$8, R5
   100		BLT	copy7
   101		// 8..16 bytes
   102		MOVD	(R4), R6
   103		MOVD	-8(R8), R7
   104		MOVD	R6, (R3)
   105		MOVD	R7, -8(R9)
   106		RET
   107	
   108	copy7:
   109		TBZ	$2, R5, copy3
   110		// 4..7 bytes
   111		MOVWU	(R4), R6
   112		MOVWU	-4(R8), R7
   113		MOVW	R6, (R3)
   114		MOVW	R7, -4(R9)
   115		RET
   116	
   117	copy3:
   118		TBZ	$1, R5, copy1
   119		// 2..3 bytes
   120		MOVHU	(R4), R6
   121		MOVHU	-2(R8), R7
   122		MOVH	R6, (R3)
   123		MOVH	R7, -2(R9)
   124		RET
   125	
   126	copy1:
   127		MOVBU	(R4), R6
   128		MOVB	R6, (R3)
   129		RET
   130	
   131	backward:
   132		// Copying backwards first copies R6 <= 31 tail bytes, then R7/32 32-byte blocks.
   133		// R3 and R4 are advanced to the end of the destination/source buffers
   134		// respectively and moved back as we copy.
   135	
   136		ADD	R4, R5, R4	// R4 points just past the last source byte
   137		ADD	R3, R5, R3	// R3 points just past the last destination byte
   138	
   139		CBZ	R6, nobackwardtail	// Do we need to copy any tail bytes?
   140	
   141		AND	$7, R6, R12
   142		CBZ	R12, backwardtaillarge
   143	
   144		SUB	R12, R3, R9	// R9 points at the lowest destination byte that should be copied by byte.
   145	backwardtailloop:
   146		// Copy sub-pointer-size tail.
   147		MOVBU.W	-1(R4), R8
   148		MOVBU.W	R8, -1(R3)
   149		CMP	R9, R3
   150		BNE	backwardtailloop
   151	
   152	backwardtaillarge:
   153		// Do 8/16-byte write if possible.
   154		// See comment at forwardtail.
   155		TBZ	$3, R6, backwardtail16
   156		MOVD.W	-8(R4), R8
   157		MOVD.W	R8, -8(R3)
   158	
   159	backwardtail16:
   160		TBZ	$4, R6, nobackwardtail
   161		LDP.W	-16(R4), (R8, R10)
   162		STP.W	(R8, R10), -16(R3)
   163	
   164	nobackwardtail:
   165		CBNZ	R7, backwardlarge	// Do we need to copy any 32-byte blocks?
   166		RET
   167	
   168	backwardlarge:
   169		SUB	R7, R3, R9	// R9 points at the lowest destination byte
   170	
   171	backwardlargeloop:
   172		// Copy 32 bytes at a time, the upper 16 bytes first.
   173		LDP	-16(R4), (R8, R10)
   174		STP	(R8, R10), -16(R3)
   175		LDP.W	-32(R4), (R11, R12)
   176		STP.W	(R11, R12), -32(R3)
   177		CMP	R9, R3
   178		BNE	backwardlargeloop
   179		RET

View as plain text