Text file src/runtime/memmove_arm.s

     1	// Inferno's libkern/memmove-arm.s
     2	// https://bitbucket.org/inferno-os/inferno-os/src/default/libkern/memmove-arm.s
     3	//
     4	//         Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
     5	//         Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com).  All rights reserved.
     6	//         Portions Copyright 2009 The Go Authors. All rights reserved.
     7	//
     8	// Permission is hereby granted, free of charge, to any person obtaining a copy
     9	// of this software and associated documentation files (the "Software"), to deal
    10	// in the Software without restriction, including without limitation the rights
    11	// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    12	// copies of the Software, and to permit persons to whom the Software is
    13	// furnished to do so, subject to the following conditions:
    14	//
    15	// The above copyright notice and this permission notice shall be included in
    16	// all copies or substantial portions of the Software.
    17	//
    18	// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    19	// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    20	// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    21	// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    22	// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    23	// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    24	// THE SOFTWARE.
    25	
    26	#include "textflag.h"
    27	
    28	// TE or TS are spilled to the stack during bulk register moves.
    29	#define TS	R0					/* to: destination start, advanced by the forward copies */
    30	#define TE	R8					/* to+n: destination end, decremented by the backward copies */
    31	
    32	// Warning: the linker will use R11 to synthesize certain instructions. Please
    33	// take care and double check with objdump.
    34	#define FROM	R11					/* source pointer; moved to the source end for backward copies */
    35	#define N	R12					/* byte count */
    36	#define TMP	R12				/* N and TMP don't overlap */
    37	#define TMP1	R5					/* word temporary in the aligned 4-byte loops; same register as RSHIFT */
    38	// The next three are used only on the unaligned-source paths: two shift amounts and the FROM fix-up.
    39	#define RSHIFT	R5
    40	#define LSHIFT	R6
    41	#define OFFSET	R7
    42	// Backward unaligned loop: BRn are loaded from the source, BWn are stored to the destination; BWn shares a register with BR(n+1).
    43	#define BR0	R0					/* shared with TS */
    44	#define BW0	R1
    45	#define BR1	R1
    46	#define BW1	R2
    47	#define BR2	R2
    48	#define BW2	R3
    49	#define BR3	R3
    50	#define BW3	R4
    51	// Forward unaligned loop: FRn are loaded from the source, FWn are stored to the destination; FW(n+1) shares a register with FRn.
    52	#define FW0	R1
    53	#define FR0	R2
    54	#define FW1	R2
    55	#define FR1	R3
    56	#define FW2	R3
    57	#define FR2	R4
    58	#define FW3	R4
    59	#define FR3	R8					/* shared with TE */
    60	// memmove copies n bytes from 'from' to 'to': forward (ascending addresses) when to <= from, otherwise backward from the end pointers.
    61	// func memmove(to, from unsafe.Pointer, n uintptr)
    62	TEXT runtime·memmove(SB), NOSPLIT, $4-12	// the 4-byte frame is the single slot used for savedts / savedte
    63	_memmove:
    64		MOVW	to+0(FP), TS
    65		MOVW	from+4(FP), FROM
    66		MOVW	n+8(FP), N
    67	// N and TMP are both R12: N is read for the last time by the CMP $4 below, before TMP is first written.
    68		ADD	N, TS, TE	/* to end pointer */
    69	// Choose the copy direction from the pointer order.
    70		CMP	FROM, TS
    71		BLS	_forward	/* to <= from (unsigned) */
    72	
    73	_back:					/* backward copy: TE and FROM walk down from the ends */
    74		ADD	N, FROM		/* from end pointer */
    75		CMP	$4, N		/* need at least 4 bytes to copy */
    76		BLT	_b1tail		/* signed compare: N < 4 goes straight to the byte loop */
    77	
    78	_b4align:				/* align destination on 4 */
    79		AND.S	$3, TE, TMP
    80		BEQ	_b4aligned
    81	
    82		MOVBU.W	-1(FROM), TMP	/* pre-indexed */
    83		MOVBU.W	TMP, -1(TE)	/* pre-indexed */
    84		B	_b4align
    85	
    86	_b4aligned:				/* is source now aligned? */
    87		AND.S	$3, FROM, TMP
    88		BNE	_bunaligned	/* TMP = FROM&3 (nonzero) is consumed by _bunaligned */
    89	// Here TE and FROM are both 4-byte aligned.
    90		ADD	$31, TS, TMP	/* do 32-byte chunks if possible */
    91		MOVW	TS, savedts-4(SP)	/* spill TS: R0 is overwritten by the block load below */
    92	_b32loop:
    93		CMP	TMP, TE
    94		BLS	_b4tail		/* TE <= TS+31: fewer than 32 bytes left */
    95	
    96		MOVM.DB.W (FROM), [R0-R7]	/* 8 registers = 32 bytes per iteration */
    97		MOVM.DB.W [R0-R7], (TE)
    98		B	_b32loop
    99	
   100	_b4tail:				/* do remaining words if possible */
   101		MOVW	savedts-4(SP), TS	/* reload TS, clobbered by the block moves */
   102		ADD	$3, TS, TMP
   103	_b4loop:
   104		CMP	TMP, TE
   105		BLS	_b1tail		/* TE <= TS+3: fewer than 4 bytes left */
   106	
   107		MOVW.W	-4(FROM), TMP1	/* pre-indexed */
   108		MOVW.W	TMP1, -4(TE)	/* pre-indexed */
   109		B	_b4loop
   110	
   111	_b1tail:				/* remaining bytes */
   112		CMP	TE, TS
   113		BEQ	_return		/* done once TE has come down to TS */
   114	
   115		MOVBU.W	-1(FROM), TMP	/* pre-indexed */
   116		MOVBU.W	TMP, -1(TE)	/* pre-indexed */
   117		B	_b1tail
   118	
   119	_forward:				/* forward copy: TS and FROM walk up from the start */
   120		CMP	$4, N		/* need at least 4 bytes to copy */
   121		BLT	_f1tail		/* signed compare: N < 4 goes straight to the byte loop */
   122	
   123	_f4align:				/* align destination on 4 */
   124		AND.S	$3, TS, TMP
   125		BEQ	_f4aligned
   126	
   127		MOVBU.P	1(FROM), TMP	/* implicit write back */
   128		MOVBU.P	TMP, 1(TS)	/* implicit write back */
   129		B	_f4align
   130	
   131	_f4aligned:				/* is source now aligned? */
   132		AND.S	$3, FROM, TMP
   133		BNE	_funaligned	/* TMP = FROM&3 (nonzero) is consumed by _funaligned */
   134	// Here TS and FROM are both 4-byte aligned.
   135		SUB	$31, TE, TMP	/* do 32-byte chunks if possible */
   136		MOVW	TE, savedte-4(SP)	/* spill TE: R8 is overwritten by the block load below */
   137	_f32loop:
   138		CMP	TMP, TS
   139		BHS	_f4tail		/* TS >= TE-31: fewer than 32 bytes left */
   140	
   141		MOVM.IA.W (FROM), [R1-R8]	/* 8 registers = 32 bytes per iteration */
   142		MOVM.IA.W [R1-R8], (TS)
   143		B	_f32loop
   144	
   145	_f4tail:
   146		MOVW	savedte-4(SP), TE	/* reload TE, clobbered by the block moves */
   147		SUB	$3, TE, TMP	/* do remaining words if possible */
   148	_f4loop:
   149		CMP	TMP, TS
   150		BHS	_f1tail		/* TS >= TE-3: fewer than 4 bytes left */
   151	
   152		MOVW.P	4(FROM), TMP1	/* implicit write back */
   153		MOVW.P	TMP1, 4(TS)	/* implicit write back */
   154		B	_f4loop
   155	
   156	_f1tail:				/* remaining bytes */
   157		CMP	TS, TE
   158		BEQ	_return		/* done once TS has reached TE */
   159	
   160		MOVBU.P	1(FROM), TMP	/* implicit write back */
   161		MOVBU.P	TMP, 1(TS)	/* implicit write back */
   162		B	_f1tail
   163	
   164	_return:
   165		MOVW	to+0(FP), R0	/* reload the 'to' argument into R0 (TS may have advanced) */
   166		RET
   167	// Backward copy with TE aligned but FROM not: TMP = FROM&3 (1, 2 or 3) selects the shift pair.
   168	_bunaligned:
   169		CMP	$2, TMP		/* is TMP < 2 ? */
   170	// Each destination word is (higher source word << LSHIFT) | (lower source word >> RSHIFT); OFFSET equals FROM&3.
   171		MOVW.LT	$8, RSHIFT		/* (R(n)<<24)|(R(n-1)>>8) */
   172		MOVW.LT	$24, LSHIFT
   173		MOVW.LT	$1, OFFSET
   174	
   175		MOVW.EQ	$16, RSHIFT		/* (R(n)<<16)|(R(n-1)>>16) */
   176		MOVW.EQ	$16, LSHIFT
   177		MOVW.EQ	$2, OFFSET
   178	
   179		MOVW.GT	$24, RSHIFT		/* (R(n)<<8)|(R(n-1)>>24) */
   180		MOVW.GT	$8, LSHIFT
   181		MOVW.GT	$3, OFFSET
   182	
   183		ADD	$16, TS, TMP	/* do 16-byte chunks if possible */
   184		CMP	TMP, TE
   185		BLS	_b1tail		/* TE <= TS+16: too short; FROM is still unmodified, so use the byte loop */
   186	
   187		BIC	$3, FROM		/* align source */
   188		MOVW	TS, savedts-4(SP)	/* spill TS: BR0 is R0 */
   189		MOVW	(FROM), BR0	/* prime first block register */
   190	
   191	_bu16loop:
   192		CMP	TMP, TE
   193		BLS	_bu1tail	/* TE <= TS+16: leave the rest to the byte loop */
   194	// BR0 still holds the lowest word of the previous load (or the primed word); it supplies the high part of BW3.
   195		MOVW	BR0<<LSHIFT, BW3
   196		MOVM.DB.W (FROM), [BR0-BR3]	/* load the next 4 lower words, BR3 being the highest */
   197		ORR	BR3>>RSHIFT, BW3
   198	// From here each BWn overwrites BR(n+1) as that word is consumed, so the statement order matters.
   199		MOVW	BR3<<LSHIFT, BW2
   200		ORR	BR2>>RSHIFT, BW2
   201	
   202		MOVW	BR2<<LSHIFT, BW1
   203		ORR	BR1>>RSHIFT, BW1
   204	
   205		MOVW	BR1<<LSHIFT, BW0
   206		ORR	BR0>>RSHIFT, BW0	/* BR0 is left intact for the next iteration */
   207	
   208		MOVM.DB.W [BW0-BW3], (TE)
   209		B	_bu16loop
   210	
   211	_bu1tail:
   212		MOVW	savedts-4(SP), TS	/* reload TS (R0 was used as BR0) */
   213		ADD	OFFSET, FROM	/* put back the low bits cleared by BIC */
   214		B	_b1tail
   215	// Forward copy with TS aligned but FROM not: TMP = FROM&3 (1, 2 or 3) selects the shift pair.
   216	_funaligned:
   217		CMP	$2, TMP		/* is TMP < 2 ? */
   218	// Each destination word is (lower source word >> RSHIFT) | (higher source word << LSHIFT); OFFSET equals 4 - (FROM&3).
   219		MOVW.LT	$8, RSHIFT		/* (R(n+1)<<24)|(R(n)>>8) */
   220		MOVW.LT	$24, LSHIFT
   221		MOVW.LT	$3, OFFSET
   222	
   223		MOVW.EQ	$16, RSHIFT		/* (R(n+1)<<16)|(R(n)>>16) */
   224		MOVW.EQ	$16, LSHIFT
   225		MOVW.EQ	$2, OFFSET
   226	
   227		MOVW.GT	$24, RSHIFT		/* (R(n+1)<<8)|(R(n)>>24) */
   228		MOVW.GT	$8, LSHIFT
   229		MOVW.GT	$1, OFFSET
   230	
   231		SUB	$16, TE, TMP	/* do 16-byte chunks if possible */
   232		CMP	TMP, TS
   233		BHS	_f1tail		/* TS >= TE-16: too short; FROM is still unmodified, so use the byte loop */
   234	
   235		BIC	$3, FROM		/* align source */
   236		MOVW	TE, savedte-4(SP)	/* spill TE: FR3 is R8 */
   237		MOVW.P	4(FROM), FR3	/* prime last block register, implicit write back */
   238	
   239	_fu16loop:
   240		CMP	TMP, TS
   241		BHS	_fu1tail	/* TS >= TE-16: leave the rest to the byte loop */
   242	// FR3 still holds the highest word of the previous load (or the primed word); it supplies the low part of FW0.
   243		MOVW	FR3>>RSHIFT, FW0
   244		MOVM.IA.W (FROM), [FR0,FR1,FR2,FR3]	/* load the next 4 higher words, FR3 being the highest */
   245		ORR	FR0<<LSHIFT, FW0
   246	// From here each FW(n+1) overwrites FRn as that word is consumed, so the statement order matters.
   247		MOVW	FR0>>RSHIFT, FW1
   248		ORR	FR1<<LSHIFT, FW1
   249	
   250		MOVW	FR1>>RSHIFT, FW2
   251		ORR	FR2<<LSHIFT, FW2
   252	
   253		MOVW	FR2>>RSHIFT, FW3
   254		ORR	FR3<<LSHIFT, FW3	/* FR3 is left intact for the next iteration */
   255	
   256		MOVM.IA.W [FW0,FW1,FW2,FW3], (TS)
   257		B	_fu16loop
   258	
   259	_fu1tail:
   260		MOVW	savedte-4(SP), TE	/* reload TE (R8 was used as FR3) */
   261		SUB	OFFSET, FROM	/* FROM is 4 past the partly consumed word: step back to the first uncopied byte */
   262		B	_f1tail
View as plain text