...

Text file src/vendor/golang.org/x/crypto/poly1305/sum_arm.s

     1	// Copyright 2015 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// +build arm,!gccgo,!appengine,!nacl
     6	
     7	#include "textflag.h"
     8	
     9	// This code was translated into a form compatible with 5a from the public
    10	// domain source by Andrew Moon: github.com/floodyberry/poly1305-opt/blob/master/app/extensions/poly1305.
    11	
    12	DATA ·poly1305_init_constants_armv6<>+0(SB)/4, $0x3ffffff   // mask for limb 0 of r
    13	DATA ·poly1305_init_constants_armv6<>+4(SB)/4, $0x3ffff03   // mask for limb 1 of r
    14	DATA ·poly1305_init_constants_armv6<>+8(SB)/4, $0x3ffc0ff   // mask for limb 2 of r
    15	DATA ·poly1305_init_constants_armv6<>+12(SB)/4, $0x3f03fff  // mask for limb 3 of r
    16	DATA ·poly1305_init_constants_armv6<>+16(SB)/4, $0x00fffff  // mask for limb 4 of r
    17	GLOBL ·poly1305_init_constants_armv6<>(SB), RODATA, $20     // five 4-byte words, read-only; ANDed into the key limbs by poly1305_init_ext_armv6
    18	
    19	// Warning: the linker may use R11 to synthesize certain instructions. Please
    20	// take care and verify that no synthetic instructions use it.
    21	
    22	TEXT poly1305_init_ext_armv6<>(SB), NOSPLIT, $0
      		// Initialises the context structure at R0 from the 32-byte key at R1.
      		//
      		// On return the context holds:
      		//   +0  .. +19  first 16 key bytes split into five 26-bit limbs, each
      		//               ANDed with the matching word of
      		//               poly1305_init_constants_armv6
      		//   +20 .. +39  zero
      		//   +40 .. +55  second 16 key bytes, copied as four words
      		//   +56 .. +59  zero
      		//
      		// R4-R7 are preserved via the stack. R0-R3, R8, R9, g, R11 and R12 are
      		// clobbered, and R1 is left advanced by 32.
      		//
    23		// Needs 16 bytes of stack and 64 bytes of space pointed to by R0.  (It
    24		// might look like it's only 60 bytes of space but the final four bytes
    25		// will be written by another function.) We need to skip over four
    26		// bytes of stack because that's saving the value of 'g'.
    27		ADD       $4, R13, R8                                 // R8 = R13+4; the store below increments before each write
    28		MOVM.IB   [R4-R7], (R8)                               // save R4-R7 at R13+8 .. R13+23
    29		MOVM.IA.W (R1), [R2-R5]                               // R2-R5 = key words 0-3; R1 += 16
    30		MOVW      $·poly1305_init_constants_armv6<>(SB), R7   // R7 = address of the mask table
    31		MOVW      R2, R8                                      // limb 0 (upper bits removed by the mask below)
    32		MOVW      R2>>26, R9
    33		MOVW      R3>>20, g
    34		MOVW      R4>>14, R11
    35		MOVW      R5>>8, R12                                  // limb 4 = top 24 bits of word 3
    36		ORR       R3<<6, R9, R9                               // limb 1 = word0>>26 | word1<<6
    37		ORR       R4<<12, g, g                                // limb 2 = word1>>20 | word2<<12
    38		ORR       R5<<18, R11, R11                            // limb 3 = word2>>14 | word3<<18
    39		MOVM.IA   (R7), [R2-R6]                               // R2-R6 = the five masks
    40		AND       R8, R2, R2
    41		AND       R9, R3, R3
    42		AND       g, R4, R4
    43		AND       R11, R5, R5
    44		AND       R12, R6, R6
    45		MOVM.IA.W [R2-R6], (R0)                               // store masked limbs at ctx+0; R0 += 20
    46		EOR       R2, R2, R2
    47		EOR       R3, R3, R3
    48		EOR       R4, R4, R4
    49		EOR       R5, R5, R5
    50		EOR       R6, R6, R6
    51		MOVM.IA.W [R2-R6], (R0)                               // zero ctx+20 .. ctx+39; R0 += 20
    52		MOVM.IA.W (R1), [R2-R5]                               // R2-R5 = key words 4-7; R1 += 16
    53		MOVM.IA   [R2-R6], (R0)                               // store them at ctx+40; R6 is still zero, so ctx+56 = 0
    54		ADD       $20, R13, R0                                // highest address used by the save above
    55		MOVM.DA   (R0), [R4-R7]                               // restore R4-R7 from R13+8 .. R13+23
    56		RET
    57	
      	// MOVW_UNALIGNED(src, dst, tmp, off) copies the four bytes at off(src) to
      	// off(dst) one byte at a time, so it works for any alignment of either
      	// address. tmp is clobbered; src and dst are left unchanged.
    58	#define MOVW_UNALIGNED(src, dst, tmp, off) \
    59		MOVBU (off+0)(src), tmp; \
    60		MOVBU tmp, (off+0)(dst); \
    61		MOVBU (off+1)(src), tmp; \
    62		MOVBU tmp, (off+1)(dst); \
    63		MOVBU (off+2)(src), tmp; \
    64		MOVBU tmp, (off+2)(dst); \
    65		MOVBU (off+3)(src), tmp; \
    66		MOVBU tmp, (off+3)(dst)
    67	
    68	TEXT poly1305_blocks_armv6<>(SB), NOSPLIT, $0
      		// Absorbs complete 16-byte blocks into the accumulator stored in the
      		// context structure.
      		//
      		//   R0 = context (r limbs at +0, accumulator limbs at +20, flag word at +56)
      		//   R1 = message pointer (any alignment)
      		//   R2 = length in bytes; floor(R2/16) blocks are processed
      		//
      		// R4-R8 and R14 are saved and restored here. R0-R3, R9, g, R11 and R12
      		// are clobbered.
      		//
      		// Stack slots used, as offsets from R13:
      		//    28 ..  51  saved R4-R8, R14
      		//    52 ..  83  64-bit partial sums d1 (52), d2 (60), d3 (68), d4 (76)
      		//    84         value ORed into the top message limb (1<<24 or 0)
      		//    88         context pointer
      		//    92         current message pointer
      		//    96         remaining length
      		//   100 .. 115  bounce buffer for an unaligned message block
      		//   116 .. 135  copy of the five r limbs
      		//
    69		// Needs 24 bytes of stack for saved registers and then 88 bytes of
    70		// scratch space after that. We assume that 24 bytes at (R13) have
    71		// already been used: four bytes for the link register saved in the
    72		// prelude of poly1305_auth_armv6, four bytes for saving the value of g
    73		// in that function and 16 bytes of scratch space used around
    74		// poly1305_finish_ext_armv6_skip1.
    75		ADD     $24, R13, R12
    76		MOVM.IB [R4-R8, R14], (R12)        // increment-before store: R13+28 .. R13+51
    77		MOVW    R0, 88(R13)                // remember the context pointer
    78		MOVW    R1, 92(R13)                // remember the message pointer
    79		MOVW    R2, 96(R13)                // remember the length
    80		MOVW    R1, R14                    // R14 = message pointer for the loop
    81		MOVW    R2, R12                    // R12 = length
    82		MOVW    56(R0), R8                 // flag word of the context
    83		WORD    $0xe1180008                // TST R8, R8 not working see issue 5921
    84		EOR     R6, R6, R6                 // R6 = 0; no .S suffix, so the TST flags survive
    85		MOVW.EQ $(1<<24), R6               // flag == 0: every block gets bit 24 set in its top limb
    86		MOVW    R6, 84(R13)
    87		ADD     $116, R13, g
    88		MOVM.IA (R0), [R0-R9]              // R0-R4 = r limbs, R5-R9 = accumulator limbs h0-h4
    89		MOVM.IA [R0-R4], (g)               // keep a copy of r at R13+116
    90		CMP     $16, R12
    91		BLO     poly1305_blocks_armv6_done // less than one block: nothing to do
    92	
    93	poly1305_blocks_armv6_mainloop:
    94		WORD    $0xe31e0003                            // TST R14, #3 not working see issue 5921
    95		BEQ     poly1305_blocks_armv6_mainloop_aligned
    96		ADD     $100, R13, g                           // unaligned input: copy the block bytewise to R13+100
    97		MOVW_UNALIGNED(R14, g, R0, 0)
    98		MOVW_UNALIGNED(R14, g, R0, 4)
    99		MOVW_UNALIGNED(R14, g, R0, 8)
   100		MOVW_UNALIGNED(R14, g, R0, 12)
   101		MOVM.IA (g), [R0-R3]                           // R0-R3 = the four message words
   102		ADD     $16, R14                               // advance the message pointer
   103		B       poly1305_blocks_armv6_mainloop_loaded
   104	
   105	poly1305_blocks_armv6_mainloop_aligned:
   106		MOVM.IA.W (R14), [R0-R3]                       // R0-R3 = the four message words; R14 += 16
   107	
   108	poly1305_blocks_armv6_mainloop_loaded:
      		// Split the 128-bit block into five 26-bit limbs and add them to h.
   109		MOVW    R0>>26, g
   110		MOVW    R1>>20, R11
   111		MOVW    R2>>14, R12
   112		MOVW    R14, 92(R13)                           // save the advanced message pointer; R14 is reused below
   113		MOVW    R3>>8, R4                              // limb 4 = top 24 bits of word 3
   114		ORR     R1<<6, g, g                            // limb 1 (before masking)
   115		ORR     R2<<12, R11, R11                       // limb 2 (before masking)
   116		ORR     R3<<18, R12, R12                       // limb 3 (before masking)
   117		BIC     $0xfc000000, R0, R0                    // keep the low 26 bits of each limb
   118		BIC     $0xfc000000, g, g
   119		MOVW    84(R13), R3                            // 1<<24 or 0, chosen at entry
   120		BIC     $0xfc000000, R11, R11
   121		BIC     $0xfc000000, R12, R12
   122		ADD     R0, R5, R5                             // h0 += m0
   123		ADD     g, R6, R6                              // h1 += m1
   124		ORR     R3, R4, R4                             // m4 |= high bit value
   125		ADD     R11, R7, R7                            // h2 += m2
   126		ADD     $116, R13, R14
   127		ADD     R12, R8, R8                            // h3 += m3
   128		ADD     R4, R9, R9                             // h4 += m4
   129		MOVM.IA (R14), [R0-R4]                         // R0-R4 = r0-r4 from the stack copy
      		// 64-bit multiply-accumulate: (R11:g) builds d4, (R14:R12) builds d3.
   130		MULLU   R4, R5, (R11, g)                       // d4  = r4*h0
   131		MULLU   R3, R5, (R14, R12)                     // d3  = r3*h0
   132		MULALU  R3, R6, (R11, g)                       // d4 += r3*h1
   133		MULALU  R2, R6, (R14, R12)                     // d3 += r2*h1
   134		MULALU  R2, R7, (R11, g)                       // d4 += r2*h2
   135		MULALU  R1, R7, (R14, R12)                     // d3 += r1*h2
   136		ADD     R4<<2, R4, R4                          // R4 = 5*r4
   137		ADD     R3<<2, R3, R3                          // R3 = 5*r3
   138		MULALU  R1, R8, (R11, g)                       // d4 += r1*h3
   139		MULALU  R0, R8, (R14, R12)                     // d3 += r0*h3
   140		MULALU  R0, R9, (R11, g)                       // d4 += r0*h4
   141		MULALU  R4, R9, (R14, R12)                     // d3 += 5*r4*h4
   142		MOVW    g, 76(R13)                             // spill d4 (low, high)
   143		MOVW    R11, 80(R13)
   144		MOVW    R12, 68(R13)                           // spill d3 (low, high)
   145		MOVW    R14, 72(R13)
   146		MULLU   R2, R5, (R11, g)                       // d2  = r2*h0
   147		MULLU   R1, R5, (R14, R12)                     // d1  = r1*h0
   148		MULALU  R1, R6, (R11, g)                       // d2 += r1*h1
   149		MULALU  R0, R6, (R14, R12)                     // d1 += r0*h1
   150		MULALU  R0, R7, (R11, g)                       // d2 += r0*h2
   151		MULALU  R4, R7, (R14, R12)                     // d1 += 5*r4*h2
   152		ADD     R2<<2, R2, R2                          // R2 = 5*r2
   153		ADD     R1<<2, R1, R1                          // R1 = 5*r1
   154		MULALU  R4, R8, (R11, g)                       // d2 += 5*r4*h3
   155		MULALU  R3, R8, (R14, R12)                     // d1 += 5*r3*h3
   156		MULALU  R3, R9, (R11, g)                       // d2 += 5*r3*h4
   157		MULALU  R2, R9, (R14, R12)                     // d1 += 5*r2*h4
   158		MOVW    g, 60(R13)                             // spill d2 (low, high)
   159		MOVW    R11, 64(R13)
   160		MOVW    R12, 52(R13)                           // spill d1 (low, high)
   161		MOVW    R14, 56(R13)
   162		MULLU   R0, R5, (R11, g)                       // d0  = r0*h0
   163		MULALU  R4, R6, (R11, g)                       // d0 += 5*r4*h1
   164		MULALU  R3, R7, (R11, g)                       // d0 += 5*r3*h2
   165		MULALU  R2, R8, (R11, g)                       // d0 += 5*r2*h3
   166		MULALU  R1, R9, (R11, g)                       // d0 += 5*r1*h4
   167		ADD     $52, R13, R0
   168		MOVM.IA (R0), [R0-R7]                          // R0:R1 = d1, R2:R3 = d2, R4:R5 = d3, R6:R7 = d4 (low:high)
      		// Carry propagation: reduce each 64-bit sum to 26 bits, passing the
      		// excess to the next limb; the carry out of d4 is multiplied by 5 and
      		// added to limb 0.
   169		MOVW    g>>26, R12
   170		MOVW    R4>>26, R14
   171		ORR     R11<<6, R12, R12                       // R12 = low word of d0 >> 26
   172		ORR     R5<<6, R14, R14                        // R14 = low word of d3 >> 26
   173		BIC     $0xfc000000, g, g                      // g  = d0 & (2^26-1)
   174		BIC     $0xfc000000, R4, R4                    // R4 = d3 & (2^26-1)
   175		ADD.S   R12, R0, R0                            // d1 += carry from d0
   176		ADC     $0, R1, R1
   177		ADD.S   R14, R6, R6                            // d4 += carry from d3
   178		ADC     $0, R7, R7
   179		MOVW    R0>>26, R12
   180		MOVW    R6>>26, R14
   181		ORR     R1<<6, R12, R12                        // R12 = carry from d1
   182		ORR     R7<<6, R14, R14                        // R14 = carry from d4
   183		BIC     $0xfc000000, R0, R0                    // R0 = limb 1
   184		BIC     $0xfc000000, R6, R6                    // R6 = limb 4
   185		ADD     R14<<2, R14, R14                       // R14 = 5 * carry from d4
   186		ADD.S   R12, R2, R2                            // d2 += carry from d1
   187		ADC     $0, R3, R3
   188		ADD     R14, g, g                              // limb 0 += 5 * carry from d4
   189		MOVW    R2>>26, R12
   190		MOVW    g>>26, R14                             // carry out of limb 0
   191		ORR     R3<<6, R12, R12                        // R12 = carry from d2
   192		BIC     $0xfc000000, g, R5                     // R5 = new h0
   193		BIC     $0xfc000000, R2, R7                    // R7 = new h2
   194		ADD     R12, R4, R4                            // limb 3 += carry from d2
   195		ADD     R14, R0, R0                            // limb 1 += carry from limb 0
   196		MOVW    R4>>26, R12                            // carry out of limb 3
   197		BIC     $0xfc000000, R4, R8                    // R8 = new h3
   198		ADD     R12, R6, R9                            // R9 = new h4
   199		MOVW    96(R13), R12                           // R12 = remaining length (including this block)
   200		MOVW    92(R13), R14                           // R14 = message pointer for the next block
   201		MOVW    R0, R6                                 // R6 = new h1
   202		CMP     $32, R12                               // is there another full block after this one?
   203		SUB     $16, R12, R12                          // no .S suffix: the CMP flags are kept
   204		MOVW    R12, 96(R13)
   205		BHS     poly1305_blocks_armv6_mainloop
   206	
   207	poly1305_blocks_armv6_done:
   208		MOVW    88(R13), R12                           // R12 = context pointer
   209		MOVW    R5, 20(R12)                            // write h0-h4 back to the context
   210		MOVW    R6, 24(R12)
   211		MOVW    R7, 28(R12)
   212		MOVW    R8, 32(R12)
   213		MOVW    R9, 36(R12)
   214		ADD     $48, R13, R0                           // highest address used by the save at entry
   215		MOVM.DA (R0), [R4-R8, R14]                     // restore R4-R8 and the link register
   216		RET
   217	
      	// MOVHUP_UNALIGNED(src, dst, tmp) copies two bytes from (src) to (dst) one
      	// byte at a time using post-increment addressing, so both src and dst end
      	// up advanced by 2. tmp is clobbered.
   218	#define MOVHUP_UNALIGNED(src, dst, tmp) \
   219		MOVBU.P 1(src), tmp; \
   220		MOVBU.P tmp, 1(dst); \
   221		MOVBU.P 1(src), tmp; \
   222		MOVBU.P tmp, 1(dst)
   223	
      	// MOVWP_UNALIGNED(src, dst, tmp) copies four bytes from (src) to (dst) as
      	// two MOVHUP_UNALIGNED steps, leaving both src and dst advanced by 4.
      	// tmp is clobbered.
   224	#define MOVWP_UNALIGNED(src, dst, tmp) \
   225		MOVHUP_UNALIGNED(src, dst, tmp); \
   226		MOVHUP_UNALIGNED(src, dst, tmp)
   227	
   228	// func poly1305_auth_armv6(out *[16]byte, m *byte, mlen uint32, key *[32]byte)
      	//
      	// Computes the 16-byte authenticator of the mlen bytes at m under the
      	// 32-byte key and writes it to out. The context structure on the stack is
      	// zeroed before returning.
   229	TEXT ·poly1305_auth_armv6(SB), $196-16
   230		// The value 196, just above, is the sum of 64 (the size of the context
   231		// structure) and 132 (the amount of stack needed).
   232		//
   233		// At this point, the stack pointer (R13) has been moved down. It
   234		// points to the saved link register and there's 196 bytes of free
   235		// space above it.
   236		//
   237		// The stack for this function looks like:
   238		//
   239		// +---------------------
   240		// |
   241		// | 64 bytes of context structure
   242		// |
   243		// +---------------------
   244		// |
   245		// | 112 bytes for poly1305_blocks_armv6
   246		// |
   247		// +---------------------
   248		// | 16 bytes of final block, constructed at
   249		// | poly1305_finish_ext_armv6_skip8
   250		// +---------------------
   251		// | four bytes of saved 'g'
   252		// +---------------------
   253		// | lr, saved by prelude    <- R13 points here
   254		// +---------------------
   255		MOVW g, 4(R13) // g is used as a scratch register below; restored before RET
   256	
   257		MOVW out+0(FP), R4
   258		MOVW m+4(FP), R5
   259		MOVW mlen+8(FP), R6
   260		MOVW key+12(FP), R7
   261	
   262		ADD  $136, R13, R0 // 136 = 4 + 4 + 16 + 112
   263		MOVW R7, R1
   264	
   265		// poly1305_init_ext_armv6 will write to the stack above the saved g (it
   266		// stores R4-R7 at R13+8 through R13+23), but that's ok because none of
      		// the other values have been written yet.
   267		BL    poly1305_init_ext_armv6<>(SB)
   268		BIC.S $15, R6, R2                         // R2 = mlen rounded down to a multiple of 16
   269		BEQ   poly1305_auth_armv6_noblocks        // no complete block
   270		ADD   $136, R13, R0                       // R0 = context
   271		MOVW  R5, R1                              // R1 = start of the message
   272		ADD   R2, R5, R5                          // R5 = first byte after the complete blocks
   273		SUB   R2, R6, R6                          // R6 = number of trailing bytes (0-15)
   274		BL    poly1305_blocks_armv6<>(SB)
   275	
   276	poly1305_auth_armv6_noblocks:
   277		ADD  $136, R13, R0
   278		MOVW R5, R1
   279		MOVW R6, R2
   280		MOVW R4, R3
   281	
   282		MOVW  R0, R5                                // R5 = context
   283		MOVW  R1, R6                                // R6 = pointer to the trailing bytes
   284		MOVW  R2, R7                                // R7 = number of trailing bytes
   285		MOVW  R3, R8                                // R8 = out
   286		AND.S R2, R2, R2                            // sets Z when there are no trailing bytes
   287		BEQ   poly1305_finish_ext_armv6_noremaining
   288		EOR   R0, R0                                // R0 = 0
   289		ADD   $8, R13, R9                           // 8 = offset to 16 byte scratch space
   290		MOVW  R0, (R9)                              // zero the 16-byte final block
   291		MOVW  R0, 4(R9)
   292		MOVW  R0, 8(R9)
   293		MOVW  R0, 12(R9)
      		// Copy the trailing bytes into the final block, selecting 8, 4, 2 and
      		// 1 byte pieces from the bits of the count in R2. R1 and R9 advance as
      		// bytes are copied.
   294		WORD  $0xe3110003                           // TST R1, #3 not working see issue 5921
   295		BEQ   poly1305_finish_ext_armv6_aligned
   296		WORD  $0xe3120008                           // TST R2, #8 not working see issue 5921
   297		BEQ   poly1305_finish_ext_armv6_skip8
   298		MOVWP_UNALIGNED(R1, R9, g)
   299		MOVWP_UNALIGNED(R1, R9, g)
   300	
   301	poly1305_finish_ext_armv6_skip8:
   302		WORD $0xe3120004                     // TST $4, R2 not working see issue 5921
   303		BEQ  poly1305_finish_ext_armv6_skip4
   304		MOVWP_UNALIGNED(R1, R9, g)
   305	
   306	poly1305_finish_ext_armv6_skip4:
   307		WORD $0xe3120002                     // TST $2, R2 not working see issue 5921
   308		BEQ  poly1305_finish_ext_armv6_skip2
   309		MOVHUP_UNALIGNED(R1, R9, g)
   310		B    poly1305_finish_ext_armv6_skip2
   311	
   312	poly1305_finish_ext_armv6_aligned:
   313		WORD      $0xe3120008                             // TST R2, #8 not working see issue 5921
   314		BEQ       poly1305_finish_ext_armv6_skip8_aligned
   315		MOVM.IA.W (R1), [g-R11]                           // two words at once; R1 += 8
   316		MOVM.IA.W [g-R11], (R9)
   317	
   318	poly1305_finish_ext_armv6_skip8_aligned:
   319		WORD   $0xe3120004                             // TST $4, R2 not working see issue 5921
   320		BEQ    poly1305_finish_ext_armv6_skip4_aligned
   321		MOVW.P 4(R1), g
   322		MOVW.P g, 4(R9)
   323	
   324	poly1305_finish_ext_armv6_skip4_aligned:
   325		WORD    $0xe3120002                     // TST $2, R2 not working see issue 5921
   326		BEQ     poly1305_finish_ext_armv6_skip2
   327		MOVHU.P 2(R1), g
   328		MOVH.P  g, 2(R9)
   329	
   330	poly1305_finish_ext_armv6_skip2:
   331		WORD    $0xe3120001                     // TST $1, R2 not working see issue 5921
   332		BEQ     poly1305_finish_ext_armv6_skip1
   333		MOVBU.P 1(R1), g
   334		MOVBU.P g, 1(R9)
   335	
   336	poly1305_finish_ext_armv6_skip1:
   337		MOVW  $1, R11
   338		MOVBU R11, 0(R9)                     // append a 0x01 byte right after the copied data
   339		MOVW  R11, 56(R5)                    // set the context flag word, so poly1305_blocks_armv6 ORs in 0 instead of 1<<24
   340		MOVW  R5, R0                         // R0 = context
   341		ADD   $8, R13, R1                    // R1 = the padded final block
   342		MOVW  $16, R2                        // exactly one block
   343		BL    poly1305_blocks_armv6<>(SB)
   344	
   345	poly1305_finish_ext_armv6_noremaining:
   346		MOVW      20(R5), R0                 // R0-R4 = accumulator limbs h0-h4
   347		MOVW      24(R5), R1
   348		MOVW      28(R5), R2
   349		MOVW      32(R5), R3
   350		MOVW      36(R5), R4
      		// Full carry propagation: the excess of h4 is multiplied by 5 and
      		// added to h0, then carries ripple through h1..h4.
   351		MOVW      R4>>26, R12
   352		BIC       $0xfc000000, R4, R4
   353		ADD       R12<<2, R12, R12           // R12 = 5 * (h4 >> 26)
   354		ADD       R12, R0, R0
   355		MOVW      R0>>26, R12
   356		BIC       $0xfc000000, R0, R0
   357		ADD       R12, R1, R1
   358		MOVW      R1>>26, R12
   359		BIC       $0xfc000000, R1, R1
   360		ADD       R12, R2, R2
   361		MOVW      R2>>26, R12
   362		BIC       $0xfc000000, R2, R2
   363		ADD       R12, R3, R3
   364		MOVW      R3>>26, R12
   365		BIC       $0xfc000000, R3, R3
   366		ADD       R12, R4, R4
      		// Compute h + 5 - 2^130 in R6, R7, g, R11, R9.
   367		ADD       $5, R0, R6
   368		MOVW      R6>>26, R12
   369		BIC       $0xfc000000, R6, R6
   370		ADD       R12, R1, R7
   371		MOVW      R7>>26, R12
   372		BIC       $0xfc000000, R7, R7
   373		ADD       R12, R2, g
   374		MOVW      g>>26, R12
   375		BIC       $0xfc000000, g, g
   376		ADD       R12, R3, R11
   377		MOVW      $-(1<<26), R12             // -2^26 in the top limb is -2^130 overall
   378		ADD       R11>>26, R12, R12
   379		BIC       $0xfc000000, R11, R11
   380		ADD       R12, R4, R9
      		// Select without branching: if the subtraction went negative keep
      		// h, otherwise keep h + 5 - 2^130.
   381		MOVW      R9>>31, R12                // 1 if the result is negative
   382		SUB       $1, R12                    // 0 if negative, all ones otherwise
   383		AND       R12, R6, R6
   384		AND       R12, R7, R7
   385		AND       R12, g, g
   386		AND       R12, R11, R11
   387		AND       R12, R9, R9
   388		MVN       R12, R12                   // inverted mask for the original h
   389		AND       R12, R0, R0
   390		AND       R12, R1, R1
   391		AND       R12, R2, R2
   392		AND       R12, R3, R3
   393		AND       R12, R4, R4
   394		ORR       R6, R0, R0
   395		ORR       R7, R1, R1
   396		ORR       g, R2, R2
   397		ORR       R11, R3, R3
   398		ORR       R9, R4, R4
      		// Repack the five 26-bit limbs into four 32-bit words in R0-R3.
   399		ORR       R1<<26, R0, R0
   400		MOVW      R1>>6, R1
   401		ORR       R2<<20, R1, R1
   402		MOVW      R2>>12, R2
   403		ORR       R3<<14, R2, R2
   404		MOVW      R3>>18, R3
   405		ORR       R4<<8, R3, R3
   406		MOVW      40(R5), R6                 // the four words stored at ctx+40 by poly1305_init_ext_armv6
   407		MOVW      44(R5), R7                 // (second half of the key)
   408		MOVW      48(R5), g
   409		MOVW      52(R5), R11
   410		ADD.S     R6, R0, R0                 // 128-bit add with carry; the final carry is dropped
   411		ADC.S     R7, R1, R1
   412		ADC.S     g, R2, R2
   413		ADC.S     R11, R3, R3
   414		MOVM.IA   [R0-R3], (R8)              // write the 16-byte result to out
   415		MOVW      R5, R12                    // R12 = context
   416		EOR       R0, R0, R0                 // clear R0-R7
   417		EOR       R1, R1, R1
   418		EOR       R2, R2, R2
   419		EOR       R3, R3, R3
   420		EOR       R4, R4, R4
   421		EOR       R5, R5, R5
   422		EOR       R6, R6, R6
   423		EOR       R7, R7, R7
   424		MOVM.IA.W [R0-R7], (R12)             // zero the first 32 bytes of the context; R12 += 32
   425		MOVM.IA   [R0-R7], (R12)             // zero the remaining 32 bytes
   426		MOVW      4(R13), g                  // restore g
   427		RET

View as plain text