/* armv8-curve25519
*
* Copyright (C) 2006-2023 wolfSSL Inc.
*
* This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* wolfSSL is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif /* HAVE_CONFIG_H */
#include <wolfssl/wolfcrypt/settings.h>
/* Generated using (from wolfssl):
* cd ../scripts
* ruby ./x25519/x25519.rb arm64 ../wolfssl/wolfcrypt/src/port/arm/armv8-curve25519.S
*/
#ifdef WOLFSSL_ARMASM
#ifdef __aarch64__
#ifndef WOLFSSL_ARMASM_INLINE
#if defined(HAVE_CURVE25519) || defined(HAVE_ED25519)
#if !defined(CURVE25519_SMALL) || !defined(ED25519_SMALL)
#ifndef __APPLE__
.text
.globl fe_init
.type fe_init,@function
.align 2
fe_init:
#else
.section __TEXT,__text
.globl _fe_init
.p2align 2
_fe_init:
#endif /* __APPLE__ */
ret
#ifndef __APPLE__
.size fe_init,.-fe_init
#endif /* __APPLE__ */
#ifdef HAVE_ED25519
#ifndef __APPLE__
.text
.globl fe_frombytes
.type fe_frombytes,@function
.align 2
fe_frombytes:
#else
.section __TEXT,__text
.globl _fe_frombytes
.p2align 2
_fe_frombytes:
#endif /* __APPLE__ */
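# Load a 32-byte little-endian value and clear bit 255 so the result is a
# 4x64-bit field element below 2^255.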
ldp x2, x3, [x1]
ldp x4, x5, [x1, #16]
and x5, x5, #0x7fffffffffffffff
stp x2, x3, [x0]
stp x4, x5, [x0, #16]
ret
#ifndef __APPLE__
.size fe_frombytes,.-fe_frombytes
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl fe_tobytes
.type fe_tobytes,@function
.align 2
fe_tobytes:
#else
.section __TEXT,__text
.globl _fe_tobytes
.p2align 2
_fe_tobytes:
#endif /* __APPLE__ */
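# Canonicalize before storing: add 19 and use bit 255 of the sum to detect
# values >= p = 2^255-19, then conditionally add 19 and clear bit 255
# (equivalent to subtracting p exactly once).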
mov x7, #19
ldp x2, x3, [x1]
ldp x4, x5, [x1, #16]
adds x6, x2, x7
adcs x6, x3, xzr
adcs x6, x4, xzr
adc x6, x5, xzr
and x6, x7, x6, asr 63
adds x2, x2, x6
adcs x3, x3, xzr
adcs x4, x4, xzr
adc x5, x5, xzr
and x5, x5, #0x7fffffffffffffff
stp x2, x3, [x0]
stp x4, x5, [x0, #16]
ret
#ifndef __APPLE__
.size fe_tobytes,.-fe_tobytes
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl fe_1
.type fe_1,@function
.align 2
fe_1:
#else
.section __TEXT,__text
.globl _fe_1
.p2align 2
_fe_1:
#endif /* __APPLE__ */
# Set one
mov x1, #1
stp x1, xzr, [x0]
stp xzr, xzr, [x0, #16]
ret
#ifndef __APPLE__
.size fe_1,.-fe_1
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl fe_0
.type fe_0,@function
.align 2
fe_0:
#else
.section __TEXT,__text
.globl _fe_0
.p2align 2
_fe_0:
#endif /* __APPLE__ */
# Set zero
stp xzr, xzr, [x0]
stp xzr, xzr, [x0, #16]
ret
#ifndef __APPLE__
.size fe_0,.-fe_0
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl fe_copy
.type fe_copy,@function
.align 2
fe_copy:
#else
.section __TEXT,__text
.globl _fe_copy
.p2align 2
_fe_copy:
#endif /* __APPLE__ */
# Copy
ldp x2, x3, [x1]
ldp x4, x5, [x1, #16]
stp x2, x3, [x0]
stp x4, x5, [x0, #16]
ret
#ifndef __APPLE__
.size fe_copy,.-fe_copy
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl fe_sub
.type fe_sub,@function
.align 2
fe_sub:
#else
.section __TEXT,__text
.globl _fe_sub
.p2align 2
_fe_sub:
#endif /* __APPLE__ */
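# Modular subtraction: subtract limb-wise, then add p back when the
# subtraction borrowed so the result stays in [0, 2^255).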
# Sub
ldp x3, x4, [x1]
ldp x5, x6, [x1, #16]
ldp x7, x8, [x2]
ldp x9, x10, [x2, #16]
subs x3, x3, x7
sbcs x4, x4, x8
sbcs x5, x5, x9
sbcs x6, x6, x10
csetm x11, cc
mov x12, #-19
# Mask the modulus
extr x11, x11, x6, #63
mul x12, x11, x12
# Add modulus (if underflow)
subs x3, x3, x12
sbcs x4, x4, xzr
and x6, x6, #0x7fffffffffffffff
sbcs x5, x5, xzr
sbc x6, x6, xzr
stp x3, x4, [x0]
stp x5, x6, [x0, #16]
ret
#ifndef __APPLE__
.size fe_sub,.-fe_sub
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl fe_add
.type fe_add,@function
.align 2
fe_add:
#else
.section __TEXT,__text
.globl _fe_add
.p2align 2
_fe_add:
#endif /* __APPLE__ */
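# Modular addition: add limb-wise, then subtract p when the sum carries out
# of bit 255 so the result stays in [0, 2^255).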
# Add
ldp x3, x4, [x1]
ldp x5, x6, [x1, #16]
ldp x7, x8, [x2]
ldp x9, x10, [x2, #16]
adds x3, x3, x7
adcs x4, x4, x8
adcs x5, x5, x9
adcs x6, x6, x10
cset x11, cs
mov x12, #19
# Mask the modulus
extr x11, x11, x6, #63
mul x12, x11, x12
# Sub modulus (if overflow)
adds x3, x3, x12
adcs x4, x4, xzr
and x6, x6, #0x7fffffffffffffff
adcs x5, x5, xzr
adc x6, x6, xzr
stp x3, x4, [x0]
stp x5, x6, [x0, #16]
ret
#ifndef __APPLE__
.size fe_add,.-fe_add
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl fe_neg
.type fe_neg,@function
.align 2
fe_neg:
#else
.section __TEXT,__text
.globl _fe_neg
.p2align 2
_fe_neg:
#endif /* __APPLE__ */
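# Negate by subtracting from p = 2^255-19 (the four constants below are the
# limbs of p).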
ldp x2, x3, [x1]
ldp x4, x5, [x1, #16]
mov x6, #-19
mov x7, #-1
mov x8, #-1
mov x9, #0x7fffffffffffffff
subs x6, x6, x2
sbcs x7, x7, x3
sbcs x8, x8, x4
sbc x9, x9, x5
stp x6, x7, [x0]
stp x8, x9, [x0, #16]
ret
#ifndef __APPLE__
.size fe_neg,.-fe_neg
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl fe_isnonzero
.type fe_isnonzero,@function
.align 2
fe_isnonzero:
#else
.section __TEXT,__text
.globl _fe_isnonzero
.p2align 2
_fe_isnonzero:
#endif /* __APPLE__ */
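# Return non-zero iff the canonical form of the input is non-zero: the same
# add-19 trick as fe_tobytes yields fully reduced limbs, which are OR-ed
# together into the result.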
mov x6, #19
ldp x1, x2, [x0]
ldp x3, x4, [x0, #16]
adds x5, x1, x6
adcs x5, x2, xzr
adcs x5, x3, xzr
adc x5, x4, xzr
and x5, x6, x5, asr 63
adds x1, x1, x5
adcs x2, x2, xzr
adcs x3, x3, xzr
adc x4, x4, xzr
and x4, x4, #0x7fffffffffffffff
orr x0, x1, x2
orr x3, x3, x4
orr x0, x0, x3
ret
#ifndef __APPLE__
.size fe_isnonzero,.-fe_isnonzero
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl fe_isnegative
.type fe_isnegative,@function
.align 2
fe_isnegative:
#else
.section __TEXT,__text
.globl _fe_isnegative
.p2align 2
_fe_isnegative:
#endif /* __APPLE__ */
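# Return the low bit of the canonical value (the Ed25519 sign bit); the
# carry chain detects inputs in [p, 2^255) whose reduction flips that bit.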
mov x6, #19
ldp x1, x2, [x0]
ldp x3, x4, [x0, #16]
adds x5, x1, x6
adcs x5, x2, xzr
adcs x5, x3, xzr
adc x5, x4, xzr
and x0, x1, #1
eor x0, x0, x5, lsr 63
ret
#ifndef __APPLE__
.size fe_isnegative,.-fe_isnegative
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl fe_cmov_table
.type fe_cmov_table,@function
.align 2
fe_cmov_table:
#else
.section __TEXT,__text
.globl _fe_cmov_table
.p2align 2
_fe_cmov_table:
#endif /* __APPLE__ */
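# Constant-time table lookup for Ed25519 scalar multiplication: the signed
# index in w2 selects one of 8 precomputed entries (three field elements,
# 96 bytes each) with csel chains instead of secret-dependent addressing; a
# negative index swaps the first two elements and negates the third.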
stp x29, x30, [sp, #-128]!
add x29, sp, #0
str x17, [x29, #40]
str x19, [x29, #48]
stp x20, x21, [x29, #56]
stp x22, x23, [x29, #72]
stp x24, x25, [x29, #88]
stp x26, x27, [x29, #104]
str x28, [x29, #120]
str x0, [x29, #16]
sxtb x2, w2
sbfx x3, x2, #7, #1
eor x0, x2, x3
sub x0, x0, x3
mov x4, #1
mov x5, xzr
mov x6, xzr
mov x7, xzr
mov x8, #1
mov x9, xzr
mov x10, xzr
mov x11, xzr
mov x12, xzr
mov x13, xzr
mov x14, xzr
mov x15, xzr
cmp x0, #1
ldp x16, x17, [x1]
ldp x19, x20, [x1, #16]
ldp x21, x22, [x1, #32]
ldp x23, x24, [x1, #48]
ldp x25, x26, [x1, #64]
ldp x27, x28, [x1, #80]
csel x4, x16, x4, eq
csel x5, x17, x5, eq
csel x6, x19, x6, eq
csel x7, x20, x7, eq
csel x8, x21, x8, eq
csel x9, x22, x9, eq
csel x10, x23, x10, eq
csel x11, x24, x11, eq
csel x12, x25, x12, eq
csel x13, x26, x13, eq
csel x14, x27, x14, eq
csel x15, x28, x15, eq
cmp x0, #2
ldp x16, x17, [x1, #96]
ldp x19, x20, [x1, #112]
ldp x21, x22, [x1, #128]
ldp x23, x24, [x1, #144]
ldp x25, x26, [x1, #160]
ldp x27, x28, [x1, #176]
csel x4, x16, x4, eq
csel x5, x17, x5, eq
csel x6, x19, x6, eq
csel x7, x20, x7, eq
csel x8, x21, x8, eq
csel x9, x22, x9, eq
csel x10, x23, x10, eq
csel x11, x24, x11, eq
csel x12, x25, x12, eq
csel x13, x26, x13, eq
csel x14, x27, x14, eq
csel x15, x28, x15, eq
cmp x0, #3
ldp x16, x17, [x1, #192]
ldp x19, x20, [x1, #208]
ldp x21, x22, [x1, #224]
ldp x23, x24, [x1, #240]
ldp x25, x26, [x1, #256]
ldp x27, x28, [x1, #272]
csel x4, x16, x4, eq
csel x5, x17, x5, eq
csel x6, x19, x6, eq
csel x7, x20, x7, eq
csel x8, x21, x8, eq
csel x9, x22, x9, eq
csel x10, x23, x10, eq
csel x11, x24, x11, eq
csel x12, x25, x12, eq
csel x13, x26, x13, eq
csel x14, x27, x14, eq
csel x15, x28, x15, eq
cmp x0, #4
ldp x16, x17, [x1, #288]
ldp x19, x20, [x1, #304]
ldp x21, x22, [x1, #320]
ldp x23, x24, [x1, #336]
ldp x25, x26, [x1, #352]
ldp x27, x28, [x1, #368]
csel x4, x16, x4, eq
csel x5, x17, x5, eq
csel x6, x19, x6, eq
csel x7, x20, x7, eq
csel x8, x21, x8, eq
csel x9, x22, x9, eq
csel x10, x23, x10, eq
csel x11, x24, x11, eq
csel x12, x25, x12, eq
csel x13, x26, x13, eq
csel x14, x27, x14, eq
csel x15, x28, x15, eq
add x1, x1, #0x180
cmp x0, #5
ldp x16, x17, [x1]
ldp x19, x20, [x1, #16]
ldp x21, x22, [x1, #32]
ldp x23, x24, [x1, #48]
ldp x25, x26, [x1, #64]
ldp x27, x28, [x1, #80]
csel x4, x16, x4, eq
csel x5, x17, x5, eq
csel x6, x19, x6, eq
csel x7, x20, x7, eq
csel x8, x21, x8, eq
csel x9, x22, x9, eq
csel x10, x23, x10, eq
csel x11, x24, x11, eq
csel x12, x25, x12, eq
csel x13, x26, x13, eq
csel x14, x27, x14, eq
csel x15, x28, x15, eq
cmp x0, #6
ldp x16, x17, [x1, #96]
ldp x19, x20, [x1, #112]
ldp x21, x22, [x1, #128]
ldp x23, x24, [x1, #144]
ldp x25, x26, [x1, #160]
ldp x27, x28, [x1, #176]
csel x4, x16, x4, eq
csel x5, x17, x5, eq
csel x6, x19, x6, eq
csel x7, x20, x7, eq
csel x8, x21, x8, eq
csel x9, x22, x9, eq
csel x10, x23, x10, eq
csel x11, x24, x11, eq
csel x12, x25, x12, eq
csel x13, x26, x13, eq
csel x14, x27, x14, eq
csel x15, x28, x15, eq
cmp x0, #7
ldp x16, x17, [x1, #192]
ldp x19, x20, [x1, #208]
ldp x21, x22, [x1, #224]
ldp x23, x24, [x1, #240]
ldp x25, x26, [x1, #256]
ldp x27, x28, [x1, #272]
csel x4, x16, x4, eq
csel x5, x17, x5, eq
csel x6, x19, x6, eq
csel x7, x20, x7, eq
csel x8, x21, x8, eq
csel x9, x22, x9, eq
csel x10, x23, x10, eq
csel x11, x24, x11, eq
csel x12, x25, x12, eq
csel x13, x26, x13, eq
csel x14, x27, x14, eq
csel x15, x28, x15, eq
cmp x0, #8
ldp x16, x17, [x1, #288]
ldp x19, x20, [x1, #304]
ldp x21, x22, [x1, #320]
ldp x23, x24, [x1, #336]
ldp x25, x26, [x1, #352]
ldp x27, x28, [x1, #368]
csel x4, x16, x4, eq
csel x5, x17, x5, eq
csel x6, x19, x6, eq
csel x7, x20, x7, eq
csel x8, x21, x8, eq
csel x9, x22, x9, eq
csel x10, x23, x10, eq
csel x11, x24, x11, eq
csel x12, x25, x12, eq
csel x13, x26, x13, eq
csel x14, x27, x14, eq
csel x15, x28, x15, eq
mov x16, #-19
mov x17, #-1
mov x19, #-1
mov x20, #0x7fffffffffffffff
subs x16, x16, x12
sbcs x17, x17, x13
sbcs x19, x19, x14
sbc x20, x20, x15
cmp x2, #0
mov x3, x4
csel x4, x8, x4, lt
csel x8, x3, x8, lt
mov x3, x5
csel x5, x9, x5, lt
csel x9, x3, x9, lt
mov x3, x6
csel x6, x10, x6, lt
csel x10, x3, x10, lt
mov x3, x7
csel x7, x11, x7, lt
csel x11, x3, x11, lt
csel x12, x16, x12, lt
csel x13, x17, x13, lt
csel x14, x19, x14, lt
csel x15, x20, x15, lt
ldr x0, [x29, #16]
stp x4, x5, [x0]
stp x6, x7, [x0, #16]
stp x8, x9, [x0, #32]
stp x10, x11, [x0, #48]
stp x12, x13, [x0, #64]
stp x14, x15, [x0, #80]
ldr x17, [x29, #40]
ldr x19, [x29, #48]
ldp x20, x21, [x29, #56]
ldp x22, x23, [x29, #72]
ldp x24, x25, [x29, #88]
ldp x26, x27, [x29, #104]
ldr x28, [x29, #120]
ldp x29, x30, [sp], #0x80
ret
#ifndef __APPLE__
.size fe_cmov_table,.-fe_cmov_table
#endif /* __APPLE__ */
#endif /* HAVE_ED25519 */
#ifndef __APPLE__
.text
.globl fe_mul
.type fe_mul,@function
.align 2
fe_mul:
#else
.section __TEXT,__text
.globl _fe_mul
.p2align 2
_fe_mul:
#endif /* __APPLE__ */
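# Schoolbook 4x4 limb multiply into an 8-limb product, followed by reduction
# mod p = 2^255-19: the high limbs are folded back in with the factor
# 38 = 2*19 (since 2^256 = 38 mod p) and the final carry is folded with 19.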
stp x29, x30, [sp, #-64]!
add x29, sp, #0
str x17, [x29, #24]
str x19, [x29, #32]
stp x20, x21, [x29, #40]
str x22, [x29, #56]
# Multiply
ldp x14, x15, [x1]
ldp x16, x17, [x1, #16]
ldp x19, x20, [x2]
ldp x21, x22, [x2, #16]
# A[0] * B[0]
umulh x7, x14, x19
mul x6, x14, x19
# A[2] * B[0]
umulh x9, x16, x19
mul x8, x16, x19
# A[1] * B[0]
mul x3, x15, x19
adds x7, x7, x3
umulh x4, x15, x19
adcs x8, x8, x4
# A[1] * B[3]
umulh x11, x15, x22
adc x9, x9, xzr
mul x10, x15, x22
# A[0] * B[1]
mul x3, x14, x20
adds x7, x7, x3
umulh x4, x14, x20
adcs x8, x8, x4
# A[2] * B[1]
mul x3, x16, x20
adcs x9, x9, x3
umulh x4, x16, x20
adcs x10, x10, x4
adc x11, x11, xzr
# A[1] * B[2]
mul x3, x15, x21
adds x9, x9, x3
umulh x4, x15, x21
adcs x10, x10, x4
adcs x11, x11, xzr
adc x12, xzr, xzr
# A[0] * B[2]
mul x3, x14, x21
adds x8, x8, x3
umulh x4, x14, x21
adcs x9, x9, x4
adcs x10, x10, xzr
adcs x11, x11, xzr
adc x12, x12, xzr
# A[1] * B[1]
mul x3, x15, x20
adds x8, x8, x3
umulh x4, x15, x20
adcs x9, x9, x4
# A[3] * B[1]
mul x3, x17, x20
adcs x10, x10, x3
umulh x4, x17, x20
adcs x11, x11, x4
adc x12, x12, xzr
# A[2] * B[2]
mul x3, x16, x21
adds x10, x10, x3
umulh x4, x16, x21
adcs x11, x11, x4
# A[3] * B[3]
mul x3, x17, x22
adcs x12, x12, x3
umulh x13, x17, x22
adc x13, x13, xzr
# A[0] * B[3]
mul x3, x14, x22
adds x9, x9, x3
umulh x4, x14, x22
adcs x10, x10, x4
# A[2] * B[3]
mul x3, x16, x22
adcs x11, x11, x3
umulh x4, x16, x22
adcs x12, x12, x4
adc x13, x13, xzr
# A[3] * B[0]
mul x3, x17, x19
adds x9, x9, x3
umulh x4, x17, x19
adcs x10, x10, x4
# A[3] * B[2]
mul x3, x17, x21
adcs x11, x11, x3
umulh x4, x17, x21
adcs x12, x12, x4
adc x13, x13, xzr
# Reduce
mov x3, #38
mul x4, x3, x13
adds x9, x9, x4
umulh x5, x3, x13
adc x5, x5, xzr
mov x3, #19
extr x5, x5, x9, #63
mul x5, x5, x3
and x9, x9, #0x7fffffffffffffff
mov x3, #38
mul x4, x3, x10
adds x6, x6, x4
umulh x10, x3, x10
mul x4, x3, x11
adcs x7, x7, x4
umulh x11, x3, x11
mul x4, x3, x12
adcs x8, x8, x4
umulh x12, x3, x12
adc x9, x9, xzr
# Add high product results in
adds x6, x6, x5
adcs x7, x7, x10
adcs x8, x8, x11
adc x9, x9, x12
# Reduce if top bit set
mov x3, #19
and x4, x3, x9, asr 63
adds x6, x6, x4
adcs x7, x7, xzr
and x9, x9, #0x7fffffffffffffff
adcs x8, x8, xzr
adc x9, x9, xzr
# Store
stp x6, x7, [x0]
stp x8, x9, [x0, #16]
ldr x17, [x29, #24]
ldr x19, [x29, #32]
ldp x20, x21, [x29, #40]
ldr x22, [x29, #56]
ldp x29, x30, [sp], #0x40
ret
#ifndef __APPLE__
.size fe_mul,.-fe_mul
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl fe_sq
.type fe_sq,@function
.align 2
fe_sq:
#else
.section __TEXT,__text
.globl _fe_sq
.p2align 2
_fe_sq:
#endif /* __APPLE__ */
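# Squaring: compute each off-diagonal product once and double it, add the
# diagonal squares, then apply the same 38/19 reduction as fe_mul.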
# Square
ldp x13, x14, [x1]
ldp x15, x16, [x1, #16]
# A[0] * A[1]
umulh x7, x13, x14
mul x6, x13, x14
# A[0] * A[3]
umulh x9, x13, x16
mul x8, x13, x16
# A[0] * A[2]
mul x2, x13, x15
adds x7, x7, x2
umulh x3, x13, x15
adcs x8, x8, x3
# A[1] * A[3]
mul x2, x14, x16
adcs x9, x9, x2
umulh x10, x14, x16
adc x10, x10, xzr
# A[1] * A[2]
mul x2, x14, x15
adds x8, x8, x2
umulh x3, x14, x15
adcs x9, x9, x3
# A[2] * A[3]
mul x2, x15, x16
adcs x10, x10, x2
umulh x11, x15, x16
adc x11, x11, xzr
# Double
adds x6, x6, x6
adcs x7, x7, x7
adcs x8, x8, x8
adcs x9, x9, x9
adcs x10, x10, x10
adcs x11, x11, x11
adc x12, xzr, xzr
# A[0] * A[0]
umulh x3, x13, x13
mul x5, x13, x13
# A[1] * A[1]
mul x2, x14, x14
adds x6, x6, x3
umulh x3, x14, x14
adcs x7, x7, x2
# A[2] * A[2]
mul x2, x15, x15
adcs x8, x8, x3
umulh x3, x15, x15
adcs x9, x9, x2
# A[3] * A[3]
mul x2, x16, x16
adcs x10, x10, x3
umulh x3, x16, x16
adcs x11, x11, x2
adc x12, x12, x3
# Reduce
mov x2, #38
mul x3, x2, x12
adds x8, x8, x3
umulh x4, x2, x12
adc x4, x4, xzr
mov x2, #19
extr x4, x4, x8, #63
mul x4, x4, x2
and x8, x8, #0x7fffffffffffffff
mov x2, #38
mul x3, x2, x9
adds x5, x5, x3
umulh x9, x2, x9
mul x3, x2, x10
adcs x6, x6, x3
umulh x10, x2, x10
mul x3, x2, x11
adcs x7, x7, x3
umulh x11, x2, x11
adc x8, x8, xzr
# Add high product results in
adds x5, x5, x4
adcs x6, x6, x9
adcs x7, x7, x10
adc x8, x8, x11
# Reduce if top bit set
mov x2, #19
and x3, x2, x8, asr 63
adds x5, x5, x3
adcs x6, x6, xzr
and x8, x8, #0x7fffffffffffffff
adcs x7, x7, xzr
adc x8, x8, xzr
# Store
stp x5, x6, [x0]
stp x7, x8, [x0, #16]
ret
#ifndef __APPLE__
.size fe_sq,.-fe_sq
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl fe_invert
.type fe_invert,@function
.align 2
fe_invert:
#else
.section __TEXT,__text
.globl _fe_invert
.p2align 2
_fe_invert:
#endif /* __APPLE__ */
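# Invert via Fermat's little theorem: raise to p-2 = 2^255-21 with a
# square-and-multiply chain; the unrolled loops below perform the long runs
# of squarings and the fe_mul calls combine partial results held in the
# stack frame.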
stp x29, x30, [sp, #-176]!
add x29, sp, #0
str x17, [x29, #160]
str x20, [x29, #168]
# Invert
str x0, [x29, #144]
str x1, [x29, #152]
add x0, x29, #16
#ifndef NDEBUG
ldr x1, [x29, #152]
#endif /* !NDEBUG */
#ifndef __APPLE__
bl fe_sq
#else
bl _fe_sq
#endif /* __APPLE__ */
add x0, x29, #48
add x1, x29, #16
#ifndef __APPLE__
bl fe_sq
#else
bl _fe_sq
#endif /* __APPLE__ */
#ifndef NDEBUG
add x0, x29, #48
#endif /* !NDEBUG */
add x1, x29, #48
#ifndef __APPLE__
bl fe_sq
#else
bl _fe_sq
#endif /* __APPLE__ */
#ifndef NDEBUG
add x0, x29, #48
#endif /* !NDEBUG */
ldr x1, [x29, #152]
add x2, x29, #48
#ifndef __APPLE__
bl fe_mul
#else
bl _fe_mul
#endif /* __APPLE__ */
add x0, x29, #16
add x1, x29, #16
add x2, x29, #48
#ifndef __APPLE__
bl fe_mul
#else
bl _fe_mul
#endif /* __APPLE__ */
add x0, x29, #0x50
#ifndef NDEBUG
add x1, x29, #16
#endif /* !NDEBUG */
#ifndef __APPLE__
bl fe_sq
#else
bl _fe_sq
#endif /* __APPLE__ */
add x0, x29, #48
add x1, x29, #48
add x2, x29, #0x50
#ifndef __APPLE__
bl fe_mul
#else
bl _fe_mul
#endif /* __APPLE__ */
# Loop: 5 times
mov x20, #5
ldp x6, x7, [x29, #48]
ldp x8, x9, [x29, #64]
L_fe_invert1:
# Square
# A[0] * A[1]
umulh x12, x6, x7
mul x11, x6, x7
# A[0] * A[3]
umulh x14, x6, x9
mul x13, x6, x9
# A[0] * A[2]
mul x3, x6, x8
adds x12, x12, x3
umulh x4, x6, x8
adcs x13, x13, x4
# A[1] * A[3]
mul x3, x7, x9
adcs x14, x14, x3
umulh x15, x7, x9
adc x15, x15, xzr
# A[1] * A[2]
mul x3, x7, x8
adds x13, x13, x3
umulh x4, x7, x8
adcs x14, x14, x4
# A[2] * A[3]
mul x3, x8, x9
adcs x15, x15, x3
umulh x16, x8, x9
adc x16, x16, xzr
# Double
adds x11, x11, x11
adcs x12, x12, x12
adcs x13, x13, x13
adcs x14, x14, x14
adcs x15, x15, x15
adcs x16, x16, x16
adc x17, xzr, xzr
# A[0] * A[0]
umulh x4, x6, x6
mul x10, x6, x6
# A[1] * A[1]
mul x3, x7, x7
adds x11, x11, x4
umulh x4, x7, x7
adcs x12, x12, x3
# A[2] * A[2]
mul x3, x8, x8
adcs x13, x13, x4
umulh x4, x8, x8
adcs x14, x14, x3
# A[3] * A[3]
mul x3, x9, x9
adcs x15, x15, x4
umulh x4, x9, x9
adcs x16, x16, x3
adc x17, x17, x4
# Reduce
mov x3, #38
mul x4, x3, x17
adds x13, x13, x4
umulh x5, x3, x17
adc x5, x5, xzr
mov x3, #19
extr x5, x5, x13, #63
mul x5, x5, x3
and x13, x13, #0x7fffffffffffffff
mov x3, #38
mul x4, x3, x14
adds x10, x10, x4
umulh x14, x3, x14
mul x4, x3, x15
adcs x11, x11, x4
umulh x15, x3, x15
mul x4, x3, x16
adcs x12, x12, x4
umulh x16, x3, x16
adc x13, x13, xzr
# Add high product results in
adds x6, x10, x5
adcs x7, x11, x14
adcs x8, x12, x15
adc x9, x13, x16
subs x20, x20, #1
bne L_fe_invert1
# Store
stp x6, x7, [x29, #80]
stp x8, x9, [x29, #96]
#ifndef NDEBUG
add x0, x29, #48
#endif /* !NDEBUG */
add x1, x29, #0x50
add x2, x29, #48
#ifndef __APPLE__
bl fe_mul
#else
bl _fe_mul
#endif /* __APPLE__ */
# Loop: 10 times
mov x20, #10
ldp x6, x7, [x29, #48]
ldp x8, x9, [x29, #64]
L_fe_invert2:
# Square
# A[0] * A[1]
umulh x12, x6, x7
mul x11, x6, x7
# A[0] * A[3]
umulh x14, x6, x9
mul x13, x6, x9
# A[0] * A[2]
mul x3, x6, x8
adds x12, x12, x3
umulh x4, x6, x8
adcs x13, x13, x4
# A[1] * A[3]
mul x3, x7, x9
adcs x14, x14, x3
umulh x15, x7, x9
adc x15, x15, xzr
# A[1] * A[2]
mul x3, x7, x8
adds x13, x13, x3
umulh x4, x7, x8
adcs x14, x14, x4
# A[2] * A[3]
mul x3, x8, x9
adcs x15, x15, x3
umulh x16, x8, x9
adc x16, x16, xzr
# Double
adds x11, x11, x11
adcs x12, x12, x12
adcs x13, x13, x13
adcs x14, x14, x14
adcs x15, x15, x15
adcs x16, x16, x16
adc x17, xzr, xzr
# A[0] * A[0]
umulh x4, x6, x6
mul x10, x6, x6
# A[1] * A[1]
mul x3, x7, x7
adds x11, x11, x4
umulh x4, x7, x7
adcs x12, x12, x3
# A[2] * A[2]
mul x3, x8, x8
adcs x13, x13, x4
umulh x4, x8, x8
adcs x14, x14, x3
# A[3] * A[3]
mul x3, x9, x9
adcs x15, x15, x4
umulh x4, x9, x9
adcs x16, x16, x3
adc x17, x17, x4
# Reduce
mov x3, #38
mul x4, x3, x17
adds x13, x13, x4
umulh x5, x3, x17
adc x5, x5, xzr
mov x3, #19
extr x5, x5, x13, #63
mul x5, x5, x3
and x13, x13, #0x7fffffffffffffff
mov x3, #38
mul x4, x3, x14
adds x10, x10, x4
umulh x14, x3, x14
mul x4, x3, x15
adcs x11, x11, x4
umulh x15, x3, x15
mul x4, x3, x16
adcs x12, x12, x4
umulh x16, x3, x16
adc x13, x13, xzr
# Add high product results in
adds x6, x10, x5
adcs x7, x11, x14
adcs x8, x12, x15
adc x9, x13, x16
subs x20, x20, #1
bne L_fe_invert2
# Store
stp x6, x7, [x29, #80]
stp x8, x9, [x29, #96]
add x0, x29, #0x50
#ifndef NDEBUG
add x1, x29, #0x50
#endif /* !NDEBUG */
add x2, x29, #48
#ifndef __APPLE__
bl fe_mul
#else
bl _fe_mul
#endif /* __APPLE__ */
# Loop: 20 times
mov x20, #20
ldp x6, x7, [x29, #80]
ldp x8, x9, [x29, #96]
L_fe_invert3:
# Square
# A[0] * A[1]
umulh x12, x6, x7
mul x11, x6, x7
# A[0] * A[3]
umulh x14, x6, x9
mul x13, x6, x9
# A[0] * A[2]
mul x3, x6, x8
adds x12, x12, x3
umulh x4, x6, x8
adcs x13, x13, x4
# A[1] * A[3]
mul x3, x7, x9
adcs x14, x14, x3
umulh x15, x7, x9
adc x15, x15, xzr
# A[1] * A[2]
mul x3, x7, x8
adds x13, x13, x3
umulh x4, x7, x8
adcs x14, x14, x4
# A[2] * A[3]
mul x3, x8, x9
adcs x15, x15, x3
umulh x16, x8, x9
adc x16, x16, xzr
# Double
adds x11, x11, x11
adcs x12, x12, x12
adcs x13, x13, x13
adcs x14, x14, x14
adcs x15, x15, x15
adcs x16, x16, x16
adc x17, xzr, xzr
# A[0] * A[0]
umulh x4, x6, x6
mul x10, x6, x6
# A[1] * A[1]
mul x3, x7, x7
adds x11, x11, x4
umulh x4, x7, x7
adcs x12, x12, x3
# A[2] * A[2]
mul x3, x8, x8
adcs x13, x13, x4
umulh x4, x8, x8
adcs x14, x14, x3
# A[3] * A[3]
mul x3, x9, x9
adcs x15, x15, x4
umulh x4, x9, x9
adcs x16, x16, x3
adc x17, x17, x4
# Reduce
mov x3, #38
mul x4, x3, x17
adds x13, x13, x4
umulh x5, x3, x17
adc x5, x5, xzr
mov x3, #19
extr x5, x5, x13, #63
mul x5, x5, x3
and x13, x13, #0x7fffffffffffffff
mov x3, #38
mul x4, x3, x14
adds x10, x10, x4
umulh x14, x3, x14
mul x4, x3, x15
adcs x11, x11, x4
umulh x15, x3, x15
mul x4, x3, x16
adcs x12, x12, x4
umulh x16, x3, x16
adc x13, x13, xzr
# Add high product results in
adds x6, x10, x5
adcs x7, x11, x14
adcs x8, x12, x15
adc x9, x13, x16
subs x20, x20, #1
bne L_fe_invert3
# Store
stp x6, x7, [x29, #112]
stp x8, x9, [x29, #128]
#ifndef NDEBUG
add x0, x29, #0x50
#endif /* !NDEBUG */
add x1, x29, #0x70
add x2, x29, #0x50
#ifndef __APPLE__
bl fe_mul
#else
bl _fe_mul
#endif /* __APPLE__ */
# Loop: 10 times
mov x20, #10
ldp x6, x7, [x29, #80]
ldp x8, x9, [x29, #96]
L_fe_invert4:
# Square
# A[0] * A[1]
umulh x12, x6, x7
mul x11, x6, x7
# A[0] * A[3]
umulh x14, x6, x9
mul x13, x6, x9
# A[0] * A[2]
mul x3, x6, x8
adds x12, x12, x3
umulh x4, x6, x8
adcs x13, x13, x4
# A[1] * A[3]
mul x3, x7, x9
adcs x14, x14, x3
umulh x15, x7, x9
adc x15, x15, xzr
# A[1] * A[2]
mul x3, x7, x8
adds x13, x13, x3
umulh x4, x7, x8
adcs x14, x14, x4
# A[2] * A[3]
mul x3, x8, x9
adcs x15, x15, x3
umulh x16, x8, x9
adc x16, x16, xzr
# Double
adds x11, x11, x11
adcs x12, x12, x12
adcs x13, x13, x13
adcs x14, x14, x14
adcs x15, x15, x15
adcs x16, x16, x16
adc x17, xzr, xzr
# A[0] * A[0]
umulh x4, x6, x6
mul x10, x6, x6
# A[1] * A[1]
mul x3, x7, x7
adds x11, x11, x4
umulh x4, x7, x7
adcs x12, x12, x3
# A[2] * A[2]
mul x3, x8, x8
adcs x13, x13, x4
umulh x4, x8, x8
adcs x14, x14, x3
# A[3] * A[3]
mul x3, x9, x9
adcs x15, x15, x4
umulh x4, x9, x9
adcs x16, x16, x3
adc x17, x17, x4
# Reduce
mov x3, #38
mul x4, x3, x17
adds x13, x13, x4
umulh x5, x3, x17
adc x5, x5, xzr
mov x3, #19
extr x5, x5, x13, #63
mul x5, x5, x3
and x13, x13, #0x7fffffffffffffff
mov x3, #38
mul x4, x3, x14
adds x10, x10, x4
umulh x14, x3, x14
mul x4, x3, x15
adcs x11, x11, x4
umulh x15, x3, x15
mul x4, x3, x16
adcs x12, x12, x4
umulh x16, x3, x16
adc x13, x13, xzr
# Add high product results in
adds x6, x10, x5
adcs x7, x11, x14
adcs x8, x12, x15
adc x9, x13, x16
subs x20, x20, #1
bne L_fe_invert4
# Store
stp x6, x7, [x29, #80]
stp x8, x9, [x29, #96]
add x0, x29, #48
add x1, x29, #0x50
add x2, x29, #48
#ifndef __APPLE__
bl fe_mul
#else
bl _fe_mul
#endif /* __APPLE__ */
# Loop: 50 times
mov x20, #50
ldp x6, x7, [x29, #48]
ldp x8, x9, [x29, #64]
L_fe_invert5:
# Square
# A[0] * A[1]
umulh x12, x6, x7
mul x11, x6, x7
# A[0] * A[3]
umulh x14, x6, x9
mul x13, x6, x9
# A[0] * A[2]
mul x3, x6, x8
adds x12, x12, x3
umulh x4, x6, x8
adcs x13, x13, x4
# A[1] * A[3]
mul x3, x7, x9
adcs x14, x14, x3
umulh x15, x7, x9
adc x15, x15, xzr
# A[1] * A[2]
mul x3, x7, x8
adds x13, x13, x3
umulh x4, x7, x8
adcs x14, x14, x4
# A[2] * A[3]
mul x3, x8, x9
adcs x15, x15, x3
umulh x16, x8, x9
adc x16, x16, xzr
# Double
adds x11, x11, x11
adcs x12, x12, x12
adcs x13, x13, x13
adcs x14, x14, x14
adcs x15, x15, x15
adcs x16, x16, x16
adc x17, xzr, xzr
# A[0] * A[0]
umulh x4, x6, x6
mul x10, x6, x6
# A[1] * A[1]
mul x3, x7, x7
adds x11, x11, x4
umulh x4, x7, x7
adcs x12, x12, x3
# A[2] * A[2]
mul x3, x8, x8
adcs x13, x13, x4
umulh x4, x8, x8
adcs x14, x14, x3
# A[3] * A[3]
mul x3, x9, x9
adcs x15, x15, x4
umulh x4, x9, x9
adcs x16, x16, x3
adc x17, x17, x4
# Reduce
mov x3, #38
mul x4, x3, x17
adds x13, x13, x4
umulh x5, x3, x17
adc x5, x5, xzr
mov x3, #19
extr x5, x5, x13, #63
mul x5, x5, x3
and x13, x13, #0x7fffffffffffffff
mov x3, #38
mul x4, x3, x14
adds x10, x10, x4
umulh x14, x3, x14
mul x4, x3, x15
adcs x11, x11, x4
umulh x15, x3, x15
mul x4, x3, x16
adcs x12, x12, x4
umulh x16, x3, x16
adc x13, x13, xzr
# Add high product results in
adds x6, x10, x5
adcs x7, x11, x14
adcs x8, x12, x15
adc x9, x13, x16
subs x20, x20, #1
bne L_fe_invert5
# Store
stp x6, x7, [x29, #80]
stp x8, x9, [x29, #96]
add x0, x29, #0x50
#ifndef NDEBUG
add x1, x29, #0x50
#endif /* !NDEBUG */
add x2, x29, #48
#ifndef __APPLE__
bl fe_mul
#else
bl _fe_mul
#endif /* __APPLE__ */
# Loop: 100 times
mov x20, #0x64
ldp x6, x7, [x29, #80]
ldp x8, x9, [x29, #96]
L_fe_invert6:
# Square
# A[0] * A[1]
umulh x12, x6, x7
mul x11, x6, x7
# A[0] * A[3]
umulh x14, x6, x9
mul x13, x6, x9
# A[0] * A[2]
mul x3, x6, x8
adds x12, x12, x3
umulh x4, x6, x8
adcs x13, x13, x4
# A[1] * A[3]
mul x3, x7, x9
adcs x14, x14, x3
umulh x15, x7, x9
adc x15, x15, xzr
# A[1] * A[2]
mul x3, x7, x8
adds x13, x13, x3
umulh x4, x7, x8
adcs x14, x14, x4
# A[2] * A[3]
mul x3, x8, x9
adcs x15, x15, x3
umulh x16, x8, x9
adc x16, x16, xzr
# Double
adds x11, x11, x11
adcs x12, x12, x12
adcs x13, x13, x13
adcs x14, x14, x14
adcs x15, x15, x15
adcs x16, x16, x16
adc x17, xzr, xzr
# A[0] * A[0]
umulh x4, x6, x6
mul x10, x6, x6
# A[1] * A[1]
mul x3, x7, x7
adds x11, x11, x4
umulh x4, x7, x7
adcs x12, x12, x3
# A[2] * A[2]
mul x3, x8, x8
adcs x13, x13, x4
umulh x4, x8, x8
adcs x14, x14, x3
# A[3] * A[3]
mul x3, x9, x9
adcs x15, x15, x4
umulh x4, x9, x9
adcs x16, x16, x3
adc x17, x17, x4
# Reduce
mov x3, #38
mul x4, x3, x17
adds x13, x13, x4
umulh x5, x3, x17
adc x5, x5, xzr
mov x3, #19
extr x5, x5, x13, #63
mul x5, x5, x3
and x13, x13, #0x7fffffffffffffff
mov x3, #38
mul x4, x3, x14
adds x10, x10, x4
umulh x14, x3, x14
mul x4, x3, x15
adcs x11, x11, x4
umulh x15, x3, x15
mul x4, x3, x16
adcs x12, x12, x4
umulh x16, x3, x16
adc x13, x13, xzr
# Add high product results in
adds x6, x10, x5
adcs x7, x11, x14
adcs x8, x12, x15
adc x9, x13, x16
subs x20, x20, #1
bne L_fe_invert6
# Store
stp x6, x7, [x29, #112]
stp x8, x9, [x29, #128]
#ifndef NDEBUG
add x0, x29, #0x50
#endif /* !NDEBUG */
add x1, x29, #0x70
add x2, x29, #0x50
#ifndef __APPLE__
bl fe_mul
#else
bl _fe_mul
#endif /* __APPLE__ */
# Loop: 50 times
mov x20, #50
ldp x6, x7, [x29, #80]
ldp x8, x9, [x29, #96]
L_fe_invert7:
# Square
# A[0] * A[1]
umulh x12, x6, x7
mul x11, x6, x7
# A[0] * A[3]
umulh x14, x6, x9
mul x13, x6, x9
# A[0] * A[2]
mul x3, x6, x8
adds x12, x12, x3
umulh x4, x6, x8
adcs x13, x13, x4
# A[1] * A[3]
mul x3, x7, x9
adcs x14, x14, x3
umulh x15, x7, x9
adc x15, x15, xzr
# A[1] * A[2]
mul x3, x7, x8
adds x13, x13, x3
umulh x4, x7, x8
adcs x14, x14, x4
# A[2] * A[3]
mul x3, x8, x9
adcs x15, x15, x3
umulh x16, x8, x9
adc x16, x16, xzr
# Double
adds x11, x11, x11
adcs x12, x12, x12
adcs x13, x13, x13
adcs x14, x14, x14
adcs x15, x15, x15
adcs x16, x16, x16
adc x17, xzr, xzr
# A[0] * A[0]
umulh x4, x6, x6
mul x10, x6, x6
# A[1] * A[1]
mul x3, x7, x7
adds x11, x11, x4
umulh x4, x7, x7
adcs x12, x12, x3
# A[2] * A[2]
mul x3, x8, x8
adcs x13, x13, x4
umulh x4, x8, x8
adcs x14, x14, x3
# A[3] * A[3]
mul x3, x9, x9
adcs x15, x15, x4
umulh x4, x9, x9
adcs x16, x16, x3
adc x17, x17, x4
# Reduce
mov x3, #38
mul x4, x3, x17
adds x13, x13, x4
umulh x5, x3, x17
adc x5, x5, xzr
mov x3, #19
extr x5, x5, x13, #63
mul x5, x5, x3
and x13, x13, #0x7fffffffffffffff
mov x3, #38
mul x4, x3, x14
adds x10, x10, x4
umulh x14, x3, x14
mul x4, x3, x15
adcs x11, x11, x4
umulh x15, x3, x15
mul x4, x3, x16
adcs x12, x12, x4
umulh x16, x3, x16
adc x13, x13, xzr
# Add high product results in
adds x6, x10, x5
adcs x7, x11, x14
adcs x8, x12, x15
adc x9, x13, x16
subs x20, x20, #1
bne L_fe_invert7
# Store
stp x6, x7, [x29, #80]
stp x8, x9, [x29, #96]
add x0, x29, #48
add x1, x29, #0x50
add x2, x29, #48
#ifndef __APPLE__
bl fe_mul
#else
bl _fe_mul
#endif /* __APPLE__ */
# Loop: 5 times
mov x20, #5
ldp x6, x7, [x29, #48]
ldp x8, x9, [x29, #64]
L_fe_invert8:
# Square
# A[0] * A[1]
umulh x12, x6, x7
mul x11, x6, x7
# A[0] * A[3]
umulh x14, x6, x9
mul x13, x6, x9
# A[0] * A[2]
mul x3, x6, x8
adds x12, x12, x3
umulh x4, x6, x8
adcs x13, x13, x4
# A[1] * A[3]
mul x3, x7, x9
adcs x14, x14, x3
umulh x15, x7, x9
adc x15, x15, xzr
# A[1] * A[2]
mul x3, x7, x8
adds x13, x13, x3
umulh x4, x7, x8
adcs x14, x14, x4
# A[2] * A[3]
mul x3, x8, x9
adcs x15, x15, x3
umulh x16, x8, x9
adc x16, x16, xzr
# Double
adds x11, x11, x11
adcs x12, x12, x12
adcs x13, x13, x13
adcs x14, x14, x14
adcs x15, x15, x15
adcs x16, x16, x16
adc x17, xzr, xzr
# A[0] * A[0]
umulh x4, x6, x6
mul x10, x6, x6
# A[1] * A[1]
mul x3, x7, x7
adds x11, x11, x4
umulh x4, x7, x7
adcs x12, x12, x3
# A[2] * A[2]
mul x3, x8, x8
adcs x13, x13, x4
umulh x4, x8, x8
adcs x14, x14, x3
# A[3] * A[3]
mul x3, x9, x9
adcs x15, x15, x4
umulh x4, x9, x9
adcs x16, x16, x3
adc x17, x17, x4
# Reduce
mov x3, #38
mul x4, x3, x17
adds x13, x13, x4
umulh x5, x3, x17
adc x5, x5, xzr
mov x3, #19
extr x5, x5, x13, #63
mul x5, x5, x3
and x13, x13, #0x7fffffffffffffff
mov x3, #38
mul x4, x3, x14
adds x10, x10, x4
umulh x14, x3, x14
mul x4, x3, x15
adcs x11, x11, x4
umulh x15, x3, x15
mul x4, x3, x16
adcs x12, x12, x4
umulh x16, x3, x16
adc x13, x13, xzr
# Add high product results in
adds x6, x10, x5
adcs x7, x11, x14
adcs x8, x12, x15
adc x9, x13, x16
subs x20, x20, #1
bne L_fe_invert8
# Store
stp x6, x7, [x29, #48]
stp x8, x9, [x29, #64]
ldr x0, [x29, #144]
add x1, x29, #48
add x2, x29, #16
#ifndef __APPLE__
bl fe_mul
#else
bl _fe_mul
#endif /* __APPLE__ */
ldr x17, [x29, #160]
ldr x20, [x29, #168]
ldp x29, x30, [sp], #0xb0
ret
#ifndef __APPLE__
.size fe_invert,.-fe_invert
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl curve25519
.type curve25519,@function
.align 2
curve25519:
#else
.section __TEXT,__text
.globl _curve25519
.p2align 2
_curve25519:
#endif /* __APPLE__ */
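# X25519 scalar multiplication with the Montgomery ladder: 255 iterations
# (bit 254 down to 0), each doing a conditional swap keyed on the change in
# scalar bit and a ladder step of add/sub/mul/square operations including
# the multiply by (A+2)/4 = 121666, followed by an inlined inversion of the
# Z coordinate after the loop.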
stp x29, x30, [sp, #-288]!
add x29, sp, #0
str x17, [x29, #200]
str x19, [x29, #208]
stp x20, x21, [x29, #216]
stp x22, x23, [x29, #232]
stp x24, x25, [x29, #248]
stp x26, x27, [x29, #264]
str x28, [x29, #280]
mov x23, xzr
str x0, [x29, #176]
str x2, [x29, #184]
ldp x6, x7, [x2]
ldp x8, x9, [x2, #16]
mov x10, #1
mov x11, xzr
mov x12, xzr
mov x13, xzr
stp x10, x11, [x0]
stp x12, x13, [x0, #16]
# Set zero
stp xzr, xzr, [x29, #16]
stp xzr, xzr, [x29, #32]
mov x24, #0xfe
L_curve25519_bits:
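# Extract scalar bit x24 and XOR it with the previously processed bit so the
# conditional swaps below only trigger when the bit value changes.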
lsr x3, x24, #6
and x4, x24, #63
ldr x5, [x1, x3, LSL 3]
lsr x5, x5, x4
eor x23, x23, x5
# Conditional Swap
subs xzr, xzr, x23, lsl 63
ldp x25, x26, [x29, #16]
ldp x27, x28, [x29, #32]
csel x19, x25, x10, ne
csel x25, x10, x25, ne
csel x20, x26, x11, ne
csel x26, x11, x26, ne
csel x21, x27, x12, ne
csel x27, x12, x27, ne
csel x22, x28, x13, ne
csel x28, x13, x28, ne
# Conditional Swap
subs xzr, xzr, x23, lsl 63
ldp x10, x11, [x0]
ldp x12, x13, [x0, #16]
csel x14, x10, x6, ne
csel x10, x6, x10, ne
csel x15, x11, x7, ne
csel x11, x7, x11, ne
csel x16, x12, x8, ne
csel x12, x8, x12, ne
csel x17, x13, x9, ne
csel x13, x9, x13, ne
mov x23, x5
# Add
adds x6, x10, x25
adcs x7, x11, x26
adcs x8, x12, x27
adcs x9, x13, x28
cset x5, cs
mov x3, #19
extr x5, x5, x9, #63
mul x3, x5, x3
# Sub modulus (if overflow)
adds x6, x6, x3
adcs x7, x7, xzr
and x9, x9, #0x7fffffffffffffff
adcs x8, x8, xzr
adc x9, x9, xzr
# Sub
subs x25, x10, x25
sbcs x26, x11, x26
sbcs x27, x12, x27
sbcs x28, x13, x28
csetm x5, cc
mov x3, #-19
extr x5, x5, x28, #63
mul x3, x5, x3
# Add modulus (if underflow)
subs x25, x25, x3
sbcs x26, x26, xzr
and x28, x28, #0x7fffffffffffffff
sbcs x27, x27, xzr
sbc x28, x28, xzr
stp x25, x26, [x29, #80]
stp x27, x28, [x29, #96]
# Add
adds x10, x14, x19
adcs x11, x15, x20
adcs x12, x16, x21
adcs x13, x17, x22
cset x5, cs
mov x3, #19
extr x5, x5, x13, #63
mul x3, x5, x3
# Sub modulus (if overflow)
adds x10, x10, x3
adcs x11, x11, xzr
and x13, x13, #0x7fffffffffffffff
adcs x12, x12, xzr
adc x13, x13, xzr
# Sub
subs x14, x14, x19
sbcs x15, x15, x20
sbcs x16, x16, x21
sbcs x17, x17, x22
csetm x5, cc
mov x3, #-19
extr x5, x5, x17, #63
mul x3, x5, x3
# Add modulus (if underflow)
subs x14, x14, x3
sbcs x15, x15, xzr
and x17, x17, #0x7fffffffffffffff
sbcs x16, x16, xzr
sbc x17, x17, xzr
# Multiply
# A[0] * B[0]
umulh x20, x14, x6
mul x19, x14, x6
# A[2] * B[0]
umulh x22, x16, x6
mul x21, x16, x6
# A[1] * B[0]
mul x3, x15, x6
adds x20, x20, x3
umulh x4, x15, x6
adcs x21, x21, x4
# A[1] * B[3]
umulh x26, x15, x9
adc x22, x22, xzr
mul x25, x15, x9
# A[0] * B[1]
mul x3, x14, x7
adds x20, x20, x3
umulh x4, x14, x7
adcs x21, x21, x4
# A[2] * B[1]
mul x3, x16, x7
adcs x22, x22, x3
umulh x4, x16, x7
adcs x25, x25, x4
adc x26, x26, xzr
# A[1] * B[2]
mul x3, x15, x8
adds x22, x22, x3
umulh x4, x15, x8
adcs x25, x25, x4
adcs x26, x26, xzr
adc x27, xzr, xzr
# A[0] * B[2]
mul x3, x14, x8
adds x21, x21, x3
umulh x4, x14, x8
adcs x22, x22, x4
adcs x25, x25, xzr
adcs x26, x26, xzr
adc x27, x27, xzr
# A[1] * B[1]
mul x3, x15, x7
adds x21, x21, x3
umulh x4, x15, x7
adcs x22, x22, x4
# A[3] * B[1]
mul x3, x17, x7
adcs x25, x25, x3
umulh x4, x17, x7
adcs x26, x26, x4
adc x27, x27, xzr
# A[2] * B[2]
mul x3, x16, x8
adds x25, x25, x3
umulh x4, x16, x8
adcs x26, x26, x4
# A[3] * B[3]
mul x3, x17, x9
adcs x27, x27, x3
umulh x28, x17, x9
adc x28, x28, xzr
# A[0] * B[3]
mul x3, x14, x9
adds x22, x22, x3
umulh x4, x14, x9
adcs x25, x25, x4
# A[2] * B[3]
mul x3, x16, x9
adcs x26, x26, x3
umulh x4, x16, x9
adcs x27, x27, x4
adc x28, x28, xzr
# A[3] * B[0]
mul x3, x17, x6
adds x22, x22, x3
umulh x4, x17, x6
adcs x25, x25, x4
# A[3] * B[2]
mul x3, x17, x8
adcs x26, x26, x3
umulh x4, x17, x8
adcs x27, x27, x4
adc x28, x28, xzr
# Reduce
mov x3, #38
mul x4, x3, x28
adds x22, x22, x4
umulh x5, x3, x28
adc x5, x5, xzr
mov x3, #19
extr x5, x5, x22, #63
mul x5, x5, x3
and x22, x22, #0x7fffffffffffffff
mov x3, #38
mul x4, x3, x25
adds x19, x19, x4
umulh x25, x3, x25
mul x4, x3, x26
adcs x20, x20, x4
umulh x26, x3, x26
mul x4, x3, x27
adcs x21, x21, x4
umulh x27, x3, x27
adc x22, x22, xzr
# Add high product results in
adds x19, x19, x5
adcs x20, x20, x25
adcs x21, x21, x26
adc x22, x22, x27
# Store
stp x19, x20, [x29, #48]
stp x21, x22, [x29, #64]
# Multiply
ldp x25, x26, [x29, #80]
ldp x27, x28, [x29, #96]
# A[0] * B[0]
umulh x20, x10, x25
mul x19, x10, x25
# A[2] * B[0]
umulh x22, x12, x25
mul x21, x12, x25
# A[1] * B[0]
mul x3, x11, x25
adds x20, x20, x3
umulh x4, x11, x25
adcs x21, x21, x4
# A[1] * B[3]
umulh x15, x11, x28
adc x22, x22, xzr
mul x14, x11, x28
# A[0] * B[1]
mul x3, x10, x26
adds x20, x20, x3
umulh x4, x10, x26
adcs x21, x21, x4
# A[2] * B[1]
mul x3, x12, x26
adcs x22, x22, x3
umulh x4, x12, x26
adcs x14, x14, x4
adc x15, x15, xzr
# A[1] * B[2]
mul x3, x11, x27
adds x22, x22, x3
umulh x4, x11, x27
adcs x14, x14, x4
adcs x15, x15, xzr
adc x16, xzr, xzr
# A[0] * B[2]
mul x3, x10, x27
adds x21, x21, x3
umulh x4, x10, x27
adcs x22, x22, x4
adcs x14, x14, xzr
adcs x15, x15, xzr
adc x16, x16, xzr
# A[1] * B[1]
mul x3, x11, x26
adds x21, x21, x3
umulh x4, x11, x26
adcs x22, x22, x4
# A[3] * B[1]
mul x3, x13, x26
adcs x14, x14, x3
umulh x4, x13, x26
adcs x15, x15, x4
adc x16, x16, xzr
# A[2] * B[2]
mul x3, x12, x27
adds x14, x14, x3
umulh x4, x12, x27
adcs x15, x15, x4
# A[3] * B[3]
mul x3, x13, x28
adcs x16, x16, x3
umulh x17, x13, x28
adc x17, x17, xzr
# A[0] * B[3]
mul x3, x10, x28
adds x22, x22, x3
umulh x4, x10, x28
adcs x14, x14, x4
# A[2] * B[3]
mul x3, x12, x28
adcs x15, x15, x3
umulh x4, x12, x28
adcs x16, x16, x4
adc x17, x17, xzr
# A[3] * B[0]
mul x3, x13, x25
adds x22, x22, x3
umulh x4, x13, x25
adcs x14, x14, x4
# A[3] * B[2]
mul x3, x13, x27
adcs x15, x15, x3
umulh x4, x13, x27
adcs x16, x16, x4
adc x17, x17, xzr
# Reduce
mov x3, #38
mul x4, x3, x17
adds x22, x22, x4
umulh x5, x3, x17
adc x5, x5, xzr
mov x3, #19
extr x5, x5, x22, #63
mul x5, x5, x3
and x22, x22, #0x7fffffffffffffff
mov x3, #38
mul x4, x3, x14
adds x19, x19, x4
umulh x14, x3, x14
mul x4, x3, x15
adcs x20, x20, x4
umulh x15, x3, x15
mul x4, x3, x16
adcs x21, x21, x4
umulh x16, x3, x16
adc x22, x22, xzr
# Add high product results in
adds x19, x19, x5
adcs x20, x20, x14
adcs x21, x21, x15
adc x22, x22, x16
# Square
# A[0] * A[1]
umulh x12, x25, x26
mul x11, x25, x26
# A[0] * A[3]
umulh x14, x25, x28
mul x13, x25, x28
# A[0] * A[2]
mul x3, x25, x27
adds x12, x12, x3
umulh x4, x25, x27
adcs x13, x13, x4
# A[1] * A[3]
mul x3, x26, x28
adcs x14, x14, x3
umulh x15, x26, x28
adc x15, x15, xzr
# A[1] * A[2]
mul x3, x26, x27
adds x13, x13, x3
umulh x4, x26, x27
adcs x14, x14, x4
# A[2] * A[3]
mul x3, x27, x28
adcs x15, x15, x3
umulh x16, x27, x28
adc x16, x16, xzr
# Double
adds x11, x11, x11
adcs x12, x12, x12
adcs x13, x13, x13
adcs x14, x14, x14
adcs x15, x15, x15
adcs x16, x16, x16
adc x17, xzr, xzr
# A[0] * A[0]
umulh x4, x25, x25
mul x10, x25, x25
# A[1] * A[1]
mul x3, x26, x26
adds x11, x11, x4
umulh x4, x26, x26
adcs x12, x12, x3
# A[2] * A[2]
mul x3, x27, x27
adcs x13, x13, x4
umulh x4, x27, x27
adcs x14, x14, x3
# A[3] * A[3]
mul x3, x28, x28
adcs x15, x15, x4
umulh x4, x28, x28
adcs x16, x16, x3
adc x17, x17, x4
# Reduce
mov x3, #38
mul x4, x3, x17
adds x13, x13, x4
umulh x5, x3, x17
adc x5, x5, xzr
mov x3, #19
extr x5, x5, x13, #63
mul x5, x5, x3
and x13, x13, #0x7fffffffffffffff
mov x3, #38
mul x4, x3, x14
adds x10, x10, x4
umulh x14, x3, x14
mul x4, x3, x15
adcs x11, x11, x4
umulh x15, x3, x15
mul x4, x3, x16
adcs x12, x12, x4
umulh x16, x3, x16
adc x13, x13, xzr
# Add high product results in
adds x10, x10, x5
adcs x11, x11, x14
adcs x12, x12, x15
adc x13, x13, x16
# Square
# A[0] * A[1]
umulh x16, x6, x7
mul x15, x6, x7
# A[0] * A[3]
umulh x25, x6, x9
mul x17, x6, x9
# A[0] * A[2]
mul x3, x6, x8
adds x16, x16, x3
umulh x4, x6, x8
adcs x17, x17, x4
# A[1] * A[3]
mul x3, x7, x9
adcs x25, x25, x3
umulh x26, x7, x9
adc x26, x26, xzr
# A[1] * A[2]
mul x3, x7, x8
adds x17, x17, x3
umulh x4, x7, x8
adcs x25, x25, x4
# A[2] * A[3]
mul x3, x8, x9
adcs x26, x26, x3
umulh x27, x8, x9
adc x27, x27, xzr
# Double
adds x15, x15, x15
adcs x16, x16, x16
adcs x17, x17, x17
adcs x25, x25, x25
adcs x26, x26, x26
adcs x27, x27, x27
adc x28, xzr, xzr
# A[0] * A[0]
umulh x4, x6, x6
mul x14, x6, x6
# A[1] * A[1]
mul x3, x7, x7
adds x15, x15, x4
umulh x4, x7, x7
adcs x16, x16, x3
# A[2] * A[2]
mul x3, x8, x8
adcs x17, x17, x4
umulh x4, x8, x8
adcs x25, x25, x3
# A[3] * A[3]
mul x3, x9, x9
adcs x26, x26, x4
umulh x4, x9, x9
adcs x27, x27, x3
adc x28, x28, x4
# Reduce
mov x3, #38
mul x4, x3, x28
adds x17, x17, x4
umulh x5, x3, x28
adc x5, x5, xzr
mov x3, #19
extr x5, x5, x17, #63
mul x5, x5, x3
and x17, x17, #0x7fffffffffffffff
mov x3, #38
mul x4, x3, x25
adds x14, x14, x4
umulh x25, x3, x25
mul x4, x3, x26
adcs x15, x15, x4
umulh x26, x3, x26
mul x4, x3, x27
adcs x16, x16, x4
umulh x27, x3, x27
adc x17, x17, xzr
# Add high product results in
adds x14, x14, x5
adcs x15, x15, x25
adcs x16, x16, x26
adc x17, x17, x27
# Multiply
# A[0] * B[0]
umulh x7, x14, x10
mul x6, x14, x10
# A[2] * B[0]
umulh x9, x16, x10
mul x8, x16, x10
# A[1] * B[0]
mul x3, x15, x10
adds x7, x7, x3
umulh x4, x15, x10
adcs x8, x8, x4
# A[1] * B[3]
umulh x26, x15, x13
adc x9, x9, xzr
mul x25, x15, x13
# A[0] * B[1]
mul x3, x14, x11
adds x7, x7, x3
umulh x4, x14, x11
adcs x8, x8, x4
# A[2] * B[1]
mul x3, x16, x11
adcs x9, x9, x3
umulh x4, x16, x11
adcs x25, x25, x4
adc x26, x26, xzr
# A[1] * B[2]
mul x3, x15, x12
adds x9, x9, x3
umulh x4, x15, x12
adcs x25, x25, x4
adcs x26, x26, xzr
adc x27, xzr, xzr
# A[0] * B[2]
mul x3, x14, x12
adds x8, x8, x3
umulh x4, x14, x12
adcs x9, x9, x4
adcs x25, x25, xzr
adcs x26, x26, xzr
adc x27, x27, xzr
# A[1] * B[1]
mul x3, x15, x11
adds x8, x8, x3
umulh x4, x15, x11
adcs x9, x9, x4
# A[3] * B[1]
mul x3, x17, x11
adcs x25, x25, x3
umulh x4, x17, x11
adcs x26, x26, x4
adc x27, x27, xzr
# A[2] * B[2]
mul x3, x16, x12
adds x25, x25, x3
umulh x4, x16, x12
adcs x26, x26, x4
# A[3] * B[3]
mul x3, x17, x13
adcs x27, x27, x3
umulh x28, x17, x13
adc x28, x28, xzr
# A[0] * B[3]
mul x3, x14, x13
adds x9, x9, x3
umulh x4, x14, x13
adcs x25, x25, x4
# A[2] * B[3]
mul x3, x16, x13
adcs x26, x26, x3
umulh x4, x16, x13
adcs x27, x27, x4
adc x28, x28, xzr
# A[3] * B[0]
mul x3, x17, x10
adds x9, x9, x3
umulh x4, x17, x10
adcs x25, x25, x4
# A[3] * B[2]
mul x3, x17, x12
adcs x26, x26, x3
umulh x4, x17, x12
adcs x27, x27, x4
adc x28, x28, xzr
# Reduce
mov x3, #38
mul x4, x3, x28
adds x9, x9, x4
umulh x5, x3, x28
adc x5, x5, xzr
mov x3, #19
extr x5, x5, x9, #63
mul x5, x5, x3
and x9, x9, #0x7fffffffffffffff
mov x3, #38
mul x4, x3, x25
adds x6, x6, x4
umulh x25, x3, x25
mul x4, x3, x26
adcs x7, x7, x4
umulh x26, x3, x26
mul x4, x3, x27
adcs x8, x8, x4
umulh x27, x3, x27
adc x9, x9, xzr
# Add high product results in
adds x6, x6, x5
adcs x7, x7, x25
adcs x8, x8, x26
adc x9, x9, x27
# Store
stp x6, x7, [x0]
stp x8, x9, [x0, #16]
# Sub
subs x14, x14, x10
sbcs x15, x15, x11
sbcs x16, x16, x12
sbcs x17, x17, x13
csetm x5, cc
mov x3, #-19
# Mask the modulus
extr x5, x5, x17, #63
mul x3, x5, x3
# Add modulus (if underflow)
subs x14, x14, x3
sbcs x15, x15, xzr
and x17, x17, #0x7fffffffffffffff
sbcs x16, x16, xzr
sbc x17, x17, xzr
# Multiply by 121666
mov x5, #0xdb42
movk x5, #1, lsl 16
mul x6, x14, x5
umulh x7, x14, x5
mul x3, x15, x5
umulh x8, x15, x5
adds x7, x7, x3
adc x8, x8, xzr
mul x3, x16, x5
umulh x9, x16, x5
adds x8, x8, x3
adc x9, x9, xzr
mul x3, x17, x5
umulh x4, x17, x5
adds x9, x9, x3
adc x4, x4, xzr
mov x5, #19
extr x4, x4, x9, #63
mul x4, x4, x5
adds x6, x6, x4
adcs x7, x7, xzr
and x9, x9, #0x7fffffffffffffff
adcs x8, x8, xzr
adc x9, x9, xzr
# Add
adds x10, x10, x6
adcs x11, x11, x7
adcs x12, x12, x8
adcs x13, x13, x9
cset x5, cs
mov x3, #19
# Mask the modulus
extr x5, x5, x13, #63
mul x3, x5, x3
# Sub modulus (if overflow)
adds x10, x10, x3
adcs x11, x11, xzr
and x13, x13, #0x7fffffffffffffff
adcs x12, x12, xzr
adc x13, x13, xzr
# Multiply
# A[0] * B[0]
umulh x7, x14, x10
mul x6, x14, x10
# A[2] * B[0]
umulh x9, x16, x10
mul x8, x16, x10
# A[1] * B[0]
mul x3, x15, x10
adds x7, x7, x3
umulh x4, x15, x10
adcs x8, x8, x4
# A[1] * B[3]
umulh x26, x15, x13
adc x9, x9, xzr
mul x25, x15, x13
# A[0] * B[1]
mul x3, x14, x11
adds x7, x7, x3
umulh x4, x14, x11
adcs x8, x8, x4
# A[2] * B[1]
mul x3, x16, x11
adcs x9, x9, x3
umulh x4, x16, x11
adcs x25, x25, x4
adc x26, x26, xzr
# A[1] * B[2]
mul x3, x15, x12
adds x9, x9, x3
umulh x4, x15, x12
adcs x25, x25, x4
adcs x26, x26, xzr
adc x27, xzr, xzr
# A[0] * B[2]
mul x3, x14, x12
adds x8, x8, x3
umulh x4, x14, x12
adcs x9, x9, x4
adcs x25, x25, xzr
adcs x26, x26, xzr
adc x27, x27, xzr
# A[1] * B[1]
mul x3, x15, x11
adds x8, x8, x3
umulh x4, x15, x11
adcs x9, x9, x4
# A[3] * B[1]
mul x3, x17, x11
adcs x25, x25, x3
umulh x4, x17, x11
adcs x26, x26, x4
adc x27, x27, xzr
# A[2] * B[2]
mul x3, x16, x12
adds x25, x25, x3
umulh x4, x16, x12
adcs x26, x26, x4
# A[3] * B[3]
mul x3, x17, x13
adcs x27, x27, x3
umulh x28, x17, x13
adc x28, x28, xzr
# A[0] * B[3]
mul x3, x14, x13
adds x9, x9, x3
umulh x4, x14, x13
adcs x25, x25, x4
# A[2] * B[3]
mul x3, x16, x13
adcs x26, x26, x3
umulh x4, x16, x13
adcs x27, x27, x4
adc x28, x28, xzr
# A[3] * B[0]
mul x3, x17, x10
adds x9, x9, x3
umulh x4, x17, x10
adcs x25, x25, x4
# A[3] * B[2]
mul x3, x17, x12
adcs x26, x26, x3
umulh x4, x17, x12
adcs x27, x27, x4
adc x28, x28, xzr
# Reduce
mov x3, #38
mul x4, x3, x28
adds x9, x9, x4
umulh x5, x3, x28
adc x5, x5, xzr
mov x3, #19
extr x5, x5, x9, #63
mul x5, x5, x3
and x9, x9, #0x7fffffffffffffff
mov x3, #38
mul x4, x3, x25
adds x6, x6, x4
umulh x25, x3, x25
mul x4, x3, x26
adcs x7, x7, x4
umulh x26, x3, x26
mul x4, x3, x27
adcs x8, x8, x4
umulh x27, x3, x27
adc x9, x9, xzr
# Add high product results in
adds x6, x6, x5
adcs x7, x7, x25
adcs x8, x8, x26
adc x9, x9, x27
# Store
stp x6, x7, [x29, #16]
stp x8, x9, [x29, #32]
# Add
ldp x25, x26, [x29, #48]
ldp x27, x28, [x29, #64]
adds x10, x25, x19
adcs x11, x26, x20
adcs x12, x27, x21
adcs x13, x28, x22
cset x5, cs
mov x3, #19
extr x5, x5, x13, #63
mul x3, x5, x3
# Sub modulus (if overflow)
adds x10, x10, x3
adcs x11, x11, xzr
and x13, x13, #0x7fffffffffffffff
adcs x12, x12, xzr
adc x13, x13, xzr
# Sub
subs x19, x25, x19
sbcs x20, x26, x20
sbcs x21, x27, x21
sbcs x22, x28, x22
csetm x5, cc
mov x3, #-19
extr x5, x5, x22, #63
mul x3, x5, x3
# Add modulus (if underflow)
subs x19, x19, x3
sbcs x20, x20, xzr
and x22, x22, #0x7fffffffffffffff
sbcs x21, x21, xzr
sbc x22, x22, xzr
# Square
# A[0] * A[1]
umulh x8, x10, x11
mul x7, x10, x11
# A[0] * A[3]
umulh x25, x10, x13
mul x9, x10, x13
# A[0] * A[2]
mul x3, x10, x12
adds x8, x8, x3
umulh x4, x10, x12
adcs x9, x9, x4
# A[1] * A[3]
mul x3, x11, x13
adcs x25, x25, x3
umulh x26, x11, x13
adc x26, x26, xzr
# A[1] * A[2]
mul x3, x11, x12
adds x9, x9, x3
umulh x4, x11, x12
adcs x25, x25, x4
# A[2] * A[3]
mul x3, x12, x13
adcs x26, x26, x3
umulh x27, x12, x13
adc x27, x27, xzr
# Double
adds x7, x7, x7
adcs x8, x8, x8
adcs x9, x9, x9
adcs x25, x25, x25
adcs x26, x26, x26
adcs x27, x27, x27
adc x28, xzr, xzr
# A[0] * A[0]
umulh x4, x10, x10
mul x6, x10, x10
# A[1] * A[1]
mul x3, x11, x11
adds x7, x7, x4
umulh x4, x11, x11
adcs x8, x8, x3
# A[2] * A[2]
mul x3, x12, x12
adcs x9, x9, x4
umulh x4, x12, x12
adcs x25, x25, x3
# A[3] * A[3]
mul x3, x13, x13
adcs x26, x26, x4
umulh x4, x13, x13
adcs x27, x27, x3
adc x28, x28, x4
# Reduce
mov x3, #38
mul x4, x3, x28
adds x9, x9, x4
umulh x5, x3, x28
adc x5, x5, xzr
mov x3, #19
extr x5, x5, x9, #63
mul x5, x5, x3
and x9, x9, #0x7fffffffffffffff
mov x3, #38
mul x4, x3, x25
adds x6, x6, x4
umulh x25, x3, x25
mul x4, x3, x26
adcs x7, x7, x4
umulh x26, x3, x26
mul x4, x3, x27
adcs x8, x8, x4
umulh x27, x3, x27
adc x9, x9, xzr
# Add high product results in
adds x6, x6, x5
adcs x7, x7, x25
adcs x8, x8, x26
adc x9, x9, x27
# Square
# A[0] * A[1]
umulh x16, x19, x20
mul x15, x19, x20
# A[0] * A[3]
umulh x25, x19, x22
mul x17, x19, x22
# A[0] * A[2]
mul x3, x19, x21
adds x16, x16, x3
umulh x4, x19, x21
adcs x17, x17, x4
# A[1] * A[3]
mul x3, x20, x22
adcs x25, x25, x3
umulh x26, x20, x22
adc x26, x26, xzr
# A[1] * A[2]
mul x3, x20, x21
adds x17, x17, x3
umulh x4, x20, x21
adcs x25, x25, x4
# A[2] * A[3]
mul x3, x21, x22
adcs x26, x26, x3
umulh x27, x21, x22
adc x27, x27, xzr
# Double
adds x15, x15, x15
adcs x16, x16, x16
adcs x17, x17, x17
adcs x25, x25, x25
adcs x26, x26, x26
adcs x27, x27, x27
adc x28, xzr, xzr
# A[0] * A[0]
umulh x4, x19, x19
mul x14, x19, x19
# A[1] * A[1]
mul x3, x20, x20
adds x15, x15, x4
umulh x4, x20, x20
adcs x16, x16, x3
# A[2] * A[2]
mul x3, x21, x21
adcs x17, x17, x4
umulh x4, x21, x21
adcs x25, x25, x3
# A[3] * A[3]
mul x3, x22, x22
adcs x26, x26, x4
umulh x4, x22, x22
adcs x27, x27, x3
adc x28, x28, x4
# Reduce
mov x3, #38
mul x4, x3, x28
adds x17, x17, x4
umulh x5, x3, x28
adc x5, x5, xzr
mov x3, #19
extr x5, x5, x17, #63
mul x5, x5, x3
and x17, x17, #0x7fffffffffffffff
mov x3, #38
mul x4, x3, x25
adds x14, x14, x4
umulh x25, x3, x25
mul x4, x3, x26
adcs x15, x15, x4
umulh x26, x3, x26
mul x4, x3, x27
adcs x16, x16, x4
umulh x27, x3, x27
adc x17, x17, xzr
# Add high product results in
adds x14, x14, x5
adcs x15, x15, x25
adcs x16, x16, x26
adc x17, x17, x27
# Multiply
ldp x19, x20, [x2]
ldp x21, x22, [x2, #16]
# A[0] * B[0]
umulh x11, x19, x14
mul x10, x19, x14
# A[2] * B[0]
umulh x13, x21, x14
mul x12, x21, x14
# A[1] * B[0]
mul x3, x20, x14
adds x11, x11, x3
umulh x4, x20, x14
adcs x12, x12, x4
# A[1] * B[3]
umulh x26, x20, x17
adc x13, x13, xzr
mul x25, x20, x17
# A[0] * B[1]
mul x3, x19, x15
adds x11, x11, x3
umulh x4, x19, x15
adcs x12, x12, x4
# A[2] * B[1]
mul x3, x21, x15
adcs x13, x13, x3
umulh x4, x21, x15
adcs x25, x25, x4
adc x26, x26, xzr
# A[1] * B[2]
mul x3, x20, x16
adds x13, x13, x3
umulh x4, x20, x16
adcs x25, x25, x4
adcs x26, x26, xzr
adc x27, xzr, xzr
# A[0] * B[2]
mul x3, x19, x16
adds x12, x12, x3
umulh x4, x19, x16
adcs x13, x13, x4
adcs x25, x25, xzr
adcs x26, x26, xzr
adc x27, x27, xzr
# A[1] * B[1]
mul x3, x20, x15
adds x12, x12, x3
umulh x4, x20, x15
adcs x13, x13, x4
# A[3] * B[1]
mul x3, x22, x15
adcs x25, x25, x3
umulh x4, x22, x15
adcs x26, x26, x4
adc x27, x27, xzr
# A[2] * B[2]
mul x3, x21, x16
adds x25, x25, x3
umulh x4, x21, x16
adcs x26, x26, x4
# A[3] * B[3]
mul x3, x22, x17
adcs x27, x27, x3
umulh x28, x22, x17
adc x28, x28, xzr
# A[0] * B[3]
mul x3, x19, x17
adds x13, x13, x3
umulh x4, x19, x17
adcs x25, x25, x4
# A[2] * B[3]
mul x3, x21, x17
adcs x26, x26, x3
umulh x4, x21, x17
adcs x27, x27, x4
adc x28, x28, xzr
# A[3] * B[0]
mul x3, x22, x14
adds x13, x13, x3
umulh x4, x22, x14
adcs x25, x25, x4
# A[3] * B[2]
mul x3, x22, x16
adcs x26, x26, x3
umulh x4, x22, x16
adcs x27, x27, x4
adc x28, x28, xzr
# Reduce
mov x3, #38
mul x4, x3, x28
adds x13, x13, x4
umulh x5, x3, x28
adc x5, x5, xzr
mov x3, #19
extr x5, x5, x13, #63
mul x5, x5, x3
and x13, x13, #0x7fffffffffffffff
mov x3, #38
mul x4, x3, x25
adds x10, x10, x4
umulh x25, x3, x25
mul x4, x3, x26
adcs x11, x11, x4
umulh x26, x3, x26
mul x4, x3, x27
adcs x12, x12, x4
umulh x27, x3, x27
adc x13, x13, xzr
# Add high product results in
adds x10, x10, x5
adcs x11, x11, x25
adcs x12, x12, x26
adc x13, x13, x27
subs x24, x24, #1
bge L_curve25519_bits
# Invert
add x0, x29, #48
add x1, x29, #16
#ifndef __APPLE__
bl fe_sq
#else
bl _fe_sq
#endif /* __APPLE__ */
add x0, x29, #0x50
add x1, x29, #48
#ifndef __APPLE__
bl fe_sq
#else
bl _fe_sq
#endif /* __APPLE__ */
#ifndef NDEBUG
add x0, x29, #0x50
#endif /* !NDEBUG */
add x1, x29, #0x50
#ifndef __APPLE__
bl fe_sq
#else
bl _fe_sq
#endif /* __APPLE__ */
#ifndef NDEBUG
add x0, x29, #0x50
#endif /* !NDEBUG */
add x1, x29, #16
add x2, x29, #0x50
#ifndef __APPLE__
bl fe_mul
#else
bl _fe_mul
#endif /* __APPLE__ */
add x0, x29, #48
add x1, x29, #48
add x2, x29, #0x50
#ifndef __APPLE__
bl fe_mul
#else
bl _fe_mul
#endif /* __APPLE__ */
add x0, x29, #0x70
#ifndef NDEBUG
add x1, x29, #48
#endif /* !NDEBUG */
#ifndef __APPLE__
bl fe_sq
#else
bl _fe_sq
#endif /* __APPLE__ */
add x0, x29, #0x50
add x1, x29, #0x50
add x2, x29, #0x70
#ifndef __APPLE__
bl fe_mul
#else
bl _fe_mul
#endif /* __APPLE__ */
# Loop: 5 times
mov x24, #5
ldp x6, x7, [x29, #80]
ldp x8, x9, [x29, #96]
L_curve25519_inv_1:
# Square
# A[0] * A[1]
umulh x12, x6, x7
mul x11, x6, x7
# A[0] * A[3]
umulh x14, x6, x9
mul x13, x6, x9
# A[0] * A[2]
mul x3, x6, x8
adds x12, x12, x3
umulh x4, x6, x8
adcs x13, x13, x4
# A[1] * A[3]
mul x3, x7, x9
adcs x14, x14, x3
umulh x15, x7, x9
adc x15, x15, xzr
# A[1] * A[2]
mul x3, x7, x8
adds x13, x13, x3
umulh x4, x7, x8
adcs x14, x14, x4
# A[2] * A[3]
mul x3, x8, x9
adcs x15, x15, x3
umulh x16, x8, x9
adc x16, x16, xzr
# Double
adds x11, x11, x11
adcs x12, x12, x12
adcs x13, x13, x13
adcs x14, x14, x14
adcs x15, x15, x15
adcs x16, x16, x16
adc x17, xzr, xzr
# A[0] * A[0]
umulh x4, x6, x6
mul x10, x6, x6
# A[1] * A[1]
mul x3, x7, x7
adds x11, x11, x4
umulh x4, x7, x7
adcs x12, x12, x3
# A[2] * A[2]
mul x3, x8, x8
adcs x13, x13, x4
umulh x4, x8, x8
adcs x14, x14, x3
# A[3] * A[3]
mul x3, x9, x9
adcs x15, x15, x4
umulh x4, x9, x9
adcs x16, x16, x3
adc x17, x17, x4
# Reduce
mov x3, #38
mul x4, x3, x17
adds x13, x13, x4
umulh x5, x3, x17
adc x5, x5, xzr
mov x3, #19
extr x5, x5, x13, #63
mul x5, x5, x3
and x13, x13, #0x7fffffffffffffff
mov x3, #38
mul x4, x3, x14
adds x10, x10, x4
umulh x14, x3, x14
mul x4, x3, x15
adcs x11, x11, x4
umulh x15, x3, x15
mul x4, x3, x16
adcs x12, x12, x4
umulh x16, x3, x16
adc x13, x13, xzr
# Add high product results in
adds x6, x10, x5
adcs x7, x11, x14
adcs x8, x12, x15
adc x9, x13, x16
subs x24, x24, #1
bne L_curve25519_inv_1
# Store
stp x6, x7, [x29, #112]
stp x8, x9, [x29, #128]
#ifndef NDEBUG
add x0, x29, #0x50
#endif /* !NDEBUG */
add x1, x29, #0x70
add x2, x29, #0x50
#ifndef __APPLE__
bl fe_mul
#else
bl _fe_mul
#endif /* __APPLE__ */
# Loop: 10 times
mov x24, #10
ldp x6, x7, [x29, #80]
ldp x8, x9, [x29, #96]
L_curve25519_inv_2:
# Square
# A[0] * A[1]
umulh x12, x6, x7
mul x11, x6, x7
# A[0] * A[3]
umulh x14, x6, x9
mul x13, x6, x9
# A[0] * A[2]
mul x3, x6, x8
adds x12, x12, x3
umulh x4, x6, x8
adcs x13, x13, x4
# A[1] * A[3]
mul x3, x7, x9
adcs x14, x14, x3
umulh x15, x7, x9
adc x15, x15, xzr
# A[1] * A[2]
mul x3, x7, x8
adds x13, x13, x3
umulh x4, x7, x8
adcs x14, x14, x4
# A[2] * A[3]
mul x3, x8, x9
adcs x15, x15, x3
umulh x16, x8, x9
adc x16, x16, xzr
# Double
adds x11, x11, x11
adcs x12, x12, x12
adcs x13, x13, x13
adcs x14, x14, x14
adcs x15, x15, x15
adcs x16, x16, x16
adc x17, xzr, xzr
# A[0] * A[0]
umulh x4, x6, x6
mul x10, x6, x6
# A[1] * A[1]
mul x3, x7, x7
adds x11, x11, x4
umulh x4, x7, x7
adcs x12, x12, x3
# A[2] * A[2]
mul x3, x8, x8
adcs x13, x13, x4
umulh x4, x8, x8
adcs x14, x14, x3
# A[3] * A[3]
mul x3, x9, x9
adcs x15, x15, x4
umulh x4, x9, x9
adcs x16, x16, x3
adc x17, x17, x4
# Reduce
mov x3, #38
mul x4, x3, x17
adds x13, x13, x4
umulh x5, x3, x17
adc x5, x5, xzr
mov x3, #19
extr x5, x5, x13, #63
mul x5, x5, x3
and x13, x13, #0x7fffffffffffffff
mov x3, #38
mul x4, x3, x14
adds x10, x10, x4
umulh x14, x3, x14
mul x4, x3, x15
adcs x11, x11, x4
umulh x15, x3, x15
mul x4, x3, x16
adcs x12, x12, x4
umulh x16, x3, x16
adc x13, x13, xzr
# Add high product results in
adds x6, x10, x5
adcs x7, x11, x14
adcs x8, x12, x15
adc x9, x13, x16
subs x24, x24, #1
bne L_curve25519_inv_2
# Store
stp x6, x7, [x29, #112]
stp x8, x9, [x29, #128]
add x0, x29, #0x70
#ifndef NDEBUG
add x1, x29, #0x70
#endif /* !NDEBUG */
add x2, x29, #0x50
#ifndef __APPLE__
bl fe_mul
#else
bl _fe_mul
#endif /* __APPLE__ */
# Loop: 20 times
mov x24, #20
ldp x6, x7, [x29, #112]
ldp x8, x9, [x29, #128]
L_curve25519_inv_3:
# Square
# A[0] * A[1]
umulh x12, x6, x7
mul x11, x6, x7
# A[0] * A[3]
umulh x14, x6, x9
mul x13, x6, x9
# A[0] * A[2]
mul x3, x6, x8
adds x12, x12, x3
umulh x4, x6, x8
adcs x13, x13, x4
# A[1] * A[3]
mul x3, x7, x9
adcs x14, x14, x3
umulh x15, x7, x9
adc x15, x15, xzr
# A[1] * A[2]
mul x3, x7, x8
adds x13, x13, x3
umulh x4, x7, x8
adcs x14, x14, x4
# A[2] * A[3]
mul x3, x8, x9
adcs x15, x15, x3
umulh x16, x8, x9
adc x16, x16, xzr
# Double
adds x11, x11, x11
adcs x12, x12, x12
adcs x13, x13, x13
adcs x14, x14, x14
adcs x15, x15, x15
adcs x16, x16, x16
adc x17, xzr, xzr
# A[0] * A[0]
umulh x4, x6, x6
mul x10, x6, x6
# A[1] * A[1]
mul x3, x7, x7
adds x11, x11, x4
umulh x4, x7, x7
adcs x12, x12, x3
# A[2] * A[2]
mul x3, x8, x8
adcs x13, x13, x4
umulh x4, x8, x8
adcs x14, x14, x3
# A[3] * A[3]
mul x3, x9, x9
adcs x15, x15, x4
umulh x4, x9, x9
adcs x16, x16, x3
adc x17, x17, x4
# Reduce
mov x3, #38
mul x4, x3, x17
adds x13, x13, x4
umulh x5, x3, x17
adc x5, x5, xzr
mov x3, #19
extr x5, x5, x13, #63
mul x5, x5, x3
and x13, x13, #0x7fffffffffffffff
mov x3, #38
mul x4, x3, x14
adds x10, x10, x4
umulh x14, x3, x14
mul x4, x3, x15
adcs x11, x11, x4
umulh x15, x3, x15
mul x4, x3, x16
adcs x12, x12, x4
umulh x16, x3, x16
adc x13, x13, xzr
# Add high product results in
adds x6, x10, x5
adcs x7, x11, x14
adcs x8, x12, x15
adc x9, x13, x16
subs x24, x24, #1
bne L_curve25519_inv_3
# Store
stp x6, x7, [x29, #144]
stp x8, x9, [x29, #160]
#ifndef NDEBUG
add x0, x29, #0x70
#endif /* !NDEBUG */
add x1, x29, #0x90
add x2, x29, #0x70
#ifndef __APPLE__
bl fe_mul
#else
bl _fe_mul
#endif /* __APPLE__ */
# Loop: 10 times
mov x24, #10
ldp x6, x7, [x29, #112]
ldp x8, x9, [x29, #128]
L_curve25519_inv_4:
# Square
# A[0] * A[1]
umulh x12, x6, x7
mul x11, x6, x7
# A[0] * A[3]
umulh x14, x6, x9
mul x13, x6, x9
# A[0] * A[2]
mul x3, x6, x8
adds x12, x12, x3
umulh x4, x6, x8
adcs x13, x13, x4
# A[1] * A[3]
mul x3, x7, x9
adcs x14, x14, x3
umulh x15, x7, x9
adc x15, x15, xzr
# A[1] * A[2]
mul x3, x7, x8
adds x13, x13, x3
umulh x4, x7, x8
adcs x14, x14, x4
# A[2] * A[3]
mul x3, x8, x9
adcs x15, x15, x3
umulh x16, x8, x9
adc x16, x16, xzr
# Double
adds x11, x11, x11
adcs x12, x12, x12
adcs x13, x13, x13
adcs x14, x14, x14
adcs x15, x15, x15
adcs x16, x16, x16
adc x17, xzr, xzr
# A[0] * A[0]
umulh x4, x6, x6
mul x10, x6, x6
# A[1] * A[1]
mul x3, x7, x7
adds x11, x11, x4
umulh x4, x7, x7
adcs x12, x12, x3
# A[2] * A[2]
mul x3, x8, x8
adcs x13, x13, x4
umulh x4, x8, x8
adcs x14, x14, x3
# A[3] * A[3]
mul x3, x9, x9
adcs x15, x15, x4
umulh x4, x9, x9
adcs x16, x16, x3
adc x17, x17, x4
# Reduce
mov x3, #38
mul x4, x3, x17
adds x13, x13, x4
umulh x5, x3, x17
adc x5, x5, xzr
mov x3, #19
extr x5, x5, x13, #63
mul x5, x5, x3
and x13, x13, #0x7fffffffffffffff
mov x3, #38
mul x4, x3, x14
adds x10, x10, x4
umulh x14, x3, x14
mul x4, x3, x15
adcs x11, x11, x4
umulh x15, x3, x15
mul x4, x3, x16
adcs x12, x12, x4
umulh x16, x3, x16
adc x13, x13, xzr
# Add high product results in
adds x6, x10, x5
adcs x7, x11, x14
adcs x8, x12, x15
adc x9, x13, x16
subs x24, x24, #1
bne L_curve25519_inv_4
# Store
stp x6, x7, [x29, #112]
stp x8, x9, [x29, #128]
add x0, x29, #0x50
add x1, x29, #0x70
add x2, x29, #0x50
#ifndef __APPLE__
bl fe_mul
#else
bl _fe_mul
#endif /* __APPLE__ */
# Loop: 50 times
mov x24, #50
ldp x6, x7, [x29, #80]
ldp x8, x9, [x29, #96]
L_curve25519_inv_5:
# Square
# A[0] * A[1]
umulh x12, x6, x7
mul x11, x6, x7
# A[0] * A[3]
umulh x14, x6, x9
mul x13, x6, x9
# A[0] * A[2]
mul x3, x6, x8
adds x12, x12, x3
umulh x4, x6, x8
adcs x13, x13, x4
# A[1] * A[3]
mul x3, x7, x9
adcs x14, x14, x3
umulh x15, x7, x9
adc x15, x15, xzr
# A[1] * A[2]
mul x3, x7, x8
adds x13, x13, x3
umulh x4, x7, x8
adcs x14, x14, x4
# A[2] * A[3]
mul x3, x8, x9
adcs x15, x15, x3
umulh x16, x8, x9
adc x16, x16, xzr
# Double
adds x11, x11, x11
adcs x12, x12, x12
adcs x13, x13, x13
adcs x14, x14, x14
adcs x15, x15, x15
adcs x16, x16, x16
adc x17, xzr, xzr
# A[0] * A[0]
umulh x4, x6, x6
mul x10, x6, x6
# A[1] * A[1]
mul x3, x7, x7
adds x11, x11, x4
umulh x4, x7, x7
adcs x12, x12, x3
# A[2] * A[2]
mul x3, x8, x8
adcs x13, x13, x4
umulh x4, x8, x8
adcs x14, x14, x3
# A[3] * A[3]
mul x3, x9, x9
adcs x15, x15, x4
umulh x4, x9, x9
adcs x16, x16, x3
adc x17, x17, x4
# Reduce
mov x3, #38
mul x4, x3, x17
adds x13, x13, x4
umulh x5, x3, x17
adc x5, x5, xzr
mov x3, #19
extr x5, x5, x13, #63
mul x5, x5, x3
and x13, x13, #0x7fffffffffffffff
mov x3, #38
mul x4, x3, x14
adds x10, x10, x4
umulh x14, x3, x14
mul x4, x3, x15
adcs x11, x11, x4
umulh x15, x3, x15
mul x4, x3, x16
adcs x12, x12, x4
umulh x16, x3, x16
adc x13, x13, xzr
# Add high product results in
adds x6, x10, x5
adcs x7, x11, x14
adcs x8, x12, x15
adc x9, x13, x16
subs x24, x24, #1
bne L_curve25519_inv_5
# Store
stp x6, x7, [x29, #112]
stp x8, x9, [x29, #128]
add x0, x29, #0x70
#ifndef NDEBUG
add x1, x29, #0x70
#endif /* !NDEBUG */
add x2, x29, #0x50
#ifndef __APPLE__
bl fe_mul
#else
bl _fe_mul
#endif /* __APPLE__ */
# Loop: 100 times
mov x24, #0x64
ldp x6, x7, [x29, #112]
ldp x8, x9, [x29, #128]
L_curve25519_inv_6:
# Square
# A[0] * A[1]
umulh x12, x6, x7
mul x11, x6, x7
# A[0] * A[3]
umulh x14, x6, x9
mul x13, x6, x9
# A[0] * A[2]
mul x3, x6, x8
adds x12, x12, x3
umulh x4, x6, x8
adcs x13, x13, x4
# A[1] * A[3]
mul x3, x7, x9
adcs x14, x14, x3
umulh x15, x7, x9
adc x15, x15, xzr
# A[1] * A[2]
mul x3, x7, x8
adds x13, x13, x3
umulh x4, x7, x8
adcs x14, x14, x4
# A[2] * A[3]
mul x3, x8, x9
adcs x15, x15, x3
umulh x16, x8, x9
adc x16, x16, xzr
# Double
adds x11, x11, x11
adcs x12, x12, x12
adcs x13, x13, x13
adcs x14, x14, x14
adcs x15, x15, x15
adcs x16, x16, x16
adc x17, xzr, xzr
# A[0] * A[0]
umulh x4, x6, x6
mul x10, x6, x6
# A[1] * A[1]
mul x3, x7, x7
adds x11, x11, x4
umulh x4, x7, x7
adcs x12, x12, x3
# A[2] * A[2]
mul x3, x8, x8
adcs x13, x13, x4
umulh x4, x8, x8
adcs x14, x14, x3
# A[3] * A[3]
mul x3, x9, x9
adcs x15, x15, x4
umulh x4, x9, x9
adcs x16, x16, x3
adc x17, x17, x4
# Reduce
mov x3, #38
mul x4, x3, x17
adds x13, x13, x4
umulh x5, x3, x17
adc x5, x5, xzr
mov x3, #19
extr x5, x5, x13, #63
mul x5, x5, x3
and x13, x13, #0x7fffffffffffffff
mov x3, #38
mul x4, x3, x14
adds x10, x10, x4
umulh x14, x3, x14
mul x4, x3, x15
adcs x11, x11, x4
umulh x15, x3, x15
mul x4, x3, x16
adcs x12, x12, x4
umulh x16, x3, x16
adc x13, x13, xzr
# Add high product results in
adds x6, x10, x5
adcs x7, x11, x14
adcs x8, x12, x15
adc x9, x13, x16
subs x24, x24, #1
bne L_curve25519_inv_6
# Store
stp x6, x7, [x29, #144]
stp x8, x9, [x29, #160]
#ifndef NDEBUG
add x0, x29, #0x70
#endif /* !NDEBUG */
add x1, x29, #0x90
add x2, x29, #0x70
#ifndef __APPLE__
bl fe_mul
#else
bl _fe_mul
#endif /* __APPLE__ */
# Loop: 50 times
mov x24, #50
ldp x6, x7, [x29, #112]
ldp x8, x9, [x29, #128]
L_curve25519_inv_7:
# Square
# A[0] * A[1]
umulh x12, x6, x7
mul x11, x6, x7
# A[0] * A[3]
umulh x14, x6, x9
mul x13, x6, x9
# A[0] * A[2]
mul x3, x6, x8
adds x12, x12, x3
umulh x4, x6, x8
adcs x13, x13, x4
# A[1] * A[3]
mul x3, x7, x9
adcs x14, x14, x3
umulh x15, x7, x9
adc x15, x15, xzr
# A[1] * A[2]
mul x3, x7, x8
adds x13, x13, x3
umulh x4, x7, x8
adcs x14, x14, x4
# A[2] * A[3]
mul x3, x8, x9
adcs x15, x15, x3
umulh x16, x8, x9
adc x16, x16, xzr
# Double
adds x11, x11, x11
adcs x12, x12, x12
adcs x13, x13, x13
adcs x14, x14, x14
adcs x15, x15, x15
adcs x16, x16, x16
adc x17, xzr, xzr
# A[0] * A[0]
umulh x4, x6, x6
mul x10, x6, x6
# A[1] * A[1]
mul x3, x7, x7
adds x11, x11, x4
umulh x4, x7, x7
adcs x12, x12, x3
# A[2] * A[2]
mul x3, x8, x8
adcs x13, x13, x4
umulh x4, x8, x8
adcs x14, x14, x3
# A[3] * A[3]
mul x3, x9, x9
adcs x15, x15, x4
umulh x4, x9, x9
adcs x16, x16, x3
adc x17, x17, x4
# Reduce
mov x3, #38
mul x4, x3, x17
adds x13, x13, x4
umulh x5, x3, x17
adc x5, x5, xzr
mov x3, #19
extr x5, x5, x13, #63
mul x5, x5, x3
and x13, x13, #0x7fffffffffffffff
mov x3, #38
mul x4, x3, x14
adds x10, x10, x4
umulh x14, x3, x14
mul x4, x3, x15
adcs x11, x11, x4
umulh x15, x3, x15
mul x4, x3, x16
adcs x12, x12, x4
umulh x16, x3, x16
adc x13, x13, xzr
# Add high product results in
adds x6, x10, x5
adcs x7, x11, x14
adcs x8, x12, x15
adc x9, x13, x16
subs x24, x24, #1
bne L_curve25519_inv_7
# Store
stp x6, x7, [x29, #112]
stp x8, x9, [x29, #128]
add x0, x29, #0x50
add x1, x29, #0x70
add x2, x29, #0x50
#ifndef __APPLE__
bl fe_mul
#else
bl _fe_mul
#endif /* __APPLE__ */
# Loop: 5 times
mov x24, #5
ldp x6, x7, [x29, #80]
ldp x8, x9, [x29, #96]
L_curve25519_inv_8:
# Square
# A[0] * A[1]
umulh x12, x6, x7
mul x11, x6, x7
# A[0] * A[3]
umulh x14, x6, x9
mul x13, x6, x9
# A[0] * A[2]
mul x3, x6, x8
adds x12, x12, x3
umulh x4, x6, x8
adcs x13, x13, x4
# A[1] * A[3]
mul x3, x7, x9
adcs x14, x14, x3
umulh x15, x7, x9
adc x15, x15, xzr
# A[1] * A[2]
mul x3, x7, x8
adds x13, x13, x3
umulh x4, x7, x8
adcs x14, x14, x4
# A[2] * A[3]
mul x3, x8, x9
adcs x15, x15, x3
umulh x16, x8, x9
adc x16, x16, xzr
# Double
adds x11, x11, x11
adcs x12, x12, x12
adcs x13, x13, x13
adcs x14, x14, x14
adcs x15, x15, x15
adcs x16, x16, x16
adc x17, xzr, xzr
# A[0] * A[0]
umulh x4, x6, x6
mul x10, x6, x6
# A[1] * A[1]
mul x3, x7, x7
adds x11, x11, x4
umulh x4, x7, x7
adcs x12, x12, x3
# A[2] * A[2]
mul x3, x8, x8
adcs x13, x13, x4
umulh x4, x8, x8
adcs x14, x14, x3
# A[3] * A[3]
mul x3, x9, x9
adcs x15, x15, x4
umulh x4, x9, x9
adcs x16, x16, x3
adc x17, x17, x4
# Reduce
mov x3, #38
mul x4, x3, x17
adds x13, x13, x4
umulh x5, x3, x17
adc x5, x5, xzr
mov x3, #19
extr x5, x5, x13, #63
mul x5, x5, x3
and x13, x13, #0x7fffffffffffffff
mov x3, #38
mul x4, x3, x14
adds x10, x10, x4
umulh x14, x3, x14
mul x4, x3, x15
adcs x11, x11, x4
umulh x15, x3, x15
mul x4, x3, x16
adcs x12, x12, x4
umulh x16, x3, x16
adc x13, x13, xzr
# Add high product results in
adds x6, x10, x5
adcs x7, x11, x14
adcs x8, x12, x15
adc x9, x13, x16
subs x24, x24, #1
bne L_curve25519_inv_8
# Store
stp x6, x7, [x29, #80]
stp x8, x9, [x29, #96]
add x0, x29, #16
add x1, x29, #0x50
add x2, x29, #48
#ifndef __APPLE__
bl fe_mul
#else
bl _fe_mul
#endif /* __APPLE__ */
ldr x0, [x29, #176]
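    # Final step: multiply the value at [x0] by the inverse computed above
    # (held at [x29, #16]) and reduce to a canonical result before storing.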
# Multiply
ldp x6, x7, [x0]
ldp x8, x9, [x0, #16]
ldp x10, x11, [x29, #16]
ldp x12, x13, [x29, #32]
# A[0] * B[0]
umulh x15, x6, x10
mul x14, x6, x10
# A[2] * B[0]
umulh x17, x8, x10
mul x16, x8, x10
# A[1] * B[0]
mul x3, x7, x10
adds x15, x15, x3
umulh x4, x7, x10
adcs x16, x16, x4
# A[1] * B[3]
umulh x20, x7, x13
adc x17, x17, xzr
mul x19, x7, x13
# A[0] * B[1]
mul x3, x6, x11
adds x15, x15, x3
umulh x4, x6, x11
adcs x16, x16, x4
# A[2] * B[1]
mul x3, x8, x11
adcs x17, x17, x3
umulh x4, x8, x11
adcs x19, x19, x4
adc x20, x20, xzr
# A[1] * B[2]
mul x3, x7, x12
adds x17, x17, x3
umulh x4, x7, x12
adcs x19, x19, x4
adcs x20, x20, xzr
adc x21, xzr, xzr
# A[0] * B[2]
mul x3, x6, x12
adds x16, x16, x3
umulh x4, x6, x12
adcs x17, x17, x4
adcs x19, x19, xzr
adcs x20, x20, xzr
adc x21, x21, xzr
# A[1] * B[1]
mul x3, x7, x11
adds x16, x16, x3
umulh x4, x7, x11
adcs x17, x17, x4
# A[3] * B[1]
mul x3, x9, x11
adcs x19, x19, x3
umulh x4, x9, x11
adcs x20, x20, x4
adc x21, x21, xzr
# A[2] * B[2]
mul x3, x8, x12
adds x19, x19, x3
umulh x4, x8, x12
adcs x20, x20, x4
# A[3] * B[3]
mul x3, x9, x13
adcs x21, x21, x3
umulh x22, x9, x13
adc x22, x22, xzr
# A[0] * B[3]
mul x3, x6, x13
adds x17, x17, x3
umulh x4, x6, x13
adcs x19, x19, x4
# A[2] * B[3]
mul x3, x8, x13
adcs x20, x20, x3
umulh x4, x8, x13
adcs x21, x21, x4
adc x22, x22, xzr
# A[3] * B[0]
mul x3, x9, x10
adds x17, x17, x3
umulh x4, x9, x10
adcs x19, x19, x4
# A[3] * B[2]
mul x3, x9, x12
adcs x20, x20, x3
umulh x4, x9, x12
adcs x21, x21, x4
adc x22, x22, xzr
# Reduce
mov x3, #38
mul x4, x3, x22
adds x17, x17, x4
umulh x5, x3, x22
adc x5, x5, xzr
mov x3, #19
extr x5, x5, x17, #63
mul x5, x5, x3
and x17, x17, #0x7fffffffffffffff
mov x3, #38
mul x4, x3, x19
adds x14, x14, x4
umulh x19, x3, x19
mul x4, x3, x20
adcs x15, x15, x4
umulh x20, x3, x20
mul x4, x3, x21
adcs x16, x16, x4
umulh x21, x3, x21
adc x17, x17, xzr
# Add high product results in
adds x14, x14, x5
adcs x15, x15, x19
adcs x16, x16, x20
adc x17, x17, x21
# Reduce if top bit set
mov x3, #19
and x4, x3, x17, asr 63
adds x14, x14, x4
adcs x15, x15, xzr
and x17, x17, #0x7fffffffffffffff
adcs x16, x16, xzr
adc x17, x17, xzr
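    # Canonicalise: if the value is still >= p, adding 19 carries into bit 255;
    # in that case add 19 and clear that bit (a constant-time subtraction of p).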
adds x4, x14, x3
adcs x4, x15, xzr
adcs x4, x16, xzr
adc x4, x17, xzr
and x4, x3, x4, asr 63
adds x14, x14, x4
adcs x15, x15, xzr
mov x4, #0x7fffffffffffffff
adcs x16, x16, xzr
adc x17, x17, xzr
and x17, x17, x4
# Store
stp x14, x15, [x0]
stp x16, x17, [x0, #16]
mov x0, xzr
ldr x17, [x29, #200]
ldr x19, [x29, #208]
ldp x20, x21, [x29, #216]
ldp x22, x23, [x29, #232]
ldp x24, x25, [x29, #248]
ldp x26, x27, [x29, #264]
ldr x28, [x29, #280]
ldp x29, x30, [sp], #0x120
ret
#ifndef __APPLE__
.size curve25519,.-curve25519
#endif /* __APPLE__ */
#ifdef HAVE_ED25519
#ifndef __APPLE__
.text
.globl fe_pow22523
.type fe_pow22523,@function
.align 2
fe_pow22523:
#else
.section __TEXT,__text
.globl _fe_pow22523
.p2align 2
_fe_pow22523:
#endif /* __APPLE__ */
stp x29, x30, [sp, #-144]!
add x29, sp, #0
str x17, [x29, #128]
str x23, [x29, #136]
# pow22523
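    # Computes a^((p-5)/8) = a^(2^252-3) mod p, the exponentiation used when
    # recovering a square root during point decompression.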
str x0, [x29, #112]
str x1, [x29, #120]
add x0, x29, #16
#ifndef NDEBUG
ldr x1, [x29, #120]
#endif /* !NDEBUG */
#ifndef __APPLE__
bl fe_sq
#else
bl _fe_sq
#endif /* __APPLE__ */
add x0, x29, #48
add x1, x29, #16
#ifndef __APPLE__
bl fe_sq
#else
bl _fe_sq
#endif /* __APPLE__ */
#ifndef NDEBUG
add x0, x29, #48
#endif /* !NDEBUG */
add x1, x29, #48
#ifndef __APPLE__
bl fe_sq
#else
bl _fe_sq
#endif /* __APPLE__ */
#ifndef NDEBUG
add x0, x29, #48
#endif /* !NDEBUG */
ldr x1, [x29, #120]
add x2, x29, #48
#ifndef __APPLE__
bl fe_mul
#else
bl _fe_mul
#endif /* __APPLE__ */
add x0, x29, #16
add x1, x29, #16
add x2, x29, #48
#ifndef __APPLE__
bl fe_mul
#else
bl _fe_mul
#endif /* __APPLE__ */
#ifndef NDEBUG
add x0, x29, #16
#endif /* !NDEBUG */
#ifndef NDEBUG
add x1, x29, #16
#endif /* !NDEBUG */
#ifndef __APPLE__
bl fe_sq
#else
bl _fe_sq
#endif /* __APPLE__ */
#ifndef NDEBUG
add x0, x29, #16
#endif /* !NDEBUG */
add x1, x29, #48
add x2, x29, #16
#ifndef __APPLE__
bl fe_mul
#else
bl _fe_mul
#endif /* __APPLE__ */
# Loop: 5 times
mov x23, #5
ldp x6, x7, [x29, #16]
ldp x8, x9, [x29, #32]
L_fe_pow22523_1:
# Square
# A[0] * A[1]
umulh x12, x6, x7
mul x11, x6, x7
# A[0] * A[3]
umulh x14, x6, x9
mul x13, x6, x9
# A[0] * A[2]
mul x3, x6, x8
adds x12, x12, x3
umulh x4, x6, x8
adcs x13, x13, x4
# A[1] * A[3]
mul x3, x7, x9
adcs x14, x14, x3
umulh x15, x7, x9
adc x15, x15, xzr
# A[1] * A[2]
mul x3, x7, x8
adds x13, x13, x3
umulh x4, x7, x8
adcs x14, x14, x4
# A[2] * A[3]
mul x3, x8, x9
adcs x15, x15, x3
umulh x16, x8, x9
adc x16, x16, xzr
# Double
adds x11, x11, x11
adcs x12, x12, x12
adcs x13, x13, x13
adcs x14, x14, x14
adcs x15, x15, x15
adcs x16, x16, x16
adc x17, xzr, xzr
# A[0] * A[0]
umulh x4, x6, x6
mul x10, x6, x6
# A[1] * A[1]
mul x3, x7, x7
adds x11, x11, x4
umulh x4, x7, x7
adcs x12, x12, x3
# A[2] * A[2]
mul x3, x8, x8
adcs x13, x13, x4
umulh x4, x8, x8
adcs x14, x14, x3
# A[3] * A[3]
mul x3, x9, x9
adcs x15, x15, x4
umulh x4, x9, x9
adcs x16, x16, x3
adc x17, x17, x4
# Reduce
mov x3, #38
mul x4, x3, x17
adds x13, x13, x4
umulh x5, x3, x17
adc x5, x5, xzr
mov x3, #19
extr x5, x5, x13, #63
mul x5, x5, x3
and x13, x13, #0x7fffffffffffffff
mov x3, #38
mul x4, x3, x14
adds x10, x10, x4
umulh x14, x3, x14
mul x4, x3, x15
adcs x11, x11, x4
umulh x15, x3, x15
mul x4, x3, x16
adcs x12, x12, x4
umulh x16, x3, x16
adc x13, x13, xzr
# Add high product results in
adds x6, x10, x5
adcs x7, x11, x14
adcs x8, x12, x15
adc x9, x13, x16
subs x23, x23, #1
bne L_fe_pow22523_1
# Store
stp x6, x7, [x29, #48]
stp x8, x9, [x29, #64]
#ifndef NDEBUG
add x0, x29, #16
#endif /* !NDEBUG */
#ifndef NDEBUG
add x1, x29, #48
#endif /* !NDEBUG */
add x2, x29, #16
#ifndef __APPLE__
bl fe_mul
#else
bl _fe_mul
#endif /* __APPLE__ */
# Loop: 10 times
mov x23, #10
ldp x6, x7, [x29, #16]
ldp x8, x9, [x29, #32]
L_fe_pow22523_2:
# Square
# A[0] * A[1]
umulh x12, x6, x7
mul x11, x6, x7
# A[0] * A[3]
umulh x14, x6, x9
mul x13, x6, x9
# A[0] * A[2]
mul x3, x6, x8
adds x12, x12, x3
umulh x4, x6, x8
adcs x13, x13, x4
# A[1] * A[3]
mul x3, x7, x9
adcs x14, x14, x3
umulh x15, x7, x9
adc x15, x15, xzr
# A[1] * A[2]
mul x3, x7, x8
adds x13, x13, x3
umulh x4, x7, x8
adcs x14, x14, x4
# A[2] * A[3]
mul x3, x8, x9
adcs x15, x15, x3
umulh x16, x8, x9
adc x16, x16, xzr
# Double
adds x11, x11, x11
adcs x12, x12, x12
adcs x13, x13, x13
adcs x14, x14, x14
adcs x15, x15, x15
adcs x16, x16, x16
adc x17, xzr, xzr
# A[0] * A[0]
umulh x4, x6, x6
mul x10, x6, x6
# A[1] * A[1]
mul x3, x7, x7
adds x11, x11, x4
umulh x4, x7, x7
adcs x12, x12, x3
# A[2] * A[2]
mul x3, x8, x8
adcs x13, x13, x4
umulh x4, x8, x8
adcs x14, x14, x3
# A[3] * A[3]
mul x3, x9, x9
adcs x15, x15, x4
umulh x4, x9, x9
adcs x16, x16, x3
adc x17, x17, x4
# Reduce
mov x3, #38
mul x4, x3, x17
adds x13, x13, x4
umulh x5, x3, x17
adc x5, x5, xzr
mov x3, #19
extr x5, x5, x13, #63
mul x5, x5, x3
and x13, x13, #0x7fffffffffffffff
mov x3, #38
mul x4, x3, x14
adds x10, x10, x4
umulh x14, x3, x14
mul x4, x3, x15
adcs x11, x11, x4
umulh x15, x3, x15
mul x4, x3, x16
adcs x12, x12, x4
umulh x16, x3, x16
adc x13, x13, xzr
# Add high product results in
adds x6, x10, x5
adcs x7, x11, x14
adcs x8, x12, x15
adc x9, x13, x16
subs x23, x23, #1
bne L_fe_pow22523_2
# Store
stp x6, x7, [x29, #48]
stp x8, x9, [x29, #64]
add x0, x29, #48
#ifndef NDEBUG
add x1, x29, #48
#endif /* !NDEBUG */
add x2, x29, #16
#ifndef __APPLE__
bl fe_mul
#else
bl _fe_mul
#endif /* __APPLE__ */
# Loop: 20 times
mov x23, #20
ldp x6, x7, [x29, #48]
ldp x8, x9, [x29, #64]
L_fe_pow22523_3:
# Square
# A[0] * A[1]
umulh x12, x6, x7
mul x11, x6, x7
# A[0] * A[3]
umulh x14, x6, x9
mul x13, x6, x9
# A[0] * A[2]
mul x3, x6, x8
adds x12, x12, x3
umulh x4, x6, x8
adcs x13, x13, x4
# A[1] * A[3]
mul x3, x7, x9
adcs x14, x14, x3
umulh x15, x7, x9
adc x15, x15, xzr
# A[1] * A[2]
mul x3, x7, x8
adds x13, x13, x3
umulh x4, x7, x8
adcs x14, x14, x4
# A[2] * A[3]
mul x3, x8, x9
adcs x15, x15, x3
umulh x16, x8, x9
adc x16, x16, xzr
# Double
adds x11, x11, x11
adcs x12, x12, x12
adcs x13, x13, x13
adcs x14, x14, x14
adcs x15, x15, x15
adcs x16, x16, x16
adc x17, xzr, xzr
# A[0] * A[0]
umulh x4, x6, x6
mul x10, x6, x6
# A[1] * A[1]
mul x3, x7, x7
adds x11, x11, x4
umulh x4, x7, x7
adcs x12, x12, x3
# A[2] * A[2]
mul x3, x8, x8
adcs x13, x13, x4
umulh x4, x8, x8
adcs x14, x14, x3
# A[3] * A[3]
mul x3, x9, x9
adcs x15, x15, x4
umulh x4, x9, x9
adcs x16, x16, x3
adc x17, x17, x4
# Reduce
mov x3, #38
mul x4, x3, x17
adds x13, x13, x4
umulh x5, x3, x17
adc x5, x5, xzr
mov x3, #19
extr x5, x5, x13, #63
mul x5, x5, x3
and x13, x13, #0x7fffffffffffffff
mov x3, #38
mul x4, x3, x14
adds x10, x10, x4
umulh x14, x3, x14
mul x4, x3, x15
adcs x11, x11, x4
umulh x15, x3, x15
mul x4, x3, x16
adcs x12, x12, x4
umulh x16, x3, x16
adc x13, x13, xzr
# Add high product results in
adds x6, x10, x5
adcs x7, x11, x14
adcs x8, x12, x15
adc x9, x13, x16
subs x23, x23, #1
bne L_fe_pow22523_3
# Store
stp x6, x7, [x29, #80]
stp x8, x9, [x29, #96]
#ifndef NDEBUG
add x0, x29, #48
#endif /* !NDEBUG */
add x1, x29, #0x50
add x2, x29, #48
#ifndef __APPLE__
bl fe_mul
#else
bl _fe_mul
#endif /* __APPLE__ */
# Loop: 10 times
mov x23, #10
ldp x6, x7, [x29, #48]
ldp x8, x9, [x29, #64]
L_fe_pow22523_4:
# Square
# A[0] * A[1]
umulh x12, x6, x7
mul x11, x6, x7
# A[0] * A[3]
umulh x14, x6, x9
mul x13, x6, x9
# A[0] * A[2]
mul x3, x6, x8
adds x12, x12, x3
umulh x4, x6, x8
adcs x13, x13, x4
# A[1] * A[3]
mul x3, x7, x9
adcs x14, x14, x3
umulh x15, x7, x9
adc x15, x15, xzr
# A[1] * A[2]
mul x3, x7, x8
adds x13, x13, x3
umulh x4, x7, x8
adcs x14, x14, x4
# A[2] * A[3]
mul x3, x8, x9
adcs x15, x15, x3
umulh x16, x8, x9
adc x16, x16, xzr
# Double
adds x11, x11, x11
adcs x12, x12, x12
adcs x13, x13, x13
adcs x14, x14, x14
adcs x15, x15, x15
adcs x16, x16, x16
adc x17, xzr, xzr
# A[0] * A[0]
umulh x4, x6, x6
mul x10, x6, x6
# A[1] * A[1]
mul x3, x7, x7
adds x11, x11, x4
umulh x4, x7, x7
adcs x12, x12, x3
# A[2] * A[2]
mul x3, x8, x8
adcs x13, x13, x4
umulh x4, x8, x8
adcs x14, x14, x3
# A[3] * A[3]
mul x3, x9, x9
adcs x15, x15, x4
umulh x4, x9, x9
adcs x16, x16, x3
adc x17, x17, x4
# Reduce
mov x3, #38
mul x4, x3, x17
adds x13, x13, x4
umulh x5, x3, x17
adc x5, x5, xzr
mov x3, #19
extr x5, x5, x13, #63
mul x5, x5, x3
and x13, x13, #0x7fffffffffffffff
mov x3, #38
mul x4, x3, x14
adds x10, x10, x4
umulh x14, x3, x14
mul x4, x3, x15
adcs x11, x11, x4
umulh x15, x3, x15
mul x4, x3, x16
adcs x12, x12, x4
umulh x16, x3, x16
adc x13, x13, xzr
# Add high product results in
adds x6, x10, x5
adcs x7, x11, x14
adcs x8, x12, x15
adc x9, x13, x16
subs x23, x23, #1
bne L_fe_pow22523_4
# Store
stp x6, x7, [x29, #48]
stp x8, x9, [x29, #64]
add x0, x29, #16
add x1, x29, #48
add x2, x29, #16
#ifndef __APPLE__
bl fe_mul
#else
bl _fe_mul
#endif /* __APPLE__ */
# Loop: 50 times
mov x23, #50
ldp x6, x7, [x29, #16]
ldp x8, x9, [x29, #32]
L_fe_pow22523_5:
# Square
# A[0] * A[1]
umulh x12, x6, x7
mul x11, x6, x7
# A[0] * A[3]
umulh x14, x6, x9
mul x13, x6, x9
# A[0] * A[2]
mul x3, x6, x8
adds x12, x12, x3
umulh x4, x6, x8
adcs x13, x13, x4
# A[1] * A[3]
mul x3, x7, x9
adcs x14, x14, x3
umulh x15, x7, x9
adc x15, x15, xzr
# A[1] * A[2]
mul x3, x7, x8
adds x13, x13, x3
umulh x4, x7, x8
adcs x14, x14, x4
# A[2] * A[3]
mul x3, x8, x9
adcs x15, x15, x3
umulh x16, x8, x9
adc x16, x16, xzr
# Double
adds x11, x11, x11
adcs x12, x12, x12
adcs x13, x13, x13
adcs x14, x14, x14
adcs x15, x15, x15
adcs x16, x16, x16
adc x17, xzr, xzr
# A[0] * A[0]
umulh x4, x6, x6
mul x10, x6, x6
# A[1] * A[1]
mul x3, x7, x7
adds x11, x11, x4
umulh x4, x7, x7
adcs x12, x12, x3
# A[2] * A[2]
mul x3, x8, x8
adcs x13, x13, x4
umulh x4, x8, x8
adcs x14, x14, x3
# A[3] * A[3]
mul x3, x9, x9
adcs x15, x15, x4
umulh x4, x9, x9
adcs x16, x16, x3
adc x17, x17, x4
# Reduce
mov x3, #38
mul x4, x3, x17
adds x13, x13, x4
umulh x5, x3, x17
adc x5, x5, xzr
mov x3, #19
extr x5, x5, x13, #63
mul x5, x5, x3
and x13, x13, #0x7fffffffffffffff
mov x3, #38
mul x4, x3, x14
adds x10, x10, x4
umulh x14, x3, x14
mul x4, x3, x15
adcs x11, x11, x4
umulh x15, x3, x15
mul x4, x3, x16
adcs x12, x12, x4
umulh x16, x3, x16
adc x13, x13, xzr
# Add high product results in
adds x6, x10, x5
adcs x7, x11, x14
adcs x8, x12, x15
adc x9, x13, x16
subs x23, x23, #1
bne L_fe_pow22523_5
# Store
stp x6, x7, [x29, #48]
stp x8, x9, [x29, #64]
add x0, x29, #48
#ifndef NDEBUG
add x1, x29, #48
#endif /* !NDEBUG */
add x2, x29, #16
#ifndef __APPLE__
bl fe_mul
#else
bl _fe_mul
#endif /* __APPLE__ */
# Loop: 100 times
mov x23, #0x64
ldp x6, x7, [x29, #48]
ldp x8, x9, [x29, #64]
L_fe_pow22523_6:
# Square
# A[0] * A[1]
umulh x12, x6, x7
mul x11, x6, x7
# A[0] * A[3]
umulh x14, x6, x9
mul x13, x6, x9
# A[0] * A[2]
mul x3, x6, x8
adds x12, x12, x3
umulh x4, x6, x8
adcs x13, x13, x4
# A[1] * A[3]
mul x3, x7, x9
adcs x14, x14, x3
umulh x15, x7, x9
adc x15, x15, xzr
# A[1] * A[2]
mul x3, x7, x8
adds x13, x13, x3
umulh x4, x7, x8
adcs x14, x14, x4
# A[2] * A[3]
mul x3, x8, x9
adcs x15, x15, x3
umulh x16, x8, x9
adc x16, x16, xzr
# Double
adds x11, x11, x11
adcs x12, x12, x12
adcs x13, x13, x13
adcs x14, x14, x14
adcs x15, x15, x15
adcs x16, x16, x16
adc x17, xzr, xzr
# A[0] * A[0]
umulh x4, x6, x6
mul x10, x6, x6
# A[1] * A[1]
mul x3, x7, x7
adds x11, x11, x4
umulh x4, x7, x7
adcs x12, x12, x3
# A[2] * A[2]
mul x3, x8, x8
adcs x13, x13, x4
umulh x4, x8, x8
adcs x14, x14, x3
# A[3] * A[3]
mul x3, x9, x9
adcs x15, x15, x4
umulh x4, x9, x9
adcs x16, x16, x3
adc x17, x17, x4
# Reduce
mov x3, #38
mul x4, x3, x17
adds x13, x13, x4
umulh x5, x3, x17
adc x5, x5, xzr
mov x3, #19
extr x5, x5, x13, #63
mul x5, x5, x3
and x13, x13, #0x7fffffffffffffff
mov x3, #38
mul x4, x3, x14
adds x10, x10, x4
umulh x14, x3, x14
mul x4, x3, x15
adcs x11, x11, x4
umulh x15, x3, x15
mul x4, x3, x16
adcs x12, x12, x4
umulh x16, x3, x16
adc x13, x13, xzr
# Add high product results in
adds x6, x10, x5
adcs x7, x11, x14
adcs x8, x12, x15
adc x9, x13, x16
subs x23, x23, #1
bne L_fe_pow22523_6
# Store
stp x6, x7, [x29, #80]
stp x8, x9, [x29, #96]
#ifndef NDEBUG
add x0, x29, #48
#endif /* !NDEBUG */
add x1, x29, #0x50
add x2, x29, #48
#ifndef __APPLE__
bl fe_mul
#else
bl _fe_mul
#endif /* __APPLE__ */
# Loop: 50 times
mov x23, #50
ldp x6, x7, [x29, #48]
ldp x8, x9, [x29, #64]
L_fe_pow22523_7:
# Square
# A[0] * A[1]
umulh x12, x6, x7
mul x11, x6, x7
# A[0] * A[3]
umulh x14, x6, x9
mul x13, x6, x9
# A[0] * A[2]
mul x3, x6, x8
adds x12, x12, x3
umulh x4, x6, x8
adcs x13, x13, x4
# A[1] * A[3]
mul x3, x7, x9
adcs x14, x14, x3
umulh x15, x7, x9
adc x15, x15, xzr
# A[1] * A[2]
mul x3, x7, x8
adds x13, x13, x3
umulh x4, x7, x8
adcs x14, x14, x4
# A[2] * A[3]
mul x3, x8, x9
adcs x15, x15, x3
umulh x16, x8, x9
adc x16, x16, xzr
# Double
adds x11, x11, x11
adcs x12, x12, x12
adcs x13, x13, x13
adcs x14, x14, x14
adcs x15, x15, x15
adcs x16, x16, x16
adc x17, xzr, xzr
# A[0] * A[0]
umulh x4, x6, x6
mul x10, x6, x6
# A[1] * A[1]
mul x3, x7, x7
adds x11, x11, x4
umulh x4, x7, x7
adcs x12, x12, x3
# A[2] * A[2]
mul x3, x8, x8
adcs x13, x13, x4
umulh x4, x8, x8
adcs x14, x14, x3
# A[3] * A[3]
mul x3, x9, x9
adcs x15, x15, x4
umulh x4, x9, x9
adcs x16, x16, x3
adc x17, x17, x4
# Reduce
mov x3, #38
mul x4, x3, x17
adds x13, x13, x4
umulh x5, x3, x17
adc x5, x5, xzr
mov x3, #19
extr x5, x5, x13, #63
mul x5, x5, x3
and x13, x13, #0x7fffffffffffffff
mov x3, #38
mul x4, x3, x14
adds x10, x10, x4
umulh x14, x3, x14
mul x4, x3, x15
adcs x11, x11, x4
umulh x15, x3, x15
mul x4, x3, x16
adcs x12, x12, x4
umulh x16, x3, x16
adc x13, x13, xzr
# Add high product results in
adds x6, x10, x5
adcs x7, x11, x14
adcs x8, x12, x15
adc x9, x13, x16
subs x23, x23, #1
bne L_fe_pow22523_7
# Store
stp x6, x7, [x29, #48]
stp x8, x9, [x29, #64]
add x0, x29, #16
add x1, x29, #48
add x2, x29, #16
#ifndef __APPLE__
bl fe_mul
#else
bl _fe_mul
#endif /* __APPLE__ */
#ifndef NDEBUG
add x0, x29, #16
#endif /* !NDEBUG */
add x1, x29, #16
#ifndef __APPLE__
bl fe_sq
#else
bl _fe_sq
#endif /* __APPLE__ */
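    # x0/x1 are not clobbered by fe_sq, so the same buffer is squared a
    # second time without re-setting the argument registers.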
#ifndef __APPLE__
bl fe_sq
#else
bl _fe_sq
#endif /* __APPLE__ */
ldr x0, [x29, #112]
#ifndef NDEBUG
add x1, x29, #16
#endif /* !NDEBUG */
ldr x2, [x29, #120]
#ifndef __APPLE__
bl fe_mul
#else
bl _fe_mul
#endif /* __APPLE__ */
ldr x17, [x29, #128]
ldr x23, [x29, #136]
ldp x29, x30, [sp], #0x90
ret
#ifndef __APPLE__
.size fe_pow22523,.-fe_pow22523
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl ge_p1p1_to_p2
.type ge_p1p1_to_p2,@function
.align 2
ge_p1p1_to_p2:
#else
.section __TEXT,__text
.globl _ge_p1p1_to_p2
.p2align 2
_ge_p1p1_to_p2:
#endif /* __APPLE__ */
stp x29, x30, [sp, #-80]!
add x29, sp, #0
str x17, [x29, #40]
str x19, [x29, #48]
stp x20, x21, [x29, #56]
str x22, [x29, #72]
str x0, [x29, #16]
str x1, [x29, #24]
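    # p1p1 -> p2 (projective): r->X = X*T, r->Y = Y*Z, r->Z = Z*T.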
mov x2, x1
add x1, x1, #0x60
# Multiply
ldp x10, x11, [x1]
ldp x12, x13, [x1, #16]
ldp x6, x7, [x2]
ldp x8, x9, [x2, #16]
# A[0] * B[0]
umulh x15, x10, x6
mul x14, x10, x6
# A[2] * B[0]
umulh x17, x12, x6
mul x16, x12, x6
# A[1] * B[0]
mul x3, x11, x6
adds x15, x15, x3
umulh x4, x11, x6
adcs x16, x16, x4
# A[1] * B[3]
umulh x20, x11, x9
adc x17, x17, xzr
mul x19, x11, x9
# A[0] * B[1]
mul x3, x10, x7
adds x15, x15, x3
umulh x4, x10, x7
adcs x16, x16, x4
# A[2] * B[1]
mul x3, x12, x7
adcs x17, x17, x3
umulh x4, x12, x7
adcs x19, x19, x4
adc x20, x20, xzr
# A[1] * B[2]
mul x3, x11, x8
adds x17, x17, x3
umulh x4, x11, x8
adcs x19, x19, x4
adcs x20, x20, xzr
adc x21, xzr, xzr
# A[0] * B[2]
mul x3, x10, x8
adds x16, x16, x3
umulh x4, x10, x8
adcs x17, x17, x4
adcs x19, x19, xzr
adcs x20, x20, xzr
adc x21, x21, xzr
# A[1] * B[1]
mul x3, x11, x7
adds x16, x16, x3
umulh x4, x11, x7
adcs x17, x17, x4
# A[3] * B[1]
mul x3, x13, x7
adcs x19, x19, x3
umulh x4, x13, x7
adcs x20, x20, x4
adc x21, x21, xzr
# A[2] * B[2]
mul x3, x12, x8
adds x19, x19, x3
umulh x4, x12, x8
adcs x20, x20, x4
# A[3] * B[3]
mul x3, x13, x9
adcs x21, x21, x3
umulh x22, x13, x9
adc x22, x22, xzr
# A[0] * B[3]
mul x3, x10, x9
adds x17, x17, x3
umulh x4, x10, x9
adcs x19, x19, x4
# A[2] * B[3]
mul x3, x12, x9
adcs x20, x20, x3
umulh x4, x12, x9
adcs x21, x21, x4
adc x22, x22, xzr
# A[3] * B[0]
mul x3, x13, x6
adds x17, x17, x3
umulh x4, x13, x6
adcs x19, x19, x4
# A[3] * B[2]
mul x3, x13, x8
adcs x20, x20, x3
umulh x4, x13, x8
adcs x21, x21, x4
adc x22, x22, xzr
# Reduce
mov x3, #38
mul x4, x3, x22
adds x17, x17, x4
umulh x5, x3, x22
adc x5, x5, xzr
mov x3, #19
extr x5, x5, x17, #63
mul x5, x5, x3
and x17, x17, #0x7fffffffffffffff
mov x3, #38
mul x4, x3, x19
adds x14, x14, x4
umulh x19, x3, x19
mul x4, x3, x20
adcs x15, x15, x4
umulh x20, x3, x20
mul x4, x3, x21
adcs x16, x16, x4
umulh x21, x3, x21
adc x17, x17, xzr
# Add high product results in
adds x14, x14, x5
adcs x15, x15, x19
adcs x16, x16, x20
adc x17, x17, x21
# Store
stp x14, x15, [x0]
stp x16, x17, [x0, #16]
sub x2, x1, #32
add x0, x0, #0x40
# Multiply
ldp x6, x7, [x2]
ldp x8, x9, [x2, #16]
# A[0] * B[0]
umulh x15, x10, x6
mul x14, x10, x6
# A[2] * B[0]
umulh x17, x12, x6
mul x16, x12, x6
# A[1] * B[0]
mul x3, x11, x6
adds x15, x15, x3
umulh x4, x11, x6
adcs x16, x16, x4
# A[1] * B[3]
umulh x20, x11, x9
adc x17, x17, xzr
mul x19, x11, x9
# A[0] * B[1]
mul x3, x10, x7
adds x15, x15, x3
umulh x4, x10, x7
adcs x16, x16, x4
# A[2] * B[1]
mul x3, x12, x7
adcs x17, x17, x3
umulh x4, x12, x7
adcs x19, x19, x4
adc x20, x20, xzr
# A[1] * B[2]
mul x3, x11, x8
adds x17, x17, x3
umulh x4, x11, x8
adcs x19, x19, x4
adcs x20, x20, xzr
adc x21, xzr, xzr
# A[0] * B[2]
mul x3, x10, x8
adds x16, x16, x3
umulh x4, x10, x8
adcs x17, x17, x4
adcs x19, x19, xzr
adcs x20, x20, xzr
adc x21, x21, xzr
# A[1] * B[1]
mul x3, x11, x7
adds x16, x16, x3
umulh x4, x11, x7
adcs x17, x17, x4
# A[3] * B[1]
mul x3, x13, x7
adcs x19, x19, x3
umulh x4, x13, x7
adcs x20, x20, x4
adc x21, x21, xzr
# A[2] * B[2]
mul x3, x12, x8
adds x19, x19, x3
umulh x4, x12, x8
adcs x20, x20, x4
# A[3] * B[3]
mul x3, x13, x9
adcs x21, x21, x3
umulh x22, x13, x9
adc x22, x22, xzr
# A[0] * B[3]
mul x3, x10, x9
adds x17, x17, x3
umulh x4, x10, x9
adcs x19, x19, x4
# A[2] * B[3]
mul x3, x12, x9
adcs x20, x20, x3
umulh x4, x12, x9
adcs x21, x21, x4
adc x22, x22, xzr
# A[3] * B[0]
mul x3, x13, x6
adds x17, x17, x3
umulh x4, x13, x6
adcs x19, x19, x4
# A[3] * B[2]
mul x3, x13, x8
adcs x20, x20, x3
umulh x4, x13, x8
adcs x21, x21, x4
adc x22, x22, xzr
# Reduce
mov x3, #38
mul x4, x3, x22
adds x17, x17, x4
umulh x5, x3, x22
adc x5, x5, xzr
mov x3, #19
extr x5, x5, x17, #63
mul x5, x5, x3
and x17, x17, #0x7fffffffffffffff
mov x3, #38
mul x4, x3, x19
adds x14, x14, x4
umulh x19, x3, x19
mul x4, x3, x20
adcs x15, x15, x4
umulh x20, x3, x20
mul x4, x3, x21
adcs x16, x16, x4
umulh x21, x3, x21
adc x17, x17, xzr
# Add high product results in
adds x14, x14, x5
adcs x15, x15, x19
adcs x16, x16, x20
adc x17, x17, x21
# Store
stp x14, x15, [x0]
stp x16, x17, [x0, #16]
sub x1, x1, #0x40
sub x0, x0, #32
# Multiply
ldp x10, x11, [x1]
ldp x12, x13, [x1, #16]
# A[0] * B[0]
umulh x15, x10, x6
mul x14, x10, x6
# A[2] * B[0]
umulh x17, x12, x6
mul x16, x12, x6
# A[1] * B[0]
mul x3, x11, x6
adds x15, x15, x3
umulh x4, x11, x6
adcs x16, x16, x4
# A[1] * B[3]
umulh x20, x11, x9
adc x17, x17, xzr
mul x19, x11, x9
# A[0] * B[1]
mul x3, x10, x7
adds x15, x15, x3
umulh x4, x10, x7
adcs x16, x16, x4
# A[2] * B[1]
mul x3, x12, x7
adcs x17, x17, x3
umulh x4, x12, x7
adcs x19, x19, x4
adc x20, x20, xzr
# A[1] * B[2]
mul x3, x11, x8
adds x17, x17, x3
umulh x4, x11, x8
adcs x19, x19, x4
adcs x20, x20, xzr
adc x21, xzr, xzr
# A[0] * B[2]
mul x3, x10, x8
adds x16, x16, x3
umulh x4, x10, x8
adcs x17, x17, x4
adcs x19, x19, xzr
adcs x20, x20, xzr
adc x21, x21, xzr
# A[1] * B[1]
mul x3, x11, x7
adds x16, x16, x3
umulh x4, x11, x7
adcs x17, x17, x4
# A[3] * B[1]
mul x3, x13, x7
adcs x19, x19, x3
umulh x4, x13, x7
adcs x20, x20, x4
adc x21, x21, xzr
# A[2] * B[2]
mul x3, x12, x8
adds x19, x19, x3
umulh x4, x12, x8
adcs x20, x20, x4
# A[3] * B[3]
mul x3, x13, x9
adcs x21, x21, x3
umulh x22, x13, x9
adc x22, x22, xzr
# A[0] * B[3]
mul x3, x10, x9
adds x17, x17, x3
umulh x4, x10, x9
adcs x19, x19, x4
# A[2] * B[3]
mul x3, x12, x9
adcs x20, x20, x3
umulh x4, x12, x9
adcs x21, x21, x4
adc x22, x22, xzr
# A[3] * B[0]
mul x3, x13, x6
adds x17, x17, x3
umulh x4, x13, x6
adcs x19, x19, x4
# A[3] * B[2]
mul x3, x13, x8
adcs x20, x20, x3
umulh x4, x13, x8
adcs x21, x21, x4
adc x22, x22, xzr
# Reduce
mov x3, #38
mul x4, x3, x22
adds x17, x17, x4
umulh x5, x3, x22
adc x5, x5, xzr
mov x3, #19
extr x5, x5, x17, #63
mul x5, x5, x3
and x17, x17, #0x7fffffffffffffff
mov x3, #38
mul x4, x3, x19
adds x14, x14, x4
umulh x19, x3, x19
mul x4, x3, x20
adcs x15, x15, x4
umulh x20, x3, x20
mul x4, x3, x21
adcs x16, x16, x4
umulh x21, x3, x21
adc x17, x17, xzr
# Add high product results in
adds x14, x14, x5
adcs x15, x15, x19
adcs x16, x16, x20
adc x17, x17, x21
# Store
stp x14, x15, [x0]
stp x16, x17, [x0, #16]
ldr x17, [x29, #40]
ldr x19, [x29, #48]
ldp x20, x21, [x29, #56]
ldr x22, [x29, #72]
ldp x29, x30, [sp], #0x50
ret
#ifndef __APPLE__
.size ge_p1p1_to_p2,.-ge_p1p1_to_p2
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl ge_p1p1_to_p3
.type ge_p1p1_to_p3,@function
.align 2
ge_p1p1_to_p3:
#else
.section __TEXT,__text
.globl _ge_p1p1_to_p3
.p2align 2
_ge_p1p1_to_p3:
#endif /* __APPLE__ */
stp x29, x30, [sp, #-112]!
add x29, sp, #0
str x17, [x29, #40]
str x19, [x29, #48]
stp x20, x21, [x29, #56]
stp x22, x23, [x29, #72]
stp x24, x25, [x29, #88]
str x26, [x29, #104]
str x0, [x29, #16]
str x1, [x29, #24]
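    # p1p1 -> p3 (extended): r->X = X*T, r->Y = Y*Z, r->Z = Z*T, r->T = X*Y.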
mov x2, x1
add x1, x1, #0x60
# Multiply
ldp x10, x11, [x1]
ldp x12, x13, [x1, #16]
ldp x6, x7, [x2]
ldp x8, x9, [x2, #16]
# A[0] * B[0]
umulh x15, x10, x6
mul x14, x10, x6
# A[2] * B[0]
umulh x17, x12, x6
mul x16, x12, x6
# A[1] * B[0]
mul x3, x11, x6
adds x15, x15, x3
umulh x4, x11, x6
adcs x16, x16, x4
# A[1] * B[3]
umulh x20, x11, x9
adc x17, x17, xzr
mul x19, x11, x9
# A[0] * B[1]
mul x3, x10, x7
adds x15, x15, x3
umulh x4, x10, x7
adcs x16, x16, x4
# A[2] * B[1]
mul x3, x12, x7
adcs x17, x17, x3
umulh x4, x12, x7
adcs x19, x19, x4
adc x20, x20, xzr
# A[1] * B[2]
mul x3, x11, x8
adds x17, x17, x3
umulh x4, x11, x8
adcs x19, x19, x4
adcs x20, x20, xzr
adc x21, xzr, xzr
# A[0] * B[2]
mul x3, x10, x8
adds x16, x16, x3
umulh x4, x10, x8
adcs x17, x17, x4
adcs x19, x19, xzr
adcs x20, x20, xzr
adc x21, x21, xzr
# A[1] * B[1]
mul x3, x11, x7
adds x16, x16, x3
umulh x4, x11, x7
adcs x17, x17, x4
# A[3] * B[1]
mul x3, x13, x7
adcs x19, x19, x3
umulh x4, x13, x7
adcs x20, x20, x4
adc x21, x21, xzr
# A[2] * B[2]
mul x3, x12, x8
adds x19, x19, x3
umulh x4, x12, x8
adcs x20, x20, x4
# A[3] * B[3]
mul x3, x13, x9
adcs x21, x21, x3
umulh x22, x13, x9
adc x22, x22, xzr
# A[0] * B[3]
mul x3, x10, x9
adds x17, x17, x3
umulh x4, x10, x9
adcs x19, x19, x4
# A[2] * B[3]
mul x3, x12, x9
adcs x20, x20, x3
umulh x4, x12, x9
adcs x21, x21, x4
adc x22, x22, xzr
# A[3] * B[0]
mul x3, x13, x6
adds x17, x17, x3
umulh x4, x13, x6
adcs x19, x19, x4
# A[3] * B[2]
mul x3, x13, x8
adcs x20, x20, x3
umulh x4, x13, x8
adcs x21, x21, x4
adc x22, x22, xzr
# Reduce
mov x3, #38
mul x4, x3, x22
adds x17, x17, x4
umulh x5, x3, x22
adc x5, x5, xzr
mov x3, #19
extr x5, x5, x17, #63
mul x5, x5, x3
and x17, x17, #0x7fffffffffffffff
mov x3, #38
mul x4, x3, x19
adds x14, x14, x4
umulh x19, x3, x19
mul x4, x3, x20
adcs x15, x15, x4
umulh x20, x3, x20
mul x4, x3, x21
adcs x16, x16, x4
umulh x21, x3, x21
adc x17, x17, xzr
# Add high product results in
adds x14, x14, x5
adcs x15, x15, x19
adcs x16, x16, x20
adc x17, x17, x21
# Store
stp x14, x15, [x0]
stp x16, x17, [x0, #16]
sub x1, x1, #0x40
add x0, x0, #0x60
# Multiply
ldp x23, x24, [x1]
ldp x25, x26, [x1, #16]
# A[0] * B[0]
umulh x15, x23, x6
mul x14, x23, x6
# A[2] * B[0]
umulh x17, x25, x6
mul x16, x25, x6
# A[1] * B[0]
mul x3, x24, x6
adds x15, x15, x3
umulh x4, x24, x6
adcs x16, x16, x4
# A[1] * B[3]
umulh x20, x24, x9
adc x17, x17, xzr
mul x19, x24, x9
# A[0] * B[1]
mul x3, x23, x7
adds x15, x15, x3
umulh x4, x23, x7
adcs x16, x16, x4
# A[2] * B[1]
mul x3, x25, x7
adcs x17, x17, x3
umulh x4, x25, x7
adcs x19, x19, x4
adc x20, x20, xzr
# A[1] * B[2]
mul x3, x24, x8
adds x17, x17, x3
umulh x4, x24, x8
adcs x19, x19, x4
adcs x20, x20, xzr
adc x21, xzr, xzr
# A[0] * B[2]
mul x3, x23, x8
adds x16, x16, x3
umulh x4, x23, x8
adcs x17, x17, x4
adcs x19, x19, xzr
adcs x20, x20, xzr
adc x21, x21, xzr
# A[1] * B[1]
mul x3, x24, x7
adds x16, x16, x3
umulh x4, x24, x7
adcs x17, x17, x4
# A[3] * B[1]
mul x3, x26, x7
adcs x19, x19, x3
umulh x4, x26, x7
adcs x20, x20, x4
adc x21, x21, xzr
# A[2] * B[2]
mul x3, x25, x8
adds x19, x19, x3
umulh x4, x25, x8
adcs x20, x20, x4
# A[3] * B[3]
mul x3, x26, x9
adcs x21, x21, x3
umulh x22, x26, x9
adc x22, x22, xzr
# A[0] * B[3]
mul x3, x23, x9
adds x17, x17, x3
umulh x4, x23, x9
adcs x19, x19, x4
# A[2] * B[3]
mul x3, x25, x9
adcs x20, x20, x3
umulh x4, x25, x9
adcs x21, x21, x4
adc x22, x22, xzr
# A[3] * B[0]
mul x3, x26, x6
adds x17, x17, x3
umulh x4, x26, x6
adcs x19, x19, x4
# A[3] * B[2]
mul x3, x26, x8
adcs x20, x20, x3
umulh x4, x26, x8
adcs x21, x21, x4
adc x22, x22, xzr
# Reduce
mov x3, #38
mul x4, x3, x22
adds x17, x17, x4
umulh x5, x3, x22
adc x5, x5, xzr
mov x3, #19
extr x5, x5, x17, #63
mul x5, x5, x3
and x17, x17, #0x7fffffffffffffff
mov x3, #38
mul x4, x3, x19
adds x14, x14, x4
umulh x19, x3, x19
mul x4, x3, x20
adcs x15, x15, x4
umulh x20, x3, x20
mul x4, x3, x21
adcs x16, x16, x4
umulh x21, x3, x21
adc x17, x17, xzr
# Add high product results in
adds x14, x14, x5
adcs x15, x15, x19
adcs x16, x16, x20
adc x17, x17, x21
# Store
stp x14, x15, [x0]
stp x16, x17, [x0, #16]
add x2, x1, #32
sub x0, x0, #0x40
# Multiply
ldp x6, x7, [x2]
ldp x8, x9, [x2, #16]
# A[0] * B[0]
umulh x15, x23, x6
mul x14, x23, x6
# A[2] * B[0]
umulh x17, x25, x6
mul x16, x25, x6
# A[1] * B[0]
mul x3, x24, x6
adds x15, x15, x3
umulh x4, x24, x6
adcs x16, x16, x4
# A[1] * B[3]
umulh x20, x24, x9
adc x17, x17, xzr
mul x19, x24, x9
# A[0] * B[1]
mul x3, x23, x7
adds x15, x15, x3
umulh x4, x23, x7
adcs x16, x16, x4
# A[2] * B[1]
mul x3, x25, x7
adcs x17, x17, x3
umulh x4, x25, x7
adcs x19, x19, x4
adc x20, x20, xzr
# A[1] * B[2]
mul x3, x24, x8
adds x17, x17, x3
umulh x4, x24, x8
adcs x19, x19, x4
adcs x20, x20, xzr
adc x21, xzr, xzr
# A[0] * B[2]
mul x3, x23, x8
adds x16, x16, x3
umulh x4, x23, x8
adcs x17, x17, x4
adcs x19, x19, xzr
adcs x20, x20, xzr
adc x21, x21, xzr
# A[1] * B[1]
mul x3, x24, x7
adds x16, x16, x3
umulh x4, x24, x7
adcs x17, x17, x4
# A[3] * B[1]
mul x3, x26, x7
adcs x19, x19, x3
umulh x4, x26, x7
adcs x20, x20, x4
adc x21, x21, xzr
# A[2] * B[2]
mul x3, x25, x8
adds x19, x19, x3
umulh x4, x25, x8
adcs x20, x20, x4
# A[3] * B[3]
mul x3, x26, x9
adcs x21, x21, x3
umulh x22, x26, x9
adc x22, x22, xzr
# A[0] * B[3]
mul x3, x23, x9
adds x17, x17, x3
umulh x4, x23, x9
adcs x19, x19, x4
# A[2] * B[3]
mul x3, x25, x9
adcs x20, x20, x3
umulh x4, x25, x9
adcs x21, x21, x4
adc x22, x22, xzr
# A[3] * B[0]
mul x3, x26, x6
adds x17, x17, x3
umulh x4, x26, x6
adcs x19, x19, x4
# A[3] * B[2]
mul x3, x26, x8
adcs x20, x20, x3
umulh x4, x26, x8
adcs x21, x21, x4
adc x22, x22, xzr
# Reduce
mov x3, #38
mul x4, x3, x22
adds x17, x17, x4
umulh x5, x3, x22
adc x5, x5, xzr
mov x3, #19
extr x5, x5, x17, #63
mul x5, x5, x3
and x17, x17, #0x7fffffffffffffff
mov x3, #38
mul x4, x3, x19
adds x14, x14, x4
umulh x19, x3, x19
mul x4, x3, x20
adcs x15, x15, x4
umulh x20, x3, x20
mul x4, x3, x21
adcs x16, x16, x4
umulh x21, x3, x21
adc x17, x17, xzr
# Add high product results in
adds x14, x14, x5
adcs x15, x15, x19
adcs x16, x16, x20
adc x17, x17, x21
# Store
stp x14, x15, [x0]
stp x16, x17, [x0, #16]
add x1, x1, #0x40
add x0, x0, #32
# Multiply
# A[0] * B[0]
umulh x15, x10, x6
mul x14, x10, x6
# A[2] * B[0]
umulh x17, x12, x6
mul x16, x12, x6
# A[1] * B[0]
mul x3, x11, x6
adds x15, x15, x3
umulh x4, x11, x6
adcs x16, x16, x4
# A[1] * B[3]
umulh x20, x11, x9
adc x17, x17, xzr
mul x19, x11, x9
# A[0] * B[1]
mul x3, x10, x7
adds x15, x15, x3
umulh x4, x10, x7
adcs x16, x16, x4
# A[2] * B[1]
mul x3, x12, x7
adcs x17, x17, x3
umulh x4, x12, x7
adcs x19, x19, x4
adc x20, x20, xzr
# A[1] * B[2]
mul x3, x11, x8
adds x17, x17, x3
umulh x4, x11, x8
adcs x19, x19, x4
adcs x20, x20, xzr
adc x21, xzr, xzr
# A[0] * B[2]
mul x3, x10, x8
adds x16, x16, x3
umulh x4, x10, x8
adcs x17, x17, x4
adcs x19, x19, xzr
adcs x20, x20, xzr
adc x21, x21, xzr
# A[1] * B[1]
mul x3, x11, x7
adds x16, x16, x3
umulh x4, x11, x7
adcs x17, x17, x4
# A[3] * B[1]
mul x3, x13, x7
adcs x19, x19, x3
umulh x4, x13, x7
adcs x20, x20, x4
adc x21, x21, xzr
# A[2] * B[2]
mul x3, x12, x8
adds x19, x19, x3
umulh x4, x12, x8
adcs x20, x20, x4
# A[3] * B[3]
mul x3, x13, x9
adcs x21, x21, x3
umulh x22, x13, x9
adc x22, x22, xzr
# A[0] * B[3]
mul x3, x10, x9
adds x17, x17, x3
umulh x4, x10, x9
adcs x19, x19, x4
# A[2] * B[3]
mul x3, x12, x9
adcs x20, x20, x3
umulh x4, x12, x9
adcs x21, x21, x4
adc x22, x22, xzr
# A[3] * B[0]
mul x3, x13, x6
adds x17, x17, x3
umulh x4, x13, x6
adcs x19, x19, x4
# A[3] * B[2]
mul x3, x13, x8
adcs x20, x20, x3
umulh x4, x13, x8
adcs x21, x21, x4
adc x22, x22, xzr
# Reduce
mov x3, #38
mul x4, x3, x22
adds x17, x17, x4
umulh x5, x3, x22
adc x5, x5, xzr
mov x3, #19
extr x5, x5, x17, #63
mul x5, x5, x3
and x17, x17, #0x7fffffffffffffff
mov x3, #38
mul x4, x3, x19
adds x14, x14, x4
umulh x19, x3, x19
mul x4, x3, x20
adcs x15, x15, x4
umulh x20, x3, x20
mul x4, x3, x21
adcs x16, x16, x4
umulh x21, x3, x21
adc x17, x17, xzr
# Add high product results in
adds x14, x14, x5
adcs x15, x15, x19
adcs x16, x16, x20
adc x17, x17, x21
# Store
stp x14, x15, [x0]
stp x16, x17, [x0, #16]
ldr x17, [x29, #40]
ldr x19, [x29, #48]
ldp x20, x21, [x29, #56]
ldp x22, x23, [x29, #72]
ldp x24, x25, [x29, #88]
ldr x26, [x29, #104]
ldp x29, x30, [sp], #0x70
ret
#ifndef __APPLE__
.size ge_p1p1_to_p3,.-ge_p1p1_to_p3
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl ge_p2_dbl
.type ge_p2_dbl,@function
.align 2
ge_p2_dbl:
#else
.section __TEXT,__text
.globl _ge_p2_dbl
.p2align 2
_ge_p2_dbl:
#endif /* __APPLE__ */
stp x29, x30, [sp, #-128]!
add x29, sp, #0
str x17, [x29, #40]
str x19, [x29, #48]
stp x20, x21, [x29, #56]
stp x22, x23, [x29, #72]
stp x24, x25, [x29, #88]
stp x26, x27, [x29, #104]
str x28, [x29, #120]
str x0, [x29, #16]
str x1, [x29, #24]
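    # Double a projective point into p1p1 form from X^2, Y^2, (X+Y)^2 and 2*Z^2.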
add x0, x0, #0x40
# Square
ldp x4, x5, [x1]
ldp x6, x7, [x1, #16]
# A[0] * A[1]
umulh x10, x4, x5
mul x9, x4, x5
# A[0] * A[3]
umulh x12, x4, x7
mul x11, x4, x7
# A[0] * A[2]
mul x25, x4, x6
adds x10, x10, x25
umulh x26, x4, x6
adcs x11, x11, x26
# A[1] * A[3]
mul x25, x5, x7
adcs x12, x12, x25
umulh x13, x5, x7
adc x13, x13, xzr
# A[1] * A[2]
mul x25, x5, x6
adds x11, x11, x25
umulh x26, x5, x6
adcs x12, x12, x26
# A[2] * A[3]
mul x25, x6, x7
adcs x13, x13, x25
umulh x14, x6, x7
adc x14, x14, xzr
# Double
adds x9, x9, x9
adcs x10, x10, x10
adcs x11, x11, x11
adcs x12, x12, x12
adcs x13, x13, x13
adcs x14, x14, x14
adc x15, xzr, xzr
# A[0] * A[0]
umulh x26, x4, x4
mul x8, x4, x4
# A[1] * A[1]
mul x25, x5, x5
adds x9, x9, x26
umulh x26, x5, x5
adcs x10, x10, x25
# A[2] * A[2]
mul x25, x6, x6
adcs x11, x11, x26
umulh x26, x6, x6
adcs x12, x12, x25
# A[3] * A[3]
mul x25, x7, x7
adcs x13, x13, x26
umulh x26, x7, x7
adcs x14, x14, x25
adc x15, x15, x26
# Reduce
mov x25, #38
mul x26, x25, x15
adds x11, x11, x26
umulh x27, x25, x15
adc x27, x27, xzr
mov x25, #19
extr x27, x27, x11, #63
mul x27, x27, x25
and x11, x11, #0x7fffffffffffffff
mov x25, #38
mul x26, x25, x12
adds x8, x8, x26
umulh x12, x25, x12
mul x26, x25, x13
adcs x9, x9, x26
umulh x13, x25, x13
mul x26, x25, x14
adcs x10, x10, x26
umulh x14, x25, x14
adc x11, x11, xzr
# Add high product results in
adds x8, x8, x27
adcs x9, x9, x12
adcs x10, x10, x13
adc x11, x11, x14
# Store
stp x8, x9, [x0]
stp x10, x11, [x0, #16]
add x2, x1, #32
sub x0, x0, #32
# Square
ldp x16, x17, [x2]
ldp x19, x20, [x2, #16]
# A[0] * A[1]
umulh x23, x16, x17
mul x22, x16, x17
# A[0] * A[3]
umulh x4, x16, x20
mul x24, x16, x20
# A[0] * A[2]
mul x25, x16, x19
adds x23, x23, x25
umulh x26, x16, x19
adcs x24, x24, x26
# A[1] * A[3]
mul x25, x17, x20
adcs x4, x4, x25
umulh x5, x17, x20
adc x5, x5, xzr
# A[1] * A[2]
mul x25, x17, x19
adds x24, x24, x25
umulh x26, x17, x19
adcs x4, x4, x26
# A[2] * A[3]
mul x25, x19, x20
adcs x5, x5, x25
umulh x6, x19, x20
adc x6, x6, xzr
# Double
adds x22, x22, x22
adcs x23, x23, x23
adcs x24, x24, x24
adcs x4, x4, x4
adcs x5, x5, x5
adcs x6, x6, x6
adc x7, xzr, xzr
# A[0] * A[0]
umulh x26, x16, x16
mul x21, x16, x16
# A[1] * A[1]
mul x25, x17, x17
adds x22, x22, x26
umulh x26, x17, x17
adcs x23, x23, x25
# A[2] * A[2]
mul x25, x19, x19
adcs x24, x24, x26
umulh x26, x19, x19
adcs x4, x4, x25
# A[3] * A[3]
mul x25, x20, x20
adcs x5, x5, x26
umulh x26, x20, x20
adcs x6, x6, x25
adc x7, x7, x26
# Reduce
mov x25, #38
mul x26, x25, x7
adds x24, x24, x26
umulh x27, x25, x7
adc x27, x27, xzr
mov x25, #19
extr x27, x27, x24, #63
mul x27, x27, x25
and x24, x24, #0x7fffffffffffffff
mov x25, #38
mul x26, x25, x4
adds x21, x21, x26
umulh x4, x25, x4
mul x26, x25, x5
adcs x22, x22, x26
umulh x5, x25, x5
mul x26, x25, x6
adcs x23, x23, x26
umulh x6, x25, x6
adc x24, x24, xzr
# Add high product results in
adds x21, x21, x27
adcs x22, x22, x4
adcs x23, x23, x5
adc x24, x24, x6
add x3, x0, #32
mov x2, x0
add x1, x0, #32
# Add
adds x4, x21, x8
adcs x5, x22, x9
adcs x6, x23, x10
adcs x7, x24, x11
cset x28, cs
mov x25, #19
extr x28, x28, x7, #63
mul x25, x28, x25
# Sub modulus (if overflow)
adds x4, x4, x25
adcs x5, x5, xzr
and x7, x7, #0x7fffffffffffffff
adcs x6, x6, xzr
adc x7, x7, xzr
# Sub
subs x12, x21, x8
sbcs x13, x22, x9
sbcs x14, x23, x10
sbcs x15, x24, x11
csetm x28, cc
mov x25, #-19
extr x28, x28, x15, #63
mul x25, x28, x25
# Add modulus (if underflow)
subs x12, x12, x25
sbcs x13, x13, xzr
and x15, x15, #0x7fffffffffffffff
sbcs x14, x14, xzr
sbc x15, x15, xzr
stp x4, x5, [x0]
stp x6, x7, [x0, #16]
stp x12, x13, [x1]
stp x14, x15, [x1, #16]
ldr x1, [x29, #24]
add x2, x1, #32
sub x0, x0, #32
# Add
ldp x8, x9, [x1]
ldp x10, x11, [x1, #16]
adds x8, x8, x16
adcs x9, x9, x17
adcs x10, x10, x19
adcs x11, x11, x20
cset x28, cs
mov x25, #19
# Mask the modulus
extr x28, x28, x11, #63
mul x25, x28, x25
# Sub modulus (if overflow)
adds x8, x8, x25
adcs x9, x9, xzr
and x11, x11, #0x7fffffffffffffff
adcs x10, x10, xzr
adc x11, x11, xzr
mov x1, x0
# Square
# A[0] * A[1]
umulh x23, x8, x9
mul x22, x8, x9
# A[0] * A[3]
umulh x4, x8, x11
mul x24, x8, x11
# A[0] * A[2]
mul x25, x8, x10
adds x23, x23, x25
umulh x26, x8, x10
adcs x24, x24, x26
# A[1] * A[3]
mul x25, x9, x11
adcs x4, x4, x25
umulh x5, x9, x11
adc x5, x5, xzr
# A[1] * A[2]
mul x25, x9, x10
adds x24, x24, x25
umulh x26, x9, x10
adcs x4, x4, x26
# A[2] * A[3]
mul x25, x10, x11
adcs x5, x5, x25
umulh x6, x10, x11
adc x6, x6, xzr
# Double
adds x22, x22, x22
adcs x23, x23, x23
adcs x24, x24, x24
adcs x4, x4, x4
adcs x5, x5, x5
adcs x6, x6, x6
adc x7, xzr, xzr
# A[0] * A[0]
umulh x26, x8, x8
mul x21, x8, x8
# A[1] * A[1]
mul x25, x9, x9
adds x22, x22, x26
umulh x26, x9, x9
adcs x23, x23, x25
# A[2] * A[2]
mul x25, x10, x10
adcs x24, x24, x26
umulh x26, x10, x10
adcs x4, x4, x25
# A[3] * A[3]
mul x25, x11, x11
adcs x5, x5, x26
umulh x26, x11, x11
adcs x6, x6, x25
adc x7, x7, x26
# Reduce
mov x25, #38
mul x26, x25, x7
adds x24, x24, x26
umulh x27, x25, x7
adc x27, x27, xzr
mov x25, #19
extr x27, x27, x24, #63
mul x27, x27, x25
and x24, x24, #0x7fffffffffffffff
mov x25, #38
mul x26, x25, x4
adds x21, x21, x26
umulh x4, x25, x4
mul x26, x25, x5
adcs x22, x22, x26
umulh x5, x25, x5
mul x26, x25, x6
adcs x23, x23, x26
umulh x6, x25, x6
adc x24, x24, xzr
# Add high product results in
adds x21, x21, x27
adcs x22, x22, x4
adcs x23, x23, x5
adc x24, x24, x6
add x2, x0, #32
# Sub
ldp x8, x9, [x2]
ldp x10, x11, [x2, #16]
subs x21, x21, x8
sbcs x22, x22, x9
sbcs x23, x23, x10
sbcs x24, x24, x11
csetm x28, cc
mov x25, #-19
# Mask the modulus
extr x28, x28, x24, #63
mul x25, x28, x25
# Add modulus (if underflow)
subs x21, x21, x25
sbcs x22, x22, xzr
and x24, x24, #0x7fffffffffffffff
sbcs x23, x23, xzr
sbc x24, x24, xzr
stp x21, x22, [x0]
stp x23, x24, [x0, #16]
ldr x2, [x29, #24]
add x2, x2, #0x40
add x0, x0, #0x60
# Square * 2
ldp x16, x17, [x2]
ldp x19, x20, [x2, #16]
# A[0] * A[1]
umulh x6, x16, x17
mul x5, x16, x17
# A[0] * A[3]
umulh x8, x16, x20
mul x7, x16, x20
# A[0] * A[2]
mul x25, x16, x19
adds x6, x6, x25
umulh x26, x16, x19
adcs x7, x7, x26
# A[1] * A[3]
mul x25, x17, x20
adcs x8, x8, x25
umulh x9, x17, x20
adc x9, x9, xzr
# A[1] * A[2]
mul x25, x17, x19
adds x7, x7, x25
umulh x26, x17, x19
adcs x8, x8, x26
# A[2] * A[3]
mul x25, x19, x20
adcs x9, x9, x25
umulh x10, x19, x20
adc x10, x10, xzr
# Double
adds x5, x5, x5
adcs x6, x6, x6
adcs x7, x7, x7
adcs x8, x8, x8
adcs x9, x9, x9
adcs x10, x10, x10
adc x11, xzr, xzr
# A[0] * A[0]
umulh x26, x16, x16
mul x4, x16, x16
# A[1] * A[1]
mul x25, x17, x17
adds x5, x5, x26
umulh x26, x17, x17
adcs x6, x6, x25
# A[2] * A[2]
mul x25, x19, x19
adcs x7, x7, x26
umulh x26, x19, x19
adcs x8, x8, x25
# A[3] * A[3]
mul x25, x20, x20
adcs x9, x9, x26
umulh x26, x20, x20
adcs x10, x10, x25
adc x11, x11, x26
# Reduce
mov x25, #38
mul x26, x25, x11
adds x7, x7, x26
umulh x27, x25, x11
adc x27, x27, xzr
mov x25, #19
extr x27, x27, x7, #63
mul x27, x27, x25
and x7, x7, #0x7fffffffffffffff
mov x25, #38
mul x26, x25, x8
adds x4, x4, x26
umulh x8, x25, x8
mul x26, x25, x9
adcs x5, x5, x26
umulh x9, x25, x9
mul x26, x25, x10
adcs x6, x6, x26
umulh x10, x25, x10
adc x7, x7, xzr
# Add high product results in
adds x4, x4, x27
adcs x5, x5, x8
adcs x6, x6, x9
adc x7, x7, x10
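    # The "* 2" part: shift left one bit and fold the portion at or above
    # bit 255 back in, multiplied by 19.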
mov x25, #19
lsr x26, x7, #62
extr x7, x7, x6, #63
extr x6, x6, x5, #63
extr x5, x5, x4, #63
lsl x4, x4, #1
mul x26, x26, x25
adds x4, x4, x26
adcs x5, x5, xzr
and x7, x7, #0x7fffffffffffffff
adcs x6, x6, xzr
adc x7, x7, xzr
# Store
sub x1, x0, #32
# Sub
subs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
sbcs x7, x7, x15
csetm x28, cc
mov x25, #-19
# Mask the modulus
extr x28, x28, x7, #63
mul x25, x28, x25
# Add modulus (if underflow)
subs x4, x4, x25
sbcs x5, x5, xzr
and x7, x7, #0x7fffffffffffffff
sbcs x6, x6, xzr
sbc x7, x7, xzr
stp x4, x5, [x0]
stp x6, x7, [x0, #16]
ldr x17, [x29, #40]
ldr x19, [x29, #48]
ldp x20, x21, [x29, #56]
ldp x22, x23, [x29, #72]
ldp x24, x25, [x29, #88]
ldp x26, x27, [x29, #104]
ldr x28, [x29, #120]
ldp x29, x30, [sp], #0x80
ret
#ifndef __APPLE__
.size ge_p2_dbl,.-ge_p2_dbl
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl ge_madd
.type ge_madd,@function
.align 2
ge_madd:
#else
.section __TEXT,__text
.globl _ge_madd
.p2align 2
_ge_madd:
#endif /* __APPLE__ */
stp x29, x30, [sp, #-144]!
add x29, sp, #0
str x17, [x29, #56]
str x19, [x29, #64]
stp x20, x21, [x29, #72]
stp x22, x23, [x29, #88]
stp x24, x25, [x29, #104]
stp x26, x27, [x29, #120]
str x28, [x29, #136]
str x0, [x29, #16]
str x1, [x29, #24]
str x2, [x29, #32]
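    # Mixed addition: r (p1p1) = p (ge_p3) + q, where q is a precomputed
    # point stored as (y+x, y-x, 2*d*x*y).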
mov x3, x1
add x2, x1, #32
add x1, x0, #32
# Add
ldp x8, x9, [x2]
ldp x10, x11, [x2, #16]
ldp x4, x5, [x3]
ldp x6, x7, [x3, #16]
adds x16, x8, x4
adcs x17, x9, x5
adcs x19, x10, x6
adcs x20, x11, x7
cset x28, cs
mov x25, #19
extr x28, x28, x20, #63
mul x25, x28, x25
# Sub modulus (if overflow)
adds x16, x16, x25
adcs x17, x17, xzr
and x20, x20, #0x7fffffffffffffff
adcs x19, x19, xzr
adc x20, x20, xzr
# Sub
subs x12, x8, x4
sbcs x13, x9, x5
sbcs x14, x10, x6
sbcs x15, x11, x7
csetm x28, cc
mov x25, #-19
extr x28, x28, x15, #63
mul x25, x28, x25
# Add modulus (if underflow)
subs x12, x12, x25
sbcs x13, x13, xzr
and x15, x15, #0x7fffffffffffffff
sbcs x14, x14, xzr
sbc x15, x15, xzr
ldr x2, [x29, #32]
mov x1, x0
# Multiply
ldp x8, x9, [x2]
ldp x10, x11, [x2, #16]
# A[0] * B[0]
umulh x22, x16, x8
mul x21, x16, x8
# A[2] * B[0]
umulh x24, x19, x8
mul x23, x19, x8
# A[1] * B[0]
mul x25, x17, x8
adds x22, x22, x25
umulh x26, x17, x8
adcs x23, x23, x26
# A[1] * B[3]
umulh x5, x17, x11
adc x24, x24, xzr
mul x4, x17, x11
# A[0] * B[1]
mul x25, x16, x9
adds x22, x22, x25
umulh x26, x16, x9
adcs x23, x23, x26
# A[2] * B[1]
mul x25, x19, x9
adcs x24, x24, x25
umulh x26, x19, x9
adcs x4, x4, x26
adc x5, x5, xzr
# A[1] * B[2]
mul x25, x17, x10
adds x24, x24, x25
umulh x26, x17, x10
adcs x4, x4, x26
adcs x5, x5, xzr
adc x6, xzr, xzr
# A[0] * B[2]
mul x25, x16, x10
adds x23, x23, x25
umulh x26, x16, x10
adcs x24, x24, x26
adcs x4, x4, xzr
adcs x5, x5, xzr
adc x6, x6, xzr
# A[1] * B[1]
mul x25, x17, x9
adds x23, x23, x25
umulh x26, x17, x9
adcs x24, x24, x26
# A[3] * B[1]
mul x25, x20, x9
adcs x4, x4, x25
umulh x26, x20, x9
adcs x5, x5, x26
adc x6, x6, xzr
# A[2] * B[2]
mul x25, x19, x10
adds x4, x4, x25
umulh x26, x19, x10
adcs x5, x5, x26
# A[3] * B[3]
mul x25, x20, x11
adcs x6, x6, x25
umulh x7, x20, x11
adc x7, x7, xzr
# A[0] * B[3]
mul x25, x16, x11
adds x24, x24, x25
umulh x26, x16, x11
adcs x4, x4, x26
# A[2] * B[3]
mul x25, x19, x11
adcs x5, x5, x25
umulh x26, x19, x11
adcs x6, x6, x26
adc x7, x7, xzr
# A[3] * B[0]
mul x25, x20, x8
adds x24, x24, x25
umulh x26, x20, x8
adcs x4, x4, x26
# A[3] * B[2]
mul x25, x20, x10
adcs x5, x5, x25
umulh x26, x20, x10
adcs x6, x6, x26
adc x7, x7, xzr
# Reduce
mov x25, #38
mul x26, x25, x7
adds x24, x24, x26
umulh x27, x25, x7
adc x27, x27, xzr
mov x25, #19
extr x27, x27, x24, #63
mul x27, x27, x25
and x24, x24, #0x7fffffffffffffff
mov x25, #38
mul x26, x25, x4
adds x21, x21, x26
umulh x4, x25, x4
mul x26, x25, x5
adcs x22, x22, x26
umulh x5, x25, x5
mul x26, x25, x6
adcs x23, x23, x26
umulh x6, x25, x6
adc x24, x24, xzr
# Add high product results in
adds x21, x21, x27
adcs x22, x22, x4
adcs x23, x23, x5
adc x24, x24, x6
add x2, x2, #32
add x1, x0, #32
add x0, x0, #32
# Multiply
ldp x16, x17, [x2]
ldp x19, x20, [x2, #16]
# A[0] * B[0]
umulh x5, x12, x16
mul x4, x12, x16
# A[2] * B[0]
umulh x7, x14, x16
mul x6, x14, x16
# A[1] * B[0]
mul x25, x13, x16
adds x5, x5, x25
umulh x26, x13, x16
adcs x6, x6, x26
# A[1] * B[3]
umulh x9, x13, x20
adc x7, x7, xzr
mul x8, x13, x20
# A[0] * B[1]
mul x25, x12, x17
adds x5, x5, x25
umulh x26, x12, x17
adcs x6, x6, x26
# A[2] * B[1]
mul x25, x14, x17
adcs x7, x7, x25
umulh x26, x14, x17
adcs x8, x8, x26
adc x9, x9, xzr
# A[1] * B[2]
mul x25, x13, x19
adds x7, x7, x25
umulh x26, x13, x19
adcs x8, x8, x26
adcs x9, x9, xzr
adc x10, xzr, xzr
# A[0] * B[2]
mul x25, x12, x19
adds x6, x6, x25
umulh x26, x12, x19
adcs x7, x7, x26
adcs x8, x8, xzr
adcs x9, x9, xzr
adc x10, x10, xzr
# A[1] * B[1]
mul x25, x13, x17
adds x6, x6, x25
umulh x26, x13, x17
adcs x7, x7, x26
# A[3] * B[1]
mul x25, x15, x17
adcs x8, x8, x25
umulh x26, x15, x17
adcs x9, x9, x26
adc x10, x10, xzr
# A[2] * B[2]
mul x25, x14, x19
adds x8, x8, x25
umulh x26, x14, x19
adcs x9, x9, x26
# A[3] * B[3]
mul x25, x15, x20
adcs x10, x10, x25
umulh x11, x15, x20
adc x11, x11, xzr
# A[0] * B[3]
mul x25, x12, x20
adds x7, x7, x25
umulh x26, x12, x20
adcs x8, x8, x26
# A[2] * B[3]
mul x25, x14, x20
adcs x9, x9, x25
umulh x26, x14, x20
adcs x10, x10, x26
adc x11, x11, xzr
# A[3] * B[0]
mul x25, x15, x16
adds x7, x7, x25
umulh x26, x15, x16
adcs x8, x8, x26
# A[3] * B[2]
mul x25, x15, x19
adcs x9, x9, x25
umulh x26, x15, x19
adcs x10, x10, x26
adc x11, x11, xzr
# Reduce
mov x25, #38
mul x26, x25, x11
adds x7, x7, x26
umulh x27, x25, x11
adc x27, x27, xzr
mov x25, #19
extr x27, x27, x7, #63
mul x27, x27, x25
and x7, x7, #0x7fffffffffffffff
mov x25, #38
mul x26, x25, x8
adds x4, x4, x26
umulh x8, x25, x8
mul x26, x25, x9
adcs x5, x5, x26
umulh x9, x25, x9
mul x26, x25, x10
adcs x6, x6, x26
umulh x10, x25, x10
adc x7, x7, xzr
# Add high product results in
adds x4, x4, x27
adcs x5, x5, x8
adcs x6, x6, x9
adc x7, x7, x10
mov x3, x0
sub x2, x0, #32
sub x1, x0, #32
# Add
adds x8, x21, x4
adcs x9, x22, x5
adcs x10, x23, x6
adcs x11, x24, x7
cset x28, cs
mov x25, #19
extr x28, x28, x11, #63
mul x25, x28, x25
# Sub modulus (if overflow)
adds x8, x8, x25
adcs x9, x9, xzr
and x11, x11, #0x7fffffffffffffff
adcs x10, x10, xzr
adc x11, x11, xzr
# Sub
subs x12, x21, x4
sbcs x13, x22, x5
sbcs x14, x23, x6
sbcs x15, x24, x7
csetm x28, cc
mov x25, #-19
extr x28, x28, x15, #63
mul x25, x28, x25
# Add modulus (if underflow)
subs x12, x12, x25
sbcs x13, x13, xzr
and x15, x15, #0x7fffffffffffffff
sbcs x14, x14, xzr
sbc x15, x15, xzr
stp x8, x9, [x0]
stp x10, x11, [x0, #16]
stp x12, x13, [x1]
stp x14, x15, [x1, #16]
ldr x1, [x29, #24]
ldr x2, [x29, #32]
add x2, x2, #0x40
add x1, x1, #0x60
add x0, x0, #0x40
# Multiply
ldp x21, x22, [x1]
ldp x23, x24, [x1, #16]
ldp x4, x5, [x2]
ldp x6, x7, [x2, #16]
# A[0] * B[0]
umulh x17, x21, x4
mul x16, x21, x4
# A[2] * B[0]
umulh x20, x23, x4
mul x19, x23, x4
# A[1] * B[0]
mul x25, x22, x4
adds x17, x17, x25
umulh x26, x22, x4
adcs x19, x19, x26
# A[1] * B[3]
umulh x9, x22, x7
adc x20, x20, xzr
mul x8, x22, x7
# A[0] * B[1]
mul x25, x21, x5
adds x17, x17, x25
umulh x26, x21, x5
adcs x19, x19, x26
# A[2] * B[1]
mul x25, x23, x5
adcs x20, x20, x25
umulh x26, x23, x5
adcs x8, x8, x26
adc x9, x9, xzr
# A[1] * B[2]
mul x25, x22, x6
adds x20, x20, x25
umulh x26, x22, x6
adcs x8, x8, x26
adcs x9, x9, xzr
adc x10, xzr, xzr
# A[0] * B[2]
mul x25, x21, x6
adds x19, x19, x25
umulh x26, x21, x6
adcs x20, x20, x26
adcs x8, x8, xzr
adcs x9, x9, xzr
adc x10, x10, xzr
# A[1] * B[1]
mul x25, x22, x5
adds x19, x19, x25
umulh x26, x22, x5
adcs x20, x20, x26
# A[3] * B[1]
mul x25, x24, x5
adcs x8, x8, x25
umulh x26, x24, x5
adcs x9, x9, x26
adc x10, x10, xzr
# A[2] * B[2]
mul x25, x23, x6
adds x8, x8, x25
umulh x26, x23, x6
adcs x9, x9, x26
# A[3] * B[3]
mul x25, x24, x7
adcs x10, x10, x25
umulh x11, x24, x7
adc x11, x11, xzr
# A[0] * B[3]
mul x25, x21, x7
adds x20, x20, x25
umulh x26, x21, x7
adcs x8, x8, x26
# A[2] * B[3]
mul x25, x23, x7
adcs x9, x9, x25
umulh x26, x23, x7
adcs x10, x10, x26
adc x11, x11, xzr
# A[3] * B[0]
mul x25, x24, x4
adds x20, x20, x25
umulh x26, x24, x4
adcs x8, x8, x26
# A[3] * B[2]
mul x25, x24, x6
adcs x9, x9, x25
umulh x26, x24, x6
adcs x10, x10, x26
adc x11, x11, xzr
# Reduce
mov x25, #38
mul x26, x25, x11
adds x20, x20, x26
umulh x27, x25, x11
adc x27, x27, xzr
mov x25, #19
extr x27, x27, x20, #63
mul x27, x27, x25
and x20, x20, #0x7fffffffffffffff
mov x25, #38
mul x26, x25, x8
adds x16, x16, x26
umulh x8, x25, x8
mul x26, x25, x9
adcs x17, x17, x26
umulh x9, x25, x9
mul x26, x25, x10
adcs x19, x19, x26
umulh x10, x25, x10
adc x20, x20, xzr
# Add high product results in
adds x16, x16, x27
adcs x17, x17, x8
adcs x19, x19, x9
adc x20, x20, x10
sub x1, x1, #32
# Double
ldp x12, x13, [x1]
ldp x14, x15, [x1, #16]
adds x12, x12, x12
adcs x13, x13, x13
adcs x14, x14, x14
adc x15, x15, x15
mov x25, #-19
asr x28, x15, #63
# Mask the modulus
and x25, x28, x25
and x26, x28, #0x7fffffffffffffff
# Sub modulus (if overflow)
subs x12, x12, x25
sbcs x13, x13, x28
sbcs x14, x14, x28
sbc x15, x15, x26
mov x3, x0
sub x2, x0, #32
mov x1, x0
sub x0, x0, #32
# Add
adds x8, x12, x16
adcs x9, x13, x17
adcs x10, x14, x19
adcs x11, x15, x20
cset x28, cs
mov x25, #19
extr x28, x28, x11, #63
mul x25, x28, x25
# Sub modulus (if overflow)
adds x8, x8, x25
adcs x9, x9, xzr
and x11, x11, #0x7fffffffffffffff
adcs x10, x10, xzr
adc x11, x11, xzr
# Sub
subs x4, x12, x16
sbcs x5, x13, x17
sbcs x6, x14, x19
sbcs x7, x15, x20
csetm x28, cc
mov x25, #-19
extr x28, x28, x7, #63
mul x25, x28, x25
# Add modulus (if underflow)
subs x4, x4, x25
sbcs x5, x5, xzr
and x7, x7, #0x7fffffffffffffff
sbcs x6, x6, xzr
sbc x7, x7, xzr
stp x8, x9, [x0]
stp x10, x11, [x0, #16]
stp x4, x5, [x1]
stp x6, x7, [x1, #16]
ldr x17, [x29, #56]
ldr x19, [x29, #64]
ldp x20, x21, [x29, #72]
ldp x22, x23, [x29, #88]
ldp x24, x25, [x29, #104]
ldp x26, x27, [x29, #120]
ldr x28, [x29, #136]
ldp x29, x30, [sp], #0x90
ret
#ifndef __APPLE__
.size ge_madd,.-ge_madd
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl ge_msub
.type ge_msub,@function
.align 2
ge_msub:
#else
.section __TEXT,__text
.globl _ge_msub
.p2align 2
_ge_msub:
#endif /* __APPLE__ */
stp x29, x30, [sp, #-144]!
add x29, sp, #0
str x17, [x29, #56]
str x19, [x29, #64]
stp x20, x21, [x29, #72]
stp x22, x23, [x29, #88]
stp x24, x25, [x29, #104]
stp x26, x27, [x29, #120]
str x28, [x29, #136]
str x0, [x29, #16]
str x1, [x29, #24]
str x2, [x29, #32]
mov x3, x1
add x2, x1, #32
add x1, x0, #32
# Add
ldp x8, x9, [x2]
ldp x10, x11, [x2, #16]
ldp x4, x5, [x3]
ldp x6, x7, [x3, #16]
adds x16, x8, x4
adcs x17, x9, x5
adcs x19, x10, x6
adcs x20, x11, x7
cset x28, cs
mov x25, #19
extr x28, x28, x20, #63
mul x25, x28, x25
# Sub modulus (if overflow)
adds x16, x16, x25
adcs x17, x17, xzr
and x20, x20, #0x7fffffffffffffff
adcs x19, x19, xzr
adc x20, x20, xzr
# Sub
subs x12, x8, x4
sbcs x13, x9, x5
sbcs x14, x10, x6
sbcs x15, x11, x7
csetm x28, cc
mov x25, #-19
extr x28, x28, x15, #63
mul x25, x28, x25
# Add modulus (if underflow)
subs x12, x12, x25
sbcs x13, x13, xzr
and x15, x15, #0x7fffffffffffffff
sbcs x14, x14, xzr
sbc x15, x15, xzr
ldr x2, [x29, #32]
add x2, x2, #32
mov x1, x0
# Multiply
ldp x8, x9, [x2]
ldp x10, x11, [x2, #16]
# A[0] * B[0]
umulh x22, x16, x8
mul x21, x16, x8
# A[2] * B[0]
umulh x24, x19, x8
mul x23, x19, x8
# A[1] * B[0]
mul x25, x17, x8
adds x22, x22, x25
umulh x26, x17, x8
adcs x23, x23, x26
# A[1] * B[3]
umulh x5, x17, x11
adc x24, x24, xzr
mul x4, x17, x11
# A[0] * B[1]
mul x25, x16, x9
adds x22, x22, x25
umulh x26, x16, x9
adcs x23, x23, x26
# A[2] * B[1]
mul x25, x19, x9
adcs x24, x24, x25
umulh x26, x19, x9
adcs x4, x4, x26
adc x5, x5, xzr
# A[1] * B[2]
mul x25, x17, x10
adds x24, x24, x25
umulh x26, x17, x10
adcs x4, x4, x26
adcs x5, x5, xzr
adc x6, xzr, xzr
# A[0] * B[2]
mul x25, x16, x10
adds x23, x23, x25
umulh x26, x16, x10
adcs x24, x24, x26
adcs x4, x4, xzr
adcs x5, x5, xzr
adc x6, x6, xzr
# A[1] * B[1]
mul x25, x17, x9
adds x23, x23, x25
umulh x26, x17, x9
adcs x24, x24, x26
# A[3] * B[1]
mul x25, x20, x9
adcs x4, x4, x25
umulh x26, x20, x9
adcs x5, x5, x26
adc x6, x6, xzr
# A[2] * B[2]
mul x25, x19, x10
adds x4, x4, x25
umulh x26, x19, x10
adcs x5, x5, x26
# A[3] * B[3]
mul x25, x20, x11
adcs x6, x6, x25
umulh x7, x20, x11
adc x7, x7, xzr
# A[0] * B[3]
mul x25, x16, x11
adds x24, x24, x25
umulh x26, x16, x11
adcs x4, x4, x26
# A[2] * B[3]
mul x25, x19, x11
adcs x5, x5, x25
umulh x26, x19, x11
adcs x6, x6, x26
adc x7, x7, xzr
# A[3] * B[0]
mul x25, x20, x8
adds x24, x24, x25
umulh x26, x20, x8
adcs x4, x4, x26
# A[3] * B[2]
mul x25, x20, x10
adcs x5, x5, x25
umulh x26, x20, x10
adcs x6, x6, x26
adc x7, x7, xzr
# Reduce
mov x25, #38
mul x26, x25, x7
adds x24, x24, x26
umulh x27, x25, x7
adc x27, x27, xzr
mov x25, #19
extr x27, x27, x24, #63
mul x27, x27, x25
and x24, x24, #0x7fffffffffffffff
mov x25, #38
mul x26, x25, x4
adds x21, x21, x26
umulh x4, x25, x4
mul x26, x25, x5
adcs x22, x22, x26
umulh x5, x25, x5
mul x26, x25, x6
adcs x23, x23, x26
umulh x6, x25, x6
adc x24, x24, xzr
# Add high product results in
adds x21, x21, x27
adcs x22, x22, x4
adcs x23, x23, x5
adc x24, x24, x6
sub x2, x2, #32
add x1, x0, #32
add x0, x0, #32
# Multiply
ldp x16, x17, [x2]
ldp x19, x20, [x2, #16]
# A[0] * B[0]
umulh x5, x12, x16
mul x4, x12, x16
# A[2] * B[0]
umulh x7, x14, x16
mul x6, x14, x16
# A[1] * B[0]
mul x25, x13, x16
adds x5, x5, x25
umulh x26, x13, x16
adcs x6, x6, x26
# A[1] * B[3]
umulh x9, x13, x20
adc x7, x7, xzr
mul x8, x13, x20
# A[0] * B[1]
mul x25, x12, x17
adds x5, x5, x25
umulh x26, x12, x17
adcs x6, x6, x26
# A[2] * B[1]
mul x25, x14, x17
adcs x7, x7, x25
umulh x26, x14, x17
adcs x8, x8, x26
adc x9, x9, xzr
# A[1] * B[2]
mul x25, x13, x19
adds x7, x7, x25
umulh x26, x13, x19
adcs x8, x8, x26
adcs x9, x9, xzr
adc x10, xzr, xzr
# A[0] * B[2]
mul x25, x12, x19
adds x6, x6, x25
umulh x26, x12, x19
adcs x7, x7, x26
adcs x8, x8, xzr
adcs x9, x9, xzr
adc x10, x10, xzr
# A[1] * B[1]
mul x25, x13, x17
adds x6, x6, x25
umulh x26, x13, x17
adcs x7, x7, x26
# A[3] * B[1]
mul x25, x15, x17
adcs x8, x8, x25
umulh x26, x15, x17
adcs x9, x9, x26
adc x10, x10, xzr
# A[2] * B[2]
mul x25, x14, x19
adds x8, x8, x25
umulh x26, x14, x19
adcs x9, x9, x26
# A[3] * B[3]
mul x25, x15, x20
adcs x10, x10, x25
umulh x11, x15, x20
adc x11, x11, xzr
# A[0] * B[3]
mul x25, x12, x20
adds x7, x7, x25
umulh x26, x12, x20
adcs x8, x8, x26
# A[2] * B[3]
mul x25, x14, x20
adcs x9, x9, x25
umulh x26, x14, x20
adcs x10, x10, x26
adc x11, x11, xzr
# A[3] * B[0]
mul x25, x15, x16
adds x7, x7, x25
umulh x26, x15, x16
adcs x8, x8, x26
# A[3] * B[2]
mul x25, x15, x19
adcs x9, x9, x25
umulh x26, x15, x19
adcs x10, x10, x26
adc x11, x11, xzr
# Reduce
mov x25, #38
mul x26, x25, x11
adds x7, x7, x26
umulh x27, x25, x11
adc x27, x27, xzr
mov x25, #19
extr x27, x27, x7, #63
mul x27, x27, x25
and x7, x7, #0x7fffffffffffffff
mov x25, #38
mul x26, x25, x8
adds x4, x4, x26
umulh x8, x25, x8
mul x26, x25, x9
adcs x5, x5, x26
umulh x9, x25, x9
mul x26, x25, x10
adcs x6, x6, x26
umulh x10, x25, x10
adc x7, x7, xzr
# Add high product results in
adds x4, x4, x27
adcs x5, x5, x8
adcs x6, x6, x9
adc x7, x7, x10
mov x3, x0
sub x2, x0, #32
sub x1, x0, #32
# Add
adds x8, x21, x4
adcs x9, x22, x5
adcs x10, x23, x6
adcs x11, x24, x7
cset x28, cs
mov x25, #19
extr x28, x28, x11, #63
mul x25, x28, x25
# Sub modulus (if overflow)
adds x8, x8, x25
adcs x9, x9, xzr
and x11, x11, #0x7fffffffffffffff
adcs x10, x10, xzr
adc x11, x11, xzr
# Sub
subs x12, x21, x4
sbcs x13, x22, x5
sbcs x14, x23, x6
sbcs x15, x24, x7
csetm x28, cc
mov x25, #-19
extr x28, x28, x15, #63
mul x25, x28, x25
# Add modulus (if underflow)
subs x12, x12, x25
sbcs x13, x13, xzr
and x15, x15, #0x7fffffffffffffff
sbcs x14, x14, xzr
sbc x15, x15, xzr
stp x8, x9, [x0]
stp x10, x11, [x0, #16]
stp x12, x13, [x1]
stp x14, x15, [x1, #16]
ldr x1, [x29, #24]
ldr x2, [x29, #32]
add x2, x2, #0x40
add x1, x1, #0x60
add x0, x0, #0x40
# Multiply
ldp x21, x22, [x1]
ldp x23, x24, [x1, #16]
ldp x4, x5, [x2]
ldp x6, x7, [x2, #16]
# A[0] * B[0]
umulh x17, x21, x4
mul x16, x21, x4
# A[2] * B[0]
umulh x20, x23, x4
mul x19, x23, x4
# A[1] * B[0]
mul x25, x22, x4
adds x17, x17, x25
umulh x26, x22, x4
adcs x19, x19, x26
# A[1] * B[3]
umulh x9, x22, x7
adc x20, x20, xzr
mul x8, x22, x7
# A[0] * B[1]
mul x25, x21, x5
adds x17, x17, x25
umulh x26, x21, x5
adcs x19, x19, x26
# A[2] * B[1]
mul x25, x23, x5
adcs x20, x20, x25
umulh x26, x23, x5
adcs x8, x8, x26
adc x9, x9, xzr
# A[1] * B[2]
mul x25, x22, x6
adds x20, x20, x25
umulh x26, x22, x6
adcs x8, x8, x26
adcs x9, x9, xzr
adc x10, xzr, xzr
# A[0] * B[2]
mul x25, x21, x6
adds x19, x19, x25
umulh x26, x21, x6
adcs x20, x20, x26
adcs x8, x8, xzr
adcs x9, x9, xzr
adc x10, x10, xzr
# A[1] * B[1]
mul x25, x22, x5
adds x19, x19, x25
umulh x26, x22, x5
adcs x20, x20, x26
# A[3] * B[1]
mul x25, x24, x5
adcs x8, x8, x25
umulh x26, x24, x5
adcs x9, x9, x26
adc x10, x10, xzr
# A[2] * B[2]
mul x25, x23, x6
adds x8, x8, x25
umulh x26, x23, x6
adcs x9, x9, x26
# A[3] * B[3]
mul x25, x24, x7
adcs x10, x10, x25
umulh x11, x24, x7
adc x11, x11, xzr
# A[0] * B[3]
mul x25, x21, x7
adds x20, x20, x25
umulh x26, x21, x7
adcs x8, x8, x26
# A[2] * B[3]
mul x25, x23, x7
adcs x9, x9, x25
umulh x26, x23, x7
adcs x10, x10, x26
adc x11, x11, xzr
# A[3] * B[0]
mul x25, x24, x4
adds x20, x20, x25
umulh x26, x24, x4
adcs x8, x8, x26
# A[3] * B[2]
mul x25, x24, x6
adcs x9, x9, x25
umulh x26, x24, x6
adcs x10, x10, x26
adc x11, x11, xzr
# Reduce
mov x25, #38
mul x26, x25, x11
adds x20, x20, x26
umulh x27, x25, x11
adc x27, x27, xzr
mov x25, #19
extr x27, x27, x20, #63
mul x27, x27, x25
and x20, x20, #0x7fffffffffffffff
mov x25, #38
mul x26, x25, x8
adds x16, x16, x26
umulh x8, x25, x8
mul x26, x25, x9
adcs x17, x17, x26
umulh x9, x25, x9
mul x26, x25, x10
adcs x19, x19, x26
umulh x10, x25, x10
adc x20, x20, xzr
# Add high product results in
adds x16, x16, x27
adcs x17, x17, x8
adcs x19, x19, x9
adc x20, x20, x10
sub x1, x1, #32
# Double
ldp x12, x13, [x1]
ldp x14, x15, [x1, #16]
adds x12, x12, x12
adcs x13, x13, x13
adcs x14, x14, x14
adc x15, x15, x15
mov x25, #-19
asr x28, x15, #63
# Mask the modulus
and x25, x28, x25
and x26, x28, #0x7fffffffffffffff
# Sub modulus (if overflow)
subs x12, x12, x25
sbcs x13, x13, x28
sbcs x14, x14, x28
sbc x15, x15, x26
mov x3, x0
sub x2, x0, #32
sub x1, x0, #32
# Add
adds x8, x12, x16
adcs x9, x13, x17
adcs x10, x14, x19
adcs x11, x15, x20
cset x28, cs
mov x25, #19
extr x28, x28, x11, #63
mul x25, x28, x25
# Sub modulus (if overflow)
adds x8, x8, x25
adcs x9, x9, xzr
and x11, x11, #0x7fffffffffffffff
adcs x10, x10, xzr
adc x11, x11, xzr
# Sub
subs x4, x12, x16
sbcs x5, x13, x17
sbcs x6, x14, x19
sbcs x7, x15, x20
csetm x28, cc
mov x25, #-19
extr x28, x28, x7, #63
mul x25, x28, x25
# Add modulus (if underflow)
subs x4, x4, x25
sbcs x5, x5, xzr
and x7, x7, #0x7fffffffffffffff
sbcs x6, x6, xzr
sbc x7, x7, xzr
stp x8, x9, [x0]
stp x10, x11, [x0, #16]
stp x4, x5, [x1]
stp x6, x7, [x1, #16]
ldr x17, [x29, #56]
ldr x19, [x29, #64]
ldp x20, x21, [x29, #72]
ldp x22, x23, [x29, #88]
ldp x24, x25, [x29, #104]
ldp x26, x27, [x29, #120]
ldr x28, [x29, #136]
ldp x29, x30, [sp], #0x90
ret
#ifndef __APPLE__
.size ge_msub,.-ge_msub
#endif /* __APPLE__ */
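# ge_add - point addition for Ed25519.
# Prototype inferred from the ref10-style ge_operations code (not stated in
# this file): ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q),
# computing r = p + q, with x0 = r, x1 = p, x2 = q.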
#ifndef __APPLE__
.text
.globl ge_add
.type ge_add,@function
.align 2
ge_add:
#else
.section __TEXT,__text
.globl _ge_add
.p2align 2
_ge_add:
#endif /* __APPLE__ */
stp x29, x30, [sp, #-144]!
add x29, sp, #0
str x17, [x29, #56]
str x19, [x29, #64]
stp x20, x21, [x29, #72]
stp x22, x23, [x29, #88]
stp x24, x25, [x29, #104]
stp x26, x27, [x29, #120]
str x28, [x29, #136]
str x0, [x29, #16]
str x1, [x29, #24]
str x2, [x29, #32]
mov x3, x1
add x2, x1, #32
add x1, x0, #32
# Add
ldp x8, x9, [x2]
ldp x10, x11, [x2, #16]
ldp x4, x5, [x3]
ldp x6, x7, [x3, #16]
adds x16, x8, x4
adcs x17, x9, x5
adcs x19, x10, x6
adcs x20, x11, x7
cset x28, cs
mov x25, #19
extr x28, x28, x20, #63
mul x25, x28, x25
# Sub modulus (if overflow)
adds x16, x16, x25
adcs x17, x17, xzr
and x20, x20, #0x7fffffffffffffff
adcs x19, x19, xzr
adc x20, x20, xzr
# Sub
subs x12, x8, x4
sbcs x13, x9, x5
sbcs x14, x10, x6
sbcs x15, x11, x7
csetm x28, cc
mov x25, #-19
extr x28, x28, x15, #63
mul x25, x28, x25
# Add modulus (if underflow)
subs x12, x12, x25
sbcs x13, x13, xzr
and x15, x15, #0x7fffffffffffffff
sbcs x14, x14, xzr
sbc x15, x15, xzr
ldr x2, [x29, #32]
mov x1, x0
# Multiply
ldp x8, x9, [x2]
ldp x10, x11, [x2, #16]
# A[0] * B[0]
umulh x22, x16, x8
mul x21, x16, x8
# A[2] * B[0]
umulh x24, x19, x8
mul x23, x19, x8
# A[1] * B[0]
mul x25, x17, x8
adds x22, x22, x25
umulh x26, x17, x8
adcs x23, x23, x26
# A[1] * B[3]
umulh x5, x17, x11
adc x24, x24, xzr
mul x4, x17, x11
# A[0] * B[1]
mul x25, x16, x9
adds x22, x22, x25
umulh x26, x16, x9
adcs x23, x23, x26
# A[2] * B[1]
mul x25, x19, x9
adcs x24, x24, x25
umulh x26, x19, x9
adcs x4, x4, x26
adc x5, x5, xzr
# A[1] * B[2]
mul x25, x17, x10
adds x24, x24, x25
umulh x26, x17, x10
adcs x4, x4, x26
adcs x5, x5, xzr
adc x6, xzr, xzr
# A[0] * B[2]
mul x25, x16, x10
adds x23, x23, x25
umulh x26, x16, x10
adcs x24, x24, x26
adcs x4, x4, xzr
adcs x5, x5, xzr
adc x6, x6, xzr
# A[1] * B[1]
mul x25, x17, x9
adds x23, x23, x25
umulh x26, x17, x9
adcs x24, x24, x26
# A[3] * B[1]
mul x25, x20, x9
adcs x4, x4, x25
umulh x26, x20, x9
adcs x5, x5, x26
adc x6, x6, xzr
# A[2] * B[2]
mul x25, x19, x10
adds x4, x4, x25
umulh x26, x19, x10
adcs x5, x5, x26
# A[3] * B[3]
mul x25, x20, x11
adcs x6, x6, x25
umulh x7, x20, x11
adc x7, x7, xzr
# A[0] * B[3]
mul x25, x16, x11
adds x24, x24, x25
umulh x26, x16, x11
adcs x4, x4, x26
# A[2] * B[3]
mul x25, x19, x11
adcs x5, x5, x25
umulh x26, x19, x11
adcs x6, x6, x26
adc x7, x7, xzr
# A[3] * B[0]
mul x25, x20, x8
adds x24, x24, x25
umulh x26, x20, x8
adcs x4, x4, x26
# A[3] * B[2]
mul x25, x20, x10
adcs x5, x5, x25
umulh x26, x20, x10
adcs x6, x6, x26
adc x7, x7, xzr
# Reduce
mov x25, #38
mul x26, x25, x7
adds x24, x24, x26
umulh x27, x25, x7
adc x27, x27, xzr
mov x25, #19
extr x27, x27, x24, #63
mul x27, x27, x25
and x24, x24, #0x7fffffffffffffff
mov x25, #38
mul x26, x25, x4
adds x21, x21, x26
umulh x4, x25, x4
mul x26, x25, x5
adcs x22, x22, x26
umulh x5, x25, x5
mul x26, x25, x6
adcs x23, x23, x26
umulh x6, x25, x6
adc x24, x24, xzr
# Add high product results in
adds x21, x21, x27
adcs x22, x22, x4
adcs x23, x23, x5
adc x24, x24, x6
# Store
stp x21, x22, [x0]
stp x23, x24, [x0, #16]
add x2, x2, #32
add x1, x0, #32
add x0, x0, #32
# Multiply
ldp x16, x17, [x2]
ldp x19, x20, [x2, #16]
# A[0] * B[0]
umulh x5, x12, x16
mul x4, x12, x16
# A[2] * B[0]
umulh x7, x14, x16
mul x6, x14, x16
# A[1] * B[0]
mul x25, x13, x16
adds x5, x5, x25
umulh x26, x13, x16
adcs x6, x6, x26
# A[1] * B[3]
umulh x9, x13, x20
adc x7, x7, xzr
mul x8, x13, x20
# A[0] * B[1]
mul x25, x12, x17
adds x5, x5, x25
umulh x26, x12, x17
adcs x6, x6, x26
# A[2] * B[1]
mul x25, x14, x17
adcs x7, x7, x25
umulh x26, x14, x17
adcs x8, x8, x26
adc x9, x9, xzr
# A[1] * B[2]
mul x25, x13, x19
adds x7, x7, x25
umulh x26, x13, x19
adcs x8, x8, x26
adcs x9, x9, xzr
adc x10, xzr, xzr
# A[0] * B[2]
mul x25, x12, x19
adds x6, x6, x25
umulh x26, x12, x19
adcs x7, x7, x26
adcs x8, x8, xzr
adcs x9, x9, xzr
adc x10, x10, xzr
# A[1] * B[1]
mul x25, x13, x17
adds x6, x6, x25
umulh x26, x13, x17
adcs x7, x7, x26
# A[3] * B[1]
mul x25, x15, x17
adcs x8, x8, x25
umulh x26, x15, x17
adcs x9, x9, x26
adc x10, x10, xzr
# A[2] * B[2]
mul x25, x14, x19
adds x8, x8, x25
umulh x26, x14, x19
adcs x9, x9, x26
# A[3] * B[3]
mul x25, x15, x20
adcs x10, x10, x25
umulh x11, x15, x20
adc x11, x11, xzr
# A[0] * B[3]
mul x25, x12, x20
adds x7, x7, x25
umulh x26, x12, x20
adcs x8, x8, x26
# A[2] * B[3]
mul x25, x14, x20
adcs x9, x9, x25
umulh x26, x14, x20
adcs x10, x10, x26
adc x11, x11, xzr
# A[3] * B[0]
mul x25, x15, x16
adds x7, x7, x25
umulh x26, x15, x16
adcs x8, x8, x26
# A[3] * B[2]
mul x25, x15, x19
adcs x9, x9, x25
umulh x26, x15, x19
adcs x10, x10, x26
adc x11, x11, xzr
# Reduce
mov x25, #38
mul x26, x25, x11
adds x7, x7, x26
umulh x27, x25, x11
adc x27, x27, xzr
mov x25, #19
extr x27, x27, x7, #63
mul x27, x27, x25
and x7, x7, #0x7fffffffffffffff
mov x25, #38
mul x26, x25, x8
adds x4, x4, x26
umulh x8, x25, x8
mul x26, x25, x9
adcs x5, x5, x26
umulh x9, x25, x9
mul x26, x25, x10
adcs x6, x6, x26
umulh x10, x25, x10
adc x7, x7, xzr
# Add high product results in
adds x4, x4, x27
adcs x5, x5, x8
adcs x6, x6, x9
adc x7, x7, x10
# Store
stp x4, x5, [x0]
stp x6, x7, [x0, #16]
mov x3, x0
sub x2, x0, #32
sub x1, x0, #32
# Add
adds x8, x21, x4
adcs x9, x22, x5
adcs x10, x23, x6
adcs x11, x24, x7
cset x28, cs
mov x25, #19
extr x28, x28, x11, #63
mul x25, x28, x25
# Sub modulus (if overflow)
adds x8, x8, x25
adcs x9, x9, xzr
and x11, x11, #0x7fffffffffffffff
adcs x10, x10, xzr
adc x11, x11, xzr
# Sub
subs x12, x21, x4
sbcs x13, x22, x5
sbcs x14, x23, x6
sbcs x15, x24, x7
csetm x28, cc
mov x25, #-19
extr x28, x28, x15, #63
mul x25, x28, x25
# Add modulus (if underflow)
subs x12, x12, x25
sbcs x13, x13, xzr
and x15, x15, #0x7fffffffffffffff
sbcs x14, x14, xzr
sbc x15, x15, xzr
stp x8, x9, [x0]
stp x10, x11, [x0, #16]
stp x12, x13, [x1]
stp x14, x15, [x1, #16]
ldr x1, [x29, #24]
ldr x2, [x29, #32]
add x2, x2, #0x60
add x1, x1, #0x60
add x0, x0, #0x40
# Multiply
ldp x21, x22, [x1]
ldp x23, x24, [x1, #16]
ldp x4, x5, [x2]
ldp x6, x7, [x2, #16]
# A[0] * B[0]
umulh x17, x21, x4
mul x16, x21, x4
# A[2] * B[0]
umulh x20, x23, x4
mul x19, x23, x4
# A[1] * B[0]
mul x25, x22, x4
adds x17, x17, x25
umulh x26, x22, x4
adcs x19, x19, x26
# A[1] * B[3]
umulh x9, x22, x7
adc x20, x20, xzr
mul x8, x22, x7
# A[0] * B[1]
mul x25, x21, x5
adds x17, x17, x25
umulh x26, x21, x5
adcs x19, x19, x26
# A[2] * B[1]
mul x25, x23, x5
adcs x20, x20, x25
umulh x26, x23, x5
adcs x8, x8, x26
adc x9, x9, xzr
# A[1] * B[2]
mul x25, x22, x6
adds x20, x20, x25
umulh x26, x22, x6
adcs x8, x8, x26
adcs x9, x9, xzr
adc x10, xzr, xzr
# A[0] * B[2]
mul x25, x21, x6
adds x19, x19, x25
umulh x26, x21, x6
adcs x20, x20, x26
adcs x8, x8, xzr
adcs x9, x9, xzr
adc x10, x10, xzr
# A[1] * B[1]
mul x25, x22, x5
adds x19, x19, x25
umulh x26, x22, x5
adcs x20, x20, x26
# A[3] * B[1]
mul x25, x24, x5
adcs x8, x8, x25
umulh x26, x24, x5
adcs x9, x9, x26
adc x10, x10, xzr
# A[2] * B[2]
mul x25, x23, x6
adds x8, x8, x25
umulh x26, x23, x6
adcs x9, x9, x26
# A[3] * B[3]
mul x25, x24, x7
adcs x10, x10, x25
umulh x11, x24, x7
adc x11, x11, xzr
# A[0] * B[3]
mul x25, x21, x7
adds x20, x20, x25
umulh x26, x21, x7
adcs x8, x8, x26
# A[2] * B[3]
mul x25, x23, x7
adcs x9, x9, x25
umulh x26, x23, x7
adcs x10, x10, x26
adc x11, x11, xzr
# A[3] * B[0]
mul x25, x24, x4
adds x20, x20, x25
umulh x26, x24, x4
adcs x8, x8, x26
# A[3] * B[2]
mul x25, x24, x6
adcs x9, x9, x25
umulh x26, x24, x6
adcs x10, x10, x26
adc x11, x11, xzr
# Reduce
mov x25, #38
mul x26, x25, x11
adds x20, x20, x26
umulh x27, x25, x11
adc x27, x27, xzr
mov x25, #19
extr x27, x27, x20, #63
mul x27, x27, x25
and x20, x20, #0x7fffffffffffffff
mov x25, #38
mul x26, x25, x8
adds x16, x16, x26
umulh x8, x25, x8
mul x26, x25, x9
adcs x17, x17, x26
umulh x9, x25, x9
mul x26, x25, x10
adcs x19, x19, x26
umulh x10, x25, x10
adc x20, x20, xzr
# Add high product results in
adds x16, x16, x27
adcs x17, x17, x8
adcs x19, x19, x9
adc x20, x20, x10
# Store
stp x16, x17, [x0]
stp x19, x20, [x0, #16]
sub x3, x2, #32
sub x2, x1, #32
sub x1, x0, #32
# Multiply
ldp x4, x5, [x2]
ldp x6, x7, [x2, #16]
ldp x12, x13, [x3]
ldp x14, x15, [x3, #16]
# A[0] * B[0]
umulh x9, x4, x12
mul x8, x4, x12
# A[2] * B[0]
umulh x11, x6, x12
mul x10, x6, x12
# A[1] * B[0]
mul x25, x5, x12
adds x9, x9, x25
umulh x26, x5, x12
adcs x10, x10, x26
# A[1] * B[3]
umulh x17, x5, x15
adc x11, x11, xzr
mul x16, x5, x15
# A[0] * B[1]
mul x25, x4, x13
adds x9, x9, x25
umulh x26, x4, x13
adcs x10, x10, x26
# A[2] * B[1]
mul x25, x6, x13
adcs x11, x11, x25
umulh x26, x6, x13
adcs x16, x16, x26
adc x17, x17, xzr
# A[1] * B[2]
mul x25, x5, x14
adds x11, x11, x25
umulh x26, x5, x14
adcs x16, x16, x26
adcs x17, x17, xzr
adc x19, xzr, xzr
# A[0] * B[2]
mul x25, x4, x14
adds x10, x10, x25
umulh x26, x4, x14
adcs x11, x11, x26
adcs x16, x16, xzr
adcs x17, x17, xzr
adc x19, x19, xzr
# A[1] * B[1]
mul x25, x5, x13
adds x10, x10, x25
umulh x26, x5, x13
adcs x11, x11, x26
# A[3] * B[1]
mul x25, x7, x13
adcs x16, x16, x25
umulh x26, x7, x13
adcs x17, x17, x26
adc x19, x19, xzr
# A[2] * B[2]
mul x25, x6, x14
adds x16, x16, x25
umulh x26, x6, x14
adcs x17, x17, x26
# A[3] * B[3]
mul x25, x7, x15
adcs x19, x19, x25
umulh x20, x7, x15
adc x20, x20, xzr
# A[0] * B[3]
mul x25, x4, x15
adds x11, x11, x25
umulh x26, x4, x15
adcs x16, x16, x26
# A[2] * B[3]
mul x25, x6, x15
adcs x17, x17, x25
umulh x26, x6, x15
adcs x19, x19, x26
adc x20, x20, xzr
# A[3] * B[0]
mul x25, x7, x12
adds x11, x11, x25
umulh x26, x7, x12
adcs x16, x16, x26
# A[3] * B[2]
mul x25, x7, x14
adcs x17, x17, x25
umulh x26, x7, x14
adcs x19, x19, x26
adc x20, x20, xzr
# Reduce
mov x25, #38
mul x26, x25, x20
adds x11, x11, x26
umulh x27, x25, x20
adc x27, x27, xzr
mov x25, #19
extr x27, x27, x11, #63
mul x27, x27, x25
and x11, x11, #0x7fffffffffffffff
mov x25, #38
mul x26, x25, x16
adds x8, x8, x26
umulh x16, x25, x16
mul x26, x25, x17
adcs x9, x9, x26
umulh x17, x25, x17
mul x26, x25, x19
adcs x10, x10, x26
umulh x19, x25, x19
adc x11, x11, xzr
# Add high product results in
adds x8, x8, x27
adcs x9, x9, x16
adcs x10, x10, x17
adc x11, x11, x19
# Double
adds x8, x8, x8
adcs x9, x9, x9
adcs x10, x10, x10
adc x11, x11, x11
mov x25, #-19
asr x28, x11, #63
# Mask the modulus
and x25, x28, x25
and x26, x28, #0x7fffffffffffffff
# Sub modulus (if overflow)
subs x8, x8, x25
sbcs x9, x9, x28
sbcs x10, x10, x28
sbc x11, x11, x26
mov x3, x0
sub x2, x0, #32
mov x1, x0
sub x0, x0, #32
# Add
ldp x4, x5, [x3]
ldp x6, x7, [x3, #16]
adds x21, x8, x4
adcs x22, x9, x5
adcs x23, x10, x6
adcs x24, x11, x7
cset x28, cs
mov x25, #19
extr x28, x28, x24, #63
mul x25, x28, x25
# Sub modulus (if overflow)
adds x21, x21, x25
adcs x22, x22, xzr
and x24, x24, #0x7fffffffffffffff
adcs x23, x23, xzr
adc x24, x24, xzr
# Sub
subs x12, x8, x4
sbcs x13, x9, x5
sbcs x14, x10, x6
sbcs x15, x11, x7
csetm x28, cc
mov x25, #-19
extr x28, x28, x15, #63
mul x25, x28, x25
# Add modulus (if underflow)
subs x12, x12, x25
sbcs x13, x13, xzr
and x15, x15, #0x7fffffffffffffff
sbcs x14, x14, xzr
sbc x15, x15, xzr
stp x21, x22, [x0]
stp x23, x24, [x0, #16]
stp x12, x13, [x1]
stp x14, x15, [x1, #16]
ldr x17, [x29, #56]
ldr x19, [x29, #64]
ldp x20, x21, [x29, #72]
ldp x22, x23, [x29, #88]
ldp x24, x25, [x29, #104]
ldp x26, x27, [x29, #120]
ldr x28, [x29, #136]
ldp x29, x30, [sp], #0x90
ret
#ifndef __APPLE__
.size ge_add,.-ge_add
#endif /* __APPLE__ */
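# ge_sub - point subtraction for Ed25519.
# Prototype inferred from the ref10-style ge_operations code (not stated in
# this file): ge_sub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q),
# computing r = p - q, with x0 = r, x1 = p, x2 = q.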
#ifndef __APPLE__
.text
.globl ge_sub
.type ge_sub,@function
.align 2
ge_sub:
#else
.section __TEXT,__text
.globl _ge_sub
.p2align 2
_ge_sub:
#endif /* __APPLE__ */
stp x29, x30, [sp, #-144]!
add x29, sp, #0
str x17, [x29, #56]
str x19, [x29, #64]
stp x20, x21, [x29, #72]
stp x22, x23, [x29, #88]
stp x24, x25, [x29, #104]
stp x26, x27, [x29, #120]
str x28, [x29, #136]
str x0, [x29, #16]
str x1, [x29, #24]
str x2, [x29, #32]
mov x3, x1
add x2, x1, #32
add x1, x0, #32
# Add
ldp x8, x9, [x2]
ldp x10, x11, [x2, #16]
ldp x4, x5, [x3]
ldp x6, x7, [x3, #16]
adds x16, x8, x4
adcs x17, x9, x5
adcs x19, x10, x6
adcs x20, x11, x7
cset x28, cs
mov x25, #19
extr x28, x28, x20, #63
mul x25, x28, x25
# Sub modulus (if overflow)
adds x16, x16, x25
adcs x17, x17, xzr
and x20, x20, #0x7fffffffffffffff
adcs x19, x19, xzr
adc x20, x20, xzr
# Sub
subs x12, x8, x4
sbcs x13, x9, x5
sbcs x14, x10, x6
sbcs x15, x11, x7
csetm x28, cc
mov x25, #-19
extr x28, x28, x15, #63
mul x25, x28, x25
# Add modulus (if underflow)
subs x12, x12, x25
sbcs x13, x13, xzr
and x15, x15, #0x7fffffffffffffff
sbcs x14, x14, xzr
sbc x15, x15, xzr
ldr x2, [x29, #32]
add x2, x2, #32
mov x1, x0
# Multiply
ldp x8, x9, [x2]
ldp x10, x11, [x2, #16]
# A[0] * B[0]
umulh x22, x16, x8
mul x21, x16, x8
# A[2] * B[0]
umulh x24, x19, x8
mul x23, x19, x8
# A[1] * B[0]
mul x25, x17, x8
adds x22, x22, x25
umulh x26, x17, x8
adcs x23, x23, x26
# A[1] * B[3]
umulh x5, x17, x11
adc x24, x24, xzr
mul x4, x17, x11
# A[0] * B[1]
mul x25, x16, x9
adds x22, x22, x25
umulh x26, x16, x9
adcs x23, x23, x26
# A[2] * B[1]
mul x25, x19, x9
adcs x24, x24, x25
umulh x26, x19, x9
adcs x4, x4, x26
adc x5, x5, xzr
# A[1] * B[2]
mul x25, x17, x10
adds x24, x24, x25
umulh x26, x17, x10
adcs x4, x4, x26
adcs x5, x5, xzr
adc x6, xzr, xzr
# A[0] * B[2]
mul x25, x16, x10
adds x23, x23, x25
umulh x26, x16, x10
adcs x24, x24, x26
adcs x4, x4, xzr
adcs x5, x5, xzr
adc x6, x6, xzr
# A[1] * B[1]
mul x25, x17, x9
adds x23, x23, x25
umulh x26, x17, x9
adcs x24, x24, x26
# A[3] * B[1]
mul x25, x20, x9
adcs x4, x4, x25
umulh x26, x20, x9
adcs x5, x5, x26
adc x6, x6, xzr
# A[2] * B[2]
mul x25, x19, x10
adds x4, x4, x25
umulh x26, x19, x10
adcs x5, x5, x26
# A[3] * B[3]
mul x25, x20, x11
adcs x6, x6, x25
umulh x7, x20, x11
adc x7, x7, xzr
# A[0] * B[3]
mul x25, x16, x11
adds x24, x24, x25
umulh x26, x16, x11
adcs x4, x4, x26
# A[2] * B[3]
mul x25, x19, x11
adcs x5, x5, x25
umulh x26, x19, x11
adcs x6, x6, x26
adc x7, x7, xzr
# A[3] * B[0]
mul x25, x20, x8
adds x24, x24, x25
umulh x26, x20, x8
adcs x4, x4, x26
# A[3] * B[2]
mul x25, x20, x10
adcs x5, x5, x25
umulh x26, x20, x10
adcs x6, x6, x26
adc x7, x7, xzr
# Reduce
mov x25, #38
mul x26, x25, x7
adds x24, x24, x26
umulh x27, x25, x7
adc x27, x27, xzr
mov x25, #19
extr x27, x27, x24, #63
mul x27, x27, x25
and x24, x24, #0x7fffffffffffffff
mov x25, #38
mul x26, x25, x4
adds x21, x21, x26
umulh x4, x25, x4
mul x26, x25, x5
adcs x22, x22, x26
umulh x5, x25, x5
mul x26, x25, x6
adcs x23, x23, x26
umulh x6, x25, x6
adc x24, x24, xzr
# Add high product results in
adds x21, x21, x27
adcs x22, x22, x4
adcs x23, x23, x5
adc x24, x24, x6
# Reduce if top bit set
mov x25, #19
and x26, x25, x24, asr 63
adds x21, x21, x26
adcs x22, x22, xzr
and x24, x24, #0x7fffffffffffffff
adcs x23, x23, xzr
adc x24, x24, xzr
# Store
stp x21, x22, [x0]
stp x23, x24, [x0, #16]
sub x2, x2, #32
add x1, x0, #32
add x0, x0, #32
# Multiply
ldp x16, x17, [x2]
ldp x19, x20, [x2, #16]
# A[0] * B[0]
umulh x5, x12, x16
mul x4, x12, x16
# A[2] * B[0]
umulh x7, x14, x16
mul x6, x14, x16
# A[1] * B[0]
mul x25, x13, x16
adds x5, x5, x25
umulh x26, x13, x16
adcs x6, x6, x26
# A[1] * B[3]
umulh x9, x13, x20
adc x7, x7, xzr
mul x8, x13, x20
# A[0] * B[1]
mul x25, x12, x17
adds x5, x5, x25
umulh x26, x12, x17
adcs x6, x6, x26
# A[2] * B[1]
mul x25, x14, x17
adcs x7, x7, x25
umulh x26, x14, x17
adcs x8, x8, x26
adc x9, x9, xzr
# A[1] * B[2]
mul x25, x13, x19
adds x7, x7, x25
umulh x26, x13, x19
adcs x8, x8, x26
adcs x9, x9, xzr
adc x10, xzr, xzr
# A[0] * B[2]
mul x25, x12, x19
adds x6, x6, x25
umulh x26, x12, x19
adcs x7, x7, x26
adcs x8, x8, xzr
adcs x9, x9, xzr
adc x10, x10, xzr
# A[1] * B[1]
mul x25, x13, x17
adds x6, x6, x25
umulh x26, x13, x17
adcs x7, x7, x26
# A[3] * B[1]
mul x25, x15, x17
adcs x8, x8, x25
umulh x26, x15, x17
adcs x9, x9, x26
adc x10, x10, xzr
# A[2] * B[2]
mul x25, x14, x19
adds x8, x8, x25
umulh x26, x14, x19
adcs x9, x9, x26
# A[3] * B[3]
mul x25, x15, x20
adcs x10, x10, x25
umulh x11, x15, x20
adc x11, x11, xzr
# A[0] * B[3]
mul x25, x12, x20
adds x7, x7, x25
umulh x26, x12, x20
adcs x8, x8, x26
# A[2] * B[3]
mul x25, x14, x20
adcs x9, x9, x25
umulh x26, x14, x20
adcs x10, x10, x26
adc x11, x11, xzr
# A[3] * B[0]
mul x25, x15, x16
adds x7, x7, x25
umulh x26, x15, x16
adcs x8, x8, x26
# A[3] * B[2]
mul x25, x15, x19
adcs x9, x9, x25
umulh x26, x15, x19
adcs x10, x10, x26
adc x11, x11, xzr
# Reduce
mov x25, #38
mul x26, x25, x11
adds x7, x7, x26
umulh x27, x25, x11
adc x27, x27, xzr
mov x25, #19
extr x27, x27, x7, #63
mul x27, x27, x25
and x7, x7, #0x7fffffffffffffff
mov x25, #38
mul x26, x25, x8
adds x4, x4, x26
umulh x8, x25, x8
mul x26, x25, x9
adcs x5, x5, x26
umulh x9, x25, x9
mul x26, x25, x10
adcs x6, x6, x26
umulh x10, x25, x10
adc x7, x7, xzr
# Add high product results in
adds x4, x4, x27
adcs x5, x5, x8
adcs x6, x6, x9
adc x7, x7, x10
# Store
stp x4, x5, [x0]
stp x6, x7, [x0, #16]
mov x3, x0
sub x2, x0, #32
sub x1, x0, #32
# Add
adds x8, x21, x4
adcs x9, x22, x5
adcs x10, x23, x6
adcs x11, x24, x7
cset x28, cs
mov x25, #19
extr x28, x28, x11, #63
mul x25, x28, x25
# Sub modulus (if overflow)
adds x8, x8, x25
adcs x9, x9, xzr
and x11, x11, #0x7fffffffffffffff
adcs x10, x10, xzr
adc x11, x11, xzr
# Sub
subs x12, x21, x4
sbcs x13, x22, x5
sbcs x14, x23, x6
sbcs x15, x24, x7
csetm x28, cc
mov x25, #-19
extr x28, x28, x15, #63
mul x25, x28, x25
# Add modulus (if underflow)
subs x12, x12, x25
sbcs x13, x13, xzr
and x15, x15, #0x7fffffffffffffff
sbcs x14, x14, xzr
sbc x15, x15, xzr
stp x8, x9, [x0]
stp x10, x11, [x0, #16]
stp x12, x13, [x1]
stp x14, x15, [x1, #16]
ldr x1, [x29, #24]
ldr x2, [x29, #32]
add x2, x2, #0x60
add x1, x1, #0x60
add x0, x0, #0x40
# Multiply
ldp x21, x22, [x1]
ldp x23, x24, [x1, #16]
ldp x4, x5, [x2]
ldp x6, x7, [x2, #16]
# A[0] * B[0]
umulh x17, x21, x4
mul x16, x21, x4
# A[2] * B[0]
umulh x20, x23, x4
mul x19, x23, x4
# A[1] * B[0]
mul x25, x22, x4
adds x17, x17, x25
umulh x26, x22, x4
adcs x19, x19, x26
# A[1] * B[3]
umulh x9, x22, x7
adc x20, x20, xzr
mul x8, x22, x7
# A[0] * B[1]
mul x25, x21, x5
adds x17, x17, x25
umulh x26, x21, x5
adcs x19, x19, x26
# A[2] * B[1]
mul x25, x23, x5
adcs x20, x20, x25
umulh x26, x23, x5
adcs x8, x8, x26
adc x9, x9, xzr
# A[1] * B[2]
mul x25, x22, x6
adds x20, x20, x25
umulh x26, x22, x6
adcs x8, x8, x26
adcs x9, x9, xzr
adc x10, xzr, xzr
# A[0] * B[2]
mul x25, x21, x6
adds x19, x19, x25
umulh x26, x21, x6
adcs x20, x20, x26
adcs x8, x8, xzr
adcs x9, x9, xzr
adc x10, x10, xzr
# A[1] * B[1]
mul x25, x22, x5
adds x19, x19, x25
umulh x26, x22, x5
adcs x20, x20, x26
# A[3] * B[1]
mul x25, x24, x5
adcs x8, x8, x25
umulh x26, x24, x5
adcs x9, x9, x26
adc x10, x10, xzr
# A[2] * B[2]
mul x25, x23, x6
adds x8, x8, x25
umulh x26, x23, x6
adcs x9, x9, x26
# A[3] * B[3]
mul x25, x24, x7
adcs x10, x10, x25
umulh x11, x24, x7
adc x11, x11, xzr
# A[0] * B[3]
mul x25, x21, x7
adds x20, x20, x25
umulh x26, x21, x7
adcs x8, x8, x26
# A[2] * B[3]
mul x25, x23, x7
adcs x9, x9, x25
umulh x26, x23, x7
adcs x10, x10, x26
adc x11, x11, xzr
# A[3] * B[0]
mul x25, x24, x4
adds x20, x20, x25
umulh x26, x24, x4
adcs x8, x8, x26
# A[3] * B[2]
mul x25, x24, x6
adcs x9, x9, x25
umulh x26, x24, x6
adcs x10, x10, x26
adc x11, x11, xzr
# Reduce
mov x25, #38
mul x26, x25, x11
adds x20, x20, x26
umulh x27, x25, x11
adc x27, x27, xzr
mov x25, #19
extr x27, x27, x20, #63
mul x27, x27, x25
and x20, x20, #0x7fffffffffffffff
mov x25, #38
mul x26, x25, x8
adds x16, x16, x26
umulh x8, x25, x8
mul x26, x25, x9
adcs x17, x17, x26
umulh x9, x25, x9
mul x26, x25, x10
adcs x19, x19, x26
umulh x10, x25, x10
adc x20, x20, xzr
# Add high product results in
adds x16, x16, x27
adcs x17, x17, x8
adcs x19, x19, x9
adc x20, x20, x10
# Reduce if top bit set
mov x25, #19
and x26, x25, x20, asr 63
adds x16, x16, x26
adcs x17, x17, xzr
and x20, x20, #0x7fffffffffffffff
adcs x19, x19, xzr
adc x20, x20, xzr
# Store
stp x16, x17, [x0]
stp x19, x20, [x0, #16]
sub x3, x2, #32
sub x2, x1, #32
sub x1, x0, #32
# Multiply
ldp x4, x5, [x2]
ldp x6, x7, [x2, #16]
ldp x12, x13, [x3]
ldp x14, x15, [x3, #16]
# A[0] * B[0]
umulh x9, x4, x12
mul x8, x4, x12
# A[2] * B[0]
umulh x11, x6, x12
mul x10, x6, x12
# A[1] * B[0]
mul x25, x5, x12
adds x9, x9, x25
umulh x26, x5, x12
adcs x10, x10, x26
# A[1] * B[3]
umulh x17, x5, x15
adc x11, x11, xzr
mul x16, x5, x15
# A[0] * B[1]
mul x25, x4, x13
adds x9, x9, x25
umulh x26, x4, x13
adcs x10, x10, x26
# A[2] * B[1]
mul x25, x6, x13
adcs x11, x11, x25
umulh x26, x6, x13
adcs x16, x16, x26
adc x17, x17, xzr
# A[1] * B[2]
mul x25, x5, x14
adds x11, x11, x25
umulh x26, x5, x14
adcs x16, x16, x26
adcs x17, x17, xzr
adc x19, xzr, xzr
# A[0] * B[2]
mul x25, x4, x14
adds x10, x10, x25
umulh x26, x4, x14
adcs x11, x11, x26
adcs x16, x16, xzr
adcs x17, x17, xzr
adc x19, x19, xzr
# A[1] * B[1]
mul x25, x5, x13
adds x10, x10, x25
umulh x26, x5, x13
adcs x11, x11, x26
# A[3] * B[1]
mul x25, x7, x13
adcs x16, x16, x25
umulh x26, x7, x13
adcs x17, x17, x26
adc x19, x19, xzr
# A[2] * B[2]
mul x25, x6, x14
adds x16, x16, x25
umulh x26, x6, x14
adcs x17, x17, x26
# A[3] * B[3]
mul x25, x7, x15
adcs x19, x19, x25
umulh x20, x7, x15
adc x20, x20, xzr
# A[0] * B[3]
mul x25, x4, x15
adds x11, x11, x25
umulh x26, x4, x15
adcs x16, x16, x26
# A[2] * B[3]
mul x25, x6, x15
adcs x17, x17, x25
umulh x26, x6, x15
adcs x19, x19, x26
adc x20, x20, xzr
# A[3] * B[0]
mul x25, x7, x12
adds x11, x11, x25
umulh x26, x7, x12
adcs x16, x16, x26
# A[3] * B[2]
mul x25, x7, x14
adcs x17, x17, x25
umulh x26, x7, x14
adcs x19, x19, x26
adc x20, x20, xzr
# Reduce
mov x25, #38
mul x26, x25, x20
adds x11, x11, x26
umulh x27, x25, x20
adc x27, x27, xzr
mov x25, #19
extr x27, x27, x11, #63
mul x27, x27, x25
and x11, x11, #0x7fffffffffffffff
mov x25, #38
mul x26, x25, x16
adds x8, x8, x26
umulh x16, x25, x16
mul x26, x25, x17
adcs x9, x9, x26
umulh x17, x25, x17
mul x26, x25, x19
adcs x10, x10, x26
umulh x19, x25, x19
adc x11, x11, xzr
# Add high product results in
adds x8, x8, x27
adcs x9, x9, x16
adcs x10, x10, x17
adc x11, x11, x19
# Double
adds x8, x8, x8
adcs x9, x9, x9
adcs x10, x10, x10
adc x11, x11, x11
mov x25, #-19
asr x28, x11, #63
# Mask the modulus
and x25, x28, x25
and x26, x28, #0x7fffffffffffffff
# Sub modulus (if overflow)
subs x8, x8, x25
sbcs x9, x9, x28
sbcs x10, x10, x28
sbc x11, x11, x26
mov x3, x0
sub x2, x0, #32
# Add
ldp x4, x5, [x3]
ldp x6, x7, [x3, #16]
adds x12, x8, x4
adcs x13, x9, x5
adcs x14, x10, x6
adcs x15, x11, x7
cset x28, cs
mov x25, #19
extr x28, x28, x15, #63
mul x25, x28, x25
# Sub modulus (if overflow)
adds x12, x12, x25
adcs x13, x13, xzr
and x15, x15, #0x7fffffffffffffff
adcs x14, x14, xzr
adc x15, x15, xzr
# Sub
subs x21, x8, x4
sbcs x22, x9, x5
sbcs x23, x10, x6
sbcs x24, x11, x7
csetm x28, cc
mov x25, #-19
extr x28, x28, x24, #63
mul x25, x28, x25
# Add modulus (if underflow)
subs x21, x21, x25
sbcs x22, x22, xzr
and x24, x24, #0x7fffffffffffffff
sbcs x23, x23, xzr
sbc x24, x24, xzr
stp x12, x13, [x0]
stp x14, x15, [x0, #16]
stp x21, x22, [x1]
stp x23, x24, [x1, #16]
ldr x17, [x29, #56]
ldr x19, [x29, #64]
ldp x20, x21, [x29, #72]
ldp x22, x23, [x29, #88]
ldp x24, x25, [x29, #104]
ldp x26, x27, [x29, #120]
ldr x28, [x29, #136]
ldp x29, x30, [sp], #0x90
ret
#ifndef __APPLE__
.size ge_sub,.-ge_sub
#endif /* __APPLE__ */
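# sc_reduce - reduce the 64-byte little-endian scalar at x0 modulo the group
# order L = 2^252 + 27742317777372353535851937790883648493, storing the
# 32-byte result back at x0.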
#ifndef __APPLE__
.text
.globl sc_reduce
.type sc_reduce,@function
.align 2
sc_reduce:
#else
.section __TEXT,__text
.globl _sc_reduce
.p2align 2
_sc_reduce:
#endif /* __APPLE__ */
stp x29, x30, [sp, #-64]!
add x29, sp, #0
str x17, [x29, #16]
str x19, [x29, #24]
stp x20, x21, [x29, #32]
stp x22, x23, [x29, #48]
ldp x2, x3, [x0]
ldp x4, x5, [x0, #16]
ldp x6, x7, [x0, #32]
ldp x8, x9, [x0, #48]
lsr x23, x9, #56
lsl x9, x9, #4
orr x9, x9, x8, lsr 60
lsl x8, x8, #4
orr x8, x8, x7, lsr 60
lsl x7, x7, #4
orr x7, x7, x6, lsr 60
lsl x6, x6, #4
mov x1, #15
orr x6, x6, x5, lsr 60
bic x5, x5, x1, lsl 60
bic x9, x9, x1, lsl 60
# Add order times bits 504..511
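    # (L = 2^252 + 0x14def9dea2f79cd65812631a5cf5d3ed; the mov/movk sequences
    #  below build 0xa7ed9ce5a30a2c13 and 0xeb2106215d086329, the two low
    #  64-bit limbs of -L, so that the top byte x23 can be folded back in as
    #  a multiple of L, leaving the value unchanged mod L.)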
mov x11, #0x2c13
movk x11, #0xa30a, lsl 16
movk x11, #0x9ce5, lsl 32
movk x11, #0xa7ed, lsl 48
mov x13, #0x6329
movk x13, #0x5d08, lsl 16
movk x13, #0x621, lsl 32
movk x13, #0xeb21, lsl 48
mul x10, x23, x11
umulh x11, x23, x11
mul x12, x23, x13
umulh x13, x23, x13
adds x6, x6, x10
adcs x7, x7, x11
adcs x8, x8, xzr
adc x9, x9, xzr
adds x7, x7, x12
adcs x8, x8, x13
adc x9, x9, xzr
subs x8, x8, x23
sbc x9, x9, xzr
# Sub product of top 4 words and order
mov x1, #0x2c13
movk x1, #0xa30a, lsl 16
movk x1, #0x9ce5, lsl 32
movk x1, #0xa7ed, lsl 48
mul x10, x6, x1
umulh x11, x6, x1
mul x12, x7, x1
umulh x13, x7, x1
mul x14, x8, x1
umulh x15, x8, x1
mul x16, x9, x1
umulh x17, x9, x1
adds x2, x2, x10
adcs x3, x3, x11
adcs x4, x4, x14
adcs x5, x5, x15
adc x19, xzr, xzr
adds x3, x3, x12
adcs x4, x4, x13
adcs x5, x5, x16
adc x19, x19, x17
mov x1, #0x6329
movk x1, #0x5d08, lsl 16
movk x1, #0x621, lsl 32
movk x1, #0xeb21, lsl 48
mul x10, x6, x1
umulh x11, x6, x1
mul x12, x7, x1
umulh x13, x7, x1
mul x14, x8, x1
umulh x15, x8, x1
mul x16, x9, x1
umulh x17, x9, x1
adds x3, x3, x10
adcs x4, x4, x11
adcs x5, x5, x14
adcs x19, x19, x15
adc x20, xzr, xzr
adds x4, x4, x12
adcs x5, x5, x13
adcs x19, x19, x16
adc x20, x20, x17
subs x4, x4, x6
sbcs x5, x5, x7
sbcs x6, x19, x8
sbc x7, x20, x9
asr x23, x7, #57
# Conditionally subtract order starting at bit 125
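    # (x23 is all-ones exactly when the subtraction above borrowed; the masked
    #  constants below appear to be the order L shifted left by 125 bits,
    #  added back to correct the underflow.)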
mov x10, xzr
mov x13, xzr
mov x11, #0xba7d
movk x11, #0x4b9e, lsl 16
movk x11, #0x4c63, lsl 32
movk x11, #0xcb02, lsl 48
mov x12, #0xf39a
movk x12, #0xd45e, lsl 16
movk x12, #0xdf3b, lsl 32
movk x12, #0x29b, lsl 48
movk x10, #0xa000, lsl 48
movk x13, #0x200, lsl 48
and x10, x10, x23
and x11, x11, x23
and x12, x12, x23
and x13, x13, x23
adds x3, x3, x10
adcs x4, x4, x11
adcs x5, x5, x12
adcs x6, x6, xzr
adc x7, x7, x13
# Move bits 252-376 to own registers
lsl x7, x7, #4
orr x7, x7, x6, lsr 60
lsl x6, x6, #4
mov x23, #15
orr x6, x6, x5, lsr 60
bic x5, x5, x23, lsl 60
# Sub product of top 2 words and order
# * -5812631a5cf5d3ed
mov x1, #0x2c13
movk x1, #0xa30a, lsl 16
movk x1, #0x9ce5, lsl 32
movk x1, #0xa7ed, lsl 48
mul x10, x6, x1
umulh x11, x6, x1
mul x12, x7, x1
umulh x13, x7, x1
adds x2, x2, x10
adcs x3, x3, x11
adc x19, xzr, xzr
adds x3, x3, x12
adc x19, x19, x13
# * -14def9dea2f79cd7
mov x1, #0x6329
movk x1, #0x5d08, lsl 16
movk x1, #0x621, lsl 32
movk x1, #0xeb21, lsl 48
mul x10, x6, x1
umulh x11, x6, x1
mul x12, x7, x1
umulh x13, x7, x1
adds x3, x3, x10
adcs x4, x4, x11
adc x20, xzr, xzr
adds x4, x4, x12
adc x20, x20, x13
# Add overflows at 2 * 64
mov x1, #15
bic x5, x5, x1, lsl 60
adds x4, x4, x19
adc x5, x5, x20
# Subtract top at 2 * 64
subs x4, x4, x6
sbcs x5, x5, x7
sbc x1, x1, x1
# Conditional sub order
mov x10, #0xd3ed
movk x10, #0x5cf5, lsl 16
movk x10, #0x631a, lsl 32
movk x10, #0x5812, lsl 48
mov x11, #0x9cd6
movk x11, #0xa2f7, lsl 16
movk x11, #0xf9de, lsl 32
movk x11, #0x14de, lsl 48
and x10, x10, x1
and x11, x11, x1
adds x2, x2, x10
adcs x3, x3, x11
and x1, x1, #0x1000000000000000
adcs x4, x4, xzr
mov x23, #15
adc x5, x5, x1
bic x5, x5, x23, lsl 60
# Store result
stp x2, x3, [x0]
stp x4, x5, [x0, #16]
ldr x17, [x29, #16]
ldr x19, [x29, #24]
ldp x20, x21, [x29, #32]
ldp x22, x23, [x29, #48]
ldp x29, x30, [sp], #0x40
ret
#ifndef __APPLE__
.size sc_reduce,.-sc_reduce
#endif /* __APPLE__ */
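# sc_muladd - compute s = (a * b + c) mod the group order L, with x0 = s
# (32-byte output), x1 = a, x2 = b, x3 = c as 32-byte little-endian scalars
# (roles inferred from the loads and stores below and the usual sc_muladd
# prototype).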
#ifndef __APPLE__
.text
.globl sc_muladd
.type sc_muladd,@function
.align 2
sc_muladd:
#else
.section __TEXT,__text
.globl _sc_muladd
.p2align 2
_sc_muladd:
#endif /* __APPLE__ */
stp x29, x30, [sp, #-96]!
add x29, sp, #0
str x17, [x29, #24]
str x19, [x29, #32]
stp x20, x21, [x29, #40]
stp x22, x23, [x29, #56]
stp x24, x25, [x29, #72]
str x26, [x29, #88]
# Multiply
ldp x12, x13, [x1]
ldp x14, x15, [x1, #16]
ldp x16, x17, [x2]
ldp x19, x20, [x2, #16]
# A[0] * B[0]
umulh x5, x12, x16
mul x4, x12, x16
# A[2] * B[0]
umulh x7, x14, x16
mul x6, x14, x16
# A[1] * B[0]
mul x21, x13, x16
adds x5, x5, x21
umulh x22, x13, x16
adcs x6, x6, x22
# A[1] * B[3]
umulh x9, x13, x20
adc x7, x7, xzr
mul x8, x13, x20
# A[0] * B[1]
mul x21, x12, x17
adds x5, x5, x21
umulh x22, x12, x17
adcs x6, x6, x22
# A[2] * B[1]
mul x21, x14, x17
adcs x7, x7, x21
umulh x22, x14, x17
adcs x8, x8, x22
adc x9, x9, xzr
# A[1] * B[2]
mul x21, x13, x19
adds x7, x7, x21
umulh x22, x13, x19
adcs x8, x8, x22
adcs x9, x9, xzr
adc x10, xzr, xzr
# A[0] * B[2]
mul x21, x12, x19
adds x6, x6, x21
umulh x22, x12, x19
adcs x7, x7, x22
adcs x8, x8, xzr
adcs x9, x9, xzr
adc x10, x10, xzr
# A[1] * B[1]
mul x21, x13, x17
adds x6, x6, x21
umulh x22, x13, x17
adcs x7, x7, x22
# A[3] * B[1]
mul x21, x15, x17
adcs x8, x8, x21
umulh x22, x15, x17
adcs x9, x9, x22
adc x10, x10, xzr
# A[2] * B[2]
mul x21, x14, x19
adds x8, x8, x21
umulh x22, x14, x19
adcs x9, x9, x22
# A[3] * B[3]
mul x21, x15, x20
adcs x10, x10, x21
umulh x11, x15, x20
adc x11, x11, xzr
# A[0] * B[3]
mul x21, x12, x20
adds x7, x7, x21
umulh x22, x12, x20
adcs x8, x8, x22
# A[2] * B[3]
mul x21, x14, x20
adcs x9, x9, x21
umulh x22, x14, x20
adcs x10, x10, x22
adc x11, x11, xzr
# A[3] * B[0]
mul x21, x15, x16
adds x7, x7, x21
umulh x22, x15, x16
adcs x8, x8, x22
# A[3] * B[2]
mul x21, x15, x19
adcs x9, x9, x21
umulh x22, x15, x19
adcs x10, x10, x22
adc x11, x11, xzr
# Add c to a * b
ldp x12, x13, [x3]
ldp x14, x15, [x3, #16]
adds x4, x4, x12
adcs x5, x5, x13
adcs x6, x6, x14
adcs x7, x7, x15
adcs x8, x8, xzr
adcs x9, x9, xzr
adcs x10, x10, xzr
adc x11, x11, xzr
lsr x25, x11, #56
lsl x11, x11, #4
orr x11, x11, x10, lsr 60
lsl x10, x10, #4
orr x10, x10, x9, lsr 60
lsl x9, x9, #4
orr x9, x9, x8, lsr 60
lsl x8, x8, #4
mov x26, #15
orr x8, x8, x7, lsr 60
bic x7, x7, x26, lsl 60
bic x11, x11, x26, lsl 60
# Add order times bits 504..507
mov x22, #0x2c13
movk x22, #0xa30a, lsl 16
movk x22, #0x9ce5, lsl 32
movk x22, #0xa7ed, lsl 48
mov x24, #0x6329
movk x24, #0x5d08, lsl 16
movk x24, #0x621, lsl 32
movk x24, #0xeb21, lsl 48
mul x21, x25, x22
umulh x22, x25, x22
mul x23, x25, x24
umulh x24, x25, x24
adds x8, x8, x21
adcs x9, x9, x22
adcs x10, x10, xzr
adc x11, x11, xzr
adds x9, x9, x23
adcs x10, x10, x24
adc x11, x11, xzr
subs x10, x10, x25
sbc x11, x11, xzr
# Sub product of top 4 words and order
mov x26, #0x2c13
movk x26, #0xa30a, lsl 16
movk x26, #0x9ce5, lsl 32
movk x26, #0xa7ed, lsl 48
mul x16, x8, x26
umulh x17, x8, x26
mul x19, x9, x26
umulh x20, x9, x26
mul x21, x10, x26
umulh x22, x10, x26
mul x23, x11, x26
umulh x24, x11, x26
adds x4, x4, x16
adcs x5, x5, x17
adcs x6, x6, x21
adcs x7, x7, x22
adc x12, xzr, xzr
adds x5, x5, x19
adcs x6, x6, x20
adcs x7, x7, x23
adc x12, x12, x24
mov x26, #0x6329
movk x26, #0x5d08, lsl 16
movk x26, #0x621, lsl 32
movk x26, #0xeb21, lsl 48
mul x16, x8, x26
umulh x17, x8, x26
mul x19, x9, x26
umulh x20, x9, x26
mul x21, x10, x26
umulh x22, x10, x26
mul x23, x11, x26
umulh x24, x11, x26
adds x5, x5, x16
adcs x6, x6, x17
adcs x7, x7, x21
adcs x12, x12, x22
adc x13, xzr, xzr
adds x6, x6, x19
adcs x7, x7, x20
adcs x12, x12, x23
adc x13, x13, x24
subs x6, x6, x8
sbcs x7, x7, x9
sbcs x8, x12, x10
sbc x9, x13, x11
asr x25, x9, #57
# Conditionally subtract order starting at bit 125
mov x16, xzr
mov x20, xzr
mov x17, #0xba7d
movk x17, #0x4b9e, lsl 16
movk x17, #0x4c63, lsl 32
movk x17, #0xcb02, lsl 48
mov x19, #0xf39a
movk x19, #0xd45e, lsl 16
movk x19, #0xdf3b, lsl 32
movk x19, #0x29b, lsl 48
movk x16, #0xa000, lsl 48
movk x20, #0x200, lsl 48
and x16, x16, x25
and x17, x17, x25
and x19, x19, x25
and x20, x20, x25
adds x5, x5, x16
adcs x6, x6, x17
adcs x7, x7, x19
adcs x8, x8, xzr
adc x9, x9, x20
# Move bits 252-376 to own registers
lsl x9, x9, #4
orr x9, x9, x8, lsr 60
lsl x8, x8, #4
mov x25, #15
orr x8, x8, x7, lsr 60
bic x7, x7, x25, lsl 60
# Sub product of top 2 words and order
# * -5812631a5cf5d3ed
mov x26, #0x2c13
movk x26, #0xa30a, lsl 16
movk x26, #0x9ce5, lsl 32
movk x26, #0xa7ed, lsl 48
mul x16, x8, x26
umulh x17, x8, x26
mul x19, x9, x26
umulh x20, x9, x26
adds x4, x4, x16
adcs x5, x5, x17
adc x12, xzr, xzr
adds x5, x5, x19
adc x12, x12, x20
# * -14def9dea2f79cd7
mov x26, #0x6329
movk x26, #0x5d08, lsl 16
movk x26, #0x621, lsl 32
movk x26, #0xeb21, lsl 48
mul x16, x8, x26
umulh x17, x8, x26
mul x19, x9, x26
umulh x20, x9, x26
adds x5, x5, x16
adcs x6, x6, x17
adc x13, xzr, xzr
adds x6, x6, x19
adc x13, x13, x20
# Add overflows at 2 * 64
mov x26, #15
bic x7, x7, x26, lsl 60
adds x6, x6, x12
adc x7, x7, x13
# Subtract top at 2 * 64
subs x6, x6, x8
sbcs x7, x7, x9
sbc x26, x26, x26
# Conditional sub order
mov x16, #0xd3ed
movk x16, #0x5cf5, lsl 16
movk x16, #0x631a, lsl 32
movk x16, #0x5812, lsl 48
mov x17, #0x9cd6
movk x17, #0xa2f7, lsl 16
movk x17, #0xf9de, lsl 32
movk x17, #0x14de, lsl 48
and x16, x16, x26
and x17, x17, x26
adds x4, x4, x16
adcs x5, x5, x17
and x26, x26, #0x1000000000000000
adcs x6, x6, xzr
mov x25, #15
adc x7, x7, x26
bic x7, x7, x25, lsl 60
# Store result
stp x4, x5, [x0]
stp x6, x7, [x0, #16]
ldr x17, [x29, #24]
ldr x19, [x29, #32]
ldp x20, x21, [x29, #40]
ldp x22, x23, [x29, #56]
ldp x24, x25, [x29, #72]
ldr x26, [x29, #88]
ldp x29, x30, [sp], #0x60
ret
#ifndef __APPLE__
.size sc_muladd,.-sc_muladd
#endif /* __APPLE__ */
#endif /* HAVE_ED25519 */
#endif /* !CURVE25519_SMALL || !ED25519_SMALL */
#endif /* HAVE_CURVE25519 || HAVE_ED25519 */
#endif /* __aarch64__ */
#endif /* WOLFSSL_ARMASM */
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif
#endif /* !WOLFSSL_ARMASM_INLINE */