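/* Code-browser page text captured together with this file (not part of the
 * original source): "Source code", "Revision control", "Copy as Markdown",
 * "Other Tools". */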

/*
* mpi_x86_asm.c - MSVC inline assembly implementation of s_mpv_ functions.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "mpi-priv.h"
/*
 * Cached result of the SSE2 capability probe, consulted by the s_mpv_*
 * dispatchers in this file:
 *   < 0  not probed yet (initial value is -1)
 *     0  take the plain x86 integer code path
 *   > 0  take the SSE2 code path
 */
static int is_sse = -1;

/*
 * Capability probe, defined outside this file.  The dispatchers store its
 * return value in is_sse.  Declared with an explicit (void) parameter list
 * so that it is a real prototype rather than an old-style declaration.
 */
extern unsigned long s_mpi_is_sse2(void);
/*
 * s_mpv_mul_d - multiply a vector of 32-bit digits by a single digit.
 *
 * For each i in [0, a_len): the 64-bit value a[i] * b + carry is formed,
 * its low 32 bits are stored in c[i] and its high 32 bits become the carry
 * for the next digit.  After the loop the final carry is stored (not added)
 * at c[a_len], so c[] is written at indices 0 .. a_len inclusive.
 *
 * Dispatch: is_sse == 0 selects the integer loop, is_sse > 0 selects the
 * SSE2 loop.  Any other value means the probe has not run yet; in that case
 * s_mpi_is_sse2() is called and its result is stored in is_sse.
 *
 * Stack frame, integer path:
 *   ebp - 40: saved ebx
 *   ebp - 36: saved esi
 *   ebp - 32: saved edi
 *   ebp - 28 .. ebp - 4: reserved by "sub esp,28", never referenced
 * Stack frame, SSE2 path:
 *   ebp -  8: saved esi
 *   ebp -  4: saved edi
 * Both paths:
 *   ebp +  0: saved ebp
 *   ebp +  4: return address
 *   ebp +  8: a argument
 *   ebp + 12: a_len argument
 *   ebp + 16: b argument
 *   ebp + 20: c argument
 *
 * Registers, integer path:
 *   eax: current digit, then low half of the product
 *   ebx: carry
 *   ecx: digits remaining
 *   edx: high half of the product
 *   esi: read pointer into a
 *   edi: write pointer into c
 * Registers, SSE2 path:
 *   mm0: current digit, then the 64-bit product
 *   mm1: b
 *   mm2: 64-bit accumulator; after the shift it holds the carry
 *   ecx, esi, edi: as above
 */
__declspec(naked) void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
{
    __asm {
        mov     eax, is_sse
        test    eax, eax
        jz      mul_d_x86               ; is_sse == 0: integer path
        jg      mul_d_sse2              ; is_sse  > 0: SSE2 path
        call    s_mpi_is_sse2           ; not probed yet
        mov     is_sse, eax             ; remember the answer
        test    eax, eax
        jg      mul_d_sse2
mul_d_x86:
        push    ebp
        mov     ebp, esp
        sub     esp, 28
        push    edi
        push    esi
        push    ebx
        xor     ebx, ebx                ; carry = 0
        mov     edi, [ebp+20]           ; edi = c
        mov     ecx, [ebp+12]           ; ecx = a_len
        test    ecx, ecx
        jz      mul_d_x86_fin           ; nothing to multiply
        mov     esi, [ebp+8]            ; esi = a
        cld                             ; lodsd and stosd must step upward
mul_d_x86_next:
        lodsd                           ; eax = *a++
        mul     dword ptr [ebp+16]      ; edx:eax = a_i * b
        add     eax, ebx                ; fold in the incoming carry
        adc     edx, 0
        mov     ebx, edx                ; high half is the outgoing carry
        stosd                           ; *c++ = low half
        dec     ecx
        jnz     mul_d_x86_next
mul_d_x86_fin:
        mov     [edi], ebx              ; c[a_len] = final carry
        pop     ebx
        pop     esi
        pop     edi
        leave
        ret
        nop
mul_d_sse2:
        push    ebp
        mov     ebp, esp
        push    edi
        push    esi
        pxor    mm2, mm2                ; carry = 0
        movd    mm1, [ebp+16]           ; mm1 = b
        mov     edi, [ebp+20]           ; edi = c
        mov     ecx, [ebp+12]           ; ecx = a_len
        test    ecx, ecx
        jz      mul_d_sse2_fin          ; nothing to multiply
        mov     esi, [ebp+8]            ; esi = a
        cld
mul_d_sse2_next:
        movd    mm0, [esi]              ; mm0 = *a
        add     esi, 4
        pmuludq mm0, mm1                ; mm0 = a_i * b (64 bits)
        paddq   mm2, mm0                ; accumulator = carry + product
        movd    [edi], mm2              ; *c = low 32 bits
        add     edi, 4
        psrlq   mm2, 32                 ; high 32 bits are the next carry
        dec     ecx
        jnz     mul_d_sse2_next
mul_d_sse2_fin:
        movd    [edi], mm2              ; c[a_len] = final carry
        emms                            ; leave the MMX state clean
        pop     esi
        pop     edi
        leave
        ret
        nop
    }
}
/*
 * s_mpv_mul_d_add - multiply a digit vector by one digit and accumulate
 * the result into c.
 *
 * For each i in [0, a_len): the 64-bit value a[i] * b + carry + c[i] is
 * formed, its low 32 bits replace c[i] and its high 32 bits become the
 * carry for the next digit.  After the loop the final carry is stored
 * (not added) at c[a_len].
 *
 * Dispatch: is_sse == 0 selects the integer loop, is_sse > 0 selects the
 * SSE2 loop.  Any other value means the probe has not run yet; in that case
 * s_mpi_is_sse2() is called and its result is stored in is_sse.
 *
 * Stack frame, integer path:
 *   ebp - 40: saved ebx
 *   ebp - 36: saved esi
 *   ebp - 32: saved edi
 *   ebp - 28 .. ebp - 4: reserved by "sub esp,28", never referenced
 * Stack frame, SSE2 path:
 *   ebp -  8: saved esi
 *   ebp -  4: saved edi
 * Both paths:
 *   ebp +  0: saved ebp
 *   ebp +  4: return address
 *   ebp +  8: a argument
 *   ebp + 12: a_len argument
 *   ebp + 16: b argument
 *   ebp + 20: c argument
 *
 * Registers, integer path:
 *   eax: current digit, then low half of the running sum
 *   ebx: carry
 *   ecx: digits remaining
 *   edx: high half of the running sum
 *   esi: read pointer into a
 *   edi: read/write pointer into c
 * Registers, SSE2 path:
 *   mm0: current digit / product, then the current word of c
 *   mm1: b
 *   mm2: 64-bit accumulator; after the shift it holds the carry
 *   ecx, esi, edi: as above
 */
__declspec(naked) void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
{
    __asm {
        mov     eax, is_sse
        test    eax, eax
        jz      mul_d_add_x86           ; is_sse == 0: integer path
        jg      mul_d_add_sse2          ; is_sse  > 0: SSE2 path
        call    s_mpi_is_sse2           ; not probed yet
        mov     is_sse, eax             ; remember the answer
        test    eax, eax
        jg      mul_d_add_sse2
mul_d_add_x86:
        push    ebp
        mov     ebp, esp
        sub     esp, 28
        push    edi
        push    esi
        push    ebx
        xor     ebx, ebx                ; carry = 0
        mov     edi, [ebp+20]           ; edi = c
        mov     ecx, [ebp+12]           ; ecx = a_len
        test    ecx, ecx
        jz      mul_d_add_x86_fin       ; nothing to multiply
        mov     esi, [ebp+8]            ; esi = a
        cld                             ; lodsd and stosd must step upward
mul_d_add_x86_next:
        lodsd                           ; eax = *a++
        mul     dword ptr [ebp+16]      ; edx:eax = a_i * b
        add     eax, ebx                ; fold in the incoming carry
        adc     edx, 0
        add     eax, [edi]              ; fold in the current word of c
        adc     edx, 0
        mov     ebx, edx                ; high half is the outgoing carry
        stosd                           ; *c++ = low half
        dec     ecx
        jnz     mul_d_add_x86_next
mul_d_add_x86_fin:
        mov     [edi], ebx              ; c[a_len] = final carry
        pop     ebx
        pop     esi
        pop     edi
        leave
        ret
        nop
mul_d_add_sse2:
        push    ebp
        mov     ebp, esp
        push    edi
        push    esi
        pxor    mm2, mm2                ; carry = 0
        movd    mm1, [ebp+16]           ; mm1 = b
        mov     edi, [ebp+20]           ; edi = c
        mov     ecx, [ebp+12]           ; ecx = a_len
        test    ecx, ecx
        jz      mul_d_add_sse2_fin      ; nothing to multiply
        mov     esi, [ebp+8]            ; esi = a
        cld
mul_d_add_sse2_next:
        movd    mm0, [esi]              ; mm0 = *a
        add     esi, 4
        pmuludq mm0, mm1                ; mm0 = a_i * b (64 bits)
        paddq   mm2, mm0                ; accumulator = carry + product
        movd    mm0, [edi]              ; mm0 = current word of c
        paddq   mm2, mm0                ; accumulator += *c
        movd    [edi], mm2              ; *c = low 32 bits
        add     edi, 4
        psrlq   mm2, 32                 ; high 32 bits are the next carry
        dec     ecx
        jnz     mul_d_add_sse2_next
mul_d_add_sse2_fin:
        movd    [edi], mm2              ; c[a_len] = final carry
        emms                            ; leave the MMX state clean
        pop     esi
        pop     edi
        leave
        ret
        nop
    }
}
/*
 * s_mpv_mul_d_add_prop - multiply a digit vector by one digit, accumulate
 * the result into c, and propagate the final carry upward through c.
 *
 * For each i in [0, a_len): the 64-bit value a[i] * b + carry + c[i] is
 * formed, its low 32 bits replace c[i] and its high 32 bits become the
 * carry for the next digit.  If the carry left after the loop is non-zero
 * it is added to c[a_len]; while that addition (and each following one)
 * produces a carry-out, 1 is added to the next higher word of c.  The
 * ripple has no length bound of its own: it stops only when an addition
 * does not carry.
 *
 * Dispatch: is_sse == 0 selects the integer loop, is_sse > 0 selects the
 * SSE2 loop.  Any other value means the probe has not run yet; in that case
 * s_mpi_is_sse2() is called and its result is stored in is_sse.
 *
 * Stack frame, integer path:
 *   ebp - 40: saved ebx
 *   ebp - 36: saved esi
 *   ebp - 32: saved edi
 *   ebp - 28 .. ebp - 4: reserved by "sub esp,28", never referenced
 * Stack frame, SSE2 path:
 *   ebp - 12: saved ebx
 *   ebp -  8: saved esi
 *   ebp -  4: saved edi
 * Both paths:
 *   ebp +  0: saved ebp
 *   ebp +  4: return address
 *   ebp +  8: a argument
 *   ebp + 12: a_len argument
 *   ebp + 16: b argument
 *   ebp + 20: c argument
 *
 * Registers, integer path:
 *   eax: current digit, then low half of the running sum
 *   ebx: carry
 *   ecx: digits remaining
 *   edx: high half of the running sum
 *   esi: read pointer into a
 *   edi: read/write pointer into c
 * Registers, SSE2 path:
 *   mm0: current digit, then the 64-bit product
 *   mm1: b
 *   mm2: 64-bit accumulator; after the shift it holds the carry
 *   mm3: current word of c
 *   eax, ebx: used only by the carry ripple after the loop
 *   ecx, esi, edi: as above
 */
__declspec(naked) void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
{
    __asm {
        mov     eax, is_sse
        test    eax, eax
        jz      mul_d_add_prop_x86      ; is_sse == 0: integer path
        jg      mul_d_add_prop_sse2     ; is_sse  > 0: SSE2 path
        call    s_mpi_is_sse2           ; not probed yet
        mov     is_sse, eax             ; remember the answer
        test    eax, eax
        jg      mul_d_add_prop_sse2
mul_d_add_prop_x86:
        push    ebp
        mov     ebp, esp
        sub     esp, 28
        push    edi
        push    esi
        push    ebx
        xor     ebx, ebx                ; carry = 0
        mov     edi, [ebp+20]           ; edi = c
        mov     ecx, [ebp+12]           ; ecx = a_len
        test    ecx, ecx
        jz      mul_d_add_prop_x86_ripple
        cld                             ; lodsd and stosd must step upward
        mov     esi, [ebp+8]            ; esi = a
mul_d_add_prop_x86_next:
        lodsd                           ; eax = *a++
        mul     dword ptr [ebp+16]      ; edx:eax = a_i * b
        add     eax, ebx                ; fold in the incoming carry
        adc     edx, 0
        add     eax, [edi]              ; fold in the current word of c
        adc     edx, 0
        mov     ebx, edx                ; high half is the outgoing carry
        stosd                           ; *c++ = low half
        dec     ecx
        jnz     mul_d_add_prop_x86_next
mul_d_add_prop_x86_ripple:
        test    ebx, ebx
        jz      mul_d_add_prop_x86_exit ; no carry left over
        mov     eax, [edi]
        add     eax, ebx                ; *c += carry
        stosd                           ; stosd leaves CF from the add intact
        jnc     mul_d_add_prop_x86_exit
mul_d_add_prop_x86_more:
        mov     eax, [edi]
        adc     eax, 0                  ; CF is set here, so this adds 1
        stosd
        jc      mul_d_add_prop_x86_more ; keep going while it overflows
mul_d_add_prop_x86_exit:
        pop     ebx
        pop     esi
        pop     edi
        leave
        ret
        nop
mul_d_add_prop_sse2:
        push    ebp
        mov     ebp, esp
        push    edi
        push    esi
        push    ebx
        pxor    mm2, mm2                ; carry = 0
        movd    mm1, [ebp+16]           ; mm1 = b
        mov     edi, [ebp+20]           ; edi = c
        mov     ecx, [ebp+12]           ; ecx = a_len
        test    ecx, ecx
        jz      mul_d_add_prop_sse2_ripple
        mov     esi, [ebp+8]            ; esi = a
        cld                             ; the ripple below uses stosd
mul_d_add_prop_sse2_next:
        movd    mm0, [esi]              ; mm0 = *a
        movd    mm3, [edi]              ; mm3 = current word of c
        add     esi, 4
        pmuludq mm0, mm1                ; mm0 = a_i * b (64 bits)
        paddq   mm2, mm0                ; accumulator = carry + product
        paddq   mm2, mm3                ; accumulator += *c
        movd    [edi], mm2              ; *c = low 32 bits
        add     edi, 4
        psrlq   mm2, 32                 ; high 32 bits are the next carry
        dec     ecx
        jnz     mul_d_add_prop_sse2_next
mul_d_add_prop_sse2_ripple:
        movd    ebx, mm2                ; move the carry to an integer register
        test    ebx, ebx
        jz      mul_d_add_prop_sse2_exit
        mov     eax, [edi]
        add     eax, ebx                ; *c += carry
        stosd                           ; stosd leaves CF from the add intact
        jnc     mul_d_add_prop_sse2_exit
mul_d_add_prop_sse2_more:
        mov     eax, [edi]
        adc     eax, 0                  ; CF is set here, so this adds 1
        stosd
        jc      mul_d_add_prop_sse2_more
mul_d_add_prop_sse2_exit:
        emms                            ; leave the MMX state clean
        pop     ebx
        pop     esi
        pop     edi
        leave
        ret
        nop
    }
}
/*
 * s_mpv_sqr_add_prop - add the square of every digit of a into sqrs and
 * propagate the final carry upward through sqrs.
 *
 * For each i in [0, a_len): the 64-bit square a[i] * a[i], plus the carry
 * from the previous step, is added to the two-word value held in
 * sqrs[2i] (low) and sqrs[2i + 1] (high).  Both words are written back and
 * the overflow out of the high word becomes the carry for the next step.
 * If the carry left after the loop is non-zero it is added to the next
 * word of sqrs; while that addition (and each following one) produces a
 * carry-out, 1 is added to the next higher word.  The ripple has no length
 * bound of its own: it stops only when an addition does not carry.
 *
 * Dispatch: is_sse == 0 selects the integer loop, is_sse > 0 selects the
 * SSE2 loop.  Any other value means the probe has not run yet; in that case
 * s_mpi_is_sse2() is called and its result is stored in is_sse.
 *
 * Stack frame, integer path:
 *   ebp - 24: saved ebx
 *   ebp - 20: saved esi
 *   ebp - 16: saved edi
 *   ebp - 12 .. ebp - 4: reserved by "sub esp,12", never referenced
 * Stack frame, SSE2 path:
 *   ebp - 12: saved ebx
 *   ebp -  8: saved esi
 *   ebp -  4: saved edi
 * Both paths:
 *   ebp +  0: saved ebp
 *   ebp +  4: return address
 *   ebp +  8: a argument
 *   ebp + 12: a_len argument
 *   ebp + 16: sqrs argument
 *
 * Registers, integer path:
 *   eax: current digit, then the word about to be stored
 *   ebx: carry between steps; scratch for words of sqrs inside a step
 *   ecx: digits remaining
 *   edx: high half of the square
 *   esi: read pointer into a
 *   edi: read/write pointer into sqrs
 * Registers, SSE2 path:
 *   mm0: current digit, then its 64-bit square
 *   mm2: 64-bit accumulator; after each shift it holds the carry
 *   mm3: current word of sqrs
 *   eax, ebx: used only by the carry ripple after the loop
 *   ecx, esi, edi: as above
 */
__declspec(naked) void s_mpv_sqr_add_prop(const mp_digit *a, mp_size a_len, mp_digit *sqrs)
{
    __asm {
        mov     eax, is_sse
        test    eax, eax
        jz      sqr_add_prop_x86        ; is_sse == 0: integer path
        jg      sqr_add_prop_sse2       ; is_sse  > 0: SSE2 path
        call    s_mpi_is_sse2           ; not probed yet
        mov     is_sse, eax             ; remember the answer
        test    eax, eax
        jg      sqr_add_prop_sse2
sqr_add_prop_x86:
        push    ebp
        mov     ebp, esp
        sub     esp, 12
        push    edi
        push    esi
        push    ebx
        xor     ebx, ebx                ; carry = 0
        mov     edi, [ebp+16]           ; edi = sqrs
        mov     ecx, [ebp+12]           ; ecx = a_len
        test    ecx, ecx
        jz      sqr_add_prop_x86_ripple
        cld                             ; lodsd and stosd must step upward
        mov     esi, [ebp+8]            ; esi = a
sqr_add_prop_x86_next:
        lodsd                           ; eax = *a++
        mul     eax                     ; edx:eax = a_i squared
        add     eax, ebx                ; fold in the incoming carry
        adc     edx, 0
        mov     ebx, [edi]
        add     eax, ebx                ; add the low word already in sqrs
        mov     ebx, [edi+4]            ; fetch the high word before edi moves
        stosd                           ; store new low word, CF untouched
        adc     edx, ebx                ; add the high word plus CF from above
        mov     ebx, 0                  ; must be mov: xor would destroy CF
        mov     eax, edx
        adc     ebx, 0                  ; carry = overflow out of the high word
        stosd                           ; store new high word
        dec     ecx
        jnz     sqr_add_prop_x86_next
sqr_add_prop_x86_ripple:
        test    ebx, ebx
        jz      sqr_add_prop_x86_exit   ; no carry left over
        mov     eax, [edi]
        add     eax, ebx                ; next word += carry
        stosd                           ; stosd leaves CF from the add intact
        jnc     sqr_add_prop_x86_exit
sqr_add_prop_x86_more:
        mov     eax, [edi]
        adc     eax, 0                  ; CF is set here, so this adds 1
        stosd
        jc      sqr_add_prop_x86_more   ; keep going while it overflows
sqr_add_prop_x86_exit:
        pop     ebx
        pop     esi
        pop     edi
        leave
        ret
        nop
sqr_add_prop_sse2:
        push    ebp
        mov     ebp, esp
        push    edi
        push    esi
        push    ebx
        pxor    mm2, mm2                ; carry = 0
        mov     edi, [ebp+16]           ; edi = sqrs
        mov     ecx, [ebp+12]           ; ecx = a_len
        test    ecx, ecx
        jz      sqr_add_prop_sse2_ripple
        mov     esi, [ebp+8]            ; esi = a
        cld                             ; the ripple below uses stosd
sqr_add_prop_sse2_next:
        movd    mm0, [esi]              ; mm0 = *a
        movd    mm3, [edi]              ; mm3 = low word already in sqrs
        add     esi, 4
        pmuludq mm0, mm0                ; mm0 = a_i squared (64 bits)
        paddq   mm2, mm0                ; accumulator = carry + square
        paddq   mm2, mm3                ; accumulator += low word
        movd    mm3, [edi+4]            ; mm3 = high word already in sqrs
        movd    [edi], mm2              ; store new low word
        psrlq   mm2, 32
        paddq   mm2, mm3                ; accumulator += high word
        movd    [edi+4], mm2            ; store new high word
        psrlq   mm2, 32                 ; what is left is the next carry
        add     edi, 8
        dec     ecx
        jnz     sqr_add_prop_sse2_next
sqr_add_prop_sse2_ripple:
        movd    ebx, mm2                ; move the carry to an integer register
        test    ebx, ebx
        jz      sqr_add_prop_sse2_exit
        mov     eax, [edi]
        add     eax, ebx                ; next word += carry
        stosd                           ; stosd leaves CF from the add intact
        jnc     sqr_add_prop_sse2_exit
sqr_add_prop_sse2_more:
        mov     eax, [edi]
        adc     eax, 0                  ; CF is set here, so this adds 1
        stosd
        jc      sqr_add_prop_sse2_more
sqr_add_prop_sse2_exit:
        emms                            ; leave the MMX state clean
        pop     ebx
        pop     esi
        pop     edi
        leave
        ret
        nop
    }
}
/*
 * s_mpv_div_2dx1d - divide the 64-bit value (Nhi,Nlo) by a 32-bit divisor.
 *
 * The divisor must be normalized so that its high bit is 1.  This code is
 * from NSPR.  The quotient is stored through qp, the remainder through rp,
 * and the function always returns 0.
 *
 * NOTE(review): the function performs no checks of its own.  The x86 "div"
 * instruction traps when the divisor is 0 or when the quotient does not fit
 * in 32 bits (that is, when Nhi >= divisor), so callers are presumably
 * expected to guarantee Nhi < divisor - confirm against the call sites.
 *
 * Stack layout after the initial push (no frame pointer is set up):
 *   esp +  0: saved ebx
 *   esp +  4: return address
 *   esp +  8: Nhi argument
 *   esp + 12: Nlo argument
 *   esp + 16: divisor argument
 *   esp + 20: qp argument
 *   esp + 24: rp argument
 *
 * Registers:
 *   edx:eax: dividend on entry to div; afterwards eax = quotient and
 *            edx = remainder.  eax is finally cleared for the return value.
 *   ebx:     scratch pointer, first qp and then rp (saved and restored)
 */
__declspec(naked) mp_err
s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
                mp_digit *qp, mp_digit *rp)
{
    __asm {
        push    ebx                     ; ebx must be preserved for the caller
        mov     eax, [esp+12]           ; eax = Nlo
        mov     edx, [esp+8]            ; edx = Nhi
        div     dword ptr [esp+16]      ; edx:eax / divisor
        mov     ebx, [esp+20]           ; ebx = qp
        mov     [ebx], eax              ; *qp = quotient
        mov     ebx, [esp+24]           ; ebx = rp
        mov     [ebx], edx              ; *rp = remainder
        xor     eax, eax                ; return zero
        pop     ebx
        ret
        nop
    }
}