mpi_x86.s - mozsearch

mozilla-central/security/nss/lib/freebl/mpi/mpi_x86.s

Enable keyboard shortcuts

Source code

File a bug in NSS :: Libraries

Revision control

Copy as Markdown

Other Tools

# This Source Code Form is subject to the terms of the Mozilla Public

# License, v. 2.0. If a copy of the MPL was not distributed with this

# file, You can obtain one at http://mozilla.org/MPL/2.0/.

.data

.align 4

 # is_sse caches which implementation the entry points below dispatch to.
 # It is read with GET and written with PUT at the top of every exported
 # multiply/square routine in this file.

 # -1 means to call s_mpi_is_sse2 to determine if we support sse2

 #    instructions (this is the initial value).

 #  0 means to use x86 instructions

 #  1 means to use sse2 instructions (the dispatchers test for "> 0")

.type	is_sse,@object

.size	is_sse,4

is_sse: .long	-1

# sigh, handle the difference between -fPIC and not PIC

# default to pic, since this file seems to be exclusively

# linux right now (solaris uses mpi_i86pc.s and windows uses

# mpi_x86_asm.c)

#
# GET var,reg   loads the 32-bit variable `var` into `reg`.
# PUT reg,var   stores `reg` into the 32-bit variable `var`.
#
# Unless NO_PIC is defined, the variable is addressed relative to the GOT
# base, which the macros expect to find in %ebx.
# NOTE(review): nothing in this file loads %ebx before GET is used (it is
# the first instruction of each exported routine), so the PIC variants
# presumably rely on the caller entering with the GOT address in %ebx --
# confirm against how the callers are compiled.
#

.ifndef NO_PIC

.macro GET   var,reg

    movl   \var@GOTOFF(%ebx),\reg

.endm

.macro PUT   reg,var

    movl   \reg,\var@GOTOFF(%ebx)

.endm

.else

 # Non-PIC build: plain absolute addressing of the variable.

.macro GET   var,reg

    movl   \var,\reg

.endm

.macro PUT   reg,var

    movl   \reg,\var

.endm

.endif

.text

 # s_mpv_mul_d(a, a_len, b, c)
 #
 # Multiplies the a_len 32-bit words at a by the single word b:
 #   c[0 .. a_len-1] receive the low words of a[i]*b plus the running
 #   carry, and c[a_len] receives the final carry word.
 #
 # The entry point first dispatches on is_sse: 0 -> plain x86 loop below,
 # > 0 -> s_mpv_mul_d_sse2, < 0 -> probe with s_mpi_is_sse2, cache the
 # answer in is_sse, then dispatch.
 #
 # Stack layout inside the x86 path:
 #  ebp - 12:	caller's ebx
 #  ebp - 8:	caller's esi
 #  ebp - 4:	caller's edi
 #  ebp + 0:	caller's ebp
 #  ebp + 4:	return address
 #  ebp + 8:	a	argument
 #  ebp + 12:	a_len	argument
 #  ebp + 16:	b	argument
 #  ebp + 20:	c	argument
 #  registers:
 #	eax:	current word / low half of product
 #	ebx:	carry
 #	ecx:	a_len (remaining words)
 #	edx:	high half of product
 #	esi:	a ptr
 #	edi:	c ptr
.globl	s_mpv_mul_d
.type	s_mpv_mul_d,@function
s_mpv_mul_d:
    GET    is_sse,%eax
    test   %eax,%eax
    je     s_mpv_mul_d_x86
    jg     s_mpv_mul_d_sse2
    # is_sse < 0: not probed yet.  Only the return address is on the stack
    # here, so pad by 12 bytes to give the callee the 16-byte aligned stack
    # the i386 System V ABI expects (assuming our caller honoured it too).
    sub    $12,%esp
    call   s_mpi_is_sse2
    add    $12,%esp
    PUT    %eax,is_sse
    test   %eax,%eax
    jg     s_mpv_mul_d_sse2
s_mpv_mul_d_x86:
    push   %ebp
    mov    %esp,%ebp
    push   %edi
    push   %esi
    push   %ebx
    xor    %ebx,%ebx		# carry = 0
    mov    12(%ebp),%ecx	# ecx = a_len
    mov    20(%ebp),%edi	# edi = c
    test   %ecx,%ecx
    jz     2f			# jmp if a_len == 0
    mov    8(%ebp),%esi		# esi = a
    cld				# lodsl/stosl must walk upwards
1:
    lodsl			# eax = *a++
    mull   16(%ebp)		# edx:eax = Phi:Plo = a_i * b
    add    %ebx,%eax		# add carry (%ebx) to edx:eax
    adc    $0,%edx
    mov    %edx,%ebx		# high half of product becomes next carry
    stosl			# *c++ = eax
    dec    %ecx			# --a_len
    jnz    1b			# jmp if a_len != 0
2:
    mov    %ebx,0(%edi)		# *c = carry
    pop    %ebx
    pop    %esi
    pop    %edi
    leave
    ret
    nop

# s_mpv_mul_d_sse2 -- MMX-register body of s_mpv_mul_d.  Entered by a jump
# from the s_mpv_mul_d entry point, so it sees the same stack arguments:
#   ebp + 8: a    ebp + 12: a_len    ebp + 16: b    ebp + 20: c
# mm1 holds b, mm2 holds the running 64-bit sum whose upper half is the
# carry into the next word.
s_mpv_mul_d_sse2:
    push   %ebp
    mov    %esp,%ebp
    push   %edi
    push   %esi
    pxor   %mm2,%mm2		# carry = 0
    movd   16(%ebp),%mm1	# mm1 = b
    mov    20(%ebp),%edi	# edi = c
    mov    12(%ebp),%ecx	# ecx = a_len
    test   %ecx,%ecx
    jz     .Lmul_d_sse2_carry	# nothing to multiply: just store carry (0)
    mov    8(%ebp),%esi		# esi = a
    cld
.Lmul_d_sse2_loop:
    movd   (%esi),%mm0		# mm0 = *a
    lea    4(%esi),%esi		# a++
    pmuludq %mm1,%mm0		# mm0 = b * a_i (64-bit product)
    paddq  %mm0,%mm2		# product + carry
    movd   %mm2,(%edi)		# *c = low 32 bits
    lea    4(%edi),%edi		# c++
    psrlq  $32,%mm2		# upper 32 bits become the next carry
    sub    $1,%ecx		# --a_len
    jnz    .Lmul_d_sse2_loop
.Lmul_d_sse2_carry:
    movd   %mm2,(%edi)		# *c = carry
    emms			# leave MMX state before returning
    pop    %esi
    pop    %edi
    pop    %ebp			# esp == ebp here, so this undoes the frame
    ret
    nop

 # s_mpv_mul_d_add(a, a_len, b, c)
 #
 # Multiply-accumulate: for each of the a_len words, c[i] = low word of
 # (a[i]*b + c[i] + carry); the final carry word is then stored to
 # c[a_len] (stored, not added).
 #
 # The entry point first dispatches on is_sse: 0 -> plain x86 loop below,
 # > 0 -> s_mpv_mul_d_add_sse2, < 0 -> probe with s_mpi_is_sse2, cache the
 # answer in is_sse, then dispatch.
 #
 # Stack layout inside the x86 path:
 #  ebp - 12:	caller's ebx
 #  ebp - 8:	caller's esi
 #  ebp - 4:	caller's edi
 #  ebp + 0:	caller's ebp
 #  ebp + 4:	return address
 #  ebp + 8:	a	argument
 #  ebp + 12:	a_len	argument
 #  ebp + 16:	b	argument
 #  ebp + 20:	c	argument
 #  registers:
 #	eax:	current word / low half of sum
 #	ebx:	carry
 #	ecx:	a_len (remaining words)
 #	edx:	high half of sum
 #	esi:	a ptr
 #	edi:	c ptr
.globl	s_mpv_mul_d_add
.type	s_mpv_mul_d_add,@function
s_mpv_mul_d_add:
    GET    is_sse,%eax
    test   %eax,%eax
    je     s_mpv_mul_d_add_x86
    jg     s_mpv_mul_d_add_sse2
    # is_sse < 0: not probed yet.  Only the return address is on the stack
    # here, so pad by 12 bytes to give the callee the 16-byte aligned stack
    # the i386 System V ABI expects (assuming our caller honoured it too).
    sub    $12,%esp
    call   s_mpi_is_sse2
    add    $12,%esp
    PUT    %eax,is_sse
    test   %eax,%eax
    jg     s_mpv_mul_d_add_sse2
s_mpv_mul_d_add_x86:
    push   %ebp
    mov    %esp,%ebp
    push   %edi
    push   %esi
    push   %ebx
    xor    %ebx,%ebx		# carry = 0
    mov    12(%ebp),%ecx	# ecx = a_len
    mov    20(%ebp),%edi	# edi = c
    test   %ecx,%ecx
    jz     11f			# jmp if a_len == 0
    mov    8(%ebp),%esi		# esi = a
    cld				# lodsl/stosl must walk upwards
10:
    lodsl			# eax = *a++
    mull   16(%ebp)		# edx:eax = Phi:Plo = a_i * b
    add    %ebx,%eax		# add carry (%ebx) to edx:eax
    adc    $0,%edx
    add    0(%edi),%eax		# add in current word from *c
    adc    $0,%edx
    mov    %edx,%ebx		# high half of sum becomes next carry
    stosl			# *c++ = eax
    dec    %ecx			# --a_len
    jnz    10b			# jmp if a_len != 0
11:
    mov    %ebx,0(%edi)		# *c = carry
    pop    %ebx
    pop    %esi
    pop    %edi
    leave
    ret
    nop

# s_mpv_mul_d_add_sse2 -- MMX-register body of s_mpv_mul_d_add.  Entered by
# a jump from the s_mpv_mul_d_add entry point, so it sees the same stack
# arguments:
#   ebp + 8: a    ebp + 12: a_len    ebp + 16: b    ebp + 20: c
# mm1 holds b, mm2 holds the running 64-bit sum whose upper half is the
# carry into the next word.
s_mpv_mul_d_add_sse2:
    push   %ebp
    mov    %esp,%ebp
    push   %edi
    push   %esi
    pxor   %mm2,%mm2		# carry = 0
    movd   16(%ebp),%mm1	# mm1 = b
    mov    20(%ebp),%edi	# edi = c
    mov    12(%ebp),%ecx	# ecx = a_len
    test   %ecx,%ecx
    jz     .Lmul_d_add_sse2_carry	# a_len == 0: just store carry (0)
    mov    8(%ebp),%esi		# esi = a
    cld
.Lmul_d_add_sse2_loop:
    movd   (%esi),%mm0		# mm0 = *a
    lea    4(%esi),%esi		# a++
    pmuludq %mm1,%mm0		# mm0 = b * a_i (64-bit product)
    paddq  %mm0,%mm2		# product + carry
    movd   (%edi),%mm0		# mm0 = current word of *c
    paddq  %mm0,%mm2		# ... + *c
    movd   %mm2,(%edi)		# *c = low 32 bits
    lea    4(%edi),%edi		# c++
    psrlq  $32,%mm2		# upper 32 bits become the next carry
    sub    $1,%ecx		# --a_len
    jnz    .Lmul_d_add_sse2_loop
.Lmul_d_add_sse2_carry:
    movd   %mm2,(%edi)		# *c = carry
    emms			# leave MMX state before returning
    pop    %esi
    pop    %edi
    pop    %ebp			# esp == ebp here, so this undoes the frame
    ret
    nop

 # s_mpv_mul_d_add_prop(a, a_len, b, c)
 #
 # Multiply-accumulate with carry propagation: for each of the a_len words,
 # c[i] = low word of (a[i]*b + c[i] + carry).  A non-zero final carry is
 # then added into c[a_len], and any overflow keeps rippling into the
 # following words of c until an addition produces no carry.
 # NOTE(review): the ripple has no length bound; presumably callers
 # guarantee c is long enough to absorb it -- confirm.
 #
 # The entry point first dispatches on is_sse: 0 -> plain x86 loop below,
 # > 0 -> s_mpv_mul_d_add_prop_sse2, < 0 -> probe with s_mpi_is_sse2, cache
 # the answer in is_sse, then dispatch.
 #
 # Stack layout inside the x86 path:
 #  ebp - 12:	caller's ebx
 #  ebp - 8:	caller's esi
 #  ebp - 4:	caller's edi
 #  ebp + 0:	caller's ebp
 #  ebp + 4:	return address
 #  ebp + 8:	a	argument
 #  ebp + 12:	a_len	argument
 #  ebp + 16:	b	argument
 #  ebp + 20:	c	argument
 #  registers:
 #	eax:	current word / low half of sum
 #	ebx:	carry
 #	ecx:	a_len (remaining words)
 #	edx:	high half of sum
 #	esi:	a ptr
 #	edi:	c ptr
.globl	s_mpv_mul_d_add_prop
.type	s_mpv_mul_d_add_prop,@function
s_mpv_mul_d_add_prop:
    GET    is_sse,%eax
    test   %eax,%eax
    je     s_mpv_mul_d_add_prop_x86
    jg     s_mpv_mul_d_add_prop_sse2
    # is_sse < 0: not probed yet.  Only the return address is on the stack
    # here, so pad by 12 bytes to give the callee the 16-byte aligned stack
    # the i386 System V ABI expects (assuming our caller honoured it too).
    sub    $12,%esp
    call   s_mpi_is_sse2
    add    $12,%esp
    PUT    %eax,is_sse
    test   %eax,%eax
    jg     s_mpv_mul_d_add_prop_sse2
s_mpv_mul_d_add_prop_x86:
    push   %ebp
    mov    %esp,%ebp
    push   %edi
    push   %esi
    push   %ebx
    xor    %ebx,%ebx		# carry = 0
    mov    12(%ebp),%ecx	# ecx = a_len
    mov    20(%ebp),%edi	# edi = c
    test   %ecx,%ecx
    jz     21f			# jmp if a_len == 0
    cld				# lodsl/stosl must walk upwards
    mov    8(%ebp),%esi		# esi = a
20:
    lodsl			# eax = *a++
    mull   16(%ebp)		# edx:eax = Phi:Plo = a_i * b
    add    %ebx,%eax		# add carry (%ebx) to edx:eax
    adc    $0,%edx
    add    0(%edi),%eax		# add in current word from *c
    adc    $0,%edx
    mov    %edx,%ebx		# high half of sum becomes next carry
    stosl			# *c++ = eax
    dec    %ecx			# --a_len
    jnz    20b			# jmp if a_len != 0
21:
    test   %ebx,%ebx		# is carry zero?
    jz     23f
    mov    0(%edi),%eax		# add carry into the next word of c
    add    %ebx,%eax
    stosl			# stosl leaves CF from the add intact
    jnc    23f
22:
    # CF is set on every entry here (from the add above or the adc below);
    # mov and stosl do not touch it, so adc $0 adds exactly that carry.
    mov    0(%edi),%eax
    adc    $0,%eax
    stosl
    jc     22b
23:
    pop    %ebx
    pop    %esi
    pop    %edi
    leave
    ret
    nop

# s_mpv_mul_d_add_prop_sse2 -- MMX-register body of s_mpv_mul_d_add_prop.
# Entered by a jump from the s_mpv_mul_d_add_prop entry point, so it sees
# the same stack arguments:
#   ebp + 8: a    ebp + 12: a_len    ebp + 16: b    ebp + 20: c
# mm1 holds b, mm2 the running 64-bit sum (upper half = carry), mm3 the
# word of c being accumulated into.  After the multiply loop the carry is
# moved to ebx and rippled through c with ordinary integer adds.
s_mpv_mul_d_add_prop_sse2:
    push   %ebp
    mov    %esp,%ebp
    push   %edi
    push   %esi
    push   %ebx
    pxor   %mm2,%mm2		# carry = 0
    movd   16(%ebp),%mm1	# mm1 = b
    mov    20(%ebp),%edi	# edi = c
    mov    12(%ebp),%ecx	# ecx = a_len
    test   %ecx,%ecx
    jz     .Lmul_d_add_prop_sse2_tail	# a_len == 0
    mov    8(%ebp),%esi		# esi = a
    cld
.Lmul_d_add_prop_sse2_loop:
    movd   (%esi),%mm0		# mm0 = *a
    movd   (%edi),%mm3		# mm3 = *c
    lea    4(%esi),%esi		# a++
    pmuludq %mm1,%mm0		# mm0 = b * a_i (64-bit product)
    paddq  %mm0,%mm2		# product + carry
    paddq  %mm3,%mm2		# ... + *c
    movd   %mm2,(%edi)		# *c = low 32 bits
    lea    4(%edi),%edi		# c++
    psrlq  $32,%mm2		# upper 32 bits become the next carry
    sub    $1,%ecx		# --a_len
    jnz    .Lmul_d_add_prop_sse2_loop
.Lmul_d_add_prop_sse2_tail:
    movd   %mm2,%ebx		# ebx = final carry
    test   %ebx,%ebx		# is carry zero?
    jz     .Lmul_d_add_prop_sse2_done
    mov    (%edi),%eax		# add carry into the next word of c
    add    %ebx,%eax
    stosl			# stosl leaves CF from the add intact
    jnc    .Lmul_d_add_prop_sse2_done
.Lmul_d_add_prop_sse2_ripple:
    mov    (%edi),%eax		# CF is set here; push it one word further
    adc    $0,%eax
    stosl
    jc     .Lmul_d_add_prop_sse2_ripple
.Lmul_d_add_prop_sse2_done:
    emms			# leave MMX state before returning
    pop    %ebx
    pop    %esi
    pop    %edi
    pop    %ebp			# esp == ebp here, so this undoes the frame
    ret
    nop

 # s_mpv_sqr_add_prop(pa, a_len, ps)
 #
 # For each of the a_len words of pa, adds the 64-bit square pa[i]*pa[i]
 # into the two-word slot ps[2i], ps[2i+1], carrying from one slot into the
 # next.  A non-zero final carry is then added into the word after the
 # last slot, and any overflow keeps rippling into the following words of
 # ps until an addition produces no carry.
 # NOTE(review): the ripple has no length bound; presumably callers
 # guarantee ps is long enough to absorb it -- confirm.
 #
 # The entry point first dispatches on is_sse: 0 -> plain x86 loop below,
 # > 0 -> s_mpv_sqr_add_prop_sse2, < 0 -> probe with s_mpi_is_sse2, cache
 # the answer in is_sse, then dispatch.
 #
 # Stack layout inside the x86 path:
 #  ebp - 12:	caller's ebx
 #  ebp - 8:	caller's esi
 #  ebp - 4:	caller's edi
 #  ebp + 0:	caller's ebp
 #  ebp + 4:	return address
 #  ebp + 8:	pa	argument
 #  ebp + 12:	a_len	argument
 #  ebp + 16:	ps	argument
 #  registers:
 #	eax:	current word / low half of sum
 #	ebx:	carry (0 or 1 between iterations)
 #	ecx:	a_len (remaining words)
 #	edx:	high half of sum
 #	esi:	pa ptr
 #	edi:	ps ptr
.globl	s_mpv_sqr_add_prop
.type	s_mpv_sqr_add_prop,@function
s_mpv_sqr_add_prop:
    GET    is_sse,%eax
    test   %eax,%eax
    je     s_mpv_sqr_add_prop_x86
    jg     s_mpv_sqr_add_prop_sse2
    # is_sse < 0: not probed yet.  Only the return address is on the stack
    # here, so pad by 12 bytes to give the callee the 16-byte aligned stack
    # the i386 System V ABI expects (assuming our caller honoured it too).
    sub    $12,%esp
    call   s_mpi_is_sse2
    add    $12,%esp
    PUT    %eax,is_sse
    test   %eax,%eax
    jg     s_mpv_sqr_add_prop_sse2
s_mpv_sqr_add_prop_x86:
    push   %ebp
    mov    %esp,%ebp
    push   %edi
    push   %esi
    push   %ebx
    xor    %ebx,%ebx		# carry = 0
    mov    12(%ebp),%ecx	# ecx = a_len
    mov    16(%ebp),%edi	# edi = ps
    test   %ecx,%ecx
    jz     31f			# jump if a_len == 0
    cld				# lodsl/stosl must walk upwards
    mov    8(%ebp),%esi		# esi = pa
30:
    lodsl			# eax = *pa++
    mull   %eax			# edx:eax = a_i * a_i
    add    %ebx,%eax		# add "carry"
    adc    $0,%edx
    add    0(%edi),%eax		# add low word from result
    adc    4(%edi),%edx		# add high word from result (+ CF)
    movl   $0,%ebx		# must be mov, not xor: CF has to survive
    adc    $0,%ebx		# ebx = carry out of the high word
    stosl			# ps[0] = low word;  ps++
    mov    %edx,%eax
    stosl			# ps[0] = high word; ps++
    dec    %ecx			# --a_len
    jnz    30b			# jmp if a_len != 0
31:
    test   %ebx,%ebx		# is carry zero?
    jz     34f
    mov    0(%edi),%eax		# add carry into the next word of ps
    add    %ebx,%eax
    stosl			# stosl leaves CF from the add intact
    jnc    34f
32:
    # CF is set on every entry here (from the add above or the adc below);
    # mov and stosl do not touch it, so adc $0 adds exactly that carry.
    mov    0(%edi),%eax
    adc    $0,%eax
    stosl
    jc     32b
34:
    pop    %ebx
    pop    %esi
    pop    %edi
    leave
    ret
    nop

# s_mpv_sqr_add_prop_sse2 -- MMX-register body of s_mpv_sqr_add_prop.
# Entered by a jump from the s_mpv_sqr_add_prop entry point, so it sees the
# same stack arguments:
#   ebp + 8: pa    ebp + 12: a_len    ebp + 16: ps
# Each iteration squares one word of pa and adds the 64-bit result into
# two consecutive words of ps; mm2 carries the overflow between words.
# After the loop the carry is moved to ebx and rippled through ps with
# ordinary integer adds.
s_mpv_sqr_add_prop_sse2:
    push   %ebp
    mov    %esp,%ebp
    push   %edi
    push   %esi
    push   %ebx
    pxor   %mm2,%mm2		# carry = 0
    mov    16(%ebp),%edi	# edi = ps
    mov    12(%ebp),%ecx	# ecx = a_len
    test   %ecx,%ecx
    jz     .Lsqr_add_prop_sse2_tail	# a_len == 0
    mov    8(%ebp),%esi		# esi = pa
    cld
.Lsqr_add_prop_sse2_loop:
    movd   (%esi),%mm0		# mm0 = *pa
    movd   (%edi),%mm3		# mm3 = low word of the ps slot
    lea    4(%esi),%esi		# pa++
    pmuludq %mm0,%mm0		# mm0 = sqr(a_i)
    paddq  %mm0,%mm2		# square + carry
    paddq  %mm3,%mm2		# ... + low word
    movd   4(%edi),%mm3		# mm3 = high word of the ps slot
    movd   %mm2,(%edi)		# store low 32 bits
    psrlq  $32,%mm2		# carry into the high word
    paddq  %mm3,%mm2		# ... + high word
    movd   %mm2,4(%edi)		# store high 32 bits
    psrlq  $32,%mm2		# carry into the next slot
    lea    8(%edi),%edi		# ps += 2 words
    sub    $1,%ecx		# --a_len
    jnz    .Lsqr_add_prop_sse2_loop
.Lsqr_add_prop_sse2_tail:
    movd   %mm2,%ebx		# ebx = final carry
    test   %ebx,%ebx		# is carry zero?
    jz     .Lsqr_add_prop_sse2_done
    mov    (%edi),%eax		# add carry into the next word of ps
    add    %ebx,%eax
    stosl			# stosl leaves CF from the add intact
    jnc    .Lsqr_add_prop_sse2_done
.Lsqr_add_prop_sse2_ripple:
    mov    (%edi),%eax		# CF is set here; push it one word further
    adc    $0,%eax
    stosl
    jc     .Lsqr_add_prop_sse2_ripple
.Lsqr_add_prop_sse2_done:
    emms			# leave MMX state before returning
    pop    %ebx
    pop    %esi
    pop    %edi
    pop    %ebp			# esp == ebp here, so this undoes the frame
    ret
    nop

 # Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
 # so its high bit is 1.   This code is from NSPR.
 # mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
 # 		          mp_digit *qp, mp_digit *rp)
 #
 # Stores the quotient to *qp and the remainder to *rp, and always
 # returns zero in eax.
 #
 # Stack layout after the push below:
 #  esp +  0:	caller's ebx
 #  esp +  4:	return address
 #  esp +  8:	Nhi	argument
 #  esp + 12:	Nlo	argument
 #  esp + 16:	divisor	argument
 #  esp + 20:	qp	argument
 #  esp + 24:	rp	argument
 #  registers:
 #	edx:eax	dividend in, remainder:quotient out
 #	ebx:	scratch pointer for qp / rp (saved and restored)
.globl	s_mpv_div_2dx1d
.type	s_mpv_div_2dx1d,@function
s_mpv_div_2dx1d:
       push   %ebx
       mov    12(%esp),%eax		# eax = Nlo
       mov    8(%esp),%edx		# edx = Nhi
       divl   16(%esp)			# eax = quotient, edx = remainder
       mov    20(%esp),%ebx
       mov    %eax,(%ebx)		# *qp = quotient
       mov    24(%esp),%ebx
       mov    %edx,(%ebx)		# *rp = remainder
       xor    %eax,%eax			# return zero
       pop    %ebx
       ret
       nop

 # Magic indicating no need for an executable stack

.section .note.GNU-stack, "", @progbits

.previous