linux/arch/i386/math-emu/reg_round.S

	.file "reg_round.S"
/*---------------------------------------------------------------------------+
 |  reg_round.S                                                              |
 |                                                                           |
 | Rounding/truncation/etc for FPU basic arithmetic functions.               |
 |                                                                           |
 | Copyright (C) 1993,1995,1997                                              |
 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
 |                       Australia.  E-mail billm@suburbia.net               |
 |                                                                           |
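 | This code has three possible entry points; the fpu_reg_round_sqrt entry   |
 | point is not defined in this file.                                        |
 | The following must be entered by a jmp instruction:                       |
 |   fpu_reg_round and fpu_Arith_exit.                                       |
 |                                                                           |
 | The FPU_round entry point is intended to be used by C code.               |
 | The code reads PARAM1, PARAM2, PARAM4 and PARAM5, so the C prototype is   |
 | presumably (confirm against the C declaration):                           |
 |  int FPU_round(FPU_REG *arg, unsigned int extent, int dummy,              |
 |                unsigned int control_w, u_char sign)                       |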
 |                                                                           |
 |    Return value is the tag of the answer, or-ed with FPU_Exception if     |
 |    one was raised, or -1 on internal error.                               |
 |                                                                           |
 | For correct "up" and "down" rounding, the argument must have the correct  |
 | sign.                                                                     |
 |                                                                           |
 +---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------+
 | Four entry points.                                                        |
 |                                                                           |
 | Needed by both the fpu_reg_round and fpu_reg_round_sqrt entry points:     |
 |  %eax:%ebx  64 bit significand                                            |
 |  %edx       32 bit extension of the significand                           |
 |  %edi       pointer to an FPU_REG for the result to be stored             |
 |  stack      calling function must have set up a C stack frame and         |
 |             pushed %esi, %edi, and %ebx                                   |
 |                                                                           |
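 | Also needed by the fpu_reg_round entry point:                             |
 |  PARAM4     A control word in the same format as the FPU control word.    |
 |  PARAM5     The sign of the result (tested against SIGN_POS).             |
 | No fpu_reg_round_sqrt entry point (control word in %cx) is defined in     |
 | this file.                                                                |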
 |                                                                           |
 |                                                                           |
 | The significand and its extension are assumed to be exact in the          |
 | following sense:                                                          |
 |   If the significand by itself is the exact result then the significand   |
 |   extension (%edx) must contain 0, otherwise the significand extension    |
 |   must be non-zero.                                                       |
 |   If the significand extension is non-zero then the significand is        |
 |   smaller than the magnitude of the correct exact result by an amount     |
 |   greater than zero and less than one ls bit of the significand.          |
 |   The significand extension is only required to have three possible       |
 |   non-zero values:                                                        |
 |       less than 0x80000000  <=> the significand is less than 1/2 an ls    |
 |                                 bit smaller than the magnitude of the     |
 |                                 true exact result.                        |
 |         exactly 0x80000000  <=> the significand is exactly 1/2 an ls bit  |
 |                                 smaller than the magnitude of the true    |
 |                                 exact result.                             |
 |    greater than 0x80000000  <=> the significand is more than 1/2 an ls    |
 |                                 bit smaller than the magnitude of the     |
 |                                 true exact result.                        |
 |                                                                           |
 +---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------+
 |  The code in this module has become quite complex, but it should handle   |
 |  all of the FPU flags which are set at this stage of the basic arithmetic |
 |  computations.                                                            |
 |  There are a few rare cases where the results are not set identically to  |
 |  a real FPU. These require a bit more thought because at this stage the   |
 |  results of the code here appear to be more consistent...                 |
 |  This may be changed in a future version.                                 |
 +---------------------------------------------------------------------------*/


#include "fpu_emu.h"
#include "exception.h"
#include "control_w.h"

/*
 * Flags for FPU_bits_lost.
 * The byte is cleared to 0 at Denorm_done (no bits lost yet); the rounding
 * code then stores LOST_DOWN when low-order bits are discarded by
 * truncation, or LOST_UP when the significand is incremented.
 */
#define	LOST_DOWN	$1
#define	LOST_UP		$2

/*
 * Flags for FPU_denormal.
 * 0 means the argument was not below the underflow threshold.
 * DENORMAL is stored when the argument was shifted right by L_Make_denorm
 * (underflow masked); UNMASKED_UNDERFLOW is stored when the underflow
 * exception is unmasked and no shift was performed.
 */
#define	DENORMAL	$1
#define	UNMASKED_UNDERFLOW $2


#ifndef NON_REENTRANT_FPU
/*	Make the code re-entrant by putting
	local storage on the stack.
	The two bytes live in the dword pushed at fpu_reg_round and popped
	at fpu_reg_round_special_exit.  They are %esp-relative, so they name
	the right bytes only while nothing else is pushed on top of that
	dword. */
#define FPU_bits_lost	(%esp)
#define FPU_denormal	1(%esp)

#else
/*	Not re-entrant, so we can gain speed by putting
	local storage in a static area: */
.data
	.align 4,0
FPU_bits_lost:
	.byte	0
FPU_denormal:
	.byte	0
#endif /* NON_REENTRANT_FPU */


.text
.globl fpu_reg_round
.globl fpu_Arith_exit

/*
 * FPU_round: entry point when called from C.
 * Builds a frame, saves %esi, %edi and %ebx, then loads the state which
 * fpu_reg_round expects:
 *   %edi      = PARAM1, used as the base for the SIGH/SIGL/EXP accesses
 *   %eax:%ebx = SIGH:SIGL of that register (the 64 bit significand)
 *   %edx      = PARAM2 (the significand extension)
 * and then falls through into fpu_reg_round.
 */
ENTRY(FPU_round)
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%esi
	pushl	%edi
	pushl	%ebx

	movl	PARAM1,%edi
	movl	SIGH(%edi),%eax
	movl	SIGL(%edi),%ebx
	movl	PARAM2,%edx

/*
 * fpu_reg_round: entry point reached by jmp with %eax:%ebx, %edx and %edi
 * already set up.  The control word is fetched from PARAM4.
 */
fpu_reg_round:			/* Normal entry point */
	movl	PARAM4,%ecx

#ifndef NON_REENTRANT_FPU
	/* Reserve one dword on the stack for FPU_bits_lost/FPU_denormal.
	   The value pushed does not matter; both bytes are written before
	   they are read. */
	pushl	%ebx		/* adjust the stack pointer */
#endif /* NON_REENTRANT_FPU */ 

#ifdef PARANOID
/* Cannot use this here yet */
/*	orl	%eax,%eax */
/*	jns	L_entry_bugged */
#endif /* PARANOID */

	/* Signed compare: an exponent at or below EXP_UNDER takes the
	   de-normalising path, which rejoins at Denorm_done. */
	cmpw	EXP_UNDER,EXP(%edi)
	jle	L_Make_denorm			/* The number is a de-normal */

	movb	$0,FPU_denormal			/* 0 -> not a de-normal */

Denorm_done:
	movb	$0,FPU_bits_lost		/* No bits yet lost in rounding */

	/* Keep the full control word in %esi; the rounding-mode test in
	   each LRound_To_xx block reloads it from there. */
	movl	%ecx,%esi
	/* Dispatch on the precision control field. */
	andl	CW_PC,%ecx
	cmpl	PR_64_BITS,%ecx
	je	LRound_To_64

	cmpl	PR_53_BITS,%ecx
	je	LRound_To_53

	cmpl	PR_24_BITS,%ecx
	je	LRound_To_24

	/* No match above.  Without PARANOID, execution falls through into
	   LRound_To_24 below. */
#ifdef PECULIAR_486
/* With the precision control bits set to 01 "(reserved)", a real 80486
   behaves as if the precision control bits were set to 11 "64 bits" */
	cmpl	PR_RESERVED_BITS,%ecx
	je	LRound_To_64
#ifdef PARANOID
	jmp	L_bugged_denorm_486
#endif /* PARANOID */ 
#else
#ifdef PARANOID
	jmp	L_bugged_denorm	/* There is no bug, just a bad control word */
#endif /* PARANOID */ 
#endif /* PECULIAR_486 */

/*
 * Round etc to 24 bit precision.
 * The 24 bits kept are the upper 24 bits of %eax.  Everything below them
 * (the low 8 bits of %eax, all of %ebx and the extension %edx) is
 * discarded.
 * In:  %eax:%ebx significand, %edx extension, %esi control word.
 * Out: %eax:%ebx rounded, FPU_bits_lost updated; continues at
 *      L_Re_normalise (directly or through LCheck_Round_Overflow).
 * %ecx is used as scratch.
 */
LRound_To_24:
	movl	%esi,%ecx
	andl	CW_RC,%ecx		/* Isolate the rounding control field */
	cmpl	RC_RND,%ecx
	je	LRound_nearest_24

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_24

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_24

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_24

	/* Without PARANOID an unmatched value falls through to LUp_24. */
#ifdef PARANOID
	jmp	L_bugged_round24
#endif /* PARANOID */ 

LUp_24:
	cmpb	SIGN_POS,PARAM5
	jne	LCheck_truncate_24	/* If negative then  up==truncate */

	jmp	LCheck_24_round_up

LDown_24:
	cmpb	SIGN_POS,PARAM5
	je	LCheck_truncate_24	/* If positive then  down==truncate */

	/* Directed rounding away from zero: increment if any of the
	   discarded bits (low 8 of %eax, %ebx, %edx) is set. */
LCheck_24_round_up:
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	orl	%ebx,%ecx
	orl	%edx,%ecx
	jnz	LDo_24_round_up
	jmp	L_Re_normalise		/* Already exact, nothing to do */

LRound_nearest_24:
	/* Do rounding of the 24th bit if needed (nearest or even) */
	/* 0x80 is exactly half of the 24th bit, as seen in the low byte
	   of %eax. */
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	cmpl	$0x00000080,%ecx
	jc	LCheck_truncate_24	/* less than half, no increment needed */

	jne	LGreater_Half_24	/* greater than half, increment needed */

	/* Possibly half, we need to check the ls bits */
	orl	%ebx,%ebx
	jnz	LGreater_Half_24	/* greater than half, increment needed */

	orl	%edx,%edx
	jnz	LGreater_Half_24	/* greater than half, increment needed */

	/* Exactly half, increment only if 24th bit is 1 (round to even) */
	testl	$0x00000100,%eax
	jz	LDo_truncate_24

LGreater_Half_24:			/* Rounding: increment at the 24th bit */
LDo_24_round_up:
	andl	$0xffffff00,%eax	/* Truncate to 24 bits */
	xorl	%ebx,%ebx
	movb	LOST_UP,FPU_bits_lost
	/* The carry flag from this addl is what LCheck_Round_Overflow
	   tests; the jmp does not alter it. */
	addl	$0x00000100,%eax
	jmp	LCheck_Round_Overflow

	/* Truncate, but only record a loss if a discarded bit is set. */
LCheck_truncate_24:
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	orl	%ebx,%ecx
	orl	%edx,%ecx
	jz	L_Re_normalise		/* No truncation needed */

LDo_truncate_24:
	andl	$0xffffff00,%eax	/* Truncate to 24 bits */
	xorl	%ebx,%ebx
	movb	LOST_DOWN,FPU_bits_lost
	jmp	L_Re_normalise


/*
 * Round etc to 53 bit precision.
 * The 53 bits kept are all of %eax plus the upper 21 bits of %ebx.
 * The low 11 bits of %ebx (mask 0x7ff) and the extension %edx are
 * discarded.
 * In:  %eax:%ebx significand, %edx extension, %esi control word.
 * Out: %eax:%ebx rounded, FPU_bits_lost updated; continues at
 *      L_Re_normalise (directly or through LCheck_Round_Overflow).
 * %ecx is used as scratch.
 */
LRound_To_53:
	movl	%esi,%ecx
	andl	CW_RC,%ecx		/* Isolate the rounding control field */
	cmpl	RC_RND,%ecx
	je	LRound_nearest_53

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_53

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_53

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_53

	/* Without PARANOID an unmatched value falls through to LUp_53. */
#ifdef PARANOID
	jmp	L_bugged_round53
#endif /* PARANOID */ 

LUp_53:
	cmpb	SIGN_POS,PARAM5
	jne	LCheck_truncate_53	/* If negative then  up==truncate */

	jmp	LCheck_53_round_up

LDown_53:
	cmpb	SIGN_POS,PARAM5
	je	LCheck_truncate_53	/* If positive then  down==truncate */

	/* Directed rounding away from zero: increment if any of the
	   discarded bits (low 11 of %ebx, %edx) is set. */
LCheck_53_round_up:
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	orl	%edx,%ecx
	jnz	LDo_53_round_up
	jmp	L_Re_normalise		/* Already exact, nothing to do */

LRound_nearest_53:
	/* Do rounding of the 53rd bit if needed (nearest or even) */
	/* 0x400 is exactly half of the 53rd bit, as seen in the low 11
	   bits of %ebx. */
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	cmpl	$0x00000400,%ecx
	jc	LCheck_truncate_53	/* less than half, no increment needed */

	jnz	LGreater_Half_53	/* greater than half, increment needed */

	/* Possibly half, we need to check the ls bits */
	orl	%edx,%edx
	jnz	LGreater_Half_53	/* greater than half, increment needed */

	/* Exactly half, increment only if 53rd bit is 1 (round to even) */
	testl	$0x00000800,%ebx
	jz	LTruncate_53

LGreater_Half_53:			/* Rounding: increment at the 53rd bit */
LDo_53_round_up:
	movb	LOST_UP,FPU_bits_lost
	andl	$0xfffff800,%ebx	/* Truncate to 53 bits */
	addl	$0x00000800,%ebx
	/* Propagate the carry into %eax; the carry out of this adcl is
	   what LCheck_Round_Overflow tests. */
	adcl	$0,%eax
	jmp	LCheck_Round_Overflow

	/* Truncate, but only record a loss if a discarded bit is set. */
LCheck_truncate_53:
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	orl	%edx,%ecx
	jz	L_Re_normalise

LTruncate_53:
	movb	LOST_DOWN,FPU_bits_lost
	andl	$0xfffff800,%ebx	/* Truncate to 53 bits */
	jmp	L_Re_normalise


/*
 * Round etc to 64 bit precision.
 * All of %eax:%ebx is kept; only the extension %edx decides the rounding.
 * In:  %eax:%ebx significand, %edx extension, %esi control word.
 * Out: %eax:%ebx rounded, FPU_bits_lost updated; execution ends up at
 *      L_Re_normalise, which follows this block (LTruncate_64 falls
 *      through into it).
 * LCheck_Round_Overflow in this block is also the join point for the
 * 24 and 53 bit round-up paths.
 */
LRound_To_64:
	movl	%esi,%ecx
	andl	CW_RC,%ecx		/* Isolate the rounding control field */
	cmpl	RC_RND,%ecx
	je	LRound_nearest_64

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_64

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_64

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_64

	/* Without PARANOID an unmatched value falls through to LUp_64. */
#ifdef PARANOID
	jmp	L_bugged_round64
#endif /* PARANOID */ 

LUp_64:
	cmpb	SIGN_POS,PARAM5
	jne	LCheck_truncate_64	/* If negative then  up==truncate */

	/* Positive: increment if the extension is non-zero. */
	orl	%edx,%edx
	jnz	LDo_64_round_up
	jmp	L_Re_normalise

LDown_64:
	cmpb	SIGN_POS,PARAM5
	je	LCheck_truncate_64	/* If positive then  down==truncate */

	/* Negative: increment the magnitude if the extension is non-zero. */
	orl	%edx,%edx
	jnz	LDo_64_round_up
	jmp	L_Re_normalise

LRound_nearest_64:
	/* 0x80000000 in the extension is exactly half an ls bit. */
	cmpl	$0x80000000,%edx
	jc	LCheck_truncate_64	/* less than half */

	jne	LDo_64_round_up		/* greater than half */

	/* Now test for round-to-even */
	testb	$1,%bl
	jz	LCheck_truncate_64	/* ls bit already even, truncate */

LDo_64_round_up:
	movb	LOST_UP,FPU_bits_lost
	addl	$1,%ebx
	adcl	$0,%eax

	/* Entered with the carry flag from the increment of the significand
	   (here, or in the 24/53 bit round-up code). */
LCheck_Round_Overflow:
	jnc	L_Re_normalise

	/* Overflow, adjust the result (significand to 1.0) */
	/* Carry is set here, so the first rcrl rotates it into the top bit
	   of %eax. */
	rcrl	$1,%eax
	rcrl	$1,%ebx
	incw	EXP(%edi)
	jmp	L_Re_normalise

LCheck_truncate_64:
	orl	%edx,%edx
	jz	L_Re_normalise		/* Extension is zero, result is exact */

LTruncate_64:
	movb	LOST_DOWN,FPU_bits_lost

/*
 * Common tail of all rounding paths.
 * In:  %eax:%ebx rounded significand, FPU_bits_lost and FPU_denormal set.
 * Stores the significand and the final exponent/sign word through %edi,
 * reports precision loss, and returns the tag in %eax.
 */
L_Re_normalise:
	/* Any non-zero FPU_denormal (DENORMAL or UNMASKED_UNDERFLOW) needs
	   extra work in Normalise_result. */
	testb	$0xff,FPU_denormal
	jnz	Normalise_result

L_Normalised:
	movl	TAG_Valid,%edx

	/* Entered with the tag to be returned in %edx. */
L_deNormalised:
	cmpb	LOST_UP,FPU_bits_lost
	je	L_precision_lost_up

	cmpb	LOST_DOWN,FPU_bits_lost
	je	L_precision_lost_down

	/* The precision-lost handlers jump back to here. */
L_no_precision_loss:
	/* store the result */

L_Store_significand:
	movl	%eax,SIGH(%edi)
	movl	%ebx,SIGL(%edi)

	/* Signed compare of the exponent against the overflow threshold. */
	cmpw	EXP_OVER,EXP(%edi)
	jge	L_overflow

	movl	%edx,%eax		/* Return value: the tag */

	/* Convert the exponent to 80x87 form. */
	addw	EXTENDED_Ebias,EXP(%edi)
	andw	$0x7fff,EXP(%edi)	/* Clear bit 15, the sign position */

	/* L_overflow re-enters here with its return value already in %eax. */
fpu_reg_round_signed_special_exit:

	cmpb	SIGN_POS,PARAM5
	je	fpu_reg_round_special_exit

	orw	$0x8000,EXP(%edi)	/* Negative sign for the result. */

fpu_reg_round_special_exit:

#ifndef NON_REENTRANT_FPU
	/* Drop the dword reserved at fpu_reg_round for the local flags;
	   %ebx is reloaded by the next popl. */
	popl	%ebx		/* adjust the stack pointer */
#endif /* NON_REENTRANT_FPU */ 

	/* Restore the registers pushed by the caller's (or FPU_round's)
	   prologue, tear down the frame and return with %eax unchanged. */
fpu_Arith_exit:
	popl	%ebx
	popl	%edi
	popl	%esi
	leave
	ret


/*
 * Set the FPU status flags to represent precision loss due to
 * round-up.
 * %edx (the tag) and %eax (ms significand dword) are saved around the
 * call and restored afterwards; control then returns to
 * L_no_precision_loss.
 */
L_precision_lost_up:
	push	%edx
	push	%eax
	call	set_precision_flag_up
	popl	%eax
	popl	%edx
	jmp	L_no_precision_loss

/*
 * Set the FPU status flags to represent precision loss due to
 * truncation.
 * Same register save/restore as L_precision_lost_up, then returns to
 * L_no_precision_loss.
 */
L_precision_lost_down:
	push	%edx
	push	%eax
	call	set_precision_flag_down
	popl	%eax
	popl	%edx
	jmp	L_no_precision_loss


/*
 * The number is a denormal (which might get rounded up to a normal)
 * Shift the number right the required number of bits, which will
 * have to be undone later...
 *
 * In:  %eax:%ebx significand, %edx extension, %ecx control word,
 *      EXP(%edi) <= EXP_UNDER.
 * Out: significand/extension shifted right, with any non-zero bits
 *      shifted out recorded as a "sticky" 1 in the low bit of the new
 *      extension; EXP(%edi) = EXP_UNDER+1; FPU_denormal = DENORMAL;
 *      %ecx restored to the control word.  Rejoins at Denorm_done.
 *      If the underflow exception is unmasked, nothing is shifted and
 *      FPU_denormal = UNMASKED_UNDERFLOW instead.
 */
L_Make_denorm:
	/* The action to be taken depends upon whether the underflow
	   exception is masked */
	testb	CW_Underflow,%cl		/* Underflow mask. */
	jz	Unmasked_underflow		/* Do not make a denormal. */

	movb	DENORMAL,FPU_denormal

	pushl	%ecx		/* Save */
	/* %cx = (EXP_UNDER+1) - exponent = number of bits to shift right */
	movw	EXP_UNDER+1,%cx
	subw	EXP(%edi),%cx

	cmpw	$64,%cx	/* shifts of 64 or more bits are handled separately */
	jnc	Denorm_shift_more_than_63

	cmpw	$32,%cx	/* shrd only works for 0..31 bits */
	jnc	Denorm_shift_more_than_32

/*
 * We got here without jumps by assuming that the most common requirement
 *   is for a small de-normalising shift.
 * Shift by [1..31] bits
 */
	addw	%cx,EXP(%edi)	/* exponent becomes EXP_UNDER+1 */
	orl	%edx,%edx	/* extension */
	setne	%ch		/* Save whether %edx is non-zero */
	xorl	%edx,%edx
	shrd	%cl,%ebx,%edx	/* bits leaving %ebx become the new extension */
	shrd	%cl,%eax,%ebx
	shr	%cl,%eax
	orb	%ch,%dl		/* sticky bit for the old extension */
	popl	%ecx		/* restore the control word */
	jmp	Denorm_done

/* Shift by [32..63] bits */
Denorm_shift_more_than_32:
	addw	%cx,EXP(%edi)	/* exponent becomes EXP_UNDER+1 */
	subb	$32,%cl		/* the final register moves below supply 32 */
	orl	%edx,%edx
	setne	%ch		/* Save whether the old extension is non-zero */
	orb	%ch,%bl
	xorl	%edx,%edx
	shrd	%cl,%ebx,%edx	/* bits leaving %ebx are collected in %edx */
	shrd	%cl,%eax,%ebx
	shr	%cl,%eax
	orl	%edx,%edx		/* test these 32 bits */
	setne	%cl
	orb	%ch,%bl		/* fold both sticky indications into %bl */
	orb	%cl,%bl
	movl	%ebx,%edx	/* move everything down by one dword */
	movl	%eax,%ebx
	xorl	%eax,%eax
	popl	%ecx		/* restore the control word */
	jmp	Denorm_done

/* Shift by [64..) bits */
Denorm_shift_more_than_63:
	cmpw	$64,%cx
	jne	Denorm_shift_more_than_64

/* Exactly 64 bit shift */
	addw	%cx,EXP(%edi)	/* exponent becomes EXP_UNDER+1 */
	xorl	%ecx,%ecx
	orl	%edx,%edx
	setne	%cl		/* old extension non-zero? */
	orl	%ebx,%ebx
	setne	%ch		/* old ls dword non-zero? */
	orb	%ch,%cl
	orb	%cl,%al		/* sticky bit into what becomes the extension */
	movl	%eax,%edx	/* the ms dword becomes the extension */
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	popl	%ecx		/* restore the control word */
	jmp	Denorm_done

Denorm_shift_more_than_64:
	movw	EXP_UNDER+1,EXP(%edi)
/* This is easy, %eax must be non-zero, so.. */
	/* The whole significand is shifted out: leave zero with a non-zero
	   extension that is less than half (1). */
	movl	$1,%edx
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	popl	%ecx		/* restore the control word */
	jmp	Denorm_done


	/* Underflow exception unmasked: the number is left unshifted and
	   only the flag is recorded.  %ecx was never pushed on this path. */
Unmasked_underflow:
	movb	UNMASKED_UNDERFLOW,FPU_denormal
	jmp	Denorm_done


/*
 * Undo the de-normalisation.
 * Reached from L_Re_normalise when FPU_denormal is non-zero.
 * In:  %eax:%ebx rounded significand.
 * Out: jumps to L_Normalised, or to L_deNormalised with %edx =
 *      TAG_Special, or to L_underflow_to_zero when the significand is
 *      zero.  The unmasked case is handed to Signal_underflow.
 */
Normalise_result:
	cmpb	UNMASKED_UNDERFLOW,FPU_denormal
	je	Signal_underflow

/* The number must be a denormal if we got here. */
#ifdef PARANOID
	/* But check it... just in case. */
	cmpw	EXP_UNDER+1,EXP(%edi)
	jne	L_norm_bugged
#endif /* PARANOID */

#ifdef PECULIAR_486
	/*
	 * This implements a special feature of 80486 behaviour.
	 * Underflow will be signalled even if the number is
	 * not a denormal after rounding.
	 * This difference occurs only for masked underflow, and not
	 * in the unmasked case.
	 * Actual 80486 behaviour differs from this in some circumstances.
	 */
	orl	%eax,%eax		/* ms bits */
	js	LPseudoDenormal		/* Will be masked underflow */
#else
	orl	%eax,%eax		/* ms bits */
	js	L_Normalised		/* No longer a denormal */
#endif /* PECULIAR_486 */ 

	/* Flags are still those of the orl above: %eax non-zero means the
	   result is a non-zero denormal. */
	jnz	LDenormal_adj_exponent

	orl	%ebx,%ebx
	jz	L_underflow_to_zero	/* The contents are zero */

LDenormal_adj_exponent:
	decw	EXP(%edi)

LPseudoDenormal:
	testb	$0xff,FPU_bits_lost	/* bits lost == underflow */
	movl	TAG_Special,%edx	/* movl leaves the flags from testb intact */
	jz	L_deNormalised

	/* There must be a masked underflow */
	push	%eax			/* preserve the ms significand dword */
	pushl	EX_Underflow		/* argument for EXCEPTION */
	call	EXCEPTION
	popl	%eax			/* discard the argument */
	popl	%eax			/* restore the ms significand dword */
	movl	TAG_Special,%edx	/* reload the tag after the call */
	jmp	L_deNormalised


/*
 * The operations resulted in a number too small to represent.
 * Masked response.
 * Reached from Normalise_result when both %eax and %ebx are zero.
 * Sets the precision flag (down) and raises EX_Underflow, then stores a
 * result with exponent EXP_UNDER and tag TAG_Zero through
 * L_Store_significand.
 */
L_underflow_to_zero:
	push	%eax			/* preserve %eax across the call */
	call	set_precision_flag_down
	popl	%eax

	push	%eax			/* preserve %eax across the call */
	pushl	EX_Underflow		/* argument for EXCEPTION */
	call	EXCEPTION
	popl	%eax			/* discard the argument */
	popl	%eax			/* restore %eax */

/* Reduce the exponent to EXP_UNDER */
	movw	EXP_UNDER,EXP(%edi)
	movl	TAG_Zero,%edx
	/* Joining at L_Store_significand skips the FPU_bits_lost checks at
	   L_deNormalised. */
	jmp	L_Store_significand


/*
 * The operations resulted in a number too large to represent.
 * Reached from L_Store_significand after the significand has been stored.
 * arith_overflow is called with %edi as its argument.  %eax is not
 * touched here after the call, so whatever arith_overflow leaves in %eax
 * (presumably its return value -- confirm against its definition)
 * becomes the return value of this routine.
 */
L_overflow:
	addw	EXTENDED_Ebias,EXP(%edi)	/* Set for unmasked response. */
	push	%edi
	call	arith_overflow
	pop	%edi
	jmp	fpu_reg_round_signed_special_exit


/*
 * Unmasked underflow handling, reached from Normalise_result when
 * FPU_denormal == UNMASKED_UNDERFLOW.  The significand was rounded
 * without being shifted.  If the exponent is still at or below EXP_UNDER
 * the exponent is biased upwards and EX_Underflow is raised; either way
 * the result is then finished as a normal number at L_Normalised.
 */
Signal_underflow:
	/* The number may have been changed to a non-denormal */
	/* by the rounding operations. */
	cmpw	EXP_UNDER,EXP(%edi)
	jle	Do_unmasked_underflow

	jmp	L_Normalised

Do_unmasked_underflow:
	/* Increase the exponent by the magic number */
	addw	$(3*(1<<13)),EXP(%edi)
	push	%eax			/* preserve the ms significand dword */
	pushl	EX_Underflow		/* argument for EXCEPTION */
	call	EXCEPTION
	popl	%eax			/* discard the argument */
	popl	%eax			/* restore the ms significand dword */
	jmp	L_Normalised


#ifdef PARANOID
/*
 * Internal-error handlers, only built with PARANOID.
 * Each one calls EXCEPTION with EX_INTERNAL or-ed with a code identifying
 * the failing check, pops the argument into %ebx, and leaves through
 * L_exception_exit with %eax = -1.  %ebx is reloaded from the stack on
 * the exit path, so it is used here only as a place to pop into.
 */
#ifdef PECULIAR_486
	/* Precision control field matched none of the tested values. */
L_bugged_denorm_486:
	pushl	EX_INTERNAL|0x236
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit
#else
	/* Precision control field matched none of the tested values. */
L_bugged_denorm:
	pushl	EX_INTERNAL|0x230
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit
#endif /* PECULIAR_486 */ 

	/* Rounding control field not recognised in LRound_To_24. */
L_bugged_round24:
	pushl	EX_INTERNAL|0x231
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

	/* Rounding control field not recognised in LRound_To_53. */
L_bugged_round53:
	pushl	EX_INTERNAL|0x232
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

	/* Rounding control field not recognised in LRound_To_64. */
L_bugged_round64:
	pushl	EX_INTERNAL|0x233
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

	/* Exponent was not EXP_UNDER+1 on entry to Normalise_result. */
L_norm_bugged:
	pushl	EX_INTERNAL|0x234
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

	/* Not reached at present: the only jump to this label is commented
	   out at the fpu_reg_round entry. */
L_entry_bugged:
	pushl	EX_INTERNAL|0x235
	call	EXCEPTION
	popl	%ebx
L_exception_exit:
	mov	$-1,%eax		/* return value for an internal error */
	jmp	fpu_reg_round_special_exit
#endif /* PARANOID */
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 15:20:36 -07:00			`.file "reg_round.S"`
			`/*---------------------------------------------------------------------------+`
			`\| reg_round.S \|`
			`\| \|`
			`\| Rounding/truncation/etc for FPU basic arithmetic functions. \|`
			`\| \|`
			`\| Copyright (C) 1993,1995,1997 \|`
			`\| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, \|`
			`\| Australia. E-mail billm@suburbia.net \|`
			`\| \|`
			`\| This code has four possible entry points. \|`
			`\| The following must be entered by a jmp instruction: \|`
			`\| fpu_reg_round, fpu_reg_round_sqrt, and fpu_Arith_exit. \|`
			`\| \|`
			`\| The FPU_round entry point is intended to be used by C code. \|`
			`\| From C, call as: \|`
			`\| int FPU_round(FPU_REG *arg, unsigned int extent, unsigned int control_w) \|`
			`\| \|`
			`\| Return value is the tag of the answer, or-ed with FPU_Exception if \|`
			`\| one was raised, or -1 on internal error. \|`
			`\| \|`
			`\| For correct "up" and "down" rounding, the argument must have the correct \|`
			`\| sign. \|`
			`\| \|`
			`+---------------------------------------------------------------------------*/`

			`/*---------------------------------------------------------------------------+`
			`\| Four entry points. \|`
			`\| \|`
			`\| Needed by both the fpu_reg_round and fpu_reg_round_sqrt entry points: \|`
			`\| %eax:%ebx 64 bit significand \|`
			`\| %edx 32 bit extension of the significand \|`
			`\| %edi pointer to an FPU_REG for the result to be stored \|`
			`\| stack calling function must have set up a C stack frame and \|`
			`\| pushed %esi, %edi, and %ebx \|`
			`\| \|`
			`\| Needed just for the fpu_reg_round_sqrt entry point: \|`
			`\| %cx A control word in the same format as the FPU control word. \|`
			`\| Otherwise, PARAM4 must give such a value. \|`
			`\| \|`
			`\| \|`
			`\| The significand and its extension are assumed to be exact in the \|`
			`\| following sense: \|`
			`\| If the significand by itself is the exact result then the significand \|`
			`\| extension (%edx) must contain 0, otherwise the significand extension \|`
			`\| must be non-zero. \|`
			`\| If the significand extension is non-zero then the significand is \|`
			`\| smaller than the magnitude of the correct exact result by an amount \|`
			`\| greater than zero and less than one ls bit of the significand. \|`
			`\| The significand extension is only required to have three possible \|`
			`\| non-zero values: \|`
			`\| less than 0x80000000 <=> the significand is less than 1/2 an ls \|`
			`\| bit smaller than the magnitude of the \|`
			`\| true exact result. \|`
			`\| exactly 0x80000000 <=> the significand is exactly 1/2 an ls bit \|`
			`\| smaller than the magnitude of the true \|`
			`\| exact result. \|`
			`\| greater than 0x80000000 <=> the significand is more than 1/2 an ls \|`
			`\| bit smaller than the magnitude of the \|`
			`\| true exact result. \|`
			`\| \|`
			`+---------------------------------------------------------------------------*/`

			`/*---------------------------------------------------------------------------+`
			`\| The code in this module has become quite complex, but it should handle \|`
			`\| all of the FPU flags which are set at this stage of the basic arithmetic \|`
			`\| computations. \|`
			`\| There are a few rare cases where the results are not set identically to \|`
			`\| a real FPU. These require a bit more thought because at this stage the \|`
			`\| results of the code here appear to be more consistent... \|`
			`\| This may be changed in a future version. \|`
			`+---------------------------------------------------------------------------*/`


			`#include "fpu_emu.h"`
			`#include "exception.h"`
			`#include "control_w.h"`

			`/* Flags for FPU_bits_lost */`
			`#define LOST_DOWN $1`
			`#define LOST_UP $2`

			`/* Flags for FPU_denormal */`
			`#define DENORMAL $1`
			`#define UNMASKED_UNDERFLOW $2`


			`#ifndef NON_REENTRANT_FPU`
			`/* Make the code re-entrant by putting`
			`local storage on the stack: */`
			`#define FPU_bits_lost (%esp)`
			`#define FPU_denormal 1(%esp)`

			`#else`
			`/* Not re-entrant, so we can gain speed by putting`
			`local storage in a static area: */`
			`.data`
			`.align 4,0`
			`FPU_bits_lost:`
			`.byte 0`
			`FPU_denormal:`
			`.byte 0`
			`#endif /* NON_REENTRANT_FPU */`


			`.text`
			`.globl fpu_reg_round`
			`.globl fpu_Arith_exit`

			`/* Entry point when called from C */`
			`ENTRY(FPU_round)`
			`pushl %ebp`
			`movl %esp,%ebp`
			`pushl %esi`
			`pushl %edi`
			`pushl %ebx`

			`movl PARAM1,%edi`
			`movl SIGH(%edi),%eax`
			`movl SIGL(%edi),%ebx`
			`movl PARAM2,%edx`

			`fpu_reg_round: /* Normal entry point */`
			`movl PARAM4,%ecx`

			`#ifndef NON_REENTRANT_FPU`
			`pushl %ebx /* adjust the stack pointer */`
			`#endif /* NON_REENTRANT_FPU */`

			`#ifdef PARANOID`
			`/* Cannot use this here yet */`
			`/* orl %eax,%eax */`
			`/* jns L_entry_bugged */`
			`#endif /* PARANOID */`

			`cmpw EXP_UNDER,EXP(%edi)`
			`jle L_Make_denorm /* The number is a de-normal */`

			`movb $0,FPU_denormal /* 0 -> not a de-normal */`

			`Denorm_done:`
			`movb $0,FPU_bits_lost /* No bits yet lost in rounding */`

			`movl %ecx,%esi`
			`andl CW_PC,%ecx`
			`cmpl PR_64_BITS,%ecx`
			`je LRound_To_64`

			`cmpl PR_53_BITS,%ecx`
			`je LRound_To_53`

			`cmpl PR_24_BITS,%ecx`
			`je LRound_To_24`

			`#ifdef PECULIAR_486`
			`/* With the precision control bits set to 01 "(reserved)", a real 80486`
			`behaves as if the precision control bits were set to 11 "64 bits" */`
			`cmpl PR_RESERVED_BITS,%ecx`
			`je LRound_To_64`
			`#ifdef PARANOID`
			`jmp L_bugged_denorm_486`
			`#endif /* PARANOID */`
			`#else`
			`#ifdef PARANOID`
			`jmp L_bugged_denorm /* There is no bug, just a bad control word */`
			`#endif /* PARANOID */`
			`#endif /* PECULIAR_486 */`


			`/* Round etc to 24 bit precision */`
			`LRound_To_24:`
			`movl %esi,%ecx`
			`andl CW_RC,%ecx`
			`cmpl RC_RND,%ecx`
			`je LRound_nearest_24`

			`cmpl RC_CHOP,%ecx`
			`je LCheck_truncate_24`

			`cmpl RC_UP,%ecx /* Towards +infinity */`
			`je LUp_24`

			`cmpl RC_DOWN,%ecx /* Towards -infinity */`
			`je LDown_24`

			`#ifdef PARANOID`
			`jmp L_bugged_round24`
			`#endif /* PARANOID */`

			`LUp_24:`
			`cmpb SIGN_POS,PARAM5`
			`jne LCheck_truncate_24 /* If negative then up==truncate */`

			`jmp LCheck_24_round_up`

			`LDown_24:`
			`cmpb SIGN_POS,PARAM5`
			`je LCheck_truncate_24 /* If positive then down==truncate */`

			`LCheck_24_round_up:`
			`movl %eax,%ecx`
			`andl $0x000000ff,%ecx`
			`orl %ebx,%ecx`
			`orl %edx,%ecx`
			`jnz LDo_24_round_up`
			`jmp L_Re_normalise`

			`LRound_nearest_24:`
			`/* Do rounding of the 24th bit if needed (nearest or even) */`
			`movl %eax,%ecx`
			`andl $0x000000ff,%ecx`
			`cmpl $0x00000080,%ecx`
			`jc LCheck_truncate_24 /* less than half, no increment needed */`

			`jne LGreater_Half_24 /* greater than half, increment needed */`

			`/* Possibly half, we need to check the ls bits */`
			`orl %ebx,%ebx`
			`jnz LGreater_Half_24 /* greater than half, increment needed */`

			`orl %edx,%edx`
			`jnz LGreater_Half_24 /* greater than half, increment needed */`

			`/* Exactly half, increment only if 24th bit is 1 (round to even) */`
			`testl $0x00000100,%eax`
			`jz LDo_truncate_24`

			`LGreater_Half_24: /* Rounding: increment at the 24th bit */`
			`LDo_24_round_up:`
			`andl $0xffffff00,%eax /* Truncate to 24 bits */`
			`xorl %ebx,%ebx`
			`movb LOST_UP,FPU_bits_lost`
			`addl $0x00000100,%eax`
			`jmp LCheck_Round_Overflow`

			`LCheck_truncate_24:`
			`movl %eax,%ecx`
			`andl $0x000000ff,%ecx`
			`orl %ebx,%ecx`
			`orl %edx,%ecx`
			`jz L_Re_normalise /* No truncation needed */`

			`LDo_truncate_24:`
			`andl $0xffffff00,%eax /* Truncate to 24 bits */`
			`xorl %ebx,%ebx`
			`movb LOST_DOWN,FPU_bits_lost`
			`jmp L_Re_normalise`


			`/* Round etc to 53 bit precision */`
			`LRound_To_53:`
			`movl %esi,%ecx`
			`andl CW_RC,%ecx`
			`cmpl RC_RND,%ecx`
			`je LRound_nearest_53`

			`cmpl RC_CHOP,%ecx`
			`je LCheck_truncate_53`

			`cmpl RC_UP,%ecx /* Towards +infinity */`
			`je LUp_53`

			`cmpl RC_DOWN,%ecx /* Towards -infinity */`
			`je LDown_53`

			`#ifdef PARANOID`
			`jmp L_bugged_round53`
			`#endif /* PARANOID */`

			`LUp_53:`
			`cmpb SIGN_POS,PARAM5`
			`jne LCheck_truncate_53 /* If negative then up==truncate */`

			`jmp LCheck_53_round_up`

			`LDown_53:`
			`cmpb SIGN_POS,PARAM5`
			`je LCheck_truncate_53 /* If positive then down==truncate */`

			`LCheck_53_round_up:`
			`movl %ebx,%ecx`
			`andl $0x000007ff,%ecx`
			`orl %edx,%ecx`
			`jnz LDo_53_round_up`
			`jmp L_Re_normalise`

			`LRound_nearest_53:`
			`/* Do rounding of the 53rd bit if needed (nearest or even) */`
			`movl %ebx,%ecx`
			`andl $0x000007ff,%ecx`
			`cmpl $0x00000400,%ecx`
			`jc LCheck_truncate_53 /* less than half, no increment needed */`

			`jnz LGreater_Half_53 /* greater than half, increment needed */`

			`/* Possibly half, we need to check the ls bits */`
			`orl %edx,%edx`
			`jnz LGreater_Half_53 /* greater than half, increment needed */`

			`/* Exactly half, increment only if 53rd bit is 1 (round to even) */`
			`testl $0x00000800,%ebx`
			`jz LTruncate_53`

			`LGreater_Half_53: /* Rounding: increment at the 53rd bit */`
			`LDo_53_round_up:`
			`movb LOST_UP,FPU_bits_lost`
			`andl $0xfffff800,%ebx /* Truncate to 53 bits */`
			`addl $0x00000800,%ebx`
			`adcl $0,%eax`
			`jmp LCheck_Round_Overflow`

			`LCheck_truncate_53:`
			`movl %ebx,%ecx`
			`andl $0x000007ff,%ecx`
			`orl %edx,%ecx`
			`jz L_Re_normalise`

			`LTruncate_53:`
			`movb LOST_DOWN,FPU_bits_lost`
			`andl $0xfffff800,%ebx /* Truncate to 53 bits */`
			`jmp L_Re_normalise`


			`/* Round etc to 64 bit precision */`
			`LRound_To_64:`
			`movl %esi,%ecx`
			`andl CW_RC,%ecx`
			`cmpl RC_RND,%ecx`
			`je LRound_nearest_64`

			`cmpl RC_CHOP,%ecx`
			`je LCheck_truncate_64`

			`cmpl RC_UP,%ecx /* Towards +infinity */`
			`je LUp_64`

			`cmpl RC_DOWN,%ecx /* Towards -infinity */`
			`je LDown_64`

			`#ifdef PARANOID`
			`jmp L_bugged_round64`
			`#endif /* PARANOID */`

			`LUp_64:`
			`cmpb SIGN_POS,PARAM5`
			`jne LCheck_truncate_64 /* If negative then up==truncate */`

			`orl %edx,%edx`
			`jnz LDo_64_round_up`
			`jmp L_Re_normalise`

			`LDown_64:`
			`cmpb SIGN_POS,PARAM5`
			`je LCheck_truncate_64 /* If positive then down==truncate */`

			`orl %edx,%edx`
			`jnz LDo_64_round_up`
			`jmp L_Re_normalise`

			`LRound_nearest_64:`
			`cmpl $0x80000000,%edx`
			`jc LCheck_truncate_64`

			`jne LDo_64_round_up`

			`/* Now test for round-to-even */`
			`testb $1,%bl`
			`jz LCheck_truncate_64`

			`LDo_64_round_up:`
			`movb LOST_UP,FPU_bits_lost`
			`addl $1,%ebx`
			`adcl $0,%eax`

			`LCheck_Round_Overflow:`
			`jnc L_Re_normalise`

			`/* Overflow, adjust the result (significand to 1.0) */`
			`rcrl $1,%eax`
			`rcrl $1,%ebx`
			`incw EXP(%edi)`
			`jmp L_Re_normalise`

			`LCheck_truncate_64:`
			`orl %edx,%edx`
			`jz L_Re_normalise`

			`LTruncate_64:`
			`movb LOST_DOWN,FPU_bits_lost`

			`L_Re_normalise:`
			`testb $0xff,FPU_denormal`
			`jnz Normalise_result`

			`L_Normalised:`
			`movl TAG_Valid,%edx`

			`L_deNormalised:`
			`cmpb LOST_UP,FPU_bits_lost`
			`je L_precision_lost_up`

			`cmpb LOST_DOWN,FPU_bits_lost`
			`je L_precision_lost_down`

			`L_no_precision_loss:`
			`/* store the result */`

			`L_Store_significand:`
			`movl %eax,SIGH(%edi)`
			`movl %ebx,SIGL(%edi)`

			`cmpw EXP_OVER,EXP(%edi)`
			`jge L_overflow`

			`movl %edx,%eax`

			`/* Convert the exponent to 80x87 form. */`
			`addw EXTENDED_Ebias,EXP(%edi)`
			`andw $0x7fff,EXP(%edi)`

			`fpu_reg_round_signed_special_exit:`

			`cmpb SIGN_POS,PARAM5`
			`je fpu_reg_round_special_exit`

			`orw $0x8000,EXP(%edi) /* Negative sign for the result. */`

			`fpu_reg_round_special_exit:`

			`#ifndef NON_REENTRANT_FPU`
			`popl %ebx /* adjust the stack pointer */`
			`#endif /* NON_REENTRANT_FPU */`

			`fpu_Arith_exit:`
			`popl %ebx`
			`popl %edi`
			`popl %esi`
			`leave`
			`ret`


			`/*`
			`* Set the FPU status flags to represent precision loss due to`
			`* round-up.`
			`*/`
			`L_precision_lost_up:`
			`push %edx`
			`push %eax`
			`call set_precision_flag_up`
			`popl %eax`
			`popl %edx`
			`jmp L_no_precision_loss`

			`/*`
			`* Set the FPU status flags to represent precision loss due to`
			`* truncation.`
			`*/`
			`L_precision_lost_down:`
			`push %edx`
			`push %eax`
			`call set_precision_flag_down`
			`popl %eax`
			`popl %edx`
			`jmp L_no_precision_loss`


			`/*`
			`* The number is a denormal (which might get rounded up to a normal)`
			`* Shift the number right the required number of bits, which will`
			`* have to be undone later...`
			`*`
			`* The value being shifted is %eax:%ebx (most significant word in`
			`* %eax) followed by the extension word %edx.  Bits which fall off`
			`* the bottom are not dropped silently: if they were non-zero, the`
			`* least significant bit of the new %edx is set as a "sticky" bit.`
			`*`
			`* The underflow mask is tested in %cl.  %ecx is pushed before %cx is`
			`* reused as the shift count, and each shift path pops it again just`
			`* before jumping to Denorm_done.`
			`*/`
			`L_Make_denorm:`
			`/* The action to be taken depends upon whether the underflow`
			`exception is masked */`
			`testb CW_Underflow,%cl /* Underflow mask. */`
			`jz Unmasked_underflow /* Do not make a denormal. */`

			`movb DENORMAL,FPU_denormal /* Record that the number was de-normalised */`

			`pushl %ecx /* Save */`
			`movw EXP_UNDER+1,%cx`
			`subw EXP(%edi),%cx /* %cx = (EXP_UNDER+1) - exponent = number of bits to shift right */`

			`cmpw $64,%cx /* 64 or more bits: all of %eax:%ebx is shifted out */`
			`jnc Denorm_shift_more_than_63`

			`cmpw $32,%cx /* shrd only works for 0..31 bits */`
			`jnc Denorm_shift_more_than_32`

			`/*`
			`* We got here without jumps by assuming that the most common requirement`
			`* is for a small de-normalising shift.`
			`* Shift by [1..31] bits`
			`*/`
			`addw %cx,EXP(%edi) /* Exponent becomes EXP_UNDER+1 */`
			`orl %edx,%edx /* extension */`
			`setne %ch /* Save whether %edx is non-zero */`
			`xorl %edx,%edx /* Clear the extension so that it can receive the bits leaving %ebx */`
			`shrd %cl,%ebx,%edx /* New extension = low bits of %ebx */`
			`shrd %cl,%eax,%ebx /* Low bits of %eax move into the top of %ebx */`
			`shr %cl,%eax`
			`orb %ch,%dl /* Fold the old extension in as a sticky bit */`
			`popl %ecx /* Restore the value saved above */`
			`jmp Denorm_done`

			`/* Shift by [32..63] bits.`
			`* Done as a shift by (count - 32) bits followed by moving every word`
			`* down one position: %ebx -> %edx, %eax -> %ebx, 0 -> %eax.`
			`* Any non-zero bits that drop off the bottom are kept as a sticky`
			`* bit in the least significant bit of the result's extension. */`
			`Denorm_shift_more_than_32:`
			`addw %cx,EXP(%edi) /* Exponent becomes EXP_UNDER+1 */`
			`subb $32,%cl /* Remaining shift count, 0..31 */`
			`orl %edx,%edx`
			`setne %ch /* %ch = 1 if the old extension was non-zero */`
			`orb %ch,%bl /* Merge it into the lsb of %ebx before shifting */`
			`xorl %edx,%edx`
			`shrd %cl,%ebx,%edx /* %edx collects the bits shifted out of %ebx */`
			`shrd %cl,%eax,%ebx`
			`shr %cl,%eax`
			`orl %edx,%edx /* test these 32 bits */`
			`setne %cl /* %cl = 1 if any shifted-out bit was set; the count is no longer needed */`
			`orb %ch,%bl /* Sticky bit from the old extension */`
			`orb %cl,%bl /* Sticky bit from the bits shifted out of %ebx */`
			`movl %ebx,%edx /* Move everything down one word */`
			`movl %eax,%ebx`
			`xorl %eax,%eax`
			`popl %ecx /* Restore the %ecx saved in L_Make_denorm */`
			`jmp Denorm_done`

			`/* Shift by 64 or more bits */`
			`Denorm_shift_more_than_63:`
			`cmpw $64,%cx`
			`jne Denorm_shift_more_than_64 /* Strictly more than 64 bits */`

			`/* Exactly 64 bit shift: %eax becomes the extension word, and both`
			`* significand words become zero. */`
			`addw %cx,EXP(%edi) /* %cx is 64 here; exponent becomes EXP_UNDER+1 */`
			`xorl %ecx,%ecx`
			`orl %edx,%edx`
			`setne %cl /* %cl = 1 if the old extension was non-zero */`
			`orl %ebx,%ebx`
			`setne %ch /* %ch = 1 if the old low word was non-zero */`
			`orb %ch,%cl`
			`orb %cl,%al /* Sticky bit for everything shifted out below %eax */`
			`movl %eax,%edx /* Old high word becomes the extension */`
			`xorl %eax,%eax`
			`xorl %ebx,%ebx`
			`popl %ecx /* Restore the %ecx saved in L_Make_denorm */`
			`jmp Denorm_done`

			`Denorm_shift_more_than_64: /* Shift by more than 64 bits: no actual shifting is needed. */`
			`movw EXP_UNDER+1,EXP(%edi) /* Store the exponent directly instead of adding the shift count */`
			`/* This is easy, %eax must be non-zero, so.. */`
			`movl $1,%edx /* ..the extension is reduced to just a set lsb (sticky bit) */`
			`xorl %eax,%eax /* Both significand words become zero */`
			`xorl %ebx,%ebx`
			`popl %ecx /* Restore the %ecx saved in L_Make_denorm */`
			`jmp Denorm_done`


			`Unmasked_underflow: /* Taken from L_Make_denorm when the CW_Underflow bit of %cl is clear; that branch is before the pushl of %ecx, so there is nothing to pop here. */`
			`movb UNMASKED_UNDERFLOW,FPU_denormal /* Only record the condition; the significand is not shifted */`
			`jmp Denorm_done`


			`/* Undo the de-normalisation.`
			`* Dispatches on FPU_denormal: the UNMASKED_UNDERFLOW case is handed`
			`* to Signal_underflow.  Otherwise the significand in %eax:%ebx is`
			`* classified (top bit set / non-zero / all zero) to decide where to`
			`* continue and whether an underflow exception is raised. */`
			`Normalise_result:`
			`cmpb UNMASKED_UNDERFLOW,FPU_denormal`
			`je Signal_underflow`

			`/* The number must be a denormal if we got here. */`
			`#ifdef PARANOID`
			`/* But check it... just in case. */`
			`cmpw EXP_UNDER+1,EXP(%edi)`
			`jne L_norm_bugged`
			`#endif /* PARANOID */`

			`#ifdef PECULIAR_486`
			`/*`
			`* This implements a special feature of 80486 behaviour.`
			`* Underflow will be signalled even if the number is`
			`* not a denormal after rounding.`
			`* This difference occurs only for masked underflow, and not`
			`* in the unmasked case.`
			`* Actual 80486 behaviour differs from this in some circumstances.`
			`*/`
			`orl %eax,%eax /* ms bits */`
			`js LPseudoDenormal /* Will be masked underflow */`
			`#else`
			`orl %eax,%eax /* ms bits */`
			`js L_Normalised /* No longer a denormal */`
			`#endif /* PECULIAR_486 */`

			`jnz LDenormal_adj_exponent /* Flags are still those of the orl above: %eax is non-zero */`

			`orl %ebx,%ebx`
			`jz L_underflow_to_zero /* The contents are zero */`

			`LDenormal_adj_exponent:`
			`decw EXP(%edi) /* One less than on entry, i.e. EXP_UNDER if the exponent was EXP_UNDER+1 as the PARANOID check expects */`

			`LPseudoDenormal:`
			`testb $0xff,FPU_bits_lost /* bits lost == underflow */`
			`movl TAG_Special,%edx /* mov does not alter the flags set by testb */`
			`jz L_deNormalised /* No bits lost: no underflow exception */`

			`/* There must be a masked underflow */`
			`push %eax /* Keep %eax across the call */`
			`pushl EX_Underflow /* Argument for EXCEPTION */`
			`call EXCEPTION`
			`popl %eax /* Discard the argument */`
			`popl %eax /* Recover the saved %eax */`
			`movl TAG_Special,%edx /* Loaded again after the call -- presumably because EXCEPTION may clobber %edx */`
			`jmp L_deNormalised`


			`/*`
			`* The operations resulted in a number too small to represent.`
			`* Masked response.`
			`*`
			`* Flags precision loss (downwards) and raises EX_Underflow, keeping`
			`* %eax intact across both calls, then sets the exponent to EXP_UNDER`
			`* and continues at L_Store_significand with TAG_Zero in %edx.`
			`*/`
			`L_underflow_to_zero:`
			`push %eax /* Keep %eax across the call */`
			`call set_precision_flag_down`
			`popl %eax`

			`push %eax /* Keep %eax across the call */`
			`pushl EX_Underflow /* Argument for EXCEPTION */`
			`call EXCEPTION`
			`popl %eax /* Discard the argument */`
			`popl %eax /* Recover the saved %eax */`

			`/* Reduce the exponent to EXP_UNDER */`
			`movw EXP_UNDER,EXP(%edi)`
			`movl TAG_Zero,%edx`
			`jmp L_Store_significand`


			`/* The operations resulted in a number too large to represent.`
			`* arith_overflow is called with %edi on the stack.  %eax is not`
			`* written between that call and the final ret, so whatever`
			`* arith_overflow leaves in %eax is what this routine returns. */`
			`L_overflow:`
			`addw EXTENDED_Ebias,EXP(%edi) /* Set for unmasked response. */`
			`push %edi /* Stack argument for arith_overflow */`
			`call arith_overflow`
			`pop %edi /* Remove the argument, reloading %edi with the same value */`
			`jmp fpu_reg_round_signed_special_exit /* Apply the sign and exit */`


			`Signal_underflow: /* Reached from Normalise_result when FPU_denormal is UNMASKED_UNDERFLOW. */`
			`/* The number may have been changed to a non-denormal */`
			`/* by the rounding operations. */`
			`cmpw EXP_UNDER,EXP(%edi)`
			`jle Do_unmasked_underflow /* Signed compare: exponent <= EXP_UNDER, still an underflow */`

			`jmp L_Normalised /* Exponent is above EXP_UNDER: no exception */`

			`Do_unmasked_underflow:`
			`/* Increase the exponent by the magic number */`
			`addw $(3*(1<<13)),EXP(%edi) /* 3*(1<<13) = 24576 = 0x6000 */`
			`push %eax /* Keep %eax across the call */`
			`pushl EX_Underflow /* Argument for EXCEPTION */`
			`call EXCEPTION`
			`popl %eax /* Discard the argument */`
			`popl %eax /* Recover the saved %eax */`
			`jmp L_Normalised`


			`#ifdef PARANOID`
			`#ifdef PECULIAR_486`
			`L_bugged_denorm_486: /* Internal-error handlers (PARANOID builds only).  Each one passes EX_INTERNAL or-ed with its own code to EXCEPTION and then leaves through L_exception_exit. */`
			`pushl EX_INTERNAL\|0x236`
			`call EXCEPTION`
			`popl %ebx /* Remove the argument; %ebx is only a scratch destination, it is reloaded on the exit path */`
			`jmp L_exception_exit`
			`#else`
			`L_bugged_denorm: /* Internal-error handlers (PARANOID builds only).  Each one passes EX_INTERNAL or-ed with its own code to EXCEPTION and then leaves through L_exception_exit. */`
			`pushl EX_INTERNAL\|0x230`
			`call EXCEPTION`
			`popl %ebx /* Remove the argument; %ebx is only a scratch destination, it is reloaded on the exit path */`
			`jmp L_exception_exit`
			`#endif /* PECULIAR_486 */`

			`L_bugged_round24: /* Internal error code 0x231 */`
			`pushl EX_INTERNAL\|0x231`
			`call EXCEPTION`
			`popl %ebx /* Remove the argument */`
			`jmp L_exception_exit`

			`L_bugged_round53: /* Internal error code 0x232 */`
			`pushl EX_INTERNAL\|0x232`
			`call EXCEPTION`
			`popl %ebx /* Remove the argument */`
			`jmp L_exception_exit`

			`L_bugged_round64: /* Internal error code 0x233 */`
			`pushl EX_INTERNAL\|0x233`
			`call EXCEPTION`
			`popl %ebx /* Remove the argument */`
			`jmp L_exception_exit`

			`L_norm_bugged: /* Internal error code 0x234 */`
			`pushl EX_INTERNAL\|0x234`
			`call EXCEPTION`
			`popl %ebx /* Remove the argument */`
			`jmp L_exception_exit`

			`L_entry_bugged: /* Internal error code 0x235; falls through into L_exception_exit */`
			`pushl EX_INTERNAL\|0x235`
			`call EXCEPTION`
			`popl %ebx /* Remove the argument */`
			`L_exception_exit: /* Shared tail of all the handlers above. */`
			`mov $-1,%eax /* Return value -1: internal error */`
			`jmp fpu_reg_round_special_exit /* Exit without touching the sign/exponent word */`
			`#endif /* PARANOID */`