47ba5f39ea
Now that we can dispatch all VFP and iWMMXT related undef exceptions using undef hooks implemented in C code, we no longer need the asm entry code that takes care of this unless we are using FPE, so we can move it into the FPE entry code. As this means it is ARM only, we can remove the Thumb2 specific decorations as well. It also means the non-standard, asm-only calling convention where returning via LR means failure and returning via R9 means success is now only used on legacy platforms that lack any kind of function return prediction, avoiding the associated performance impact. Reviewed-by: Linus Walleij <linus.walleij@linaro.org> Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
191 lines
5.8 KiB
ArmAsm
191 lines
5.8 KiB
ArmAsm
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
|
/*
|
|
NetWinder Floating Point Emulator
|
|
(c) Rebel.COM, 1998
|
|
(c) 1998, 1999 Philip Blundell
|
|
|
|
Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
|
|
|
|
*/
|
|
#include <linux/linkage.h>
|
|
#include <asm/assembler.h>
|
|
#include <asm/opcodes.h>
|
|
|
|
/* This is the kernel's entry point into the floating point emulator.
|
|
It is called from the kernel with code similar to this:
|
|
|
|
sub r4, r5, #4
|
|
ldrt r0, [r4] @ r0 = instruction
|
|
adrsvc al, r9, ret_from_exception @ r9 = normal FP return
|
|
adrsvc al, lr, fpundefinstr @ lr = undefined instr return
|
|
|
|
get_current_task r10
|
|
mov r8, #1
|
|
strb r8, [r10, #TSK_USED_MATH] @ set current->used_math
|
|
add r10, r10, #TSS_FPESAVE @ r10 = workspace
|
|
ldr r4, .LC2
|
|
ldr pc, [r4] @ Call FP emulator entry point
|
|
|
|
The kernel expects the emulator to return via one of two possible
|
|
points of return it passes to the emulator. The emulator, if
|
|
successful in its emulation, jumps to ret_from_exception (passed in
|
|
r9) and the kernel takes care of returning control from the trap to
|
|
the user code. If the emulator is unable to emulate the instruction,
|
|
it returns via fpundefinstr (passed via lr) and the kernel halts the
|
|
user program with a core dump.
|
|
|
|
On entry to the emulator r10 points to an area of private FP workspace
|
|
reserved in the thread structure for this process. This is where the
|
|
emulator saves its registers across calls. The first word of this area
|
|
is used as a flag to detect the first time a process uses floating point,
|
|
so that the emulator startup cost can be avoided for tasks that don't
|
|
want it.
|
|
|
|
This routine does three things:
|
|
|
|
1) The kernel has created a struct pt_regs on the stack and saved the
|
|
user registers into it. See /usr/include/asm/proc/ptrace.h for details.
|
|
|
|
2) It calls EmulateAll to emulate a floating point instruction.
|
|
EmulateAll returns 1 if the emulation was successful, or 0 if not.
|
|
|
|
3) If an instruction has been emulated successfully, it looks ahead at
|
|
the next instruction. If it is a floating point instruction, it
|
|
executes the instruction, without returning to user space. In this
|
|
way it repeatedly looks ahead and executes floating point instructions
|
|
until it encounters a non floating point instruction, at which time it
|
|
returns via the normal FP return address passed in r9.
|
|
|
|
This is done to reduce the effect of the trap overhead on each
|
|
floating point instruction. GCC attempts to group floating point
|
|
instructions to allow the emulator to spread the cost of the trap over
|
|
several floating point instructions. */
|
|
|
|
#include <asm/asm-offsets.h>
|
|
|
|
@
@ nwfpe_enter - entry point of the NetWinder floating point emulator.
@
@ Registers on entry, as consumed by the code below:
@   r0 = opcode of the trapped instruction
@   r9 = return address used when emulation succeeded
@   lr = return address used when the instruction cannot be emulated
@   sp = struct pt_regs (accessed through the S_PC / S_PSR offsets)
@
@ Registers used internally.  They stay live across the calls to
@ arm_check_condition and EmulateAll, so this code relies on those
@ callees preserving them:
@   r4 = saved failure-return address (copy of lr)
@   r5 = user PC: address of the next instruction to look at
@   r6 = opcode currently being processed
@   sl = copy of sp
@
	.globl	nwfpe_enter
nwfpe_enter:
	mov	r4, lr			@ save the failure-return address
	mov	sl, sp			@ we access the registers via 'sl'

	ldr	r5, [sp, #S_PC]		@ r5 = saved user PC
	mov	r6, r0			@ save the opcode

emulate:
	@ r0 = opcode here, both on first entry and when looping back.
	ldr	r1, [sp, #S_PSR]	@ fetch the PSR
	bl	arm_check_condition	@ check the condition
	cmp	r0, #ARM_OPCODE_CONDTEST_PASS	@ condition passed?

	@ if the condition code failed to match, skip the emulation and
	@ go straight to the next instruction
	bne	next

	mov	r0, r6			@ prepare for EmulateAll()
	bl	EmulateAll		@ emulate the instruction
	cmp	r0, #0			@ was emulation successful?
	reteq	r4			@ no, return failure

next:
	uaccess_enable r3
.Lx1:	ldrt	r6, [r5], #4		@ get the next instruction and
					@ increment PC
	@ On BE8 kernels data loads are big-endian while instructions are
	@ stored little-endian, so swap the loaded opcode, exactly as
	@ call_fpe does for the trapping instruction.  Expands to nothing
	@ on other configurations.
ARM_BE8(rev	r6, r6)			@ little endian instruction
	uaccess_disable r3

	@ Opcode bits [27:24] equal to 0xC, 0xD or 0xE are treated as FP
	@ instructions; anything else ends the look-ahead.
	and	r2, r6, #0x0F000000	@ test for FP insns
	teq	r2, #0x0C000000
	teqne	r2, #0x0D000000
	teqne	r2, #0x0E000000
	retne	r9			@ return ok if not a fp insn

	str	r5, [sp, #S_PC]		@ update PC copy in regs

	mov	r0, r6			@ r0 = opcode for the next round
	b	emulate			@ check condition and emulate
|
|
|
|
@
@ Fault fixups for the user-space opcode loads in this file.
@
@ The look-ahead load at .Lx1 may fault.  The __ex_table entry below
@ sends such a fault to .Lfix, which simply returns through r9 and
@ lets the user eat the segfault.
@
@ .Lrep is the fixup label named by the USERL() opcode load in
@ call_fpe (presumably USERL() emits the matching __ex_table entry;
@ confirm against its definition in <asm/assembler.h>).  It writes r4
@ back to the saved PC so that the current instruction is retried,
@ then falls through to .Lfix.
@
@ Historical note: this comment used to mention a second load, .Lx2,
@ remarking that faults could happen there even with a plain LDR
@ instruction, which seemed weird but harmless.  No .Lx2 label is
@ defined alongside this code.
@
	.pushsection .text.fixup,"ax"
	.align	2
.Lrep:	str	r4, [sp, #S_PC]		@ retry current instruction
.Lfix:	ret	r9			@ let the user eat segfaults
	.popsection

	.pushsection __ex_table,"a"
	.align	3
	.long	.Lx1, .Lfix
	.popsection
|
|
|
|
@
@ Check whether the instruction is a co-processor instruction.
@ If yes, we need to call the relevant co-processor handler.
@ Only FPE instructions are dispatched here, everything else
@ is handled by undef hooks.
@
@ Emulators may wish to make use of the following registers:
@  r4  = PC value to resume execution after successful emulation
@  r9  = normal "successful" return address
@  lr  = unrecognised instruction return address
@ IRQs enabled, FIQs enabled.
@
@ Returning through lr reports the instruction as unrecognised;
@ a handler that succeeds returns through r9 instead.
@
ENTRY(call_fpe)
	mov	r2, r4				@ r2 = resume PC, kept for the FP module
	sub	r4, r4, #4			@ ARM instruction at user PC - 4
USERL(	.Lrep,	ldrt r0, [r4])			@ load opcode from user space
ARM_BE8(rev	r0, r0)				@ little endian instruction

	uaccess_disable ip

	get_thread_info r10			@ get current thread
	tst	r0, #0x08000000			@ only CDP/CPRT/LDC/STC have bit 27
	reteq	lr
	and	r8, r0, #0x00000f00		@ mask out CP number
#ifdef CONFIG_IWMMXT
	@ NOTE(review): the header above says only FPE instructions are
	@ dispatched here, yet this block still branches to
	@ iwmmxt_task_enable.  Confirm whether it is still required or is
	@ a leftover.
	@
	@ Test if we need to give access to iWMMXt coprocessors.
	@ rsbs leaves carry set only when r8 <= (1 << 8), i.e. CP 0 or 1.
	@ movscs then replaces carry with bit TIF_USING_IWMMXT of the
	@ thread flags, so the branch is taken only if both hold.
	ldr	r5, [r10, #TI_FLAGS]
	rsbs	r7, r8, #(1 << 8)		@ CP 0 or 1 only
	movscs	r7, r5, lsr #(TIF_USING_IWMMXT + 1)
	movcs	r0, sp				@ pass struct pt_regs
	bcs	iwmmxt_task_enable
#endif
	@ Jump table indexed by coprocessor number.  r8 holds CP# << 8, so
	@ r8 lsr #6 is CP# * 4: one 4-byte ARM instruction per slot.  In
	@ ARM state pc reads as the address of the add plus 8, hence slot 0
	@ is the instruction after the nop.  Every slot must assemble to
	@ exactly one instruction; do not insert anything between the add
	@ and the end of the table.
	add	pc, pc, r8, lsr #6
	nop

	ret	lr				@ CP#0
	b	do_fpe				@ CP#1 (FPE)
	b	do_fpe				@ CP#2 (FPE)
	ret	lr				@ CP#3
	ret	lr				@ CP#4
	ret	lr				@ CP#5
	ret	lr				@ CP#6
	ret	lr				@ CP#7
	ret	lr				@ CP#8
	ret	lr				@ CP#9
	ret	lr				@ CP#10 (VFP)
	ret	lr				@ CP#11 (VFP)
	ret	lr				@ CP#12
	ret	lr				@ CP#13
	ret	lr				@ CP#14 (Debug)
	ret	lr				@ CP#15 (Control)
|
|
|
|
@
@ do_fpe - hand a CP#1 / CP#2 instruction over to the FP module.
@
@ Reached from the call_fpe jump table with r10 pointing at the
@ current thread_info.  r10 is advanced to the FP state area and
@ control is transferred through the pointer stored at fp_enter.
@ r4 is named as the scratch register of ldr_va, so it should be
@ assumed clobbered from here on.
@
do_fpe:
	add	r10, r10, #TI_FPSTATE		@ r10 = workspace
	ldr_va	pc, fp_enter, tmp=r4		@ Call FP module USR entry point

@
@ The FP module is called with these registers set:
@  r0  = instruction
@  r2  = PC+4
@  r9  = normal "successful" return address
@  r10 = FP workspace
@  lr  = unrecognised FP instruction return address
@
|
|
|
|
@
@ fp_enter - one word in .data holding the address that do_fpe jumps
@ to.  It is initialised to no_fp.  Presumably an FP emulator stores
@ its own entry point here when it registers itself; the writer is
@ not visible in this file, so verify against the emulator code.
@
	.pushsection .data
	.align	2
ENTRY(fp_enter)
	.word	no_fp
	.popsection
|
|
|
|
@
@ no_fp - initial target stored in fp_enter.  It returns straight
@ through lr, which in this calling convention is the "unrecognised
@ instruction" return address.
@
no_fp:
	ret	lr
ENDPROC(no_fp)
|