1
linux/arch/powerpc/include/asm/ppc_asm.h

907 lines
23 KiB
C
Raw Permalink Normal View History

/*
* Copyright (C) 1995-1999 Gary Thomas, Paul Mackerras, Cort Dougan.
*/
#ifndef _ASM_POWERPC_PPC_ASM_H
#define _ASM_POWERPC_PPC_ASM_H
#include <linux/stringify.h>
#include <asm/asm-compat.h>
#include <asm/processor.h>
#include <asm/ppc-opcode.h>
powerpc: Account time using timebase rather than PURR Currently, when CONFIG_VIRT_CPU_ACCOUNTING is enabled, we use the PURR register for measuring the user and system time used by processes, as well as other related times such as hardirq and softirq times. This turns out to be quite confusing for users because it means that a program will often be measured as taking less time when run on a multi-threaded processor (SMT2 or SMT4 mode) than it does when run on a single-threaded processor (ST mode), even though the program takes longer to finish. The discrepancy is accounted for as stolen time, which is also confusing, particularly when there are no other partitions running. This changes the accounting to use the timebase instead, meaning that the reported user and system times are the actual number of real-time seconds that the program was executing on the processor thread, regardless of which SMT mode the processor is in. Thus a program will generally show greater user and system times when run on a multi-threaded processor than on a single-threaded processor. On pSeries systems on POWER5 or later processors, we measure the stolen time (time when this partition wasn't running) using the hypervisor dispatch trace log. We check for new entries in the log on every entry from user mode and on every transition from kernel process context to soft or hard IRQ context (i.e. when account_system_vtime() gets called). So that we can correctly distinguish time stolen from user time and time stolen from system time, without having to check the log on every exit to user mode, we store separate timestamps for exit to user mode and entry from user mode. On systems that have a SPURR (POWER6 and POWER7), we read the SPURR in account_system_vtime() (as before), and then apportion the SPURR ticks since the last time we read it between scaled user time and scaled system time according to the relative proportions of user time and system time over the same interval. This avoids having to read the SPURR on every kernel entry and exit. On systems that have PURR but not SPURR (i.e., POWER5), we do the same using the PURR rather than the SPURR. This disables the DTL user interface in /sys/debug/kernel/powerpc/dtl for now since it conflicts with the use of the dispatch trace log by the time accounting code. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2010-08-26 12:56:43 -07:00
#include <asm/firmware.h>
#include <asm/feature-fixups.h>
powerpc/bug: Provide better flexibility to WARN_ON/__WARN_FLAGS() with asm goto Using asm goto in __WARN_FLAGS() and WARN_ON() allows more flexibility to GCC. For that add an entry to the exception table so that program_check_exception() knowns where to resume execution after a WARNING. Here are two exemples. The first one is done on PPC32 (which benefits from the previous patch), the second is on PPC64. unsigned long test(struct pt_regs *regs) { int ret; WARN_ON(regs->msr & MSR_PR); return regs->gpr[3]; } unsigned long test9w(unsigned long a, unsigned long b) { if (WARN_ON(!b)) return 0; return a / b; } Before the patch: 000003a8 <test>: 3a8: 81 23 00 84 lwz r9,132(r3) 3ac: 71 29 40 00 andi. r9,r9,16384 3b0: 40 82 00 0c bne 3bc <test+0x14> 3b4: 80 63 00 0c lwz r3,12(r3) 3b8: 4e 80 00 20 blr 3bc: 0f e0 00 00 twui r0,0 3c0: 80 63 00 0c lwz r3,12(r3) 3c4: 4e 80 00 20 blr 0000000000000bf0 <.test9w>: bf0: 7c 89 00 74 cntlzd r9,r4 bf4: 79 29 d1 82 rldicl r9,r9,58,6 bf8: 0b 09 00 00 tdnei r9,0 bfc: 2c 24 00 00 cmpdi r4,0 c00: 41 82 00 0c beq c0c <.test9w+0x1c> c04: 7c 63 23 92 divdu r3,r3,r4 c08: 4e 80 00 20 blr c0c: 38 60 00 00 li r3,0 c10: 4e 80 00 20 blr After the patch: 000003a8 <test>: 3a8: 81 23 00 84 lwz r9,132(r3) 3ac: 71 29 40 00 andi. r9,r9,16384 3b0: 40 82 00 0c bne 3bc <test+0x14> 3b4: 80 63 00 0c lwz r3,12(r3) 3b8: 4e 80 00 20 blr 3bc: 0f e0 00 00 twui r0,0 0000000000000c50 <.test9w>: c50: 7c 89 00 74 cntlzd r9,r4 c54: 79 29 d1 82 rldicl r9,r9,58,6 c58: 0b 09 00 00 tdnei r9,0 c5c: 7c 63 23 92 divdu r3,r3,r4 c60: 4e 80 00 20 blr c70: 38 60 00 00 li r3,0 c74: 4e 80 00 20 blr In the first exemple, we see GCC doesn't need to duplicate what happens after the trap. In the second exemple, we see that GCC doesn't need to emit a test and a branch in the likely path in addition to the trap. We've got some WARN_ON() in .softirqentry.text section so it needs to be added in the OTHER_TEXT_SECTIONS in modpost.c Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/389962b1b702e3c78d169e59bcfac56282889173.1618331882.git.christophe.leroy@csgroup.eu
2021-04-13 09:38:10 -07:00
#include <asm/extable.h>
#ifdef __ASSEMBLY__
#define SZL (BITS_PER_LONG/8)
/*
* This expands to a sequence of operations with reg incrementing from
* start to end inclusive, of this form:
*
* op reg, (offset + (width * reg))(base)
*
* Note that offset is not the offset of the first operation unless start
* is zero (or width is zero).
*/
.macro OP_REGS op, width, start, end, base, offset
.Lreg=\start
.rept (\end - \start + 1)
\op .Lreg, \offset + \width * .Lreg(\base)
.Lreg=.Lreg+1
.endr
.endm
/*
* This expands to a sequence of register clears for regs start to end
* inclusive, of the form:
*
* li rN, 0
*/
.macro ZEROIZE_REGS start, end
.Lreg=\start
.rept (\end - \start + 1)
li .Lreg, 0
.Lreg=.Lreg+1
.endr
.endm
/*
* Macros for storing registers into and loading registers from
* exception frames.
*/
#ifdef __powerpc64__
#define SAVE_GPRS(start, end, base) OP_REGS std, 8, start, end, base, GPR0
#define REST_GPRS(start, end, base) OP_REGS ld, 8, start, end, base, GPR0
#define SAVE_NVGPRS(base) SAVE_GPRS(14, 31, base)
#define REST_NVGPRS(base) REST_GPRS(14, 31, base)
#else
#define SAVE_GPRS(start, end, base) OP_REGS stw, 4, start, end, base, GPR0
#define REST_GPRS(start, end, base) OP_REGS lwz, 4, start, end, base, GPR0
#define SAVE_NVGPRS(base) SAVE_GPRS(13, 31, base)
#define REST_NVGPRS(base) REST_GPRS(13, 31, base)
#endif
#define ZEROIZE_GPRS(start, end) ZEROIZE_REGS start, end
#ifdef __powerpc64__
#define ZEROIZE_NVGPRS() ZEROIZE_GPRS(14, 31)
#else
#define ZEROIZE_NVGPRS() ZEROIZE_GPRS(13, 31)
#endif
#define ZEROIZE_GPR(n) ZEROIZE_GPRS(n, n)
#define SAVE_GPR(n, base) SAVE_GPRS(n, n, base)
#define REST_GPR(n, base) REST_GPRS(n, n, base)
/* macros for handling user register sanitisation */
#ifdef CONFIG_INTERRUPT_SANITIZE_REGISTERS
#define SANITIZE_SYSCALL_GPRS() ZEROIZE_GPR(0); \
ZEROIZE_GPRS(5, 12); \
ZEROIZE_NVGPRS()
#define SANITIZE_GPR(n) ZEROIZE_GPR(n)
#define SANITIZE_GPRS(start, end) ZEROIZE_GPRS(start, end)
#define SANITIZE_NVGPRS() ZEROIZE_NVGPRS()
#define SANITIZE_RESTORE_NVGPRS() REST_NVGPRS(r1)
#define HANDLER_RESTORE_NVGPRS()
#else
#define SANITIZE_SYSCALL_GPRS()
#define SANITIZE_GPR(n)
#define SANITIZE_GPRS(start, end)
#define SANITIZE_NVGPRS()
#define SANITIZE_RESTORE_NVGPRS()
#define HANDLER_RESTORE_NVGPRS() REST_NVGPRS(r1)
#endif /* CONFIG_INTERRUPT_SANITIZE_REGISTERS */
#define SAVE_FPR(n, base) stfd n,8*TS_FPRWIDTH*(n)(base)
#define SAVE_2FPRS(n, base) SAVE_FPR(n, base); SAVE_FPR(n+1, base)
#define SAVE_4FPRS(n, base) SAVE_2FPRS(n, base); SAVE_2FPRS(n+2, base)
#define SAVE_8FPRS(n, base) SAVE_4FPRS(n, base); SAVE_4FPRS(n+4, base)
#define SAVE_16FPRS(n, base) SAVE_8FPRS(n, base); SAVE_8FPRS(n+8, base)
#define SAVE_32FPRS(n, base) SAVE_16FPRS(n, base); SAVE_16FPRS(n+16, base)
#define REST_FPR(n, base) lfd n,8*TS_FPRWIDTH*(n)(base)
#define REST_2FPRS(n, base) REST_FPR(n, base); REST_FPR(n+1, base)
#define REST_4FPRS(n, base) REST_2FPRS(n, base); REST_2FPRS(n+2, base)
#define REST_8FPRS(n, base) REST_4FPRS(n, base); REST_4FPRS(n+4, base)
#define REST_16FPRS(n, base) REST_8FPRS(n, base); REST_8FPRS(n+8, base)
#define REST_32FPRS(n, base) REST_16FPRS(n, base); REST_16FPRS(n+16, base)
#define SAVE_VR(n,b,base) li b,16*(n); stvx n,base,b
#define SAVE_2VRS(n,b,base) SAVE_VR(n,b,base); SAVE_VR(n+1,b,base)
#define SAVE_4VRS(n,b,base) SAVE_2VRS(n,b,base); SAVE_2VRS(n+2,b,base)
#define SAVE_8VRS(n,b,base) SAVE_4VRS(n,b,base); SAVE_4VRS(n+4,b,base)
#define SAVE_16VRS(n,b,base) SAVE_8VRS(n,b,base); SAVE_8VRS(n+8,b,base)
#define SAVE_32VRS(n,b,base) SAVE_16VRS(n,b,base); SAVE_16VRS(n+16,b,base)
#define REST_VR(n,b,base) li b,16*(n); lvx n,base,b
#define REST_2VRS(n,b,base) REST_VR(n,b,base); REST_VR(n+1,b,base)
#define REST_4VRS(n,b,base) REST_2VRS(n,b,base); REST_2VRS(n+2,b,base)
#define REST_8VRS(n,b,base) REST_4VRS(n,b,base); REST_4VRS(n+4,b,base)
#define REST_16VRS(n,b,base) REST_8VRS(n,b,base); REST_8VRS(n+8,b,base)
#define REST_32VRS(n,b,base) REST_16VRS(n,b,base); REST_16VRS(n+16,b,base)
#ifdef __BIG_ENDIAN__
#define STXVD2X_ROT(n,b,base) STXVD2X(n,b,base)
#define LXVD2X_ROT(n,b,base) LXVD2X(n,b,base)
#else
#define STXVD2X_ROT(n,b,base) XXSWAPD(n,n); \
STXVD2X(n,b,base); \
XXSWAPD(n,n)
#define LXVD2X_ROT(n,b,base) LXVD2X(n,b,base); \
XXSWAPD(n,n)
#endif
/* Save the lower 32 VSRs in the thread VSR region */
#define SAVE_VSR(n,b,base) li b,16*(n); STXVD2X_ROT(n,R##base,R##b)
#define SAVE_2VSRS(n,b,base) SAVE_VSR(n,b,base); SAVE_VSR(n+1,b,base)
#define SAVE_4VSRS(n,b,base) SAVE_2VSRS(n,b,base); SAVE_2VSRS(n+2,b,base)
#define SAVE_8VSRS(n,b,base) SAVE_4VSRS(n,b,base); SAVE_4VSRS(n+4,b,base)
#define SAVE_16VSRS(n,b,base) SAVE_8VSRS(n,b,base); SAVE_8VSRS(n+8,b,base)
#define SAVE_32VSRS(n,b,base) SAVE_16VSRS(n,b,base); SAVE_16VSRS(n+16,b,base)
#define REST_VSR(n,b,base) li b,16*(n); LXVD2X_ROT(n,R##base,R##b)
#define REST_2VSRS(n,b,base) REST_VSR(n,b,base); REST_VSR(n+1,b,base)
#define REST_4VSRS(n,b,base) REST_2VSRS(n,b,base); REST_2VSRS(n+2,b,base)
#define REST_8VSRS(n,b,base) REST_4VSRS(n,b,base); REST_4VSRS(n+4,b,base)
#define REST_16VSRS(n,b,base) REST_8VSRS(n,b,base); REST_8VSRS(n+8,b,base)
#define REST_32VSRS(n,b,base) REST_16VSRS(n,b,base); REST_16VSRS(n+16,b,base)
/*
* b = base register for addressing, o = base offset from register of 1st EVR
* n = first EVR, s = scratch
*/
#define SAVE_EVR(n,s,b,o) evmergehi s,s,n; stw s,o+4*(n)(b)
#define SAVE_2EVRS(n,s,b,o) SAVE_EVR(n,s,b,o); SAVE_EVR(n+1,s,b,o)
#define SAVE_4EVRS(n,s,b,o) SAVE_2EVRS(n,s,b,o); SAVE_2EVRS(n+2,s,b,o)
#define SAVE_8EVRS(n,s,b,o) SAVE_4EVRS(n,s,b,o); SAVE_4EVRS(n+4,s,b,o)
#define SAVE_16EVRS(n,s,b,o) SAVE_8EVRS(n,s,b,o); SAVE_8EVRS(n+8,s,b,o)
#define SAVE_32EVRS(n,s,b,o) SAVE_16EVRS(n,s,b,o); SAVE_16EVRS(n+16,s,b,o)
#define REST_EVR(n,s,b,o) lwz s,o+4*(n)(b); evmergelo n,s,n
#define REST_2EVRS(n,s,b,o) REST_EVR(n,s,b,o); REST_EVR(n+1,s,b,o)
#define REST_4EVRS(n,s,b,o) REST_2EVRS(n,s,b,o); REST_2EVRS(n+2,s,b,o)
#define REST_8EVRS(n,s,b,o) REST_4EVRS(n,s,b,o); REST_4EVRS(n+4,s,b,o)
#define REST_16EVRS(n,s,b,o) REST_8EVRS(n,s,b,o); REST_8EVRS(n+8,s,b,o)
#define REST_32EVRS(n,s,b,o) REST_16EVRS(n,s,b,o); REST_16EVRS(n+16,s,b,o)
/* Macros to adjust thread priority for hardware multithreading */
#define HMT_VERY_LOW or 31,31,31 # very low priority
#define HMT_LOW or 1,1,1
#define HMT_MEDIUM_LOW or 6,6,6 # medium low priority
#define HMT_MEDIUM or 2,2,2
#define HMT_MEDIUM_HIGH or 5,5,5 # medium high priority
#define HMT_HIGH or 3,3,3
#define HMT_EXTRA_HIGH or 7,7,7 # power7 only
#ifdef CONFIG_PPC64
#define ULONG_SIZE 8
#else
#define ULONG_SIZE 4
#endif
#define __VCPU_GPR(n) (VCPU_GPRS + (n * ULONG_SIZE))
#define VCPU_GPR(n) __VCPU_GPR(__REG_##n)
#ifdef __KERNEL__
powerpc/vdso: Fix DOTSYM for 32-bit LE VDSO Skirmisher reported on IRC that the 32-bit LE VDSO was hanging. This turned out to be due to a branch to self in eg. __kernel_gettimeofday. Looking at the disassembly with objdump -dR shows why: 00000528 <__kernel_gettimeofday>: 528: f0 ff 21 94 stwu r1,-16(r1) 52c: a6 02 08 7c mflr r0 530: f0 ff 21 94 stwu r1,-16(r1) 534: 14 00 01 90 stw r0,20(r1) 538: 05 00 9f 42 bcl 20,4*cr7+so,53c <__kernel_gettimeofday+0x14> 53c: a6 02 a8 7c mflr r5 540: ff ff a5 3c addis r5,r5,-1 544: c4 fa a5 38 addi r5,r5,-1340 548: f0 00 a5 38 addi r5,r5,240 54c: 01 00 00 48 bl 54c <__kernel_gettimeofday+0x24> 54c: R_PPC_REL24 .__c_kernel_gettimeofday Because we don't process relocations for the VDSO, this branch remains a branch from 0x54c to 0x54c. With the preceding patch to prohibit R_PPC_REL24 relocations, we instead get a build failure: 0000054c R_PPC_REL24 .__c_kernel_gettimeofday 00000598 R_PPC_REL24 .__c_kernel_clock_gettime 000005e4 R_PPC_REL24 .__c_kernel_clock_gettime64 00000630 R_PPC_REL24 .__c_kernel_clock_getres 0000067c R_PPC_REL24 .__c_kernel_time arch/powerpc/kernel/vdso32/vdso32.so.dbg: dynamic relocations are not supported The root cause is that we're branching to `.__c_kernel_gettimeofday`. But this is 32-bit LE code, which doesn't use function descriptors, so there are no dot symbols. The reason we're trying to branch to a dot symbol is because we're using the DOTSYM macro, but the ifdefs we use to define the DOTSYM macro do not currently work for 32-bit LE. So like previous commits we need to differentiate if the current compilation unit is 64-bit, rather than the kernel as a whole. ie. switch from CONFIG_PPC64 to __powerpc64__. With that fixed 32-bit LE code gets the empty version of DOTSYM, which just resolves to the original symbol name, leading to a direct branch and no relocations: 000003f8 <__kernel_gettimeofday>: 3f8: f0 ff 21 94 stwu r1,-16(r1) 3fc: a6 02 08 7c mflr r0 400: f0 ff 21 94 stwu r1,-16(r1) 404: 14 00 01 90 stw r0,20(r1) 408: 05 00 9f 42 bcl 20,4*cr7+so,40c <__kernel_gettimeofday+0x14> 40c: a6 02 a8 7c mflr r5 410: ff ff a5 3c addis r5,r5,-1 414: f4 fb a5 38 addi r5,r5,-1036 418: f0 00 a5 38 addi r5,r5,240 41c: 85 06 00 48 bl aa0 <__c_kernel_gettimeofday> Fixes: ab037dd87a2f ("powerpc/vdso: Switch VDSO to generic C implementation.") Reported-by: "Will Springer <skirmisher@protonmail.com>" Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20201218111619.1206391-3-mpe@ellerman.id.au
2020-12-18 04:16:19 -07:00
/*
* Used to name C functions called from asm
*/
#ifdef CONFIG_PPC_KERNEL_PCREL
powerpc/64: vmlinux support building with PCREL addresing PC-Relative or PCREL addressing is an extension to the ELF ABI which uses Power ISA v3.1 PC-relative instructions to calculate addresses, rather than the traditional TOC scheme. Add an option to build vmlinux using pcrel addressing. Modules continue to use TOC addressing. - TOC address helpers and r2 are poisoned with -1 when running vmlinux. r2 could be used for something useful once things are ironed out. - Assembly must call C functions with @notoc annotation, or the linker complains aobut a missing nop after the call. This is done with the CFUNC macro introduced earlier. - Boot: with the exception of prom_init, the execution branches to the kernel virtual address early in boot, before any addresses are generated, which ensures 34-bit pcrel addressing does not miss the high PAGE_OFFSET bits. TOC relative addressing has a similar requirement. prom_init does not go to the virtual address and its addresses should not carry over to the post-prom kernel. - Ftrace trampolines are converted from TOC addressing to pcrel addressing, including module ftrace trampolines that currently use the kernel TOC to find ftrace target functions. - BPF function prologue and function calling generation are converted from TOC to pcrel. - copypage_64.S has an interesting problem, prefixed instructions have alignment restrictions so the linker can add padding, which makes the assembler treat the difference between two local labels as non-constant even if alignment is arranged so padding is not required. This may need toolchain help to solve nicely, for now move the prefix instruction out of the alternate patch section to work around it. This reduces kernel text size by about 6%. Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://msgid.link/20230408021752.862660-6-npiggin@gmail.com
2023-04-07 19:17:51 -07:00
#define CFUNC(name) name@notoc
#else
#define CFUNC(name) name
powerpc/64: vmlinux support building with PCREL addresing PC-Relative or PCREL addressing is an extension to the ELF ABI which uses Power ISA v3.1 PC-relative instructions to calculate addresses, rather than the traditional TOC scheme. Add an option to build vmlinux using pcrel addressing. Modules continue to use TOC addressing. - TOC address helpers and r2 are poisoned with -1 when running vmlinux. r2 could be used for something useful once things are ironed out. - Assembly must call C functions with @notoc annotation, or the linker complains aobut a missing nop after the call. This is done with the CFUNC macro introduced earlier. - Boot: with the exception of prom_init, the execution branches to the kernel virtual address early in boot, before any addresses are generated, which ensures 34-bit pcrel addressing does not miss the high PAGE_OFFSET bits. TOC relative addressing has a similar requirement. prom_init does not go to the virtual address and its addresses should not carry over to the post-prom kernel. - Ftrace trampolines are converted from TOC addressing to pcrel addressing, including module ftrace trampolines that currently use the kernel TOC to find ftrace target functions. - BPF function prologue and function calling generation are converted from TOC to pcrel. - copypage_64.S has an interesting problem, prefixed instructions have alignment restrictions so the linker can add padding, which makes the assembler treat the difference between two local labels as non-constant even if alignment is arranged so padding is not required. This may need toolchain help to solve nicely, for now move the prefix instruction out of the alternate patch section to work around it. This reduces kernel text size by about 6%. Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://msgid.link/20230408021752.862660-6-npiggin@gmail.com
2023-04-07 19:17:51 -07:00
#endif
powerpc/vdso: Fix DOTSYM for 32-bit LE VDSO Skirmisher reported on IRC that the 32-bit LE VDSO was hanging. This turned out to be due to a branch to self in eg. __kernel_gettimeofday. Looking at the disassembly with objdump -dR shows why: 00000528 <__kernel_gettimeofday>: 528: f0 ff 21 94 stwu r1,-16(r1) 52c: a6 02 08 7c mflr r0 530: f0 ff 21 94 stwu r1,-16(r1) 534: 14 00 01 90 stw r0,20(r1) 538: 05 00 9f 42 bcl 20,4*cr7+so,53c <__kernel_gettimeofday+0x14> 53c: a6 02 a8 7c mflr r5 540: ff ff a5 3c addis r5,r5,-1 544: c4 fa a5 38 addi r5,r5,-1340 548: f0 00 a5 38 addi r5,r5,240 54c: 01 00 00 48 bl 54c <__kernel_gettimeofday+0x24> 54c: R_PPC_REL24 .__c_kernel_gettimeofday Because we don't process relocations for the VDSO, this branch remains a branch from 0x54c to 0x54c. With the preceding patch to prohibit R_PPC_REL24 relocations, we instead get a build failure: 0000054c R_PPC_REL24 .__c_kernel_gettimeofday 00000598 R_PPC_REL24 .__c_kernel_clock_gettime 000005e4 R_PPC_REL24 .__c_kernel_clock_gettime64 00000630 R_PPC_REL24 .__c_kernel_clock_getres 0000067c R_PPC_REL24 .__c_kernel_time arch/powerpc/kernel/vdso32/vdso32.so.dbg: dynamic relocations are not supported The root cause is that we're branching to `.__c_kernel_gettimeofday`. But this is 32-bit LE code, which doesn't use function descriptors, so there are no dot symbols. The reason we're trying to branch to a dot symbol is because we're using the DOTSYM macro, but the ifdefs we use to define the DOTSYM macro do not currently work for 32-bit LE. So like previous commits we need to differentiate if the current compilation unit is 64-bit, rather than the kernel as a whole. ie. switch from CONFIG_PPC64 to __powerpc64__. With that fixed 32-bit LE code gets the empty version of DOTSYM, which just resolves to the original symbol name, leading to a direct branch and no relocations: 000003f8 <__kernel_gettimeofday>: 3f8: f0 ff 21 94 stwu r1,-16(r1) 3fc: a6 02 08 7c mflr r0 400: f0 ff 21 94 stwu r1,-16(r1) 404: 14 00 01 90 stw r0,20(r1) 408: 05 00 9f 42 bcl 20,4*cr7+so,40c <__kernel_gettimeofday+0x14> 40c: a6 02 a8 7c mflr r5 410: ff ff a5 3c addis r5,r5,-1 414: f4 fb a5 38 addi r5,r5,-1036 418: f0 00 a5 38 addi r5,r5,240 41c: 85 06 00 48 bl aa0 <__c_kernel_gettimeofday> Fixes: ab037dd87a2f ("powerpc/vdso: Switch VDSO to generic C implementation.") Reported-by: "Will Springer <skirmisher@protonmail.com>" Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20201218111619.1206391-3-mpe@ellerman.id.au
2020-12-18 04:16:19 -07:00
/*
* We use __powerpc64__ here because we want the compat VDSO to use the 32-bit
* version below in the else case of the ifdef.
*/
#ifdef __powerpc64__
#define STACKFRAMESIZE 256
#define __STK_REG(i) (112 + ((i)-14)*8)
#define STK_REG(i) __STK_REG(__REG_##i)
#ifdef CONFIG_PPC64_ELF_ABI_V2
#define STK_GOT 24
#define STK_PARAM_AREA 32
#else
#define STK_GOT 40
#define STK_PARAM_AREA 48
#endif
#define __STK_PARAM(i) (STK_PARAM_AREA + ((i)-3)*8)
#define STK_PARAM(i) __STK_PARAM(__REG_##i)
#ifdef CONFIG_PPC64_ELF_ABI_V2
#define _GLOBAL(name) \
.align 2 ; \
.type name,@function; \
.globl name; \
name:
#ifdef CONFIG_PPC_KERNEL_PCREL
powerpc/64: vmlinux support building with PCREL addresing PC-Relative or PCREL addressing is an extension to the ELF ABI which uses Power ISA v3.1 PC-relative instructions to calculate addresses, rather than the traditional TOC scheme. Add an option to build vmlinux using pcrel addressing. Modules continue to use TOC addressing. - TOC address helpers and r2 are poisoned with -1 when running vmlinux. r2 could be used for something useful once things are ironed out. - Assembly must call C functions with @notoc annotation, or the linker complains aobut a missing nop after the call. This is done with the CFUNC macro introduced earlier. - Boot: with the exception of prom_init, the execution branches to the kernel virtual address early in boot, before any addresses are generated, which ensures 34-bit pcrel addressing does not miss the high PAGE_OFFSET bits. TOC relative addressing has a similar requirement. prom_init does not go to the virtual address and its addresses should not carry over to the post-prom kernel. - Ftrace trampolines are converted from TOC addressing to pcrel addressing, including module ftrace trampolines that currently use the kernel TOC to find ftrace target functions. - BPF function prologue and function calling generation are converted from TOC to pcrel. - copypage_64.S has an interesting problem, prefixed instructions have alignment restrictions so the linker can add padding, which makes the assembler treat the difference between two local labels as non-constant even if alignment is arranged so padding is not required. This may need toolchain help to solve nicely, for now move the prefix instruction out of the alternate patch section to work around it. This reduces kernel text size by about 6%. Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://msgid.link/20230408021752.862660-6-npiggin@gmail.com
2023-04-07 19:17:51 -07:00
#define _GLOBAL_TOC _GLOBAL
#else
#define _GLOBAL_TOC(name) \
.align 2 ; \
.type name,@function; \
.globl name; \
name: \
0: addis r2,r12,(.TOC.-0b)@ha; \
addi r2,r2,(.TOC.-0b)@l; \
.localentry name,.-name
powerpc/64: vmlinux support building with PCREL addresing PC-Relative or PCREL addressing is an extension to the ELF ABI which uses Power ISA v3.1 PC-relative instructions to calculate addresses, rather than the traditional TOC scheme. Add an option to build vmlinux using pcrel addressing. Modules continue to use TOC addressing. - TOC address helpers and r2 are poisoned with -1 when running vmlinux. r2 could be used for something useful once things are ironed out. - Assembly must call C functions with @notoc annotation, or the linker complains aobut a missing nop after the call. This is done with the CFUNC macro introduced earlier. - Boot: with the exception of prom_init, the execution branches to the kernel virtual address early in boot, before any addresses are generated, which ensures 34-bit pcrel addressing does not miss the high PAGE_OFFSET bits. TOC relative addressing has a similar requirement. prom_init does not go to the virtual address and its addresses should not carry over to the post-prom kernel. - Ftrace trampolines are converted from TOC addressing to pcrel addressing, including module ftrace trampolines that currently use the kernel TOC to find ftrace target functions. - BPF function prologue and function calling generation are converted from TOC to pcrel. - copypage_64.S has an interesting problem, prefixed instructions have alignment restrictions so the linker can add padding, which makes the assembler treat the difference between two local labels as non-constant even if alignment is arranged so padding is not required. This may need toolchain help to solve nicely, for now move the prefix instruction out of the alternate patch section to work around it. This reduces kernel text size by about 6%. Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://msgid.link/20230408021752.862660-6-npiggin@gmail.com
2023-04-07 19:17:51 -07:00
#endif
#define DOTSYM(a) a
#else
#define XGLUE(a,b) a##b
#define GLUE(a,b) XGLUE(a,b)
#define _GLOBAL(name) \
.align 2 ; \
.globl name; \
.globl GLUE(.,name); \
.pushsection ".opd","aw"; \
name: \
.quad GLUE(.,name); \
.quad .TOC.@tocbase; \
.quad 0; \
.popsection; \
.type GLUE(.,name),@function; \
GLUE(.,name):
#define _GLOBAL_TOC(name) _GLOBAL(name)
#define DOTSYM(a) GLUE(.,a)
#endif
#else /* 32-bit */
#define _GLOBAL(n) \
.globl n; \
n:
#define _GLOBAL_TOC(name) _GLOBAL(name)
powerpc/vdso: Prepare for switching VDSO to generic C implementation. Prepare for switching VDSO to generic C implementation in following patch. Here, we: - Prepare the helpers to call the C VDSO functions - Prepare the required callbacks for the C VDSO functions - Prepare the clocksource.h files to define VDSO_ARCH_CLOCKMODES - Add the C trampolines to the generic C VDSO functions powerpc is a bit special for VDSO as well as system calls in the way that it requires setting CR SO bit which cannot be done in C. Therefore, entry/exit needs to be performed in ASM. Implementing __arch_get_vdso_data() would clobber the link register, requiring the caller to save it. As the ASM calling function already has to set a stack frame and saves the link register before calling the C vdso function, retriving the vdso data pointer there is lighter. Implement __arch_vdso_capable() and always return true. Provide vdso_shift_ns(), as the generic x >> s gives the following bad result: 18: 35 25 ff e0 addic. r9,r5,-32 1c: 41 80 00 10 blt 2c <shift+0x14> 20: 7c 64 4c 30 srw r4,r3,r9 24: 38 60 00 00 li r3,0 ... 2c: 54 69 08 3c rlwinm r9,r3,1,0,30 30: 21 45 00 1f subfic r10,r5,31 34: 7c 84 2c 30 srw r4,r4,r5 38: 7d 29 50 30 slw r9,r9,r10 3c: 7c 63 2c 30 srw r3,r3,r5 40: 7d 24 23 78 or r4,r9,r4 In our case the shift is always <= 32. In addition, the upper 32 bits of the result are likely nul. Lets GCC know it, it also optimises the following calculations. With the patch, we get: 0: 21 25 00 20 subfic r9,r5,32 4: 7c 69 48 30 slw r9,r3,r9 8: 7c 84 2c 30 srw r4,r4,r5 c: 7d 24 23 78 or r4,r9,r4 10: 7c 63 2c 30 srw r3,r3,r5 Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20201126131006.2431205-6-mpe@ellerman.id.au
2020-11-26 06:10:03 -07:00
#define DOTSYM(a) a
#endif
/*
* __kprobes (the C annotation) puts the symbol into the .kprobes.text
* section, which gets emitted at the end of regular text.
*
* _ASM_NOKPROBE_SYMBOL and NOKPROBE_SYMBOL just adds the symbol to
* a blacklist. The former is for core kprobe functions/data, the
* latter is for those that incdentially must be excluded from probing
* and allows them to be linked at more optimal location within text.
*/
#ifdef CONFIG_KPROBES
#define _ASM_NOKPROBE_SYMBOL(entry) \
.pushsection "_kprobe_blacklist","aw"; \
PPC_LONG (entry) ; \
.popsection
#else
#define _ASM_NOKPROBE_SYMBOL(entry)
#endif
#define FUNC_START(name) _GLOBAL(name)
#define FUNC_END(name)
/*
* LOAD_REG_IMMEDIATE(rn, expr)
* Loads the value of the constant expression 'expr' into register 'rn'
* using immediate instructions only. Use this when it's important not
* to reference other data (i.e. on ppc64 when the TOC pointer is not
* valid) and when 'expr' is a constant or absolute address.
*
* LOAD_REG_ADDR(rn, name)
* Loads the address of label 'name' into register 'rn'. Use this when
* you don't particularly need immediate instructions only, but you need
* the whole address in one register (e.g. it's a structure address and
* you want to access various offsets within it). On ppc32 this is
* identical to LOAD_REG_IMMEDIATE.
*
* LOAD_REG_ADDR_PIC(rn, name)
* Loads the address of label 'name' into register 'run'. Use this when
* the kernel doesn't run at the linked or relocated address. Please
* note that this macro will clobber the lr register.
*
* LOAD_REG_ADDRBASE(rn, name)
* ADDROFF(name)
* LOAD_REG_ADDRBASE loads part of the address of label 'name' into
* register 'rn'. ADDROFF(name) returns the remainder of the address as
* a constant expression. ADDROFF(name) is a signed expression < 16 bits
* in size, so is suitable for use directly as an offset in load and store
* instructions. Use this when loading/storing a single word or less as:
* LOAD_REG_ADDRBASE(rX, name)
* ld rY,ADDROFF(name)(rX)
*/
/* Be careful, this will clobber the lr register. */
#define LOAD_REG_ADDR_PIC(reg, name) \
bcl 20,31,$+4; \
0: mflr reg; \
addis reg,reg,(name - 0b)@ha; \
addi reg,reg,(name - 0b)@l;
#if defined(__powerpc64__) && defined(HAVE_AS_ATHIGH)
powerpc: Fix 64 bit builds with binutils 2.24 With binutils 2.24, various 64 bit builds fail with relocation errors such as arch/powerpc/kernel/built-in.o: In function `exc_debug_crit_book3e': (.text+0x165ee): relocation truncated to fit: R_PPC64_ADDR16_HI against symbol `interrupt_base_book3e' defined in .text section in arch/powerpc/kernel/built-in.o arch/powerpc/kernel/built-in.o: In function `exc_debug_crit_book3e': (.text+0x16602): relocation truncated to fit: R_PPC64_ADDR16_HI against symbol `interrupt_end_book3e' defined in .text section in arch/powerpc/kernel/built-in.o The assembler maintainer says: I changed the ABI, something that had to be done but unfortunately happens to break the booke kernel code. When building up a 64-bit value with lis, ori, shl, oris, ori or similar sequences, you now should use @high and @higha in place of @h and @ha. @h and @ha (and their associated relocs R_PPC64_ADDR16_HI and R_PPC64_ADDR16_HA) now report overflow if the value is out of 32-bit signed range. ie. @h and @ha assume you're building a 32-bit value. This is needed to report out-of-range -mcmodel=medium toc pointer offsets in @toc@h and @toc@ha expressions, and for consistency I did the same for all other @h and @ha relocs. Replacing @h with @high in one strategic location fixes the relocation errors. This has to be done conditionally since the assembler either supports @h or @high but not both. Cc: <stable@vger.kernel.org> Signed-off-by: Guenter Roeck <linux@roeck-us.net> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2014-05-15 09:33:42 -07:00
#define __AS_ATHIGH high
#else
#define __AS_ATHIGH h
#endif
.macro __LOAD_REG_IMMEDIATE_32 r, x
.if (\x) >= 0x8000 || (\x) < -0x8000
lis \r, (\x)@__AS_ATHIGH
.if (\x) & 0xffff != 0
ori \r, \r, (\x)@l
.endif
.else
li \r, (\x)@l
.endif
.endm
.macro __LOAD_REG_IMMEDIATE r, x
.if (\x) >= 0x80000000 || (\x) < -0x80000000
__LOAD_REG_IMMEDIATE_32 \r, (\x) >> 32
sldi \r, \r, 32
.if (\x) & 0xffff0000 != 0
oris \r, \r, (\x)@__AS_ATHIGH
.endif
.if (\x) & 0xffff != 0
ori \r, \r, (\x)@l
.endif
.else
__LOAD_REG_IMMEDIATE_32 \r, \x
.endif
.endm
#ifdef __powerpc64__
powerpc/64: vmlinux support building with PCREL addresing PC-Relative or PCREL addressing is an extension to the ELF ABI which uses Power ISA v3.1 PC-relative instructions to calculate addresses, rather than the traditional TOC scheme. Add an option to build vmlinux using pcrel addressing. Modules continue to use TOC addressing. - TOC address helpers and r2 are poisoned with -1 when running vmlinux. r2 could be used for something useful once things are ironed out. - Assembly must call C functions with @notoc annotation, or the linker complains aobut a missing nop after the call. This is done with the CFUNC macro introduced earlier. - Boot: with the exception of prom_init, the execution branches to the kernel virtual address early in boot, before any addresses are generated, which ensures 34-bit pcrel addressing does not miss the high PAGE_OFFSET bits. TOC relative addressing has a similar requirement. prom_init does not go to the virtual address and its addresses should not carry over to the post-prom kernel. - Ftrace trampolines are converted from TOC addressing to pcrel addressing, including module ftrace trampolines that currently use the kernel TOC to find ftrace target functions. - BPF function prologue and function calling generation are converted from TOC to pcrel. - copypage_64.S has an interesting problem, prefixed instructions have alignment restrictions so the linker can add padding, which makes the assembler treat the difference between two local labels as non-constant even if alignment is arranged so padding is not required. This may need toolchain help to solve nicely, for now move the prefix instruction out of the alternate patch section to work around it. This reduces kernel text size by about 6%. Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://msgid.link/20230408021752.862660-6-npiggin@gmail.com
2023-04-07 19:17:51 -07:00
#ifdef CONFIG_PPC_KERNEL_PCREL
#define __LOAD_PACA_TOC(reg) \
li reg,-1
#else
#define __LOAD_PACA_TOC(reg) \
ld reg,PACATOC(r13)
powerpc/64: vmlinux support building with PCREL addresing PC-Relative or PCREL addressing is an extension to the ELF ABI which uses Power ISA v3.1 PC-relative instructions to calculate addresses, rather than the traditional TOC scheme. Add an option to build vmlinux using pcrel addressing. Modules continue to use TOC addressing. - TOC address helpers and r2 are poisoned with -1 when running vmlinux. r2 could be used for something useful once things are ironed out. - Assembly must call C functions with @notoc annotation, or the linker complains aobut a missing nop after the call. This is done with the CFUNC macro introduced earlier. - Boot: with the exception of prom_init, the execution branches to the kernel virtual address early in boot, before any addresses are generated, which ensures 34-bit pcrel addressing does not miss the high PAGE_OFFSET bits. TOC relative addressing has a similar requirement. prom_init does not go to the virtual address and its addresses should not carry over to the post-prom kernel. - Ftrace trampolines are converted from TOC addressing to pcrel addressing, including module ftrace trampolines that currently use the kernel TOC to find ftrace target functions. - BPF function prologue and function calling generation are converted from TOC to pcrel. - copypage_64.S has an interesting problem, prefixed instructions have alignment restrictions so the linker can add padding, which makes the assembler treat the difference between two local labels as non-constant even if alignment is arranged so padding is not required. This may need toolchain help to solve nicely, for now move the prefix instruction out of the alternate patch section to work around it. This reduces kernel text size by about 6%. Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://msgid.link/20230408021752.862660-6-npiggin@gmail.com
2023-04-07 19:17:51 -07:00
#endif
#define LOAD_PACA_TOC() \
__LOAD_PACA_TOC(r2)
#define LOAD_REG_IMMEDIATE(reg, expr) __LOAD_REG_IMMEDIATE reg, expr
#define LOAD_REG_IMMEDIATE_SYM(reg, tmp, expr) \
lis tmp, (expr)@highest; \
lis reg, (expr)@__AS_ATHIGH; \
ori tmp, tmp, (expr)@higher; \
ori reg, reg, (expr)@l; \
rldimi reg, tmp, 32, 0
#ifdef CONFIG_PPC_KERNEL_PCREL
powerpc/64: vmlinux support building with PCREL addresing PC-Relative or PCREL addressing is an extension to the ELF ABI which uses Power ISA v3.1 PC-relative instructions to calculate addresses, rather than the traditional TOC scheme. Add an option to build vmlinux using pcrel addressing. Modules continue to use TOC addressing. - TOC address helpers and r2 are poisoned with -1 when running vmlinux. r2 could be used for something useful once things are ironed out. - Assembly must call C functions with @notoc annotation, or the linker complains aobut a missing nop after the call. This is done with the CFUNC macro introduced earlier. - Boot: with the exception of prom_init, the execution branches to the kernel virtual address early in boot, before any addresses are generated, which ensures 34-bit pcrel addressing does not miss the high PAGE_OFFSET bits. TOC relative addressing has a similar requirement. prom_init does not go to the virtual address and its addresses should not carry over to the post-prom kernel. - Ftrace trampolines are converted from TOC addressing to pcrel addressing, including module ftrace trampolines that currently use the kernel TOC to find ftrace target functions. - BPF function prologue and function calling generation are converted from TOC to pcrel. - copypage_64.S has an interesting problem, prefixed instructions have alignment restrictions so the linker can add padding, which makes the assembler treat the difference between two local labels as non-constant even if alignment is arranged so padding is not required. This may need toolchain help to solve nicely, for now move the prefix instruction out of the alternate patch section to work around it. This reduces kernel text size by about 6%. Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://msgid.link/20230408021752.862660-6-npiggin@gmail.com
2023-04-07 19:17:51 -07:00
#define LOAD_REG_ADDR(reg,name) \
pla reg,name@pcrel
#else
#define LOAD_REG_ADDR(reg,name) \
addis reg,r2,name@toc@ha; \
addi reg,reg,name@toc@l
powerpc/64: vmlinux support building with PCREL addresing PC-Relative or PCREL addressing is an extension to the ELF ABI which uses Power ISA v3.1 PC-relative instructions to calculate addresses, rather than the traditional TOC scheme. Add an option to build vmlinux using pcrel addressing. Modules continue to use TOC addressing. - TOC address helpers and r2 are poisoned with -1 when running vmlinux. r2 could be used for something useful once things are ironed out. - Assembly must call C functions with @notoc annotation, or the linker complains aobut a missing nop after the call. This is done with the CFUNC macro introduced earlier. - Boot: with the exception of prom_init, the execution branches to the kernel virtual address early in boot, before any addresses are generated, which ensures 34-bit pcrel addressing does not miss the high PAGE_OFFSET bits. TOC relative addressing has a similar requirement. prom_init does not go to the virtual address and its addresses should not carry over to the post-prom kernel. - Ftrace trampolines are converted from TOC addressing to pcrel addressing, including module ftrace trampolines that currently use the kernel TOC to find ftrace target functions. - BPF function prologue and function calling generation are converted from TOC to pcrel. - copypage_64.S has an interesting problem, prefixed instructions have alignment restrictions so the linker can add padding, which makes the assembler treat the difference between two local labels as non-constant even if alignment is arranged so padding is not required. This may need toolchain help to solve nicely, for now move the prefix instruction out of the alternate patch section to work around it. This reduces kernel text size by about 6%. Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://msgid.link/20230408021752.862660-6-npiggin@gmail.com
2023-04-07 19:17:51 -07:00
#endif
#ifdef CONFIG_PPC_BOOK3E_64
/*
* This is used in register-constrained interrupt handlers. Not to be used
* by BOOK3S. ld complains with "got/toc optimization is not supported" if r2
* is not used for the TOC offset, so use @got(tocreg). If the interrupt
* handlers saved r2 instead, LOAD_REG_ADDR could be used.
*/
#define LOAD_REG_ADDR_ALTTOC(reg,tocreg,name) \
ld reg,name@got(tocreg)
#endif
#define LOAD_REG_ADDRBASE(reg,name) LOAD_REG_ADDR(reg,name)
#define ADDROFF(name) 0
/* offsets for stack frame layout */
#define LRSAVE 16
/*
* GCC stack frames follow a different pattern on 32 vs 64. This can be used
* to make asm frames be consistent with C.
*/
#define PPC_CREATE_STACK_FRAME(size) \
mflr r0; \
std r0,16(r1); \
stdu r1,-(size)(r1)
#else /* 32-bit */
#define LOAD_REG_IMMEDIATE(reg, expr) __LOAD_REG_IMMEDIATE_32 reg, expr
#define LOAD_REG_IMMEDIATE_SYM(reg,expr) \
lis reg,(expr)@ha; \
addi reg,reg,(expr)@l;
#define LOAD_REG_ADDR(reg,name) LOAD_REG_IMMEDIATE_SYM(reg, name)
#define LOAD_REG_ADDRBASE(reg, name) lis reg,name@ha
#define ADDROFF(name) name@l
/* offsets for stack frame layout */
#define LRSAVE 4
#define PPC_CREATE_STACK_FRAME(size) \
stwu r1,-(size)(r1); \
mflr r0; \
stw r0,(size+4)(r1)
#endif
/* various errata or part fixups */
#if defined(CONFIG_PPC_CELL) || defined(CONFIG_PPC_E500)
#define MFTB(dest) \
90: mfspr dest, SPRN_TBRL; \
BEGIN_FTR_SECTION_NESTED(96); \
cmpwi dest,0; \
beq- 90b; \
END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96)
#else
#define MFTB(dest) MFTBL(dest)
#endif
#ifdef CONFIG_PPC_8xx
#define MFTBL(dest) mftb dest
#define MFTBU(dest) mftbu dest
#else
#define MFTBL(dest) mfspr dest, SPRN_TBRL
#define MFTBU(dest) mfspr dest, SPRN_TBRU
#endif
#ifndef CONFIG_SMP
#define TLBSYNC
#else
#define TLBSYNC tlbsync; sync
#endif
#ifdef CONFIG_PPC64
#define MTOCRF(FXM, RS) \
BEGIN_FTR_SECTION_NESTED(848); \
mtcrf (FXM), RS; \
FTR_SECTION_ELSE_NESTED(848); \
mtocrf (FXM), RS; \
ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_NOEXECUTE, 848)
#endif
/*
* This instruction is not implemented on the PPC 603 or 601; however, on
* the 403GCX and 405GP tlbia IS defined and tlbie is not.
* All of these instructions exist in the 8xx, they have magical powers,
* and they must be used.
*/
#if !defined(CONFIG_44x) && !defined(CONFIG_PPC_8xx)
#define tlbia \
li r4,1024; \
mtctr r4; \
lis r4,KERNELBASE@h; \
.machine push; \
.machine "power4"; \
0: tlbie r4; \
.machine pop; \
addi r4,r4,0x1000; \
bdnz 0b
#endif
#ifdef CONFIG_IBM440EP_ERR42
#define PPC440EP_ERR42 isync
#else
#define PPC440EP_ERR42
#endif
/* The following stops all load and store data streams associated with stream
* ID (ie. streams created explicitly). The embedded and server mnemonics for
* dcbt are different so this must only be used for server.
*/
#define DCBT_BOOK3S_STOP_ALL_STREAM_IDS(scratch) \
lis scratch,0x60000000@h; \
.machine push; \
.machine power4; \
dcbt 0,scratch,0b01010; \
.machine pop;
#define DCBT_SETUP_STREAMS(from, from_parms, to, to_parms, scratch) \
lis scratch,0x8000; /* GO=1 */ \
clrldi scratch,scratch,32; \
.machine push; \
.machine power4; \
/* setup read stream 0 */ \
dcbt 0,from,0b01000; /* addr from */ \
dcbt 0,from_parms,0b01010; /* length and depth from */ \
/* setup write stream 1 */ \
dcbtst 0,to,0b01000; /* addr to */ \
dcbtst 0,to_parms,0b01010; /* length and depth to */ \
eieio; \
dcbt 0,scratch,0b01010; /* all streams GO */ \
.machine pop;
/*
* toreal/fromreal/tophys/tovirt macros. 32-bit BookE makes them
* keep the address intact to be compatible with code shared with
* 32-bit classic.
*
* On the other hand, I find it useful to have them behave as expected
* by their name (ie always do the addition) on 64-bit BookE
*/
#if defined(CONFIG_BOOKE) && !defined(CONFIG_PPC64)
#define toreal(rd)
#define fromreal(rd)
/*
* We use addis to ensure compatibility with the "classic" ppc versions of
* these macros, which use rs = 0 to get the tophys offset in rd, rather than
* converting the address in r0, and so this version has to do that too
* (i.e. set register rd to 0 when rs == 0).
*/
#define tophys(rd,rs) \
addis rd,rs,0
#define tovirt(rd,rs) \
addis rd,rs,0
#elif defined(CONFIG_PPC64)
#define toreal(rd) /* we can access c000... in real mode */
#define fromreal(rd)
#define tophys(rd,rs) \
clrldi rd,rs,2
#define tovirt(rd,rs) \
rotldi rd,rs,16; \
ori rd,rd,((KERNELBASE>>48)&0xFFFF);\
rotldi rd,rd,48
#else
#define toreal(rd) tophys(rd,rd)
#define fromreal(rd) tovirt(rd,rd)
#define tophys(rd, rs) addis rd, rs, -PAGE_OFFSET@h
#define tovirt(rd, rs) addis rd, rs, PAGE_OFFSET@h
#endif
#ifdef CONFIG_PPC_BOOK3S_64
#define MTMSRD(r) mtmsrd r
#define MTMSR_EERI(reg) mtmsrd reg,1
#else
#define MTMSRD(r) mtmsr r
#define MTMSR_EERI(reg) mtmsr reg
#endif
#endif /* __KERNEL__ */
/* The boring bits... */
/* Condition Register Bit Fields */
#define cr0 0
#define cr1 1
#define cr2 2
#define cr3 3
#define cr4 4
#define cr5 5
#define cr6 6
#define cr7 7
/*
* General Purpose Registers (GPRs)
*
* The lower case r0-r31 should be used in preference to the upper
* case R0-R31 as they provide more error checking in the assembler.
* Use R0-31 only when really nessesary.
*/
#define r0 %r0
#define r1 %r1
#define r2 %r2
#define r3 %r3
#define r4 %r4
#define r5 %r5
#define r6 %r6
#define r7 %r7
#define r8 %r8
#define r9 %r9
#define r10 %r10
#define r11 %r11
#define r12 %r12
#define r13 %r13
#define r14 %r14
#define r15 %r15
#define r16 %r16
#define r17 %r17
#define r18 %r18
#define r19 %r19
#define r20 %r20
#define r21 %r21
#define r22 %r22
#define r23 %r23
#define r24 %r24
#define r25 %r25
#define r26 %r26
#define r27 %r27
#define r28 %r28
#define r29 %r29
#define r30 %r30
#define r31 %r31
/* Floating Point Registers (FPRs) */
#define fr0 0
#define fr1 1
#define fr2 2
#define fr3 3
#define fr4 4
#define fr5 5
#define fr6 6
#define fr7 7
#define fr8 8
#define fr9 9
#define fr10 10
#define fr11 11
#define fr12 12
#define fr13 13
#define fr14 14
#define fr15 15
#define fr16 16
#define fr17 17
#define fr18 18
#define fr19 19
#define fr20 20
#define fr21 21
#define fr22 22
#define fr23 23
#define fr24 24
#define fr25 25
#define fr26 26
#define fr27 27
#define fr28 28
#define fr29 29
#define fr30 30
#define fr31 31
/* AltiVec Registers (VPRs) */
#define v0 0
#define v1 1
#define v2 2
#define v3 3
#define v4 4
#define v5 5
#define v6 6
#define v7 7
#define v8 8
#define v9 9
#define v10 10
#define v11 11
#define v12 12
#define v13 13
#define v14 14
#define v15 15
#define v16 16
#define v17 17
#define v18 18
#define v19 19
#define v20 20
#define v21 21
#define v22 22
#define v23 23
#define v24 24
#define v25 25
#define v26 26
#define v27 27
#define v28 28
#define v29 29
#define v30 30
#define v31 31
/* VSX Registers (VSRs) */
#define vs0 0
#define vs1 1
#define vs2 2
#define vs3 3
#define vs4 4
#define vs5 5
#define vs6 6
#define vs7 7
#define vs8 8
#define vs9 9
#define vs10 10
#define vs11 11
#define vs12 12
#define vs13 13
#define vs14 14
#define vs15 15
#define vs16 16
#define vs17 17
#define vs18 18
#define vs19 19
#define vs20 20
#define vs21 21
#define vs22 22
#define vs23 23
#define vs24 24
#define vs25 25
#define vs26 26
#define vs27 27
#define vs28 28
#define vs29 29
#define vs30 30
#define vs31 31
#define vs32 32
#define vs33 33
#define vs34 34
#define vs35 35
#define vs36 36
#define vs37 37
#define vs38 38
#define vs39 39
#define vs40 40
#define vs41 41
#define vs42 42
#define vs43 43
#define vs44 44
#define vs45 45
#define vs46 46
#define vs47 47
#define vs48 48
#define vs49 49
#define vs50 50
#define vs51 51
#define vs52 52
#define vs53 53
#define vs54 54
#define vs55 55
#define vs56 56
#define vs57 57
#define vs58 58
#define vs59 59
#define vs60 60
#define vs61 61
#define vs62 62
#define vs63 63
/* SPE Registers (EVPRs) */
#define evr0 0
#define evr1 1
#define evr2 2
#define evr3 3
#define evr4 4
#define evr5 5
#define evr6 6
#define evr7 7
#define evr8 8
#define evr9 9
#define evr10 10
#define evr11 11
#define evr12 12
#define evr13 13
#define evr14 14
#define evr15 15
#define evr16 16
#define evr17 17
#define evr18 18
#define evr19 19
#define evr20 20
#define evr21 21
#define evr22 22
#define evr23 23
#define evr24 24
#define evr25 25
#define evr26 26
#define evr27 27
#define evr28 28
#define evr29 29
#define evr30 30
#define evr31 31
#define RFSCV .long 0x4c0000a4
/*
* Create an endian fixup trampoline
*
* This starts with a "tdi 0,0,0x48" instruction which is
* essentially a "trap never", and thus akin to a nop.
*
* The opcode for this instruction read with the wrong endian
* however results in a b . + 8
*
* So essentially we use that trick to execute the following
* trampoline in "reverse endian" if we are running with the
* MSR_LE bit set the "wrong" way for whatever endianness the
* kernel is built for.
*/
#ifdef CONFIG_PPC_BOOK3E_64
#define FIXUP_ENDIAN
#else
/*
* This version may be used in HV or non-HV context.
* MSR[EE] must be disabled.
*/
#define FIXUP_ENDIAN \
tdi 0,0,0x48; /* Reverse endian of b . + 8 */ \
b 191f; /* Skip trampoline if endian is good */ \
.long 0xa600607d; /* mfmsr r11 */ \
.long 0x01006b69; /* xori r11,r11,1 */ \
.long 0x00004039; /* li r10,0 */ \
.long 0x6401417d; /* mtmsrd r10,1 */ \
.long 0x05009f42; /* bcl 20,31,$+4 */ \
.long 0xa602487d; /* mflr r10 */ \
.long 0x14004a39; /* addi r10,r10,20 */ \
.long 0xa6035a7d; /* mtsrr0 r10 */ \
.long 0xa6037b7d; /* mtsrr1 r11 */ \
.long 0x2400004c; /* rfid */ \
191:
/*
* This version that may only be used with MSR[HV]=1
* - Does not clear MSR[RI], so more robust.
* - Slightly smaller and faster.
*/
#define FIXUP_ENDIAN_HV \
tdi 0,0,0x48; /* Reverse endian of b . + 8 */ \
b 191f; /* Skip trampoline if endian is good */ \
.long 0xa600607d; /* mfmsr r11 */ \
.long 0x01006b69; /* xori r11,r11,1 */ \
.long 0x05009f42; /* bcl 20,31,$+4 */ \
.long 0xa602487d; /* mflr r10 */ \
.long 0x14004a39; /* addi r10,r10,20 */ \
.long 0xa64b5a7d; /* mthsrr0 r10 */ \
.long 0xa64b7b7d; /* mthsrr1 r11 */ \
.long 0x2402004c; /* hrfid */ \
191:
#endif /* !CONFIG_PPC_BOOK3E_64 */
#endif /* __ASSEMBLY__ */
#define SOFT_MASK_TABLE(_start, _end) \
stringify_in_c(.section __soft_mask_table,"a";)\
stringify_in_c(.balign 8;) \
stringify_in_c(.llong (_start);) \
stringify_in_c(.llong (_end);) \
stringify_in_c(.previous)
#define RESTART_TABLE(_start, _end, _target) \
stringify_in_c(.section __restart_table,"a";)\
stringify_in_c(.balign 8;) \
stringify_in_c(.llong (_start);) \
stringify_in_c(.llong (_end);) \
stringify_in_c(.llong (_target);) \
stringify_in_c(.previous)
#ifdef CONFIG_PPC_E500
#define BTB_FLUSH(reg) \
lis reg,BUCSR_INIT@h; \
ori reg,reg,BUCSR_INIT@l; \
mtspr SPRN_BUCSR,reg; \
isync;
#else
#define BTB_FLUSH(reg)
#endif /* CONFIG_PPC_E500 */
#if defined(CONFIG_PPC64_ELF_ABI_V1)
#define STACK_FRAME_PARAMS 48
#elif defined(CONFIG_PPC64_ELF_ABI_V2)
#define STACK_FRAME_PARAMS 32
#elif defined(CONFIG_PPC32)
#define STACK_FRAME_PARAMS 8
#endif
#endif /* _ASM_POWERPC_PPC_ASM_H */