Merge local branch 'x86-codegen'
Merge trivial x86 code generation annoyances:

 - Introduce helper macros for clang asm input problems
 - use said macros to improve trivially stupid code generation issues in bitops and array_index_mask_nospec
 - also improve codegen with 32-bit array index comparisons

None of these really matter, but I look at code generation and profiles fairly regularly, and these misfeatures caused the generated code to look really odd and distract from the real issues.

* branch 'x86-codegen' of local tree:
  x86: improve bitop code generation with clang
  x86: improve array_index_mask_nospec() code generation
  clang: work around asm input constraint problems
This commit is contained in:
commit
f8a6e48c6c
@ -33,20 +33,16 @@
|
||||
* Returns:
|
||||
* 0 - (index < size)
|
||||
*/
|
||||
static __always_inline unsigned long array_index_mask_nospec(unsigned long index,
|
||||
unsigned long size)
|
||||
{
|
||||
unsigned long mask;
|
||||
|
||||
asm volatile ("cmp %1,%2; sbb %0,%0;"
|
||||
:"=r" (mask)
|
||||
:"g"(size),"r" (index)
|
||||
:"cc");
|
||||
return mask;
|
||||
}
|
||||
|
||||
/* Override the default implementation from linux/nospec.h. */
|
||||
#define array_index_mask_nospec array_index_mask_nospec
|
||||
#define array_index_mask_nospec(idx,sz) ({ \
|
||||
typeof((idx)+(sz)) __idx = (idx); \
|
||||
typeof(__idx) __sz = (sz); \
|
||||
unsigned long __mask; \
|
||||
asm volatile ("cmp %1,%2; sbb %0,%0" \
|
||||
:"=r" (__mask) \
|
||||
:ASM_INPUT_G (__sz), \
|
||||
"r" (__idx) \
|
||||
:"cc"); \
|
||||
__mask; })
|
||||
|
||||
/* Prevent speculative execution past this barrier. */
|
||||
#define barrier_nospec() alternative("", "lfence", X86_FEATURE_LFENCE_RDTSC)
|
||||
|
@ -250,7 +250,7 @@ static __always_inline unsigned long variable__ffs(unsigned long word)
|
||||
{
|
||||
asm("rep; bsf %1,%0"
|
||||
: "=r" (word)
|
||||
: "rm" (word));
|
||||
: ASM_INPUT_RM (word));
|
||||
return word;
|
||||
}
|
||||
|
||||
@ -297,7 +297,7 @@ static __always_inline unsigned long __fls(unsigned long word)
|
||||
|
||||
asm("bsr %1,%0"
|
||||
: "=r" (word)
|
||||
: "rm" (word));
|
||||
: ASM_INPUT_RM (word));
|
||||
return word;
|
||||
}
|
||||
|
||||
@ -320,7 +320,7 @@ static __always_inline int variable_ffs(int x)
|
||||
*/
|
||||
asm("bsfl %1,%0"
|
||||
: "=r" (r)
|
||||
: "rm" (x), "0" (-1));
|
||||
: ASM_INPUT_RM (x), "0" (-1));
|
||||
#elif defined(CONFIG_X86_CMOV)
|
||||
asm("bsfl %1,%0\n\t"
|
||||
"cmovzl %2,%0"
|
||||
@ -377,7 +377,7 @@ static __always_inline int fls(unsigned int x)
|
||||
*/
|
||||
asm("bsrl %1,%0"
|
||||
: "=r" (r)
|
||||
: "rm" (x), "0" (-1));
|
||||
: ASM_INPUT_RM (x), "0" (-1));
|
||||
#elif defined(CONFIG_X86_CMOV)
|
||||
asm("bsrl %1,%0\n\t"
|
||||
"cmovzl %2,%0"
|
||||
@ -416,7 +416,7 @@ static __always_inline int fls64(__u64 x)
|
||||
*/
|
||||
asm("bsrq %1,%q0"
|
||||
: "+r" (bitpos)
|
||||
: "rm" (x));
|
||||
: ASM_INPUT_RM (x));
|
||||
return bitpos + 1;
|
||||
}
|
||||
#else
|
||||
|
@ -118,3 +118,13 @@
|
||||
|
||||
#define __diag_ignore_all(option, comment) \
|
||||
__diag_clang(13, ignore, option)
|
||||
|
||||
/*
|
||||
* clang has horrible behavior with "g" or "rm" constraints for asm
|
||||
* inputs, turning them into something worse than "m". Avoid using
|
||||
* constraints with multiple possible uses (but "ir" seems to be ok):
|
||||
*
|
||||
* https://github.com/llvm/llvm-project/issues/20571
|
||||
*/
|
||||
#define ASM_INPUT_G "ir"
|
||||
#define ASM_INPUT_RM "r"
|
||||
|
@ -409,6 +409,15 @@ struct ftrace_likely_data {
|
||||
#define asm_goto_output(x...) asm volatile goto(x)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Clang has trouble with constraints with multiple
|
||||
* alternative behaviors (mainly "g" and "rm").
|
||||
*/
|
||||
#ifndef ASM_INPUT_G
|
||||
#define ASM_INPUT_G "g"
|
||||
#define ASM_INPUT_RM "rm"
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_CC_HAS_ASM_INLINE
|
||||
#define asm_inline asm __inline
|
||||
#else
|
||||
|
Loading…
Reference in New Issue
Block a user