1

Merge patch series "Rework & improve riscv cmpxchg.h and atomic.h"

Leonardo Bras <leobras@redhat.com> says:

While studying riscv's cmpxchg.h file, I got really interested in
understanding how RISCV asm implemented the different versions of
{cmp,}xchg.

When I understood the pattern, it made sense for me to remove the
duplications and create macros to make it easier to understand what exactly
changes between the versions: Instruction sufixes & barriers.

Also, did the same kind of work on atomic.c.

After that, I noted both cmpxchg and xchg only accept variables of
size 4 and 8, compared to x86 and arm64 which do 1,2,4,8.

Now that deduplication is done, it is quite direct to implement them
for variable sizes 1 and 2, so I did it. Then Guo Ren already presented
me some possible users :)

I did compare the generated asm on a test.c that contained usage for every
changed function, and could not detect any change on patches 1 + 2 + 3
compared with upstream.

Pathes 4 & 5 were compiled-tested, merged with guoren/qspinlock_v11 and
booted just fine with qemu -machine virt -append "qspinlock".

(tree: https://gitlab.com/LeoBras/linux/-/commits/guo_qspinlock_v11)

Latest tests happened based on this tree:
https://github.com/guoren83/linux/tree/qspinlock_v12

* b4-shazam-lts:
  riscv/cmpxchg: Implement xchg for variables of size 1 and 2
  riscv/cmpxchg: Implement cmpxchg for variables of size 1 and 2
  riscv/atomic.h : Deduplicate arch_atomic.*
  riscv/cmpxchg: Deduplicate cmpxchg() asm and macros
  riscv/cmpxchg: Deduplicate xchg() asm functions

Link: https://lore.kernel.org/r/20240103163203.72768-2-leobras@redhat.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
This commit is contained in:
Palmer Dabbelt 2024-04-08 10:55:03 -07:00
commit 300ce44cbe
No known key found for this signature in database
GPG Key ID: 2E1319F35FBB1889
2 changed files with 200 additions and 368 deletions

View File

@ -195,22 +195,28 @@ ATOMIC_OPS(xor, xor, i)
#undef ATOMIC_FETCH_OP #undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN #undef ATOMIC_OP_RETURN
#define _arch_atomic_fetch_add_unless(_prev, _rc, counter, _a, _u, sfx) \
({ \
__asm__ __volatile__ ( \
"0: lr." sfx " %[p], %[c]\n" \
" beq %[p], %[u], 1f\n" \
" add %[rc], %[p], %[a]\n" \
" sc." sfx ".rl %[rc], %[rc], %[c]\n" \
" bnez %[rc], 0b\n" \
" fence rw, rw\n" \
"1:\n" \
: [p]"=&r" (_prev), [rc]"=&r" (_rc), [c]"+A" (counter) \
: [a]"r" (_a), [u]"r" (_u) \
: "memory"); \
})
/* This is required to provide a full barrier on success. */ /* This is required to provide a full barrier on success. */
static __always_inline int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u) static __always_inline int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u)
{ {
int prev, rc; int prev, rc;
__asm__ __volatile__ ( _arch_atomic_fetch_add_unless(prev, rc, v->counter, a, u, "w");
"0: lr.w %[p], %[c]\n"
" beq %[p], %[u], 1f\n"
" add %[rc], %[p], %[a]\n"
" sc.w.rl %[rc], %[rc], %[c]\n"
" bnez %[rc], 0b\n"
RISCV_FULL_BARRIER
"1:\n"
: [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
: [a]"r" (a), [u]"r" (u)
: "memory");
return prev; return prev;
} }
#define arch_atomic_fetch_add_unless arch_atomic_fetch_add_unless #define arch_atomic_fetch_add_unless arch_atomic_fetch_add_unless
@ -221,77 +227,86 @@ static __always_inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a,
s64 prev; s64 prev;
long rc; long rc;
__asm__ __volatile__ ( _arch_atomic_fetch_add_unless(prev, rc, v->counter, a, u, "d");
"0: lr.d %[p], %[c]\n"
" beq %[p], %[u], 1f\n"
" add %[rc], %[p], %[a]\n"
" sc.d.rl %[rc], %[rc], %[c]\n"
" bnez %[rc], 0b\n"
RISCV_FULL_BARRIER
"1:\n"
: [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
: [a]"r" (a), [u]"r" (u)
: "memory");
return prev; return prev;
} }
#define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless #define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless
#endif #endif
#define _arch_atomic_inc_unless_negative(_prev, _rc, counter, sfx) \
({ \
__asm__ __volatile__ ( \
"0: lr." sfx " %[p], %[c]\n" \
" bltz %[p], 1f\n" \
" addi %[rc], %[p], 1\n" \
" sc." sfx ".rl %[rc], %[rc], %[c]\n" \
" bnez %[rc], 0b\n" \
" fence rw, rw\n" \
"1:\n" \
: [p]"=&r" (_prev), [rc]"=&r" (_rc), [c]"+A" (counter) \
: \
: "memory"); \
})
static __always_inline bool arch_atomic_inc_unless_negative(atomic_t *v) static __always_inline bool arch_atomic_inc_unless_negative(atomic_t *v)
{ {
int prev, rc; int prev, rc;
__asm__ __volatile__ ( _arch_atomic_inc_unless_negative(prev, rc, v->counter, "w");
"0: lr.w %[p], %[c]\n"
" bltz %[p], 1f\n"
" addi %[rc], %[p], 1\n"
" sc.w.rl %[rc], %[rc], %[c]\n"
" bnez %[rc], 0b\n"
RISCV_FULL_BARRIER
"1:\n"
: [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
:
: "memory");
return !(prev < 0); return !(prev < 0);
} }
#define arch_atomic_inc_unless_negative arch_atomic_inc_unless_negative #define arch_atomic_inc_unless_negative arch_atomic_inc_unless_negative
#define _arch_atomic_dec_unless_positive(_prev, _rc, counter, sfx) \
({ \
__asm__ __volatile__ ( \
"0: lr." sfx " %[p], %[c]\n" \
" bgtz %[p], 1f\n" \
" addi %[rc], %[p], -1\n" \
" sc." sfx ".rl %[rc], %[rc], %[c]\n" \
" bnez %[rc], 0b\n" \
" fence rw, rw\n" \
"1:\n" \
: [p]"=&r" (_prev), [rc]"=&r" (_rc), [c]"+A" (counter) \
: \
: "memory"); \
})
static __always_inline bool arch_atomic_dec_unless_positive(atomic_t *v) static __always_inline bool arch_atomic_dec_unless_positive(atomic_t *v)
{ {
int prev, rc; int prev, rc;
__asm__ __volatile__ ( _arch_atomic_dec_unless_positive(prev, rc, v->counter, "w");
"0: lr.w %[p], %[c]\n"
" bgtz %[p], 1f\n"
" addi %[rc], %[p], -1\n"
" sc.w.rl %[rc], %[rc], %[c]\n"
" bnez %[rc], 0b\n"
RISCV_FULL_BARRIER
"1:\n"
: [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
:
: "memory");
return !(prev > 0); return !(prev > 0);
} }
#define arch_atomic_dec_unless_positive arch_atomic_dec_unless_positive #define arch_atomic_dec_unless_positive arch_atomic_dec_unless_positive
#define _arch_atomic_dec_if_positive(_prev, _rc, counter, sfx) \
({ \
__asm__ __volatile__ ( \
"0: lr." sfx " %[p], %[c]\n" \
" addi %[rc], %[p], -1\n" \
" bltz %[rc], 1f\n" \
" sc." sfx ".rl %[rc], %[rc], %[c]\n" \
" bnez %[rc], 0b\n" \
" fence rw, rw\n" \
"1:\n" \
: [p]"=&r" (_prev), [rc]"=&r" (_rc), [c]"+A" (counter) \
: \
: "memory"); \
})
static __always_inline int arch_atomic_dec_if_positive(atomic_t *v) static __always_inline int arch_atomic_dec_if_positive(atomic_t *v)
{ {
int prev, rc; int prev, rc;
__asm__ __volatile__ ( _arch_atomic_dec_if_positive(prev, rc, v->counter, "w");
"0: lr.w %[p], %[c]\n"
" addi %[rc], %[p], -1\n"
" bltz %[rc], 1f\n"
" sc.w.rl %[rc], %[rc], %[c]\n"
" bnez %[rc], 0b\n"
RISCV_FULL_BARRIER
"1:\n"
: [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
:
: "memory");
return prev - 1; return prev - 1;
} }
@ -303,17 +318,8 @@ static __always_inline bool arch_atomic64_inc_unless_negative(atomic64_t *v)
s64 prev; s64 prev;
long rc; long rc;
__asm__ __volatile__ ( _arch_atomic_inc_unless_negative(prev, rc, v->counter, "d");
"0: lr.d %[p], %[c]\n"
" bltz %[p], 1f\n"
" addi %[rc], %[p], 1\n"
" sc.d.rl %[rc], %[rc], %[c]\n"
" bnez %[rc], 0b\n"
RISCV_FULL_BARRIER
"1:\n"
: [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
:
: "memory");
return !(prev < 0); return !(prev < 0);
} }
@ -324,17 +330,8 @@ static __always_inline bool arch_atomic64_dec_unless_positive(atomic64_t *v)
s64 prev; s64 prev;
long rc; long rc;
__asm__ __volatile__ ( _arch_atomic_dec_unless_positive(prev, rc, v->counter, "d");
"0: lr.d %[p], %[c]\n"
" bgtz %[p], 1f\n"
" addi %[rc], %[p], -1\n"
" sc.d.rl %[rc], %[rc], %[c]\n"
" bnez %[rc], 0b\n"
RISCV_FULL_BARRIER
"1:\n"
: [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
:
: "memory");
return !(prev > 0); return !(prev > 0);
} }
@ -345,17 +342,8 @@ static __always_inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
s64 prev; s64 prev;
long rc; long rc;
__asm__ __volatile__ ( _arch_atomic_dec_if_positive(prev, rc, v->counter, "d");
"0: lr.d %[p], %[c]\n"
" addi %[rc], %[p], -1\n"
" bltz %[rc], 1f\n"
" sc.d.rl %[rc], %[rc], %[c]\n"
" bnez %[rc], 0b\n"
RISCV_FULL_BARRIER
"1:\n"
: [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
:
: "memory");
return prev - 1; return prev - 1;
} }

View File

@ -10,140 +10,79 @@
#include <asm/fence.h> #include <asm/fence.h>
#define __xchg_relaxed(ptr, new, size) \ #define __arch_xchg_masked(prepend, append, r, p, n) \
({ \
u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \
ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \
<< __s; \
ulong __newx = (ulong)(n) << __s; \
ulong __retx; \
ulong __rc; \
\
__asm__ __volatile__ ( \
prepend \
"0: lr.w %0, %2\n" \
" and %1, %0, %z4\n" \
" or %1, %1, %z3\n" \
" sc.w %1, %1, %2\n" \
" bnez %1, 0b\n" \
append \
: "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
: "rJ" (__newx), "rJ" (~__mask) \
: "memory"); \
\
r = (__typeof__(*(p)))((__retx & __mask) >> __s); \
})
#define __arch_xchg(sfx, prepend, append, r, p, n) \
({ \
__asm__ __volatile__ ( \
prepend \
" amoswap" sfx " %0, %2, %1\n" \
append \
: "=r" (r), "+A" (*(p)) \
: "r" (n) \
: "memory"); \
})
#define _arch_xchg(ptr, new, sfx, prepend, append) \
({ \ ({ \
__typeof__(ptr) __ptr = (ptr); \ __typeof__(ptr) __ptr = (ptr); \
__typeof__(new) __new = (new); \ __typeof__(*(__ptr)) __new = (new); \
__typeof__(*(ptr)) __ret; \ __typeof__(*(__ptr)) __ret; \
switch (size) { \ \
switch (sizeof(*__ptr)) { \
case 1: \
case 2: \
__arch_xchg_masked(prepend, append, \
__ret, __ptr, __new); \
break; \
case 4: \ case 4: \
__asm__ __volatile__ ( \ __arch_xchg(".w" sfx, prepend, append, \
" amoswap.w %0, %2, %1\n" \ __ret, __ptr, __new); \
: "=r" (__ret), "+A" (*__ptr) \
: "r" (__new) \
: "memory"); \
break; \ break; \
case 8: \ case 8: \
__asm__ __volatile__ ( \ __arch_xchg(".d" sfx, prepend, append, \
" amoswap.d %0, %2, %1\n" \ __ret, __ptr, __new); \
: "=r" (__ret), "+A" (*__ptr) \
: "r" (__new) \
: "memory"); \
break; \ break; \
default: \ default: \
BUILD_BUG(); \ BUILD_BUG(); \
} \ } \
__ret; \ (__typeof__(*(__ptr)))__ret; \
}) })
#define arch_xchg_relaxed(ptr, x) \ #define arch_xchg_relaxed(ptr, x) \
({ \ _arch_xchg(ptr, x, "", "", "")
__typeof__(*(ptr)) _x_ = (x); \
(__typeof__(*(ptr))) __xchg_relaxed((ptr), \
_x_, sizeof(*(ptr))); \
})
#define __xchg_acquire(ptr, new, size) \
({ \
__typeof__(ptr) __ptr = (ptr); \
__typeof__(new) __new = (new); \
__typeof__(*(ptr)) __ret; \
switch (size) { \
case 4: \
__asm__ __volatile__ ( \
" amoswap.w %0, %2, %1\n" \
RISCV_ACQUIRE_BARRIER \
: "=r" (__ret), "+A" (*__ptr) \
: "r" (__new) \
: "memory"); \
break; \
case 8: \
__asm__ __volatile__ ( \
" amoswap.d %0, %2, %1\n" \
RISCV_ACQUIRE_BARRIER \
: "=r" (__ret), "+A" (*__ptr) \
: "r" (__new) \
: "memory"); \
break; \
default: \
BUILD_BUG(); \
} \
__ret; \
})
#define arch_xchg_acquire(ptr, x) \ #define arch_xchg_acquire(ptr, x) \
({ \ _arch_xchg(ptr, x, "", "", RISCV_ACQUIRE_BARRIER)
__typeof__(*(ptr)) _x_ = (x); \
(__typeof__(*(ptr))) __xchg_acquire((ptr), \
_x_, sizeof(*(ptr))); \
})
#define __xchg_release(ptr, new, size) \
({ \
__typeof__(ptr) __ptr = (ptr); \
__typeof__(new) __new = (new); \
__typeof__(*(ptr)) __ret; \
switch (size) { \
case 4: \
__asm__ __volatile__ ( \
RISCV_RELEASE_BARRIER \
" amoswap.w %0, %2, %1\n" \
: "=r" (__ret), "+A" (*__ptr) \
: "r" (__new) \
: "memory"); \
break; \
case 8: \
__asm__ __volatile__ ( \
RISCV_RELEASE_BARRIER \
" amoswap.d %0, %2, %1\n" \
: "=r" (__ret), "+A" (*__ptr) \
: "r" (__new) \
: "memory"); \
break; \
default: \
BUILD_BUG(); \
} \
__ret; \
})
#define arch_xchg_release(ptr, x) \ #define arch_xchg_release(ptr, x) \
({ \ _arch_xchg(ptr, x, "", RISCV_RELEASE_BARRIER, "")
__typeof__(*(ptr)) _x_ = (x); \
(__typeof__(*(ptr))) __xchg_release((ptr), \
_x_, sizeof(*(ptr))); \
})
#define __arch_xchg(ptr, new, size) \
({ \
__typeof__(ptr) __ptr = (ptr); \
__typeof__(new) __new = (new); \
__typeof__(*(ptr)) __ret; \
switch (size) { \
case 4: \
__asm__ __volatile__ ( \
" amoswap.w.aqrl %0, %2, %1\n" \
: "=r" (__ret), "+A" (*__ptr) \
: "r" (__new) \
: "memory"); \
break; \
case 8: \
__asm__ __volatile__ ( \
" amoswap.d.aqrl %0, %2, %1\n" \
: "=r" (__ret), "+A" (*__ptr) \
: "r" (__new) \
: "memory"); \
break; \
default: \
BUILD_BUG(); \
} \
__ret; \
})
#define arch_xchg(ptr, x) \ #define arch_xchg(ptr, x) \
({ \ _arch_xchg(ptr, x, ".aqrl", "", "")
__typeof__(*(ptr)) _x_ = (x); \
(__typeof__(*(ptr))) __arch_xchg((ptr), _x_, sizeof(*(ptr))); \
})
#define xchg32(ptr, x) \ #define xchg32(ptr, x) \
({ \ ({ \
@ -162,190 +101,95 @@
* store NEW in MEM. Return the initial value in MEM. Success is * store NEW in MEM. Return the initial value in MEM. Success is
* indicated by comparing RETURN with OLD. * indicated by comparing RETURN with OLD.
*/ */
#define __cmpxchg_relaxed(ptr, old, new, size) \
#define __arch_cmpxchg_masked(sc_sfx, prepend, append, r, p, o, n) \
({ \
u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \
ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \
<< __s; \
ulong __newx = (ulong)(n) << __s; \
ulong __oldx = (ulong)(o) << __s; \
ulong __retx; \
ulong __rc; \
\
__asm__ __volatile__ ( \
prepend \
"0: lr.w %0, %2\n" \
" and %1, %0, %z5\n" \
" bne %1, %z3, 1f\n" \
" and %1, %0, %z6\n" \
" or %1, %1, %z4\n" \
" sc.w" sc_sfx " %1, %1, %2\n" \
" bnez %1, 0b\n" \
append \
"1:\n" \
: "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
: "rJ" ((long)__oldx), "rJ" (__newx), \
"rJ" (__mask), "rJ" (~__mask) \
: "memory"); \
\
r = (__typeof__(*(p)))((__retx & __mask) >> __s); \
})
#define __arch_cmpxchg(lr_sfx, sc_sfx, prepend, append, r, p, co, o, n) \
({ \
register unsigned int __rc; \
\
__asm__ __volatile__ ( \
prepend \
"0: lr" lr_sfx " %0, %2\n" \
" bne %0, %z3, 1f\n" \
" sc" sc_sfx " %1, %z4, %2\n" \
" bnez %1, 0b\n" \
append \
"1:\n" \
: "=&r" (r), "=&r" (__rc), "+A" (*(p)) \
: "rJ" (co o), "rJ" (n) \
: "memory"); \
})
#define _arch_cmpxchg(ptr, old, new, sc_sfx, prepend, append) \
({ \ ({ \
__typeof__(ptr) __ptr = (ptr); \ __typeof__(ptr) __ptr = (ptr); \
__typeof__(*(ptr)) __old = (old); \ __typeof__(*(__ptr)) __old = (old); \
__typeof__(*(ptr)) __new = (new); \ __typeof__(*(__ptr)) __new = (new); \
__typeof__(*(ptr)) __ret; \ __typeof__(*(__ptr)) __ret; \
register unsigned int __rc; \ \
switch (size) { \ switch (sizeof(*__ptr)) { \
case 1: \
case 2: \
__arch_cmpxchg_masked(sc_sfx, prepend, append, \
__ret, __ptr, __old, __new); \
break; \
case 4: \ case 4: \
__asm__ __volatile__ ( \ __arch_cmpxchg(".w", ".w" sc_sfx, prepend, append, \
"0: lr.w %0, %2\n" \ __ret, __ptr, (long), __old, __new); \
" bne %0, %z3, 1f\n" \
" sc.w %1, %z4, %2\n" \
" bnez %1, 0b\n" \
"1:\n" \
: "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
: "rJ" ((long)__old), "rJ" (__new) \
: "memory"); \
break; \ break; \
case 8: \ case 8: \
__asm__ __volatile__ ( \ __arch_cmpxchg(".d", ".d" sc_sfx, prepend, append, \
"0: lr.d %0, %2\n" \ __ret, __ptr, /**/, __old, __new); \
" bne %0, %z3, 1f\n" \
" sc.d %1, %z4, %2\n" \
" bnez %1, 0b\n" \
"1:\n" \
: "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
: "rJ" (__old), "rJ" (__new) \
: "memory"); \
break; \ break; \
default: \ default: \
BUILD_BUG(); \ BUILD_BUG(); \
} \ } \
__ret; \ (__typeof__(*(__ptr)))__ret; \
}) })
#define arch_cmpxchg_relaxed(ptr, o, n) \ #define arch_cmpxchg_relaxed(ptr, o, n) \
({ \ _arch_cmpxchg((ptr), (o), (n), "", "", "")
__typeof__(*(ptr)) _o_ = (o); \
__typeof__(*(ptr)) _n_ = (n); \
(__typeof__(*(ptr))) __cmpxchg_relaxed((ptr), \
_o_, _n_, sizeof(*(ptr))); \
})
#define __cmpxchg_acquire(ptr, old, new, size) \
({ \
__typeof__(ptr) __ptr = (ptr); \
__typeof__(*(ptr)) __old = (old); \
__typeof__(*(ptr)) __new = (new); \
__typeof__(*(ptr)) __ret; \
register unsigned int __rc; \
switch (size) { \
case 4: \
__asm__ __volatile__ ( \
"0: lr.w %0, %2\n" \
" bne %0, %z3, 1f\n" \
" sc.w %1, %z4, %2\n" \
" bnez %1, 0b\n" \
RISCV_ACQUIRE_BARRIER \
"1:\n" \
: "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
: "rJ" ((long)__old), "rJ" (__new) \
: "memory"); \
break; \
case 8: \
__asm__ __volatile__ ( \
"0: lr.d %0, %2\n" \
" bne %0, %z3, 1f\n" \
" sc.d %1, %z4, %2\n" \
" bnez %1, 0b\n" \
RISCV_ACQUIRE_BARRIER \
"1:\n" \
: "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
: "rJ" (__old), "rJ" (__new) \
: "memory"); \
break; \
default: \
BUILD_BUG(); \
} \
__ret; \
})
#define arch_cmpxchg_acquire(ptr, o, n) \ #define arch_cmpxchg_acquire(ptr, o, n) \
({ \ _arch_cmpxchg((ptr), (o), (n), "", "", RISCV_ACQUIRE_BARRIER)
__typeof__(*(ptr)) _o_ = (o); \
__typeof__(*(ptr)) _n_ = (n); \
(__typeof__(*(ptr))) __cmpxchg_acquire((ptr), \
_o_, _n_, sizeof(*(ptr))); \
})
#define __cmpxchg_release(ptr, old, new, size) \
({ \
__typeof__(ptr) __ptr = (ptr); \
__typeof__(*(ptr)) __old = (old); \
__typeof__(*(ptr)) __new = (new); \
__typeof__(*(ptr)) __ret; \
register unsigned int __rc; \
switch (size) { \
case 4: \
__asm__ __volatile__ ( \
RISCV_RELEASE_BARRIER \
"0: lr.w %0, %2\n" \
" bne %0, %z3, 1f\n" \
" sc.w %1, %z4, %2\n" \
" bnez %1, 0b\n" \
"1:\n" \
: "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
: "rJ" ((long)__old), "rJ" (__new) \
: "memory"); \
break; \
case 8: \
__asm__ __volatile__ ( \
RISCV_RELEASE_BARRIER \
"0: lr.d %0, %2\n" \
" bne %0, %z3, 1f\n" \
" sc.d %1, %z4, %2\n" \
" bnez %1, 0b\n" \
"1:\n" \
: "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
: "rJ" (__old), "rJ" (__new) \
: "memory"); \
break; \
default: \
BUILD_BUG(); \
} \
__ret; \
})
#define arch_cmpxchg_release(ptr, o, n) \ #define arch_cmpxchg_release(ptr, o, n) \
({ \ _arch_cmpxchg((ptr), (o), (n), "", RISCV_RELEASE_BARRIER, "")
__typeof__(*(ptr)) _o_ = (o); \
__typeof__(*(ptr)) _n_ = (n); \
(__typeof__(*(ptr))) __cmpxchg_release((ptr), \
_o_, _n_, sizeof(*(ptr))); \
})
#define __cmpxchg(ptr, old, new, size) \
({ \
__typeof__(ptr) __ptr = (ptr); \
__typeof__(*(ptr)) __old = (old); \
__typeof__(*(ptr)) __new = (new); \
__typeof__(*(ptr)) __ret; \
register unsigned int __rc; \
switch (size) { \
case 4: \
__asm__ __volatile__ ( \
"0: lr.w %0, %2\n" \
" bne %0, %z3, 1f\n" \
" sc.w.rl %1, %z4, %2\n" \
" bnez %1, 0b\n" \
RISCV_FULL_BARRIER \
"1:\n" \
: "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
: "rJ" ((long)__old), "rJ" (__new) \
: "memory"); \
break; \
case 8: \
__asm__ __volatile__ ( \
"0: lr.d %0, %2\n" \
" bne %0, %z3, 1f\n" \
" sc.d.rl %1, %z4, %2\n" \
" bnez %1, 0b\n" \
RISCV_FULL_BARRIER \
"1:\n" \
: "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
: "rJ" (__old), "rJ" (__new) \
: "memory"); \
break; \
default: \
BUILD_BUG(); \
} \
__ret; \
})
#define arch_cmpxchg(ptr, o, n) \ #define arch_cmpxchg(ptr, o, n) \
({ \ _arch_cmpxchg((ptr), (o), (n), ".rl", "", " fence rw, rw\n")
__typeof__(*(ptr)) _o_ = (o); \
__typeof__(*(ptr)) _n_ = (n); \
(__typeof__(*(ptr))) __cmpxchg((ptr), \
_o_, _n_, sizeof(*(ptr))); \
})
#define arch_cmpxchg_local(ptr, o, n) \ #define arch_cmpxchg_local(ptr, o, n) \
(__cmpxchg_relaxed((ptr), (o), (n), sizeof(*(ptr)))) arch_cmpxchg_relaxed((ptr), (o), (n))
#define arch_cmpxchg64(ptr, o, n) \ #define arch_cmpxchg64(ptr, o, n) \
({ \ ({ \