1
linux/arch/riscv/include/asm/atomic.h

354 lines
10 KiB
C
Raw Normal View History

/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
* Copyright (C) 2012 Regents of the University of California
* Copyright (C) 2017 SiFive
*/
#ifndef _ASM_RISCV_ATOMIC_H
#define _ASM_RISCV_ATOMIC_H
#ifdef CONFIG_GENERIC_ATOMIC64
# include <asm-generic/atomic64.h>
#else
# if (__riscv_xlen < 64)
# error "64-bit atomics require XLEN to be at least 64"
# endif
#endif
#include <asm/cmpxchg.h>
locking/atomics: Rework ordering barriers Currently architectures can override __atomic_op_*() to define the barriers used before/after a relaxed atomic when used to build acquire/release/fence variants. This has the unfortunate property of requiring the architecture to define the full wrapper for the atomics, rather than just the barriers they care about, and gets in the way of generating atomics which can be easily read. Instead, this patch has architectures define an optional set of barriers: * __atomic_acquire_fence() * __atomic_release_fence() * __atomic_pre_full_fence() * __atomic_post_full_fence() ... which <linux/atomic.h> uses to build the wrappers. It would be nice if we could undef these, along with the __atomic_op_*() wrappers, but that would break the cmpxchg() wrappers, which are written in preprocessor. Undefs would have been nice, but alas. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland <mark.rutland@arm.com> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> Acked-by: Will Deacon <will.deacon@arm.com> Cc: Andrea Parri <parri.andrea@gmail.com> Cc: Boqun Feng <boqun.feng@gmail.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: andy.shevchenko@gmail.com Cc: arnd@arndb.de Cc: aryabinin@virtuozzo.com Cc: catalin.marinas@arm.com Cc: dvyukov@google.com Cc: glider@google.com Cc: linux-arm-kernel@lists.infradead.org Cc: peter@hurleysoftware.com Link: http://lkml.kernel.org/r/20180716113017.3909-7-mark.rutland@arm.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
2018-07-16 04:30:11 -07:00
#define __atomic_acquire_fence() \
__asm__ __volatile__(RISCV_ACQUIRE_BARRIER "" ::: "memory")
#define __atomic_release_fence() \
__asm__ __volatile__(RISCV_RELEASE_BARRIER "" ::: "memory");
riscv/atomic: Strengthen implementations with fences Atomics present the same issue with locking: release and acquire variants need to be strengthened to meet the constraints defined by the Linux-kernel memory consistency model [1]. Atomics present a further issue: implementations of atomics such as atomic_cmpxchg() and atomic_add_unless() rely on LR/SC pairs, which do not give full-ordering with .aqrl; for example, current implementations allow the "lr-sc-aqrl-pair-vs-full-barrier" test below to end up with the state indicated in the "exists" clause. In order to "synchronize" LKMM and RISC-V's implementation, this commit strengthens the implementations of the atomics operations by replacing .rl and .aq with the use of ("lightweigth") fences, and by replacing .aqrl LR/SC pairs in sequences such as: 0: lr.w.aqrl %0, %addr bne %0, %old, 1f ... sc.w.aqrl %1, %new, %addr bnez %1, 0b 1: with sequences of the form: 0: lr.w %0, %addr bne %0, %old, 1f ... sc.w.rl %1, %new, %addr /* SC-release */ bnez %1, 0b fence rw, rw /* "full" fence */ 1: following Daniel's suggestion. These modifications were validated with simulation of the RISC-V memory consistency model. C lr-sc-aqrl-pair-vs-full-barrier {} P0(int *x, int *y, atomic_t *u) { int r0; int r1; WRITE_ONCE(*x, 1); r0 = atomic_cmpxchg(u, 0, 1); r1 = READ_ONCE(*y); } P1(int *x, int *y, atomic_t *v) { int r0; int r1; WRITE_ONCE(*y, 1); r0 = atomic_cmpxchg(v, 0, 1); r1 = READ_ONCE(*x); } exists (u=1 /\ v=1 /\ 0:r1=0 /\ 1:r1=0) [1] https://marc.info/?l=linux-kernel&m=151930201102853&w=2 https://groups.google.com/a/groups.riscv.org/forum/#!topic/isa-dev/hKywNHBkAXM https://marc.info/?l=linux-kernel&m=151633436614259&w=2 Suggested-by: Daniel Lustig <dlustig@nvidia.com> Signed-off-by: Andrea Parri <parri.andrea@gmail.com> Cc: Palmer Dabbelt <palmer@sifive.com> Cc: Albert Ou <albert@sifive.com> Cc: Daniel Lustig <dlustig@nvidia.com> Cc: Alan Stern <stern@rowland.harvard.edu> Cc: Will Deacon <will.deacon@arm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Boqun Feng <boqun.feng@gmail.com> Cc: Nicholas Piggin <npiggin@gmail.com> Cc: David Howells <dhowells@redhat.com> Cc: Jade Alglave <j.alglave@ucl.ac.uk> Cc: Luc Maranget <luc.maranget@inria.fr> Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> Cc: Akira Yokosawa <akiyks@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: linux-riscv@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
2018-03-09 05:13:40 -07:00
static __always_inline int arch_atomic_read(const atomic_t *v)
{
return READ_ONCE(v->counter);
}
static __always_inline void arch_atomic_set(atomic_t *v, int i)
{
WRITE_ONCE(v->counter, i);
}
#ifndef CONFIG_GENERIC_ATOMIC64
#define ATOMIC64_INIT(i) { (i) }
static __always_inline s64 arch_atomic64_read(const atomic64_t *v)
{
return READ_ONCE(v->counter);
}
static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i)
{
WRITE_ONCE(v->counter, i);
}
#endif
/*
* First, the atomic ops that have no ordering constraints and therefor don't
* have the AQ or RL bits set. These don't return anything, so there's only
* one version to worry about.
*/
riscv/atomic: Strengthen implementations with fences Atomics present the same issue with locking: release and acquire variants need to be strengthened to meet the constraints defined by the Linux-kernel memory consistency model [1]. Atomics present a further issue: implementations of atomics such as atomic_cmpxchg() and atomic_add_unless() rely on LR/SC pairs, which do not give full-ordering with .aqrl; for example, current implementations allow the "lr-sc-aqrl-pair-vs-full-barrier" test below to end up with the state indicated in the "exists" clause. In order to "synchronize" LKMM and RISC-V's implementation, this commit strengthens the implementations of the atomics operations by replacing .rl and .aq with the use of ("lightweigth") fences, and by replacing .aqrl LR/SC pairs in sequences such as: 0: lr.w.aqrl %0, %addr bne %0, %old, 1f ... sc.w.aqrl %1, %new, %addr bnez %1, 0b 1: with sequences of the form: 0: lr.w %0, %addr bne %0, %old, 1f ... sc.w.rl %1, %new, %addr /* SC-release */ bnez %1, 0b fence rw, rw /* "full" fence */ 1: following Daniel's suggestion. These modifications were validated with simulation of the RISC-V memory consistency model. C lr-sc-aqrl-pair-vs-full-barrier {} P0(int *x, int *y, atomic_t *u) { int r0; int r1; WRITE_ONCE(*x, 1); r0 = atomic_cmpxchg(u, 0, 1); r1 = READ_ONCE(*y); } P1(int *x, int *y, atomic_t *v) { int r0; int r1; WRITE_ONCE(*y, 1); r0 = atomic_cmpxchg(v, 0, 1); r1 = READ_ONCE(*x); } exists (u=1 /\ v=1 /\ 0:r1=0 /\ 1:r1=0) [1] https://marc.info/?l=linux-kernel&m=151930201102853&w=2 https://groups.google.com/a/groups.riscv.org/forum/#!topic/isa-dev/hKywNHBkAXM https://marc.info/?l=linux-kernel&m=151633436614259&w=2 Suggested-by: Daniel Lustig <dlustig@nvidia.com> Signed-off-by: Andrea Parri <parri.andrea@gmail.com> Cc: Palmer Dabbelt <palmer@sifive.com> Cc: Albert Ou <albert@sifive.com> Cc: Daniel Lustig <dlustig@nvidia.com> Cc: Alan Stern <stern@rowland.harvard.edu> Cc: Will Deacon <will.deacon@arm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Boqun Feng <boqun.feng@gmail.com> Cc: Nicholas Piggin <npiggin@gmail.com> Cc: David Howells <dhowells@redhat.com> Cc: Jade Alglave <j.alglave@ucl.ac.uk> Cc: Luc Maranget <luc.maranget@inria.fr> Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> Cc: Akira Yokosawa <akiyks@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: linux-riscv@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
2018-03-09 05:13:40 -07:00
#define ATOMIC_OP(op, asm_op, I, asm_type, c_type, prefix) \
static __always_inline \
void arch_atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \
riscv/atomic: Strengthen implementations with fences Atomics present the same issue with locking: release and acquire variants need to be strengthened to meet the constraints defined by the Linux-kernel memory consistency model [1]. Atomics present a further issue: implementations of atomics such as atomic_cmpxchg() and atomic_add_unless() rely on LR/SC pairs, which do not give full-ordering with .aqrl; for example, current implementations allow the "lr-sc-aqrl-pair-vs-full-barrier" test below to end up with the state indicated in the "exists" clause. In order to "synchronize" LKMM and RISC-V's implementation, this commit strengthens the implementations of the atomics operations by replacing .rl and .aq with the use of ("lightweigth") fences, and by replacing .aqrl LR/SC pairs in sequences such as: 0: lr.w.aqrl %0, %addr bne %0, %old, 1f ... sc.w.aqrl %1, %new, %addr bnez %1, 0b 1: with sequences of the form: 0: lr.w %0, %addr bne %0, %old, 1f ... sc.w.rl %1, %new, %addr /* SC-release */ bnez %1, 0b fence rw, rw /* "full" fence */ 1: following Daniel's suggestion. These modifications were validated with simulation of the RISC-V memory consistency model. C lr-sc-aqrl-pair-vs-full-barrier {} P0(int *x, int *y, atomic_t *u) { int r0; int r1; WRITE_ONCE(*x, 1); r0 = atomic_cmpxchg(u, 0, 1); r1 = READ_ONCE(*y); } P1(int *x, int *y, atomic_t *v) { int r0; int r1; WRITE_ONCE(*y, 1); r0 = atomic_cmpxchg(v, 0, 1); r1 = READ_ONCE(*x); } exists (u=1 /\ v=1 /\ 0:r1=0 /\ 1:r1=0) [1] https://marc.info/?l=linux-kernel&m=151930201102853&w=2 https://groups.google.com/a/groups.riscv.org/forum/#!topic/isa-dev/hKywNHBkAXM https://marc.info/?l=linux-kernel&m=151633436614259&w=2 Suggested-by: Daniel Lustig <dlustig@nvidia.com> Signed-off-by: Andrea Parri <parri.andrea@gmail.com> Cc: Palmer Dabbelt <palmer@sifive.com> Cc: Albert Ou <albert@sifive.com> Cc: Daniel Lustig <dlustig@nvidia.com> Cc: Alan Stern <stern@rowland.harvard.edu> Cc: Will Deacon <will.deacon@arm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Boqun Feng <boqun.feng@gmail.com> Cc: Nicholas Piggin <npiggin@gmail.com> Cc: David Howells <dhowells@redhat.com> Cc: Jade Alglave <j.alglave@ucl.ac.uk> Cc: Luc Maranget <luc.maranget@inria.fr> Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> Cc: Akira Yokosawa <akiyks@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: linux-riscv@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
2018-03-09 05:13:40 -07:00
{ \
__asm__ __volatile__ ( \
" amo" #asm_op "." #asm_type " zero, %1, %0" \
: "+A" (v->counter) \
: "r" (I) \
: "memory"); \
} \
#ifdef CONFIG_GENERIC_ATOMIC64
riscv/atomic: Strengthen implementations with fences Atomics present the same issue with locking: release and acquire variants need to be strengthened to meet the constraints defined by the Linux-kernel memory consistency model [1]. Atomics present a further issue: implementations of atomics such as atomic_cmpxchg() and atomic_add_unless() rely on LR/SC pairs, which do not give full-ordering with .aqrl; for example, current implementations allow the "lr-sc-aqrl-pair-vs-full-barrier" test below to end up with the state indicated in the "exists" clause. In order to "synchronize" LKMM and RISC-V's implementation, this commit strengthens the implementations of the atomics operations by replacing .rl and .aq with the use of ("lightweigth") fences, and by replacing .aqrl LR/SC pairs in sequences such as: 0: lr.w.aqrl %0, %addr bne %0, %old, 1f ... sc.w.aqrl %1, %new, %addr bnez %1, 0b 1: with sequences of the form: 0: lr.w %0, %addr bne %0, %old, 1f ... sc.w.rl %1, %new, %addr /* SC-release */ bnez %1, 0b fence rw, rw /* "full" fence */ 1: following Daniel's suggestion. These modifications were validated with simulation of the RISC-V memory consistency model. C lr-sc-aqrl-pair-vs-full-barrier {} P0(int *x, int *y, atomic_t *u) { int r0; int r1; WRITE_ONCE(*x, 1); r0 = atomic_cmpxchg(u, 0, 1); r1 = READ_ONCE(*y); } P1(int *x, int *y, atomic_t *v) { int r0; int r1; WRITE_ONCE(*y, 1); r0 = atomic_cmpxchg(v, 0, 1); r1 = READ_ONCE(*x); } exists (u=1 /\ v=1 /\ 0:r1=0 /\ 1:r1=0) [1] https://marc.info/?l=linux-kernel&m=151930201102853&w=2 https://groups.google.com/a/groups.riscv.org/forum/#!topic/isa-dev/hKywNHBkAXM https://marc.info/?l=linux-kernel&m=151633436614259&w=2 Suggested-by: Daniel Lustig <dlustig@nvidia.com> Signed-off-by: Andrea Parri <parri.andrea@gmail.com> Cc: Palmer Dabbelt <palmer@sifive.com> Cc: Albert Ou <albert@sifive.com> Cc: Daniel Lustig <dlustig@nvidia.com> Cc: Alan Stern <stern@rowland.harvard.edu> Cc: Will Deacon <will.deacon@arm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Boqun Feng <boqun.feng@gmail.com> Cc: Nicholas Piggin <npiggin@gmail.com> Cc: David Howells <dhowells@redhat.com> Cc: Jade Alglave <j.alglave@ucl.ac.uk> Cc: Luc Maranget <luc.maranget@inria.fr> Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> Cc: Akira Yokosawa <akiyks@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: linux-riscv@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
2018-03-09 05:13:40 -07:00
#define ATOMIC_OPS(op, asm_op, I) \
ATOMIC_OP (op, asm_op, I, w, int, )
#else
riscv/atomic: Strengthen implementations with fences Atomics present the same issue with locking: release and acquire variants need to be strengthened to meet the constraints defined by the Linux-kernel memory consistency model [1]. Atomics present a further issue: implementations of atomics such as atomic_cmpxchg() and atomic_add_unless() rely on LR/SC pairs, which do not give full-ordering with .aqrl; for example, current implementations allow the "lr-sc-aqrl-pair-vs-full-barrier" test below to end up with the state indicated in the "exists" clause. In order to "synchronize" LKMM and RISC-V's implementation, this commit strengthens the implementations of the atomics operations by replacing .rl and .aq with the use of ("lightweigth") fences, and by replacing .aqrl LR/SC pairs in sequences such as: 0: lr.w.aqrl %0, %addr bne %0, %old, 1f ... sc.w.aqrl %1, %new, %addr bnez %1, 0b 1: with sequences of the form: 0: lr.w %0, %addr bne %0, %old, 1f ... sc.w.rl %1, %new, %addr /* SC-release */ bnez %1, 0b fence rw, rw /* "full" fence */ 1: following Daniel's suggestion. These modifications were validated with simulation of the RISC-V memory consistency model. C lr-sc-aqrl-pair-vs-full-barrier {} P0(int *x, int *y, atomic_t *u) { int r0; int r1; WRITE_ONCE(*x, 1); r0 = atomic_cmpxchg(u, 0, 1); r1 = READ_ONCE(*y); } P1(int *x, int *y, atomic_t *v) { int r0; int r1; WRITE_ONCE(*y, 1); r0 = atomic_cmpxchg(v, 0, 1); r1 = READ_ONCE(*x); } exists (u=1 /\ v=1 /\ 0:r1=0 /\ 1:r1=0) [1] https://marc.info/?l=linux-kernel&m=151930201102853&w=2 https://groups.google.com/a/groups.riscv.org/forum/#!topic/isa-dev/hKywNHBkAXM https://marc.info/?l=linux-kernel&m=151633436614259&w=2 Suggested-by: Daniel Lustig <dlustig@nvidia.com> Signed-off-by: Andrea Parri <parri.andrea@gmail.com> Cc: Palmer Dabbelt <palmer@sifive.com> Cc: Albert Ou <albert@sifive.com> Cc: Daniel Lustig <dlustig@nvidia.com> Cc: Alan Stern <stern@rowland.harvard.edu> Cc: Will Deacon <will.deacon@arm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Boqun Feng <boqun.feng@gmail.com> Cc: Nicholas Piggin <npiggin@gmail.com> Cc: David Howells <dhowells@redhat.com> Cc: Jade Alglave <j.alglave@ucl.ac.uk> Cc: Luc Maranget <luc.maranget@inria.fr> Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> Cc: Akira Yokosawa <akiyks@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: linux-riscv@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
2018-03-09 05:13:40 -07:00
#define ATOMIC_OPS(op, asm_op, I) \
ATOMIC_OP (op, asm_op, I, w, int, ) \
ATOMIC_OP (op, asm_op, I, d, s64, 64)
#endif
ATOMIC_OPS(add, add, i)
ATOMIC_OPS(sub, add, -i)
ATOMIC_OPS(and, and, i)
ATOMIC_OPS( or, or, i)
ATOMIC_OPS(xor, xor, i)
#undef ATOMIC_OP
#undef ATOMIC_OPS
/*
riscv/atomic: Strengthen implementations with fences Atomics present the same issue with locking: release and acquire variants need to be strengthened to meet the constraints defined by the Linux-kernel memory consistency model [1]. Atomics present a further issue: implementations of atomics such as atomic_cmpxchg() and atomic_add_unless() rely on LR/SC pairs, which do not give full-ordering with .aqrl; for example, current implementations allow the "lr-sc-aqrl-pair-vs-full-barrier" test below to end up with the state indicated in the "exists" clause. In order to "synchronize" LKMM and RISC-V's implementation, this commit strengthens the implementations of the atomics operations by replacing .rl and .aq with the use of ("lightweigth") fences, and by replacing .aqrl LR/SC pairs in sequences such as: 0: lr.w.aqrl %0, %addr bne %0, %old, 1f ... sc.w.aqrl %1, %new, %addr bnez %1, 0b 1: with sequences of the form: 0: lr.w %0, %addr bne %0, %old, 1f ... sc.w.rl %1, %new, %addr /* SC-release */ bnez %1, 0b fence rw, rw /* "full" fence */ 1: following Daniel's suggestion. These modifications were validated with simulation of the RISC-V memory consistency model. C lr-sc-aqrl-pair-vs-full-barrier {} P0(int *x, int *y, atomic_t *u) { int r0; int r1; WRITE_ONCE(*x, 1); r0 = atomic_cmpxchg(u, 0, 1); r1 = READ_ONCE(*y); } P1(int *x, int *y, atomic_t *v) { int r0; int r1; WRITE_ONCE(*y, 1); r0 = atomic_cmpxchg(v, 0, 1); r1 = READ_ONCE(*x); } exists (u=1 /\ v=1 /\ 0:r1=0 /\ 1:r1=0) [1] https://marc.info/?l=linux-kernel&m=151930201102853&w=2 https://groups.google.com/a/groups.riscv.org/forum/#!topic/isa-dev/hKywNHBkAXM https://marc.info/?l=linux-kernel&m=151633436614259&w=2 Suggested-by: Daniel Lustig <dlustig@nvidia.com> Signed-off-by: Andrea Parri <parri.andrea@gmail.com> Cc: Palmer Dabbelt <palmer@sifive.com> Cc: Albert Ou <albert@sifive.com> Cc: Daniel Lustig <dlustig@nvidia.com> Cc: Alan Stern <stern@rowland.harvard.edu> Cc: Will Deacon <will.deacon@arm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Boqun Feng <boqun.feng@gmail.com> Cc: Nicholas Piggin <npiggin@gmail.com> Cc: David Howells <dhowells@redhat.com> Cc: Jade Alglave <j.alglave@ucl.ac.uk> Cc: Luc Maranget <luc.maranget@inria.fr> Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> Cc: Akira Yokosawa <akiyks@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: linux-riscv@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
2018-03-09 05:13:40 -07:00
* Atomic ops that have ordered, relaxed, acquire, and release variants.
* There's two flavors of these: the arithmatic ops have both fetch and return
* versions, while the logical ops only have fetch versions.
*/
riscv/atomic: Strengthen implementations with fences Atomics present the same issue with locking: release and acquire variants need to be strengthened to meet the constraints defined by the Linux-kernel memory consistency model [1]. Atomics present a further issue: implementations of atomics such as atomic_cmpxchg() and atomic_add_unless() rely on LR/SC pairs, which do not give full-ordering with .aqrl; for example, current implementations allow the "lr-sc-aqrl-pair-vs-full-barrier" test below to end up with the state indicated in the "exists" clause. In order to "synchronize" LKMM and RISC-V's implementation, this commit strengthens the implementations of the atomics operations by replacing .rl and .aq with the use of ("lightweigth") fences, and by replacing .aqrl LR/SC pairs in sequences such as: 0: lr.w.aqrl %0, %addr bne %0, %old, 1f ... sc.w.aqrl %1, %new, %addr bnez %1, 0b 1: with sequences of the form: 0: lr.w %0, %addr bne %0, %old, 1f ... sc.w.rl %1, %new, %addr /* SC-release */ bnez %1, 0b fence rw, rw /* "full" fence */ 1: following Daniel's suggestion. These modifications were validated with simulation of the RISC-V memory consistency model. C lr-sc-aqrl-pair-vs-full-barrier {} P0(int *x, int *y, atomic_t *u) { int r0; int r1; WRITE_ONCE(*x, 1); r0 = atomic_cmpxchg(u, 0, 1); r1 = READ_ONCE(*y); } P1(int *x, int *y, atomic_t *v) { int r0; int r1; WRITE_ONCE(*y, 1); r0 = atomic_cmpxchg(v, 0, 1); r1 = READ_ONCE(*x); } exists (u=1 /\ v=1 /\ 0:r1=0 /\ 1:r1=0) [1] https://marc.info/?l=linux-kernel&m=151930201102853&w=2 https://groups.google.com/a/groups.riscv.org/forum/#!topic/isa-dev/hKywNHBkAXM https://marc.info/?l=linux-kernel&m=151633436614259&w=2 Suggested-by: Daniel Lustig <dlustig@nvidia.com> Signed-off-by: Andrea Parri <parri.andrea@gmail.com> Cc: Palmer Dabbelt <palmer@sifive.com> Cc: Albert Ou <albert@sifive.com> Cc: Daniel Lustig <dlustig@nvidia.com> Cc: Alan Stern <stern@rowland.harvard.edu> Cc: Will Deacon <will.deacon@arm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Boqun Feng <boqun.feng@gmail.com> Cc: Nicholas Piggin <npiggin@gmail.com> Cc: David Howells <dhowells@redhat.com> Cc: Jade Alglave <j.alglave@ucl.ac.uk> Cc: Luc Maranget <luc.maranget@inria.fr> Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> Cc: Akira Yokosawa <akiyks@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: linux-riscv@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
2018-03-09 05:13:40 -07:00
#define ATOMIC_FETCH_OP(op, asm_op, I, asm_type, c_type, prefix) \
static __always_inline \
c_type arch_atomic##prefix##_fetch_##op##_relaxed(c_type i, \
riscv/atomic: Strengthen implementations with fences Atomics present the same issue with locking: release and acquire variants need to be strengthened to meet the constraints defined by the Linux-kernel memory consistency model [1]. Atomics present a further issue: implementations of atomics such as atomic_cmpxchg() and atomic_add_unless() rely on LR/SC pairs, which do not give full-ordering with .aqrl; for example, current implementations allow the "lr-sc-aqrl-pair-vs-full-barrier" test below to end up with the state indicated in the "exists" clause. In order to "synchronize" LKMM and RISC-V's implementation, this commit strengthens the implementations of the atomics operations by replacing .rl and .aq with the use of ("lightweigth") fences, and by replacing .aqrl LR/SC pairs in sequences such as: 0: lr.w.aqrl %0, %addr bne %0, %old, 1f ... sc.w.aqrl %1, %new, %addr bnez %1, 0b 1: with sequences of the form: 0: lr.w %0, %addr bne %0, %old, 1f ... sc.w.rl %1, %new, %addr /* SC-release */ bnez %1, 0b fence rw, rw /* "full" fence */ 1: following Daniel's suggestion. These modifications were validated with simulation of the RISC-V memory consistency model. C lr-sc-aqrl-pair-vs-full-barrier {} P0(int *x, int *y, atomic_t *u) { int r0; int r1; WRITE_ONCE(*x, 1); r0 = atomic_cmpxchg(u, 0, 1); r1 = READ_ONCE(*y); } P1(int *x, int *y, atomic_t *v) { int r0; int r1; WRITE_ONCE(*y, 1); r0 = atomic_cmpxchg(v, 0, 1); r1 = READ_ONCE(*x); } exists (u=1 /\ v=1 /\ 0:r1=0 /\ 1:r1=0) [1] https://marc.info/?l=linux-kernel&m=151930201102853&w=2 https://groups.google.com/a/groups.riscv.org/forum/#!topic/isa-dev/hKywNHBkAXM https://marc.info/?l=linux-kernel&m=151633436614259&w=2 Suggested-by: Daniel Lustig <dlustig@nvidia.com> Signed-off-by: Andrea Parri <parri.andrea@gmail.com> Cc: Palmer Dabbelt <palmer@sifive.com> Cc: Albert Ou <albert@sifive.com> Cc: Daniel Lustig <dlustig@nvidia.com> Cc: Alan Stern <stern@rowland.harvard.edu> Cc: Will Deacon <will.deacon@arm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Boqun Feng <boqun.feng@gmail.com> Cc: Nicholas Piggin <npiggin@gmail.com> Cc: David Howells <dhowells@redhat.com> Cc: Jade Alglave <j.alglave@ucl.ac.uk> Cc: Luc Maranget <luc.maranget@inria.fr> Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> Cc: Akira Yokosawa <akiyks@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: linux-riscv@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
2018-03-09 05:13:40 -07:00
atomic##prefix##_t *v) \
{ \
register c_type ret; \
__asm__ __volatile__ ( \
" amo" #asm_op "." #asm_type " %1, %2, %0" \
: "+A" (v->counter), "=r" (ret) \
: "r" (I) \
: "memory"); \
return ret; \
} \
static __always_inline \
c_type arch_atomic##prefix##_fetch_##op(c_type i, atomic##prefix##_t *v) \
riscv/atomic: Strengthen implementations with fences Atomics present the same issue with locking: release and acquire variants need to be strengthened to meet the constraints defined by the Linux-kernel memory consistency model [1]. Atomics present a further issue: implementations of atomics such as atomic_cmpxchg() and atomic_add_unless() rely on LR/SC pairs, which do not give full-ordering with .aqrl; for example, current implementations allow the "lr-sc-aqrl-pair-vs-full-barrier" test below to end up with the state indicated in the "exists" clause. In order to "synchronize" LKMM and RISC-V's implementation, this commit strengthens the implementations of the atomics operations by replacing .rl and .aq with the use of ("lightweigth") fences, and by replacing .aqrl LR/SC pairs in sequences such as: 0: lr.w.aqrl %0, %addr bne %0, %old, 1f ... sc.w.aqrl %1, %new, %addr bnez %1, 0b 1: with sequences of the form: 0: lr.w %0, %addr bne %0, %old, 1f ... sc.w.rl %1, %new, %addr /* SC-release */ bnez %1, 0b fence rw, rw /* "full" fence */ 1: following Daniel's suggestion. These modifications were validated with simulation of the RISC-V memory consistency model. C lr-sc-aqrl-pair-vs-full-barrier {} P0(int *x, int *y, atomic_t *u) { int r0; int r1; WRITE_ONCE(*x, 1); r0 = atomic_cmpxchg(u, 0, 1); r1 = READ_ONCE(*y); } P1(int *x, int *y, atomic_t *v) { int r0; int r1; WRITE_ONCE(*y, 1); r0 = atomic_cmpxchg(v, 0, 1); r1 = READ_ONCE(*x); } exists (u=1 /\ v=1 /\ 0:r1=0 /\ 1:r1=0) [1] https://marc.info/?l=linux-kernel&m=151930201102853&w=2 https://groups.google.com/a/groups.riscv.org/forum/#!topic/isa-dev/hKywNHBkAXM https://marc.info/?l=linux-kernel&m=151633436614259&w=2 Suggested-by: Daniel Lustig <dlustig@nvidia.com> Signed-off-by: Andrea Parri <parri.andrea@gmail.com> Cc: Palmer Dabbelt <palmer@sifive.com> Cc: Albert Ou <albert@sifive.com> Cc: Daniel Lustig <dlustig@nvidia.com> Cc: Alan Stern <stern@rowland.harvard.edu> Cc: Will Deacon <will.deacon@arm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Boqun Feng <boqun.feng@gmail.com> Cc: Nicholas Piggin <npiggin@gmail.com> Cc: David Howells <dhowells@redhat.com> Cc: Jade Alglave <j.alglave@ucl.ac.uk> Cc: Luc Maranget <luc.maranget@inria.fr> Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> Cc: Akira Yokosawa <akiyks@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: linux-riscv@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
2018-03-09 05:13:40 -07:00
{ \
register c_type ret; \
__asm__ __volatile__ ( \
" amo" #asm_op "." #asm_type ".aqrl %1, %2, %0" \
: "+A" (v->counter), "=r" (ret) \
: "r" (I) \
: "memory"); \
return ret; \
}
riscv/atomic: Strengthen implementations with fences Atomics present the same issue with locking: release and acquire variants need to be strengthened to meet the constraints defined by the Linux-kernel memory consistency model [1]. Atomics present a further issue: implementations of atomics such as atomic_cmpxchg() and atomic_add_unless() rely on LR/SC pairs, which do not give full-ordering with .aqrl; for example, current implementations allow the "lr-sc-aqrl-pair-vs-full-barrier" test below to end up with the state indicated in the "exists" clause. In order to "synchronize" LKMM and RISC-V's implementation, this commit strengthens the implementations of the atomics operations by replacing .rl and .aq with the use of ("lightweigth") fences, and by replacing .aqrl LR/SC pairs in sequences such as: 0: lr.w.aqrl %0, %addr bne %0, %old, 1f ... sc.w.aqrl %1, %new, %addr bnez %1, 0b 1: with sequences of the form: 0: lr.w %0, %addr bne %0, %old, 1f ... sc.w.rl %1, %new, %addr /* SC-release */ bnez %1, 0b fence rw, rw /* "full" fence */ 1: following Daniel's suggestion. These modifications were validated with simulation of the RISC-V memory consistency model. C lr-sc-aqrl-pair-vs-full-barrier {} P0(int *x, int *y, atomic_t *u) { int r0; int r1; WRITE_ONCE(*x, 1); r0 = atomic_cmpxchg(u, 0, 1); r1 = READ_ONCE(*y); } P1(int *x, int *y, atomic_t *v) { int r0; int r1; WRITE_ONCE(*y, 1); r0 = atomic_cmpxchg(v, 0, 1); r1 = READ_ONCE(*x); } exists (u=1 /\ v=1 /\ 0:r1=0 /\ 1:r1=0) [1] https://marc.info/?l=linux-kernel&m=151930201102853&w=2 https://groups.google.com/a/groups.riscv.org/forum/#!topic/isa-dev/hKywNHBkAXM https://marc.info/?l=linux-kernel&m=151633436614259&w=2 Suggested-by: Daniel Lustig <dlustig@nvidia.com> Signed-off-by: Andrea Parri <parri.andrea@gmail.com> Cc: Palmer Dabbelt <palmer@sifive.com> Cc: Albert Ou <albert@sifive.com> Cc: Daniel Lustig <dlustig@nvidia.com> Cc: Alan Stern <stern@rowland.harvard.edu> Cc: Will Deacon <will.deacon@arm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Boqun Feng <boqun.feng@gmail.com> Cc: Nicholas Piggin <npiggin@gmail.com> Cc: David Howells <dhowells@redhat.com> Cc: Jade Alglave <j.alglave@ucl.ac.uk> Cc: Luc Maranget <luc.maranget@inria.fr> Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> Cc: Akira Yokosawa <akiyks@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: linux-riscv@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
2018-03-09 05:13:40 -07:00
#define ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_type, c_type, prefix) \
static __always_inline \
c_type arch_atomic##prefix##_##op##_return_relaxed(c_type i, \
riscv/atomic: Strengthen implementations with fences Atomics present the same issue with locking: release and acquire variants need to be strengthened to meet the constraints defined by the Linux-kernel memory consistency model [1]. Atomics present a further issue: implementations of atomics such as atomic_cmpxchg() and atomic_add_unless() rely on LR/SC pairs, which do not give full-ordering with .aqrl; for example, current implementations allow the "lr-sc-aqrl-pair-vs-full-barrier" test below to end up with the state indicated in the "exists" clause. In order to "synchronize" LKMM and RISC-V's implementation, this commit strengthens the implementations of the atomics operations by replacing .rl and .aq with the use of ("lightweigth") fences, and by replacing .aqrl LR/SC pairs in sequences such as: 0: lr.w.aqrl %0, %addr bne %0, %old, 1f ... sc.w.aqrl %1, %new, %addr bnez %1, 0b 1: with sequences of the form: 0: lr.w %0, %addr bne %0, %old, 1f ... sc.w.rl %1, %new, %addr /* SC-release */ bnez %1, 0b fence rw, rw /* "full" fence */ 1: following Daniel's suggestion. These modifications were validated with simulation of the RISC-V memory consistency model. C lr-sc-aqrl-pair-vs-full-barrier {} P0(int *x, int *y, atomic_t *u) { int r0; int r1; WRITE_ONCE(*x, 1); r0 = atomic_cmpxchg(u, 0, 1); r1 = READ_ONCE(*y); } P1(int *x, int *y, atomic_t *v) { int r0; int r1; WRITE_ONCE(*y, 1); r0 = atomic_cmpxchg(v, 0, 1); r1 = READ_ONCE(*x); } exists (u=1 /\ v=1 /\ 0:r1=0 /\ 1:r1=0) [1] https://marc.info/?l=linux-kernel&m=151930201102853&w=2 https://groups.google.com/a/groups.riscv.org/forum/#!topic/isa-dev/hKywNHBkAXM https://marc.info/?l=linux-kernel&m=151633436614259&w=2 Suggested-by: Daniel Lustig <dlustig@nvidia.com> Signed-off-by: Andrea Parri <parri.andrea@gmail.com> Cc: Palmer Dabbelt <palmer@sifive.com> Cc: Albert Ou <albert@sifive.com> Cc: Daniel Lustig <dlustig@nvidia.com> Cc: Alan Stern <stern@rowland.harvard.edu> Cc: Will Deacon <will.deacon@arm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Boqun Feng <boqun.feng@gmail.com> Cc: Nicholas Piggin <npiggin@gmail.com> Cc: David Howells <dhowells@redhat.com> Cc: Jade Alglave <j.alglave@ucl.ac.uk> Cc: Luc Maranget <luc.maranget@inria.fr> Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> Cc: Akira Yokosawa <akiyks@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: linux-riscv@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
2018-03-09 05:13:40 -07:00
atomic##prefix##_t *v) \
{ \
return arch_atomic##prefix##_fetch_##op##_relaxed(i, v) c_op I; \
riscv/atomic: Strengthen implementations with fences Atomics present the same issue with locking: release and acquire variants need to be strengthened to meet the constraints defined by the Linux-kernel memory consistency model [1]. Atomics present a further issue: implementations of atomics such as atomic_cmpxchg() and atomic_add_unless() rely on LR/SC pairs, which do not give full-ordering with .aqrl; for example, current implementations allow the "lr-sc-aqrl-pair-vs-full-barrier" test below to end up with the state indicated in the "exists" clause. In order to "synchronize" LKMM and RISC-V's implementation, this commit strengthens the implementations of the atomics operations by replacing .rl and .aq with the use of ("lightweigth") fences, and by replacing .aqrl LR/SC pairs in sequences such as: 0: lr.w.aqrl %0, %addr bne %0, %old, 1f ... sc.w.aqrl %1, %new, %addr bnez %1, 0b 1: with sequences of the form: 0: lr.w %0, %addr bne %0, %old, 1f ... sc.w.rl %1, %new, %addr /* SC-release */ bnez %1, 0b fence rw, rw /* "full" fence */ 1: following Daniel's suggestion. These modifications were validated with simulation of the RISC-V memory consistency model. C lr-sc-aqrl-pair-vs-full-barrier {} P0(int *x, int *y, atomic_t *u) { int r0; int r1; WRITE_ONCE(*x, 1); r0 = atomic_cmpxchg(u, 0, 1); r1 = READ_ONCE(*y); } P1(int *x, int *y, atomic_t *v) { int r0; int r1; WRITE_ONCE(*y, 1); r0 = atomic_cmpxchg(v, 0, 1); r1 = READ_ONCE(*x); } exists (u=1 /\ v=1 /\ 0:r1=0 /\ 1:r1=0) [1] https://marc.info/?l=linux-kernel&m=151930201102853&w=2 https://groups.google.com/a/groups.riscv.org/forum/#!topic/isa-dev/hKywNHBkAXM https://marc.info/?l=linux-kernel&m=151633436614259&w=2 Suggested-by: Daniel Lustig <dlustig@nvidia.com> Signed-off-by: Andrea Parri <parri.andrea@gmail.com> Cc: Palmer Dabbelt <palmer@sifive.com> Cc: Albert Ou <albert@sifive.com> Cc: Daniel Lustig <dlustig@nvidia.com> Cc: Alan Stern <stern@rowland.harvard.edu> Cc: Will Deacon <will.deacon@arm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Boqun Feng <boqun.feng@gmail.com> Cc: Nicholas Piggin <npiggin@gmail.com> Cc: David Howells <dhowells@redhat.com> Cc: Jade Alglave <j.alglave@ucl.ac.uk> Cc: Luc Maranget <luc.maranget@inria.fr> Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> Cc: Akira Yokosawa <akiyks@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: linux-riscv@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
2018-03-09 05:13:40 -07:00
} \
static __always_inline \
c_type arch_atomic##prefix##_##op##_return(c_type i, atomic##prefix##_t *v) \
riscv/atomic: Strengthen implementations with fences Atomics present the same issue with locking: release and acquire variants need to be strengthened to meet the constraints defined by the Linux-kernel memory consistency model [1]. Atomics present a further issue: implementations of atomics such as atomic_cmpxchg() and atomic_add_unless() rely on LR/SC pairs, which do not give full-ordering with .aqrl; for example, current implementations allow the "lr-sc-aqrl-pair-vs-full-barrier" test below to end up with the state indicated in the "exists" clause. In order to "synchronize" LKMM and RISC-V's implementation, this commit strengthens the implementations of the atomics operations by replacing .rl and .aq with the use of ("lightweigth") fences, and by replacing .aqrl LR/SC pairs in sequences such as: 0: lr.w.aqrl %0, %addr bne %0, %old, 1f ... sc.w.aqrl %1, %new, %addr bnez %1, 0b 1: with sequences of the form: 0: lr.w %0, %addr bne %0, %old, 1f ... sc.w.rl %1, %new, %addr /* SC-release */ bnez %1, 0b fence rw, rw /* "full" fence */ 1: following Daniel's suggestion. These modifications were validated with simulation of the RISC-V memory consistency model. C lr-sc-aqrl-pair-vs-full-barrier {} P0(int *x, int *y, atomic_t *u) { int r0; int r1; WRITE_ONCE(*x, 1); r0 = atomic_cmpxchg(u, 0, 1); r1 = READ_ONCE(*y); } P1(int *x, int *y, atomic_t *v) { int r0; int r1; WRITE_ONCE(*y, 1); r0 = atomic_cmpxchg(v, 0, 1); r1 = READ_ONCE(*x); } exists (u=1 /\ v=1 /\ 0:r1=0 /\ 1:r1=0) [1] https://marc.info/?l=linux-kernel&m=151930201102853&w=2 https://groups.google.com/a/groups.riscv.org/forum/#!topic/isa-dev/hKywNHBkAXM https://marc.info/?l=linux-kernel&m=151633436614259&w=2 Suggested-by: Daniel Lustig <dlustig@nvidia.com> Signed-off-by: Andrea Parri <parri.andrea@gmail.com> Cc: Palmer Dabbelt <palmer@sifive.com> Cc: Albert Ou <albert@sifive.com> Cc: Daniel Lustig <dlustig@nvidia.com> Cc: Alan Stern <stern@rowland.harvard.edu> Cc: Will Deacon <will.deacon@arm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Boqun Feng <boqun.feng@gmail.com> Cc: Nicholas Piggin <npiggin@gmail.com> Cc: David Howells <dhowells@redhat.com> Cc: Jade Alglave <j.alglave@ucl.ac.uk> Cc: Luc Maranget <luc.maranget@inria.fr> Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> Cc: Akira Yokosawa <akiyks@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: linux-riscv@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
2018-03-09 05:13:40 -07:00
{ \
return arch_atomic##prefix##_fetch_##op(i, v) c_op I; \
}
#ifdef CONFIG_GENERIC_ATOMIC64
riscv/atomic: Strengthen implementations with fences Atomics present the same issue with locking: release and acquire variants need to be strengthened to meet the constraints defined by the Linux-kernel memory consistency model [1]. Atomics present a further issue: implementations of atomics such as atomic_cmpxchg() and atomic_add_unless() rely on LR/SC pairs, which do not give full-ordering with .aqrl; for example, current implementations allow the "lr-sc-aqrl-pair-vs-full-barrier" test below to end up with the state indicated in the "exists" clause. In order to "synchronize" LKMM and RISC-V's implementation, this commit strengthens the implementations of the atomics operations by replacing .rl and .aq with the use of ("lightweigth") fences, and by replacing .aqrl LR/SC pairs in sequences such as: 0: lr.w.aqrl %0, %addr bne %0, %old, 1f ... sc.w.aqrl %1, %new, %addr bnez %1, 0b 1: with sequences of the form: 0: lr.w %0, %addr bne %0, %old, 1f ... sc.w.rl %1, %new, %addr /* SC-release */ bnez %1, 0b fence rw, rw /* "full" fence */ 1: following Daniel's suggestion. These modifications were validated with simulation of the RISC-V memory consistency model. C lr-sc-aqrl-pair-vs-full-barrier {} P0(int *x, int *y, atomic_t *u) { int r0; int r1; WRITE_ONCE(*x, 1); r0 = atomic_cmpxchg(u, 0, 1); r1 = READ_ONCE(*y); } P1(int *x, int *y, atomic_t *v) { int r0; int r1; WRITE_ONCE(*y, 1); r0 = atomic_cmpxchg(v, 0, 1); r1 = READ_ONCE(*x); } exists (u=1 /\ v=1 /\ 0:r1=0 /\ 1:r1=0) [1] https://marc.info/?l=linux-kernel&m=151930201102853&w=2 https://groups.google.com/a/groups.riscv.org/forum/#!topic/isa-dev/hKywNHBkAXM https://marc.info/?l=linux-kernel&m=151633436614259&w=2 Suggested-by: Daniel Lustig <dlustig@nvidia.com> Signed-off-by: Andrea Parri <parri.andrea@gmail.com> Cc: Palmer Dabbelt <palmer@sifive.com> Cc: Albert Ou <albert@sifive.com> Cc: Daniel Lustig <dlustig@nvidia.com> Cc: Alan Stern <stern@rowland.harvard.edu> Cc: Will Deacon <will.deacon@arm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Boqun Feng <boqun.feng@gmail.com> Cc: Nicholas Piggin <npiggin@gmail.com> Cc: David Howells <dhowells@redhat.com> Cc: Jade Alglave <j.alglave@ucl.ac.uk> Cc: Luc Maranget <luc.maranget@inria.fr> Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> Cc: Akira Yokosawa <akiyks@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: linux-riscv@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
2018-03-09 05:13:40 -07:00
#define ATOMIC_OPS(op, asm_op, c_op, I) \
ATOMIC_FETCH_OP( op, asm_op, I, w, int, ) \
ATOMIC_OP_RETURN(op, asm_op, c_op, I, w, int, )
#else
riscv/atomic: Strengthen implementations with fences Atomics present the same issue with locking: release and acquire variants need to be strengthened to meet the constraints defined by the Linux-kernel memory consistency model [1]. Atomics present a further issue: implementations of atomics such as atomic_cmpxchg() and atomic_add_unless() rely on LR/SC pairs, which do not give full-ordering with .aqrl; for example, current implementations allow the "lr-sc-aqrl-pair-vs-full-barrier" test below to end up with the state indicated in the "exists" clause. In order to "synchronize" LKMM and RISC-V's implementation, this commit strengthens the implementations of the atomics operations by replacing .rl and .aq with the use of ("lightweigth") fences, and by replacing .aqrl LR/SC pairs in sequences such as: 0: lr.w.aqrl %0, %addr bne %0, %old, 1f ... sc.w.aqrl %1, %new, %addr bnez %1, 0b 1: with sequences of the form: 0: lr.w %0, %addr bne %0, %old, 1f ... sc.w.rl %1, %new, %addr /* SC-release */ bnez %1, 0b fence rw, rw /* "full" fence */ 1: following Daniel's suggestion. These modifications were validated with simulation of the RISC-V memory consistency model. C lr-sc-aqrl-pair-vs-full-barrier {} P0(int *x, int *y, atomic_t *u) { int r0; int r1; WRITE_ONCE(*x, 1); r0 = atomic_cmpxchg(u, 0, 1); r1 = READ_ONCE(*y); } P1(int *x, int *y, atomic_t *v) { int r0; int r1; WRITE_ONCE(*y, 1); r0 = atomic_cmpxchg(v, 0, 1); r1 = READ_ONCE(*x); } exists (u=1 /\ v=1 /\ 0:r1=0 /\ 1:r1=0) [1] https://marc.info/?l=linux-kernel&m=151930201102853&w=2 https://groups.google.com/a/groups.riscv.org/forum/#!topic/isa-dev/hKywNHBkAXM https://marc.info/?l=linux-kernel&m=151633436614259&w=2 Suggested-by: Daniel Lustig <dlustig@nvidia.com> Signed-off-by: Andrea Parri <parri.andrea@gmail.com> Cc: Palmer Dabbelt <palmer@sifive.com> Cc: Albert Ou <albert@sifive.com> Cc: Daniel Lustig <dlustig@nvidia.com> Cc: Alan Stern <stern@rowland.harvard.edu> Cc: Will Deacon <will.deacon@arm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Boqun Feng <boqun.feng@gmail.com> Cc: Nicholas Piggin <npiggin@gmail.com> Cc: David Howells <dhowells@redhat.com> Cc: Jade Alglave <j.alglave@ucl.ac.uk> Cc: Luc Maranget <luc.maranget@inria.fr> Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> Cc: Akira Yokosawa <akiyks@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: linux-riscv@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
2018-03-09 05:13:40 -07:00
#define ATOMIC_OPS(op, asm_op, c_op, I) \
ATOMIC_FETCH_OP( op, asm_op, I, w, int, ) \
ATOMIC_OP_RETURN(op, asm_op, c_op, I, w, int, ) \
ATOMIC_FETCH_OP( op, asm_op, I, d, s64, 64) \
ATOMIC_OP_RETURN(op, asm_op, c_op, I, d, s64, 64)
#endif
riscv/atomic: Strengthen implementations with fences Atomics present the same issue with locking: release and acquire variants need to be strengthened to meet the constraints defined by the Linux-kernel memory consistency model [1]. Atomics present a further issue: implementations of atomics such as atomic_cmpxchg() and atomic_add_unless() rely on LR/SC pairs, which do not give full-ordering with .aqrl; for example, current implementations allow the "lr-sc-aqrl-pair-vs-full-barrier" test below to end up with the state indicated in the "exists" clause. In order to "synchronize" LKMM and RISC-V's implementation, this commit strengthens the implementations of the atomics operations by replacing .rl and .aq with the use of ("lightweigth") fences, and by replacing .aqrl LR/SC pairs in sequences such as: 0: lr.w.aqrl %0, %addr bne %0, %old, 1f ... sc.w.aqrl %1, %new, %addr bnez %1, 0b 1: with sequences of the form: 0: lr.w %0, %addr bne %0, %old, 1f ... sc.w.rl %1, %new, %addr /* SC-release */ bnez %1, 0b fence rw, rw /* "full" fence */ 1: following Daniel's suggestion. These modifications were validated with simulation of the RISC-V memory consistency model. C lr-sc-aqrl-pair-vs-full-barrier {} P0(int *x, int *y, atomic_t *u) { int r0; int r1; WRITE_ONCE(*x, 1); r0 = atomic_cmpxchg(u, 0, 1); r1 = READ_ONCE(*y); } P1(int *x, int *y, atomic_t *v) { int r0; int r1; WRITE_ONCE(*y, 1); r0 = atomic_cmpxchg(v, 0, 1); r1 = READ_ONCE(*x); } exists (u=1 /\ v=1 /\ 0:r1=0 /\ 1:r1=0) [1] https://marc.info/?l=linux-kernel&m=151930201102853&w=2 https://groups.google.com/a/groups.riscv.org/forum/#!topic/isa-dev/hKywNHBkAXM https://marc.info/?l=linux-kernel&m=151633436614259&w=2 Suggested-by: Daniel Lustig <dlustig@nvidia.com> Signed-off-by: Andrea Parri <parri.andrea@gmail.com> Cc: Palmer Dabbelt <palmer@sifive.com> Cc: Albert Ou <albert@sifive.com> Cc: Daniel Lustig <dlustig@nvidia.com> Cc: Alan Stern <stern@rowland.harvard.edu> Cc: Will Deacon <will.deacon@arm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Boqun Feng <boqun.feng@gmail.com> Cc: Nicholas Piggin <npiggin@gmail.com> Cc: David Howells <dhowells@redhat.com> Cc: Jade Alglave <j.alglave@ucl.ac.uk> Cc: Luc Maranget <luc.maranget@inria.fr> Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> Cc: Akira Yokosawa <akiyks@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: linux-riscv@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
2018-03-09 05:13:40 -07:00
ATOMIC_OPS(add, add, +, i)
ATOMIC_OPS(sub, add, +, -i)
#define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed
#define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed
#define arch_atomic_add_return arch_atomic_add_return
#define arch_atomic_sub_return arch_atomic_sub_return
#define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed
#define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed
#define arch_atomic_fetch_add arch_atomic_fetch_add
#define arch_atomic_fetch_sub arch_atomic_fetch_sub
riscv/atomic: Strengthen implementations with fences Atomics present the same issue with locking: release and acquire variants need to be strengthened to meet the constraints defined by the Linux-kernel memory consistency model [1]. Atomics present a further issue: implementations of atomics such as atomic_cmpxchg() and atomic_add_unless() rely on LR/SC pairs, which do not give full-ordering with .aqrl; for example, current implementations allow the "lr-sc-aqrl-pair-vs-full-barrier" test below to end up with the state indicated in the "exists" clause. In order to "synchronize" LKMM and RISC-V's implementation, this commit strengthens the implementations of the atomics operations by replacing .rl and .aq with the use of ("lightweigth") fences, and by replacing .aqrl LR/SC pairs in sequences such as: 0: lr.w.aqrl %0, %addr bne %0, %old, 1f ... sc.w.aqrl %1, %new, %addr bnez %1, 0b 1: with sequences of the form: 0: lr.w %0, %addr bne %0, %old, 1f ... sc.w.rl %1, %new, %addr /* SC-release */ bnez %1, 0b fence rw, rw /* "full" fence */ 1: following Daniel's suggestion. These modifications were validated with simulation of the RISC-V memory consistency model. C lr-sc-aqrl-pair-vs-full-barrier {} P0(int *x, int *y, atomic_t *u) { int r0; int r1; WRITE_ONCE(*x, 1); r0 = atomic_cmpxchg(u, 0, 1); r1 = READ_ONCE(*y); } P1(int *x, int *y, atomic_t *v) { int r0; int r1; WRITE_ONCE(*y, 1); r0 = atomic_cmpxchg(v, 0, 1); r1 = READ_ONCE(*x); } exists (u=1 /\ v=1 /\ 0:r1=0 /\ 1:r1=0) [1] https://marc.info/?l=linux-kernel&m=151930201102853&w=2 https://groups.google.com/a/groups.riscv.org/forum/#!topic/isa-dev/hKywNHBkAXM https://marc.info/?l=linux-kernel&m=151633436614259&w=2 Suggested-by: Daniel Lustig <dlustig@nvidia.com> Signed-off-by: Andrea Parri <parri.andrea@gmail.com> Cc: Palmer Dabbelt <palmer@sifive.com> Cc: Albert Ou <albert@sifive.com> Cc: Daniel Lustig <dlustig@nvidia.com> Cc: Alan Stern <stern@rowland.harvard.edu> Cc: Will Deacon <will.deacon@arm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Boqun Feng <boqun.feng@gmail.com> Cc: Nicholas Piggin <npiggin@gmail.com> Cc: David Howells <dhowells@redhat.com> Cc: Jade Alglave <j.alglave@ucl.ac.uk> Cc: Luc Maranget <luc.maranget@inria.fr> Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> Cc: Akira Yokosawa <akiyks@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: linux-riscv@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
2018-03-09 05:13:40 -07:00
#ifndef CONFIG_GENERIC_ATOMIC64
#define arch_atomic64_add_return_relaxed arch_atomic64_add_return_relaxed
#define arch_atomic64_sub_return_relaxed arch_atomic64_sub_return_relaxed
#define arch_atomic64_add_return arch_atomic64_add_return
#define arch_atomic64_sub_return arch_atomic64_sub_return
#define arch_atomic64_fetch_add_relaxed arch_atomic64_fetch_add_relaxed
#define arch_atomic64_fetch_sub_relaxed arch_atomic64_fetch_sub_relaxed
#define arch_atomic64_fetch_add arch_atomic64_fetch_add
#define arch_atomic64_fetch_sub arch_atomic64_fetch_sub
riscv/atomic: Strengthen implementations with fences Atomics present the same issue with locking: release and acquire variants need to be strengthened to meet the constraints defined by the Linux-kernel memory consistency model [1]. Atomics present a further issue: implementations of atomics such as atomic_cmpxchg() and atomic_add_unless() rely on LR/SC pairs, which do not give full-ordering with .aqrl; for example, current implementations allow the "lr-sc-aqrl-pair-vs-full-barrier" test below to end up with the state indicated in the "exists" clause. In order to "synchronize" LKMM and RISC-V's implementation, this commit strengthens the implementations of the atomics operations by replacing .rl and .aq with the use of ("lightweigth") fences, and by replacing .aqrl LR/SC pairs in sequences such as: 0: lr.w.aqrl %0, %addr bne %0, %old, 1f ... sc.w.aqrl %1, %new, %addr bnez %1, 0b 1: with sequences of the form: 0: lr.w %0, %addr bne %0, %old, 1f ... sc.w.rl %1, %new, %addr /* SC-release */ bnez %1, 0b fence rw, rw /* "full" fence */ 1: following Daniel's suggestion. These modifications were validated with simulation of the RISC-V memory consistency model. C lr-sc-aqrl-pair-vs-full-barrier {} P0(int *x, int *y, atomic_t *u) { int r0; int r1; WRITE_ONCE(*x, 1); r0 = atomic_cmpxchg(u, 0, 1); r1 = READ_ONCE(*y); } P1(int *x, int *y, atomic_t *v) { int r0; int r1; WRITE_ONCE(*y, 1); r0 = atomic_cmpxchg(v, 0, 1); r1 = READ_ONCE(*x); } exists (u=1 /\ v=1 /\ 0:r1=0 /\ 1:r1=0) [1] https://marc.info/?l=linux-kernel&m=151930201102853&w=2 https://groups.google.com/a/groups.riscv.org/forum/#!topic/isa-dev/hKywNHBkAXM https://marc.info/?l=linux-kernel&m=151633436614259&w=2 Suggested-by: Daniel Lustig <dlustig@nvidia.com> Signed-off-by: Andrea Parri <parri.andrea@gmail.com> Cc: Palmer Dabbelt <palmer@sifive.com> Cc: Albert Ou <albert@sifive.com> Cc: Daniel Lustig <dlustig@nvidia.com> Cc: Alan Stern <stern@rowland.harvard.edu> Cc: Will Deacon <will.deacon@arm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Boqun Feng <boqun.feng@gmail.com> Cc: Nicholas Piggin <npiggin@gmail.com> Cc: David Howells <dhowells@redhat.com> Cc: Jade Alglave <j.alglave@ucl.ac.uk> Cc: Luc Maranget <luc.maranget@inria.fr> Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> Cc: Akira Yokosawa <akiyks@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: linux-riscv@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
2018-03-09 05:13:40 -07:00
#endif
#undef ATOMIC_OPS
#ifdef CONFIG_GENERIC_ATOMIC64
riscv/atomic: Strengthen implementations with fences Atomics present the same issue with locking: release and acquire variants need to be strengthened to meet the constraints defined by the Linux-kernel memory consistency model [1]. Atomics present a further issue: implementations of atomics such as atomic_cmpxchg() and atomic_add_unless() rely on LR/SC pairs, which do not give full-ordering with .aqrl; for example, current implementations allow the "lr-sc-aqrl-pair-vs-full-barrier" test below to end up with the state indicated in the "exists" clause. In order to "synchronize" LKMM and RISC-V's implementation, this commit strengthens the implementations of the atomics operations by replacing .rl and .aq with the use of ("lightweigth") fences, and by replacing .aqrl LR/SC pairs in sequences such as: 0: lr.w.aqrl %0, %addr bne %0, %old, 1f ... sc.w.aqrl %1, %new, %addr bnez %1, 0b 1: with sequences of the form: 0: lr.w %0, %addr bne %0, %old, 1f ... sc.w.rl %1, %new, %addr /* SC-release */ bnez %1, 0b fence rw, rw /* "full" fence */ 1: following Daniel's suggestion. These modifications were validated with simulation of the RISC-V memory consistency model. C lr-sc-aqrl-pair-vs-full-barrier {} P0(int *x, int *y, atomic_t *u) { int r0; int r1; WRITE_ONCE(*x, 1); r0 = atomic_cmpxchg(u, 0, 1); r1 = READ_ONCE(*y); } P1(int *x, int *y, atomic_t *v) { int r0; int r1; WRITE_ONCE(*y, 1); r0 = atomic_cmpxchg(v, 0, 1); r1 = READ_ONCE(*x); } exists (u=1 /\ v=1 /\ 0:r1=0 /\ 1:r1=0) [1] https://marc.info/?l=linux-kernel&m=151930201102853&w=2 https://groups.google.com/a/groups.riscv.org/forum/#!topic/isa-dev/hKywNHBkAXM https://marc.info/?l=linux-kernel&m=151633436614259&w=2 Suggested-by: Daniel Lustig <dlustig@nvidia.com> Signed-off-by: Andrea Parri <parri.andrea@gmail.com> Cc: Palmer Dabbelt <palmer@sifive.com> Cc: Albert Ou <albert@sifive.com> Cc: Daniel Lustig <dlustig@nvidia.com> Cc: Alan Stern <stern@rowland.harvard.edu> Cc: Will Deacon <will.deacon@arm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Boqun Feng <boqun.feng@gmail.com> Cc: Nicholas Piggin <npiggin@gmail.com> Cc: David Howells <dhowells@redhat.com> Cc: Jade Alglave <j.alglave@ucl.ac.uk> Cc: Luc Maranget <luc.maranget@inria.fr> Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> Cc: Akira Yokosawa <akiyks@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: linux-riscv@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
2018-03-09 05:13:40 -07:00
#define ATOMIC_OPS(op, asm_op, I) \
ATOMIC_FETCH_OP(op, asm_op, I, w, int, )
#else
riscv/atomic: Strengthen implementations with fences Atomics present the same issue with locking: release and acquire variants need to be strengthened to meet the constraints defined by the Linux-kernel memory consistency model [1]. Atomics present a further issue: implementations of atomics such as atomic_cmpxchg() and atomic_add_unless() rely on LR/SC pairs, which do not give full-ordering with .aqrl; for example, current implementations allow the "lr-sc-aqrl-pair-vs-full-barrier" test below to end up with the state indicated in the "exists" clause. In order to "synchronize" LKMM and RISC-V's implementation, this commit strengthens the implementations of the atomics operations by replacing .rl and .aq with the use of ("lightweigth") fences, and by replacing .aqrl LR/SC pairs in sequences such as: 0: lr.w.aqrl %0, %addr bne %0, %old, 1f ... sc.w.aqrl %1, %new, %addr bnez %1, 0b 1: with sequences of the form: 0: lr.w %0, %addr bne %0, %old, 1f ... sc.w.rl %1, %new, %addr /* SC-release */ bnez %1, 0b fence rw, rw /* "full" fence */ 1: following Daniel's suggestion. These modifications were validated with simulation of the RISC-V memory consistency model. C lr-sc-aqrl-pair-vs-full-barrier {} P0(int *x, int *y, atomic_t *u) { int r0; int r1; WRITE_ONCE(*x, 1); r0 = atomic_cmpxchg(u, 0, 1); r1 = READ_ONCE(*y); } P1(int *x, int *y, atomic_t *v) { int r0; int r1; WRITE_ONCE(*y, 1); r0 = atomic_cmpxchg(v, 0, 1); r1 = READ_ONCE(*x); } exists (u=1 /\ v=1 /\ 0:r1=0 /\ 1:r1=0) [1] https://marc.info/?l=linux-kernel&m=151930201102853&w=2 https://groups.google.com/a/groups.riscv.org/forum/#!topic/isa-dev/hKywNHBkAXM https://marc.info/?l=linux-kernel&m=151633436614259&w=2 Suggested-by: Daniel Lustig <dlustig@nvidia.com> Signed-off-by: Andrea Parri <parri.andrea@gmail.com> Cc: Palmer Dabbelt <palmer@sifive.com> Cc: Albert Ou <albert@sifive.com> Cc: Daniel Lustig <dlustig@nvidia.com> Cc: Alan Stern <stern@rowland.harvard.edu> Cc: Will Deacon <will.deacon@arm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Boqun Feng <boqun.feng@gmail.com> Cc: Nicholas Piggin <npiggin@gmail.com> Cc: David Howells <dhowells@redhat.com> Cc: Jade Alglave <j.alglave@ucl.ac.uk> Cc: Luc Maranget <luc.maranget@inria.fr> Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> Cc: Akira Yokosawa <akiyks@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: linux-riscv@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
2018-03-09 05:13:40 -07:00
#define ATOMIC_OPS(op, asm_op, I) \
ATOMIC_FETCH_OP(op, asm_op, I, w, int, ) \
ATOMIC_FETCH_OP(op, asm_op, I, d, s64, 64)
#endif
riscv/atomic: Strengthen implementations with fences Atomics present the same issue with locking: release and acquire variants need to be strengthened to meet the constraints defined by the Linux-kernel memory consistency model [1]. Atomics present a further issue: implementations of atomics such as atomic_cmpxchg() and atomic_add_unless() rely on LR/SC pairs, which do not give full-ordering with .aqrl; for example, current implementations allow the "lr-sc-aqrl-pair-vs-full-barrier" test below to end up with the state indicated in the "exists" clause. In order to "synchronize" LKMM and RISC-V's implementation, this commit strengthens the implementations of the atomics operations by replacing .rl and .aq with the use of ("lightweigth") fences, and by replacing .aqrl LR/SC pairs in sequences such as: 0: lr.w.aqrl %0, %addr bne %0, %old, 1f ... sc.w.aqrl %1, %new, %addr bnez %1, 0b 1: with sequences of the form: 0: lr.w %0, %addr bne %0, %old, 1f ... sc.w.rl %1, %new, %addr /* SC-release */ bnez %1, 0b fence rw, rw /* "full" fence */ 1: following Daniel's suggestion. These modifications were validated with simulation of the RISC-V memory consistency model. C lr-sc-aqrl-pair-vs-full-barrier {} P0(int *x, int *y, atomic_t *u) { int r0; int r1; WRITE_ONCE(*x, 1); r0 = atomic_cmpxchg(u, 0, 1); r1 = READ_ONCE(*y); } P1(int *x, int *y, atomic_t *v) { int r0; int r1; WRITE_ONCE(*y, 1); r0 = atomic_cmpxchg(v, 0, 1); r1 = READ_ONCE(*x); } exists (u=1 /\ v=1 /\ 0:r1=0 /\ 1:r1=0) [1] https://marc.info/?l=linux-kernel&m=151930201102853&w=2 https://groups.google.com/a/groups.riscv.org/forum/#!topic/isa-dev/hKywNHBkAXM https://marc.info/?l=linux-kernel&m=151633436614259&w=2 Suggested-by: Daniel Lustig <dlustig@nvidia.com> Signed-off-by: Andrea Parri <parri.andrea@gmail.com> Cc: Palmer Dabbelt <palmer@sifive.com> Cc: Albert Ou <albert@sifive.com> Cc: Daniel Lustig <dlustig@nvidia.com> Cc: Alan Stern <stern@rowland.harvard.edu> Cc: Will Deacon <will.deacon@arm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Boqun Feng <boqun.feng@gmail.com> Cc: Nicholas Piggin <npiggin@gmail.com> Cc: David Howells <dhowells@redhat.com> Cc: Jade Alglave <j.alglave@ucl.ac.uk> Cc: Luc Maranget <luc.maranget@inria.fr> Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> Cc: Akira Yokosawa <akiyks@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: linux-riscv@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
2018-03-09 05:13:40 -07:00
ATOMIC_OPS(and, and, i)
ATOMIC_OPS( or, or, i)
ATOMIC_OPS(xor, xor, i)
#define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed
#define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed
#define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed
#define arch_atomic_fetch_and arch_atomic_fetch_and
#define arch_atomic_fetch_or arch_atomic_fetch_or
#define arch_atomic_fetch_xor arch_atomic_fetch_xor
riscv/atomic: Strengthen implementations with fences Atomics present the same issue with locking: release and acquire variants need to be strengthened to meet the constraints defined by the Linux-kernel memory consistency model [1]. Atomics present a further issue: implementations of atomics such as atomic_cmpxchg() and atomic_add_unless() rely on LR/SC pairs, which do not give full-ordering with .aqrl; for example, current implementations allow the "lr-sc-aqrl-pair-vs-full-barrier" test below to end up with the state indicated in the "exists" clause. In order to "synchronize" LKMM and RISC-V's implementation, this commit strengthens the implementations of the atomics operations by replacing .rl and .aq with the use of ("lightweigth") fences, and by replacing .aqrl LR/SC pairs in sequences such as: 0: lr.w.aqrl %0, %addr bne %0, %old, 1f ... sc.w.aqrl %1, %new, %addr bnez %1, 0b 1: with sequences of the form: 0: lr.w %0, %addr bne %0, %old, 1f ... sc.w.rl %1, %new, %addr /* SC-release */ bnez %1, 0b fence rw, rw /* "full" fence */ 1: following Daniel's suggestion. These modifications were validated with simulation of the RISC-V memory consistency model. C lr-sc-aqrl-pair-vs-full-barrier {} P0(int *x, int *y, atomic_t *u) { int r0; int r1; WRITE_ONCE(*x, 1); r0 = atomic_cmpxchg(u, 0, 1); r1 = READ_ONCE(*y); } P1(int *x, int *y, atomic_t *v) { int r0; int r1; WRITE_ONCE(*y, 1); r0 = atomic_cmpxchg(v, 0, 1); r1 = READ_ONCE(*x); } exists (u=1 /\ v=1 /\ 0:r1=0 /\ 1:r1=0) [1] https://marc.info/?l=linux-kernel&m=151930201102853&w=2 https://groups.google.com/a/groups.riscv.org/forum/#!topic/isa-dev/hKywNHBkAXM https://marc.info/?l=linux-kernel&m=151633436614259&w=2 Suggested-by: Daniel Lustig <dlustig@nvidia.com> Signed-off-by: Andrea Parri <parri.andrea@gmail.com> Cc: Palmer Dabbelt <palmer@sifive.com> Cc: Albert Ou <albert@sifive.com> Cc: Daniel Lustig <dlustig@nvidia.com> Cc: Alan Stern <stern@rowland.harvard.edu> Cc: Will Deacon <will.deacon@arm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Boqun Feng <boqun.feng@gmail.com> Cc: Nicholas Piggin <npiggin@gmail.com> Cc: David Howells <dhowells@redhat.com> Cc: Jade Alglave <j.alglave@ucl.ac.uk> Cc: Luc Maranget <luc.maranget@inria.fr> Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> Cc: Akira Yokosawa <akiyks@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: linux-riscv@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
2018-03-09 05:13:40 -07:00
#ifndef CONFIG_GENERIC_ATOMIC64
#define arch_atomic64_fetch_and_relaxed arch_atomic64_fetch_and_relaxed
#define arch_atomic64_fetch_or_relaxed arch_atomic64_fetch_or_relaxed
#define arch_atomic64_fetch_xor_relaxed arch_atomic64_fetch_xor_relaxed
#define arch_atomic64_fetch_and arch_atomic64_fetch_and
#define arch_atomic64_fetch_or arch_atomic64_fetch_or
#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor
riscv/atomic: Strengthen implementations with fences Atomics present the same issue with locking: release and acquire variants need to be strengthened to meet the constraints defined by the Linux-kernel memory consistency model [1]. Atomics present a further issue: implementations of atomics such as atomic_cmpxchg() and atomic_add_unless() rely on LR/SC pairs, which do not give full-ordering with .aqrl; for example, current implementations allow the "lr-sc-aqrl-pair-vs-full-barrier" test below to end up with the state indicated in the "exists" clause. In order to "synchronize" LKMM and RISC-V's implementation, this commit strengthens the implementations of the atomics operations by replacing .rl and .aq with the use of ("lightweigth") fences, and by replacing .aqrl LR/SC pairs in sequences such as: 0: lr.w.aqrl %0, %addr bne %0, %old, 1f ... sc.w.aqrl %1, %new, %addr bnez %1, 0b 1: with sequences of the form: 0: lr.w %0, %addr bne %0, %old, 1f ... sc.w.rl %1, %new, %addr /* SC-release */ bnez %1, 0b fence rw, rw /* "full" fence */ 1: following Daniel's suggestion. These modifications were validated with simulation of the RISC-V memory consistency model. C lr-sc-aqrl-pair-vs-full-barrier {} P0(int *x, int *y, atomic_t *u) { int r0; int r1; WRITE_ONCE(*x, 1); r0 = atomic_cmpxchg(u, 0, 1); r1 = READ_ONCE(*y); } P1(int *x, int *y, atomic_t *v) { int r0; int r1; WRITE_ONCE(*y, 1); r0 = atomic_cmpxchg(v, 0, 1); r1 = READ_ONCE(*x); } exists (u=1 /\ v=1 /\ 0:r1=0 /\ 1:r1=0) [1] https://marc.info/?l=linux-kernel&m=151930201102853&w=2 https://groups.google.com/a/groups.riscv.org/forum/#!topic/isa-dev/hKywNHBkAXM https://marc.info/?l=linux-kernel&m=151633436614259&w=2 Suggested-by: Daniel Lustig <dlustig@nvidia.com> Signed-off-by: Andrea Parri <parri.andrea@gmail.com> Cc: Palmer Dabbelt <palmer@sifive.com> Cc: Albert Ou <albert@sifive.com> Cc: Daniel Lustig <dlustig@nvidia.com> Cc: Alan Stern <stern@rowland.harvard.edu> Cc: Will Deacon <will.deacon@arm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Boqun Feng <boqun.feng@gmail.com> Cc: Nicholas Piggin <npiggin@gmail.com> Cc: David Howells <dhowells@redhat.com> Cc: Jade Alglave <j.alglave@ucl.ac.uk> Cc: Luc Maranget <luc.maranget@inria.fr> Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> Cc: Akira Yokosawa <akiyks@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: linux-riscv@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
2018-03-09 05:13:40 -07:00
#endif
#undef ATOMIC_OPS
#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#define _arch_atomic_fetch_add_unless(_prev, _rc, counter, _a, _u, sfx) \
({ \
__asm__ __volatile__ ( \
"0: lr." sfx " %[p], %[c]\n" \
" beq %[p], %[u], 1f\n" \
" add %[rc], %[p], %[a]\n" \
" sc." sfx ".rl %[rc], %[rc], %[c]\n" \
" bnez %[rc], 0b\n" \
" fence rw, rw\n" \
"1:\n" \
: [p]"=&r" (_prev), [rc]"=&r" (_rc), [c]"+A" (counter) \
: [a]"r" (_a), [u]"r" (_u) \
: "memory"); \
})
riscv/atomic: Strengthen implementations with fences Atomics present the same issue with locking: release and acquire variants need to be strengthened to meet the constraints defined by the Linux-kernel memory consistency model [1]. Atomics present a further issue: implementations of atomics such as atomic_cmpxchg() and atomic_add_unless() rely on LR/SC pairs, which do not give full-ordering with .aqrl; for example, current implementations allow the "lr-sc-aqrl-pair-vs-full-barrier" test below to end up with the state indicated in the "exists" clause. In order to "synchronize" LKMM and RISC-V's implementation, this commit strengthens the implementations of the atomics operations by replacing .rl and .aq with the use of ("lightweigth") fences, and by replacing .aqrl LR/SC pairs in sequences such as: 0: lr.w.aqrl %0, %addr bne %0, %old, 1f ... sc.w.aqrl %1, %new, %addr bnez %1, 0b 1: with sequences of the form: 0: lr.w %0, %addr bne %0, %old, 1f ... sc.w.rl %1, %new, %addr /* SC-release */ bnez %1, 0b fence rw, rw /* "full" fence */ 1: following Daniel's suggestion. These modifications were validated with simulation of the RISC-V memory consistency model. C lr-sc-aqrl-pair-vs-full-barrier {} P0(int *x, int *y, atomic_t *u) { int r0; int r1; WRITE_ONCE(*x, 1); r0 = atomic_cmpxchg(u, 0, 1); r1 = READ_ONCE(*y); } P1(int *x, int *y, atomic_t *v) { int r0; int r1; WRITE_ONCE(*y, 1); r0 = atomic_cmpxchg(v, 0, 1); r1 = READ_ONCE(*x); } exists (u=1 /\ v=1 /\ 0:r1=0 /\ 1:r1=0) [1] https://marc.info/?l=linux-kernel&m=151930201102853&w=2 https://groups.google.com/a/groups.riscv.org/forum/#!topic/isa-dev/hKywNHBkAXM https://marc.info/?l=linux-kernel&m=151633436614259&w=2 Suggested-by: Daniel Lustig <dlustig@nvidia.com> Signed-off-by: Andrea Parri <parri.andrea@gmail.com> Cc: Palmer Dabbelt <palmer@sifive.com> Cc: Albert Ou <albert@sifive.com> Cc: Daniel Lustig <dlustig@nvidia.com> Cc: Alan Stern <stern@rowland.harvard.edu> Cc: Will Deacon <will.deacon@arm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Boqun Feng <boqun.feng@gmail.com> Cc: Nicholas Piggin <npiggin@gmail.com> Cc: David Howells <dhowells@redhat.com> Cc: Jade Alglave <j.alglave@ucl.ac.uk> Cc: Luc Maranget <luc.maranget@inria.fr> Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> Cc: Akira Yokosawa <akiyks@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: linux-riscv@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
2018-03-09 05:13:40 -07:00
/* This is required to provide a full barrier on success. */
static __always_inline int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u)
{
int prev, rc;
_arch_atomic_fetch_add_unless(prev, rc, v->counter, a, u, "w");
return prev;
}
#define arch_atomic_fetch_add_unless arch_atomic_fetch_add_unless
#ifndef CONFIG_GENERIC_ATOMIC64
static __always_inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
{
s64 prev;
long rc;
_arch_atomic_fetch_add_unless(prev, rc, v->counter, a, u, "d");
return prev;
}
#define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless
#endif
#define _arch_atomic_inc_unless_negative(_prev, _rc, counter, sfx) \
({ \
__asm__ __volatile__ ( \
"0: lr." sfx " %[p], %[c]\n" \
" bltz %[p], 1f\n" \
" addi %[rc], %[p], 1\n" \
" sc." sfx ".rl %[rc], %[rc], %[c]\n" \
" bnez %[rc], 0b\n" \
" fence rw, rw\n" \
"1:\n" \
: [p]"=&r" (_prev), [rc]"=&r" (_rc), [c]"+A" (counter) \
: \
: "memory"); \
})
static __always_inline bool arch_atomic_inc_unless_negative(atomic_t *v)
{
int prev, rc;
_arch_atomic_inc_unless_negative(prev, rc, v->counter, "w");
return !(prev < 0);
}
#define arch_atomic_inc_unless_negative arch_atomic_inc_unless_negative
#define _arch_atomic_dec_unless_positive(_prev, _rc, counter, sfx) \
({ \
__asm__ __volatile__ ( \
"0: lr." sfx " %[p], %[c]\n" \
" bgtz %[p], 1f\n" \
" addi %[rc], %[p], -1\n" \
" sc." sfx ".rl %[rc], %[rc], %[c]\n" \
" bnez %[rc], 0b\n" \
" fence rw, rw\n" \
"1:\n" \
: [p]"=&r" (_prev), [rc]"=&r" (_rc), [c]"+A" (counter) \
: \
: "memory"); \
})
static __always_inline bool arch_atomic_dec_unless_positive(atomic_t *v)
{
int prev, rc;
_arch_atomic_dec_unless_positive(prev, rc, v->counter, "w");
return !(prev > 0);
}
#define arch_atomic_dec_unless_positive arch_atomic_dec_unless_positive
#define _arch_atomic_dec_if_positive(_prev, _rc, counter, sfx) \
({ \
__asm__ __volatile__ ( \
"0: lr." sfx " %[p], %[c]\n" \
" addi %[rc], %[p], -1\n" \
" bltz %[rc], 1f\n" \
" sc." sfx ".rl %[rc], %[rc], %[c]\n" \
" bnez %[rc], 0b\n" \
" fence rw, rw\n" \
"1:\n" \
: [p]"=&r" (_prev), [rc]"=&r" (_rc), [c]"+A" (counter) \
: \
: "memory"); \
})
static __always_inline int arch_atomic_dec_if_positive(atomic_t *v)
{
int prev, rc;
_arch_atomic_dec_if_positive(prev, rc, v->counter, "w");
return prev - 1;
}
#define arch_atomic_dec_if_positive arch_atomic_dec_if_positive
#ifndef CONFIG_GENERIC_ATOMIC64
static __always_inline bool arch_atomic64_inc_unless_negative(atomic64_t *v)
{
s64 prev;
long rc;
_arch_atomic_inc_unless_negative(prev, rc, v->counter, "d");
return !(prev < 0);
}
#define arch_atomic64_inc_unless_negative arch_atomic64_inc_unless_negative
static __always_inline bool arch_atomic64_dec_unless_positive(atomic64_t *v)
{
s64 prev;
long rc;
_arch_atomic_dec_unless_positive(prev, rc, v->counter, "d");
return !(prev > 0);
}
#define arch_atomic64_dec_unless_positive arch_atomic64_dec_unless_positive
static __always_inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
{
s64 prev;
long rc;
_arch_atomic_dec_if_positive(prev, rc, v->counter, "d");
return prev - 1;
}
#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
#endif
#endif /* _ASM_RISCV_ATOMIC_H */