linux/arch/arm/common/vlock.S

/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * vlock.S - simple voting lock implementation for ARM
 *
 * Created by:	Dave Martin, 2012-08-16
 * Copyright:	(C) 2012-2013  Linaro Limited
 *
 * This algorithm is described in more detail in
 * Documentation/arch/arm/vlocks.rst.
 */

#include <linux/linkage.h>
#include "vlock.h"

.arch armv7-a

/*
 * Select different code if the voting flags can fit in a single word.
 *
 * Exactly one of the two wrappers keeps its argument; the other expands to
 * nothing, so each wrapped line is assembled in only one configuration:
 *   MANY(x) - kept when VLOCK_VOTING_SIZE > 4 (flags span several words)
 *   FEW(x)  - kept otherwise (flags fit in one 32-bit word)
 */
#if VLOCK_VOTING_SIZE > 4
#define FEW(x...)
#define MANY(x...) x
#else
#define FEW(x...) x
#define MANY(x...)
#endif

@ voting lock for first-man coordination

/*
 * voting_begin - raise the calling CPU's "currently voting" flag.
 *
 *   rbase:    register holding the lock structure base address
 *   rcpu:     register holding the byte offset, relative to rbase, of the
 *             calling CPU's voting flag
 *   rscratch: scratch register; overwritten with 1
 *
 * Stores the byte 1 at [rbase + rcpu] and then issues a DMB, so the flag
 * store is ordered before the memory accesses that follow the macro.
 */
.macro voting_begin rbase:req, rcpu:req, rscratch:req
	mov	\rscratch, #1
	strb	\rscratch, [\rbase, \rcpu]
	dmb
.endm

/*
 * voting_end - clear the calling CPU's "currently voting" flag and signal
 * any CPUs sleeping in WFE.
 *
 *   rbase:    register holding the lock structure base address
 *   rcpu:     register holding the byte offset, relative to rbase, of the
 *             calling CPU's voting flag
 *   rscratch: scratch register; overwritten with 0
 *
 * The leading DMB orders the accesses made before the macro ahead of the
 * flag clear.  The DSB ST makes sure the flag store has completed before
 * the SEV is sent, so a CPU woken by the event sees the cleared flag.
 */
.macro voting_end rbase:req, rcpu:req, rscratch:req
	dmb
	mov	\rscratch, #0
	strb	\rscratch, [\rbase, \rcpu]
	dsb	st
	sev
.endm

/*
 * The vlock structure must reside in Strongly-Ordered or Device memory.
 * This implementation deliberately eliminates most of the barriers which
 * would be required for other memory types, and assumes that independent
 * writes to neighbouring locations within a cacheline do not interfere
 * with one another.
 */

/*
 * vlock_trylock - make a single attempt to take the voting lock.
 *
 * In:    r0 = lock structure base
 *        r1 = CPU ID (0-based index within cluster)
 * Out:   r0 = 0 if this CPU won the lock, nonzero otherwise
 * Clobb: r1, r2, flags; additionally r3 when VLOCK_VOTING_SIZE > 4
 *
 * If the owner field is not VLOCK_OWNER_NONE on entry, the function drops
 * its voting flag and returns nonzero without casting a vote.  Otherwise it
 * writes its vote into the owner field, waits until no CPU has its voting
 * flag raised, and then reads the owner field back to see whose vote stuck.
 *
 * The value used as the vote is r1 after VLOCK_VOTING_OFFSET has been added,
 * i.e. the same value that indexes this CPU's voting flag.
 */
ENTRY(vlock_trylock)
	add	r1, r1, #VLOCK_VOTING_OFFSET

	voting_begin	r0, r1, r2

	ldrb	r2, [r0, #VLOCK_OWNER_OFFSET]	@ check whether lock is held
	cmp	r2, #VLOCK_OWNER_NONE
	bne	trylock_fail			@ fail if so

	@ Control dependency implies strb not observable before previous ldrb.

	strb	r1, [r0, #VLOCK_OWNER_OFFSET]	@ submit my vote

	voting_end	r0, r1, r2		@ implies DMB

	@ Wait for the current round of voting to finish:

	/*
	 * The voting flags are polled one 32-bit word at a time.  While any
	 * flag in the current word is still set, the CPU sleeps in WFE and
	 * then re-reads the same word; the SEV issued by voting_end provides
	 * the wake-up event.  When the flags fit in one word only that word
	 * is polled.  Otherwise r3 walks every word of the voting array; the
	 * exit test is an equality compare, so it relies on VLOCK_VOTING_SIZE
	 * being a multiple of 4 (defined in vlock.h, which is not visible
	 * here -- TODO confirm).
	 */
 MANY(	mov	r3, #VLOCK_VOTING_OFFSET			)
0:
 MANY(	ldr	r2, [r0, r3]					)
 FEW(	ldr	r2, [r0, #VLOCK_VOTING_OFFSET]			)
	cmp	r2, #0
	wfene					@ somebody still voting: sleep until an event
	bne	0b				@ ... then poll the same word again
 MANY(	add	r3, r3, #4					)
 MANY(	cmp	r3, #VLOCK_VOTING_OFFSET + VLOCK_VOTING_SIZE	)
 MANY(	bne	0b						)

	@ Check who won:

	dmb					@ order the flag reads before the owner read
	ldrb	r2, [r0, #VLOCK_OWNER_OFFSET]
	eor	r0, r1, r2			@ zero if I won, else nonzero
	bx	lr

trylock_fail:
	voting_end	r0, r1, r2		@ lock already held: drop my voting flag
	mov	r0, #1				@ nonzero indicates that I lost
	bx	lr
ENDPROC(vlock_trylock)

/*
 * vlock_unlock - release the voting lock.
 *
 * In:    r0 = lock structure base
 * Out:   none (r0 is left unchanged)
 * Clobb: r1
 *
 * Writes VLOCK_OWNER_NONE to the owner field.  No check is made that the
 * caller actually holds the lock.
 */
ENTRY(vlock_unlock)
	dmb					@ order earlier accesses before the release store
	mov	r1, #VLOCK_OWNER_NONE
	strb	r1, [r0, #VLOCK_OWNER_OFFSET]
	dsb	st				@ complete the store before signalling
	sev					@ wake any CPU sleeping in WFE
	bx	lr
ENDPROC(vlock_unlock)
treewide: Replace GPLv2 boilerplate/reference with SPDX - rule 174 Based on 1 normalized pattern(s): this program is free software you can redistribute it and or modify it under the terms of the gnu general public license version 2 as published by the free software foundation this program is distributed in the hope that it will be useful but without any warranty without even the implied warranty of merchantability or fitness for a particular purpose see the gnu general public license for more details extracted by the scancode license scanner the SPDX license identifier GPL-2.0-only has been chosen to replace the boilerplate/reference in 655 file(s). Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Allison Randal <allison@lohutok.net> Reviewed-by: Kate Stewart <kstewart@linuxfoundation.org> Reviewed-by: Richard Fontana <rfontana@redhat.com> Cc: linux-spdx@vger.kernel.org Link: https://lkml.kernel.org/r/20190527070034.575739538@linutronix.de Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2019-05-26 23:55:21 -07:00			`/* SPDX-License-Identifier: GPL-2.0-only */`
ARM: mcpm: Add baremetal voting mutexes This patch adds a simple low-level voting mutex implementation to be used to arbitrate during first man selection when no load/store exclusive instructions are usable. For want of a better name, these are called "vlocks". (I was tempted to call them ballot locks, but "block" is way too confusing an abbreviation...) There is no function to wait for the lock to be released, and no vlock_lock() function since we don't need these at the moment. These could straightforwardly be added if vlocks get used for other purposes. For architectural correctness even Strongly-Ordered memory accesses require barriers in order to guarantee that multiple CPUs have a coherent view of the ordering of memory accesses. Whether or not this matters depends on hardware implementation details of the memory system. Since the purpose of this code is to provide a clean, generic locking mechanism with no platform-specific dependencies the barriers should be present to avoid unpleasant surprises on future platforms. Note: * When taking the lock, we don't care about implicit background memory operations and other signalling which may be pending, because those are not part of the critical section anyway. A DMB is sufficient to ensure correctly observed ordering if the explicit memory accesses in vlock_trylock. * No barrier is required after checking the election result, because the result is determined by the store to VLOCK_OWNER_OFFSET and is already globally observed due to the barriers in voting_end. This means that global agreement on the winner is guaranteed, even before the winner is known locally. Signed-off-by: Dave Martin <dave.martin@linaro.org> Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org> Reviewed-by: Santosh Shilimkar <santosh.shilimkar@ti.com> Reviewed-by: Will Deacon <will.deacon@arm.com> 2012-08-17 08:07:01 -07:00			`/*`
			`* vlock.S - simple voting lock implementation for ARM`
			`*`
			`* Created by: Dave Martin, 2012-08-16`
			`* Copyright: (C) 2012-2013 Linaro Limited`
			`*`
			`* This algorithm is described in more detail in`
arm: update in-source documentation references The Arm documentation has moved to Documentation/arch/arm; update references within arch/arm to match. Cc: Russell King <linux@armlinux.org.uk> Cc: Alim Akhtar <alim.akhtar@samsung.com> Cc: Patrice Chotard <patrice.chotard@foss.st.com> Cc: linux-doc@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-arch@vger.kernel.org Signed-off-by: Jonathan Corbet <corbet@lwn.net> 2023-05-03 15:50:54 -07:00			`* Documentation/arch/arm/vlocks.rst.`
ARM: mcpm: Add baremetal voting mutexes This patch adds a simple low-level voting mutex implementation to be used to arbitrate during first man selection when no load/store exclusive instructions are usable. For want of a better name, these are called "vlocks". (I was tempted to call them ballot locks, but "block" is way too confusing an abbreviation...) There is no function to wait for the lock to be released, and no vlock_lock() function since we don't need these at the moment. These could straightforwardly be added if vlocks get used for other purposes. For architectural correctness even Strongly-Ordered memory accesses require barriers in order to guarantee that multiple CPUs have a coherent view of the ordering of memory accesses. Whether or not this matters depends on hardware implementation details of the memory system. Since the purpose of this code is to provide a clean, generic locking mechanism with no platform-specific dependencies the barriers should be present to avoid unpleasant surprises on future platforms. Note: * When taking the lock, we don't care about implicit background memory operations and other signalling which may be pending, because those are not part of the critical section anyway. A DMB is sufficient to ensure correctly observed ordering if the explicit memory accesses in vlock_trylock. * No barrier is required after checking the election result, because the result is determined by the store to VLOCK_OWNER_OFFSET and is already globally observed due to the barriers in voting_end. This means that global agreement on the winner is guaranteed, even before the winner is known locally. Signed-off-by: Dave Martin <dave.martin@linaro.org> Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org> Reviewed-by: Santosh Shilimkar <santosh.shilimkar@ti.com> Reviewed-by: Will Deacon <will.deacon@arm.com> 2012-08-17 08:07:01 -07:00			`*/`

			`#include <linux/linkage.h>`
			`#include "vlock.h"`

ARM: 9263/1: use .arch directives instead of assembler command line flags Similar to commit a6c30873ee4a ("ARM: 8989/1: use .fpu assembler directives instead of assembler arguments"). GCC and GNU binutils support setting the "sub arch" via -march=, -Wa,-march, target function attribute, and .arch assembler directive. Clang was missing support for -Wa,-march=, but this was implemented in clang-13. The behavior of both GCC and Clang is to prefer -Wa,-march= over -march= for assembler and assembler-with-cpp sources, but Clang will warn about the -march= being unused. clang: warning: argument unused during compilation: '-march=armv6k' [-Wunused-command-line-argument] Since most assembler is non-conditionally assembled with one sub arch (modulo arch/arm/delay-loop.S which conditionally is assembled as armv4 based on CONFIG_ARCH_RPC, and arch/arm/mach-at91/pm-suspend.S which is conditionally assembled as armv7-a based on CONFIG_CPU_V7), prefer the .arch assembler directive. Add a few more instances found in compile testing as found by Arnd and Nathan. Link: https://github.com/llvm/llvm-project/commit/1d51c699b9e2ebc5bcfdbe85c74cc871426333d4 Link: https://bugs.llvm.org/show_bug.cgi?id=48894 Link: https://github.com/ClangBuiltLinux/linux/issues/1195 Link: https://github.com/ClangBuiltLinux/linux/issues/1315 Suggested-by: Arnd Bergmann <arnd@arndb.de> Suggested-by: Nathan Chancellor <nathan@kernel.org> Signed-off-by: Arnd Bergmann <arnd@arndb.de> Tested-by: Nathan Chancellor <nathan@kernel.org> Signed-off-by: Nick Desaulniers <ndesaulniers@google.com> Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk> 2022-10-24 12:44:41 -07:00			`.arch armv7-a`

ARM: mcpm: Add baremetal voting mutexes This patch adds a simple low-level voting mutex implementation to be used to arbitrate during first man selection when no load/store exclusive instructions are usable. For want of a better name, these are called "vlocks". (I was tempted to call them ballot locks, but "block" is way too confusing an abbreviation...) There is no function to wait for the lock to be released, and no vlock_lock() function since we don't need these at the moment. These could straightforwardly be added if vlocks get used for other purposes. For architectural correctness even Strongly-Ordered memory accesses require barriers in order to guarantee that multiple CPUs have a coherent view of the ordering of memory accesses. Whether or not this matters depends on hardware implementation details of the memory system. Since the purpose of this code is to provide a clean, generic locking mechanism with no platform-specific dependencies the barriers should be present to avoid unpleasant surprises on future platforms. Note: * When taking the lock, we don't care about implicit background memory operations and other signalling which may be pending, because those are not part of the critical section anyway. A DMB is sufficient to ensure correctly observed ordering if the explicit memory accesses in vlock_trylock. * No barrier is required after checking the election result, because the result is determined by the store to VLOCK_OWNER_OFFSET and is already globally observed due to the barriers in voting_end. This means that global agreement on the winner is guaranteed, even before the winner is known locally. Signed-off-by: Dave Martin <dave.martin@linaro.org> Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org> Reviewed-by: Santosh Shilimkar <santosh.shilimkar@ti.com> Reviewed-by: Will Deacon <will.deacon@arm.com> 2012-08-17 08:07:01 -07:00			`/* Select different code if voting flags can fit in a single word. */`
			`#if VLOCK_VOTING_SIZE > 4`
			`#define FEW(x...)`
			`#define MANY(x...) x`
			`#else`
			`#define FEW(x...) x`
			`#define MANY(x...)`
			`#endif`

			`@ voting lock for first-man coordination`

			`.macro voting_begin rbase:req, rcpu:req, rscratch:req`
			`mov \rscratch, #1`
			`strb \rscratch, [\rbase, \rcpu]`
			`dmb`
			`.endm`

			`.macro voting_end rbase:req, rcpu:req, rscratch:req`
			`dmb`
			`mov \rscratch, #0`
			`strb \rscratch, [\rbase, \rcpu]`
ARM: mcpm: use -st dsb option prior to sev instructions In a similar manner to our spinlock implementation, mcpm uses sev to wake up cores waiting on a lock when the lock is unlocked. In order to ensure that the final write unlocking the lock is visible, a dsb instruction is executed immediately prior to the sev. This patch changes these dsbs to use the -st option, since we only require that the store unlocking the lock is made visible. Acked-by: Nicolas Pitre <nico@linaro.org> Reviewed-by: Dave Martin <dave.martin@arm.com> Reviewed-by: Catalin Marinas <catalin.marinas@arm.com> Signed-off-by: Will Deacon <will.deacon@arm.com> 2013-05-14 02:08:07 -07:00			`dsb st`
ARM: mcpm: Add baremetal voting mutexes This patch adds a simple low-level voting mutex implementation to be used to arbitrate during first man selection when no load/store exclusive instructions are usable. For want of a better name, these are called "vlocks". (I was tempted to call them ballot locks, but "block" is way too confusing an abbreviation...) There is no function to wait for the lock to be released, and no vlock_lock() function since we don't need these at the moment. These could straightforwardly be added if vlocks get used for other purposes. For architectural correctness even Strongly-Ordered memory accesses require barriers in order to guarantee that multiple CPUs have a coherent view of the ordering of memory accesses. Whether or not this matters depends on hardware implementation details of the memory system. Since the purpose of this code is to provide a clean, generic locking mechanism with no platform-specific dependencies the barriers should be present to avoid unpleasant surprises on future platforms. Note: * When taking the lock, we don't care about implicit background memory operations and other signalling which may be pending, because those are not part of the critical section anyway. A DMB is sufficient to ensure correctly observed ordering if the explicit memory accesses in vlock_trylock. * No barrier is required after checking the election result, because the result is determined by the store to VLOCK_OWNER_OFFSET and is already globally observed due to the barriers in voting_end. This means that global agreement on the winner is guaranteed, even before the winner is known locally. Signed-off-by: Dave Martin <dave.martin@linaro.org> Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org> Reviewed-by: Santosh Shilimkar <santosh.shilimkar@ti.com> Reviewed-by: Will Deacon <will.deacon@arm.com> 2012-08-17 08:07:01 -07:00			`sev`
			`.endm`

			`/*`
			`* The vlock structure must reside in Strongly-Ordered or Device memory.`
			`* This implementation deliberately eliminates most of the barriers which`
			`* would be required for other memory types, and assumes that independent`
			`* writes to neighbouring locations within a cacheline do not interfere`
			`* with one another.`
			`*/`

			`@ r0: lock structure base`
			`@ r1: CPU ID (0-based index within cluster)`
			`ENTRY(vlock_trylock)`
			`add r1, r1, #VLOCK_VOTING_OFFSET`

			`voting_begin r0, r1, r2`

			`ldrb r2, [r0, #VLOCK_OWNER_OFFSET] @ check whether lock is held`
			`cmp r2, #VLOCK_OWNER_NONE`
			`bne trylock_fail @ fail if so`

			`@ Control dependency implies strb not observable before previous ldrb.`

			`strb r1, [r0, #VLOCK_OWNER_OFFSET] @ submit my vote`

			`voting_end r0, r1, r2 @ implies DMB`

			`@ Wait for the current round of voting to finish:`

			`MANY( mov r3, #VLOCK_VOTING_OFFSET )`
			`0:`
			`MANY( ldr r2, [r0, r3] )`
			`FEW( ldr r2, [r0, #VLOCK_VOTING_OFFSET] )`
			`cmp r2, #0`
			`wfene`
			`bne 0b`
			`MANY( add r3, r3, #4 )`
			`MANY( cmp r3, #VLOCK_VOTING_OFFSET + VLOCK_VOTING_SIZE )`
			`MANY( bne 0b )`

			`@ Check who won:`

			`dmb`
			`ldrb r2, [r0, #VLOCK_OWNER_OFFSET]`
			`eor r0, r1, r2 @ zero if I won, else nonzero`
			`bx lr`

			`trylock_fail:`
			`voting_end r0, r1, r2`
			`mov r0, #1 @ nonzero indicates that I lost`
			`bx lr`
			`ENDPROC(vlock_trylock)`

			`@ r0: lock structure base`
			`ENTRY(vlock_unlock)`
			`dmb`
			`mov r1, #VLOCK_OWNER_NONE`
			`strb r1, [r0, #VLOCK_OWNER_OFFSET]`
ARM: mcpm: use -st dsb option prior to sev instructions In a similar manner to our spinlock implementation, mcpm uses sev to wake up cores waiting on a lock when the lock is unlocked. In order to ensure that the final write unlocking the lock is visible, a dsb instruction is executed immediately prior to the sev. This patch changes these dsbs to use the -st option, since we only require that the store unlocking the lock is made visible. Acked-by: Nicolas Pitre <nico@linaro.org> Reviewed-by: Dave Martin <dave.martin@arm.com> Reviewed-by: Catalin Marinas <catalin.marinas@arm.com> Signed-off-by: Will Deacon <will.deacon@arm.com> 2013-05-14 02:08:07 -07:00			`dsb st`
ARM: mcpm: Add baremetal voting mutexes This patch adds a simple low-level voting mutex implementation to be used to arbitrate during first man selection when no load/store exclusive instructions are usable. For want of a better name, these are called "vlocks". (I was tempted to call them ballot locks, but "block" is way too confusing an abbreviation...) There is no function to wait for the lock to be released, and no vlock_lock() function since we don't need these at the moment. These could straightforwardly be added if vlocks get used for other purposes. For architectural correctness even Strongly-Ordered memory accesses require barriers in order to guarantee that multiple CPUs have a coherent view of the ordering of memory accesses. Whether or not this matters depends on hardware implementation details of the memory system. Since the purpose of this code is to provide a clean, generic locking mechanism with no platform-specific dependencies the barriers should be present to avoid unpleasant surprises on future platforms. Note: * When taking the lock, we don't care about implicit background memory operations and other signalling which may be pending, because those are not part of the critical section anyway. A DMB is sufficient to ensure correctly observed ordering if the explicit memory accesses in vlock_trylock. * No barrier is required after checking the election result, because the result is determined by the store to VLOCK_OWNER_OFFSET and is already globally observed due to the barriers in voting_end. This means that global agreement on the winner is guaranteed, even before the winner is known locally. Signed-off-by: Dave Martin <dave.martin@linaro.org> Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org> Reviewed-by: Santosh Shilimkar <santosh.shilimkar@ti.com> Reviewed-by: Will Deacon <will.deacon@arm.com> 2012-08-17 08:07:01 -07:00			`sev`
			`bx lr`
			`ENDPROC(vlock_unlock)`