1
linux/include/asm-ia64/bitops.h
David Mosberger-Tang 821376bf15 [IA64] fix fls()
The ia64-version of fls() never worked as intended (the bit numbering
was off by 1 and fls(0) was undefined).  This patch fixes the problem
by using a popcnt-based fls(), which on McKinley-derived cores is
slightly faster than both ia64_fls() and generic_fls().  The resulting
code, however, is bigger (7-8 bundles instead of about 3 bundles).
Also switch ia64_popcnt() to __builtin_popcountl() for GCC v3.4 or
newer since the compiler can predicate that and schedule it better.

Thanks to Simon Derr and Matt Mackall for tracking down this bug.

Signed-off-by: David Mosberger-Tang <davidm@hpl.hp.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
2005-04-21 11:07:59 -07:00

424 lines
10 KiB
C

#ifndef _ASM_IA64_BITOPS_H
#define _ASM_IA64_BITOPS_H
/*
* Copyright (C) 1998-2003 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
*
* 02/06/02 find_next_bit() and find_first_bit() added from Erich Focht's ia64 O(1)
* scheduler patch
*/
#include <linux/compiler.h>
#include <linux/types.h>
#include <asm/bitops.h>
#include <asm/intrinsics.h>
/**
 * set_bit - Atomically set a bit in memory
 * @nr: the bit to set
 * @addr: the address to start counting from
 *
 * The bitmap is addressed as an array of 32-bit words: word (@nr >> 5) is
 * selected and bit (@nr & 31) inside it is set.  Bit 0 is therefore the LSB
 * of addr and bit 32 is the LSB of (addr+1); @nr is not restricted to a
 * single word.
 *
 * The word is updated with a cmpxchg_acq() retry loop: the store is retried
 * until the word did not change between the read and the exchange.  See
 * __set_bit() if you do not require the atomic guarantees.
 *
 * The address must be (at least) "long" aligned.
 * Note that there are drivers (e.g., eepro100) which use these operations to
 * operate on hw-defined data-structures, so we can't easily change these
 * operations to force a bigger alignment.
 */
static __inline__ void
set_bit (int nr, volatile void *addr)
{
	__u32 bit, old, new;
	volatile __u32 *m;
	CMPXCHG_BUGCHECK_DECL

	m = (volatile __u32 *) addr + (nr >> 5);
	/* Unsigned constant: 1 << 31 would overflow a signed int. */
	bit = 1U << (nr & 31);
	do {
		CMPXCHG_BUGCHECK(m);
		old = *m;
		new = old | bit;
	} while (cmpxchg_acq(m, old, new) != old);
}
/**
 * __set_bit - Set a bit in memory
 * @nr: the bit to set
 * @addr: the address to start counting from
 *
 * Sets bit (@nr & 31) of the 32-bit word at index (@nr >> 5) with a plain
 * read-modify-write.
 *
 * Unlike set_bit(), this function is non-atomic and may be reordered.
 * If it's called on the same region of memory simultaneously, the effect
 * may be that only one operation succeeds.
 */
static __inline__ void
__set_bit (int nr, volatile void *addr)
{
	/* Unsigned constant: 1 << 31 would overflow a signed int. */
	*((__u32 *) addr + (nr >> 5)) |= (1U << (nr & 31));
}
/*
 * clear_bit() has "acquire" semantics (it is built on cmpxchg_acq()), so the
 * "before" hook issues smp_mb() while the "after" hook expands to an empty
 * statement.
 */
#define smp_mb__before_clear_bit() smp_mb()
#define smp_mb__after_clear_bit() do { /* skip */; } while (0)
/**
 * clear_bit - Clears a bit in memory
 * @nr: Bit to clear
 * @addr: Address to start counting from
 *
 * Clears bit (@nr & 31) of the 32-bit word at index (@nr >> 5).  The word is
 * updated with a cmpxchg_acq() retry loop, so the update itself is atomic.
 * However, it does not contain a full memory barrier, so if it is used for
 * locking purposes, you should call smp_mb__before_clear_bit() and/or
 * smp_mb__after_clear_bit() in order to ensure changes are visible on other
 * processors.
 */
static __inline__ void
clear_bit (int nr, volatile void *addr)
{
	__u32 mask, old, new;
	volatile __u32 *m;
	CMPXCHG_BUGCHECK_DECL

	m = (volatile __u32 *) addr + (nr >> 5);
	/* Unsigned constant: 1 << 31 would overflow a signed int. */
	mask = ~(1U << (nr & 31));
	do {
		CMPXCHG_BUGCHECK(m);
		old = *m;
		new = old & mask;
	} while (cmpxchg_acq(m, old, new) != old);
}
/**
 * __clear_bit - Clears a bit in memory (non-atomic version)
 * @nr: Bit to clear
 * @addr: Address to start counting from
 *
 * Clears bit (@nr & 31) of the 32-bit word at index (@nr >> 5) with a plain
 * read-modify-write; concurrent callers on the same word may lose updates.
 *
 * NOTE: under __KERNEL__ a later #define in this file maps __clear_bit()
 * onto clear_bit(), so uses after that point expand to the atomic version.
 */
static __inline__ void
__clear_bit (int nr, volatile void *addr)
{
	volatile __u32 *p = (__u32 *) addr + (nr >> 5);
	/* Unsigned constant: 1 << 31 would overflow a signed int. */
	__u32 m = 1U << (nr & 31);

	*p &= ~m;
}
/**
 * change_bit - Toggle a bit in memory
 * @nr: Bit to toggle
 * @addr: Address to start counting from
 *
 * Inverts bit (@nr & 31) of the 32-bit word at index (@nr >> 5).  The word
 * is updated with a cmpxchg_acq() retry loop, so the update is atomic.
 * Note that @nr may be almost arbitrarily large; this function is not
 * restricted to acting on a single-word quantity.
 */
static __inline__ void
change_bit (int nr, volatile void *addr)
{
	__u32 bit, old, new;
	volatile __u32 *m;
	CMPXCHG_BUGCHECK_DECL

	m = (volatile __u32 *) addr + (nr >> 5);
	/* Unsigned constant: 1 << 31 would overflow a signed int. */
	bit = (1U << (nr & 31));
	do {
		CMPXCHG_BUGCHECK(m);
		old = *m;
		new = old ^ bit;
	} while (cmpxchg_acq(m, old, new) != old);
}
/**
 * __change_bit - Toggle a bit in memory
 * @nr: the bit to toggle
 * @addr: the address to start counting from
 *
 * Inverts bit (@nr & 31) of the 32-bit word at index (@nr >> 5) with a plain
 * read-modify-write.
 *
 * Unlike change_bit(), this function is non-atomic and may be reordered.
 * If it's called on the same region of memory simultaneously, the effect
 * may be that only one operation succeeds.
 */
static __inline__ void
__change_bit (int nr, volatile void *addr)
{
	/* Unsigned constant: 1 << 31 would overflow a signed int. */
	*((__u32 *) addr + (nr >> 5)) ^= (1U << (nr & 31));
}
/**
 * test_and_set_bit - Set a bit and return its old value
 * @nr: Bit to set
 * @addr: Address to count from
 *
 * Sets bit (@nr & 31) of the 32-bit word at index (@nr >> 5) using a
 * cmpxchg_acq() retry loop, so the test and the set form one atomic step.
 * NOTE(review): the primitive's name suggests acquire ordering only;
 * confirm before relying on this as a full memory barrier.
 *
 * Returns 1 if the bit was already set before the call, 0 otherwise.
 */
static __inline__ int
test_and_set_bit (int nr, volatile void *addr)
{
	__u32 bit, old, new;
	volatile __u32 *m;
	CMPXCHG_BUGCHECK_DECL

	m = (volatile __u32 *) addr + (nr >> 5);
	/* Unsigned constant: 1 << 31 would overflow a signed int. */
	bit = 1U << (nr & 31);
	do {
		CMPXCHG_BUGCHECK(m);
		old = *m;
		new = old | bit;
	} while (cmpxchg_acq(m, old, new) != old);
	return (old & bit) != 0;
}
/**
 * __test_and_set_bit - Set a bit and return its old value
 * @nr: Bit to set
 * @addr: Address to count from
 *
 * Sets bit (@nr & 31) of the 32-bit word at index (@nr >> 5) with a plain
 * read-modify-write.
 *
 * This operation is non-atomic and can be reordered.
 * If two examples of this operation race, one can appear to succeed
 * but actually fail.  You must protect multiple accesses with a lock.
 *
 * Returns 1 if the bit was already set before the call, 0 otherwise.
 */
static __inline__ int
__test_and_set_bit (int nr, volatile void *addr)
{
	__u32 *p = (__u32 *) addr + (nr >> 5);
	/* Unsigned constant: 1 << 31 would overflow a signed int. */
	__u32 m = 1U << (nr & 31);
	int oldbitset = (*p & m) != 0;

	*p |= m;
	return oldbitset;
}
/**
 * test_and_clear_bit - Clear a bit and return its old value
 * @nr: Bit to clear
 * @addr: Address to count from
 *
 * Clears bit (@nr & 31) of the 32-bit word at index (@nr >> 5) using a
 * cmpxchg_acq() retry loop, so the test and the clear form one atomic step.
 * NOTE(review): the primitive's name suggests acquire ordering only;
 * confirm before relying on this as a full memory barrier.
 *
 * Returns 1 if the bit was set before the call, 0 otherwise.
 */
static __inline__ int
test_and_clear_bit (int nr, volatile void *addr)
{
	__u32 mask, old, new;
	volatile __u32 *m;
	CMPXCHG_BUGCHECK_DECL

	m = (volatile __u32 *) addr + (nr >> 5);
	/* Unsigned constant: 1 << 31 would overflow a signed int. */
	mask = ~(1U << (nr & 31));
	do {
		CMPXCHG_BUGCHECK(m);
		old = *m;
		new = old & mask;
	} while (cmpxchg_acq(m, old, new) != old);
	/* ~mask is the single bit being cleared. */
	return (old & ~mask) != 0;
}
/**
 * __test_and_clear_bit - Clear a bit and return its old value
 * @nr: Bit to clear
 * @addr: Address to count from
 *
 * Clears bit (@nr & 31) of the 32-bit word at index (@nr >> 5) with a plain
 * read-modify-write.
 *
 * This operation is non-atomic and can be reordered.
 * If two examples of this operation race, one can appear to succeed
 * but actually fail.  You must protect multiple accesses with a lock.
 *
 * Returns 1 if the bit was set before the call, 0 otherwise.
 */
static __inline__ int
__test_and_clear_bit(int nr, volatile void * addr)
{
	__u32 *p = (__u32 *) addr + (nr >> 5);
	/* Unsigned constant: 1 << 31 would overflow a signed int. */
	__u32 m = 1U << (nr & 31);
	/*
	 * Normalize to 0/1 like the other test_and_* helpers; returning the
	 * raw masked word would hand back 0x80000000 squeezed into an int
	 * for bit 31.
	 */
	int oldbitset = (*p & m) != 0;

	*p &= ~m;
	return oldbitset;
}
/**
 * test_and_change_bit - Change a bit and return its old value
 * @nr: Bit to toggle
 * @addr: Address to count from
 *
 * Inverts bit (@nr & 31) of the 32-bit word at index (@nr >> 5) using a
 * cmpxchg_acq() retry loop, so the test and the toggle form one atomic step.
 * NOTE(review): the primitive's name suggests acquire ordering only;
 * confirm before relying on this as a full memory barrier.
 *
 * Returns 1 if the bit was set before the call, 0 otherwise.
 */
static __inline__ int
test_and_change_bit (int nr, volatile void *addr)
{
	__u32 bit, old, new;
	volatile __u32 *m;
	CMPXCHG_BUGCHECK_DECL

	m = (volatile __u32 *) addr + (nr >> 5);
	/* Unsigned constant: 1 << 31 would overflow a signed int. */
	bit = (1U << (nr & 31));
	do {
		CMPXCHG_BUGCHECK(m);
		old = *m;
		new = old ^ bit;
	} while (cmpxchg_acq(m, old, new) != old);
	return (old & bit) != 0;
}
/**
 * __test_and_change_bit - Change a bit and return its old value
 * @nr: Bit to toggle
 * @addr: Address to count from
 *
 * Inverts bit (@nr & 31) of the 32-bit word at index (@nr >> 5) with a plain
 * read-modify-write.
 *
 * WARNING: non atomic version.
 *
 * Returns 1 if the bit was set before the call, 0 otherwise.
 */
static __inline__ int
__test_and_change_bit (int nr, void *addr)
{
	/* Unsigned constant: 1 << 31 would overflow a signed int. */
	__u32 old, bit = (1U << (nr & 31));
	__u32 *m = (__u32 *) addr + (nr >> 5);

	old = *m;
	*m = old ^ bit;
	return (old & bit) != 0;
}
/**
 * test_bit - Determine whether a bit is set
 * @nr: bit number to test
 * @addr: Address to start counting from
 *
 * Reads the 32-bit word at index (@nr >> 5) once and returns bit
 * (@nr & 31) of it: 1 if the bit is set, 0 otherwise.
 */
static __inline__ int
test_bit (int nr, const volatile void *addr)
{
	const volatile __u32 *words = (const volatile __u32 *) addr;
	__u32 word = words[nr >> 5];

	return (word >> (nr & 31)) & 1;
}
/**
 * ffz - find the first zero bit in a long word
 * @x: The long word to find the bit in
 *
 * Returns the bit-number (0..63) of the first (least significant) zero bit.
 * Undefined if no zero exists, so code should check against ~0UL first...
 */
static inline unsigned long
ffz (unsigned long x)
{
	/*
	 * (~x - 1) keeps the bits of ~x above its lowest set bit and turns
	 * every bit below it into a one; and-ing with x leaves exactly the
	 * run of ones that sits below the first zero of x.  Counting them
	 * gives the index of that zero.
	 */
	unsigned long low_ones = x & (~x - 1);

	return ia64_popcnt(low_ones);
}
/**
 * __ffs - find first bit in word.
 * @x: The word to search
 *
 * Returns the bit-number of the least significant set bit.
 * Undefined if no bit exists, so code should check against 0 first.
 */
static __inline__ unsigned long
__ffs (unsigned long x)
{
	/*
	 * (x - 1) & ~x is a mask of the zero bits below the lowest set bit
	 * of x; their count is that bit's index.
	 */
	unsigned long low_zeros = (x - 1) & ~x;

	return ia64_popcnt(low_zeros);
}
#ifdef __KERNEL__
/*
 * ia64_fls - bit number of the last (most-significant) bit set.
 *
 * Bits are numbered from 0..63 (e.g., ia64_fls(9) == 3).  Undefined
 * for x==0.
 *
 * The value is converted to long double, its exponent field is fetched
 * with ia64_getf_exp() and 0xffff is subtracted from it (presumably the
 * exponent bias of the register format -- confirm against the
 * architecture manual).
 */
static inline unsigned long
ia64_fls (unsigned long x)
{
	long double as_fp = x;
	long raw_exp = ia64_getf_exp(as_fp);

	return raw_exp - 0xffff;
}
/*
 * fls - find the last (most significant) bit set.
 *
 * Only the low 32 bits of the argument are considered.  Returns 0 for
 * x==0; otherwise bits are numbered from 1..32 (e.g., fls(9) == 4).
 */
static inline int
fls (int t)
{
	unsigned long bits = t & 0xffffffffu;
	unsigned int shift;

	if (bits == 0)
		return 0;
	/*
	 * Smear the highest set bit into every lower position (shifts of
	 * 1, 2, 4, 8 and 16); the number of ones left is then the 1-based
	 * index of that highest bit.
	 */
	for (shift = 1; shift <= 16; shift <<= 1)
		bits |= bits >> shift;
	return ia64_popcnt(bits);
}
/*
 * ffs: find first bit set.  This is defined the same way as the libc and
 * compiler builtin ffs routines and therefore differs in spirit from the
 * above ffz (see "man ffs"): it operates on "int" values only and the result
 * is the bit number + 1.  ffs(0) is defined to return zero.
 *
 * Implemented by forwarding directly to the compiler's __builtin_ffs().
 */
#define ffs(x) __builtin_ffs(x)
/*
 * hweightN: returns the hamming weight (i.e. the number
 * of bits set) of a N-bit word
 *
 * hweight64() hands the whole word to ia64_popcnt().
 */
static __inline__ unsigned long
hweight64 (unsigned long x)
{
	return ia64_popcnt(x);
}
/*
 * Narrower hamming weights: mask the argument down to its low 32/16/8 bits
 * and count with hweight64().  The whole expansion is parenthesized so the
 * cast cannot be split from the call by operators at the use site (sizeof,
 * postfix operators, ...).
 */
#define hweight32(x) ((unsigned int) hweight64((x) & 0xfffffffful))
#define hweight16(x) ((unsigned int) hweight64((x) & 0xfffful))
#define hweight8(x) ((unsigned int) hweight64((x) & 0xfful))
#endif /* __KERNEL__ */
/*
 * Out-of-line bitmap search helpers; only declared here, the definitions
 * live elsewhere.
 * NOTE(review): presumably @size is the bitmap length in bits and @offset
 * the bit index to start searching from -- confirm against the
 * implementation.
 */
extern int __find_next_zero_bit (const void *addr, unsigned long size,
unsigned long offset);
extern int __find_next_bit(const void *addr, unsigned long size,
unsigned long offset);
/* Public names simply forward to the out-of-line helpers above. */
#define find_next_zero_bit(addr, size, offset) \
__find_next_zero_bit((addr), (size), (offset))
#define find_next_bit(addr, size, offset) \
__find_next_bit((addr), (size), (offset))
/*
 * The "first" variants are the "next" variants started at offset 0.
 * The optimizer actually does good code for this case..
 */
#define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0)
#define find_first_bit(addr, size) find_next_bit((addr), (size), 0)
#ifdef __KERNEL__
/*
 * NOTE(review): from this point on __clear_bit(nr, addr) expands to the
 * atomic clear_bit(), shadowing the non-atomic inline __clear_bit() defined
 * earlier in this file -- confirm that this is intentional.
 */
#define __clear_bit(nr, addr) clear_bit(nr, addr)
/*
 * Bitmap functions for the ext2 filesystem: aliases for the generic bit
 * operations.  The *_atomic variants ignore their first argument (l).
 */
#define ext2_set_bit test_and_set_bit
#define ext2_set_bit_atomic(l,n,a) test_and_set_bit(n,a)
#define ext2_clear_bit test_and_clear_bit
#define ext2_clear_bit_atomic(l,n,a) test_and_clear_bit(n,a)
#define ext2_test_bit test_bit
#define ext2_find_first_zero_bit find_first_zero_bit
#define ext2_find_next_zero_bit find_next_zero_bit
/* Bitmap functions for the minix filesystem. */
#define minix_test_and_set_bit(nr,addr) test_and_set_bit(nr,addr)
#define minix_set_bit(nr,addr) set_bit(nr,addr)
#define minix_test_and_clear_bit(nr,addr) test_and_clear_bit(nr,addr)
#define minix_test_bit(nr,addr) test_bit(nr,addr)
#define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size)
/*
 * sched_find_first_bit - index of the first set bit in a three-word bitmap.
 *
 * Words b[0] and b[1] are checked in order; if both are zero the answer is
 * taken from b[2] without testing it, so the result is only meaningful when
 * at least one bit is set somewhere in b[0..2].
 */
static inline int
sched_find_first_bit (unsigned long *b)
{
	unsigned long word;

	word = b[0];
	if (unlikely(word))
		return __ffs(word);

	word = b[1];
	if (unlikely(word))
		return 64 + __ffs(word);

	return __ffs(b[2]) + 128;
}
#endif /* __KERNEL__ */
#endif /* _ASM_IA64_BITOPS_H */