bcachefs: Eytzinger cleanups
Pull out eytzinger.c and kill eytzinger_cmp_fn. We now provide eytzinger0_sort and eytzinger0_sort_r, which use the standard cmp_func_t and cmp_r_func_t callbacks.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
commit ca1e02f7e9
parent bdbf953b3c
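For illustration only (not part of this commit): a minimal sketch of what a caller of the new interface can look like. The comparator cmp_u32 and helper sort_my_table are hypothetical names; eytzinger0_sort() and the cmp_func_t signature are taken from the diff below, and passing a NULL swap_func lets the sort fall back to its built-in swaps.

static int cmp_u32(const void *l, const void *r)
{
        u32 a = *(const u32 *)l, b = *(const u32 *)r;

        return a < b ? -1 : a > b ? 1 : 0;
}

static void sort_my_table(u32 *tbl, size_t nr)
{
        /* NULL swap_func: eytzinger0_sort() picks a default swap routine */
        eytzinger0_sort(tbl, nr, sizeof(*tbl), cmp_u32, NULL);
}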
fs/bcachefs/Makefile
@@ -37,6 +37,7 @@ bcachefs-y := \
        error.o \
        extents.o \
        extent_update.o \
        eytzinger.o \
        fs.o \
        fs-common.o \
        fs-ioctl.o \
fs/bcachefs/eytzinger.c (new file, 234 lines)
@@ -0,0 +1,234 @@
// SPDX-License-Identifier: GPL-2.0

#include "eytzinger.h"

/**
 * is_aligned - is this pointer & size okay for word-wide copying?
 * @base: pointer to data
 * @size: size of each element
 * @align: required alignment (typically 4 or 8)
 *
 * Returns true if elements can be copied using word loads and stores.
 * The size must be a multiple of the alignment, and the base address must
 * be if we do not have CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS.
 *
 * For some reason, gcc doesn't know to optimize "if (a & mask || b & mask)"
 * to "if ((a | b) & mask)", so we do that by hand.
 */
__attribute_const__ __always_inline
static bool is_aligned(const void *base, size_t size, unsigned char align)
{
        unsigned char lsbits = (unsigned char)size;

        (void)base;
#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
        lsbits |= (unsigned char)(uintptr_t)base;
#endif
        return (lsbits & (align - 1)) == 0;
}

/**
 * swap_words_32 - swap two elements in 32-bit chunks
 * @a: pointer to the first element to swap
 * @b: pointer to the second element to swap
 * @n: element size (must be a multiple of 4)
 *
 * Exchange the two objects in memory.  This exploits base+index addressing,
 * which basically all CPUs have, to minimize loop overhead computations.
 *
 * For some reason, on x86 gcc 7.3.0 adds a redundant test of n at the
 * bottom of the loop, even though the zero flag is still valid from the
 * subtract (since the intervening mov instructions don't alter the flags).
 * Gcc 8.1.0 doesn't have that problem.
 */
static void swap_words_32(void *a, void *b, size_t n)
{
        do {
                u32 t = *(u32 *)(a + (n -= 4));
                *(u32 *)(a + n) = *(u32 *)(b + n);
                *(u32 *)(b + n) = t;
        } while (n);
}

/**
 * swap_words_64 - swap two elements in 64-bit chunks
 * @a: pointer to the first element to swap
 * @b: pointer to the second element to swap
 * @n: element size (must be a multiple of 8)
 *
 * Exchange the two objects in memory.  This exploits base+index
 * addressing, which basically all CPUs have, to minimize loop overhead
 * computations.
 *
 * We'd like to use 64-bit loads if possible.  If they're not, emulating
 * one requires base+index+4 addressing which x86 has but most other
 * processors do not.  If CONFIG_64BIT, we definitely have 64-bit loads,
 * but it's possible to have 64-bit loads without 64-bit pointers (e.g.
 * x32 ABI).  Are there any cases the kernel needs to worry about?
 */
static void swap_words_64(void *a, void *b, size_t n)
{
        do {
#ifdef CONFIG_64BIT
                u64 t = *(u64 *)(a + (n -= 8));
                *(u64 *)(a + n) = *(u64 *)(b + n);
                *(u64 *)(b + n) = t;
#else
                /* Use two 32-bit transfers to avoid base+index+4 addressing */
                u32 t = *(u32 *)(a + (n -= 4));
                *(u32 *)(a + n) = *(u32 *)(b + n);
                *(u32 *)(b + n) = t;

                t = *(u32 *)(a + (n -= 4));
                *(u32 *)(a + n) = *(u32 *)(b + n);
                *(u32 *)(b + n) = t;
#endif
        } while (n);
}

/**
 * swap_bytes - swap two elements a byte at a time
 * @a: pointer to the first element to swap
 * @b: pointer to the second element to swap
 * @n: element size
 *
 * This is the fallback if alignment doesn't allow using larger chunks.
 */
static void swap_bytes(void *a, void *b, size_t n)
{
        do {
                char t = ((char *)a)[--n];
                ((char *)a)[n] = ((char *)b)[n];
                ((char *)b)[n] = t;
        } while (n);
}

/*
 * The values are arbitrary as long as they can't be confused with
 * a pointer, but small integers make for the smallest compare
 * instructions.
 */
#define SWAP_WORDS_64 (swap_r_func_t)0
#define SWAP_WORDS_32 (swap_r_func_t)1
#define SWAP_BYTES    (swap_r_func_t)2
#define SWAP_WRAPPER  (swap_r_func_t)3

struct wrapper {
        cmp_func_t cmp;
        swap_func_t swap;
};

/*
 * The function pointer is last to make tail calls most efficient if the
 * compiler decides not to inline this function.
 */
static void do_swap(void *a, void *b, size_t size, swap_r_func_t swap_func, const void *priv)
{
        if (swap_func == SWAP_WRAPPER) {
                ((const struct wrapper *)priv)->swap(a, b, (int)size);
                return;
        }

        if (swap_func == SWAP_WORDS_64)
                swap_words_64(a, b, size);
        else if (swap_func == SWAP_WORDS_32)
                swap_words_32(a, b, size);
        else if (swap_func == SWAP_BYTES)
                swap_bytes(a, b, size);
        else
                swap_func(a, b, (int)size, priv);
}

#define _CMP_WRAPPER ((cmp_r_func_t)0L)

static int do_cmp(const void *a, const void *b, cmp_r_func_t cmp, const void *priv)
{
        if (cmp == _CMP_WRAPPER)
                return ((const struct wrapper *)priv)->cmp(a, b);
        return cmp(a, b, priv);
}

static inline int eytzinger0_do_cmp(void *base, size_t n, size_t size,
                                    cmp_r_func_t cmp_func, const void *priv,
                                    size_t l, size_t r)
{
        return do_cmp(base + inorder_to_eytzinger0(l, n) * size,
                      base + inorder_to_eytzinger0(r, n) * size,
                      cmp_func, priv);
}

static inline void eytzinger0_do_swap(void *base, size_t n, size_t size,
                                      swap_r_func_t swap_func, const void *priv,
                                      size_t l, size_t r)
{
        do_swap(base + inorder_to_eytzinger0(l, n) * size,
                base + inorder_to_eytzinger0(r, n) * size,
                size, swap_func, priv);
}

void eytzinger0_sort_r(void *base, size_t n, size_t size,
                       cmp_r_func_t cmp_func,
                       swap_r_func_t swap_func,
                       const void *priv)
{
        int i, c, r;

        /* called from 'sort' without swap function, let's pick the default */
        if (swap_func == SWAP_WRAPPER && !((struct wrapper *)priv)->swap)
                swap_func = NULL;

        if (!swap_func) {
                if (is_aligned(base, size, 8))
                        swap_func = SWAP_WORDS_64;
                else if (is_aligned(base, size, 4))
                        swap_func = SWAP_WORDS_32;
                else
                        swap_func = SWAP_BYTES;
        }

        /* heapify */
        for (i = n / 2 - 1; i >= 0; --i) {
                for (r = i; r * 2 + 1 < n; r = c) {
                        c = r * 2 + 1;

                        if (c + 1 < n &&
                            eytzinger0_do_cmp(base, n, size, cmp_func, priv, c, c + 1) < 0)
                                c++;

                        if (eytzinger0_do_cmp(base, n, size, cmp_func, priv, r, c) >= 0)
                                break;

                        eytzinger0_do_swap(base, n, size, swap_func, priv, r, c);
                }
        }

        /* sort */
        for (i = n - 1; i > 0; --i) {
                eytzinger0_do_swap(base, n, size, swap_func, priv, 0, i);

                for (r = 0; r * 2 + 1 < i; r = c) {
                        c = r * 2 + 1;

                        if (c + 1 < i &&
                            eytzinger0_do_cmp(base, n, size, cmp_func, priv, c, c + 1) < 0)
                                c++;

                        if (eytzinger0_do_cmp(base, n, size, cmp_func, priv, r, c) >= 0)
                                break;

                        eytzinger0_do_swap(base, n, size, swap_func, priv, r, c);
                }
        }
}

void eytzinger0_sort(void *base, size_t n, size_t size,
                     cmp_func_t cmp_func,
                     swap_func_t swap_func)
{
        struct wrapper w = {
                .cmp  = cmp_func,
                .swap = swap_func,
        };

        return eytzinger0_sort_r(base, n, size, _CMP_WRAPPER, SWAP_WRAPPER, &w);
}
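Again for illustration only (hypothetical names, not part of the commit): the _r variant threads a context pointer through to the comparator, which is how callers with variable-size entries can pass the entry size, mirroring the bch2_memcmp change further down in this commit.

static int cmp_entry(const void *l, const void *r, const void *priv)
{
        size_t entry_size = (size_t) priv;

        return memcmp(l, r, entry_size);
}

static void sort_entries(void *entries, size_t nr, size_t entry_size)
{
        /* entry_size is smuggled through the priv argument */
        eytzinger0_sort_r(entries, nr, entry_size,
                          cmp_entry, NULL, (void *)entry_size);
}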
fs/bcachefs/eytzinger.h
@@ -5,23 +5,33 @@
#include <linux/bitops.h>
#include <linux/log2.h>

#include "util.h"
#ifdef EYTZINGER_DEBUG
#define EYTZINGER_BUG_ON(cond)          BUG_ON(cond)
#else
#define EYTZINGER_BUG_ON(cond)
#endif

/*
 * Traversal for trees in eytzinger layout - a full binary tree layed out in an
 * array
 */

/*
 * One based indexing version:
 * array.
 *
 * With one based indexing each level of the tree starts at a power of two -
 * good for cacheline alignment:
 * Consider using an eytzinger tree any time you would otherwise be doing binary
 * search over an array.  Binary search is a worst case scenario for branch
 * prediction and prefetching, but in an eytzinger tree every node's children
 * are adjacent in memory, thus we can prefetch children before knowing the
 * result of the comparison, assuming multiple nodes fit on a cacheline.
 *
 * Two variants are provided, for one based indexing and zero based indexing.
 *
 * Zero based indexing is more convenient, but one based indexing has better
 * alignment and thus better performance because each new level of the tree
 * starts at a power of two, and thus if element 0 was cacheline aligned, each
 * new level will be as well.
 */

static inline unsigned eytzinger1_child(unsigned i, unsigned child)
{
        EBUG_ON(child > 1);
        EYTZINGER_BUG_ON(child > 1);

        return (i << 1) + child;
}

@@ -58,7 +68,7 @@ static inline unsigned eytzinger1_last(unsigned size)

static inline unsigned eytzinger1_next(unsigned i, unsigned size)
{
        EBUG_ON(i > size);
        EYTZINGER_BUG_ON(i > size);

        if (eytzinger1_right_child(i) <= size) {
                i = eytzinger1_right_child(i);

@@ -74,7 +84,7 @@ static inline unsigned eytzinger1_next(unsigned i, unsigned size)

static inline unsigned eytzinger1_prev(unsigned i, unsigned size)
{
        EBUG_ON(i > size);
        EYTZINGER_BUG_ON(i > size);

        if (eytzinger1_left_child(i) <= size) {
                i = eytzinger1_left_child(i) + 1;

@@ -101,7 +111,7 @@ static inline unsigned __eytzinger1_to_inorder(unsigned i, unsigned size,
        unsigned shift = __fls(size) - b;
        int s;

        EBUG_ON(!i || i > size);
        EYTZINGER_BUG_ON(!i || i > size);

        i ^= 1U << b;
        i <<= 1;

@@ -126,7 +136,7 @@ static inline unsigned __inorder_to_eytzinger1(unsigned i, unsigned size,
        unsigned shift;
        int s;

        EBUG_ON(!i || i > size);
        EYTZINGER_BUG_ON(!i || i > size);

        /*
         * sign bit trick:

@@ -164,7 +174,7 @@ static inline unsigned inorder_to_eytzinger1(unsigned i, unsigned size)

static inline unsigned eytzinger0_child(unsigned i, unsigned child)
{
        EBUG_ON(child > 1);
        EYTZINGER_BUG_ON(child > 1);

        return (i << 1) + 1 + child;
}

@@ -231,11 +241,9 @@ static inline unsigned inorder_to_eytzinger0(unsigned i, unsigned size)
             (_i) != -1;                                        \
             (_i) = eytzinger0_next((_i), (_size)))

typedef int (*eytzinger_cmp_fn)(const void *l, const void *r, size_t size);

/* return greatest node <= @search, or -1 if not found */
static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size,
                                         eytzinger_cmp_fn cmp, const void *search)
                                         cmp_func_t cmp, const void *search)
{
        unsigned i, n = 0;

@@ -244,21 +252,24 @@ static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size,

        do {
                i = n;
                n = eytzinger0_child(i, cmp(search, base + i * size, size) >= 0);
                n = eytzinger0_child(i, cmp(base + i * size, search) <= 0);
        } while (n < nr);

        if (n & 1) {
                /* @i was greater than @search, return previous node: */

                if (i == eytzinger0_first(nr))
                        return -1;

                return eytzinger0_prev(i, nr);
        } else {
                return i;
        }
}

static inline ssize_t eytzinger0_find_gt(void *base, size_t nr, size_t size,
                                         cmp_func_t cmp, const void *search)
{
        ssize_t idx = eytzinger0_find_le(base, nr, size, cmp, search);
        return eytzinger0_next(idx, size);
}

#define eytzinger0_find(base, nr, size, _cmp, search)                   \
({                                                                      \
        void *_base             = (base);                               \

@@ -269,13 +280,13 @@ static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size,
        int _res;                                                       \
                                                                        \
        while (_i < _nr &&                                              \
               (_res = _cmp(_search, _base + _i * _size, _size)))       \
               (_res = _cmp(_search, _base + _i * _size)))              \
                _i = eytzinger0_child(_i, _res > 0);                    \
        _i;                                                             \
})

void eytzinger0_sort(void *, size_t, size_t,
                     int (*cmp_func)(const void *, const void *, size_t),
                     void (*swap_func)(void *, void *, size_t));
void eytzinger0_sort_r(void *, size_t, size_t,
                       cmp_r_func_t, swap_r_func_t, const void *);
void eytzinger0_sort(void *, size_t, size_t, cmp_func_t, swap_func_t);

#endif /* _EYTZINGER_H */
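One more illustrative sketch (hypothetical helper names, same caveats as above): looking up the greatest element <= a key in an eytzinger0-ordered array with the new cmp_func_t-based eytzinger0_find_le() shown in the header diff; the comparator receives the array element first and the search key second.

static int cmp_u64(const void *l, const void *r)
{
        u64 a = *(const u64 *)l, b = *(const u64 *)r;

        return a < b ? -1 : a > b ? 1 : 0;
}

/* tbl must already be in eytzinger0 order, e.g. via eytzinger0_sort() */
static ssize_t lookup_le(u64 *tbl, size_t nr, u64 key)
{
        return eytzinger0_find_le(tbl, nr, sizeof(*tbl), cmp_u64, &key);
}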
fs/bcachefs/journal_seq_blacklist.c
@@ -95,8 +95,7 @@ out:
        return ret ?: bch2_blacklist_table_initialize(c);
}

static int journal_seq_blacklist_table_cmp(const void *_l,
                                           const void *_r, size_t size)
static int journal_seq_blacklist_table_cmp(const void *_l, const void *_r)
{
        const struct journal_seq_blacklist_table_entry *l = _l;
        const struct journal_seq_blacklist_table_entry *r = _r;
fs/bcachefs/replicas.c
@@ -6,12 +6,15 @@
#include "replicas.h"
#include "super-io.h"

#include <linux/sort.h>

static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *,
                                            struct bch_replicas_cpu *);

/* Some (buggy!) compilers don't allow memcmp to be passed as a pointer */
static int bch2_memcmp(const void *l, const void *r, size_t size)
static int bch2_memcmp(const void *l, const void *r, const void *priv)
{
        size_t size = (size_t) priv;
        return memcmp(l, r, size);
}

@@ -39,7 +42,8 @@ void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *e)

static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r)
{
        eytzinger0_sort(r->entries, r->nr, r->entry_size, bch2_memcmp, NULL);
        eytzinger0_sort_r(r->entries, r->nr, r->entry_size,
                          bch2_memcmp, NULL, (void *)(size_t)r->entry_size);
}

static void bch2_replicas_entry_v0_to_text(struct printbuf *out,

@@ -228,7 +232,7 @@ static inline int __replicas_entry_idx(struct bch_replicas_cpu *r,

        verify_replicas_entry(search);

#define entry_cmp(_l, _r, size) memcmp(_l, _r, entry_size)
#define entry_cmp(_l, _r)       memcmp(_l, _r, entry_size)
        idx = eytzinger0_find(r->entries, r->nr, r->entry_size,
                              entry_cmp, search);
#undef entry_cmp

@@ -824,10 +828,11 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r,
{
        unsigned i;

        sort_cmp_size(cpu_r->entries,
                      cpu_r->nr,
                      cpu_r->entry_size,
                      bch2_memcmp, NULL);
        sort_r(cpu_r->entries,
               cpu_r->nr,
               cpu_r->entry_size,
               bch2_memcmp, NULL,
               (void *)(size_t)cpu_r->entry_size);

        for (i = 0; i < cpu_r->nr; i++) {
                struct bch_replicas_entry_v1 *e =
fs/bcachefs/util.c
@@ -707,149 +707,6 @@ void memcpy_from_bio(void *dst, struct bio *src, struct bvec_iter src_iter)
        }
}

static int alignment_ok(const void *base, size_t align)
{
        return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
                ((unsigned long)base & (align - 1)) == 0;
}

static void u32_swap(void *a, void *b, size_t size)
{
        u32 t = *(u32 *)a;
        *(u32 *)a = *(u32 *)b;
        *(u32 *)b = t;
}

static void u64_swap(void *a, void *b, size_t size)
{
        u64 t = *(u64 *)a;
        *(u64 *)a = *(u64 *)b;
        *(u64 *)b = t;
}

static void generic_swap(void *a, void *b, size_t size)
{
        char t;

        do {
                t = *(char *)a;
                *(char *)a++ = *(char *)b;
                *(char *)b++ = t;
        } while (--size > 0);
}

static inline int do_cmp(void *base, size_t n, size_t size,
                         int (*cmp_func)(const void *, const void *, size_t),
                         size_t l, size_t r)
{
        return cmp_func(base + inorder_to_eytzinger0(l, n) * size,
                        base + inorder_to_eytzinger0(r, n) * size,
                        size);
}

static inline void do_swap(void *base, size_t n, size_t size,
                           void (*swap_func)(void *, void *, size_t),
                           size_t l, size_t r)
{
        swap_func(base + inorder_to_eytzinger0(l, n) * size,
                  base + inorder_to_eytzinger0(r, n) * size,
                  size);
}

void eytzinger0_sort(void *base, size_t n, size_t size,
                     int (*cmp_func)(const void *, const void *, size_t),
                     void (*swap_func)(void *, void *, size_t))
{
        int i, c, r;

        if (!swap_func) {
                if (size == 4 && alignment_ok(base, 4))
                        swap_func = u32_swap;
                else if (size == 8 && alignment_ok(base, 8))
                        swap_func = u64_swap;
                else
                        swap_func = generic_swap;
        }

        /* heapify */
        for (i = n / 2 - 1; i >= 0; --i) {
                for (r = i; r * 2 + 1 < n; r = c) {
                        c = r * 2 + 1;

                        if (c + 1 < n &&
                            do_cmp(base, n, size, cmp_func, c, c + 1) < 0)
                                c++;

                        if (do_cmp(base, n, size, cmp_func, r, c) >= 0)
                                break;

                        do_swap(base, n, size, swap_func, r, c);
                }
        }

        /* sort */
        for (i = n - 1; i > 0; --i) {
                do_swap(base, n, size, swap_func, 0, i);

                for (r = 0; r * 2 + 1 < i; r = c) {
                        c = r * 2 + 1;

                        if (c + 1 < i &&
                            do_cmp(base, n, size, cmp_func, c, c + 1) < 0)
                                c++;

                        if (do_cmp(base, n, size, cmp_func, r, c) >= 0)
                                break;

                        do_swap(base, n, size, swap_func, r, c);
                }
        }
}

void sort_cmp_size(void *base, size_t num, size_t size,
                   int (*cmp_func)(const void *, const void *, size_t),
                   void (*swap_func)(void *, void *, size_t size))
{
        /* pre-scale counters for performance */
        int i = (num/2 - 1) * size, n = num * size, c, r;

        if (!swap_func) {
                if (size == 4 && alignment_ok(base, 4))
                        swap_func = u32_swap;
                else if (size == 8 && alignment_ok(base, 8))
                        swap_func = u64_swap;
                else
                        swap_func = generic_swap;
        }

        /* heapify */
        for ( ; i >= 0; i -= size) {
                for (r = i; r * 2 + size < n; r = c) {
                        c = r * 2 + size;
                        if (c < n - size &&
                            cmp_func(base + c, base + c + size, size) < 0)
                                c += size;
                        if (cmp_func(base + r, base + c, size) >= 0)
                                break;
                        swap_func(base + r, base + c, size);
                }
        }

        /* sort */
        for (i = n - size; i > 0; i -= size) {
                swap_func(base, base + i, size);
                for (r = 0; r * 2 + size < i; r = c) {
                        c = r * 2 + size;
                        if (c < i - size &&
                            cmp_func(base + c, base + c + size, size) < 0)
                                c += size;
                        if (cmp_func(base + r, base + c, size) >= 0)
                                break;
                        swap_func(base + r, base + c, size);
                }
        }
}

#if 0
void eytzinger1_test(void)
{
fs/bcachefs/util.h
@@ -631,10 +631,6 @@ static inline void memset_u64s_tail(void *s, int c, unsigned bytes)
        memset(s + bytes, c, rem);
}

void sort_cmp_size(void *base, size_t num, size_t size,
                   int (*cmp_func)(const void *, const void *, size_t),
                   void (*swap_func)(void *, void *, size_t));

/* just the memmove, doesn't update @_nr */
#define __array_insert_item(_array, _nr, _pos)                          \
        memmove(&(_array)[(_pos) + 1],                                  \