58474f76a7
This adds another disk accounting counter to track usage per inode number (any snapshot ID). This will be used for a couple things: - It'll give us a way to tell the user how much space a given file is consuming in all snapshots; i.e. how much extra space it's consuming due to snapshot versioning. - It counts number of extents and total size of extents (both in btree keyspace sectors and actual disk usage), meaning it gives us average extent size: that is, it'll let us cheaply find fragmented files that should be defragmented. Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
168 lines
4.2 KiB
C
168 lines
4.2 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _BCACHEFS_DISK_ACCOUNTING_FORMAT_H
|
|
#define _BCACHEFS_DISK_ACCOUNTING_FORMAT_H
|
|
|
|
#include "replicas_format.h"
|
|
|
|
/*
|
|
* Disk accounting - KEY_TYPE_accounting - on disk format:
|
|
*
|
|
* Here, the key has considerably more structure than a typical key (bpos); an
|
|
* accounting key is 'struct disk_accounting_pos', which is a union with struct bpos.
|
|
*
|
|
* More specifically: a key is just a multiword integer (where word endianness
|
|
* matches native byte order), so we're treating bpos as an opaque 20 byte
|
|
* integer and mapping bch_accounting_key to that.
|
|
*
|
|
* This is a type-tagged union of all our various subtypes; a disk accounting
|
|
* key can be device counters, replicas counters, et cetera - it's extensible.
|
|
*
|
|
* The value is a list of u64s or s64s; the number of counters is specific to a
|
|
* given accounting type.
|
|
*
|
|
* Unlike with other key types, updates are _deltas_, and the deltas are not
|
|
* resolved until the update to the underlying btree, done by btree write buffer
|
|
* flush or journal replay.
|
|
*
|
|
* Journal replay in particular requires special handling. The journal tracks a
|
|
* range of entries which may not have been applied to the btree
|
|
* yet - it does not know definitively whether individual entries are dirty and
|
|
* still need to be applied.
|
|
*
|
|
* To handle this, we use the version field of struct bkey, and give every
|
|
* accounting update a unique version number - a total ordering in time; the
|
|
* version number is derived from the key's position in the journal. Then
|
|
* journal replay can compare the version number of the key from the journal
|
|
* with the version number of the key in the btree to determine if a key needs
|
|
* to be replayed.
|
|
*
|
|
* For this to work, we must maintain this strict time ordering of updates as
|
|
* they are flushed to the btree, both via write buffer flush and via journal
|
|
* replay. This has complications for the write buffer code while journal replay
|
|
* is still in progress; the write buffer cannot flush any accounting keys to
|
|
* the btree until journal replay has finished replaying its accounting keys, or
|
|
* the (newer) version number of the keys from the write buffer will cause
|
|
* updates from journal replay to be lost.
|
|
*/
|
|
|
|
struct bch_accounting {
|
|
struct bch_val v;
|
|
__u64 d[];
|
|
};
|
|
|
|
/*
 * NOTE(review): presumably the largest number of bch_accounting.d[] counters
 * any single accounting type uses - confirm against the users of this macro.
 */
#define BCH_ACCOUNTING_MAX_COUNTERS 3
|
|
|
|
/*
 * BCH_DATA_TYPES() - x-macro table of data types, one x(name, number) per
 * entry.
 *
 * Users define x() before expanding the table and #undef it afterwards.
 *
 * NOTE(review): the numbers look like they are stored on disk (e.g. in
 * bch_dev_data_type.data_type), so existing entries should presumably never
 * be renumbered - confirm before changing.
 */
#define BCH_DATA_TYPES()		\
	x(free,		0)		\
	x(sb,		1)		\
	x(journal,	2)		\
	x(btree,	3)		\
	x(user,		4)		\
	x(cached,	5)		\
	x(parity,	6)		\
	x(stripe,	7)		\
	x(need_gc_gens,	8)		\
	x(need_discard,	9)		\
	x(unstriped,	10)

/*
 * enum bch_data_type - BCH_DATA_<name> for every entry of BCH_DATA_TYPES().
 *
 * Each enumerator is pinned to the number given in the table rather than
 * relying on declaration order, so reordering or inserting table entries
 * cannot silently renumber existing types. BCH_DATA_NR is one past the last
 * table entry.
 */
enum bch_data_type {
#define x(t, n) BCH_DATA_##t = n,
	BCH_DATA_TYPES()
#undef x
	BCH_DATA_NR
};
|
|
|
|
static inline bool data_type_is_empty(enum bch_data_type type)
|
|
{
|
|
switch (type) {
|
|
case BCH_DATA_free:
|
|
case BCH_DATA_need_gc_gens:
|
|
case BCH_DATA_need_discard:
|
|
return true;
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
static inline bool data_type_is_hidden(enum bch_data_type type)
|
|
{
|
|
switch (type) {
|
|
case BCH_DATA_sb:
|
|
case BCH_DATA_journal:
|
|
return true;
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
/*
 * BCH_DISK_ACCOUNTING_TYPES() - x-macro table of accounting key subtypes, one
 * x(name, number) per entry.
 *
 * Every name here has a same-named member in the inner union of
 * struct disk_accounting_pos. Users define x() before expanding the table and
 * #undef it afterwards.
 *
 * The table must stay one unbroken run of backslash-continued lines: any
 * line inserted between entries ends the macro early.
 */
#define BCH_DISK_ACCOUNTING_TYPES()		\
	x(nr_inodes,		0)		\
	x(persistent_reserved,	1)		\
	x(replicas,		2)		\
	x(dev_data_type,	3)		\
	x(compression,		4)		\
	x(snapshot,		5)		\
	x(btree,		6)		\
	x(rebalance_work,	7)		\
	x(inum,			8)

/*
 * enum disk_accounting_type - BCH_DISK_ACCOUNTING_<name> for every table
 * entry, each pinned to the number given in the table.
 * BCH_DISK_ACCOUNTING_TYPE_NR is one past the last entry.
 */
enum disk_accounting_type {
#define x(f, nr) BCH_DISK_ACCOUNTING_##f = nr,
	BCH_DISK_ACCOUNTING_TYPES()
#undef x
	BCH_DISK_ACCOUNTING_TYPE_NR,
};
|
|
|
|
/*
 * Key fields for the nr_inodes accounting subtype (the nr_inodes member of
 * struct disk_accounting_pos). Intentionally has no fields.
 */
struct bch_nr_inodes {
};
|
|
|
|
struct bch_persistent_reserved {
|
|
__u8 nr_replicas;
|
|
};
|
|
|
|
struct bch_dev_data_type {
|
|
__u8 dev;
|
|
__u8 data_type;
|
|
};
|
|
|
|
struct bch_acct_compression {
|
|
__u8 type;
|
|
};
|
|
|
|
struct bch_acct_snapshot {
|
|
__u32 id;
|
|
} __packed;
|
|
|
|
struct bch_acct_btree {
|
|
__u32 id;
|
|
} __packed;
|
|
|
|
struct bch_acct_inum {
|
|
__u64 inum;
|
|
} __packed;
|
|
|
|
/*
 * Key fields for the rebalance_work accounting subtype (the rebalance_work
 * member of struct disk_accounting_pos). Intentionally has no fields.
 */
struct bch_acct_rebalance_work {
};
|
|
|
|
struct disk_accounting_pos {
|
|
union {
|
|
struct {
|
|
__u8 type;
|
|
union {
|
|
struct bch_nr_inodes nr_inodes;
|
|
struct bch_persistent_reserved persistent_reserved;
|
|
struct bch_replicas_entry_v1 replicas;
|
|
struct bch_dev_data_type dev_data_type;
|
|
struct bch_acct_compression compression;
|
|
struct bch_acct_snapshot snapshot;
|
|
struct bch_acct_btree btree;
|
|
struct bch_acct_rebalance_work rebalance_work;
|
|
struct bch_acct_inum inum;
|
|
} __packed;
|
|
} __packed;
|
|
struct bpos _pad;
|
|
};
|
|
};
|
|
|
|
#endif /* _BCACHEFS_DISK_ACCOUNTING_FORMAT_H */
|