iommufd: Data structure to provide IOVA to PFN mapping
This is the remainder of the IOAS data structure. Provide an object called an io_pagetable that is composed of iopt_areas pointing at iopt_pages, along with a list of iommu_domains that mirror the IOVA to PFN map. At the top this is a simple interval tree of iopt_areas indicating the map of IOVA to iopt_pages. An xarray keeps track of a list of domains. Based on the attached domains there is a minimum alignment for areas (which may be smaller than PAGE_SIZE), an interval tree of reserved IOVA that can't be mapped and an interval tree of allowed IOVA that can always be mapped. The concept of an 'access' refers to something like a VFIO mdev that is accessing the IOVA and using a 'struct page *' for CPU based access. Externally an API is provided that matches the requirements of the IOCTL interface for map/unmap and domain attachment. The API provides a 'copy' primitive to establish a new IOVA map in a different IOAS from an existing mapping by re-using the iopt_pages. This is the basic mechanism to provide single pinning. This is designed to support a pre-registration flow where userspace would set up a dummy IOAS with no domains, map in memory and then establish an access to pin all PFNs into the xarray. Copy can then be used to create new IOVA mappings in a different IOAS, with iommu_domains attached. Upon copy the PFNs will be read out of the xarray and mapped into the iommu_domains, avoiding any pin_user_pages() overheads. Link: https://lore.kernel.org/r/10-v6-a196d26f289e+11787-iommufd_jgg@nvidia.com Tested-by: Nicolin Chen <nicolinc@nvidia.com> Tested-by: Yi Liu <yi.l.liu@intel.com> Tested-by: Lixiao Yang <lixiao.yang@intel.com> Tested-by: Matthew Rosato <mjrosato@linux.ibm.com> Reviewed-by: Kevin Tian <kevin.tian@intel.com> Signed-off-by: Yi Liu <yi.l.liu@intel.com> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
This commit is contained in:
parent
8d160cd4d5
commit
51fe6141f0
@ -444,6 +444,7 @@ ForEachMacros:
|
||||
- 'interval_tree_for_each_span'
|
||||
- 'intlist__for_each_entry'
|
||||
- 'intlist__for_each_entry_safe'
|
||||
- 'iopt_for_each_contig_area'
|
||||
- 'kcore_copy__for_each_phdr'
|
||||
- 'key_for_each'
|
||||
- 'key_for_each_safe'
|
||||
|
@ -1,5 +1,6 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
iommufd-y := \
|
||||
io_pagetable.o \
|
||||
main.o \
|
||||
pages.o
|
||||
|
||||
|
1186
drivers/iommu/iommufd/io_pagetable.c
Normal file
1186
drivers/iommu/iommufd/io_pagetable.c
Normal file
File diff suppressed because it is too large
Load Diff
@ -46,9 +46,19 @@ struct iopt_area {
|
||||
unsigned int page_offset;
|
||||
/* IOMMU_READ, IOMMU_WRITE, etc */
|
||||
int iommu_prot;
|
||||
bool prevent_access : 1;
|
||||
unsigned int num_accesses;
|
||||
};
|
||||
|
||||
/*
 * One range of allowed IOVA, stored as an interval tree node. The commit
 * description says allowed IOVA can always be mapped.
 * NOTE(review): presumably node.start/node.last are the inclusive bounds of
 * the range, as with iopt_area -- confirm in io_pagetable.c.
 */
struct iopt_allowed {
|
||||
struct interval_tree_node node;
|
||||
};
|
||||
|
||||
/*
 * One range of reserved IOVA, stored as an interval tree node. The commit
 * description says reserved IOVA cannot be mapped.
 *
 * owner is an opaque pointer. iopt_reserve_iova() takes an owner when a range
 * is added and iopt_remove_reserved_iova() takes one for removal.
 * NOTE(review): presumably removal drops every range tagged with the given
 * owner -- confirm in io_pagetable.c.
 */
struct iopt_reserved {
|
||||
struct interval_tree_node node;
|
||||
void *owner;
|
||||
};
|
||||
|
||||
int iopt_area_fill_domains(struct iopt_area *area, struct iopt_pages *pages);
|
||||
void iopt_area_unfill_domains(struct iopt_area *area, struct iopt_pages *pages);
|
||||
|
||||
@ -83,6 +93,24 @@ static inline size_t iopt_area_length(struct iopt_area *area)
|
||||
return (area->node.last - area->node.start) + 1;
|
||||
}
|
||||
|
||||
/*
 * Number of bytes from the start of the iopt_pages at which the iova begins.
 *   iopt_area_start_byte() / PAGE_SIZE encodes the starting page index
 *   iopt_area_start_byte() % PAGE_SIZE encodes the offset within that page
 *
 * The result is the sum of three terms: the distance of iova from
 * iopt_area_iova(area), the area's page_offset, and
 * iopt_area_index(area) * PAGE_SIZE.
 *
 * NOTE(review): nothing here checks that iova lies inside the area; callers
 * presumably guarantee iova >= iopt_area_iova(area) -- confirm.
 */
|
||||
static inline unsigned long iopt_area_start_byte(struct iopt_area *area,
|
||||
unsigned long iova)
|
||||
{
|
||||
return (iova - iopt_area_iova(area)) + area->page_offset +
|
||||
iopt_area_index(area) * PAGE_SIZE;
|
||||
}
|
||||
|
||||
/*
 * Page index, counted from the start of the iopt_pages, of the page that
 * holds iova: the byte position from iopt_area_start_byte() divided by
 * PAGE_SIZE. The offset within that page is discarded by the division.
 */
static inline unsigned long iopt_area_iova_to_index(struct iopt_area *area,
|
||||
unsigned long iova)
|
||||
{
|
||||
return iopt_area_start_byte(area, iova) / PAGE_SIZE;
|
||||
}
|
||||
|
||||
#define __make_iopt_iter(name) \
|
||||
static inline struct iopt_##name *iopt_##name##_iter_first( \
|
||||
struct io_pagetable *iopt, unsigned long start, \
|
||||
@ -110,6 +138,33 @@ static inline size_t iopt_area_length(struct iopt_area *area)
|
||||
}
|
||||
|
||||
/*
 * Instantiate the helpers generated by __make_iopt_iter() for struct
 * iopt_area, struct iopt_allowed and struct iopt_reserved. Each expansion
 * defines at least iopt_<name>_iter_first().
 */
__make_iopt_iter(area)
|
||||
__make_iopt_iter(allowed)
|
||||
__make_iopt_iter(reserved)
|
||||
|
||||
/*
 * Cursor state for walking contiguous areas. It is passed by pointer to
 * iopt_area_contig_init(), iopt_area_contig_next() and
 * iopt_area_contig_done().
 *
 * last_iova is the value iopt_area_contig_done() compares against the last
 * IOVA of the current area; area is the area the cursor currently refers to
 * and is tested for NULL there.
 * NOTE(review): cur_iova is presumably the next IOVA the walk expects to be
 * covered -- confirm in io_pagetable.c.
 */
struct iopt_area_contig_iter {
|
||||
unsigned long cur_iova;
|
||||
unsigned long last_iova;
|
||||
struct iopt_area *area;
|
||||
};
|
||||
struct iopt_area *iopt_area_contig_init(struct iopt_area_contig_iter *iter,
|
||||
struct io_pagetable *iopt,
|
||||
unsigned long iova,
|
||||
unsigned long last_iova);
|
||||
struct iopt_area *iopt_area_contig_next(struct iopt_area_contig_iter *iter);
|
||||
|
||||
/*
 * Returns true when the iterator holds a non-NULL area whose last IOVA is at
 * or beyond iter->last_iova, and false otherwise (including when iter->area
 * is NULL). Intended to be checked after an iopt_for_each_contig_area() loop.
 */
static inline bool iopt_area_contig_done(struct iopt_area_contig_iter *iter)
|
||||
{
|
||||
return iter->area && iter->last_iova <= iopt_area_last_iova(iter->area);
|
||||
}
|
||||
|
||||
/*
 * Iterate over a contiguous list of areas that span the iova,last_iova range.
 * The caller must check iopt_area_contig_done() after the loop to see if
 * contiguous areas existed.
 *
 * iter is passed straight through to iopt_area_contig_init() and
 * iopt_area_contig_next(), so it must be a struct iopt_area_contig_iter
 * pointer. area is assigned the return value of those calls on each pass and
 * the loop ends when that value is NULL.
 */
|
||||
#define iopt_for_each_contig_area(iter, area, iopt, iova, last_iova) \
|
||||
for (area = iopt_area_contig_init(iter, iopt, iova, last_iova); area; \
|
||||
area = iopt_area_contig_next(iter))
|
||||
|
||||
enum {
|
||||
IOPT_PAGES_ACCOUNT_NONE = 0,
|
||||
|
@ -9,9 +9,14 @@
|
||||
#include <linux/refcount.h>
|
||||
#include <linux/uaccess.h>
|
||||
|
||||
struct iommu_domain;
|
||||
struct iommu_group;
|
||||
|
||||
/*
 * iommufd context state.
 * NOTE(review): account_mode presumably holds one of the IOPT_PAGES_ACCOUNT_*
 * values -- confirm against the users of this field.
 * NOTE(review): objects is presumably the xarray of objects allocated by
 * _iommufd_object_alloc() for this context -- confirm.
 */
struct iommufd_ctx {
|
||||
struct file *file;
|
||||
struct xarray objects;
|
||||
|
||||
|
||||
u8 account_mode;
|
||||
};
|
||||
|
||||
/*
|
||||
@ -27,6 +32,7 @@ struct iommufd_ctx {
|
||||
struct io_pagetable {
|
||||
struct rw_semaphore domains_rwsem;
|
||||
struct xarray domains;
|
||||
struct xarray access_list;
|
||||
unsigned int next_domain_id;
|
||||
|
||||
struct rw_semaphore iova_rwsem;
|
||||
@ -36,8 +42,46 @@ struct io_pagetable {
|
||||
/* IOVA that cannot be allocated, struct iopt_reserved */
|
||||
struct rb_root_cached reserved_itree;
|
||||
u8 disable_large_pages;
|
||||
unsigned long iova_alignment;
|
||||
};
|
||||
|
||||
void iopt_init_table(struct io_pagetable *iopt);
|
||||
void iopt_destroy_table(struct io_pagetable *iopt);
|
||||
int iopt_get_pages(struct io_pagetable *iopt, unsigned long iova,
|
||||
unsigned long length, struct list_head *pages_list);
|
||||
void iopt_free_pages_list(struct list_head *pages_list);
|
||||
/*
 * Flag bits; IOPT_ALLOC_IOVA is bit 0.
 * NOTE(review): presumably passed in the 'flags' argument of
 * iopt_map_user_pages() / iopt_map_pages() to request that the IOVA be
 * allocated rather than supplied by the caller -- confirm in io_pagetable.c.
 */
enum {
|
||||
IOPT_ALLOC_IOVA = 1 << 0,
|
||||
};
|
||||
int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
|
||||
unsigned long *iova, void __user *uptr,
|
||||
unsigned long length, int iommu_prot,
|
||||
unsigned int flags);
|
||||
int iopt_map_pages(struct io_pagetable *iopt, struct list_head *pages_list,
|
||||
unsigned long length, unsigned long *dst_iova,
|
||||
int iommu_prot, unsigned int flags);
|
||||
int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova,
|
||||
unsigned long length, unsigned long *unmapped);
|
||||
int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped);
|
||||
|
||||
int iopt_table_add_domain(struct io_pagetable *iopt,
|
||||
struct iommu_domain *domain);
|
||||
void iopt_table_remove_domain(struct io_pagetable *iopt,
|
||||
struct iommu_domain *domain);
|
||||
int iopt_table_enforce_group_resv_regions(struct io_pagetable *iopt,
|
||||
struct device *device,
|
||||
struct iommu_group *group,
|
||||
phys_addr_t *sw_msi_start);
|
||||
int iopt_set_allow_iova(struct io_pagetable *iopt,
|
||||
struct rb_root_cached *allowed_iova);
|
||||
int iopt_reserve_iova(struct io_pagetable *iopt, unsigned long start,
|
||||
unsigned long last, void *owner);
|
||||
void iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner);
|
||||
int iopt_cut_iova(struct io_pagetable *iopt, unsigned long *iovas,
|
||||
size_t num_iovas);
|
||||
void iopt_enable_large_pages(struct io_pagetable *iopt);
|
||||
int iopt_disable_large_pages(struct io_pagetable *iopt);
|
||||
|
||||
struct iommufd_ucmd {
|
||||
struct iommufd_ctx *ictx;
|
||||
void __user *ubuffer;
|
||||
@ -130,4 +174,12 @@ struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
|
||||
type), \
|
||||
typeof(*(ptr)), obj)
|
||||
|
||||
/*
 * State for an 'access', handed to iopt_add_access() and
 * iopt_remove_access(). The commit description gives a VFIO mdev doing CPU
 * based access as an example of an access.
 * NOTE(review): iopt_access_list_id is presumably this access's index in
 * io_pagetable.access_list -- confirm in io_pagetable.c.
 * NOTE(review): iova_alignment is presumably the IOVA alignment this access
 * requires -- confirm.
 */
struct iommufd_access {
|
||||
unsigned long iova_alignment;
|
||||
u32 iopt_access_list_id;
|
||||
};
|
||||
|
||||
int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access);
|
||||
void iopt_remove_access(struct io_pagetable *iopt,
|
||||
struct iommufd_access *access);
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user