iommu/vt-d: Introduce batched cache invalidation
Converts IOTLB and Dev-IOTLB invalidation to a batched model. Cache tag
invalidation requests for a domain are now accumulated in a qi_batch
structure before being flushed in bulk. It replaces the previous
per-request qi_flush approach with a more efficient batching mechanism.

Co-developed-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Tina Zhang <tina.zhang@intel.com>
Link: https://lore.kernel.org/r/20240815065221.50328-5-tina.zhang@intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
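The diff below relies on a qi_batch container introduced by an earlier patch
in this series, so its definition is not visible here. From the way the new
helpers use it (a descs array indexed by index, cleared with memset(), and
flushed once index reaches QI_MAX_BATCHED_DESC_COUNT), its shape is roughly
the following sketch; the capacity value is an assumption, not taken from
this diff:

/*
 * Rough sketch of the batch container used below; the authoritative
 * definition lives in a preceding patch of this series.  The capacity
 * of 16 descriptors is an assumption, not taken from this diff.
 */
#define QI_MAX_BATCHED_DESC_COUNT	16

struct qi_batch {
	/* queued-invalidation descriptors waiting to be submitted */
	struct qi_desc descs[QI_MAX_BATCHED_DESC_COUNT];
	/* number of valid entries in descs[] */
	unsigned int index;
};

With this layout, qi_batch_flush_descs() can hand the whole array to
qi_submit_sync() in a single call and then reset the batch with one memset().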
@@ -262,6 +262,79 @@ static unsigned long calculate_psi_aligned_address(unsigned long start,
 	return ALIGN_DOWN(start, VTD_PAGE_SIZE << mask);
 }
 
+static void qi_batch_flush_descs(struct intel_iommu *iommu, struct qi_batch *batch)
+{
+	if (!iommu || !batch->index)
+		return;
+
+	qi_submit_sync(iommu, batch->descs, batch->index, 0);
+
+	/* Reset the index value and clean the whole batch buffer. */
+	memset(batch, 0, sizeof(*batch));
+}
+
+static void qi_batch_increment_index(struct intel_iommu *iommu, struct qi_batch *batch)
+{
+	if (++batch->index == QI_MAX_BATCHED_DESC_COUNT)
+		qi_batch_flush_descs(iommu, batch);
+}
+
+static void qi_batch_add_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
+			       unsigned int size_order, u64 type,
+			       struct qi_batch *batch)
+{
+	qi_desc_iotlb(iommu, did, addr, size_order, type, &batch->descs[batch->index]);
+	qi_batch_increment_index(iommu, batch);
+}
+
+static void qi_batch_add_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
+				   u16 qdep, u64 addr, unsigned int mask,
+				   struct qi_batch *batch)
+{
+	/*
+	 * According to VT-d spec, software is recommended to not submit any Device-TLB
+	 * invalidation requests while address remapping hardware is disabled.
+	 */
+	if (!(iommu->gcmd & DMA_GCMD_TE))
+		return;
+
+	qi_desc_dev_iotlb(sid, pfsid, qdep, addr, mask, &batch->descs[batch->index]);
+	qi_batch_increment_index(iommu, batch);
+}
+
+static void qi_batch_add_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid,
+				u64 addr, unsigned long npages, bool ih,
+				struct qi_batch *batch)
+{
+	/*
+	 * npages == -1 means a PASID-selective invalidation, otherwise,
+	 * a positive value for Page-selective-within-PASID invalidation.
+	 * 0 is not a valid input.
+	 */
+	if (!npages)
+		return;
+
+	qi_desc_piotlb(did, pasid, addr, npages, ih, &batch->descs[batch->index]);
+	qi_batch_increment_index(iommu, batch);
+}
+
+static void qi_batch_add_pasid_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
+					 u32 pasid, u16 qdep, u64 addr,
+					 unsigned int size_order, struct qi_batch *batch)
+{
+	/*
+	 * According to VT-d spec, software is recommended to not submit any
+	 * Device-TLB invalidation requests while address remapping hardware
+	 * is disabled.
+	 */
+	if (!(iommu->gcmd & DMA_GCMD_TE))
+		return;
+
+	qi_desc_dev_iotlb_pasid(sid, pfsid, pasid, qdep, addr, size_order,
+				&batch->descs[batch->index]);
+	qi_batch_increment_index(iommu, batch);
+}
+
 static void cache_tag_flush_iotlb(struct dmar_domain *domain, struct cache_tag *tag,
 				  unsigned long addr, unsigned long pages,
 				  unsigned long mask, int ih)
@@ -270,7 +343,8 @@ static void cache_tag_flush_iotlb(struct dmar_domain *domain, struct cache_tag *
 	u64 type = DMA_TLB_PSI_FLUSH;
 
 	if (domain->use_first_level) {
-		qi_flush_piotlb(iommu, tag->domain_id, tag->pasid, addr, pages, ih);
+		qi_batch_add_piotlb(iommu, tag->domain_id, tag->pasid, addr,
+				    pages, ih, domain->qi_batch);
 		return;
 	}
 
@@ -287,7 +361,8 @@ static void cache_tag_flush_iotlb(struct dmar_domain *domain, struct cache_tag *
 	}
 
 	if (ecap_qis(iommu->ecap))
-		qi_flush_iotlb(iommu, tag->domain_id, addr | ih, mask, type);
+		qi_batch_add_iotlb(iommu, tag->domain_id, addr | ih, mask, type,
+				   domain->qi_batch);
 	else
 		__iommu_flush_iotlb(iommu, tag->domain_id, addr | ih, mask, type);
 }
@@ -303,19 +378,20 @@ static void cache_tag_flush_devtlb_psi(struct dmar_domain *domain, struct cache_
 	sid = PCI_DEVID(info->bus, info->devfn);
 
 	if (tag->pasid == IOMMU_NO_PASID) {
-		qi_flush_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
-				   addr, mask);
+		qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
+				       addr, mask, domain->qi_batch);
 		if (info->dtlb_extra_inval)
-			qi_flush_dev_iotlb(iommu, sid, info->pfsid,
-					   info->ats_qdep, addr, mask);
+			qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
+					       addr, mask, domain->qi_batch);
 		return;
 	}
 
-	qi_flush_dev_iotlb_pasid(iommu, sid, info->pfsid, tag->pasid,
-				 info->ats_qdep, addr, mask);
+	qi_batch_add_pasid_dev_iotlb(iommu, sid, info->pfsid, tag->pasid,
+				     info->ats_qdep, addr, mask, domain->qi_batch);
 	if (info->dtlb_extra_inval)
-		qi_flush_dev_iotlb_pasid(iommu, sid, info->pfsid, tag->pasid,
-					 info->ats_qdep, addr, mask);
+		qi_batch_add_pasid_dev_iotlb(iommu, sid, info->pfsid, tag->pasid,
+					     info->ats_qdep, addr, mask,
+					     domain->qi_batch);
 }
 
 static void cache_tag_flush_devtlb_all(struct dmar_domain *domain, struct cache_tag *tag)
@@ -327,11 +403,11 @@ static void cache_tag_flush_devtlb_all(struct dmar_domain *domain, struct cache_
 	info = dev_iommu_priv_get(tag->dev);
 	sid = PCI_DEVID(info->bus, info->devfn);
 
-	qi_flush_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0,
-			   MAX_AGAW_PFN_WIDTH);
+	qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0,
+			       MAX_AGAW_PFN_WIDTH, domain->qi_batch);
 	if (info->dtlb_extra_inval)
-		qi_flush_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0,
-				   MAX_AGAW_PFN_WIDTH);
+		qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0,
+				       MAX_AGAW_PFN_WIDTH, domain->qi_batch);
 }
 
 /*
@@ -341,6 +417,7 @@ static void cache_tag_flush_devtlb_all(struct dmar_domain *domain, struct cache_
 void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
 			   unsigned long end, int ih)
 {
+	struct intel_iommu *iommu = NULL;
 	unsigned long pages, mask, addr;
 	struct cache_tag *tag;
 	unsigned long flags;
@@ -349,6 +426,10 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
 
 	spin_lock_irqsave(&domain->cache_lock, flags);
 	list_for_each_entry(tag, &domain->cache_tags, node) {
+		if (iommu && iommu != tag->iommu)
+			qi_batch_flush_descs(iommu, domain->qi_batch);
+		iommu = tag->iommu;
+
 		switch (tag->type) {
 		case CACHE_TAG_IOTLB:
 		case CACHE_TAG_NESTING_IOTLB:
@@ -372,6 +453,7 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
 
 		trace_cache_tag_flush_range(tag, start, end, addr, pages, mask);
 	}
+	qi_batch_flush_descs(iommu, domain->qi_batch);
 	spin_unlock_irqrestore(&domain->cache_lock, flags);
 }
 
@@ -381,11 +463,16 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
  */
 void cache_tag_flush_all(struct dmar_domain *domain)
 {
+	struct intel_iommu *iommu = NULL;
 	struct cache_tag *tag;
 	unsigned long flags;
 
 	spin_lock_irqsave(&domain->cache_lock, flags);
 	list_for_each_entry(tag, &domain->cache_tags, node) {
+		if (iommu && iommu != tag->iommu)
+			qi_batch_flush_descs(iommu, domain->qi_batch);
+		iommu = tag->iommu;
+
 		switch (tag->type) {
 		case CACHE_TAG_IOTLB:
 		case CACHE_TAG_NESTING_IOTLB:
@@ -399,6 +486,7 @@ void cache_tag_flush_all(struct dmar_domain *domain)
 
 		trace_cache_tag_flush_all(tag);
 	}
+	qi_batch_flush_descs(iommu, domain->qi_batch);
 	spin_unlock_irqrestore(&domain->cache_lock, flags);
 }
 
@@ -416,6 +504,7 @@ void cache_tag_flush_all(struct dmar_domain *domain)
 void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start,
 			      unsigned long end)
 {
+	struct intel_iommu *iommu = NULL;
 	unsigned long pages, mask, addr;
 	struct cache_tag *tag;
 	unsigned long flags;
@@ -424,7 +513,9 @@ void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start,
 
 	spin_lock_irqsave(&domain->cache_lock, flags);
 	list_for_each_entry(tag, &domain->cache_tags, node) {
-		struct intel_iommu *iommu = tag->iommu;
+		if (iommu && iommu != tag->iommu)
+			qi_batch_flush_descs(iommu, domain->qi_batch);
+		iommu = tag->iommu;
 
 		if (!cap_caching_mode(iommu->cap) || domain->use_first_level) {
 			iommu_flush_write_buffer(iommu);
@@ -437,5 +528,6 @@ void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start,
 
 		trace_cache_tag_flush_range_np(tag, start, end, addr, pages, mask);
 	}
+	qi_batch_flush_descs(iommu, domain->qi_batch);
 	spin_unlock_irqrestore(&domain->cache_lock, flags);
 }