// SPDX-License-Identifier: GPL-2.0-only
/*
* vDPA bridge driver for modern virtio-pci device
*
* Copyright (c) 2020, Red Hat Inc. All rights reserved.
* Author: Jason Wang <jasowang@redhat.com>
*
* Based on virtio_pci_modern.c.
*/
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/vdpa.h>
#include <linux/virtio.h>
#include <linux/virtio_config.h>
#include <linux/virtio_ring.h>
#include <linux/virtio_pci.h>
#include <linux/virtio_pci_modern.h>
#include <uapi/linux/vdpa.h>
#define VP_VDPA_QUEUE_MAX 256
#define VP_VDPA_DRIVER_NAME "vp_vdpa"
#define VP_VDPA_NAME_SIZE 256
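/*
 * Per-virtqueue state: the mapped notification (doorbell) area and its
 * physical address, the vq interrupt callback, and the host IRQ bound to
 * this ring (VIRTIO_MSI_NO_VECTOR when no interrupt is in use).
 */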
struct vp_vring {
void __iomem *notify;
char msix_name[VP_VDPA_NAME_SIZE];
struct vdpa_callback cb;
resource_size_t notify_pa;
int irq;
};
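/*
 * Bridge instance: one vdpa_device backed by a modern virtio-pci device.
 * @device_features caches the (possibly netlink-provisioned) feature set
 * advertised to the vDPA bus.
 */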
struct vp_vdpa {
struct vdpa_device vdpa;
struct virtio_pci_modern_device *mdev;
struct vp_vring *vring;
struct vdpa_callback config_cb;
u64 device_features;
char msix_name[VP_VDPA_NAME_SIZE];
int config_irq;
int queues;
int vectors;
};
struct vp_vdpa_mgmtdev {
struct vdpa_mgmt_dev mgtdev;
struct virtio_pci_modern_device *mdev;
struct vp_vdpa *vp_vdpa;
};
static struct vp_vdpa *vdpa_to_vp(struct vdpa_device *vdpa)
{
return container_of(vdpa, struct vp_vdpa, vdpa);
}
static struct virtio_pci_modern_device *vdpa_to_mdev(struct vdpa_device *vdpa)
{
struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa);
return vp_vdpa->mdev;
}
static struct virtio_pci_modern_device *vp_vdpa_to_mdev(struct vp_vdpa *vp_vdpa)
{
return vp_vdpa->mdev;
}
static u64 vp_vdpa_get_device_features(struct vdpa_device *vdpa)
{
struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa);
return vp_vdpa->device_features;
}
static int vp_vdpa_set_driver_features(struct vdpa_device *vdpa, u64 features)
{
struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa);
vp_modern_set_features(mdev, features);
return 0;
}
static u64 vp_vdpa_get_driver_features(struct vdpa_device *vdpa)
{
struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa);
return vp_modern_get_driver_features(mdev);
}
static u8 vp_vdpa_get_status(struct vdpa_device *vdpa)
{
struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa);
return vp_modern_get_status(mdev);
}
static int vp_vdpa_get_vq_irq(struct vdpa_device *vdpa, u16 idx)
{
struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa);
int irq = vp_vdpa->vring[idx].irq;
if (irq == VIRTIO_MSI_NO_VECTOR)
return -EINVAL;
return irq;
}
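/*
 * Tear down interrupts: detach each vq and the config space from their
 * MSI-X vectors in the device before freeing the host IRQs, then release
 * the allocated vectors themselves.
 */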
static void vp_vdpa_free_irq(struct vp_vdpa *vp_vdpa)
{
struct virtio_pci_modern_device *mdev = vp_vdpa_to_mdev(vp_vdpa);
struct pci_dev *pdev = mdev->pci_dev;
int i;
for (i = 0; i < vp_vdpa->queues; i++) {
if (vp_vdpa->vring[i].irq != VIRTIO_MSI_NO_VECTOR) {
vp_modern_queue_vector(mdev, i, VIRTIO_MSI_NO_VECTOR);
devm_free_irq(&pdev->dev, vp_vdpa->vring[i].irq,
&vp_vdpa->vring[i]);
vp_vdpa->vring[i].irq = VIRTIO_MSI_NO_VECTOR;
}
}
if (vp_vdpa->config_irq != VIRTIO_MSI_NO_VECTOR) {
vp_modern_config_vector(mdev, VIRTIO_MSI_NO_VECTOR);
devm_free_irq(&pdev->dev, vp_vdpa->config_irq, vp_vdpa);
vp_vdpa->config_irq = VIRTIO_MSI_NO_VECTOR;
}
if (vp_vdpa->vectors) {
pci_free_irq_vectors(pdev);
vp_vdpa->vectors = 0;
}
}
static irqreturn_t vp_vdpa_vq_handler(int irq, void *arg)
{
struct vp_vring *vring = arg;
if (vring->cb.callback)
return vring->cb.callback(vring->cb.private);
return IRQ_HANDLED;
}
static irqreturn_t vp_vdpa_config_handler(int irq, void *arg)
{
struct vp_vdpa *vp_vdpa = arg;
if (vp_vdpa->config_cb.callback)
return vp_vdpa->config_cb.callback(vp_vdpa->config_cb.private);
return IRQ_HANDLED;
}
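/*
 * Allocate one MSI-X vector per virtqueue that has a callback registered,
 * plus one for config interrupts. Virtqueues without a callback are left
 * on VIRTIO_MSI_NO_VECTOR so the device never fires an interrupt for them.
 */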
static int vp_vdpa_request_irq(struct vp_vdpa *vp_vdpa)
{
struct virtio_pci_modern_device *mdev = vp_vdpa_to_mdev(vp_vdpa);
struct pci_dev *pdev = mdev->pci_dev;
int i, ret, irq;
int queues = vp_vdpa->queues;
int vectors = 1;
int msix_vec = 0;
for (i = 0; i < queues; i++) {
if (vp_vdpa->vring[i].cb.callback)
vectors++;
}
ret = pci_alloc_irq_vectors(pdev, vectors, vectors, PCI_IRQ_MSIX);
if (ret != vectors) {
dev_err(&pdev->dev,
"vp_vdpa: fail to allocate irq vectors want %d but %d\n",
vectors, ret);
return ret;
}
vp_vdpa->vectors = vectors;
for (i = 0; i < queues; i++) {
if (!vp_vdpa->vring[i].cb.callback)
continue;
snprintf(vp_vdpa->vring[i].msix_name, VP_VDPA_NAME_SIZE,
"vp-vdpa[%s]-%d\n", pci_name(pdev), i);
irq = pci_irq_vector(pdev, msix_vec);
ret = devm_request_irq(&pdev->dev, irq,
vp_vdpa_vq_handler,
0, vp_vdpa->vring[i].msix_name,
&vp_vdpa->vring[i]);
if (ret) {
dev_err(&pdev->dev,
"vp_vdpa: fail to request irq for vq %d\n", i);
goto err;
}
vp_modern_queue_vector(mdev, i, msix_vec);
vp_vdpa->vring[i].irq = irq;
msix_vec++;
}
	snprintf(vp_vdpa->msix_name, VP_VDPA_NAME_SIZE, "vp-vdpa[%s]-config",
pci_name(pdev));
irq = pci_irq_vector(pdev, msix_vec);
ret = devm_request_irq(&pdev->dev, irq, vp_vdpa_config_handler, 0,
vp_vdpa->msix_name, vp_vdpa);
if (ret) {
dev_err(&pdev->dev,
"vp_vdpa: fail to request irq for config: %d\n", ret);
goto err;
}
vp_modern_config_vector(mdev, msix_vec);
vp_vdpa->config_irq = irq;
return 0;
err:
vp_vdpa_free_irq(vp_vdpa);
return ret;
}
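/*
 * IRQs are requested lazily, on the first transition to DRIVER_OK;
 * vp_vdpa_reset() frees them again if the driver had reached DRIVER_OK.
 */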
static void vp_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
{
struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa);
struct virtio_pci_modern_device *mdev = vp_vdpa_to_mdev(vp_vdpa);
u8 s = vp_vdpa_get_status(vdpa);
if (status & VIRTIO_CONFIG_S_DRIVER_OK &&
!(s & VIRTIO_CONFIG_S_DRIVER_OK)) {
if (vp_vdpa_request_irq(vp_vdpa)) {
WARN_ON(1);
return;
}
}
vp_modern_set_status(mdev, status);
}
static int vp_vdpa_reset(struct vdpa_device *vdpa)
{
struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa);
struct virtio_pci_modern_device *mdev = vp_vdpa_to_mdev(vp_vdpa);
u8 s = vp_vdpa_get_status(vdpa);
vp_modern_set_status(mdev, 0);
if (s & VIRTIO_CONFIG_S_DRIVER_OK)
vp_vdpa_free_irq(vp_vdpa);
return 0;
}
static u16 vp_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
{
return VP_VDPA_QUEUE_MAX;
}
static int vp_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 qid,
struct vdpa_vq_state *state)
{
	/* Note that this is not supported by the virtio specification, so
	 * we return -EOPNOTSUPP here. This means we can't support live
	 * migration or vhost device start/stop.
	 */
return -EOPNOTSUPP;
}
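/*
 * The modern virtio-pci device has no way to load a ring state, so setting
 * a state can only "succeed" when the requested state matches what a
 * freshly reset device already has: avail_index 0 for split rings, wrap
 * counters of 1 with indexes of 0 for packed rings.
 */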
static int vp_vdpa_set_vq_state_split(struct vdpa_device *vdpa,
const struct vdpa_vq_state *state)
{
const struct vdpa_vq_state_split *split = &state->split;
if (split->avail_index == 0)
return 0;
return -EOPNOTSUPP;
}
static int vp_vdpa_set_vq_state_packed(struct vdpa_device *vdpa,
const struct vdpa_vq_state *state)
{
const struct vdpa_vq_state_packed *packed = &state->packed;
if (packed->last_avail_counter == 1 &&
packed->last_avail_idx == 0 &&
packed->last_used_counter == 1 &&
packed->last_used_idx == 0)
return 0;
return -EOPNOTSUPP;
}
static int vp_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 qid,
const struct vdpa_vq_state *state)
{
struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa);
	/* Note that this is not supported by the virtio specification.
	 * But if the state happens to equal the device's initial state,
	 * we can let it go.
	 */
if ((vp_modern_get_status(mdev) & VIRTIO_CONFIG_S_FEATURES_OK) &&
!vp_modern_get_queue_enable(mdev, qid)) {
if (vp_modern_get_driver_features(mdev) &
BIT_ULL(VIRTIO_F_RING_PACKED))
return vp_vdpa_set_vq_state_packed(vdpa, state);
else
return vp_vdpa_set_vq_state_split(vdpa, state);
}
return -EOPNOTSUPP;
}
static void vp_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 qid,
struct vdpa_callback *cb)
{
struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa);
vp_vdpa->vring[qid].cb = *cb;
}
static void vp_vdpa_set_vq_ready(struct vdpa_device *vdpa,
u16 qid, bool ready)
{
struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa);
vp_modern_set_queue_enable(mdev, qid, ready);
}
static bool vp_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 qid)
{
struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa);
return vp_modern_get_queue_enable(mdev, qid);
}
static void vp_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 qid,
u32 num)
{
struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa);
vp_modern_set_queue_size(mdev, qid, num);
}
static u16 vp_vdpa_get_vq_size(struct vdpa_device *vdpa, u16 qid)
{
struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa);
return vp_modern_get_queue_size(mdev, qid);
}
static int vp_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 qid,
u64 desc_area, u64 driver_area,
u64 device_area)
{
struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa);
vp_modern_queue_address(mdev, qid, desc_area,
driver_area, device_area);
return 0;
}
static void vp_vdpa_kick_vq(struct vdpa_device *vdpa, u16 qid)
{
struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa);
vp_iowrite16(qid, vp_vdpa->vring[qid].notify);
}
static u32 vp_vdpa_get_generation(struct vdpa_device *vdpa)
{
struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa);
return vp_modern_generation(mdev);
}
static u32 vp_vdpa_get_device_id(struct vdpa_device *vdpa)
{
struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa);
return mdev->id.device;
}
static u32 vp_vdpa_get_vendor_id(struct vdpa_device *vdpa)
{
struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa);
return mdev->id.vendor;
}
static u32 vp_vdpa_get_vq_align(struct vdpa_device *vdpa)
{
return PAGE_SIZE;
}
static size_t vp_vdpa_get_config_size(struct vdpa_device *vdpa)
{
struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa);
return mdev->device_len;
}
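/*
 * Config space reads are not atomic: sample the generation counter before
 * and after the byte-wise copy and retry until both samples match, which
 * guarantees a consistent snapshot.
 */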
static void vp_vdpa_get_config(struct vdpa_device *vdpa,
unsigned int offset,
void *buf, unsigned int len)
{
struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa);
struct virtio_pci_modern_device *mdev = vp_vdpa_to_mdev(vp_vdpa);
u8 old, new;
u8 *p;
int i;
do {
old = vp_ioread8(&mdev->common->config_generation);
p = buf;
for (i = 0; i < len; i++)
*p++ = vp_ioread8(mdev->device + offset + i);
new = vp_ioread8(&mdev->common->config_generation);
} while (old != new);
}
static void vp_vdpa_set_config(struct vdpa_device *vdpa,
unsigned int offset, const void *buf,
unsigned int len)
{
struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa);
struct virtio_pci_modern_device *mdev = vp_vdpa_to_mdev(vp_vdpa);
const u8 *p = buf;
int i;
for (i = 0; i < len; i++)
vp_iowrite8(*p++, mdev->device + offset + i);
}
static void vp_vdpa_set_config_cb(struct vdpa_device *vdpa,
struct vdpa_callback *cb)
{
struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa);
vp_vdpa->config_cb = *cb;
}
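/*
 * Expose the doorbell page of a virtqueue so that a consumer (e.g. a
 * userspace mapping via vhost-vDPA) can kick the queue directly; the area
 * size comes from the device's notify_offset_multiplier.
 */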
static struct vdpa_notification_area
vp_vdpa_get_vq_notification(struct vdpa_device *vdpa, u16 qid)
{
struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa);
struct virtio_pci_modern_device *mdev = vp_vdpa_to_mdev(vp_vdpa);
struct vdpa_notification_area notify;
notify.addr = vp_vdpa->vring[qid].notify_pa;
notify.size = mdev->notify_offset_multiplier;
return notify;
}
static const struct vdpa_config_ops vp_vdpa_ops = {
.get_device_features = vp_vdpa_get_device_features,
.set_driver_features = vp_vdpa_set_driver_features,
.get_driver_features = vp_vdpa_get_driver_features,
.get_status = vp_vdpa_get_status,
.set_status = vp_vdpa_set_status,
.reset = vp_vdpa_reset,
.get_vq_num_max = vp_vdpa_get_vq_num_max,
.get_vq_state = vp_vdpa_get_vq_state,
.get_vq_notification = vp_vdpa_get_vq_notification,
.set_vq_state = vp_vdpa_set_vq_state,
.set_vq_cb = vp_vdpa_set_vq_cb,
.set_vq_ready = vp_vdpa_set_vq_ready,
.get_vq_ready = vp_vdpa_get_vq_ready,
.set_vq_num = vp_vdpa_set_vq_num,
.get_vq_size = vp_vdpa_get_vq_size,
.set_vq_address = vp_vdpa_set_vq_address,
.kick_vq = vp_vdpa_kick_vq,
.get_generation = vp_vdpa_get_generation,
.get_device_id = vp_vdpa_get_device_id,
.get_vendor_id = vp_vdpa_get_vendor_id,
.get_vq_align = vp_vdpa_get_vq_align,
.get_config_size = vp_vdpa_get_config_size,
.get_config = vp_vdpa_get_config,
.set_config = vp_vdpa_set_config,
.set_config_cb = vp_vdpa_set_config_cb,
.get_vq_irq = vp_vdpa_get_vq_irq,
};
static void vp_vdpa_free_irq_vectors(void *data)
{
pci_free_irq_vectors(data);
}
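/*
 * Create a vDPA device on behalf of the management core. Device features
 * may be provisioned via netlink and must be a subset of what the parent
 * device offers, e.g.:
 *
 *   # vdpa dev add name dev1 mgmtdev pci/0000:02:00.0 device_features 0x300020000
 */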
static int vp_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
const struct vdpa_dev_set_config *add_config)
{
struct vp_vdpa_mgmtdev *vp_vdpa_mgtdev =
container_of(v_mdev, struct vp_vdpa_mgmtdev, mgtdev);
struct virtio_pci_modern_device *mdev = vp_vdpa_mgtdev->mdev;
struct pci_dev *pdev = mdev->pci_dev;
struct device *dev = &pdev->dev;
struct vp_vdpa *vp_vdpa = NULL;
u64 device_features;
int ret, i;
vp_vdpa = vdpa_alloc_device(struct vp_vdpa, vdpa,
dev, &vp_vdpa_ops, 1, 1, name, false);
if (IS_ERR(vp_vdpa)) {
dev_err(dev, "vp_vdpa: Failed to allocate vDPA structure\n");
return PTR_ERR(vp_vdpa);
}
vp_vdpa_mgtdev->vp_vdpa = vp_vdpa;
vp_vdpa->vdpa.dma_dev = &pdev->dev;
vp_vdpa->queues = vp_modern_get_num_queues(mdev);
vp_vdpa->mdev = mdev;
device_features = vp_modern_get_features(mdev);
if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) {
if (add_config->device_features & ~device_features) {
ret = -EINVAL;
dev_err(&pdev->dev, "Try to provision features "
"that are not supported by the device: "
"device_features 0x%llx provisioned 0x%llx\n",
device_features, add_config->device_features);
goto err;
}
device_features = add_config->device_features;
}
vp_vdpa->device_features = device_features;
ret = devm_add_action_or_reset(dev, vp_vdpa_free_irq_vectors, pdev);
if (ret) {
dev_err(&pdev->dev,
"Failed for adding devres for freeing irq vectors\n");
goto err;
}
vp_vdpa->vring = devm_kcalloc(&pdev->dev, vp_vdpa->queues,
sizeof(*vp_vdpa->vring),
GFP_KERNEL);
if (!vp_vdpa->vring) {
ret = -ENOMEM;
dev_err(&pdev->dev, "Fail to allocate virtqueues\n");
goto err;
}
for (i = 0; i < vp_vdpa->queues; i++) {
vp_vdpa->vring[i].irq = VIRTIO_MSI_NO_VECTOR;
vp_vdpa->vring[i].notify =
vp_modern_map_vq_notify(mdev, i,
&vp_vdpa->vring[i].notify_pa);
if (!vp_vdpa->vring[i].notify) {
ret = -EINVAL;
dev_warn(&pdev->dev, "Fail to map vq notify %d\n", i);
goto err;
}
}
vp_vdpa->config_irq = VIRTIO_MSI_NO_VECTOR;
vp_vdpa->vdpa.mdev = &vp_vdpa_mgtdev->mgtdev;
ret = _vdpa_register_device(&vp_vdpa->vdpa, vp_vdpa->queues);
if (ret) {
dev_err(&pdev->dev, "Failed to register to vdpa bus\n");
goto err;
}
return 0;
err:
put_device(&vp_vdpa->vdpa.dev);
return ret;
}
static void vp_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev,
struct vdpa_device *dev)
{
struct vp_vdpa_mgmtdev *vp_vdpa_mgtdev =
container_of(v_mdev, struct vp_vdpa_mgmtdev, mgtdev);
struct vp_vdpa *vp_vdpa = vp_vdpa_mgtdev->vp_vdpa;
_vdpa_unregister_device(&vp_vdpa->vdpa);
vp_vdpa_mgtdev->vp_vdpa = NULL;
}
static const struct vdpa_mgmtdev_ops vp_vdpa_mdev_ops = {
.dev_add = vp_vdpa_dev_add,
.dev_del = vp_vdpa_dev_del,
};
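/*
 * PCI probe: enable the device, run the modern virtio-pci probe to map its
 * capabilities, then register a vDPA management device advertising the
 * device's queue count and feature set.
 */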
static int vp_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct vp_vdpa_mgmtdev *vp_vdpa_mgtdev = NULL;
struct vdpa_mgmt_dev *mgtdev;
struct device *dev = &pdev->dev;
struct virtio_pci_modern_device *mdev = NULL;
struct virtio_device_id *mdev_id = NULL;
int err;
vp_vdpa_mgtdev = kzalloc(sizeof(*vp_vdpa_mgtdev), GFP_KERNEL);
if (!vp_vdpa_mgtdev)
return -ENOMEM;
mgtdev = &vp_vdpa_mgtdev->mgtdev;
mgtdev->ops = &vp_vdpa_mdev_ops;
mgtdev->device = dev;
mdev = kzalloc(sizeof(struct virtio_pci_modern_device), GFP_KERNEL);
if (!mdev) {
err = -ENOMEM;
goto mdev_err;
}
	/*
	 * id_table should be a null-terminated array, so allocate one
	 * additional entry here; see vdpa_mgmtdev_get_classes().
	 */
mdev_id = kcalloc(2, sizeof(struct virtio_device_id), GFP_KERNEL);
if (!mdev_id) {
err = -ENOMEM;
goto mdev_id_err;
}
vp_vdpa_mgtdev->mdev = mdev;
mdev->pci_dev = pdev;
	err = pcim_enable_device(pdev);
	if (err)
		goto probe_err;
err = vp_modern_probe(mdev);
if (err) {
dev_err(&pdev->dev, "Failed to probe modern PCI device\n");
goto probe_err;
}
mdev_id[0].device = mdev->id.device;
mdev_id[0].vendor = mdev->id.vendor;
mgtdev->id_table = mdev_id;
mgtdev->max_supported_vqs = vp_modern_get_num_queues(mdev);
mgtdev->supported_features = vp_modern_get_features(mdev);
mgtdev->config_attr_mask = (1 << VDPA_ATTR_DEV_FEATURES);
pci_set_master(pdev);
pci_set_drvdata(pdev, vp_vdpa_mgtdev);
err = vdpa_mgmtdev_register(mgtdev);
if (err) {
dev_err(&pdev->dev, "Failed to register vdpa mgmtdev device\n");
goto register_err;
}
return 0;
register_err:
vp_modern_remove(vp_vdpa_mgtdev->mdev);
probe_err:
kfree(mdev_id);
mdev_id_err:
kfree(mdev);
mdev_err:
kfree(vp_vdpa_mgtdev);
return err;
}
static void vp_vdpa_remove(struct pci_dev *pdev)
{
struct vp_vdpa_mgmtdev *vp_vdpa_mgtdev = pci_get_drvdata(pdev);
struct virtio_pci_modern_device *mdev = NULL;
mdev = vp_vdpa_mgtdev->mdev;
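	/*
	 * Unregister from the vdpa bus before tearing down the modern
	 * device: the unregistration path still accesses the device, and
	 * reversing the order leads to a use-after-free on hot unplug.
	 */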
vdpa_mgmtdev_unregister(&vp_vdpa_mgtdev->mgtdev);
vp_modern_remove(mdev);
kfree(vp_vdpa_mgtdev->mgtdev.id_table);
kfree(mdev);
kfree(vp_vdpa_mgtdev);
}
static struct pci_driver vp_vdpa_driver = {
.name = "vp-vdpa",
.id_table = NULL, /* only dynamic ids */
.probe = vp_vdpa_probe,
.remove = vp_vdpa_remove,
};
module_pci_driver(vp_vdpa_driver);
MODULE_AUTHOR("Jason Wang <jasowang@redhat.com>");
MODULE_DESCRIPTION("vp-vdpa");
MODULE_LICENSE("GPL");
MODULE_VERSION("1");