1

A revert for a previous TTM commit causing stuttering, 3 fixes for

vmwgfx related to buffer operations, a fix for video/aperture with
 non-VGA primary devices, and a preemption status fix for v3d
 -----BEGIN PGP SIGNATURE-----
 
 iJUEABMJAB0WIQTkHFbLp4ejekA/qfgnX84Zoj2+dgUCZtCKzwAKCRAnX84Zoj2+
 drMcAYDvGyENhD3jcOSM0vd1pEgleMSKoD4sbsj+JO6vMona7/dWzpUXHyxZg0P9
 7gOZAkwBgMQHQKWBOga/LnloJi1/GC4Y7pjR4tDzLRO4ejbGdStLK8vEMKJKltwB
 ej1UUueBAQ==
 =4GlG
 -----END PGP SIGNATURE-----

Merge tag 'drm-misc-fixes-2024-08-29' of https://gitlab.freedesktop.org/drm/misc/kernel into drm-fixes

A revert for a previous TTM commit causing stuttering, 3 fixes for
vmwgfx related to buffer operations, a fix for video/aperture with
non-VGA primary devices, and a preemption status fix for v3d

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Maxime Ripard <mripard@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240829-efficient-swift-from-lemuria-f60c05@houat
This commit is contained in:
Dave Airlie 2024-08-30 11:28:00 +10:00
commit 27f5b729cb
12 changed files with 161 additions and 37 deletions

View File

@ -39,6 +39,8 @@ static struct platform_device *pd;
static DEFINE_MUTEX(disable_lock);
static bool disabled;
static struct device *sysfb_parent_dev(const struct screen_info *si);
static bool sysfb_unregister(void)
{
if (IS_ERR_OR_NULL(pd))
@ -52,6 +54,7 @@ static bool sysfb_unregister(void)
/**
* sysfb_disable() - disable the Generic System Framebuffers support
* @dev: the device to check if non-NULL
*
* This disables the registration of system framebuffer devices that match the
* generic drivers that make use of the system framebuffer set up by firmware.
@ -61,17 +64,21 @@ static bool sysfb_unregister(void)
* Context: The function can sleep. A @disable_lock mutex is acquired to serialize
* against sysfb_init(), that registers a system framebuffer device.
*/
void sysfb_disable(void)
void sysfb_disable(struct device *dev)
{
struct screen_info *si = &screen_info;
mutex_lock(&disable_lock);
sysfb_unregister();
disabled = true;
if (!dev || dev == sysfb_parent_dev(si)) {
sysfb_unregister();
disabled = true;
}
mutex_unlock(&disable_lock);
}
EXPORT_SYMBOL_GPL(sysfb_disable);
#if defined(CONFIG_PCI)
static __init bool sysfb_pci_dev_is_enabled(struct pci_dev *pdev)
static bool sysfb_pci_dev_is_enabled(struct pci_dev *pdev)
{
/*
* TODO: Try to integrate this code into the PCI subsystem
@ -87,13 +94,13 @@ static __init bool sysfb_pci_dev_is_enabled(struct pci_dev *pdev)
return true;
}
#else
static __init bool sysfb_pci_dev_is_enabled(struct pci_dev *pdev)
static bool sysfb_pci_dev_is_enabled(struct pci_dev *pdev)
{
return false;
}
#endif
static __init struct device *sysfb_parent_dev(const struct screen_info *si)
static struct device *sysfb_parent_dev(const struct screen_info *si)
{
struct pci_dev *pdev;

View File

@ -134,6 +134,8 @@ v3d_job_start_stats(struct v3d_job *job, enum v3d_queue queue)
struct v3d_stats *local_stats = &file->stats[queue];
u64 now = local_clock();
preempt_disable();
write_seqcount_begin(&local_stats->lock);
local_stats->start_ns = now;
write_seqcount_end(&local_stats->lock);
@ -141,6 +143,8 @@ v3d_job_start_stats(struct v3d_job *job, enum v3d_queue queue)
write_seqcount_begin(&global_stats->lock);
global_stats->start_ns = now;
write_seqcount_end(&global_stats->lock);
preempt_enable();
}
static void
@ -162,8 +166,10 @@ v3d_job_update_stats(struct v3d_job *job, enum v3d_queue queue)
struct v3d_stats *local_stats = &file->stats[queue];
u64 now = local_clock();
preempt_disable();
v3d_stats_update(local_stats, now);
v3d_stats_update(global_stats, now);
preempt_enable();
}
static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job)

View File

@ -27,6 +27,8 @@
**************************************************************************/
#include "vmwgfx_drv.h"
#include "vmwgfx_bo.h"
#include <linux/highmem.h>
/*
@ -420,13 +422,105 @@ static int vmw_bo_cpu_blit_line(struct vmw_bo_blit_line_data *d,
return 0;
}
static void *map_external(struct vmw_bo *bo, struct iosys_map *map)
{
struct vmw_private *vmw =
container_of(bo->tbo.bdev, struct vmw_private, bdev);
void *ptr = NULL;
int ret;
if (bo->tbo.base.import_attach) {
ret = dma_buf_vmap(bo->tbo.base.dma_buf, map);
if (ret) {
drm_dbg_driver(&vmw->drm,
"Wasn't able to map external bo!\n");
goto out;
}
ptr = map->vaddr;
} else {
ptr = vmw_bo_map_and_cache(bo);
}
out:
return ptr;
}
static void unmap_external(struct vmw_bo *bo, struct iosys_map *map)
{
if (bo->tbo.base.import_attach)
dma_buf_vunmap(bo->tbo.base.dma_buf, map);
else
vmw_bo_unmap(bo);
}
static int vmw_external_bo_copy(struct vmw_bo *dst, u32 dst_offset,
u32 dst_stride, struct vmw_bo *src,
u32 src_offset, u32 src_stride,
u32 width_in_bytes, u32 height,
struct vmw_diff_cpy *diff)
{
struct vmw_private *vmw =
container_of(dst->tbo.bdev, struct vmw_private, bdev);
size_t dst_size = dst->tbo.resource->size;
size_t src_size = src->tbo.resource->size;
struct iosys_map dst_map = {0};
struct iosys_map src_map = {0};
int ret, i;
int x_in_bytes;
u8 *vsrc;
u8 *vdst;
vsrc = map_external(src, &src_map);
if (!vsrc) {
drm_dbg_driver(&vmw->drm, "Wasn't able to map src\n");
ret = -ENOMEM;
goto out;
}
vdst = map_external(dst, &dst_map);
if (!vdst) {
drm_dbg_driver(&vmw->drm, "Wasn't able to map dst\n");
ret = -ENOMEM;
goto out;
}
vsrc += src_offset;
vdst += dst_offset;
if (src_stride == dst_stride) {
dst_size -= dst_offset;
src_size -= src_offset;
memcpy(vdst, vsrc,
min(dst_stride * height, min(dst_size, src_size)));
} else {
WARN_ON(dst_stride < width_in_bytes);
for (i = 0; i < height; ++i) {
memcpy(vdst, vsrc, width_in_bytes);
vsrc += src_stride;
vdst += dst_stride;
}
}
x_in_bytes = (dst_offset % dst_stride);
diff->rect.x1 = x_in_bytes / diff->cpp;
diff->rect.y1 = ((dst_offset - x_in_bytes) / dst_stride);
diff->rect.x2 = diff->rect.x1 + width_in_bytes / diff->cpp;
diff->rect.y2 = diff->rect.y1 + height;
ret = 0;
out:
unmap_external(src, &src_map);
unmap_external(dst, &dst_map);
return ret;
}
/**
* vmw_bo_cpu_blit - in-kernel cpu blit.
*
* @dst: Destination buffer object.
* @vmw_dst: Destination buffer object.
* @dst_offset: Destination offset of blit start in bytes.
* @dst_stride: Destination stride in bytes.
* @src: Source buffer object.
* @vmw_src: Source buffer object.
* @src_offset: Source offset of blit start in bytes.
* @src_stride: Source stride in bytes.
* @w: Width of blit.
@ -444,13 +538,15 @@ static int vmw_bo_cpu_blit_line(struct vmw_bo_blit_line_data *d,
* Neither of the buffer objects may be placed in PCI memory
* (Fixed memory in TTM terminology) when using this function.
*/
int vmw_bo_cpu_blit(struct ttm_buffer_object *dst,
int vmw_bo_cpu_blit(struct vmw_bo *vmw_dst,
u32 dst_offset, u32 dst_stride,
struct ttm_buffer_object *src,
struct vmw_bo *vmw_src,
u32 src_offset, u32 src_stride,
u32 w, u32 h,
struct vmw_diff_cpy *diff)
{
struct ttm_buffer_object *src = &vmw_src->tbo;
struct ttm_buffer_object *dst = &vmw_dst->tbo;
struct ttm_operation_ctx ctx = {
.interruptible = false,
.no_wait_gpu = false
@ -460,6 +556,11 @@ int vmw_bo_cpu_blit(struct ttm_buffer_object *dst,
int ret = 0;
struct page **dst_pages = NULL;
struct page **src_pages = NULL;
bool src_external = (src->ttm->page_flags & TTM_TT_FLAG_EXTERNAL) != 0;
bool dst_external = (dst->ttm->page_flags & TTM_TT_FLAG_EXTERNAL) != 0;
if (WARN_ON(dst == src))
return -EINVAL;
/* Buffer objects need to be either pinned or reserved: */
if (!(dst->pin_count))
@ -479,6 +580,11 @@ int vmw_bo_cpu_blit(struct ttm_buffer_object *dst,
return ret;
}
if (src_external || dst_external)
return vmw_external_bo_copy(vmw_dst, dst_offset, dst_stride,
vmw_src, src_offset, src_stride,
w, h, diff);
if (!src->ttm->pages && src->ttm->sg) {
src_pages = kvmalloc_array(src->ttm->num_pages,
sizeof(struct page *), GFP_KERNEL);

View File

@ -360,6 +360,8 @@ void *vmw_bo_map_and_cache_size(struct vmw_bo *vbo, size_t size)
void *virtual;
int ret;
atomic_inc(&vbo->map_count);
virtual = ttm_kmap_obj_virtual(&vbo->map, &not_used);
if (virtual)
return virtual;
@ -383,11 +385,17 @@ void *vmw_bo_map_and_cache_size(struct vmw_bo *vbo, size_t size)
*/
void vmw_bo_unmap(struct vmw_bo *vbo)
{
int map_count;
if (vbo->map.bo == NULL)
return;
ttm_bo_kunmap(&vbo->map);
vbo->map.bo = NULL;
map_count = atomic_dec_return(&vbo->map_count);
if (!map_count) {
ttm_bo_kunmap(&vbo->map);
vbo->map.bo = NULL;
}
}
@ -421,6 +429,7 @@ static int vmw_bo_init(struct vmw_private *dev_priv,
vmw_bo->tbo.priority = 3;
vmw_bo->res_tree = RB_ROOT;
xa_init(&vmw_bo->detached_resources);
atomic_set(&vmw_bo->map_count, 0);
params->size = ALIGN(params->size, PAGE_SIZE);
drm_gem_private_object_init(vdev, &vmw_bo->tbo.base, params->size);

View File

@ -71,6 +71,8 @@ struct vmw_bo_params {
* @map: Kmap object for semi-persistent mappings
* @res_tree: RB tree of resources using this buffer object as a backing MOB
* @res_prios: Eviction priority counts for attached resources
* @map_count: The number of currently active maps. Will differ from the
* cpu_writers because it includes kernel maps.
* @cpu_writers: Number of synccpu write grabs. Protected by reservation when
* increased. May be decreased without reservation.
* @dx_query_ctx: DX context if this buffer object is used as a DX query MOB
@ -90,6 +92,7 @@ struct vmw_bo {
u32 res_prios[TTM_MAX_BO_PRIORITY];
struct xarray detached_resources;
atomic_t map_count;
atomic_t cpu_writers;
/* Not ref-counted. Protected by binding_mutex */
struct vmw_resource *dx_query_ctx;

View File

@ -1353,9 +1353,9 @@ void vmw_diff_memcpy(struct vmw_diff_cpy *diff, u8 *dest, const u8 *src,
void vmw_memcpy(struct vmw_diff_cpy *diff, u8 *dest, const u8 *src, size_t n);
int vmw_bo_cpu_blit(struct ttm_buffer_object *dst,
int vmw_bo_cpu_blit(struct vmw_bo *dst,
u32 dst_offset, u32 dst_stride,
struct ttm_buffer_object *src,
struct vmw_bo *src,
u32 src_offset, u32 src_stride,
u32 w, u32 h,
struct vmw_diff_cpy *diff);

View File

@ -502,7 +502,7 @@ static void vmw_stdu_bo_cpu_commit(struct vmw_kms_dirty *dirty)
container_of(dirty->unit, typeof(*stdu), base);
s32 width, height;
s32 src_pitch, dst_pitch;
struct ttm_buffer_object *src_bo, *dst_bo;
struct vmw_bo *src_bo, *dst_bo;
u32 src_offset, dst_offset;
struct vmw_diff_cpy diff = VMW_CPU_BLIT_DIFF_INITIALIZER(stdu->cpp);
@ -517,11 +517,11 @@ static void vmw_stdu_bo_cpu_commit(struct vmw_kms_dirty *dirty)
/* Assume we are blitting from Guest (bo) to Host (display_srf) */
src_pitch = stdu->display_srf->metadata.base_size.width * stdu->cpp;
src_bo = &stdu->display_srf->res.guest_memory_bo->tbo;
src_bo = stdu->display_srf->res.guest_memory_bo;
src_offset = ddirty->top * src_pitch + ddirty->left * stdu->cpp;
dst_pitch = ddirty->pitch;
dst_bo = &ddirty->buf->tbo;
dst_bo = ddirty->buf;
dst_offset = ddirty->fb_top * dst_pitch + ddirty->fb_left * stdu->cpp;
(void) vmw_bo_cpu_blit(dst_bo, dst_offset, dst_pitch,
@ -1170,7 +1170,7 @@ vmw_stdu_bo_populate_update_cpu(struct vmw_du_update_plane *update, void *cmd,
struct vmw_diff_cpy diff = VMW_CPU_BLIT_DIFF_INITIALIZER(0);
struct vmw_stdu_update_gb_image *cmd_img = cmd;
struct vmw_stdu_update *cmd_update;
struct ttm_buffer_object *src_bo, *dst_bo;
struct vmw_bo *src_bo, *dst_bo;
u32 src_offset, dst_offset;
s32 src_pitch, dst_pitch;
s32 width, height;
@ -1184,11 +1184,11 @@ vmw_stdu_bo_populate_update_cpu(struct vmw_du_update_plane *update, void *cmd,
diff.cpp = stdu->cpp;
dst_bo = &stdu->display_srf->res.guest_memory_bo->tbo;
dst_bo = stdu->display_srf->res.guest_memory_bo;
dst_pitch = stdu->display_srf->metadata.base_size.width * stdu->cpp;
dst_offset = bb->y1 * dst_pitch + bb->x1 * stdu->cpp;
src_bo = &vfbbo->buffer->tbo;
src_bo = vfbbo->buffer;
src_pitch = update->vfb->base.pitches[0];
src_offset = bo_update->fb_top * src_pitch + bo_update->fb_left *
stdu->cpp;

View File

@ -2283,9 +2283,11 @@ int vmw_dumb_create(struct drm_file *file_priv,
/*
* Without mob support we're just going to use raw memory buffer
* because we wouldn't be able to support full surface coherency
* without mobs
* without mobs. There also no reason to support surface coherency
* without 3d (i.e. gpu usage on the host) because then all the
* contents is going to be rendered guest side.
*/
if (!dev_priv->has_mob) {
if (!dev_priv->has_mob || !vmw_supports_3d(dev_priv)) {
int cpp = DIV_ROUND_UP(args->bpp, 8);
switch (cpp) {

View File

@ -592,7 +592,7 @@ static int __init of_platform_default_populate_init(void)
* This can happen for example on DT systems that do EFI
* booting and may provide a GOP handle to the EFI stub.
*/
sysfb_disable();
sysfb_disable(NULL);
of_platform_device_create(node, NULL, NULL);
of_node_put(node);
}

View File

@ -293,7 +293,7 @@ int aperture_remove_conflicting_devices(resource_size_t base, resource_size_t si
* ask for this, so let's assume that a real driver for the display
* was already probed and prevent sysfb to register devices later.
*/
sysfb_disable();
sysfb_disable(NULL);
aperture_detach_devices(base, size);
@ -346,15 +346,10 @@ EXPORT_SYMBOL(__aperture_remove_legacy_vga_devices);
*/
int aperture_remove_conflicting_pci_devices(struct pci_dev *pdev, const char *name)
{
bool primary = false;
resource_size_t base, size;
int bar, ret = 0;
if (pdev == vga_default_device())
primary = true;
if (primary)
sysfb_disable();
sysfb_disable(&pdev->dev);
for (bar = 0; bar < PCI_STD_NUM_BARS; ++bar) {
if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM))
@ -370,7 +365,7 @@ int aperture_remove_conflicting_pci_devices(struct pci_dev *pdev, const char *na
* that consumes the VGA framebuffer I/O range. Remove this
* device as well.
*/
if (primary)
if (pdev == vga_default_device())
ret = __aperture_remove_legacy_vga_devices(pdev);
return ret;

View File

@ -39,11 +39,7 @@
#include "ttm_device.h"
/* Default number of pre-faulted pages in the TTM fault handler */
#if CONFIG_PGTABLE_LEVELS > 2
#define TTM_BO_VM_NUM_PREFAULT (1 << (PMD_SHIFT - PAGE_SHIFT))
#else
#define TTM_BO_VM_NUM_PREFAULT 16
#endif
struct iosys_map;

View File

@ -58,11 +58,11 @@ struct efifb_dmi_info {
#ifdef CONFIG_SYSFB
void sysfb_disable(void);
void sysfb_disable(struct device *dev);
#else /* CONFIG_SYSFB */
static inline void sysfb_disable(void)
static inline void sysfb_disable(struct device *dev)
{
}