Skip to content

Commit 905eb40

Browse files
Iouri Tarassov and chessturo
authored and committed
drivers: hv: dxgkrnl: Submit execution commands to the compute device
Implement ioctls for submitting compute device command buffers for execution:
- LX_DXSUBMITCOMMAND: submits a command buffer to a device working in the "packet scheduling" mode.
- LX_DXSUBMITCOMMANDTOHWQUEUE: submits a command buffer to a device working in the "hardware scheduling" mode.
Because these are high-frequency operations, both ioctls use asynchronous VM bus messages to communicate with the host, which improves performance.
Signed-off-by: Iouri Tarassov <iourit@linux.microsoft.com>
[kms: forward port to 6.6 from 6.1. No code changes made.]
Signed-off-by: Kelsey Steele <kelseysteele@microsoft.com>
1 parent 26a1249 commit 905eb40

5 files changed

Lines changed: 316 additions & 2 deletions

File tree

drivers/hv/dxgkrnl/dxgkrnl.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -796,6 +796,9 @@ int dxgvmb_send_create_allocation(struct dxgprocess *pr, struct dxgdevice *dev,
796796
int dxgvmb_send_destroy_allocation(struct dxgprocess *pr, struct dxgdevice *dev,
797797
struct d3dkmt_destroyallocation2 *args,
798798
struct d3dkmthandle *alloc_handles);
799+
int dxgvmb_send_submit_command(struct dxgprocess *pr,
800+
struct dxgadapter *adapter,
801+
struct d3dkmt_submitcommand *args);
799802
int dxgvmb_send_create_sync_object(struct dxgprocess *pr,
800803
struct dxgadapter *adapter,
801804
struct d3dkmt_createsynchronizationobject2
@@ -838,6 +841,9 @@ int dxgvmb_send_destroy_hwqueue(struct dxgprocess *process,
838841
int dxgvmb_send_query_adapter_info(struct dxgprocess *process,
839842
struct dxgadapter *adapter,
840843
struct d3dkmt_queryadapterinfo *args);
844+
int dxgvmb_send_submit_command_hwqueue(struct dxgprocess *process,
845+
struct dxgadapter *adapter,
846+
struct d3dkmt_submitcommandtohwqueue *a);
841847
int dxgvmb_send_open_sync_object_nt(struct dxgprocess *process,
842848
struct dxgvmbuschannel *channel,
843849
struct d3dkmt_opensyncobjectfromnthandle2

drivers/hv/dxgkrnl/dxgvmbus.c

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1901,6 +1901,61 @@ int dxgvmb_send_get_stdalloc_data(struct dxgdevice *device,
19011901
return ret;
19021902
}
19031903

1904+
int dxgvmb_send_submit_command(struct dxgprocess *process,
1905+
struct dxgadapter *adapter,
1906+
struct d3dkmt_submitcommand *args)
1907+
{
1908+
int ret;
1909+
u32 cmd_size;
1910+
struct dxgkvmb_command_submitcommand *command;
1911+
u32 hbufsize = args->num_history_buffers * sizeof(struct d3dkmthandle);
1912+
struct dxgvmbusmsg msg = {.hdr = NULL};
1913+
struct dxgglobal *dxgglobal = dxggbl();
1914+
1915+
cmd_size = sizeof(struct dxgkvmb_command_submitcommand) +
1916+
hbufsize + args->priv_drv_data_size;
1917+
1918+
ret = init_message(&msg, adapter, process, cmd_size);
1919+
if (ret)
1920+
goto cleanup;
1921+
command = (void *)msg.msg;
1922+
1923+
ret = copy_from_user(&command[1], args->history_buffer_array,
1924+
hbufsize);
1925+
if (ret) {
1926+
DXG_ERR(" failed to copy history buffer");
1927+
ret = -EINVAL;
1928+
goto cleanup;
1929+
}
1930+
ret = copy_from_user((u8 *) &command[1] + hbufsize,
1931+
args->priv_drv_data, args->priv_drv_data_size);
1932+
if (ret) {
1933+
DXG_ERR("failed to copy history priv data");
1934+
ret = -EINVAL;
1935+
goto cleanup;
1936+
}
1937+
1938+
command_vgpu_to_host_init2(&command->hdr,
1939+
DXGK_VMBCOMMAND_SUBMITCOMMAND,
1940+
process->host_handle);
1941+
command->args = *args;
1942+
1943+
if (dxgglobal->async_msg_enabled) {
1944+
command->hdr.async_msg = 1;
1945+
ret = dxgvmb_send_async_msg(msg.channel, msg.hdr, msg.size);
1946+
} else {
1947+
ret = dxgvmb_send_sync_msg_ntstatus(msg.channel, msg.hdr,
1948+
msg.size);
1949+
}
1950+
1951+
cleanup:
1952+
1953+
free_message(&msg, process);
1954+
if (ret)
1955+
DXG_TRACE("err: %d", ret);
1956+
return ret;
1957+
}
1958+
19041959
static void set_result(struct d3dkmt_createsynchronizationobject2 *args,
19051960
u64 fence_gpu_va, u8 *va)
19061961
{
@@ -2427,3 +2482,61 @@ int dxgvmb_send_query_adapter_info(struct dxgprocess *process,
24272482
DXG_TRACE("err: %d", ret);
24282483
return ret;
24292484
}
2485+
2486+
int dxgvmb_send_submit_command_hwqueue(struct dxgprocess *process,
2487+
struct dxgadapter *adapter,
2488+
struct d3dkmt_submitcommandtohwqueue
2489+
*args)
2490+
{
2491+
int ret = -EINVAL;
2492+
u32 cmd_size;
2493+
struct dxgkvmb_command_submitcommandtohwqueue *command;
2494+
u32 primaries_size = args->num_primaries * sizeof(struct d3dkmthandle);
2495+
struct dxgvmbusmsg msg = {.hdr = NULL};
2496+
struct dxgglobal *dxgglobal = dxggbl();
2497+
2498+
cmd_size = sizeof(*command) + args->priv_drv_data_size + primaries_size;
2499+
ret = init_message(&msg, adapter, process, cmd_size);
2500+
if (ret)
2501+
goto cleanup;
2502+
command = (void *)msg.msg;
2503+
2504+
if (primaries_size) {
2505+
ret = copy_from_user(&command[1], args->written_primaries,
2506+
primaries_size);
2507+
if (ret) {
2508+
DXG_ERR("failed to copy primaries handles");
2509+
ret = -EINVAL;
2510+
goto cleanup;
2511+
}
2512+
}
2513+
if (args->priv_drv_data_size) {
2514+
ret = copy_from_user((char *)&command[1] + primaries_size,
2515+
args->priv_drv_data,
2516+
args->priv_drv_data_size);
2517+
if (ret) {
2518+
DXG_ERR("failed to copy primaries data");
2519+
ret = -EINVAL;
2520+
goto cleanup;
2521+
}
2522+
}
2523+
2524+
command_vgpu_to_host_init2(&command->hdr,
2525+
DXGK_VMBCOMMAND_SUBMITCOMMANDTOHWQUEUE,
2526+
process->host_handle);
2527+
command->args = *args;
2528+
2529+
if (dxgglobal->async_msg_enabled) {
2530+
command->hdr.async_msg = 1;
2531+
ret = dxgvmb_send_async_msg(msg.channel, msg.hdr, msg.size);
2532+
} else {
2533+
ret = dxgvmb_send_sync_msg_ntstatus(msg.channel, msg.hdr,
2534+
msg.size);
2535+
}
2536+
2537+
cleanup:
2538+
free_message(&msg, process);
2539+
if (ret)
2540+
DXG_TRACE("err: %d", ret);
2541+
return ret;
2542+
}

drivers/hv/dxgkrnl/dxgvmbus.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,20 @@ struct dxgkvmb_command_flushdevice {
314314
enum dxgdevice_flushschedulerreason reason;
315315
};
316316

317+
/*
 * VM bus message used for DXGK_VMBCOMMAND_SUBMITCOMMAND.
 * The fixed part is the host command header followed by a copy of the ioctl
 * arguments. The sender appends two variable-length areas directly after
 * this structure, in this order: the history buffer handles
 * (args.num_history_buffers entries) and the private driver data
 * (args.priv_drv_data_size bytes).
 */
struct dxgkvmb_command_submitcommand {
318+
struct dxgkvmb_command_vgpu_to_host hdr;
319+
struct d3dkmt_submitcommand args;
320+
/* HistoryBufferHandles */
321+
/* PrivateDriverData */
322+
};
323+
324+
/*
 * VM bus message used for DXGK_VMBCOMMAND_SUBMITCOMMANDTOHWQUEUE.
 * The fixed part is the host command header followed by a copy of the ioctl
 * arguments. The sender appends two variable-length areas directly after
 * this structure, in this order: the written primaries handles
 * (args.num_primaries entries) and the private driver data
 * (args.priv_drv_data_size bytes).
 */
struct dxgkvmb_command_submitcommandtohwqueue {
325+
struct dxgkvmb_command_vgpu_to_host hdr;
326+
struct d3dkmt_submitcommandtohwqueue args;
327+
/* Written primaries */
328+
/* PrivateDriverData */
329+
};
330+
317331
struct dxgkvmb_command_createallocation_allocinfo {
318332
u32 flags;
319333
u32 priv_drv_data_size;

drivers/hv/dxgkrnl/ioctl.c

Lines changed: 125 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1902,6 +1902,129 @@ dxgkio_destroy_allocation(struct dxgprocess *process, void *__user inargs)
19021902
return ret;
19031903
}
19041904

1905+
static int
1906+
dxgkio_submit_command(struct dxgprocess *process, void *__user inargs)
1907+
{
1908+
int ret;
1909+
struct d3dkmt_submitcommand args;
1910+
struct dxgdevice *device = NULL;
1911+
struct dxgadapter *adapter = NULL;
1912+
1913+
ret = copy_from_user(&args, inargs, sizeof(args));
1914+
if (ret) {
1915+
DXG_ERR("failed to copy input args");
1916+
ret = -EINVAL;
1917+
goto cleanup;
1918+
}
1919+
1920+
if (args.broadcast_context_count > D3DDDI_MAX_BROADCAST_CONTEXT ||
1921+
args.broadcast_context_count == 0) {
1922+
DXG_ERR("invalid number of contexts");
1923+
ret = -EINVAL;
1924+
goto cleanup;
1925+
}
1926+
1927+
if (args.priv_drv_data_size > DXG_MAX_VM_BUS_PACKET_SIZE) {
1928+
DXG_ERR("invalid private data size");
1929+
ret = -EINVAL;
1930+
goto cleanup;
1931+
}
1932+
1933+
if (args.num_history_buffers > 1024) {
1934+
DXG_ERR("invalid number of history buffers");
1935+
ret = -EINVAL;
1936+
goto cleanup;
1937+
}
1938+
1939+
if (args.num_primaries > DXG_MAX_VM_BUS_PACKET_SIZE) {
1940+
DXG_ERR("invalid number of primaries");
1941+
ret = -EINVAL;
1942+
goto cleanup;
1943+
}
1944+
1945+
device = dxgprocess_device_by_object_handle(process,
1946+
HMGRENTRY_TYPE_DXGCONTEXT,
1947+
args.broadcast_context[0]);
1948+
if (device == NULL) {
1949+
ret = -EINVAL;
1950+
goto cleanup;
1951+
}
1952+
1953+
adapter = device->adapter;
1954+
ret = dxgadapter_acquire_lock_shared(adapter);
1955+
if (ret < 0) {
1956+
adapter = NULL;
1957+
goto cleanup;
1958+
}
1959+
1960+
ret = dxgvmb_send_submit_command(process, adapter, &args);
1961+
1962+
cleanup:
1963+
1964+
if (adapter)
1965+
dxgadapter_release_lock_shared(adapter);
1966+
if (device)
1967+
kref_put(&device->device_kref, dxgdevice_release);
1968+
1969+
DXG_TRACE("ioctl:%s %d", errorstr(ret), ret);
1970+
return ret;
1971+
}
1972+
1973+
static int
1974+
dxgkio_submit_command_to_hwqueue(struct dxgprocess *process, void *__user inargs)
1975+
{
1976+
int ret;
1977+
struct d3dkmt_submitcommandtohwqueue args;
1978+
struct dxgdevice *device = NULL;
1979+
struct dxgadapter *adapter = NULL;
1980+
1981+
ret = copy_from_user(&args, inargs, sizeof(args));
1982+
if (ret) {
1983+
DXG_ERR("failed to copy input args");
1984+
ret = -EINVAL;
1985+
goto cleanup;
1986+
}
1987+
1988+
if (args.priv_drv_data_size > DXG_MAX_VM_BUS_PACKET_SIZE) {
1989+
DXG_ERR("invalid private data size");
1990+
ret = -EINVAL;
1991+
goto cleanup;
1992+
}
1993+
1994+
if (args.num_primaries > DXG_MAX_VM_BUS_PACKET_SIZE) {
1995+
DXG_ERR("invalid number of primaries");
1996+
ret = -EINVAL;
1997+
goto cleanup;
1998+
}
1999+
2000+
device = dxgprocess_device_by_object_handle(process,
2001+
HMGRENTRY_TYPE_DXGHWQUEUE,
2002+
args.hwqueue);
2003+
if (device == NULL) {
2004+
ret = -EINVAL;
2005+
goto cleanup;
2006+
}
2007+
2008+
adapter = device->adapter;
2009+
ret = dxgadapter_acquire_lock_shared(adapter);
2010+
if (ret < 0) {
2011+
adapter = NULL;
2012+
goto cleanup;
2013+
}
2014+
2015+
ret = dxgvmb_send_submit_command_hwqueue(process, adapter, &args);
2016+
2017+
cleanup:
2018+
2019+
if (adapter)
2020+
dxgadapter_release_lock_shared(adapter);
2021+
if (device)
2022+
kref_put(&device->device_kref, dxgdevice_release);
2023+
2024+
DXG_TRACE("ioctl:%s %d", errorstr(ret), ret);
2025+
return ret;
2026+
}
2027+
19052028
static int
19062029
dxgkio_submit_signal_to_hwqueue(struct dxgprocess *process, void *__user inargs)
19072030
{
@@ -3666,7 +3789,7 @@ static struct ioctl_desc ioctls[] = {
36663789
/* 0x0c */ {},
36673790
/* 0x0d */ {},
36683791
/* 0x0e */ {},
3669-
/* 0x0f */ {},
3792+
/* 0x0f */ {dxgkio_submit_command, LX_DXSUBMITCOMMAND},
36703793
/* 0x10 */ {dxgkio_create_sync_object, LX_DXCREATESYNCHRONIZATIONOBJECT},
36713794
/* 0x11 */ {dxgkio_signal_sync_object, LX_DXSIGNALSYNCHRONIZATIONOBJECT},
36723795
/* 0x12 */ {dxgkio_wait_sync_object, LX_DXWAITFORSYNCHRONIZATIONOBJECT},
@@ -3706,7 +3829,7 @@ static struct ioctl_desc ioctls[] = {
37063829
LX_DXSIGNALSYNCHRONIZATIONOBJECTFROMGPU},
37073830
/* 0x33 */ {dxgkio_signal_sync_object_gpu2,
37083831
LX_DXSIGNALSYNCHRONIZATIONOBJECTFROMGPU2},
3709-
/* 0x34 */ {},
3832+
/* 0x34 */ {dxgkio_submit_command_to_hwqueue, LX_DXSUBMITCOMMANDTOHWQUEUE},
37103833
/* 0x35 */ {dxgkio_submit_signal_to_hwqueue,
37113834
LX_DXSUBMITSIGNALSYNCOBJECTSTOHWQUEUE},
37123835
/* 0x36 */ {dxgkio_submit_wait_to_hwqueue,

include/uapi/misc/d3dkmthk.h

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ struct winluid {
5858
__u32 b;
5959
};
6060

61+
#define D3DDDI_MAX_WRITTEN_PRIMARIES 16
62+
6163
#define D3DKMT_CREATEALLOCATION_MAX 1024
6264
#define D3DKMT_ADAPTERS_MAX 64
6365
#define D3DDDI_MAX_BROADCAST_CONTEXT 64
@@ -525,6 +527,58 @@ struct d3dkmt_destroysynchronizationobject {
525527
struct d3dkmthandle sync_object;
526528
};
527529

530+
/*
 * Bit flags carried in the "flags" member of struct d3dkmt_submitcommand.
 * The three bit-fields together occupy 32 bits.
 */
struct d3dkmt_submitcommandflags {
531+
__u32 null_rendering:1;
532+
__u32 present_redirected:1;
533+
__u32 reserved:30;
534+
};
535+
536+
/*
 * Argument structure of the LX_DXSUBMITCOMMAND ioctl.
 *
 * broadcast_context_count gives the number of valid entries in
 * broadcast_context (at most D3DDDI_MAX_BROADCAST_CONTEXT).
 * priv_drv_data points to priv_drv_data_size bytes of caller memory.
 * written_primaries is a fixed array of D3DDDI_MAX_WRITTEN_PRIMARIES handles.
 * history_buffer_array points to num_history_buffers handles in caller
 * memory.
 *
 * The two pointer members are declared as __u64 for user space and as real
 * pointers when built for the kernel.
 */
struct d3dkmt_submitcommand {
537+
__u64 command_buffer;
538+
__u32 command_length;
539+
struct d3dkmt_submitcommandflags flags;
540+
__u64 present_history_token;
541+
__u32 broadcast_context_count;
542+
struct d3dkmthandle broadcast_context[D3DDDI_MAX_BROADCAST_CONTEXT];
543+
__u32 reserved;
544+
#ifdef __KERNEL__
545+
void *priv_drv_data;
546+
#else
547+
__u64 priv_drv_data;
548+
#endif
549+
__u32 priv_drv_data_size;
550+
__u32 num_primaries;
551+
struct d3dkmthandle written_primaries[D3DDDI_MAX_WRITTEN_PRIMARIES];
552+
__u32 num_history_buffers;
553+
__u32 reserved1;
554+
#ifdef __KERNEL__
555+
struct d3dkmthandle *history_buffer_array;
556+
#else
557+
__u64 history_buffer_array;
558+
#endif
559+
};
560+
561+
/*
 * Argument structure of the LX_DXSUBMITCOMMANDTOHWQUEUE ioctl.
 *
 * hwqueue is the handle of the hardware queue the command is submitted to.
 * priv_drv_data points to priv_drv_data_size bytes of caller memory.
 * written_primaries points to num_primaries handles in caller memory.
 *
 * The two pointer members are declared as __u64 for user space and as real
 * pointers when built for the kernel.
 */
struct d3dkmt_submitcommandtohwqueue {
562+
struct d3dkmthandle hwqueue;
563+
__u32 reserved;
564+
__u64 hwqueue_progress_fence_id;
565+
__u64 command_buffer;
566+
__u32 command_length;
567+
__u32 priv_drv_data_size;
568+
#ifdef __KERNEL__
569+
void *priv_drv_data;
570+
#else
571+
__u64 priv_drv_data;
572+
#endif
573+
__u32 num_primaries;
574+
__u32 reserved1;
575+
#ifdef __KERNEL__
576+
struct d3dkmthandle *written_primaries;
577+
#else
578+
__u64 written_primaries;
579+
#endif
580+
};
581+
528582
enum d3dkmt_standardallocationtype {
529583
_D3DKMT_STANDARDALLOCATIONTYPE_EXISTINGHEAP = 1,
530584
_D3DKMT_STANDARDALLOCATIONTYPE_CROSSADAPTER = 2,
@@ -917,6 +971,8 @@ struct d3dkmt_enumadapters3 {
917971
_IOWR(0x47, 0x07, struct d3dkmt_createpagingqueue)
918972
#define LX_DXQUERYADAPTERINFO \
919973
_IOWR(0x47, 0x09, struct d3dkmt_queryadapterinfo)
974+
#define LX_DXSUBMITCOMMAND \
975+
_IOWR(0x47, 0x0f, struct d3dkmt_submitcommand)
920976
#define LX_DXCREATESYNCHRONIZATIONOBJECT \
921977
_IOWR(0x47, 0x10, struct d3dkmt_createsynchronizationobject2)
922978
#define LX_DXSIGNALSYNCHRONIZATIONOBJECT \
@@ -945,6 +1001,8 @@ struct d3dkmt_enumadapters3 {
9451001
_IOWR(0x47, 0x32, struct d3dkmt_signalsynchronizationobjectfromgpu)
9461002
#define LX_DXSIGNALSYNCHRONIZATIONOBJECTFROMGPU2 \
9471003
_IOWR(0x47, 0x33, struct d3dkmt_signalsynchronizationobjectfromgpu2)
1004+
#define LX_DXSUBMITCOMMANDTOHWQUEUE \
1005+
_IOWR(0x47, 0x34, struct d3dkmt_submitcommandtohwqueue)
9481006
#define LX_DXSUBMITSIGNALSYNCOBJECTSTOHWQUEUE \
9491007
_IOWR(0x47, 0x35, struct d3dkmt_submitsignalsyncobjectstohwqueue)
9501008
#define LX_DXSUBMITWAITFORSYNCOBJECTSTOHWQUEUE \

0 commit comments

Comments
 (0)