Skip to content

Commit ff83a21

Browse files
Iouri Tarassov and chessturo
authored and committed
drivers: hv: dxgkrnl: Operations using sync objects
Implement ioctls to submit operations with compute device sync objects: - the LX_DXSIGNALSYNCHRONIZATIONOBJECT ioctl. The ioctl is used to submit a signal to a sync object. - the LX_DXWAITFORSYNCHRONIZATIONOBJECT ioctl. The ioctl is used to submit a wait for a sync object. - the LX_DXSIGNALSYNCHRONIZATIONOBJECTFROMCPU ioctl. The ioctl is used to signal to a monitored fence sync object from a CPU thread. - the LX_DXSIGNALSYNCHRONIZATIONOBJECTFROMGPU ioctl. The ioctl is used to submit a signal to a monitored fence sync object. - the LX_DXSIGNALSYNCHRONIZATIONOBJECTFROMGPU2 ioctl. The ioctl is used to submit a signal to a monitored fence sync object. - the LX_DXWAITFORSYNCHRONIZATIONOBJECTFROMGPU ioctl. The ioctl is used to submit a wait for a monitored fence sync object. Compute device synchronization objects are used to synchronize execution of DMA buffers between different execution contexts. Operations with sync objects include "signal" and "wait". A wait for a sync object is satisfied when the sync object is signaled. A signal operation could be submitted to a compute device context or the sync object could be signaled by a CPU thread. To improve performance, submitting operations to the host is done asynchronously when the host supports it. Signed-off-by: Iouri Tarassov <iourit@linux.microsoft.com> [kms: forward port to 6.6 from 6.1. No code changes made.] Signed-off-by: Kelsey Steele <kelseysteele@microsoft.com>
1 parent 6d6faa2 commit ff83a21

8 files changed

Lines changed: 1311 additions & 21 deletions

File tree

drivers/hv/dxgkrnl/dxgadapter.c

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,7 @@ void dxgdevice_stop(struct dxgdevice *device)
249249
struct dxgallocation *alloc;
250250
struct dxgsyncobject *syncobj;
251251

252-
DXG_TRACE("Destroying device: %p", device);
252+
DXG_TRACE("Stopping device: %p", device);
253253
dxgdevice_acquire_alloc_list_lock(device);
254254
list_for_each_entry(alloc, &device->alloc_list_head, alloc_list_entry) {
255255
dxgallocation_stop(alloc);
@@ -743,15 +743,13 @@ void dxgallocation_destroy(struct dxgallocation *alloc)
743743
}
744744
#ifdef _MAIN_KERNEL_
745745
if (alloc->gpadl.gpadl_handle) {
746-
DXG_TRACE("Teardown gpadl %d",
747-
alloc->gpadl.gpadl_handle);
746+
DXG_TRACE("Teardown gpadl %d", alloc->gpadl.gpadl_handle);
748747
vmbus_teardown_gpadl(dxgglobal_get_vmbus(), &alloc->gpadl);
749748
alloc->gpadl.gpadl_handle = 0;
750749
}
751750
else
752751
if (alloc->gpadl) {
753-
DXG_TRACE("Teardown gpadl %d",
754-
alloc->gpadl);
752+
DXG_TRACE("Teardown gpadl %d", alloc->gpadl);
755753
vmbus_teardown_gpadl(dxgglobal_get_vmbus(), alloc->gpadl);
756754
alloc->gpadl = 0;
757755
}
@@ -901,6 +899,13 @@ struct dxgsyncobject *dxgsyncobject_create(struct dxgprocess *process,
901899
case _D3DDDI_PERIODIC_MONITORED_FENCE:
902900
syncobj->monitored_fence = 1;
903901
break;
902+
case _D3DDDI_CPU_NOTIFICATION:
903+
syncobj->cpu_event = 1;
904+
syncobj->host_event = kzalloc(sizeof(*syncobj->host_event),
905+
GFP_KERNEL);
906+
if (syncobj->host_event == NULL)
907+
goto cleanup;
908+
break;
904909
default:
905910
break;
906911
}
@@ -928,6 +933,8 @@ struct dxgsyncobject *dxgsyncobject_create(struct dxgprocess *process,
928933
DXG_TRACE("Syncobj created: %p", syncobj);
929934
return syncobj;
930935
cleanup:
936+
if (syncobj->host_event)
937+
kfree(syncobj->host_event);
931938
if (syncobj)
932939
kfree(syncobj);
933940
return NULL;
@@ -937,6 +944,7 @@ void dxgsyncobject_destroy(struct dxgprocess *process,
937944
struct dxgsyncobject *syncobj)
938945
{
939946
int destroyed;
947+
struct dxghosteventcpu *host_event;
940948

941949
DXG_TRACE("Destroying syncobj: %p", syncobj);
942950

@@ -955,6 +963,16 @@ void dxgsyncobject_destroy(struct dxgprocess *process,
955963
}
956964
hmgrtable_unlock(&process->handle_table, DXGLOCK_EXCL);
957965

966+
if (syncobj->cpu_event) {
967+
host_event = syncobj->host_event;
968+
if (host_event->cpu_event) {
969+
eventfd_ctx_put(host_event->cpu_event);
970+
if (host_event->hdr.event_id)
971+
dxgglobal_remove_host_event(
972+
&host_event->hdr);
973+
host_event->cpu_event = NULL;
974+
}
975+
}
958976
if (syncobj->monitored_fence)
959977
dxgdevice_remove_syncobj(syncobj);
960978
else
@@ -971,16 +989,14 @@ void dxgsyncobject_destroy(struct dxgprocess *process,
971989
void dxgsyncobject_stop(struct dxgsyncobject *syncobj)
972990
{
973991
int stopped = test_and_set_bit(1, &syncobj->flags);
992+
int ret;
974993

975994
if (!stopped) {
976995
DXG_TRACE("Stopping syncobj");
977996
if (syncobj->monitored_fence) {
978997
if (syncobj->mapped_address) {
979-
int ret =
980-
dxg_unmap_iospace(syncobj->mapped_address,
981-
PAGE_SIZE);
982-
983-
(void)ret;
998+
ret = dxg_unmap_iospace(syncobj->mapped_address,
999+
PAGE_SIZE);
9841000
DXG_TRACE("unmap fence %d %p",
9851001
ret, syncobj->mapped_address);
9861002
syncobj->mapped_address = NULL;
@@ -994,5 +1010,7 @@ void dxgsyncobject_release(struct kref *refcount)
9941010
struct dxgsyncobject *syncobj;
9951011

9961012
syncobj = container_of(refcount, struct dxgsyncobject, syncobj_kref);
1013+
if (syncobj->host_event)
1014+
kfree(syncobj->host_event);
9971015
kfree(syncobj);
9981016
}

drivers/hv/dxgkrnl/dxgkrnl.h

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,29 @@ int dxgvmbuschannel_init(struct dxgvmbuschannel *ch, struct hv_device *hdev);
101101
void dxgvmbuschannel_destroy(struct dxgvmbuschannel *ch);
102102
void dxgvmbuschannel_receive(void *ctx);
103103

104+
/*
105+
* The structure describes an event, which will be signaled by
106+
* a message from host.
107+
*/
108+
enum dxghosteventtype {
109+
dxghostevent_cpu_event = 1,
110+
};
111+
112+
struct dxghostevent {
113+
struct list_head host_event_list_entry;
114+
u64 event_id;
115+
enum dxghosteventtype event_type;
116+
};
117+
118+
struct dxghosteventcpu {
119+
struct dxghostevent hdr;
120+
struct dxgprocess *process;
121+
struct eventfd_ctx *cpu_event;
122+
struct completion *completion_event;
123+
bool destroy_after_signal;
124+
bool remove_from_list;
125+
};
126+
104127
/*
105128
* This is GPU synchronization object, which is used to synchronize execution
106129
* between GPU contextx/hardware queues or for tracking GPU execution progress.
@@ -130,6 +153,8 @@ struct dxgsyncobject {
130153
*/
131154
struct dxgdevice *device;
132155
struct dxgprocess *process;
156+
/* Used by D3DDDI_CPU_NOTIFICATION objects */
157+
struct dxghosteventcpu *host_event;
133158
/* CPU virtual address of the fence value for "device" syncobjects */
134159
void *mapped_address;
135160
/* Handle in the process handle table */
@@ -144,6 +169,7 @@ struct dxgsyncobject {
144169
u32 stopped:1;
145170
/* device syncobject */
146171
u32 monitored_fence:1;
172+
u32 cpu_event:1;
147173
u32 shared:1;
148174
u32 reserved:27;
149175
};
@@ -206,6 +232,11 @@ struct dxgglobal {
206232
/* protects the dxgprocess_adapter lists */
207233
struct mutex process_adapter_mutex;
208234

235+
/* list of events, waiting to be signaled by the host */
236+
struct list_head host_event_list_head;
237+
spinlock_t host_event_list_mutex;
238+
atomic64_t host_event_id;
239+
209240
bool global_channel_initialized;
210241
bool async_msg_enabled;
211242
bool misc_registered;
@@ -228,6 +259,11 @@ struct vmbus_channel *dxgglobal_get_vmbus(void);
228259
struct dxgvmbuschannel *dxgglobal_get_dxgvmbuschannel(void);
229260
void dxgglobal_acquire_process_adapter_lock(void);
230261
void dxgglobal_release_process_adapter_lock(void);
262+
void dxgglobal_add_host_event(struct dxghostevent *hostevent);
263+
void dxgglobal_remove_host_event(struct dxghostevent *hostevent);
264+
u64 dxgglobal_new_host_event_id(void);
265+
void dxgglobal_signal_host_event(u64 event_id);
266+
struct dxghostevent *dxgglobal_get_host_event(u64 event_id);
231267
int dxgglobal_acquire_channel_lock(void);
232268
void dxgglobal_release_channel_lock(void);
233269

@@ -594,6 +630,31 @@ int dxgvmb_send_create_sync_object(struct dxgprocess *pr,
594630
*args, struct dxgsyncobject *so);
595631
int dxgvmb_send_destroy_sync_object(struct dxgprocess *pr,
596632
struct d3dkmthandle h);
633+
int dxgvmb_send_signal_sync_object(struct dxgprocess *process,
634+
struct dxgadapter *adapter,
635+
struct d3dddicb_signalflags flags,
636+
u64 legacy_fence_value,
637+
struct d3dkmthandle context,
638+
u32 object_count,
639+
struct d3dkmthandle *object,
640+
u32 context_count,
641+
struct d3dkmthandle *contexts,
642+
u32 fence_count, u64 *fences,
643+
struct eventfd_ctx *cpu_event,
644+
struct d3dkmthandle device);
645+
int dxgvmb_send_wait_sync_object_gpu(struct dxgprocess *process,
646+
struct dxgadapter *adapter,
647+
struct d3dkmthandle context,
648+
u32 object_count,
649+
struct d3dkmthandle *objects,
650+
u64 *fences,
651+
bool legacy_fence);
652+
int dxgvmb_send_wait_sync_object_cpu(struct dxgprocess *process,
653+
struct dxgadapter *adapter,
654+
struct
655+
d3dkmt_waitforsynchronizationobjectfromcpu
656+
*args,
657+
u64 cpu_event);
597658
int dxgvmb_send_query_adapter_info(struct dxgprocess *process,
598659
struct dxgadapter *adapter,
599660
struct d3dkmt_queryadapterinfo *args);
@@ -609,6 +670,7 @@ int dxgvmb_send_async_msg(struct dxgvmbuschannel *channel,
609670
void *command,
610671
u32 cmd_size);
611672

673+
void signal_host_cpu_event(struct dxghostevent *eventhdr);
612674
int ntstatus2int(struct ntstatus status);
613675

614676
#ifdef DEBUG

drivers/hv/dxgkrnl/dxgmodule.c

Lines changed: 101 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,102 @@ static struct dxgadapter *find_adapter(struct winluid *luid)
123123
return adapter;
124124
}
125125

126+
void dxgglobal_add_host_event(struct dxghostevent *event)
127+
{
128+
struct dxgglobal *dxgglobal = dxggbl();
129+
130+
spin_lock_irq(&dxgglobal->host_event_list_mutex);
131+
list_add_tail(&event->host_event_list_entry,
132+
&dxgglobal->host_event_list_head);
133+
spin_unlock_irq(&dxgglobal->host_event_list_mutex);
134+
}
135+
136+
void dxgglobal_remove_host_event(struct dxghostevent *event)
137+
{
138+
struct dxgglobal *dxgglobal = dxggbl();
139+
140+
spin_lock_irq(&dxgglobal->host_event_list_mutex);
141+
if (event->host_event_list_entry.next != NULL) {
142+
list_del(&event->host_event_list_entry);
143+
event->host_event_list_entry.next = NULL;
144+
}
145+
spin_unlock_irq(&dxgglobal->host_event_list_mutex);
146+
}
147+
148+
void signal_host_cpu_event(struct dxghostevent *eventhdr)
149+
{
150+
struct dxghosteventcpu *event = (struct dxghosteventcpu *)eventhdr;
151+
152+
if (event->remove_from_list ||
153+
event->destroy_after_signal) {
154+
list_del(&eventhdr->host_event_list_entry);
155+
eventhdr->host_event_list_entry.next = NULL;
156+
}
157+
if (event->cpu_event) {
158+
DXG_TRACE("signal cpu event");
159+
eventfd_signal(event->cpu_event, 1);
160+
if (event->destroy_after_signal)
161+
eventfd_ctx_put(event->cpu_event);
162+
} else {
163+
DXG_TRACE("signal completion");
164+
complete(event->completion_event);
165+
}
166+
if (event->destroy_after_signal) {
167+
DXG_TRACE("destroying event %p", event);
168+
kfree(event);
169+
}
170+
}
171+
172+
void dxgglobal_signal_host_event(u64 event_id)
173+
{
174+
struct dxghostevent *event;
175+
unsigned long flags;
176+
struct dxgglobal *dxgglobal = dxggbl();
177+
178+
DXG_TRACE("Signaling host event %lld", event_id);
179+
180+
spin_lock_irqsave(&dxgglobal->host_event_list_mutex, flags);
181+
list_for_each_entry(event, &dxgglobal->host_event_list_head,
182+
host_event_list_entry) {
183+
if (event->event_id == event_id) {
184+
DXG_TRACE("found event to signal");
185+
if (event->event_type == dxghostevent_cpu_event)
186+
signal_host_cpu_event(event);
187+
else
188+
DXG_ERR("Unknown host event type");
189+
break;
190+
}
191+
}
192+
spin_unlock_irqrestore(&dxgglobal->host_event_list_mutex, flags);
193+
}
194+
195+
struct dxghostevent *dxgglobal_get_host_event(u64 event_id)
196+
{
197+
struct dxghostevent *entry;
198+
struct dxghostevent *event = NULL;
199+
struct dxgglobal *dxgglobal = dxggbl();
200+
201+
spin_lock_irq(&dxgglobal->host_event_list_mutex);
202+
list_for_each_entry(entry, &dxgglobal->host_event_list_head,
203+
host_event_list_entry) {
204+
if (entry->event_id == event_id) {
205+
list_del(&entry->host_event_list_entry);
206+
entry->host_event_list_entry.next = NULL;
207+
event = entry;
208+
break;
209+
}
210+
}
211+
spin_unlock_irq(&dxgglobal->host_event_list_mutex);
212+
return event;
213+
}
214+
215+
u64 dxgglobal_new_host_event_id(void)
216+
{
217+
struct dxgglobal *dxgglobal = dxggbl();
218+
219+
return atomic64_inc_return(&dxgglobal->host_event_id);
220+
}
221+
126222
void dxgglobal_acquire_process_adapter_lock(void)
127223
{
128224
struct dxgglobal *dxgglobal = dxggbl();
@@ -720,12 +816,16 @@ static struct dxgglobal *dxgglobal_create(void)
720816
INIT_LIST_HEAD(&dxgglobal->vgpu_ch_list_head);
721817
INIT_LIST_HEAD(&dxgglobal->adapter_list_head);
722818
init_rwsem(&dxgglobal->adapter_list_lock);
723-
724819
init_rwsem(&dxgglobal->channel_lock);
725820

821+
INIT_LIST_HEAD(&dxgglobal->host_event_list_head);
822+
spin_lock_init(&dxgglobal->host_event_list_mutex);
823+
atomic64_set(&dxgglobal->host_event_id, 1);
824+
726825
#ifdef DEBUG
727826
dxgk_validate_ioctls();
728827
#endif
828+
729829
return dxgglobal;
730830
}
731831

0 commit comments

Comments
 (0)