diff --git a/sycl/cmake/modules/UnifiedRuntimeTag.cmake b/sycl/cmake/modules/UnifiedRuntimeTag.cmake index 894dcd7defb76..fb924a700df1c 100644 --- a/sycl/cmake/modules/UnifiedRuntimeTag.cmake +++ b/sycl/cmake/modules/UnifiedRuntimeTag.cmake @@ -4,4 +4,4 @@ # Date: Wed Nov 13 13:26:08 2024 -0800 # # Merge branch 'main' of https://github.com/oneapi-src/unified-runtime into ianayl/2way-prefetch -set(UNIFIED_RUNTIME_TAG fb02dacf979036236cd047f115ef4e0058b09db6) \ No newline at end of file +set(UNIFIED_RUNTIME_TAG fb02dacf979036236cd047f115ef4e0058b09db6) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index fb606dbb76915..16fad6595727b 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -3475,25 +3475,47 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { case CGType::ProfilingTag: { assert(MQueue && "Profiling tag requires a valid queue"); const auto &Adapter = MQueue->getAdapter(); + + bool IsInOrderQueue = MQueue->isInOrder(); + ur_event_handle_t *TimestampDeps = nullptr; + size_t NumTimestampDeps = 0; + + // If the queue is not in-order, the implementation will need to first + // insert a marker event that the timestamp waits for. + ur_event_handle_t PreTimestampMarkerEvent{}; + if (!IsInOrderQueue) { + // FIXME: urEnqueueEventsWait on the L0 adapter requires a double-release. + // Use that instead once it has been fixed. + // See https://github.com/oneapi-src/unified-runtime/issues/2347. + Adapter->call( + MQueue->getHandleRef(), + /*num_events_in_wait_list=*/0, + /*event_wait_list=*/nullptr, &PreTimestampMarkerEvent); + TimestampDeps = &PreTimestampMarkerEvent; + NumTimestampDeps = 1; + } + + Adapter->call( + MQueue->getHandleRef(), + /*blocking=*/false, NumTimestampDeps, TimestampDeps, Event); + // If the queue is not in-order, we need to insert a barrier. This barrier // does not need output events as it will implicitly enforce the following // enqueue is blocked until it finishes. - if (!MQueue->isInOrder()) { + if (!IsInOrderQueue) { + // We also need to release the timestamp event from the marker. + Adapter->call(PreTimestampMarkerEvent); // FIXME: Due to a bug in the L0 UR adapter, we will leak events if we do // not pass an output event to the UR call. Once that is fixed, // this immediately-deleted event can be removed. - ur_event_handle_t PreTimestampBarrierEvent{}; + ur_event_handle_t PostTimestampBarrierEvent{}; Adapter->call( MQueue->getHandleRef(), /*num_events_in_wait_list=*/0, - /*event_wait_list=*/nullptr, &PreTimestampBarrierEvent); - Adapter->call(PreTimestampBarrierEvent); + /*event_wait_list=*/nullptr, &PostTimestampBarrierEvent); + Adapter->call(PostTimestampBarrierEvent); } - Adapter->call( - MQueue->getHandleRef(), - /*blocking=*/false, - /*num_events_in_wait_list=*/0, /*event_wait_list=*/nullptr, Event); if (Event) MEvent->setHandle(*Event); return UR_RESULT_SUCCESS; diff --git a/sycl/unittests/Extensions/ProfilingTag.cpp b/sycl/unittests/Extensions/ProfilingTag.cpp index 7b18b9ba00e4e..394fba8497103 100644 --- a/sycl/unittests/Extensions/ProfilingTag.cpp +++ b/sycl/unittests/Extensions/ProfilingTag.cpp @@ -66,6 +66,8 @@ TEST_F(ProfilingTagTest, ProfilingTagSupportedDefaultQueue) { "urEnqueueTimestampRecordingExp", &after_urEnqueueTimestampRecordingExp); mock::getCallbacks().set_after_callback("urEventGetProfilingInfo", &after_urEventGetProfilingInfo); + mock::getCallbacks().set_after_callback( + "urEnqueueEventsWaitWithBarrier", &after_urEnqueueEventsWaitWithBarrier); sycl::context Ctx{sycl::platform()}; sycl::queue Queue{Ctx, sycl::default_selector_v}; @@ -75,6 +77,39 @@ TEST_F(ProfilingTagTest, ProfilingTagSupportedDefaultQueue) { sycl::event E = sycl::ext::oneapi::experimental::submit_profiling_tag(Queue); ASSERT_EQ(size_t{1}, counter_urEnqueueTimestampRecordingExp); + // TODO: We expect two barriers for now, while marker events leak. Adjust when + // addressed. + ASSERT_EQ(size_t{2}, counter_urEnqueueEventsWaitWithBarrier); + + E.get_profiling_info(); + ASSERT_TRUE(LatestProfilingQuery.has_value()); + ASSERT_EQ(*LatestProfilingQuery, UR_PROFILING_INFO_COMMAND_START); + + E.get_profiling_info(); + ASSERT_TRUE(LatestProfilingQuery.has_value()); + ASSERT_EQ(*LatestProfilingQuery, UR_PROFILING_INFO_COMMAND_END); +} + +TEST_F(ProfilingTagTest, ProfilingTagSupportedInOrderQueue) { + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &after_urDeviceGetInfo); + mock::getCallbacks().set_after_callback( + "urEnqueueTimestampRecordingExp", &after_urEnqueueTimestampRecordingExp); + mock::getCallbacks().set_after_callback("urEventGetProfilingInfo", + &after_urEventGetProfilingInfo); + mock::getCallbacks().set_after_callback( + "urEnqueueEventsWaitWithBarrier", &after_urEnqueueEventsWaitWithBarrier); + + sycl::context Ctx{sycl::platform()}; + sycl::queue Queue{ + Ctx, sycl::default_selector_v, {sycl::property::queue::in_order()}}; + sycl::device Dev = Queue.get_device(); + + ASSERT_TRUE(Dev.has(sycl::aspect::ext_oneapi_queue_profiling_tag)); + + sycl::event E = sycl::ext::oneapi::experimental::submit_profiling_tag(Queue); + ASSERT_EQ(size_t{1}, counter_urEnqueueTimestampRecordingExp); + ASSERT_EQ(size_t{0}, counter_urEnqueueEventsWaitWithBarrier); E.get_profiling_info(); ASSERT_TRUE(LatestProfilingQuery.has_value()); @@ -113,6 +148,38 @@ TEST_F(ProfilingTagTest, ProfilingTagSupportedProfilingQueue) { ASSERT_EQ(*LatestProfilingQuery, UR_PROFILING_INFO_COMMAND_END); } +TEST_F(ProfilingTagTest, ProfilingTagSupportedProfilingInOrderQueue) { + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &after_urDeviceGetInfo); + mock::getCallbacks().set_after_callback( + "urEnqueueTimestampRecordingExp", &after_urEnqueueTimestampRecordingExp); + mock::getCallbacks().set_after_callback("urEventGetProfilingInfo", + &after_urEventGetProfilingInfo); + mock::getCallbacks().set_after_callback( + "urEnqueueEventsWaitWithBarrier", &after_urEnqueueEventsWaitWithBarrier); + + sycl::context Ctx{sycl::platform()}; + sycl::queue Queue{Ctx, + sycl::default_selector_v, + {sycl::property::queue::enable_profiling(), + sycl::property::queue::in_order()}}; + sycl::device Dev = Queue.get_device(); + + ASSERT_TRUE(Dev.has(sycl::aspect::ext_oneapi_queue_profiling_tag)); + + sycl::event E = sycl::ext::oneapi::experimental::submit_profiling_tag(Queue); + ASSERT_EQ(size_t{1}, counter_urEnqueueTimestampRecordingExp); + ASSERT_EQ(size_t{0}, counter_urEnqueueEventsWaitWithBarrier); + + E.get_profiling_info(); + ASSERT_TRUE(LatestProfilingQuery.has_value()); + ASSERT_EQ(*LatestProfilingQuery, UR_PROFILING_INFO_COMMAND_START); + + E.get_profiling_info(); + ASSERT_TRUE(LatestProfilingQuery.has_value()); + ASSERT_EQ(*LatestProfilingQuery, UR_PROFILING_INFO_COMMAND_END); +} + TEST_F(ProfilingTagTest, ProfilingTagFallbackDefaultQueue) { mock::getCallbacks().set_after_callback("urDeviceGetInfo", &after_urDeviceGetInfo);