Skip to content

Commit

Permalink
Merge branch 'sycl' into ianayl/2way-prefetch
Browse files Browse the repository at this point in the history
  • Loading branch information
ianayl authored Nov 19, 2024
2 parents af6ca57 + b7607f0 commit 28fb2bd
Show file tree
Hide file tree
Showing 3 changed files with 98 additions and 9 deletions.
2 changes: 1 addition & 1 deletion sycl/cmake/modules/UnifiedRuntimeTag.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
# Date: Wed Nov 13 13:26:08 2024 -0800
#
# Merge branch 'main' of https://github.com/oneapi-src/unified-runtime into ianayl/2way-prefetch
set(UNIFIED_RUNTIME_TAG fb02dacf979036236cd047f115ef4e0058b09db6)
set(UNIFIED_RUNTIME_TAG fb02dacf979036236cd047f115ef4e0058b09db6)
38 changes: 30 additions & 8 deletions sycl/source/detail/scheduler/commands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3475,25 +3475,47 @@ ur_result_t ExecCGCommand::enqueueImpQueue() {
case CGType::ProfilingTag: {
assert(MQueue && "Profiling tag requires a valid queue");
const auto &Adapter = MQueue->getAdapter();

bool IsInOrderQueue = MQueue->isInOrder();
ur_event_handle_t *TimestampDeps = nullptr;
size_t NumTimestampDeps = 0;

// If the queue is not in-order, the implementation will need to first
// insert a marker event that the timestamp waits for.
ur_event_handle_t PreTimestampMarkerEvent{};
if (!IsInOrderQueue) {
// FIXME: urEnqueueEventsWait on the L0 adapter requires a double-release.
// Use that instead once it has been fixed.
// See https://github.com/oneapi-src/unified-runtime/issues/2347.
Adapter->call<UrApiKind::urEnqueueEventsWaitWithBarrier>(
MQueue->getHandleRef(),
/*num_events_in_wait_list=*/0,
/*event_wait_list=*/nullptr, &PreTimestampMarkerEvent);
TimestampDeps = &PreTimestampMarkerEvent;
NumTimestampDeps = 1;
}

Adapter->call<UrApiKind::urEnqueueTimestampRecordingExp>(
MQueue->getHandleRef(),
/*blocking=*/false, NumTimestampDeps, TimestampDeps, Event);

// If the queue is not in-order, we need to insert a barrier. This barrier
// does not need output events as it will implicitly enforce the following
// enqueue is blocked until it finishes.
if (!MQueue->isInOrder()) {
if (!IsInOrderQueue) {
// We also need to release the timestamp event from the marker.
Adapter->call<UrApiKind::urEventRelease>(PreTimestampMarkerEvent);
// FIXME: Due to a bug in the L0 UR adapter, we will leak events if we do
// not pass an output event to the UR call. Once that is fixed,
// this immediately-deleted event can be removed.
ur_event_handle_t PreTimestampBarrierEvent{};
ur_event_handle_t PostTimestampBarrierEvent{};
Adapter->call<UrApiKind::urEnqueueEventsWaitWithBarrier>(
MQueue->getHandleRef(),
/*num_events_in_wait_list=*/0,
/*event_wait_list=*/nullptr, &PreTimestampBarrierEvent);
Adapter->call<UrApiKind::urEventRelease>(PreTimestampBarrierEvent);
/*event_wait_list=*/nullptr, &PostTimestampBarrierEvent);
Adapter->call<UrApiKind::urEventRelease>(PostTimestampBarrierEvent);
}

Adapter->call<UrApiKind::urEnqueueTimestampRecordingExp>(
MQueue->getHandleRef(),
/*blocking=*/false,
/*num_events_in_wait_list=*/0, /*event_wait_list=*/nullptr, Event);
if (Event)
MEvent->setHandle(*Event);
return UR_RESULT_SUCCESS;
Expand Down
67 changes: 67 additions & 0 deletions sycl/unittests/Extensions/ProfilingTag.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ TEST_F(ProfilingTagTest, ProfilingTagSupportedDefaultQueue) {
"urEnqueueTimestampRecordingExp", &after_urEnqueueTimestampRecordingExp);
mock::getCallbacks().set_after_callback("urEventGetProfilingInfo",
&after_urEventGetProfilingInfo);
mock::getCallbacks().set_after_callback(
"urEnqueueEventsWaitWithBarrier", &after_urEnqueueEventsWaitWithBarrier);

sycl::context Ctx{sycl::platform()};
sycl::queue Queue{Ctx, sycl::default_selector_v};
Expand All @@ -75,6 +77,39 @@ TEST_F(ProfilingTagTest, ProfilingTagSupportedDefaultQueue) {

sycl::event E = sycl::ext::oneapi::experimental::submit_profiling_tag(Queue);
ASSERT_EQ(size_t{1}, counter_urEnqueueTimestampRecordingExp);
// TODO: We expect two barriers for now, while marker events leak. Adjust when
// addressed.
ASSERT_EQ(size_t{2}, counter_urEnqueueEventsWaitWithBarrier);

E.get_profiling_info<sycl::info::event_profiling::command_start>();
ASSERT_TRUE(LatestProfilingQuery.has_value());
ASSERT_EQ(*LatestProfilingQuery, UR_PROFILING_INFO_COMMAND_START);

E.get_profiling_info<sycl::info::event_profiling::command_end>();
ASSERT_TRUE(LatestProfilingQuery.has_value());
ASSERT_EQ(*LatestProfilingQuery, UR_PROFILING_INFO_COMMAND_END);
}

TEST_F(ProfilingTagTest, ProfilingTagSupportedInOrderQueue) {
mock::getCallbacks().set_after_callback("urDeviceGetInfo",
&after_urDeviceGetInfo<true>);
mock::getCallbacks().set_after_callback(
"urEnqueueTimestampRecordingExp", &after_urEnqueueTimestampRecordingExp);
mock::getCallbacks().set_after_callback("urEventGetProfilingInfo",
&after_urEventGetProfilingInfo);
mock::getCallbacks().set_after_callback(
"urEnqueueEventsWaitWithBarrier", &after_urEnqueueEventsWaitWithBarrier);

sycl::context Ctx{sycl::platform()};
sycl::queue Queue{
Ctx, sycl::default_selector_v, {sycl::property::queue::in_order()}};
sycl::device Dev = Queue.get_device();

ASSERT_TRUE(Dev.has(sycl::aspect::ext_oneapi_queue_profiling_tag));

sycl::event E = sycl::ext::oneapi::experimental::submit_profiling_tag(Queue);
ASSERT_EQ(size_t{1}, counter_urEnqueueTimestampRecordingExp);
ASSERT_EQ(size_t{0}, counter_urEnqueueEventsWaitWithBarrier);

E.get_profiling_info<sycl::info::event_profiling::command_start>();
ASSERT_TRUE(LatestProfilingQuery.has_value());
Expand Down Expand Up @@ -113,6 +148,38 @@ TEST_F(ProfilingTagTest, ProfilingTagSupportedProfilingQueue) {
ASSERT_EQ(*LatestProfilingQuery, UR_PROFILING_INFO_COMMAND_END);
}

TEST_F(ProfilingTagTest, ProfilingTagSupportedProfilingInOrderQueue) {
mock::getCallbacks().set_after_callback("urDeviceGetInfo",
&after_urDeviceGetInfo<true>);
mock::getCallbacks().set_after_callback(
"urEnqueueTimestampRecordingExp", &after_urEnqueueTimestampRecordingExp);
mock::getCallbacks().set_after_callback("urEventGetProfilingInfo",
&after_urEventGetProfilingInfo);
mock::getCallbacks().set_after_callback(
"urEnqueueEventsWaitWithBarrier", &after_urEnqueueEventsWaitWithBarrier);

sycl::context Ctx{sycl::platform()};
sycl::queue Queue{Ctx,
sycl::default_selector_v,
{sycl::property::queue::enable_profiling(),
sycl::property::queue::in_order()}};
sycl::device Dev = Queue.get_device();

ASSERT_TRUE(Dev.has(sycl::aspect::ext_oneapi_queue_profiling_tag));

sycl::event E = sycl::ext::oneapi::experimental::submit_profiling_tag(Queue);
ASSERT_EQ(size_t{1}, counter_urEnqueueTimestampRecordingExp);
ASSERT_EQ(size_t{0}, counter_urEnqueueEventsWaitWithBarrier);

E.get_profiling_info<sycl::info::event_profiling::command_start>();
ASSERT_TRUE(LatestProfilingQuery.has_value());
ASSERT_EQ(*LatestProfilingQuery, UR_PROFILING_INFO_COMMAND_START);

E.get_profiling_info<sycl::info::event_profiling::command_end>();
ASSERT_TRUE(LatestProfilingQuery.has_value());
ASSERT_EQ(*LatestProfilingQuery, UR_PROFILING_INFO_COMMAND_END);
}

TEST_F(ProfilingTagTest, ProfilingTagFallbackDefaultQueue) {
mock::getCallbacks().set_after_callback("urDeviceGetInfo",
&after_urDeviceGetInfo<false>);
Expand Down

0 comments on commit 28fb2bd

Please # to comment.