diff --git a/sycl/doc/syclcompat/README.md b/sycl/doc/syclcompat/README.md
index 4f395ea26760a..00d99e6c919f7 100644
--- a/sycl/doc/syclcompat/README.md
+++ b/sycl/doc/syclcompat/README.md
@@ -3474,116 +3474,8 @@ public:
 
 ## Sample Code
 
-Below is a simple linear algebra sample, which computes `y = mx + b` implemented
-using this library:
-
-``` c++
-#include <cassert>
-#include <iostream>
-
-#include <syclcompat.hpp>
-#include <sycl/sycl.hpp>
-
-/**
- * Slope intercept form of a straight line equation: Y = m * X + b
- */
-template <int BLOCK_SIZE>
-void slope_intercept(float *Y, float *X, float m, float b, size_t n) {
-
-  // Block index
-  size_t bx = syclcompat::work_group_id::x();
-  // Thread index
-  size_t tx = syclcompat::local_id::x();
-
-  size_t i = bx * BLOCK_SIZE + tx;
-  // or  i = syclcompat::global_id::x();
-  if (i < n)
-    Y[i] = m * X[i] + b;
-}
-
-void check_memory(void *ptr, std::string msg) {
-  if (ptr == nullptr) {
-    std::cerr << "Failed to allocate memory: " << msg << std::endl;
-    exit(EXIT_FAILURE);
-  }
-}
-
-/**
- * Program main
- */
-int main(int argc, char **argv) {
-  std::cout << "Simple Kernel example" << std::endl;
-
-  constexpr size_t n_points = 32;
-  constexpr float m = 1.5f;
-  constexpr float b = 0.5f;
-
-  int block_size = 32;
-  if (block_size > syclcompat::get_current_device()
-                       .get_info<sycl::info::device::max_work_group_size>())
-    block_size = 16;
-
-  std::cout << "block_size = " << block_size << ", n_points = " << n_points
-            << std::endl;
-
-  // Allocate host memory for vectors X and Y
-  size_t mem_size = n_points * sizeof(float);
-  float *h_X = (float *)syclcompat::malloc_host(mem_size);
-  float *h_Y = (float *)syclcompat::malloc_host(mem_size);
-  check_memory(h_X, "h_X allocation failed.");
-  check_memory(h_Y, "h_Y allocation failed.");
-
-  // Alternative templated allocation for the expected output
-  float *h_expected = syclcompat::malloc_host<float>(n_points);
-  check_memory(h_expected, "Not enough for h_expected.");
-
-  // Initialize host memory & expected output
-  for (size_t i = 0; i < n_points; i++) {
-    h_X[i] = i + 1;
-    h_expected[i] = m * h_X[i] + b;
-  }
-
-  // Allocate device memory
-  float *d_X = (float *)syclcompat::malloc(mem_size);
-  float *d_Y = (float *)syclcompat::malloc(mem_size);
-  check_memory(d_X, "d_X allocation failed.");
-  check_memory(d_Y, "d_Y allocation failed.");
-
-  // copy host memory to device
-  syclcompat::memcpy(d_X, h_X, mem_size);
-
-  size_t threads = block_size;
-  size_t grid = n_points / block_size;
-
-  std::cout << "Computing result using SYCL Kernel... ";
-  if (block_size == 16) {
-    syclcompat::launch<slope_intercept<16>>(grid, threads, d_Y, d_X, m, b,
-                                        n_points);
-  } else {
-    syclcompat::launch<slope_intercept<32>>(grid, threads, d_Y, d_X, m, b,
-                                        n_points);
-  }
-  syclcompat::wait();
-  std::cout << "DONE" << std::endl;
-
-  // Async copy result from device to host
-  syclcompat::memcpy_async(h_Y, d_Y, mem_size).wait();
-
-  // Check output
-  for (size_t i = 0; i < n_points; i++) {
-    assert(h_Y[i] - h_expected[i] < 1e-6);
-  }
-
-  // Clean up memory
-  syclcompat::free(h_X);
-  syclcompat::free(h_Y);
-  syclcompat::free(h_expected);
-  syclcompat::free(d_X);
-  syclcompat::free(d_Y);
-
-  return 0;
-}
-```
+The file [helloworld.cpp](../../test-e2e/syclcompat/helloworld.cpp) contains
+a simple example, implemented using this library, which computes `y = mx + b`.
 
 ## Maintainers
 
diff --git a/sycl/include/syclcompat/traits.hpp b/sycl/include/syclcompat/traits.hpp
index 7ed4d765251bc..30719b376ae43 100644
--- a/sycl/include/syclcompat/traits.hpp
+++ b/sycl/include/syclcompat/traits.hpp
@@ -87,7 +87,7 @@ template <int Dim> struct range_to_item_map<sycl::nd_range<Dim>> {
   using ItemT = sycl::nd_item<Dim>;
 };
 template <int Dim> struct range_to_item_map<sycl::range<Dim>> {
-  using ItemT = sycl::item<Dim>;
+  using ItemT = sycl::item<Dim, /*WithOffset=*/false>;
 };
 
 template <typename T>
diff --git a/sycl/test-e2e/syclcompat/helloworld.cpp b/sycl/test-e2e/syclcompat/helloworld.cpp
new file mode 100644
index 0000000000000..3e32f8a965eb6
--- /dev/null
+++ b/sycl/test-e2e/syclcompat/helloworld.cpp
@@ -0,0 +1,147 @@
+/***************************************************************************
+ *
+ *  Copyright (C) Codeplay Software Ltd.
+ *
+ *  Part of the LLVM Project, under the Apache License v2.0 with LLVM
+ *  Exceptions. See https://llvm.org/LICENSE.txt for license information.
+ *  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ *  SYCLcompat API
+ *
+ *  helloworld.cpp
+ *
+ *  Description:
+ *    Checks that the SYCLcompat example program compiles and runs
+ **************************************************************************/
+
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+
+#include <sycl/detail/core.hpp>
+
+// The example uses specific headers, but the user can
+// simply include <syclcompat/syclcompat.hpp> to get all the
+// functionality with a single header
+
+#include <syclcompat/device.hpp>
+#include <syclcompat/id_query.hpp>
+#include <syclcompat/launch.hpp>
+#include <syclcompat/memory.hpp>
+
+#include <cstdlib>
+#include <iostream>
+
+// Prints a diagnostic naming the pointer expression and exits with
+// EXIT_FAILURE when `ptr` is nullptr. Wrapped in do { } while (0) so that
+// `CHECK_MEMORY(p);` is a single statement, safe inside unbraced if/else.
+#define CHECK_MEMORY(ptr)                                                      \
+  do {                                                                         \
+    if ((ptr) == nullptr) {                                                    \
+      std::cerr << "Failed to allocate memory: " << (#ptr) << "\n";            \
+      exit(EXIT_FAILURE);                                                      \
+    }                                                                          \
+  } while (0)
+
+/**
+ * Slope-intercept kernel: computes Y[i] = m * X[i] + b for this index i < n
+ */
+template <int BLOCK_SIZE>
+void slope_intercept(float *Y, float *X, float m, float b, size_t n) {
+
+  // Work-group (block) index along x
+  size_t bx = syclcompat::work_group_id::x();
+  // Work-item (thread) index within its work-group along x
+  size_t tx = syclcompat::local_id::x();
+
+  size_t i = bx * BLOCK_SIZE + tx;
+  // or  i = syclcompat::global_id::x();
+  if (i < n)
+    Y[i] = m * X[i] + b;
+}
+
+/**
+ * Program main: runs slope_intercept on the device and verifies the result
+ */
+int main(int argc, char **argv) {
+  std::cout << "Simple Kernel example" << "\n";
+
+  constexpr size_t n_points = 32;
+  constexpr float m = 1.5f;
+  constexpr float b = 0.5f;
+
+  int block_size = 32;
+  if (block_size > syclcompat::get_current_device()
+                       .get_info<sycl::info::device::max_work_group_size>()) {
+    block_size = 16;
+  }
+
+  std::cout << "block_size = " << block_size << ", n_points = " << n_points
+            << "\n";
+
+  // Allocate host memory for vectors X and Y
+  size_t mem_size = n_points * sizeof(float);
+  float *h_X = (float *)syclcompat::malloc_host(mem_size);
+  float *h_Y = (float *)syclcompat::malloc_host(mem_size);
+  CHECK_MEMORY(h_X);
+  CHECK_MEMORY(h_Y);
+
+  // Alternative templated allocation for the expected output
+  float *h_expected = syclcompat::malloc_host<float>(n_points);
+  CHECK_MEMORY(h_expected);
+
+  // Initialize host memory & expected output
+  for (size_t i = 0; i < n_points; i++) {
+    h_X[i] = i + 1;
+    h_expected[i] = m * h_X[i] + b;
+  }
+
+  // Allocate device memory
+  float *d_X = (float *)syclcompat::malloc(mem_size);
+  float *d_Y = (float *)syclcompat::malloc(mem_size);
+  CHECK_MEMORY(d_X);
+  CHECK_MEMORY(d_Y);
+
+  // Copy the input vector X from host memory to device memory
+  syclcompat::memcpy(d_X, h_X, mem_size);
+
+  size_t threads = block_size;
+  size_t grid = n_points / block_size;
+
+  std::cout << "Computing result using SYCL Kernel... ";
+  if (block_size == 16) {
+    syclcompat::launch<slope_intercept<16>>(grid, threads, d_Y, d_X, m, b,
+                                            n_points);
+  } else {
+    syclcompat::launch<slope_intercept<32>>(grid, threads, d_Y, d_X, m, b,
+                                            n_points);
+  }
+  syclcompat::wait();
+  std::cout << "DONE" << "\n";
+
+  // Async copy result from device to host
+  syclcompat::memcpy_async(h_Y, d_Y, mem_size).wait();
+
+  // Check output; written as !(|diff| < tol) so NaN results are rejected too
+  for (size_t i = 0; i < n_points; i++) {
+    if (!(std::abs(h_Y[i] - h_expected[i]) < 1e-6)) {
+      std::cerr << "Mismatch at index " << i << ": expected " << h_expected[i]
+                << ", but got " << h_Y[i] << "\n";
+      exit(EXIT_FAILURE);
+    }
+  }
+
+  // Clean up memory
+  syclcompat::free(h_X);
+  syclcompat::free(h_Y);
+  syclcompat::free(h_expected);
+  syclcompat::free(d_X);
+  syclcompat::free(d_Y);
+
+  return EXIT_SUCCESS;
+}