Skip to content

[SYCL][COMPAT] Fix error on headers, add helloworld test #18401

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 9 commits into
base: sycl
Choose a base branch
from
112 changes: 2 additions & 110 deletions sycl/doc/syclcompat/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3474,116 +3474,8 @@ public:

## Sample Code

Below is a simple linear algebra sample, which computes `y = mx + b` implemented
using this library:

``` c++
#include <cassert>
#include <iostream>

#include <syclcompat.hpp>
#include <sycl/sycl.hpp>

/**
 * Straight line in slope intercept form: each work-item evaluates
 * Y[i] = m * X[i] + b for at most one index i.
 */
template <int BLOCK_SIZE>
void slope_intercept(float *Y, float *X, float m, float b, size_t n) {
  // Index of this work-group and of this work-item within it
  const size_t group = syclcompat::work_group_id::x();
  const size_t lane = syclcompat::local_id::x();

  // Alternatively: idx = syclcompat::global_id::x();
  const size_t idx = group * BLOCK_SIZE + lane;

  // Indices at or past n have no element to compute
  if (idx >= n)
    return;

  Y[idx] = m * X[idx] + b;
}

/**
 * Returns normally when ptr is non-null. Otherwise writes msg to std::cerr
 * and terminates the program with EXIT_FAILURE.
 */
void check_memory(void *ptr, std::string msg) {
  if (ptr != nullptr)
    return;

  std::cerr << "Failed to allocate memory: " << msg << std::endl;
  exit(EXIT_FAILURE);
}

/**
 * Program main: evaluates Y = m * X + b on the device for n_points inputs and
 * checks the result against values computed on the host.
 */
int main(int argc, char **argv) {
  std::cout << "Simple Kernel example" << std::endl;

  constexpr size_t n_points = 32;
  constexpr float m = 1.5f;
  constexpr float b = 0.5f;

  // size_t keeps the comparison with the device limit below from mixing
  // signed and unsigned operands.
  size_t block_size = 32;
  if (block_size > syclcompat::get_current_device()
                       .get_info<sycl::info::device::max_work_group_size>())
    block_size = 16;

  std::cout << "block_size = " << block_size << ", n_points = " << n_points
            << std::endl;

  // Allocate host memory for vectors X and Y
  size_t mem_size = n_points * sizeof(float);
  float *h_X = static_cast<float *>(syclcompat::malloc_host(mem_size));
  float *h_Y = static_cast<float *>(syclcompat::malloc_host(mem_size));
  check_memory(h_X, "h_X allocation failed.");
  check_memory(h_Y, "h_Y allocation failed.");

  // Alternative templated allocation for the expected output
  float *h_expected = syclcompat::malloc_host<float>(n_points);
  check_memory(h_expected, "Not enough for h_expected.");

  // Initialize host memory & expected output
  for (size_t i = 0; i < n_points; i++) {
    h_X[i] = i + 1;
    h_expected[i] = m * h_X[i] + b;
  }

  // Allocate device memory
  float *d_X = static_cast<float *>(syclcompat::malloc(mem_size));
  float *d_Y = static_cast<float *>(syclcompat::malloc(mem_size));
  check_memory(d_X, "d_X allocation failed.");
  check_memory(d_Y, "d_Y allocation failed.");

  // copy host memory to device
  syclcompat::memcpy(d_X, h_X, mem_size);

  size_t threads = block_size;
  // Round up so a trailing partial block is still launched; the kernel's
  // `i < n` bounds check makes the surplus work-items do nothing.
  size_t grid = (n_points + block_size - 1) / block_size;

  std::cout << "Computing result using SYCL Kernel... ";
  if (block_size == 16) {
    syclcompat::launch<slope_intercept<16>>(grid, threads, d_Y, d_X, m, b,
                                            n_points);
  } else {
    syclcompat::launch<slope_intercept<32>>(grid, threads, d_Y, d_X, m, b,
                                            n_points);
  }
  syclcompat::wait();
  std::cout << "DONE" << std::endl;

  // Async copy result from device to host
  syclcompat::memcpy_async(h_Y, d_Y, mem_size).wait();

  // Check output. The tolerance is applied in both directions: a one-sided
  // `difference < 1e-6` test would accept any result that is too small.
  for (size_t i = 0; i < n_points; i++) {
    [[maybe_unused]] const float diff = h_Y[i] - h_expected[i];
    assert(diff < 1e-6 && diff > -1e-6);
  }

  // Clean up memory
  syclcompat::free(h_X);
  syclcompat::free(h_Y);
  syclcompat::free(h_expected);
  syclcompat::free(d_X);
  syclcompat::free(d_Y);

  return 0;
}
```
The file [helloworld.cpp](../../test-e2e/syclcompat/helloworld.cpp) contains
a simple example which computes `y = mx + b` implemented using this library.

## Maintainers

Expand Down
2 changes: 1 addition & 1 deletion sycl/include/syclcompat/traits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ template <int Dim> struct range_to_item_map<sycl::nd_range<Dim>> {
using ItemT = sycl::nd_item<Dim>;
};
// Specialization of range_to_item_map for a plain sycl::range<Dim>.
template <int Dim> struct range_to_item_map<sycl::range<Dim>> {
// NOTE(review): this listing is a diff whose +/- markers were stripped; the
// alias below appears to be the pre-change line and the one after it its
// replacement. Confirm against the actual header.
using ItemT = sycl::item<Dim>;
// Spells out the second template argument of sycl::item as false.
using ItemT = sycl::item<Dim, false>;
};

template <typename T>
Expand Down
142 changes: 142 additions & 0 deletions sycl/test-e2e/syclcompat/helloworld.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
/***************************************************************************
*
* Copyright (C) Codeplay Software Ltd.
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM
* Exceptions. See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SYCLcompat API
*
* helloworld.cpp
*
* Description:
* Checks that the SYCLcompat example program compiles and runs
**************************************************************************/

// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

#include <sycl/detail/core.hpp>

// The example uses specific headers, but the user can
// simply include <syclcompat/syclcompat.hpp> to get all the
// functionality with a single header

#include <syclcompat/device.hpp>
#include <syclcompat/id_query.hpp>
#include <syclcompat/launch.hpp>
#include <syclcompat/memory.hpp>

#include <cstdlib>
#include <iostream>

// Exits the program with EXIT_FAILURE when `ptr` is null, after printing the
// checked expression to std::cerr. The body is wrapped in do { } while (0) so
// that `CHECK_MEMORY(p);` expands to exactly one statement and can be used in
// an unbraced if/else.
#define CHECK_MEMORY(ptr)                                                      \
  do {                                                                         \
    if ((ptr) == nullptr) {                                                    \
      std::cerr << "Failed to allocate memory: " << (#ptr) << "\n";            \
      std::exit(EXIT_FAILURE);                                                 \
    }                                                                          \
  } while (0)

/**
 * Kernel for a straight line in slope intercept form:
 * Y[i] = m * X[i] + b, with at most one element written per work-item.
 */
template <int BLOCK_SIZE>
void slope_intercept(float *Y, float *X, float m, float b, size_t n) {
  const size_t block_idx = syclcompat::work_group_id::x();
  const size_t thread_idx = syclcompat::local_id::x();

  // Alternatively: element = syclcompat::global_id::x();
  const size_t element = block_idx * BLOCK_SIZE + thread_idx;

  // Work-items at or past n have nothing to compute
  if (element >= n)
    return;

  Y[element] = m * X[element] + b;
}

/**
 * Program main: evaluates Y = m * X + b on the device for n_points inputs,
 * copies the result back and compares it with values computed on the host.
 * Returns EXIT_SUCCESS when every element matches; exits with EXIT_FAILURE on
 * an allocation failure or a mismatch.
 */
int main(int argc, char **argv) {
  std::cout << "Simple Kernel example" << "\n";

  constexpr size_t n_points = 32;
  constexpr float m = 1.5f;
  constexpr float b = 0.5f;

  // size_t keeps the comparison with the device limit below from mixing
  // signed and unsigned operands.
  size_t block_size = 32;
  if (block_size > syclcompat::get_current_device()
                       .get_info<sycl::info::device::max_work_group_size>()) {
    block_size = 16;
  }

  std::cout << "block_size = " << block_size << ", n_points = " << n_points
            << "\n";

  // Allocate host memory for vectors X and Y
  size_t mem_size = n_points * sizeof(float);
  float *h_X = static_cast<float *>(syclcompat::malloc_host(mem_size));
  float *h_Y = static_cast<float *>(syclcompat::malloc_host(mem_size));
  CHECK_MEMORY(h_X);
  CHECK_MEMORY(h_Y);

  // Alternative templated allocation for the expected output
  float *h_expected = syclcompat::malloc_host<float>(n_points);
  CHECK_MEMORY(h_expected);

  // Initialize host memory & expected output
  for (size_t i = 0; i < n_points; i++) {
    h_X[i] = i + 1;
    h_expected[i] = m * h_X[i] + b;
  }

  // Allocate device memory
  float *d_X = static_cast<float *>(syclcompat::malloc(mem_size));
  float *d_Y = static_cast<float *>(syclcompat::malloc(mem_size));
  CHECK_MEMORY(d_X);
  CHECK_MEMORY(d_Y);

  // copy host memory to device
  syclcompat::memcpy(d_X, h_X, mem_size);

  size_t threads = block_size;
  // Round up so a trailing partial block is still launched; the kernel's
  // `i < n` bounds check makes the surplus work-items do nothing.
  size_t grid = (n_points + block_size - 1) / block_size;

  std::cout << "Computing result using SYCL Kernel... ";
  if (block_size == 16) {
    syclcompat::launch<slope_intercept<16>>(grid, threads, d_Y, d_X, m, b,
                                            n_points);
  } else {
    syclcompat::launch<slope_intercept<32>>(grid, threads, d_Y, d_X, m, b,
                                            n_points);
  }
  syclcompat::wait();
  std::cout << "DONE" << "\n";

  // Async copy result from device to host
  syclcompat::memcpy_async(h_Y, d_Y, mem_size).wait();

  // Check output
  for (size_t i = 0; i < n_points; i++) {
    if (std::abs(h_Y[i] - h_expected[i]) >= 1e-6) {
      std::cerr << "Mismatch at index " << i << ": expected " << h_expected[i]
                << ", but got " << h_Y[i] << "\n";
      exit(EXIT_FAILURE);
    }
  }

  // Clean up memory
  syclcompat::free(h_X);
  syclcompat::free(h_Y);
  syclcompat::free(h_expected);
  syclcompat::free(d_X);
  syclcompat::free(d_Y);

  return EXIT_SUCCESS;
}
Loading