Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make most code compatible with CUDA #1349

Merged
merged 18 commits into from
Mar 11, 2022
19 changes: 8 additions & 11 deletions apps/ycutrace/ycutrace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,36 +114,33 @@ void run_render(const render_params& params_) {
shape.quads = {};
}

// slice params
auto params__ = (cutrace_params&)params;

// initialize context
timer = simple_timer{};
auto context = make_cutrace_context(params__);
auto context = make_cutrace_context(params);
print_info("init gpu: {}", elapsed_formatted(timer));

// upload scene to the gpu
timer = simple_timer{};
auto cuscene = make_cutrace_scene(context, scene, params__);
auto cuscene = make_cutrace_scene(context, scene, params);
print_info("upload scene: {}", elapsed_formatted(timer));

// build bvh
timer = simple_timer{};
auto bvh = make_cutrace_bvh(context, cuscene, params__);
auto bvh = make_cutrace_bvh(context, cuscene, params);
print_info("build bvh: {}", elapsed_formatted(timer));

// init lights
auto lights = make_cutrace_lights(context, scene, params__);
auto lights = make_cutrace_lights(context, scene, params);

// state
auto state = make_cutrace_state(context, scene, params__);
auto state = make_cutrace_state(context, scene, params);

// render
timer = simple_timer{};
trace_start(context, state, cuscene, bvh, lights, scene, params__);
for (auto sample : range(0, params__.samples, params__.batch)) {
trace_start(context, state, cuscene, bvh, lights, scene, params);
for (auto sample : range(0, params.samples, params.batch)) {
auto sample_timer = simple_timer{};
trace_samples(context, state, cuscene, bvh, lights, scene, params__);
trace_samples(context, state, cuscene, bvh, lights, scene, params);
print_info("render sample {}/{}: {}", state.samples, params.samples,
elapsed_formatted(sample_timer));
if (params.savebatch && state.samples % params.batch == 0) {
Expand Down
33 changes: 17 additions & 16 deletions apps/ymesh/ymesh.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ void run_glpath(const glpath_params& params) {
// geodesic solver
auto adjacencies = face_adjacencies(shape.triangles);
auto solver = make_dual_geodesic_solver(
shape.triangles, shape.positions, adjacencies);
shape.triangles, shape.positions, adjacencies);
auto bezier = true;

// bezier algos
Expand Down Expand Up @@ -256,9 +256,9 @@ void run_glpath(const glpath_params& params) {
auto mouse_uv = vec2f{input.cursor.x / float(input.window.x),
input.cursor.y / float(input.window.y)};
auto ray = camera_ray(camera.frame, camera.lens, camera.aspect,
camera.film, mouse_uv);
camera.film, mouse_uv);
auto isec = intersect_triangles_bvh(
bvh, shape.triangles, shape.positions, ray, false);
bvh, shape.triangles, shape.positions, ray, false);
if (isec.hit) {
if (stroke.empty() || stroke.back().element != isec.element ||
stroke.back().uv != isec.uv) {
Expand Down Expand Up @@ -423,7 +423,7 @@ void run_glpathd(const glpathd_params& params) {
// geodesic solver
auto adjacencies = face_adjacencies(shape.triangles);
auto solver = make_dual_geodesic_solver(
shape.triangles, shape.positions, adjacencies);
shape.triangles, shape.positions, adjacencies);

// other solver
// auto v2t = vertex_to_triangles(shape.triangles, shape.positions,
Expand All @@ -450,7 +450,7 @@ void run_glpathd(const glpathd_params& params) {
auto mouse_uv = vec2f{input.cursor.x / float(input.window.x),
input.cursor.y / float(input.window.y)};
auto ray = camera_ray(
camera.frame, camera.lens, camera.aspect, camera.film, mouse_uv);
camera.frame, camera.lens, camera.aspect, camera.film, mouse_uv);
auto isec = intersect_triangles_bvh(
bvh, shape.triangles, shape.positions, ray, false);
if (isec.hit) {
Expand All @@ -469,15 +469,15 @@ void run_glpathd(const glpathd_params& params) {
scene.shapes.at(1) = points_to_spheres(positions, 2, 0.002f);
updated_shapes.push_back(1);
auto path1 = compute_shortest_path(solver, shape.triangles,
shape.positions, adjacencies, point1, point2);
shape.positions, adjacencies, point1, point2);
auto positions1 = vector<vec3f>{};
for (auto [element, uv] : path1) {
positions1.push_back(eval_position(shape, element, uv));
}
scene.shapes.at(2) = polyline_to_cylinders(positions1, 4, 0.002f);
updated_shapes.push_back(2);
auto positions2 = visualize_shortest_path(solver, shape.triangles,
shape.positions, adjacencies, point1, point2, true);
shape.positions, adjacencies, point1, point2, true);
scene.shapes.at(3) = polyline_to_cylinders(positions2, 4, 0.002f);
updated_shapes.push_back(3);
// auto path3 = visualize_shortest_path(solver2, shape.triangles,
Expand Down Expand Up @@ -553,11 +553,11 @@ sculpt_state make_sculpt_state(
const shape_data& shape, const texture_data& texture) {
auto state = sculpt_state{};
state.bvh = make_triangles_bvh(
shape.triangles, shape.positions, shape.radius);
shape.triangles, shape.positions, shape.radius);
state.grid = make_hash_grid(shape.positions, 0.05f);
auto adjacencies = face_adjacencies(shape.triangles);
state.solver = make_geodesic_solver(
shape.triangles, adjacencies, shape.positions);
shape.triangles, adjacencies, shape.positions);
state.adjacencies = vertex_adjacencies(shape.triangles, adjacencies);
state.tex_image = texture;
state.base_shape = shape;
Expand Down Expand Up @@ -650,7 +650,8 @@ vector<int> stroke_parameterization(vector<vec2f>& coords,
auto new_coord = coords[neighbor] + projection;
auto avg_lenght = (length(current_coord) + length(new_coord)) / 2;
auto new_dir = normalize(current_coord + new_coord);
coords[node] = current_coord == zero2f ? new_coord : new_dir * avg_lenght;
coords[node] = current_coord == vec2f{0, 0} ? new_coord
: new_dir * avg_lenght;

// following doesn't work
// coords[node] = current_coord + (weight * (coords[neighbor] +
Expand Down Expand Up @@ -747,7 +748,7 @@ vector<int> stroke_parameterization(vector<vec2f>& coords,
auto visited = vector<bool>(positions.size(), false);
for (auto sample : sampling) visited[sample] = true;

coords = vector<vec2f>(solver.graph.size(), zero2f);
coords = vector<vec2f>(solver.graph.size(), vec2f{0, 0});
coords[sampling[0]] = {radius, radius};
vertices.insert(sampling[0]);
for (size_t i = 1; i < sampling.size(); i++) {
Expand Down Expand Up @@ -868,7 +869,7 @@ bool texture_brush(vector<vec3f>& positions, vector<vec2f>& texcoords,

auto scale_factor = 3.5f / params.radius;
auto max_height = gaussian_distribution(
{0, 0, 0}, {0, 0, 0}, 0.7f, scale_factor, params.strength, params.radius);
{0, 0, 0}, {0, 0, 0}, 0.7f, scale_factor, params.strength, params.radius);

for (auto idx : vertices) {
auto uv = texcoords[idx];
Expand Down Expand Up @@ -1066,7 +1067,7 @@ static pair<vector<shape_point>, vec2f> sample_stroke(const bvh_tree& bvh,

// sample
auto delta_pos = distance(eval_position(shape, last.element, last.uv),
eval_position(shape, mouse.element, mouse.uv));
eval_position(shape, mouse.element, mouse.uv));
auto stroke_dist = params.radius * 0.2f;
auto steps = int(delta_pos / stroke_dist);
if (steps == 0) return {};
Expand Down Expand Up @@ -1094,9 +1095,9 @@ static pair<bool, bool> sculpt_update(sculpt_state& state, shape_data& shape,
state.bvh, shape.triangles, shape.positions, ray, false);
if (isec.hit) {
cursor = make_cursor(eval_position(shape, isec.element, isec.uv),
eval_normal(shape, isec.element, isec.uv),
params.radius *
(params.type == sculpt_brush_type::gaussian ? 0.5f : 1.0f));
eval_normal(shape, isec.element, isec.uv),
params.radius *
(params.type == sculpt_brush_type::gaussian ? 0.5f : 1.0f));
updated_cursor = true;
}

Expand Down
2 changes: 1 addition & 1 deletion exts/embed_ptx/embed_ptx.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ function(embed_ptx)
target_link_libraries(${PTX_TARGET} PRIVATE ${EMBED_PTX_PTX_LINK_LIBRARIES})
set_property(TARGET ${PTX_TARGET} PROPERTY CUDA_PTX_COMPILATION ON)
set_property(TARGET ${PTX_TARGET} PROPERTY CUDA_ARCHITECTURES OFF)
target_compile_options(${PTX_TARGET} PRIVATE "-lineinfo")
target_compile_options(${PTX_TARGET} PRIVATE "-lineinfo" "--expt-relaxed-constexpr")

## Create command to run the bin2c via the CMake script ##

Expand Down
69 changes: 66 additions & 3 deletions libs/yocto/yocto_color.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,13 @@

#include "yocto_math.h"

// -----------------------------------------------------------------------------
// CUDA SUPPORT
// -----------------------------------------------------------------------------
// When __CUDACC__ is defined, the `inline` keyword is redefined as a macro so
// that each subsequent `inline` expands to `inline __device__ __forceinline__`.
// The macro stays active until the matching `#undef inline` at the end of this
// header.
#ifdef __CUDACC__
#define inline inline __device__ __forceinline__
#endif

// -----------------------------------------------------------------------------
// COLOR OPERATIONS
// -----------------------------------------------------------------------------
Expand Down Expand Up @@ -156,6 +163,8 @@ inline vec4f colorgrade(

} // namespace yocto

#ifndef __CUDACC__

// -----------------------------------------------------------------------------
// COLOR SPACE CONVERSION
// -----------------------------------------------------------------------------
Expand Down Expand Up @@ -190,6 +199,8 @@ inline vec3f convert_color(const vec3f& col, color_space from, color_space to);

} // namespace yocto

#endif

// -----------------------------------------------------------------------------
//
//
Expand Down Expand Up @@ -270,6 +281,8 @@ inline vec3f saturate(
return max({0, 0, 0}, grey + (rgb - grey) * (saturation * 2));
}

#ifndef __CUDACC__

// Filmic tonemapping
inline vec3f tonemap_filmic(const vec3f& hdr_, bool accurate_fit = false) {
if (!accurate_fit) {
Expand Down Expand Up @@ -303,6 +316,43 @@ inline vec3f tonemap_filmic(const vec3f& hdr_, bool accurate_fit = false) {
}
}

#else

// Filmic tonemapping of an HDR color.
// With `accurate_fit` false, applies the single rational curve from
// https://knarkowicz.wordpress.com/2016/01/06/aces-filmic-tone-mapping-curve/
// With `accurate_fit` true, applies the matrix-based fit from
// https://github.com/TheRealMJP/BakingLab/blob/master/BakingLab/ACES.hlsl
// In both cases the result is clamped from below at zero.
inline vec3f tonemap_filmic(const vec3f& hdr_, bool accurate_fit = false) {
  if (accurate_fit) {
    // sRGB => XYZ => D65_2_D60 => AP1 => RRT_SAT
    auto ACESInputMat = transpose(mat3f{
        {0.59719f, 0.35458f, 0.04823f},
        {0.07600f, 0.90834f, 0.01566f},
        {0.02840f, 0.13383f, 0.83777f},
    });
    // ODT_SAT => XYZ => D60_2_D65 => sRGB
    auto ACESOutputMat = transpose(mat3f{
        {1.60475f, -0.53108f, -0.07367f},
        {-0.10208f, 1.10813f, -0.00605f},
        {-0.00327f, -0.07276f, 1.07602f},
    });
    // RRT => ODT, evaluated as a ratio of two polynomials in v
    auto RRTAndODTFit = [](const vec3f& v) -> vec3f {
      return (v * v + v * 0.0245786f - 0.000090537f) /
             (v * v * 0.983729f + v * 0.4329510f + 0.238081f);
    };

    // input transform, then curve fit, then output transform
    auto aces_color = ACESInputMat * hdr_;
    auto mapped     = ACESOutputMat * RRTAndODTFit(aces_color);
    return max({0, 0, 0}, mapped);
  }

  // fast approximation
  auto scaled = hdr_ * 0.6f;  // brings it back to ACES range
  auto mapped = (scaled * scaled * 2.51f + scaled * 0.03f) /
                (scaled * scaled * 2.43f + scaled * 0.59f + 0.14f);
  return max({0, 0, 0}, mapped);
}

#endif

inline vec3f tonemap(const vec3f& hdr, float exposure, bool filmic, bool srgb) {
auto rgb = hdr;
if (exposure != 0) rgb *= exp2(exposure);
Expand Down Expand Up @@ -559,6 +609,8 @@ inline vec4f colorgrade(

} // namespace yocto

#ifndef __CUDACC__

// -----------------------------------------------------------------------------
// COLOR SPACES
// -----------------------------------------------------------------------------
Expand Down Expand Up @@ -617,12 +669,14 @@ inline color_space_params get_color_scape_params(color_space space) {
};
static auto make_gamma_rgb_space =
[](const vec2f& red, const vec2f& green, const vec2f& blue,
const vec2f& white, float gamma, const vec4f& curve_abcd = zero4f) {
const vec2f& white, float gamma,
const vec4f& curve_abcd = vec4f{0, 0, 0, 0}) {
return color_space_params{red, green, blue, white,
rgb_to_xyz_mat(red, green, blue, white),
inverse(rgb_to_xyz_mat(red, green, blue, white)),
curve_abcd == zero4f ? color_space_params::curve_t::gamma
: color_space_params::curve_t::linear_gamma};
curve_abcd == vec4f{0, 0, 0, 0}
? color_space_params::curve_t::gamma
: color_space_params::curve_t::linear_gamma};
};
static auto make_other_rgb_space =
[](const vec2f& red, const vec2f& green, const vec2f& blue,
Expand Down Expand Up @@ -919,3 +973,12 @@ inline vec3f convert_color(const vec3f& col, color_space from, color_space to) {
} // namespace yocto

#endif

// -----------------------------------------------------------------------------
// CUDA SUPPORT
// -----------------------------------------------------------------------------
#ifdef __CUDACC__
#undef inline
#endif

#endif
Loading