From c408be130577f707c0a136e041589ae35195a5bb Mon Sep 17 00:00:00 2001 From: Sandeep Menon Date: Sat, 27 Apr 2024 01:58:19 -0400 Subject: [PATCH 01/34] preprocess batches for backward outline --- cuda_rasterizer/backward.cu | 281 +++++++++++++++++++++- cuda_rasterizer/backward.h | 25 ++ cuda_rasterizer/rasterizer_impl.cu | 63 +++++ diff_gaussian_rasterization/__init__.py | 5 +- ext.cpp | 36 +-- rasterize_points.cu | 91 ++++++++ rasterize_points.h | 298 ++++++++++++------------ 7 files changed, 629 insertions(+), 170 deletions(-) diff --git a/cuda_rasterizer/backward.cu b/cuda_rasterizer/backward.cu index 1f316f2..18e04f7 100644 --- a/cuda_rasterizer/backward.cu +++ b/cuda_rasterizer/backward.cu @@ -17,7 +17,7 @@ namespace cg = cooperative_groups; // Backward pass for conversion of spherical harmonics to RGB for // each Gaussian. -__device__ void computeColorFromSH(int idx, int deg, int max_coeffs, const glm::vec3* means, glm::vec3 campos, const float* shs, const bool* clamped, const glm::vec3* dL_dcolor, glm::vec3* dL_dmeans, glm::vec3* dL_dshs) +__device__ void computeColorFromSH(int idx, int view_idx, int deg, int max_coeffs, const glm::vec3* means, glm::vec3 campos, const float* shs, const bool* clamped, const glm::vec3* dL_dcolor, glm::vec3* dL_dmeans, glm::vec3* dL_dshs) { // Compute intermediate values, as it is done during forward glm::vec3 pos = means[idx]; @@ -29,9 +29,9 @@ __device__ void computeColorFromSH(int idx, int deg, int max_coeffs, const glm:: // Use PyTorch rule for clamping: if clamping was applied, // gradient becomes 0. glm::vec3 dL_dRGB = dL_dcolor[idx]; - dL_dRGB.x *= clamped[3 * idx + 0] ? 0 : 1; - dL_dRGB.y *= clamped[3 * idx + 1] ? 0 : 1; - dL_dRGB.z *= clamped[3 * idx + 2] ? 0 : 1; + dL_dRGB.x *= clamped[3 * view_idx + 0] ? 0 : 1; + dL_dRGB.y *= clamped[3 * view_idx + 1] ? 0 : 1; + dL_dRGB.z *= clamped[3 * view_idx + 2] ? 0 : 1; glm::vec3 dRGBdx(0, 0, 0); glm::vec3 dRGBdy(0, 0, 0); @@ -273,6 +273,148 @@ __global__ void computeCov2DCUDA(int P, dL_dmeans[idx] = dL_dmean; } +__global__ void computeCov2DCUDABatched( + const int num_viewpoints, + const int P, + const float3* means, + const int* radii, + const float* cov3Ds, + const float h_x, float h_y, + const float* tan_fovx, const float* tan_fovy, + const float* viewmatrix_arr, + const float* dL_dconics, + float3* dL_dmeans, + float* dL_dcov) +{ + auto point_idx = blockIdx.x * blockDim.x + threadIdx.x; + auto viewpoint_idx = blockIdx.y; + + if (point_idx >= P || viewpoint_idx >= num_viewpoints) + return; + + auto idx = viewpoint_idx * P + point_idx; + if (!(radii[idx] > 0)) + return; + + const float* view_matrix = viewmatrix_arr + viewpoint_idx * 16; + + // Reading location of 3D covariance for this Gaussian + const float* cov3D = cov3Ds + 6 * idx; + + // Fetch gradients, recompute 2D covariance and relevant + // intermediate forward results needed in the backward. + float3 mean = means[point_idx]; + float3 dL_dconic = { dL_dconics[4 * idx], dL_dconics[4 * idx + 1], dL_dconics[4 * idx + 3] }; + float3 t = transformPoint4x3(mean, view_matrix); + + const float limx = 1.3f * tan_fovx[viewpoint_idx]; + const float limy = 1.3f * tan_fovy[viewpoint_idx]; + const float txtz = t.x / t.z; + const float tytz = t.y / t.z; + t.x = min(limx, max(-limx, txtz)) * t.z; + t.y = min(limy, max(-limy, tytz)) * t.z; + + const float x_grad_mul = txtz < -limx || txtz > limx ? 0 : 1; + const float y_grad_mul = tytz < -limy || tytz > limy ? 0 : 1; + + glm::mat3 J = glm::mat3(h_x / t.z, 0.0f, -(h_x * t.x) / (t.z * t.z), + 0.0f, h_y / t.z, -(h_y * t.y) / (t.z * t.z), + 0, 0, 0); + + glm::mat3 W = glm::mat3( + view_matrix[0], view_matrix[4], view_matrix[8], + view_matrix[1], view_matrix[5], view_matrix[9], + view_matrix[2], view_matrix[6], view_matrix[10]); + + glm::mat3 Vrk = glm::mat3( + cov3D[0], cov3D[1], cov3D[2], + cov3D[1], cov3D[3], cov3D[4], + cov3D[2], cov3D[4], cov3D[5]); + + glm::mat3 T = W * J; + + glm::mat3 cov2D = glm::transpose(T) * glm::transpose(Vrk) * T; + + // Use helper variables for 2D covariance entries. More compact. + float a = cov2D[0][0] += 0.3f; + float b = cov2D[0][1]; + float c = cov2D[1][1] += 0.3f; + + float denom = a * c - b * b; + float dL_da = 0, dL_db = 0, dL_dc = 0; + float denom2inv = 1.0f / ((denom * denom) + 0.0000001f); + + if (denom2inv != 0) + { + // Gradients of loss w.r.t. entries of 2D covariance matrix, + // given gradients of loss w.r.t. conic matrix (inverse covariance matrix). + // e.g., dL / da = dL / d_conic_a * d_conic_a / d_a + dL_da = denom2inv * (-c * c * dL_dconic.x + 2 * b * c * dL_dconic.y + (denom - a * c) * dL_dconic.z); + dL_dc = denom2inv * (-a * a * dL_dconic.z + 2 * a * b * dL_dconic.y + (denom - a * c) * dL_dconic.x); + dL_db = denom2inv * 2 * (b * c * dL_dconic.x - (denom + 2 * b * b) * dL_dconic.y + a * b * dL_dconic.z); + + // Gradients of loss L w.r.t. each 3D covariance matrix (Vrk) entry, + // given gradients w.r.t. 2D covariance matrix (diagonal). + // cov2D = transpose(T) * transpose(Vrk) * T; + dL_dcov[6 * idx + 0] = (T[0][0] * T[0][0] * dL_da + T[0][0] * T[1][0] * dL_db + T[1][0] * T[1][0] * dL_dc); + dL_dcov[6 * idx + 3] = (T[0][1] * T[0][1] * dL_da + T[0][1] * T[1][1] * dL_db + T[1][1] * T[1][1] * dL_dc); + dL_dcov[6 * idx + 5] = (T[0][2] * T[0][2] * dL_da + T[0][2] * T[1][2] * dL_db + T[1][2] * T[1][2] * dL_dc); + + // Gradients of loss L w.r.t. each 3D covariance matrix (Vrk) entry, + // given gradients w.r.t. 2D covariance matrix (off-diagonal). + // Off-diagonal elements appear twice --> double the gradient. + // cov2D = transpose(T) * transpose(Vrk) * T; + dL_dcov[6 * idx + 1] = 2 * T[0][0] * T[0][1] * dL_da + (T[0][0] * T[1][1] + T[0][1] * T[1][0]) * dL_db + 2 * T[1][0] * T[1][1] * dL_dc; + dL_dcov[6 * idx + 2] = 2 * T[0][0] * T[0][2] * dL_da + (T[0][0] * T[1][2] + T[0][2] * T[1][0]) * dL_db + 2 * T[1][0] * T[1][2] * dL_dc; + dL_dcov[6 * idx + 4] = 2 * T[0][2] * T[0][1] * dL_da + (T[0][1] * T[1][2] + T[0][2] * T[1][1]) * dL_db + 2 * T[1][1] * T[1][2] * dL_dc; + } + else + { + for (int i = 0; i < 6; i++) + dL_dcov[6 * idx + i] = 0; + } + + // Gradients of loss w.r.t. upper 2x3 portion of intermediate matrix T + // cov2D = transpose(T) * transpose(Vrk) * T; + float dL_dT00 = 2 * (T[0][0] * Vrk[0][0] + T[0][1] * Vrk[0][1] + T[0][2] * Vrk[0][2]) * dL_da + + (T[1][0] * Vrk[0][0] + T[1][1] * Vrk[0][1] + T[1][2] * Vrk[0][2]) * dL_db; + float dL_dT01 = 2 * (T[0][0] * Vrk[1][0] + T[0][1] * Vrk[1][1] + T[0][2] * Vrk[1][2]) * dL_da + + (T[1][0] * Vrk[1][0] + T[1][1] * Vrk[1][1] + T[1][2] * Vrk[1][2]) * dL_db; + float dL_dT02 = 2 * (T[0][0] * Vrk[2][0] + T[0][1] * Vrk[2][1] + T[0][2] * Vrk[2][2]) * dL_da + + (T[1][0] * Vrk[2][0] + T[1][1] * Vrk[2][1] + T[1][2] * Vrk[2][2]) * dL_db; + float dL_dT10 = 2 * (T[1][0] * Vrk[0][0] + T[1][1] * Vrk[0][1] + T[1][2] * Vrk[0][2]) * dL_dc + + (T[0][0] * Vrk[0][0] + T[0][1] * Vrk[0][1] + T[0][2] * Vrk[0][2]) * dL_db; + float dL_dT11 = 2 * (T[1][0] * Vrk[1][0] + T[1][1] * Vrk[1][1] + T[1][2] * Vrk[1][2]) * dL_dc + + (T[0][0] * Vrk[1][0] + T[0][1] * Vrk[1][1] + T[0][2] * Vrk[1][2]) * dL_db; + float dL_dT12 = 2 * (T[1][0] * Vrk[2][0] + T[1][1] * Vrk[2][1] + T[1][2] * Vrk[2][2]) * dL_dc + + (T[0][0] * Vrk[2][0] + T[0][1] * Vrk[2][1] + T[0][2] * Vrk[2][2]) * dL_db; + + // Gradients of loss w.r.t. upper 3x2 non-zero entries of Jacobian matrix + // T = W * J + float dL_dJ00 = W[0][0] * dL_dT00 + W[0][1] * dL_dT01 + W[0][2] * dL_dT02; + float dL_dJ02 = W[2][0] * dL_dT00 + W[2][1] * dL_dT01 + W[2][2] * dL_dT02; + float dL_dJ11 = W[1][0] * dL_dT10 + W[1][1] * dL_dT11 + W[1][2] * dL_dT12; + float dL_dJ12 = W[2][0] * dL_dT10 + W[2][1] * dL_dT11 + W[2][2] * dL_dT12; + + float tz = 1.f / t.z; + float tz2 = tz * tz; + float tz3 = tz2 * tz; + + // Gradients of loss w.r.t. transformed Gaussian mean t + float dL_dtx = x_grad_mul * -h_x * tz2 * dL_dJ02; + float dL_dty = y_grad_mul * -h_y * tz2 * dL_dJ12; + float dL_dtz = -h_x * tz2 * dL_dJ00 - h_y * tz2 * dL_dJ11 + (2 * h_x * t.x) * tz3 * dL_dJ02 + (2 * h_y * t.y) * tz3 * dL_dJ12; + + // Account for transformation of mean to t + // t = transformPoint4x3(mean, view_matrix); + float3 dL_dmean = transformVec4x3Transpose({ dL_dtx, dL_dty, dL_dtz }, view_matrix); + + // Gradients of loss w.r.t. Gaussian means, but only the portion + // that is caused because the mean affects the covariance matrix. + // Additional mean gradient is accumulated in BACKWARD::preprocess. + dL_dmeans[idx] = dL_dmean; +} + // Backward pass for the conversion of scale and rotation to a // 3D covariance matrix for each Gaussian. __device__ void computeCov3D(int idx, const glm::vec3 scale, float mod, const glm::vec4 rot, const float* dL_dcov3Ds, glm::vec3* dL_dscales, glm::vec4* dL_drots) @@ -388,13 +530,73 @@ __global__ void preprocessCUDA( // Compute gradient updates due to computing colors from SHs if (shs) - computeColorFromSH(idx, D, M, (glm::vec3*)means, *campos, shs, clamped, (glm::vec3*)dL_dcolor, (glm::vec3*)dL_dmeans, (glm::vec3*)dL_dsh); + computeColorFromSH(idx, idx, D, M, (glm::vec3*)means, *campos, shs, clamped, (glm::vec3*)dL_dcolor, (glm::vec3*)dL_dmeans, (glm::vec3*)dL_dsh); // Compute gradient updates due to computing covariance from scale/rotation if (scales) computeCov3D(idx, scales[idx], scale_modifier, rotations[idx], dL_dcov3D, dL_dscale, dL_drot); } +template +__global__ void preprocessCUDABatched( + const int num_viewpoints, + const int P, const int D, const int M, + const float3* means, + const int* radii, + const float* shs, + const bool* clamped, + const glm::vec3* scales, + const glm::vec4* rotations, + const float scale_modifier, + const float* projmatrix_arr, + const glm::vec3* campos, + const float3* dL_dmean2D, + glm::vec3* dL_dmeans, + float* dL_dcolor,//TODO: this should be change to const float*, because we do not modify dL_dcolor in preprocessCUDA backward. + float* dL_dcov3D, + float* dL_dsh, + glm::vec3* dL_dscale, + glm::vec4* dL_drot) +{ + auto point_idx = blockIdx.x * blockDim.x + threadIdx.x; + auto viewpoint_idx = blockIdx.y; + if (viewpoint_idx >= num_viewpoints || point_idx >= P) return; + return; + + auto idx = viewpoint_idx * P + point_idx; + if (!(radii[idx] > 0)) + return; + + const float* proj = projmatrix_arr + viewpoint_idx * 16; + + float3 m = means[idx]; + + // Taking care of gradients from the screenspace points + float4 m_hom = transformPoint4x4(m, proj); + float m_w = 1.0f / (m_hom.w + 0.0000001f); + + // Compute loss gradient w.r.t. 3D means due to gradients of 2D means + // from rendering procedure + glm::vec3 dL_dmean; + float mul1 = (proj[0] * m.x + proj[4] * m.y + proj[8] * m.z + proj[12]) * m_w * m_w; + float mul2 = (proj[1] * m.x + proj[5] * m.y + proj[9] * m.z + proj[13]) * m_w * m_w; + dL_dmean.x = (proj[0] * m_w - proj[3] * mul1) * dL_dmean2D[idx].x + (proj[1] * m_w - proj[3] * mul2) * dL_dmean2D[idx].y; + dL_dmean.y = (proj[4] * m_w - proj[7] * mul1) * dL_dmean2D[idx].x + (proj[5] * m_w - proj[7] * mul2) * dL_dmean2D[idx].y; + dL_dmean.z = (proj[8] * m_w - proj[11] * mul1) * dL_dmean2D[idx].x + (proj[9] * m_w - proj[11] * mul2) * dL_dmean2D[idx].y; + + // That's the second part of the mean gradient. Previous computation + // of cov2D and following SH conversion also affects it. + dL_dmeans[idx] += dL_dmean; + + // Compute gradient updates due to computing colors from SHs + if (shs) + computeColorFromSH(point_idx, idx, D, M, (glm::vec3*)means, campos[viewpoint_idx], shs, clamped, (glm::vec3*)dL_dcolor, (glm::vec3*)dL_dmeans, (glm::vec3*)dL_dsh); + + // Compute gradient updates due to computing covariance from scale/rotation + if (scales) + computeCov3D(idx, scales[point_idx], scale_modifier, rotations[point_idx], dL_dcov3D, dL_dscale, dL_drot); +} + // Backward version of the rendering procedure. template __global__ void __launch_bounds__(BLOCK_X * BLOCK_Y) @@ -691,4 +893,73 @@ void BACKWARD::render( dL_dopacity, dL_dcolors ); +} + +void BACKWARD::preprocess_batch( + const int num_viewpoints, + const int P, const int D, const int M, + const float3* means3D, + const int* radii, + const float* shs, + const bool* clamped, + const glm::vec3* scales, + const glm::vec4* rotations, + const float scale_modifier, + const float* cov3Ds, + const float* viewmatrix, + const float* projmatrix, + const float* tan_fovx, const float* tan_fovy, + const glm::vec3* campos, + const float3* dL_dmean2D, + const float* dL_dconic, + glm::vec3* dL_dmean3D, + float* dL_dcolor, + float* dL_dcov3D, + float* dL_dsh, + glm::vec3* dL_dscale, + glm::vec4* dL_drot) +{ + // Propagate gradients for the path of 2D conic matrix computation. + // Somewhat long, thus it is its own kernel rather than being part of + // "preprocess". When done, loss gradient w.r.t. 3D means has been + // modified and gradient w.r.t. 3D covariance matrix has been computed. + dim3 tile_grid(cdiv(P, ONE_DIM_BLOCK_SIZE), num_viewpoints); + + computeCov2DCUDABatched << > > ( + num_viewpoints, + P, + means3D, + radii, + cov3Ds, + focal_x, + focal_y, + tan_fovx, + tan_fovy, + viewmatrix, + dL_dconic, + (float3*)dL_dmean3D, + dL_dcov3D); + + // Propagate gradients for remaining steps: finish 3D mean gradients, + // propagate color gradients to SH (if desireD), propagate 3D covariance + // matrix gradients to scale and rotation. + preprocessCUDABatched << < tile_grid, ONE_DIM_BLOCK_SIZE >> > ( + num_viewpoints, + P, D, M, + (float3*)means3D, + radii, + shs, + clamped, + (glm::vec3*)scales, + (glm::vec4*)rotations, + scale_modifier, + projmatrix, + campos, + (float3*)dL_dmean2D, + (glm::vec3*)dL_dmean3D, + dL_dcolor, + dL_dcov3D, + dL_dsh, + dL_dscale, + dL_drot); } \ No newline at end of file diff --git a/cuda_rasterizer/backward.h b/cuda_rasterizer/backward.h index 1f1a790..62820c0 100644 --- a/cuda_rasterizer/backward.h +++ b/cuda_rasterizer/backward.h @@ -61,6 +61,31 @@ namespace BACKWARD float* dL_dsh, glm::vec3* dL_dscale, glm::vec4* dL_drot); + + void preprocess_batch( + const int num_viewpoints, + const int P, const int D, const int M, + const float3* means, + const int* radii, + const float* shs, + const bool* clamped, + const glm::vec3* scales, + const glm::vec4* rotations, + const float scale_modifier, + const float* cov3Ds, + const float* view, + const float* proj, + const float* tan_fovx, const float* tan_fovy, + const glm::vec3* campos, + const float3* dL_dmean2D, + const float* dL_dconics, + glm::vec3* dL_dmeans, + float* dL_dcolor, + float* dL_dcov3D, + float* dL_dsh, + glm::vec3* dL_dscale, + glm::vec4* dL_drot + ); } #endif \ No newline at end of file diff --git a/cuda_rasterizer/rasterizer_impl.cu b/cuda_rasterizer/rasterizer_impl.cu index 06e95db..c0a2461 100644 --- a/cuda_rasterizer/rasterizer_impl.cu +++ b/cuda_rasterizer/rasterizer_impl.cu @@ -583,6 +583,69 @@ void CudaRasterizer::Rasterizer::preprocessBackward( } +void CudaRasterizer::Rasterizer::preprocessBackwardBatches( + const int num_viewpoints, + const int* radii, + const float* cov3D, + const bool* clamped,//the above are all per-Gaussian intemediate results. + const int P, int D, int M, int R, + const int width, int height,//rasterization setting. + const float* means3D, + const float* scales, + const float* rotations, + const float* shs,//input of this operator + const float scale_modifier, + const float* viewmatrix, + const float* projmatrix, + const float* campos, + const float* tan_fovx, const float* tan_fovy,//rasterization setting. + const float* dL_dmean2D, + const float* dL_dconic, + float* dL_dcolor,//gradients of output of this operator. TODO: dL_dcolor is not const here because low-level implementation does not use const. Even though, we never modify it. + float* dL_dmean3D, + float* dL_dcov3D, + float* dL_dscale, + float* dL_drot, + float* dL_dsh,//gradients of input of this operator + bool debug, + const pybind11::dict &args) +{ + auto [global_rank, world_size, iteration, log_interval, device, zhx_debug, zhx_time, mode, dist_division_mode, log_folder] = prepareArgs(args); + + MyTimerOnGPU timer; + + const float* cov3D_ptr = cov3D; + timer.start("b20 preprocess"); + CHECK_CUDA(BACKWARD::preprocess_batch( + num_viewpoints, + P, D, M, + (float3*)means3D, + radii, + shs, + clamped, + (glm::vec3*)scales, + (glm::vec4*)rotations, + scale_modifier, + cov3D_ptr, + viewmatrix, + projmatrix, + tan_fovx, tan_fovy, + (glm::vec3*)campos, + (float3*)dL_dmean2D, + dL_dconic, + (glm::vec3*)dL_dmean3D, + dL_dcolor, + dL_dcov3D, + dL_dsh, + (glm::vec3*)dL_dscale, + (glm::vec4*)dL_drot), debug) + timer.stop("b20 preprocess"); + + // Print out timing information + if (zhx_time && iteration % log_interval == 1) { + timer.printAllTimes(iteration, world_size, global_rank, log_folder, false); + } +} diff --git a/diff_gaussian_rasterization/__init__.py b/diff_gaussian_rasterization/__init__.py index f7860b4..d45e97e 100644 --- a/diff_gaussian_rasterization/__init__.py +++ b/diff_gaussian_rasterization/__init__.py @@ -140,7 +140,10 @@ def backward(ctx, grad_means2D, grad_rgb, grad_conic_opacity, grad_radii, grad_d raster_settings.debug, cuda_args) - dL_dmeans3D, dL_dscales, dL_drotations, dL_dsh, dL_dopacity = _C.preprocess_gaussians_backward(*args) + if not torch.is_tensor(raster_settings.tanfovx): + dL_dmeans3D, dL_dscales, dL_drotations, dL_dsh, dL_dopacity = _C.preprocess_gaussians_backward(*args) + else: + dL_dmeans3D, dL_dscales, dL_drotations, dL_dsh, dL_dopacity = _C.preprocess_gaussians_backward_batched(*args) grads = ( dL_dmeans3D.contiguous(), diff --git a/ext.cpp b/ext.cpp index e4249bb..4a39895 100644 --- a/ext.cpp +++ b/ext.cpp @@ -3,7 +3,7 @@ * GRAPHDECO research group, https://team.inria.fr/graphdeco * All rights reserved. * - * This software is free for non-commercial, research and evaluation use + * This software is free for non-commercial, research and evaluation use * under the terms of the LICENSE.md file. * * For inquiries contact george.drettakis@inria.fr @@ -13,22 +13,24 @@ #include "rasterize_points.h" #include "cuda_rasterizer/config.h" -PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { - m.def("mark_visible", &markVisible); - m.def("preprocess_gaussians", &PreprocessGaussiansCUDA); - m.def("preprocess_gaussians_batched", &PreprocessGaussiansCUDABatches); - m.def("preprocess_gaussians_backward", &PreprocessGaussiansBackwardCUDA); - m.def("get_distribution_strategy", &GetDistributionStrategyCUDA); - m.def("render_gaussians", &RenderGaussiansCUDA); - m.def("render_gaussians_backward", &RenderGaussiansBackwardCUDA); - m.def("get_local2j_ids_bool", &GetLocal2jIdsBoolCUDA); - m.def("get_local2j_ids_bool_adjust_mode6", &GetLocal2jIdsBoolAdjustMode6CUDA); +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) +{ + m.def("mark_visible", &markVisible); + m.def("preprocess_gaussians", &PreprocessGaussiansCUDA); + m.def("preprocess_gaussians_batched", &PreprocessGaussiansCUDABatches); + m.def("preprocess_gaussians_backward", &PreprocessGaussiansBackwardCUDA); + m.def("preprocess_gaussians_backward_batched", &PreprocessGaussiansBackwardCUDABatches); + m.def("get_distribution_strategy", &GetDistributionStrategyCUDA); + m.def("render_gaussians", &RenderGaussiansCUDA); + m.def("render_gaussians_backward", &RenderGaussiansBackwardCUDA); + m.def("get_local2j_ids_bool", &GetLocal2jIdsBoolCUDA); + m.def("get_local2j_ids_bool_adjust_mode6", &GetLocal2jIdsBoolAdjustMode6CUDA); - // Image Distribution Utilities - m.def("get_touched_locally", &GetTouchedLocally); - m.def("load_image_tiles_by_pos", &LoadImageTilesByPos); - m.def("set_image_tiles_by_pos", &SetImageTilesByPos); - m.def("get_pixels_compute_locally_and_in_rect", &GetPixelsComputeLocallyAndInRect); + // Image Distribution Utilities + m.def("get_touched_locally", &GetTouchedLocally); + m.def("load_image_tiles_by_pos", &LoadImageTilesByPos); + m.def("set_image_tiles_by_pos", &SetImageTilesByPos); + m.def("get_pixels_compute_locally_and_in_rect", &GetPixelsComputeLocallyAndInRect); - m.def("get_block_XY", &GetBlockXY); + m.def("get_block_XY", &GetBlockXY); } \ No newline at end of file diff --git a/rasterize_points.cu b/rasterize_points.cu index e8eb8a7..f391052 100644 --- a/rasterize_points.cu +++ b/rasterize_points.cu @@ -311,6 +311,97 @@ std::tuple +PreprocessGaussiansBackwardCUDABatches( + const torch::Tensor &radii, + const torch::Tensor &cov3D, + const torch::Tensor &clamped, // the above are all per-Gaussian intemediate results. + const torch::Tensor &means3D, + const torch::Tensor &scales, + const torch::Tensor &rotations, + const torch::Tensor &sh, // input of this operator + const float scale_modifier, + const torch::Tensor &viewmatrix, + const torch::Tensor &projmatrix, + const torch::Tensor &tan_fovx, + const torch::Tensor &tan_fovy, + const int image_height, + const int image_width, + const int degree, + const torch::Tensor &campos, // rasterization setting. + const torch::Tensor &dL_dmeans2D, + const torch::Tensor &dL_dconic_opacity, + const torch::Tensor &dL_dcolors, // gradients of output of this operator + const int R, + const bool debug, + const pybind11::dict &args) +{ + const int P = means3D.size(0); + const int H = image_height; + const int W = image_width; + const int num_viewpoints = viewmatrix.size(0); + + int M = 0; + if(sh.size(0) != 0) + { + M = sh.size(1); + } + + torch::Tensor dL_dconic = torch::zeros({num_viewpoints, P, 2, 2}, means3D.options()); + // set dL_dconic[..., 0, 0] = dL_dconic_opacity[..., 0] + dL_dconic.select(2, 0).select(2, 0).copy_(dL_dconic_opacity.select(2, 0)); + // set dL_dconic[..., 0, 1] = dL_dconic_opacity[..., 1] + dL_dconic.select(2, 0).select(2, 1).copy_(dL_dconic_opacity.select(2, 1)); + // set dL_dconic[..., 1, 1] = dL_dconic_opacity[..., 2] + dL_dconic.select(2, 1).select(2, 1).copy_(dL_dconic_opacity.select(2, 2)); + dL_dconic = dL_dconic.contiguous(); + //TODO: is this correct usage? + + torch::Tensor dL_dopacity = torch::zeros({num_viewpoints, P, 1}, means3D.options()); + // set dL_dopacity[..., 0] = dL_dconic_opacity[..., 3] + dL_dopacity.select(2, 0).copy_(dL_dconic_opacity.select(2, 3)); + dL_dopacity = dL_dopacity.contiguous(); + + torch::Tensor dL_dmeans3D = torch::zeros({num_viewpoints, P, 3}, means3D.options()); + torch::Tensor dL_dcov3D = torch::zeros({num_viewpoints, P, 6}, means3D.options()); + //dL_dcov3D is itermidiate result to compute dL_drotations and dL_dscales, do not need to return to python. + torch::Tensor dL_dscales = torch::zeros({num_viewpoints, P, 3}, means3D.options()); + torch::Tensor dL_drotations = torch::zeros({num_viewpoints, P, 4}, means3D.options()); + torch::Tensor dL_dsh = torch::zeros({num_viewpoints, P, M, 3}, means3D.options()); + + if(P != 0) + { + CudaRasterizer::Rasterizer::preprocessBackwardBatches( + num_viewpoints, + radii.contiguous().data(), + cov3D.contiguous().data(), + clamped.contiguous().data(),//the above are all per-Gaussian intermediate results. + P, degree, M, R, + W, H, //rasterization setting. + means3D.contiguous().data(), + scales.data_ptr(), + rotations.data_ptr(), + sh.contiguous().data(),//input of this operator + scale_modifier, + viewmatrix.contiguous().data(), + projmatrix.contiguous().data(), + campos.contiguous().data(), + tan_fovx.contiguous().data(), + tan_fovy.contiguous().data(),,//rasterization setting. + dL_dmeans2D.contiguous().data(), + dL_dconic.contiguous().data(), + dL_dcolors.contiguous().data(),//gradients of output of this operator + dL_dmeans3D.contiguous().data(), + dL_dcov3D.contiguous().data(), + dL_dscales.contiguous().data(), + dL_drotations.contiguous().data(), + dL_dsh.contiguous().data(),//gradients of input of this operator + debug, + args); + } + + return std::make_tuple(dL_dmeans3D, dL_dscales, dL_drotations, dL_dsh, dL_dopacity); +} ////////////////////// GetDistributionStrategy //////////////////////// diff --git a/rasterize_points.h b/rasterize_points.h index 3700126..b1d65e9 100644 --- a/rasterize_points.h +++ b/rasterize_points.h @@ -3,7 +3,7 @@ * GRAPHDECO research group, https://team.inria.fr/graphdeco * All rights reserved. * - * This software is free for non-commercial, research and evaluation use + * This software is free for non-commercial, research and evaluation use * under the terms of the LICENSE.md file. * * For inquiries contact george.drettakis@inria.fr @@ -14,198 +14,202 @@ #include #include #include - - -torch::Tensor markVisible( - torch::Tensor& means3D, - torch::Tensor& viewmatrix, - torch::Tensor& projmatrix); - - +torch::Tensor markVisible( + torch::Tensor &means3D, + torch::Tensor &viewmatrix, + torch::Tensor &projmatrix); /////////////////////////////// Preprocess /////////////////////////////// - - - std::tuple PreprocessGaussiansCUDA( - const torch::Tensor& means3D, - const torch::Tensor& scales, - const torch::Tensor& rotations, - const torch::Tensor& sh, - const torch::Tensor& opacity,//3dgs' parametes. - const float scale_modifier, - const torch::Tensor& viewmatrix, - const torch::Tensor& projmatrix, - const float tan_fovx, - const float tan_fovy, + const torch::Tensor &means3D, + const torch::Tensor &scales, + const torch::Tensor &rotations, + const torch::Tensor &sh, + const torch::Tensor &opacity, // 3dgs' parametes. + const float scale_modifier, + const torch::Tensor &viewmatrix, + const torch::Tensor &projmatrix, + const float tan_fovx, + const float tan_fovy, const int image_height, const int image_width, - const int degree, - const torch::Tensor& campos, - const bool prefiltered,//raster_settings - const bool debug, - const pybind11::dict &args); + const int degree, + const torch::Tensor &campos, + const bool prefiltered, // raster_settings + const bool debug, + const pybind11::dict &args); + +std::tuple +PreprocessGaussiansBackwardCUDA( + const torch::Tensor &radii, + const torch::Tensor &cov3D, + const torch::Tensor &clamped, // the above are all per-Gaussian intemediate results. + const torch::Tensor &means3D, + const torch::Tensor &scales, + const torch::Tensor &rotations, + const torch::Tensor &sh, // input of this operator + const float scale_modifier, + const torch::Tensor &viewmatrix, + const torch::Tensor &projmatrix, + const float tan_fovx, + const float tan_fovy, + const int image_height, + const int image_width, + const int degree, + const torch::Tensor &campos, // rasterization setting. + const torch::Tensor &dL_dmeans2D, + const torch::Tensor &dL_dconic_opacity, + const torch::Tensor &dL_dcolors, // gradients of output of this operator + const int R, + const bool debug, + const pybind11::dict &args); std::tuple PreprocessGaussiansCUDABatches( - const torch::Tensor& means3D, - const torch::Tensor& scales, - const torch::Tensor& rotations, - const torch::Tensor& sh, - const torch::Tensor& opacity,//3dgs' parametes. - const float scale_modifier, - const torch::Tensor& viewmatrix, - const torch::Tensor& projmatrix, - const torch::Tensor& tan_fovx, - const torch::Tensor& tan_fovy, + const torch::Tensor &means3D, + const torch::Tensor &scales, + const torch::Tensor &rotations, + const torch::Tensor &sh, + const torch::Tensor &opacity, // 3dgs' parametes. + const float scale_modifier, + const torch::Tensor &viewmatrix, + const torch::Tensor &projmatrix, + const torch::Tensor &tan_fovx, + const torch::Tensor &tan_fovy, const int image_height, const int image_width, - const int degree, - const torch::Tensor& campos, - const bool prefiltered,//raster_settings - const bool debug, - const pybind11::dict &args); + const int degree, + const torch::Tensor &campos, + const bool prefiltered, // raster_settings + const bool debug, + const pybind11::dict &args); std::tuple - PreprocessGaussiansBackwardCUDA( - const torch::Tensor& radii, - const torch::Tensor& cov3D, - const torch::Tensor& clamped,//the above are all per-Gaussian intemediate results. - const torch::Tensor& means3D, - const torch::Tensor& scales, - const torch::Tensor& rotations, - const torch::Tensor& sh,//input of this operator - const float scale_modifier, - const torch::Tensor& viewmatrix, - const torch::Tensor& projmatrix, - const float tan_fovx, - const float tan_fovy, +PreprocessGaussiansBackwardCUDABatches( + const torch::Tensor &radii, + const torch::Tensor &cov3D, + const torch::Tensor &clamped, // the above are all per-Gaussian intemediate results. + const torch::Tensor &means3D, + const torch::Tensor &scales, + const torch::Tensor &rotations, + const torch::Tensor &sh, // input of this operator + const float scale_modifier, + const torch::Tensor &viewmatrix, + const torch::Tensor &projmatrix, + const torch::Tensor &tan_fovx, + const torch::Tensor &tan_fovy, const int image_height, const int image_width, - const int degree, - const torch::Tensor& campos,//rasterization setting. - const torch::Tensor& dL_dmeans2D, - const torch::Tensor& dL_dconic_opacity, - const torch::Tensor& dL_dcolors,//gradients of output of this operator - const int R, - const bool debug, - const pybind11::dict &args); - + const int degree, + const torch::Tensor &campos, // rasterization setting. + const torch::Tensor &dL_dmeans2D, + const torch::Tensor &dL_dconic_opacity, + const torch::Tensor &dL_dcolors, // gradients of output of this operator + const int R, + const bool debug, + const pybind11::dict &args); ////////////////////// GetDistributionStrategy //////////////////////// torch::Tensor GetDistributionStrategyCUDA( const int image_height, - const int image_width,// image setting - torch::Tensor& means2D,// (P, 2) - torch::Tensor& radii, - const bool debug, - const pybind11::dict &args); - - - + const int image_width, // image setting + torch::Tensor &means2D, // (P, 2) + torch::Tensor &radii, + const bool debug, + const pybind11::dict &args); ////////////////////// Image Distribution Utilities //////////////////////// torch::Tensor GetTouchedLocally( - const torch::Tensor& compute_locally, - const int image_height, - const int image_width, - const int extension_distance -); + const torch::Tensor &compute_locally, + const int image_height, + const int image_width, + const int extension_distance); torch::Tensor LoadImageTilesByPos( - const torch::Tensor& local_image_rect, - const torch::Tensor& all_tiles_pos, - int image_height, - int image_width, - int min_pixel_y, - int min_pixel_x, - int local_image_rect_height, - int local_image_rect_width); + const torch::Tensor &local_image_rect, + const torch::Tensor &all_tiles_pos, + int image_height, + int image_width, + int min_pixel_y, + int min_pixel_x, + int local_image_rect_height, + int local_image_rect_width); torch::Tensor SetImageTilesByPos( - const torch::Tensor& all_tiles_pos, - const torch::Tensor& image_tiles, - int image_height, - int image_width, - int min_pixel_y, - int min_pixel_x, - int local_image_rect_height, - int local_image_rect_width); + const torch::Tensor &all_tiles_pos, + const torch::Tensor &image_tiles, + int image_height, + int image_width, + int min_pixel_y, + int min_pixel_x, + int local_image_rect_height, + int local_image_rect_width); torch::Tensor GetPixelsComputeLocallyAndInRect( - const torch::Tensor& compute_locally, - int image_height, - int image_width, - int min_pixel_y, - int max_pixel_y, - int min_pixel_x, - int max_pixel_x); - - - + const torch::Tensor &compute_locally, + int image_height, + int image_width, + int min_pixel_y, + int max_pixel_y, + int min_pixel_x, + int max_pixel_x); /////////////////////////////// Render /////////////////////////////// - std::tuple RenderGaussiansCUDA( - const torch::Tensor& background, + const torch::Tensor &background, const int image_height, - const int image_width,// image setting - torch::Tensor& means2D, - torch::Tensor& depths, - torch::Tensor& radii, - torch::Tensor& conic_opacity, - torch::Tensor& rgb,//3dgs intermediate results - const torch::Tensor& compute_locally, - const bool debug, - const pybind11::dict &args); + const int image_width, // image setting + torch::Tensor &means2D, + torch::Tensor &depths, + torch::Tensor &radii, + torch::Tensor &conic_opacity, + torch::Tensor &rgb, // 3dgs intermediate results + const torch::Tensor &compute_locally, + const bool debug, + const pybind11::dict &args); std::tuple RenderGaussiansBackwardCUDA( - const torch::Tensor& background, - const int R, - const torch::Tensor& geomBuffer, - const torch::Tensor& binningBuffer, - const torch::Tensor& imageBuffer, - const torch::Tensor& compute_locally, - const torch::Tensor& dL_dout_color, - const torch::Tensor& means2D, - const torch::Tensor& conic_opacity, - const torch::Tensor& rgb, - const bool debug, - const pybind11::dict &args); - - + const torch::Tensor &background, + const int R, + const torch::Tensor &geomBuffer, + const torch::Tensor &binningBuffer, + const torch::Tensor &imageBuffer, + const torch::Tensor &compute_locally, + const torch::Tensor &dL_dout_color, + const torch::Tensor &means2D, + const torch::Tensor &conic_opacity, + const torch::Tensor &rgb, + const bool debug, + const pybind11::dict &args); /////////////////////////////// Utility tools /////////////////////////////// - - torch::Tensor GetLocal2jIdsBoolCUDA( - int image_height, - int image_width, - int mp_rank, - int mp_world_size, - const torch::Tensor& means2D, - const torch::Tensor& radii, - const torch::Tensor& dist_global_strategy, - const pybind11::dict &args); + int image_height, + int image_width, + int mp_rank, + int mp_world_size, + const torch::Tensor &means2D, + const torch::Tensor &radii, + const torch::Tensor &dist_global_strategy, + const pybind11::dict &args); torch::Tensor GetLocal2jIdsBoolAdjustMode6CUDA( - int image_height, - int image_width, - int mp_rank, - int mp_world_size, - const torch::Tensor& means2D, - const torch::Tensor& radii, - const torch::Tensor& rectangles, - const pybind11::dict &args); - + int image_height, + int image_width, + int mp_rank, + int mp_world_size, + const torch::Tensor &means2D, + const torch::Tensor &radii, + const torch::Tensor &rectangles, + const pybind11::dict &args); std::tuple GetBlockXY(); \ No newline at end of file From 6444aa71df624d333a0f2e648565719cedd52d09 Mon Sep 17 00:00:00 2001 From: prapti19 Date: Sat, 27 Apr 2024 12:29:40 -0400 Subject: [PATCH 02/34] solve syntax errors in backward --- cuda_rasterizer/backward.cu | 1 + cuda_rasterizer/backward.h | 1 + cuda_rasterizer/rasterizer.h | 28 ++++++++++++++++++++++++++++ cuda_rasterizer/rasterizer_impl.cu | 3 +++ rasterize_points.cu | 2 +- 5 files changed, 34 insertions(+), 1 deletion(-) diff --git a/cuda_rasterizer/backward.cu b/cuda_rasterizer/backward.cu index 18e04f7..1239216 100644 --- a/cuda_rasterizer/backward.cu +++ b/cuda_rasterizer/backward.cu @@ -908,6 +908,7 @@ void BACKWARD::preprocess_batch( const float* cov3Ds, const float* viewmatrix, const float* projmatrix, + const float focal_x, const float focal_y, const float* tan_fovx, const float* tan_fovy, const glm::vec3* campos, const float3* dL_dmean2D, diff --git a/cuda_rasterizer/backward.h b/cuda_rasterizer/backward.h index 62820c0..7dcb932 100644 --- a/cuda_rasterizer/backward.h +++ b/cuda_rasterizer/backward.h @@ -75,6 +75,7 @@ namespace BACKWARD const float* cov3Ds, const float* view, const float* proj, + const float focal_x, const float focal_y, const float* tan_fovx, const float* tan_fovy, const glm::vec3* campos, const float3* dL_dmean2D, diff --git a/cuda_rasterizer/rasterizer.h b/cuda_rasterizer/rasterizer.h index b7f93fd..f3a5220 100644 --- a/cuda_rasterizer/rasterizer.h +++ b/cuda_rasterizer/rasterizer.h @@ -115,6 +115,34 @@ namespace CudaRasterizer float* dL_dsh,//gradients of input of this operator bool debug, const pybind11::dict &args); + + + static void preprocessBackwardBatches( + const int num_viewpoints, + const int* radii, + const float* cov3D, + const bool* clamped, + const int P, int D, int M, int R, + const int width, int height, + const float* means3D, + const float* scales, + const float* rotations, + const float* shs, + const float scale_modifier, + const float* viewmatrix, + const float* projmatrix, + const float* campos, + const float* tan_fovx, const float* tan_fovy, + const float* dL_dmean2D, + const float* dL_dconic, + float* dL_dcolor, + float* dL_dmean3D, + float* dL_dcov3D, + float* dL_dscale, + float* dL_drot, + float* dL_dsh, + bool debug, + const pybind11::dict &args); ////////////////////// GetDistributionStrategy //////////////////////// diff --git a/cuda_rasterizer/rasterizer_impl.cu b/cuda_rasterizer/rasterizer_impl.cu index c0a2461..955e129 100644 --- a/cuda_rasterizer/rasterizer_impl.cu +++ b/cuda_rasterizer/rasterizer_impl.cu @@ -613,6 +613,8 @@ void CudaRasterizer::Rasterizer::preprocessBackwardBatches( auto [global_rank, world_size, iteration, log_interval, device, zhx_debug, zhx_time, mode, dist_division_mode, log_folder] = prepareArgs(args); MyTimerOnGPU timer; + const float focal_y = height / (2.0f * tan_fovy[0]);//cureently hardcoded + const float focal_x = width / (2.0f * tan_fovx[0]); const float* cov3D_ptr = cov3D; timer.start("b20 preprocess"); @@ -629,6 +631,7 @@ void CudaRasterizer::Rasterizer::preprocessBackwardBatches( cov3D_ptr, viewmatrix, projmatrix, + focal_x, focal_y, tan_fovx, tan_fovy, (glm::vec3*)campos, (float3*)dL_dmean2D, diff --git a/rasterize_points.cu b/rasterize_points.cu index f391052..b7814fa 100644 --- a/rasterize_points.cu +++ b/rasterize_points.cu @@ -387,7 +387,7 @@ PreprocessGaussiansBackwardCUDABatches( projmatrix.contiguous().data(), campos.contiguous().data(), tan_fovx.contiguous().data(), - tan_fovy.contiguous().data(),,//rasterization setting. + tan_fovy.contiguous().data(),//rasterization setting. dL_dmeans2D.contiguous().data(), dL_dconic.contiguous().data(), dL_dcolors.contiguous().data(),//gradients of output of this operator From 8df1b631b0bc6159d5d2ae60ea2dc146cb587db5 Mon Sep 17 00:00:00 2001 From: Sandeep Menon Date: Sat, 27 Apr 2024 20:23:47 -0400 Subject: [PATCH 03/34] Refactor GaussianRasterizationSettings class to handle raster_settings as a batch --- diff_gaussian_rasterization/__init__.py | 23 +++++- rasterization_tests.py | 100 +++++++----------------- 2 files changed, 47 insertions(+), 76 deletions(-) diff --git a/diff_gaussian_rasterization/__init__.py b/diff_gaussian_rasterization/__init__.py index d45e97e..9339ca8 100644 --- a/diff_gaussian_rasterization/__init__.py +++ b/diff_gaussian_rasterization/__init__.py @@ -59,6 +59,19 @@ def forward( ): # Restructure arguments the way that the C++ lib expects them + if isinstance(raster_settings, list): + rs = raster_settings[0] + rs.viewmatrix, rs.projmatrix, rs.campos = [ + torch.stack(tensors) for tensors in zip( + *[(rs.viewmatrix, rs.projmatrix, rs.campos) for rs in raster_settings] + ) + ] + rs.tanfovx, rs.tanfovy = [ + torch.tensor(vals, device=means3D.device) + for vals in zip(*[(rs.tanfovx, rs.tanfovy) for rs in raster_settings]) + ] + raster_settings = rs + args = ( means3D, scales, @@ -317,15 +330,17 @@ class GaussianRasterizationSettings(NamedTuple): debug : bool class GaussianRasterizerBatches(nn.Module): - def __init__(self, raster_settings): + def __init__(self, raster_settings_batch): super().__init__() - self.raster_settings_list = raster_settings + self.raster_settings_batch = raster_settings_batch def markVisible(self, positions): # Mark visible points (based on frustum culling for camera) with a boolean with torch.no_grad(): visible = [] - for viewmatrix, projmatrix in zip(self.raster_settings.viewmatrix, self.raster_settings.projmatrix): + for raster_settings in self.raster_settings_batch: + viewmatrix = raster_settings.viewmatrix + projmatrix = raster_settings.projmatrix visible.append(_C.mark_visible(positions, viewmatrix, projmatrix)) return visible @@ -338,7 +353,7 @@ def preprocess_gaussians(self, means3D, scales, rotations, shs, opacities, batch rotations, shs, opacities, - self.raster_settings_list, + self.raster_settings_batch, batched_cuda_args) class GaussianRasterizer(nn.Module): diff --git a/rasterization_tests.py b/rasterization_tests.py index afea6b9..2452cbf 100644 --- a/rasterization_tests.py +++ b/rasterization_tests.py @@ -16,6 +16,7 @@ rotations = torch.randn(num_gaussians, 4).cuda() shs = torch.randn(num_gaussians, 16, 3).cuda() opacity = torch.randn(num_gaussians, 1).cuda() +SH_ACTIVE_DEGREE = 3 def get_cuda_args(strategy, mode="train"): cuda_args = { @@ -36,52 +37,6 @@ def get_cuda_args(strategy, mode="train"): } return cuda_args -def test_gaussian_rasterizer_time(): - # Set up the input data - num_gaussians = 10000 - means3D = torch.randn(num_gaussians, 3).cuda() - scales = torch.randn(num_gaussians, 3).cuda() - rotations = torch.randn(num_gaussians, 3, 3).cuda() - shs = torch.randn(num_gaussians, 9).cuda() - opacities = torch.randn(num_gaussians, 1).cuda() - - # Set up the rasterization settings - image_height = 512 - image_width = 512 - tanfovx = 1.0 - tanfovy = 1.0 - bg = torch.ones(3).cuda() - scale_modifier = 1.0 - viewmatrix = torch.eye(4).cuda() - projmatrix = torch.eye(4).cuda() - sh_degree = 2 - campos = torch.zeros(3).cuda() - prefiltered = False - debug = False - - # mode="train" - # strategy=None - # cuda_args = get_cuda_args(strategy, mode) - - raster_settings = GaussianRasterizationSettings( - image_height, image_width, tanfovx, tanfovy, bg, - scale_modifier, viewmatrix, projmatrix, sh_degree, - campos, prefiltered, debug - ) - - # Create the GaussianRasterizer - rasterizer = GaussianRasterizer(raster_settings) - - # Measure the time for preprocess_gaussians - start_time = time.time() - means2D, rgb, conic_opacity, radii, depths = rasterizer.preprocess_gaussians( - means3D, scales, rotations, shs, opacities - ) - end_time = time.time() - - preprocess_time = end_time - start_time - print(f"Time taken by preprocess_gaussians: {preprocess_time:.4f} seconds") - def test_batched_gaussian_rasterizer(): # Set up the viewpoint cameras @@ -104,7 +59,7 @@ def test_batched_gaussian_rasterizer(): bg_color = torch.ones(3).cuda() scaling_modifier = 1.0 pc = type('PC', (), {}) - pc.active_sh_degree = 3 + pc.active_sh_degree = SH_ACTIVE_DEGREE pipe = type('Pipe', (), {}) pipe.debug = False mode = "train" @@ -207,39 +162,40 @@ def test_batched_gaussian_rasterizer_batch_processing(): bg_color = torch.ones(3).cuda() scaling_modifier = 1.0 pc = type('PC', (), {}) - pc.active_sh_degree = 3 + pc.active_sh_degree = SH_ACTIVE_DEGREE pipe = type('Pipe', (), {}) pipe.debug = False mode = "train" # Set up rasterization configuration for the batch - batched_tanfovx = torch.tensor([math.tan(camera.FoVx * 0.5) for camera in batched_viewpoint_cameras]).cuda() - batched_tanfovy = torch.tensor([math.tan(camera.FoVy * 0.5) for camera in batched_viewpoint_cameras]).cuda() - batched_viewmatrix = torch.stack([camera.world_view_transform for camera in batched_viewpoint_cameras]).cuda() - batched_projmatrix = torch.stack([camera.full_proj_transform for camera in batched_viewpoint_cameras]).cuda() - batched_campos = torch.stack([camera.camera_center for camera in batched_viewpoint_cameras]).cuda() - - batched_raster_settings = GaussianRasterizationSettings( - image_height=int(batched_viewpoint_cameras[0].image_height), - image_width=int(batched_viewpoint_cameras[0].image_width), - tanfovx=batched_tanfovx, - tanfovy=batched_tanfovy, - bg=bg_color, - scale_modifier=scaling_modifier, - viewmatrix=batched_viewmatrix, - projmatrix=batched_projmatrix, - sh_degree=pc.active_sh_degree, - campos=batched_campos, - prefiltered=False, - debug=pipe.debug - ) + batched_raster_settings = [] + batched_cuda_args = [] + for i, (viewpoint_camera, strategy) in enumerate(zip(batched_viewpoint_cameras, batched_strategies)): + ########## [START] Prepare CUDA Rasterization Settings ########## + cuda_args = get_cuda_args(strategy, mode) + batched_cuda_args.append(cuda_args) + tanfovx = math.tan(viewpoint_camera.FoVx * 0.5) + tanfovy = math.tan(viewpoint_camera.FoVy * 0.5) + + raster_settings = GaussianRasterizationSettings( + image_height=int(batched_viewpoint_cameras[0].image_height), + image_width=int(batched_viewpoint_cameras[0].image_width), + tanfovx=tanfovx, + tanfovy=tanfovy, + bg=bg_color, + scale_modifier=scaling_modifier, + viewmatrix=viewpoint_camera.world_view_transform, + projmatrix=viewpoint_camera.full_proj_transform, + sh_degree=pc.active_sh_degree, + campos=viewpoint_camera.camera_center, + prefiltered=False, + debug=pipe.debug + ) + batched_raster_settings.append(raster_settings) # Create the GaussianRasterizer for the batch rasterizer = GaussianRasterizerBatches(raster_settings=batched_raster_settings) - # Set up CUDA arguments for the batch - cuda_args = get_cuda_args(batched_strategies[0], mode) # TODO: Check if this is correct for the batch - # Preprocess the Gaussians for the entire batch batched_means2D, batched_rgb, batched_conic_opacity, batched_radii, batched_depths = rasterizer.preprocess_gaussians( means3D=means3D, @@ -247,7 +203,7 @@ def test_batched_gaussian_rasterizer_batch_processing(): rotations=rotations, shs=shs, opacities=opacity, - batched_cuda_args=cuda_args + batched_cuda_args=batched_cuda_args ) end_time = time.time() preprocess_time = end_time - start_time From 529710b2f3adc14e992536e2a4470082816c2da9 Mon Sep 17 00:00:00 2001 From: Sandeep Menon Date: Sat, 27 Apr 2024 21:22:39 -0400 Subject: [PATCH 04/34] added focal_x and focal_y calculation inside the kernel --- cuda_rasterizer/backward.cu | 5 ++--- cuda_rasterizer/backward.h | 1 - cuda_rasterizer/rasterizer.h | 1 - cuda_rasterizer/rasterizer_impl.cu | 3 --- 4 files changed, 2 insertions(+), 8 deletions(-) diff --git a/cuda_rasterizer/backward.cu b/cuda_rasterizer/backward.cu index 1239216..fb63442 100644 --- a/cuda_rasterizer/backward.cu +++ b/cuda_rasterizer/backward.cu @@ -317,6 +317,8 @@ __global__ void computeCov2DCUDABatched( const float x_grad_mul = txtz < -limx || txtz > limx ? 0 : 1; const float y_grad_mul = tytz < -limy || tytz > limy ? 0 : 1; + const float h_x = W / (2.0f * tan_fovx[viewpoint_idx]); + const float h_y = H / (2.0f * tan_fovy[viewpoint_idx]); glm::mat3 J = glm::mat3(h_x / t.z, 0.0f, -(h_x * t.x) / (t.z * t.z), 0.0f, h_y / t.z, -(h_y * t.y) / (t.z * t.z), 0, 0, 0); @@ -908,7 +910,6 @@ void BACKWARD::preprocess_batch( const float* cov3Ds, const float* viewmatrix, const float* projmatrix, - const float focal_x, const float focal_y, const float* tan_fovx, const float* tan_fovy, const glm::vec3* campos, const float3* dL_dmean2D, @@ -932,8 +933,6 @@ void BACKWARD::preprocess_batch( means3D, radii, cov3Ds, - focal_x, - focal_y, tan_fovx, tan_fovy, viewmatrix, diff --git a/cuda_rasterizer/backward.h b/cuda_rasterizer/backward.h index 7dcb932..62820c0 100644 --- a/cuda_rasterizer/backward.h +++ b/cuda_rasterizer/backward.h @@ -75,7 +75,6 @@ namespace BACKWARD const float* cov3Ds, const float* view, const float* proj, - const float focal_x, const float focal_y, const float* tan_fovx, const float* tan_fovy, const glm::vec3* campos, const float3* dL_dmean2D, diff --git a/cuda_rasterizer/rasterizer.h b/cuda_rasterizer/rasterizer.h index f3a5220..64a3b91 100644 --- a/cuda_rasterizer/rasterizer.h +++ b/cuda_rasterizer/rasterizer.h @@ -116,7 +116,6 @@ namespace CudaRasterizer bool debug, const pybind11::dict &args); - static void preprocessBackwardBatches( const int num_viewpoints, const int* radii, diff --git a/cuda_rasterizer/rasterizer_impl.cu b/cuda_rasterizer/rasterizer_impl.cu index 955e129..b76aa88 100644 --- a/cuda_rasterizer/rasterizer_impl.cu +++ b/cuda_rasterizer/rasterizer_impl.cu @@ -547,8 +547,6 @@ void CudaRasterizer::Rasterizer::preprocessBackward( auto [global_rank, world_size, iteration, log_interval, device, zhx_debug, zhx_time, mode, dist_division_mode, log_folder] = prepareArgs(args); MyTimerOnGPU timer; - const float focal_y = height / (2.0f * tan_fovy); - const float focal_x = width / (2.0f * tan_fovx); const float* cov3D_ptr = cov3D; timer.start("b20 preprocess"); @@ -563,7 +561,6 @@ void CudaRasterizer::Rasterizer::preprocessBackward( cov3D_ptr, viewmatrix, projmatrix, - focal_x, focal_y, tan_fovx, tan_fovy, (glm::vec3*)campos, (float3*)dL_dmean2D, From 3ac1ad3047525d028ba33cccf4d472f2e66490d8 Mon Sep 17 00:00:00 2001 From: Sandeep Menon Date: Sat, 27 Apr 2024 20:33:22 -0400 Subject: [PATCH 05/34] Refactor rasterization_tests.py to use raster_settings_batch instead of batched_raster_settings --- rasterization_tests.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rasterization_tests.py b/rasterization_tests.py index 2452cbf..f69cc6e 100644 --- a/rasterization_tests.py +++ b/rasterization_tests.py @@ -168,7 +168,7 @@ def test_batched_gaussian_rasterizer_batch_processing(): mode = "train" # Set up rasterization configuration for the batch - batched_raster_settings = [] + raster_settings_batch = [] batched_cuda_args = [] for i, (viewpoint_camera, strategy) in enumerate(zip(batched_viewpoint_cameras, batched_strategies)): ########## [START] Prepare CUDA Rasterization Settings ########## @@ -191,10 +191,10 @@ def test_batched_gaussian_rasterizer_batch_processing(): prefiltered=False, debug=pipe.debug ) - batched_raster_settings.append(raster_settings) + raster_settings_batch.append(raster_settings) # Create the GaussianRasterizer for the batch - rasterizer = GaussianRasterizerBatches(raster_settings=batched_raster_settings) + rasterizer = GaussianRasterizerBatches(raster_settings_batch=raster_settings_batch) # Preprocess the Gaussians for the entire batch batched_means2D, batched_rgb, batched_conic_opacity, batched_radii, batched_depths = rasterizer.preprocess_gaussians( From ce314e24774ab17b5f28589bc4f4cedaa3a3bb76 Mon Sep 17 00:00:00 2001 From: Sandeep Menon Date: Sat, 27 Apr 2024 20:45:00 -0400 Subject: [PATCH 06/34] fixed namedtuple setting bug --- diff_gaussian_rasterization/__init__.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/diff_gaussian_rasterization/__init__.py b/diff_gaussian_rasterization/__init__.py index 9339ca8..de0ff28 100644 --- a/diff_gaussian_rasterization/__init__.py +++ b/diff_gaussian_rasterization/__init__.py @@ -60,18 +60,23 @@ def forward( # Restructure arguments the way that the C++ lib expects them if isinstance(raster_settings, list): - rs = raster_settings[0] - rs.viewmatrix, rs.projmatrix, rs.campos = [ + viewmatrix, projmatrix, campos = [ torch.stack(tensors) for tensors in zip( *[(rs.viewmatrix, rs.projmatrix, rs.campos) for rs in raster_settings] ) ] - rs.tanfovx, rs.tanfovy = [ + tanfovx, tanfovy = [ torch.tensor(vals, device=means3D.device) for vals in zip(*[(rs.tanfovx, rs.tanfovy) for rs in raster_settings]) ] - raster_settings = rs - + raster_settings = raster_settings[0]._replace( + tanfovx=tanfovx, + tanfovy=tanfovy, + viewmatrix=viewmatrix, + projmatrix=projmatrix, + campos=campos + ) + args = ( means3D, scales, From fdd3b4fe9dfa446dfaac5c9a2db90effb97147c1 Mon Sep 17 00:00:00 2001 From: Sandeep Menon Date: Sat, 27 Apr 2024 20:53:21 -0400 Subject: [PATCH 07/34] Refactor GaussianRasterizationSettings class to handle raster_settings as a batch --- diff_gaussian_rasterization/__init__.py | 2 -- rasterization_tests.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/diff_gaussian_rasterization/__init__.py b/diff_gaussian_rasterization/__init__.py index de0ff28..d61f274 100644 --- a/diff_gaussian_rasterization/__init__.py +++ b/diff_gaussian_rasterization/__init__.py @@ -99,10 +99,8 @@ def forward( # TODO: update this. if not torch.is_tensor(raster_settings.tanfovx): - print("normal batch calling") num_rendered, means2D, depths, radii, cov3D, conic_opacity, rgb, clamped = _C.preprocess_gaussians(*args) else: - print("improved batch calling") num_rendered, means2D, depths, radii, cov3D, conic_opacity, rgb, clamped = _C.preprocess_gaussians_batched(*args) # Keep relevant tensors for backward diff --git a/rasterization_tests.py b/rasterization_tests.py index f69cc6e..7501854 100644 --- a/rasterization_tests.py +++ b/rasterization_tests.py @@ -203,7 +203,7 @@ def test_batched_gaussian_rasterizer_batch_processing(): rotations=rotations, shs=shs, opacities=opacity, - batched_cuda_args=batched_cuda_args + batched_cuda_args=batched_cuda_args[0] #TODO: look into sending list of cuda_args/strategies ) end_time = time.time() preprocess_time = end_time - start_time From cdf3bc135716a56f79517492921b1cfe5b8def7b Mon Sep 17 00:00:00 2001 From: Sandeep Menon Date: Sat, 27 Apr 2024 21:42:22 -0400 Subject: [PATCH 08/34] remove focal_x and focal_y calculations --- cuda_rasterizer/rasterizer_impl.cu | 3 --- 1 file changed, 3 deletions(-) diff --git a/cuda_rasterizer/rasterizer_impl.cu b/cuda_rasterizer/rasterizer_impl.cu index b76aa88..f1ca89f 100644 --- a/cuda_rasterizer/rasterizer_impl.cu +++ b/cuda_rasterizer/rasterizer_impl.cu @@ -610,8 +610,6 @@ void CudaRasterizer::Rasterizer::preprocessBackwardBatches( auto [global_rank, world_size, iteration, log_interval, device, zhx_debug, zhx_time, mode, dist_division_mode, log_folder] = prepareArgs(args); MyTimerOnGPU timer; - const float focal_y = height / (2.0f * tan_fovy[0]);//cureently hardcoded - const float focal_x = width / (2.0f * tan_fovx[0]); const float* cov3D_ptr = cov3D; timer.start("b20 preprocess"); @@ -628,7 +626,6 @@ void CudaRasterizer::Rasterizer::preprocessBackwardBatches( cov3D_ptr, viewmatrix, projmatrix, - focal_x, focal_y, tan_fovx, tan_fovy, (glm::vec3*)campos, (float3*)dL_dmean2D, From 591a5c1b215556c4d332bf51776efaa16a4c906f Mon Sep 17 00:00:00 2001 From: Sandeep Menon Date: Sat, 27 Apr 2024 21:50:15 -0400 Subject: [PATCH 09/34] Refactor CUDA rasterizer code to include width and height parameters in preprocess functions --- cuda_rasterizer/backward.cu | 4 +++- cuda_rasterizer/backward.h | 1 + cuda_rasterizer/rasterizer_impl.cu | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/cuda_rasterizer/backward.cu b/cuda_rasterizer/backward.cu index fb63442..c0506e1 100644 --- a/cuda_rasterizer/backward.cu +++ b/cuda_rasterizer/backward.cu @@ -279,7 +279,7 @@ __global__ void computeCov2DCUDABatched( const float3* means, const int* radii, const float* cov3Ds, - const float h_x, float h_y, + const int W, const int H, const float* tan_fovx, const float* tan_fovy, const float* viewmatrix_arr, const float* dL_dconics, @@ -910,6 +910,7 @@ void BACKWARD::preprocess_batch( const float* cov3Ds, const float* viewmatrix, const float* projmatrix, + const int W, const int H, const float* tan_fovx, const float* tan_fovy, const glm::vec3* campos, const float3* dL_dmean2D, @@ -933,6 +934,7 @@ void BACKWARD::preprocess_batch( means3D, radii, cov3Ds, + W, H, tan_fovx, tan_fovy, viewmatrix, diff --git a/cuda_rasterizer/backward.h b/cuda_rasterizer/backward.h index 62820c0..33df626 100644 --- a/cuda_rasterizer/backward.h +++ b/cuda_rasterizer/backward.h @@ -75,6 +75,7 @@ namespace BACKWARD const float* cov3Ds, const float* view, const float* proj, + const int W, const int H, const float* tan_fovx, const float* tan_fovy, const glm::vec3* campos, const float3* dL_dmean2D, diff --git a/cuda_rasterizer/rasterizer_impl.cu b/cuda_rasterizer/rasterizer_impl.cu index f1ca89f..d0c9f62 100644 --- a/cuda_rasterizer/rasterizer_impl.cu +++ b/cuda_rasterizer/rasterizer_impl.cu @@ -626,6 +626,7 @@ void CudaRasterizer::Rasterizer::preprocessBackwardBatches( cov3D_ptr, viewmatrix, projmatrix, + width, height, tan_fovx, tan_fovy, (glm::vec3*)campos, (float3*)dL_dmean2D, From 1b54023cbee6bf175d969627e06c2e536c4be327 Mon Sep 17 00:00:00 2001 From: Sandeep Menon Date: Sat, 27 Apr 2024 21:59:07 -0400 Subject: [PATCH 10/34] Renamed W and H to image_width and image_height parameters in preprocess functions --- cuda_rasterizer/backward.cu | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cuda_rasterizer/backward.cu b/cuda_rasterizer/backward.cu index c0506e1..7121689 100644 --- a/cuda_rasterizer/backward.cu +++ b/cuda_rasterizer/backward.cu @@ -279,7 +279,7 @@ __global__ void computeCov2DCUDABatched( const float3* means, const int* radii, const float* cov3Ds, - const int W, const int H, + const int image_width, const int image_height, const float* tan_fovx, const float* tan_fovy, const float* viewmatrix_arr, const float* dL_dconics, @@ -317,8 +317,8 @@ __global__ void computeCov2DCUDABatched( const float x_grad_mul = txtz < -limx || txtz > limx ? 0 : 1; const float y_grad_mul = tytz < -limy || tytz > limy ? 0 : 1; - const float h_x = W / (2.0f * tan_fovx[viewpoint_idx]); - const float h_y = H / (2.0f * tan_fovy[viewpoint_idx]); + const float h_x = image_width / (2.0f * tan_fovx[viewpoint_idx]); + const float h_y = image_height / (2.0f * tan_fovy[viewpoint_idx]); glm::mat3 J = glm::mat3(h_x / t.z, 0.0f, -(h_x * t.x) / (t.z * t.z), 0.0f, h_y / t.z, -(h_y * t.y) / (t.z * t.z), 0, 0, 0); From 9294a07ee87de22d553450d9d990c78f5e9ef911 Mon Sep 17 00:00:00 2001 From: Sandeep Menon Date: Sat, 27 Apr 2024 22:00:13 -0400 Subject: [PATCH 11/34] reverted focal_x and focal_y removal in normal preprocessBackward --- cuda_rasterizer/rasterizer_impl.cu | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cuda_rasterizer/rasterizer_impl.cu b/cuda_rasterizer/rasterizer_impl.cu index d0c9f62..2ff28ba 100644 --- a/cuda_rasterizer/rasterizer_impl.cu +++ b/cuda_rasterizer/rasterizer_impl.cu @@ -547,6 +547,8 @@ void CudaRasterizer::Rasterizer::preprocessBackward( auto [global_rank, world_size, iteration, log_interval, device, zhx_debug, zhx_time, mode, dist_division_mode, log_folder] = prepareArgs(args); MyTimerOnGPU timer; + const float focal_y = height / (2.0f * tan_fovy); + const float focal_x = width / (2.0f * tan_fovx); const float* cov3D_ptr = cov3D; timer.start("b20 preprocess"); @@ -561,6 +563,7 @@ void CudaRasterizer::Rasterizer::preprocessBackward( cov3D_ptr, viewmatrix, projmatrix, + focal_x, focal_y, tan_fovx, tan_fovy, (glm::vec3*)campos, (float3*)dL_dmean2D, From 877677fca222a203981919bd4c16a14f2031413e Mon Sep 17 00:00:00 2001 From: Sandeep Menon Date: Sat, 27 Apr 2024 23:16:15 -0400 Subject: [PATCH 12/34] grad_means2D to handle more than 2 dimensions --- diff_gaussian_rasterization/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/diff_gaussian_rasterization/__init__.py b/diff_gaussian_rasterization/__init__.py index d61f274..460da7c 100644 --- a/diff_gaussian_rasterization/__init__.py +++ b/diff_gaussian_rasterization/__init__.py @@ -129,7 +129,8 @@ def backward(ctx, grad_means2D, grad_rgb, grad_conic_opacity, grad_radii, grad_d # change dL_dmeans2D from (P, 2) to (P, 3) # grad_means2D is (P, 2) now. Need to pad it to (P, 3) because preprocess_gaussians_backward's cuda implementation. - grad_means2D_pad = torch.zeros((grad_means2D.shape[0], 1), dtype = grad_means2D.dtype, device = grad_means2D.device) + + grad_means2D_pad = torch.zeros_like(grad_means2D[..., :1], dtype = grad_means2D.dtype, device=grad_means2D.device) grad_means2D = torch.cat((grad_means2D, grad_means2D_pad), dim = 1).contiguous() # Restructure args as C++ method expects them From 444e8a5a62420cc6eb836a6755fddd23ec0007b1 Mon Sep 17 00:00:00 2001 From: prapti19 Date: Sat, 27 Apr 2024 23:44:24 -0400 Subject: [PATCH 13/34] add tests for backward --- rasterization_backward_tests.py | 297 ++++++++++++++++++++++++++++++++ 1 file changed, 297 insertions(+) create mode 100644 rasterization_backward_tests.py diff --git a/rasterization_backward_tests.py b/rasterization_backward_tests.py new file mode 100644 index 0000000..4203aaa --- /dev/null +++ b/rasterization_backward_tests.py @@ -0,0 +1,297 @@ +import math +import time + +import torch + +from diff_gaussian_rasterization import ( + GaussianRasterizationSettings, + GaussianRasterizer, + GaussianRasterizerBatches, +) + +num_gaussians = 10000 +num_batches=32 +means3D = torch.randn(num_gaussians, 3).cuda() +scales = torch.randn(num_gaussians, 3).cuda() +rotations = torch.randn(num_gaussians, 4).cuda() +shs = torch.randn(num_gaussians, 16, 3).cuda() +opacity = torch.randn(num_gaussians, 1).cuda() +SH_ACTIVE_DEGREE = 3 + +means3D.requires_grad = True +scales.requires_grad = True +rotations.requires_grad = True +shs.requires_grad = True +opacity.requires_grad = True + +def get_cuda_args(strategy, mode="train"): + cuda_args = { + "mode": mode, + "world_size": "1", + "global_rank": "0", + "local_rank": "0", + "mp_world_size": "1", + "mp_rank": "0", + "log_folder": "./logs", + "log_interval": "10", + "iteration": "0", + "zhx_debug": "False", + "zhx_time": "False", + "dist_global_strategy": "default", + "avoid_pixel_all2all": False, + "stats_collector": {}, + } + return cuda_args + + +def test_batched_gaussian_rasterizer(): + # Set up the viewpoint cameras + batched_viewpoint_cameras = [] + for _ in range(num_batches): + viewpoint_camera = type('ViewpointCamera', (), {}) + viewpoint_camera.FoVx = math.radians(60) + viewpoint_camera.FoVy = math.radians(60) + viewpoint_camera.image_height = 512 + viewpoint_camera.image_width = 512 + viewpoint_camera.world_view_transform = torch.eye(4).cuda() + viewpoint_camera.full_proj_transform = torch.eye(4).cuda() + viewpoint_camera.camera_center = torch.zeros(3).cuda() + batched_viewpoint_cameras.append(viewpoint_camera) + + # Set up the strategies + batched_strategies = [None] * num_batches + + # Set up other parameters + bg_color = torch.ones(3).cuda() + scaling_modifier = 1.0 + pc = type('PC', (), {}) + pc.active_sh_degree = SH_ACTIVE_DEGREE + pipe = type('Pipe', (), {}) + pipe.debug = False + mode = "train" + + batched_rasterizers = [] + batched_cuda_args = [] + batched_screenspace_params = [] + batched_means2D = [] + batched_radii = [] + batched_conic_opacity=[] + batched_depths=[] + batched_rgb=[] + + start_time = time.time() + + for i, (viewpoint_camera, strategy) in enumerate(zip(batched_viewpoint_cameras, batched_strategies)): + ########## [START] Prepare CUDA Rasterization Settings ########## + cuda_args = get_cuda_args(strategy, mode) + batched_cuda_args.append(cuda_args) + + # Set up rasterization configuration + tanfovx = math.tan(viewpoint_camera.FoVx * 0.5) + tanfovy = math.tan(viewpoint_camera.FoVy * 0.5) + raster_settings = GaussianRasterizationSettings( + image_height=int(viewpoint_camera.image_height), + image_width=int(viewpoint_camera.image_width), + tanfovx=tanfovx, + tanfovy=tanfovy, + bg=bg_color, + scale_modifier=scaling_modifier, + viewmatrix=viewpoint_camera.world_view_transform, + projmatrix=viewpoint_camera.full_proj_transform, + sh_degree=pc.active_sh_degree, + campos=viewpoint_camera.camera_center, + prefiltered=False, + debug=pipe.debug + ) + rasterizer = GaussianRasterizer(raster_settings=raster_settings) + ########## [END] Prepare CUDA Rasterization Settings ########## + + #[3DGS-wise preprocess] + means2D, rgb, conic_opacity, radii, depths = rasterizer.preprocess_gaussians( + means3D=means3D, + scales=scales, + rotations=rotations, + shs=shs, + opacities=opacity, + cuda_args=cuda_args + ) + + # TODO: make the below work + # if mode == "train": + # means2D.retain_grad() + + batched_means2D.append(means2D) + screenspace_params = [means2D, rgb, conic_opacity, radii, depths] + batched_rasterizers.append(rasterizer) + batched_screenspace_params.append(screenspace_params) + batched_radii.append(radii) + batched_rgb.append(rgb) + batched_conic_opacity.append(conic_opacity) + batched_depths.append(depths) + + + end_time = time.time() + preprocess_time = end_time - start_time + print(f"Time taken by test_batched_gaussian_rasterizer: {preprocess_time:.4f} seconds") + # Perform further operations with the batched results + # Test results and performance + + batched_means2D = torch.stack(batched_means2D, dim=0) + batched_radii = torch.stack(batched_radii, dim=0) + batched_conic_opacity=torch.stack(batched_conic_opacity,dim=0) + batched_rgb=torch.stack(batched_rgb,dim=0) + batched_depths=torch.stack(batched_depths,dim=0) + + + start_backward=time.time() + target_batched_means3d=torch.ones(means3D.shape).cuda() + loss = (means3D - target_batched_means3d).pow(2).mean() + loss.backward() + end_backward=time.time() + preproc_back=end_backward-start_backward + print(f"Time taken by test_batched_gaussian_rasterizer BACKWARD: {preproc_back:.4f} seconds") + + return batched_means2D, batched_radii, batched_screenspace_params,batched_conic_opacity,batched_rgb,batched_depths,means3D.grad + + +def test_batched_gaussian_rasterizer_batch_processing(): + # Set up the input data + start_time = time.time() + # Set up the viewpoint cameras + batched_viewpoint_cameras = [] + for _ in range(num_batches): + viewpoint_camera = type('ViewpointCamera', (), {}) + viewpoint_camera.FoVx = math.radians(60) + viewpoint_camera.FoVy = math.radians(60) + viewpoint_camera.image_height = 512 + viewpoint_camera.image_width = 512 + viewpoint_camera.world_view_transform = torch.eye(4).cuda() + viewpoint_camera.full_proj_transform = torch.eye(4).cuda() + viewpoint_camera.camera_center = torch.zeros(3).cuda() + batched_viewpoint_cameras.append(viewpoint_camera) + + # Set up the strategies + batched_strategies = [None] * num_batches + + # Set up other parameters + bg_color = torch.ones(3).cuda() + scaling_modifier = 1.0 + pc = type('PC', (), {}) + pc.active_sh_degree = SH_ACTIVE_DEGREE + pipe = type('Pipe', (), {}) + pipe.debug = False + mode = "train" + + # Set up rasterization configuration for the batch + raster_settings_batch = [] + batched_cuda_args = [] + for i, (viewpoint_camera, strategy) in enumerate(zip(batched_viewpoint_cameras, batched_strategies)): + ########## [START] Prepare CUDA Rasterization Settings ########## + cuda_args = get_cuda_args(strategy, mode) + batched_cuda_args.append(cuda_args) + tanfovx = math.tan(viewpoint_camera.FoVx * 0.5) + tanfovy = math.tan(viewpoint_camera.FoVy * 0.5) + + raster_settings = GaussianRasterizationSettings( + image_height=int(batched_viewpoint_cameras[0].image_height), + image_width=int(batched_viewpoint_cameras[0].image_width), + tanfovx=tanfovx, + tanfovy=tanfovy, + bg=bg_color, + scale_modifier=scaling_modifier, + viewmatrix=viewpoint_camera.world_view_transform, + projmatrix=viewpoint_camera.full_proj_transform, + sh_degree=pc.active_sh_degree, + campos=viewpoint_camera.camera_center, + prefiltered=False, + debug=pipe.debug + ) + raster_settings_batch.append(raster_settings) + + # Create the GaussianRasterizer for the batch + rasterizer = GaussianRasterizerBatches(raster_settings_batch=raster_settings_batch) + + # Preprocess the Gaussians for the entire batch + batched_means2D, batched_rgb, batched_conic_opacity, batched_radii, batched_depths = rasterizer.preprocess_gaussians( + means3D=means3D, + scales=scales, + rotations=rotations, + shs=shs, + opacities=opacity, + batched_cuda_args=batched_cuda_args[0] #TODO: look into sending list of cuda_args/strategies + ) + end_time = time.time() + preprocess_time = end_time - start_time + print(f"Time taken by test_batched_gaussian_rasterizer_batch_processing: {preprocess_time:.4f} seconds") + + # TODO: make the below work + # if mode == "train": + # batched_means2D.retain_grad() + + + # Perform assertions on the preprocessed data + + assert batched_means2D.shape == (num_batches, num_gaussians, 2) + assert batched_rgb.shape == (num_batches, num_gaussians, 3) + assert batched_conic_opacity.shape == (num_batches, num_gaussians,4) + assert batched_radii.shape == (num_batches, num_gaussians) + assert batched_depths.shape == (num_batches, num_gaussians) + + batched_screenspace_params = [] + for i in range(num_batches): + means2D = batched_means2D[i] + rgb = batched_rgb[i] + conic_opacity = batched_conic_opacity[i] + radii = batched_radii[i] + depths = batched_depths[i] + + screenspace_params = [means2D, rgb, conic_opacity, radii, depths] + batched_screenspace_params.append(screenspace_params) + + start_backward=time.time() + means3D.grad.zero_()#need to reset it for it to check + target_batched_means3d=torch.ones(means3D.shape).cuda() + loss = (means3D - target_batched_means3d).pow(2).mean() + loss.backward() + end_backward=time.time() + preproc_back=end_backward-start_backward + print(f"Time taken by test_batched_gaussian_rasterizer_batch_processing BACKWARD: {preproc_back:.4f} seconds") + + return batched_means2D, batched_radii, batched_screenspace_params, batched_conic_opacity,batched_rgb,batched_depths,means3D.grad + + +def compare_tensors(tensor1, tensor2): + if tensor1.shape != tensor2.shape: + print("Tensors have different shapes:") + print("Tensor 1 shape:", tensor1.shape) + print("Tensor 2 shape:", tensor2.shape) + return False + + equality_matrix = torch.eq(tensor1, tensor2) + if torch.all(equality_matrix): + return True + else: + print("Tensors have non-matching values.") + non_matching_indices = torch.where(equality_matrix == False) + for idx in zip(*non_matching_indices[:5]): + value1 = tensor1[idx].item() + value2 = tensor2[idx].item() + print(f"Non-matching values at index {idx}: {value1} != {value2}") + return False + +if __name__ == "__main__": + batched_means2D, batched_radii, batched_screenspace_params,batched_conic_opacity,batched_rgb,batched_depths,batched_dL_means3D = test_batched_gaussian_rasterizer() + batched_means2D_batch_processed, batched_radii_batch_processed, batched_screenspace_params_batch_processed,batched_conic_opacity_batch_processed,batched_rgb_batch_processed,batched_depths_batch_processed,batched_dL_means3D_batch_processed = test_batched_gaussian_rasterizer_batch_processing() + + assert compare_tensors(batched_means2D, batched_means2D_batch_processed) + assert compare_tensors(batched_radii, batched_radii_batch_processed) + assert compare_tensors(batched_conic_opacity, batched_conic_opacity_batch_processed) + + assert compare_tensors(batched_rgb, batched_rgb_batch_processed) + assert compare_tensors(batched_depths, batched_depths_batch_processed) + assert len(batched_screenspace_params) == len(batched_screenspace_params_batch_processed) + + #-------BACKWARD PASS------- + assert compare_tensors(batched_dL_means3D, batched_dL_means3D_batch_processed) + + From 46b83eb804d2618be56e86d1474934e0a2a6009d Mon Sep 17 00:00:00 2001 From: Sandeep Menon Date: Sun, 28 Apr 2024 18:16:41 -0400 Subject: [PATCH 14/34] ruff formatting and gradients for remaining inputs --- rasterization_backward_tests.py | 160 +++++++++++++++++++++----------- 1 file changed, 105 insertions(+), 55 deletions(-) diff --git a/rasterization_backward_tests.py b/rasterization_backward_tests.py index 4203aaa..3b693d7 100644 --- a/rasterization_backward_tests.py +++ b/rasterization_backward_tests.py @@ -10,7 +10,7 @@ ) num_gaussians = 10000 -num_batches=32 +num_batches = 32 means3D = torch.randn(num_gaussians, 3).cuda() scales = torch.randn(num_gaussians, 3).cuda() rotations = torch.randn(num_gaussians, 4).cuda() @@ -24,6 +24,7 @@ shs.requires_grad = True opacity.requires_grad = True + def get_cuda_args(strategy, mode="train"): cuda_args = { "mode": mode, @@ -44,11 +45,11 @@ def get_cuda_args(strategy, mode="train"): return cuda_args -def test_batched_gaussian_rasterizer(): +def test_batched_gaussian_rasterizer(): # Set up the viewpoint cameras batched_viewpoint_cameras = [] for _ in range(num_batches): - viewpoint_camera = type('ViewpointCamera', (), {}) + viewpoint_camera = type("ViewpointCamera", (), {}) viewpoint_camera.FoVx = math.radians(60) viewpoint_camera.FoVy = math.radians(60) viewpoint_camera.image_height = 512 @@ -64,9 +65,9 @@ def test_batched_gaussian_rasterizer(): # Set up other parameters bg_color = torch.ones(3).cuda() scaling_modifier = 1.0 - pc = type('PC', (), {}) + pc = type("PC", (), {}) pc.active_sh_degree = SH_ACTIVE_DEGREE - pipe = type('Pipe', (), {}) + pipe = type("Pipe", (), {}) pipe.debug = False mode = "train" @@ -75,12 +76,12 @@ def test_batched_gaussian_rasterizer(): batched_screenspace_params = [] batched_means2D = [] batched_radii = [] - batched_conic_opacity=[] - batched_depths=[] - batched_rgb=[] + batched_conic_opacity = [] + batched_depths = [] + batched_rgb = [] start_time = time.time() - + for i, (viewpoint_camera, strategy) in enumerate(zip(batched_viewpoint_cameras, batched_strategies)): ########## [START] Prepare CUDA Rasterization Settings ########## cuda_args = get_cuda_args(strategy, mode) @@ -101,19 +102,14 @@ def test_batched_gaussian_rasterizer(): sh_degree=pc.active_sh_degree, campos=viewpoint_camera.camera_center, prefiltered=False, - debug=pipe.debug + debug=pipe.debug, ) rasterizer = GaussianRasterizer(raster_settings=raster_settings) ########## [END] Prepare CUDA Rasterization Settings ########## - #[3DGS-wise preprocess] + # [3DGS-wise preprocess] means2D, rgb, conic_opacity, radii, depths = rasterizer.preprocess_gaussians( - means3D=means3D, - scales=scales, - rotations=rotations, - shs=shs, - opacities=opacity, - cuda_args=cuda_args + means3D=means3D, scales=scales, rotations=rotations, shs=shs, opacities=opacity, cuda_args=cuda_args ) # TODO: make the below work @@ -129,38 +125,48 @@ def test_batched_gaussian_rasterizer(): batched_conic_opacity.append(conic_opacity) batched_depths.append(depths) - end_time = time.time() preprocess_time = end_time - start_time print(f"Time taken by test_batched_gaussian_rasterizer: {preprocess_time:.4f} seconds") # Perform further operations with the batched results # Test results and performance - + batched_means2D = torch.stack(batched_means2D, dim=0) batched_radii = torch.stack(batched_radii, dim=0) - batched_conic_opacity=torch.stack(batched_conic_opacity,dim=0) - batched_rgb=torch.stack(batched_rgb,dim=0) - batched_depths=torch.stack(batched_depths,dim=0) - + batched_conic_opacity = torch.stack(batched_conic_opacity, dim=0) + batched_rgb = torch.stack(batched_rgb, dim=0) + batched_depths = torch.stack(batched_depths, dim=0) - start_backward=time.time() - target_batched_means3d=torch.ones(means3D.shape).cuda() + start_backward = time.time() + target_batched_means3d = torch.ones(means3D.shape).cuda() loss = (means3D - target_batched_means3d).pow(2).mean() loss.backward() - end_backward=time.time() - preproc_back=end_backward-start_backward + end_backward = time.time() + preproc_back = end_backward - start_backward print(f"Time taken by test_batched_gaussian_rasterizer BACKWARD: {preproc_back:.4f} seconds") - - return batched_means2D, batched_radii, batched_screenspace_params,batched_conic_opacity,batched_rgb,batched_depths,means3D.grad - - + + return ( + batched_means2D, + batched_radii, + batched_screenspace_params, + batched_conic_opacity, + batched_rgb, + batched_depths, + means3D.grad, + scales.grad, + rotations.grad, + shs.grad, + opacity.grad, + ) + + def test_batched_gaussian_rasterizer_batch_processing(): # Set up the input data start_time = time.time() # Set up the viewpoint cameras batched_viewpoint_cameras = [] for _ in range(num_batches): - viewpoint_camera = type('ViewpointCamera', (), {}) + viewpoint_camera = type("ViewpointCamera", (), {}) viewpoint_camera.FoVx = math.radians(60) viewpoint_camera.FoVy = math.radians(60) viewpoint_camera.image_height = 512 @@ -176,9 +182,9 @@ def test_batched_gaussian_rasterizer_batch_processing(): # Set up other parameters bg_color = torch.ones(3).cuda() scaling_modifier = 1.0 - pc = type('PC', (), {}) + pc = type("PC", (), {}) pc.active_sh_degree = SH_ACTIVE_DEGREE - pipe = type('Pipe', (), {}) + pipe = type("Pipe", (), {}) pipe.debug = False mode = "train" @@ -204,7 +210,7 @@ def test_batched_gaussian_rasterizer_batch_processing(): sh_degree=pc.active_sh_degree, campos=viewpoint_camera.camera_center, prefiltered=False, - debug=pipe.debug + debug=pipe.debug, ) raster_settings_batch.append(raster_settings) @@ -212,13 +218,19 @@ def test_batched_gaussian_rasterizer_batch_processing(): rasterizer = GaussianRasterizerBatches(raster_settings_batch=raster_settings_batch) # Preprocess the Gaussians for the entire batch - batched_means2D, batched_rgb, batched_conic_opacity, batched_radii, batched_depths = rasterizer.preprocess_gaussians( + ( + batched_means2D, + batched_rgb, + batched_conic_opacity, + batched_radii, + batched_depths, + ) = rasterizer.preprocess_gaussians( means3D=means3D, scales=scales, rotations=rotations, shs=shs, opacities=opacity, - batched_cuda_args=batched_cuda_args[0] #TODO: look into sending list of cuda_args/strategies + batched_cuda_args=batched_cuda_args[0], # TODO: look into sending list of cuda_args/strategies ) end_time = time.time() preprocess_time = end_time - start_time @@ -228,15 +240,14 @@ def test_batched_gaussian_rasterizer_batch_processing(): # if mode == "train": # batched_means2D.retain_grad() - # Perform assertions on the preprocessed data - + assert batched_means2D.shape == (num_batches, num_gaussians, 2) assert batched_rgb.shape == (num_batches, num_gaussians, 3) - assert batched_conic_opacity.shape == (num_batches, num_gaussians,4) + assert batched_conic_opacity.shape == (num_batches, num_gaussians, 4) assert batched_radii.shape == (num_batches, num_gaussians) assert batched_depths.shape == (num_batches, num_gaussians) - + batched_screenspace_params = [] for i in range(num_batches): means2D = batched_means2D[i] @@ -244,20 +255,32 @@ def test_batched_gaussian_rasterizer_batch_processing(): conic_opacity = batched_conic_opacity[i] radii = batched_radii[i] depths = batched_depths[i] - + screenspace_params = [means2D, rgb, conic_opacity, radii, depths] batched_screenspace_params.append(screenspace_params) - start_backward=time.time() - means3D.grad.zero_()#need to reset it for it to check - target_batched_means3d=torch.ones(means3D.shape).cuda() + start_backward = time.time() + means3D.grad.zero_() # need to reset it for it to check + target_batched_means3d = torch.ones(means3D.shape).cuda() loss = (means3D - target_batched_means3d).pow(2).mean() loss.backward() - end_backward=time.time() - preproc_back=end_backward-start_backward + end_backward = time.time() + preproc_back = end_backward - start_backward print(f"Time taken by test_batched_gaussian_rasterizer_batch_processing BACKWARD: {preproc_back:.4f} seconds") - - return batched_means2D, batched_radii, batched_screenspace_params, batched_conic_opacity,batched_rgb,batched_depths,means3D.grad + + return ( + batched_means2D, + batched_radii, + batched_screenspace_params, + batched_conic_opacity, + batched_rgb, + batched_depths, + means3D.grad, + scales.grad, + rotations.grad, + shs.grad, + opacity.grad, + ) def compare_tensors(tensor1, tensor2): @@ -279,10 +302,35 @@ def compare_tensors(tensor1, tensor2): print(f"Non-matching values at index {idx}: {value1} != {value2}") return False + if __name__ == "__main__": - batched_means2D, batched_radii, batched_screenspace_params,batched_conic_opacity,batched_rgb,batched_depths,batched_dL_means3D = test_batched_gaussian_rasterizer() - batched_means2D_batch_processed, batched_radii_batch_processed, batched_screenspace_params_batch_processed,batched_conic_opacity_batch_processed,batched_rgb_batch_processed,batched_depths_batch_processed,batched_dL_means3D_batch_processed = test_batched_gaussian_rasterizer_batch_processing() - + ( + batched_means2D, + batched_radii, + batched_screenspace_params, + batched_conic_opacity, + batched_rgb, + batched_depths, + batched_dL_means3D, + batched_dL_scales, + batched_dL_rotations, + batched_dL_shs, + batched_dL_opacity, + ) = test_batched_gaussian_rasterizer() + ( + batched_means2D_batch_processed, + batched_radii_batch_processed, + batched_screenspace_params_batch_processed, + batched_conic_opacity_batch_processed, + batched_rgb_batch_processed, + batched_depths_batch_processed, + batched_dL_means3D_batch_processed, + batched_dL_scales_batch_processed, + batched_dL_rotations_batch_processed, + batched_dL_shs_batch_processed, + batched_dL_opacity_batch_processed, + ) = test_batched_gaussian_rasterizer_batch_processing() + assert compare_tensors(batched_means2D, batched_means2D_batch_processed) assert compare_tensors(batched_radii, batched_radii_batch_processed) assert compare_tensors(batched_conic_opacity, batched_conic_opacity_batch_processed) @@ -291,7 +339,9 @@ def compare_tensors(tensor1, tensor2): assert compare_tensors(batched_depths, batched_depths_batch_processed) assert len(batched_screenspace_params) == len(batched_screenspace_params_batch_processed) - #-------BACKWARD PASS------- + # -------BACKWARD PASS------- assert compare_tensors(batched_dL_means3D, batched_dL_means3D_batch_processed) - - + assert compare_tensors(batched_dL_scales, batched_dL_scales_batch_processed) + assert compare_tensors(batched_dL_rotations, batched_dL_rotations_batch_processed) + assert compare_tensors(batched_dL_shs, batched_dL_shs_batch_processed) + assert compare_tensors(batched_dL_opacity, batched_dL_opacity_batch_processed) From 710b56fe324b4187101df45518b8824bf6561d6f Mon Sep 17 00:00:00 2001 From: Sandeep Menon Date: Sun, 28 Apr 2024 18:16:56 -0400 Subject: [PATCH 15/34] Add pyproject.toml file with ruff line-length set to 120 --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 pyproject.toml diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..6faf548 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,2 @@ +[tool.ruff] +line-length = 120 \ No newline at end of file From 2ca5ae615fee542a8b3d268b6d8c4c6a5f7b000a Mon Sep 17 00:00:00 2001 From: Sandeep Menon Date: Sun, 28 Apr 2024 18:28:13 -0400 Subject: [PATCH 16/34] Refactor ruff.toml file to set line-length to 120 and indent-width to 4 --- pyproject.toml | 2 -- ruff.toml | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) delete mode 100644 pyproject.toml create mode 100644 ruff.toml diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index 6faf548..0000000 --- a/pyproject.toml +++ /dev/null @@ -1,2 +0,0 @@ -[tool.ruff] -line-length = 120 \ No newline at end of file diff --git a/ruff.toml b/ruff.toml new file mode 100644 index 0000000..b11f04d --- /dev/null +++ b/ruff.toml @@ -0,0 +1,2 @@ +line-length = 120 +indent-width = 4 From 7a4b6b462738d31e15a6259e342a2f797c10afe8 Mon Sep 17 00:00:00 2001 From: Sandeep Menon Date: Sun, 28 Apr 2024 18:30:53 -0400 Subject: [PATCH 17/34] Refactor compare_tensors function to handle None values in rasterization_backward_tests.py --- rasterization_backward_tests.py | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/rasterization_backward_tests.py b/rasterization_backward_tests.py index 3b693d7..e37392c 100644 --- a/rasterization_backward_tests.py +++ b/rasterization_backward_tests.py @@ -284,7 +284,12 @@ def test_batched_gaussian_rasterizer_batch_processing(): def compare_tensors(tensor1, tensor2): - if tensor1.shape != tensor2.shape: + if tensor1 is None and tensor2 is None: + return True + elif tensor1 is None or tensor2 is None: + print("One of the tensors is None.") + return False + elif tensor1.shape != tensor2.shape: print("Tensors have different shapes:") print("Tensor 1 shape:", tensor1.shape) print("Tensor 2 shape:", tensor2.shape) @@ -331,17 +336,17 @@ def compare_tensors(tensor1, tensor2): batched_dL_opacity_batch_processed, ) = test_batched_gaussian_rasterizer_batch_processing() - assert compare_tensors(batched_means2D, batched_means2D_batch_processed) - assert compare_tensors(batched_radii, batched_radii_batch_processed) - assert compare_tensors(batched_conic_opacity, batched_conic_opacity_batch_processed) + assert compare_tensors(batched_means2D, batched_means2D_batch_processed), "Means2D do not match." + assert compare_tensors(batched_radii, batched_radii_batch_processed), "Radii do not match." + assert compare_tensors(batched_conic_opacity, batched_conic_opacity_batch_processed), "Conic opacity do not match." - assert compare_tensors(batched_rgb, batched_rgb_batch_processed) - assert compare_tensors(batched_depths, batched_depths_batch_processed) - assert len(batched_screenspace_params) == len(batched_screenspace_params_batch_processed) + assert compare_tensors(batched_rgb, batched_rgb_batch_processed), "RGB values do not match." + assert compare_tensors(batched_depths, batched_depths_batch_processed), "Depths do not match." + assert len(batched_screenspace_params) == len(batched_screenspace_params_batch_processed), "Screenspace params do not match." # -------BACKWARD PASS------- - assert compare_tensors(batched_dL_means3D, batched_dL_means3D_batch_processed) - assert compare_tensors(batched_dL_scales, batched_dL_scales_batch_processed) - assert compare_tensors(batched_dL_rotations, batched_dL_rotations_batch_processed) - assert compare_tensors(batched_dL_shs, batched_dL_shs_batch_processed) - assert compare_tensors(batched_dL_opacity, batched_dL_opacity_batch_processed) + assert compare_tensors(batched_dL_means3D, batched_dL_means3D_batch_processed), "dL_means3D do not match." + assert compare_tensors(batched_dL_scales, batched_dL_scales_batch_processed), "dL_scales do not match." + assert compare_tensors(batched_dL_rotations, batched_dL_rotations_batch_processed), "dL_rotations do not match." + assert compare_tensors(batched_dL_shs, batched_dL_shs_batch_processed), "dL_shs do not match." + assert compare_tensors(batched_dL_opacity, batched_dL_opacity_batch_processed), "dL_opacity do not match." From 14889e645acad2293a6757733fb5eb0bbb03a3df Mon Sep 17 00:00:00 2001 From: Sandeep Menon Date: Sun, 28 Apr 2024 18:39:15 -0400 Subject: [PATCH 18/34] Update ruff.toml file to set line-length to 120 --- ruff.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ruff.toml b/ruff.toml index b11f04d..0381d4d 100644 --- a/ruff.toml +++ b/ruff.toml @@ -1,2 +1 @@ -line-length = 120 -indent-width = 4 +line-length = 120 \ No newline at end of file From 5682d26bfc0cb3165c3b8c0d6fd3fadf143b88e2 Mon Sep 17 00:00:00 2001 From: Sandeep Menon Date: Sun, 28 Apr 2024 18:39:25 -0400 Subject: [PATCH 19/34] Refactor rasterization_backward_tests.py to include gradient checks for all inputs --- rasterization_backward_tests.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/rasterization_backward_tests.py b/rasterization_backward_tests.py index e37392c..7058b01 100644 --- a/rasterization_backward_tests.py +++ b/rasterization_backward_tests.py @@ -145,6 +145,12 @@ def test_batched_gaussian_rasterizer(): preproc_back = end_backward - start_backward print(f"Time taken by test_batched_gaussian_rasterizer BACKWARD: {preproc_back:.4f} seconds") + assert means3D.grad is not None, "Means3D gradient is None." + assert scales.grad is not None, "Scales gradient is None." + assert rotations.grad is not None, "Rotations gradient is None." + assert shs.grad is not None, "SHs gradient is None." + assert opacity.grad is not None, "Opacity gradient is None." + return ( batched_means2D, batched_radii, @@ -268,6 +274,12 @@ def test_batched_gaussian_rasterizer_batch_processing(): preproc_back = end_backward - start_backward print(f"Time taken by test_batched_gaussian_rasterizer_batch_processing BACKWARD: {preproc_back:.4f} seconds") + assert means3D.grad is not None, "Means3D gradient is None." + assert scales.grad is not None, "Scales gradient is None." + assert rotations.grad is not None, "Rotations gradient is None." + assert shs.grad is not None, "SHs gradient is None." + assert opacity.grad is not None, "Opacity gradient is None." + return ( batched_means2D, batched_radii, @@ -342,7 +354,9 @@ def compare_tensors(tensor1, tensor2): assert compare_tensors(batched_rgb, batched_rgb_batch_processed), "RGB values do not match." assert compare_tensors(batched_depths, batched_depths_batch_processed), "Depths do not match." - assert len(batched_screenspace_params) == len(batched_screenspace_params_batch_processed), "Screenspace params do not match." + assert len(batched_screenspace_params) == len( + batched_screenspace_params_batch_processed + ), "Screenspace params do not match." # -------BACKWARD PASS------- assert compare_tensors(batched_dL_means3D, batched_dL_means3D_batch_processed), "dL_means3D do not match." From 945e8cf3271f30c1e8c66763a605cb0b4390b8ff Mon Sep 17 00:00:00 2001 From: Sandeep Menon Date: Sun, 28 Apr 2024 18:50:39 -0400 Subject: [PATCH 20/34] gradients calculated for all the variables to check and cloning them --- rasterization_backward_tests.py | 43 +++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/rasterization_backward_tests.py b/rasterization_backward_tests.py index 7058b01..dd6eb64 100644 --- a/rasterization_backward_tests.py +++ b/rasterization_backward_tests.py @@ -25,6 +25,20 @@ opacity.requires_grad = True +def compute_dummy_loss(): + losses = [(tensor - torch.ones_like(tensor)).pow(2).mean() for tensor in [means3D, scales, rotations, shs, opacity]] + loss = sum(losses) + return loss + + +def zero_grad(): + means3D.grad = None + scales.grad = None + rotations.grad = None + shs.grad = None + opacity.grad = None + + def get_cuda_args(strategy, mode="train"): cuda_args = { "mode": mode, @@ -137,9 +151,9 @@ def test_batched_gaussian_rasterizer(): batched_rgb = torch.stack(batched_rgb, dim=0) batched_depths = torch.stack(batched_depths, dim=0) + zero_grad() start_backward = time.time() - target_batched_means3d = torch.ones(means3D.shape).cuda() - loss = (means3D - target_batched_means3d).pow(2).mean() + loss = compute_dummy_loss() loss.backward() end_backward = time.time() preproc_back = end_backward - start_backward @@ -158,11 +172,11 @@ def test_batched_gaussian_rasterizer(): batched_conic_opacity, batched_rgb, batched_depths, - means3D.grad, - scales.grad, - rotations.grad, - shs.grad, - opacity.grad, + means3D.grad.clone(), + scales.grad.clone(), + rotations.grad.clone(), + shs.grad.clone(), + opacity.grad.clone(), ) @@ -265,10 +279,9 @@ def test_batched_gaussian_rasterizer_batch_processing(): screenspace_params = [means2D, rgb, conic_opacity, radii, depths] batched_screenspace_params.append(screenspace_params) + zero_grad() start_backward = time.time() - means3D.grad.zero_() # need to reset it for it to check - target_batched_means3d = torch.ones(means3D.shape).cuda() - loss = (means3D - target_batched_means3d).pow(2).mean() + loss = compute_dummy_loss() loss.backward() end_backward = time.time() preproc_back = end_backward - start_backward @@ -287,11 +300,11 @@ def test_batched_gaussian_rasterizer_batch_processing(): batched_conic_opacity, batched_rgb, batched_depths, - means3D.grad, - scales.grad, - rotations.grad, - shs.grad, - opacity.grad, + means3D.grad.clone(), + scales.grad.clone(), + rotations.grad.clone(), + shs.grad.clone(), + opacity.grad.clone(), ) From c84d7cd0ab0ee7bf9824c8468dd5da7e9d87b028 Mon Sep 17 00:00:00 2001 From: Sandeep Menon Date: Sun, 28 Apr 2024 19:17:44 -0400 Subject: [PATCH 21/34] converted to pytest testing --- rasterization_backward_tests.py | 143 +++++++++++++------------------- 1 file changed, 59 insertions(+), 84 deletions(-) diff --git a/rasterization_backward_tests.py b/rasterization_backward_tests.py index dd6eb64..6d55dc4 100644 --- a/rasterization_backward_tests.py +++ b/rasterization_backward_tests.py @@ -1,6 +1,7 @@ import math import time +import pytest import torch from diff_gaussian_rasterization import ( @@ -11,34 +12,62 @@ num_gaussians = 10000 num_batches = 32 -means3D = torch.randn(num_gaussians, 3).cuda() -scales = torch.randn(num_gaussians, 3).cuda() -rotations = torch.randn(num_gaussians, 4).cuda() -shs = torch.randn(num_gaussians, 16, 3).cuda() -opacity = torch.randn(num_gaussians, 1).cuda() SH_ACTIVE_DEGREE = 3 -means3D.requires_grad = True -scales.requires_grad = True -rotations.requires_grad = True -shs.requires_grad = True -opacity.requires_grad = True +@pytest.fixture(scope="module") +def setup_data(): + # Set up the input data, viewpoint cameras, strategies, etc. + means3D = torch.randn(num_gaussians, 3).cuda() + scales = torch.randn(num_gaussians, 3).cuda() + rotations = torch.randn(num_gaussians, 4).cuda() + shs = torch.randn(num_gaussians, 16, 3).cuda() + opacity = torch.randn(num_gaussians, 1).cuda() -def compute_dummy_loss(): + means3D.requires_grad = True + scales.requires_grad = True + rotations.requires_grad = True + shs.requires_grad = True + opacity.requires_grad = True + + batched_viewpoint_cameras = [] + for _ in range(num_batches): + viewpoint_camera = type("ViewpointCamera", (), {}) + viewpoint_camera.FoVx = math.radians(60) + viewpoint_camera.FoVy = math.radians(60) + viewpoint_camera.image_height = 512 + viewpoint_camera.image_width = 512 + viewpoint_camera.world_view_transform = torch.eye(4).cuda() + viewpoint_camera.full_proj_transform = torch.eye(4).cuda() + viewpoint_camera.camera_center = torch.zeros(3).cuda() + batched_viewpoint_cameras.append(viewpoint_camera) + + batched_strategies = [None] * num_batches + + bg_color = torch.ones(3).cuda() + scaling_modifier = 1.0 + pc = type("PC", (), {}) + pc.active_sh_degree = SH_ACTIVE_DEGREE + pipe = type("Pipe", (), {}) + pipe.debug = False + mode = "train" + + return means3D, scales, rotations, shs, opacity, batched_viewpoint_cameras, batched_strategies, bg_color, scaling_modifier, pc, pipe, mode + + +def compute_dummy_loss(means3D, scales, rotations, shs, opacity): losses = [(tensor - torch.ones_like(tensor)).pow(2).mean() for tensor in [means3D, scales, rotations, shs, opacity]] loss = sum(losses) return loss -def zero_grad(): +def zero_grad(means3D, scales, rotations, shs, opacity): means3D.grad = None scales.grad = None rotations.grad = None shs.grad = None opacity.grad = None - def get_cuda_args(strategy, mode="train"): cuda_args = { "mode": mode, @@ -59,31 +88,8 @@ def get_cuda_args(strategy, mode="train"): return cuda_args -def test_batched_gaussian_rasterizer(): - # Set up the viewpoint cameras - batched_viewpoint_cameras = [] - for _ in range(num_batches): - viewpoint_camera = type("ViewpointCamera", (), {}) - viewpoint_camera.FoVx = math.radians(60) - viewpoint_camera.FoVy = math.radians(60) - viewpoint_camera.image_height = 512 - viewpoint_camera.image_width = 512 - viewpoint_camera.world_view_transform = torch.eye(4).cuda() - viewpoint_camera.full_proj_transform = torch.eye(4).cuda() - viewpoint_camera.camera_center = torch.zeros(3).cuda() - batched_viewpoint_cameras.append(viewpoint_camera) - - # Set up the strategies - batched_strategies = [None] * num_batches - - # Set up other parameters - bg_color = torch.ones(3).cuda() - scaling_modifier = 1.0 - pc = type("PC", (), {}) - pc.active_sh_degree = SH_ACTIVE_DEGREE - pipe = type("Pipe", (), {}) - pipe.debug = False - mode = "train" +def run_batched_gaussian_rasterizer(setup_data): + means3D, scales, rotations, shs, opacity, batched_viewpoint_cameras, batched_strategies, bg_color, scaling_modifier, pc, pipe, mode = setup_data batched_rasterizers = [] batched_cuda_args = [] @@ -126,10 +132,6 @@ def test_batched_gaussian_rasterizer(): means3D=means3D, scales=scales, rotations=rotations, shs=shs, opacities=opacity, cuda_args=cuda_args ) - # TODO: make the below work - # if mode == "train": - # means2D.retain_grad() - batched_means2D.append(means2D) screenspace_params = [means2D, rgb, conic_opacity, radii, depths] batched_rasterizers.append(rasterizer) @@ -141,9 +143,7 @@ def test_batched_gaussian_rasterizer(): end_time = time.time() preprocess_time = end_time - start_time - print(f"Time taken by test_batched_gaussian_rasterizer: {preprocess_time:.4f} seconds") - # Perform further operations with the batched results - # Test results and performance + print(f"Time taken by run_batched_gaussian_rasterizer: {preprocess_time:.4f} seconds") batched_means2D = torch.stack(batched_means2D, dim=0) batched_radii = torch.stack(batched_radii, dim=0) @@ -151,13 +151,13 @@ def test_batched_gaussian_rasterizer(): batched_rgb = torch.stack(batched_rgb, dim=0) batched_depths = torch.stack(batched_depths, dim=0) - zero_grad() + zero_grad(means3D, scales, rotations, shs, opacity) start_backward = time.time() - loss = compute_dummy_loss() + loss = compute_dummy_loss(means3D, scales, rotations, shs, opacity): loss.backward() end_backward = time.time() preproc_back = end_backward - start_backward - print(f"Time taken by test_batched_gaussian_rasterizer BACKWARD: {preproc_back:.4f} seconds") + print(f"Time taken by run_batched_gaussian_rasterizer BACKWARD: {preproc_back:.4f} seconds") assert means3D.grad is not None, "Means3D gradient is None." assert scales.grad is not None, "Scales gradient is None." @@ -180,33 +180,11 @@ def test_batched_gaussian_rasterizer(): ) -def test_batched_gaussian_rasterizer_batch_processing(): +def run_batched_gaussian_rasterizer_batch_processing(setup_data): + means3D, scales, rotations, shs, opacity, batched_viewpoint_cameras, batched_strategies, bg_color, scaling_modifier, pc, pipe, mode = setup_data + # Set up the input data start_time = time.time() - # Set up the viewpoint cameras - batched_viewpoint_cameras = [] - for _ in range(num_batches): - viewpoint_camera = type("ViewpointCamera", (), {}) - viewpoint_camera.FoVx = math.radians(60) - viewpoint_camera.FoVy = math.radians(60) - viewpoint_camera.image_height = 512 - viewpoint_camera.image_width = 512 - viewpoint_camera.world_view_transform = torch.eye(4).cuda() - viewpoint_camera.full_proj_transform = torch.eye(4).cuda() - viewpoint_camera.camera_center = torch.zeros(3).cuda() - batched_viewpoint_cameras.append(viewpoint_camera) - - # Set up the strategies - batched_strategies = [None] * num_batches - - # Set up other parameters - bg_color = torch.ones(3).cuda() - scaling_modifier = 1.0 - pc = type("PC", (), {}) - pc.active_sh_degree = SH_ACTIVE_DEGREE - pipe = type("Pipe", (), {}) - pipe.debug = False - mode = "train" # Set up rasterization configuration for the batch raster_settings_batch = [] @@ -254,11 +232,7 @@ def test_batched_gaussian_rasterizer_batch_processing(): ) end_time = time.time() preprocess_time = end_time - start_time - print(f"Time taken by test_batched_gaussian_rasterizer_batch_processing: {preprocess_time:.4f} seconds") - - # TODO: make the below work - # if mode == "train": - # batched_means2D.retain_grad() + print(f"Time taken by run_batched_gaussian_rasterizer_batch_processing: {preprocess_time:.4f} seconds") # Perform assertions on the preprocessed data @@ -279,13 +253,13 @@ def test_batched_gaussian_rasterizer_batch_processing(): screenspace_params = [means2D, rgb, conic_opacity, radii, depths] batched_screenspace_params.append(screenspace_params) - zero_grad() + zero_grad(means3D, scales, rotations, shs, opacity) start_backward = time.time() - loss = compute_dummy_loss() + loss = compute_dummy_loss(means3D, scales, rotations, shs, opacity): loss.backward() end_backward = time.time() preproc_back = end_backward - start_backward - print(f"Time taken by test_batched_gaussian_rasterizer_batch_processing BACKWARD: {preproc_back:.4f} seconds") + print(f"Time taken by run_batched_gaussian_rasterizer_batch_processing BACKWARD: {preproc_back:.4f} seconds") assert means3D.grad is not None, "Means3D gradient is None." assert scales.grad is not None, "Scales gradient is None." @@ -333,7 +307,7 @@ def compare_tensors(tensor1, tensor2): return False -if __name__ == "__main__": +def test_compare_batched_gaussian_rasterizer_results(setup_data): ( batched_means2D, batched_radii, @@ -346,7 +320,7 @@ def compare_tensors(tensor1, tensor2): batched_dL_rotations, batched_dL_shs, batched_dL_opacity, - ) = test_batched_gaussian_rasterizer() + ) = run_batched_gaussian_rasterizer(setup_data) ( batched_means2D_batch_processed, batched_radii_batch_processed, @@ -359,7 +333,7 @@ def compare_tensors(tensor1, tensor2): batched_dL_rotations_batch_processed, batched_dL_shs_batch_processed, batched_dL_opacity_batch_processed, - ) = test_batched_gaussian_rasterizer_batch_processing() + ) = run_batched_gaussian_rasterizer_batch_processing(setup_data) assert compare_tensors(batched_means2D, batched_means2D_batch_processed), "Means2D do not match." assert compare_tensors(batched_radii, batched_radii_batch_processed), "Radii do not match." @@ -377,3 +351,4 @@ def compare_tensors(tensor1, tensor2): assert compare_tensors(batched_dL_rotations, batched_dL_rotations_batch_processed), "dL_rotations do not match." assert compare_tensors(batched_dL_shs, batched_dL_shs_batch_processed), "dL_shs do not match." assert compare_tensors(batched_dL_opacity, batched_dL_opacity_batch_processed), "dL_opacity do not match." + \ No newline at end of file From 6f38446485a9eead56b58e7975f46b1a783d2600 Mon Sep 17 00:00:00 2001 From: Sandeep Menon Date: Sun, 28 Apr 2024 19:19:30 -0400 Subject: [PATCH 22/34] fixed colon bug and ruff formatiting --- rasterization_backward_tests.py | 51 +++++++++++++++++++++++++++++---- 1 file changed, 45 insertions(+), 6 deletions(-) diff --git a/rasterization_backward_tests.py b/rasterization_backward_tests.py index 6d55dc4..171d1b1 100644 --- a/rasterization_backward_tests.py +++ b/rasterization_backward_tests.py @@ -52,7 +52,20 @@ def setup_data(): pipe.debug = False mode = "train" - return means3D, scales, rotations, shs, opacity, batched_viewpoint_cameras, batched_strategies, bg_color, scaling_modifier, pc, pipe, mode + return ( + means3D, + scales, + rotations, + shs, + opacity, + batched_viewpoint_cameras, + batched_strategies, + bg_color, + scaling_modifier, + pc, + pipe, + mode, + ) def compute_dummy_loss(means3D, scales, rotations, shs, opacity): @@ -68,6 +81,7 @@ def zero_grad(means3D, scales, rotations, shs, opacity): shs.grad = None opacity.grad = None + def get_cuda_args(strategy, mode="train"): cuda_args = { "mode": mode, @@ -89,7 +103,20 @@ def get_cuda_args(strategy, mode="train"): def run_batched_gaussian_rasterizer(setup_data): - means3D, scales, rotations, shs, opacity, batched_viewpoint_cameras, batched_strategies, bg_color, scaling_modifier, pc, pipe, mode = setup_data + ( + means3D, + scales, + rotations, + shs, + opacity, + batched_viewpoint_cameras, + batched_strategies, + bg_color, + scaling_modifier, + pc, + pipe, + mode, + ) = setup_data batched_rasterizers = [] batched_cuda_args = [] @@ -153,7 +180,7 @@ def run_batched_gaussian_rasterizer(setup_data): zero_grad(means3D, scales, rotations, shs, opacity) start_backward = time.time() - loss = compute_dummy_loss(means3D, scales, rotations, shs, opacity): + loss = compute_dummy_loss(means3D, scales, rotations, shs, opacity) loss.backward() end_backward = time.time() preproc_back = end_backward - start_backward @@ -181,7 +208,20 @@ def run_batched_gaussian_rasterizer(setup_data): def run_batched_gaussian_rasterizer_batch_processing(setup_data): - means3D, scales, rotations, shs, opacity, batched_viewpoint_cameras, batched_strategies, bg_color, scaling_modifier, pc, pipe, mode = setup_data + ( + means3D, + scales, + rotations, + shs, + opacity, + batched_viewpoint_cameras, + batched_strategies, + bg_color, + scaling_modifier, + pc, + pipe, + mode, + ) = setup_data # Set up the input data start_time = time.time() @@ -255,7 +295,7 @@ def run_batched_gaussian_rasterizer_batch_processing(setup_data): zero_grad(means3D, scales, rotations, shs, opacity) start_backward = time.time() - loss = compute_dummy_loss(means3D, scales, rotations, shs, opacity): + loss = compute_dummy_loss(means3D, scales, rotations, shs, opacity) loss.backward() end_backward = time.time() preproc_back = end_backward - start_backward @@ -351,4 +391,3 @@ def test_compare_batched_gaussian_rasterizer_results(setup_data): assert compare_tensors(batched_dL_rotations, batched_dL_rotations_batch_processed), "dL_rotations do not match." assert compare_tensors(batched_dL_shs, batched_dL_shs_batch_processed), "dL_shs do not match." assert compare_tensors(batched_dL_opacity, batched_dL_opacity_batch_processed), "dL_opacity do not match." - \ No newline at end of file From 7b30782d1a21409b988f95ac0145d31f8126bec7 Mon Sep 17 00:00:00 2001 From: Sandeep Menon Date: Sun, 28 Apr 2024 19:21:27 -0400 Subject: [PATCH 23/34] Add __pycache__/ to .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index aa1ae78..bb3a0d2 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ diff_gaussian_rasterization.egg-info/ dist/ diff_gaussian_rasterization/__pycache__/ *so + __pycache__/ \ No newline at end of file From 13a55591331c4fbea48d3e1be964f269a203f8aa Mon Sep 17 00:00:00 2001 From: Sandeep Menon Date: Sun, 28 Apr 2024 19:24:30 -0400 Subject: [PATCH 24/34] renamed to *_test.py --- rasterization_backward_tests.py => rasterization_backward_test.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename rasterization_backward_tests.py => rasterization_backward_test.py (100%) diff --git a/rasterization_backward_tests.py b/rasterization_backward_test.py similarity index 100% rename from rasterization_backward_tests.py rename to rasterization_backward_test.py From 5b2488150f7695457953df03c44e1bcac644c267 Mon Sep 17 00:00:00 2001 From: Sandeep Menon Date: Sun, 28 Apr 2024 19:37:30 -0400 Subject: [PATCH 25/34] Update .gitignore to include __pycache__/ --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index bb3a0d2..df0a5ed 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,4 @@ diff_gaussian_rasterization.egg-info/ dist/ diff_gaussian_rasterization/__pycache__/ *so - __pycache__/ \ No newline at end of file +__pycache__/ \ No newline at end of file From 9e6f4a98264740f554e6bb0809360bcb3498753b Mon Sep 17 00:00:00 2001 From: Sandeep Menon Date: Sun, 28 Apr 2024 19:41:53 -0400 Subject: [PATCH 26/34] moved test into tests folder --- .../rasterization_preprocess_test.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename rasterization_backward_test.py => tests/rasterization_preprocess_test.py (100%) diff --git a/rasterization_backward_test.py b/tests/rasterization_preprocess_test.py similarity index 100% rename from rasterization_backward_test.py rename to tests/rasterization_preprocess_test.py From 7be38fa298c01d2336a9b8335954ddac12bfb1d8 Mon Sep 17 00:00:00 2001 From: Sandeep Menon Date: Sun, 28 Apr 2024 19:42:03 -0400 Subject: [PATCH 27/34] Add instructions for running tests in README.md --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 6e165b0..5bfbbec 100644 --- a/README.md +++ b/README.md @@ -16,4 +16,8 @@ Used as the rasterization engine for the paper "3D Gaussian Splatting for Real-T url = {https://repo-sam.inria.fr/fungraph/3d-gaussian-splatting/} } - \ No newline at end of file + + +## Running tests +Use pytest to run the tests. The tests are located in the `tests` directory. To run all tests, simply run `pytest` in the root directory of the project. +Use the `--capture=no` flag to see the output of the tests including the performance metrics. From 307e1567479ccfc9f9b5be3a6b5319b1f1a776c4 Mon Sep 17 00:00:00 2001 From: Sandeep Menon Date: Sun, 28 Apr 2024 19:50:08 -0400 Subject: [PATCH 28/34] deleted old test file --- rasterization_tests.py | 270 ----------------------------------------- 1 file changed, 270 deletions(-) delete mode 100644 rasterization_tests.py diff --git a/rasterization_tests.py b/rasterization_tests.py deleted file mode 100644 index 7501854..0000000 --- a/rasterization_tests.py +++ /dev/null @@ -1,270 +0,0 @@ -import math -import time - -import torch - -from diff_gaussian_rasterization import ( - GaussianRasterizationSettings, - GaussianRasterizer, - GaussianRasterizerBatches, -) - -num_gaussians = 10000 -num_batches=32 -means3D = torch.randn(num_gaussians, 3).cuda() -scales = torch.randn(num_gaussians, 3).cuda() -rotations = torch.randn(num_gaussians, 4).cuda() -shs = torch.randn(num_gaussians, 16, 3).cuda() -opacity = torch.randn(num_gaussians, 1).cuda() -SH_ACTIVE_DEGREE = 3 - -def get_cuda_args(strategy, mode="train"): - cuda_args = { - "mode": mode, - "world_size": "1", - "global_rank": "0", - "local_rank": "0", - "mp_world_size": "1", - "mp_rank": "0", - "log_folder": "./logs", - "log_interval": "10", - "iteration": "0", - "zhx_debug": "False", - "zhx_time": "False", - "dist_global_strategy": "default", - "avoid_pixel_all2all": False, - "stats_collector": {}, - } - return cuda_args - - -def test_batched_gaussian_rasterizer(): - # Set up the viewpoint cameras - batched_viewpoint_cameras = [] - for _ in range(num_batches): - viewpoint_camera = type('ViewpointCamera', (), {}) - viewpoint_camera.FoVx = math.radians(60) - viewpoint_camera.FoVy = math.radians(60) - viewpoint_camera.image_height = 512 - viewpoint_camera.image_width = 512 - viewpoint_camera.world_view_transform = torch.eye(4).cuda() - viewpoint_camera.full_proj_transform = torch.eye(4).cuda() - viewpoint_camera.camera_center = torch.zeros(3).cuda() - batched_viewpoint_cameras.append(viewpoint_camera) - - # Set up the strategies - batched_strategies = [None] * num_batches - - # Set up other parameters - bg_color = torch.ones(3).cuda() - scaling_modifier = 1.0 - pc = type('PC', (), {}) - pc.active_sh_degree = SH_ACTIVE_DEGREE - pipe = type('Pipe', (), {}) - pipe.debug = False - mode = "train" - - batched_rasterizers = [] - batched_cuda_args = [] - batched_screenspace_params = [] - batched_means2D = [] - batched_radii = [] - batched_conic_opacity=[] - batched_depths=[] - batched_rgb=[] - - start_time = time.time() - - for i, (viewpoint_camera, strategy) in enumerate(zip(batched_viewpoint_cameras, batched_strategies)): - ########## [START] Prepare CUDA Rasterization Settings ########## - cuda_args = get_cuda_args(strategy, mode) - batched_cuda_args.append(cuda_args) - - # Set up rasterization configuration - tanfovx = math.tan(viewpoint_camera.FoVx * 0.5) - tanfovy = math.tan(viewpoint_camera.FoVy * 0.5) - raster_settings = GaussianRasterizationSettings( - image_height=int(viewpoint_camera.image_height), - image_width=int(viewpoint_camera.image_width), - tanfovx=tanfovx, - tanfovy=tanfovy, - bg=bg_color, - scale_modifier=scaling_modifier, - viewmatrix=viewpoint_camera.world_view_transform, - projmatrix=viewpoint_camera.full_proj_transform, - sh_degree=pc.active_sh_degree, - campos=viewpoint_camera.camera_center, - prefiltered=False, - debug=pipe.debug - ) - rasterizer = GaussianRasterizer(raster_settings=raster_settings) - ########## [END] Prepare CUDA Rasterization Settings ########## - - #[3DGS-wise preprocess] - means2D, rgb, conic_opacity, radii, depths = rasterizer.preprocess_gaussians( - means3D=means3D, - scales=scales, - rotations=rotations, - shs=shs, - opacities=opacity, - cuda_args=cuda_args - ) - - # TODO: make the below work - # if mode == "train": - # means2D.retain_grad() - - batched_means2D.append(means2D) - screenspace_params = [means2D, rgb, conic_opacity, radii, depths] - batched_rasterizers.append(rasterizer) - batched_screenspace_params.append(screenspace_params) - batched_radii.append(radii) - batched_rgb.append(rgb) - batched_conic_opacity.append(conic_opacity) - batched_depths.append(depths) - - - end_time = time.time() - preprocess_time = end_time - start_time - print(f"Time taken by test_batched_gaussian_rasterizer: {preprocess_time:.4f} seconds") - # Perform further operations with the batched results - # Test results and performance - - batched_means2D = torch.stack(batched_means2D, dim=0) - batched_radii = torch.stack(batched_radii, dim=0) - batched_conic_opacity=torch.stack(batched_conic_opacity,dim=0) - batched_rgb=torch.stack(batched_rgb,dim=0) - batched_depths=torch.stack(batched_depths,dim=0) - - return batched_means2D, batched_radii, batched_screenspace_params,batched_conic_opacity,batched_rgb,batched_depths - - -def test_batched_gaussian_rasterizer_batch_processing(): - # Set up the input data - start_time = time.time() - # Set up the viewpoint cameras - batched_viewpoint_cameras = [] - for _ in range(num_batches): - viewpoint_camera = type('ViewpointCamera', (), {}) - viewpoint_camera.FoVx = math.radians(60) - viewpoint_camera.FoVy = math.radians(60) - viewpoint_camera.image_height = 512 - viewpoint_camera.image_width = 512 - viewpoint_camera.world_view_transform = torch.eye(4).cuda() - viewpoint_camera.full_proj_transform = torch.eye(4).cuda() - viewpoint_camera.camera_center = torch.zeros(3).cuda() - batched_viewpoint_cameras.append(viewpoint_camera) - - # Set up the strategies - batched_strategies = [None] * num_batches - - # Set up other parameters - bg_color = torch.ones(3).cuda() - scaling_modifier = 1.0 - pc = type('PC', (), {}) - pc.active_sh_degree = SH_ACTIVE_DEGREE - pipe = type('Pipe', (), {}) - pipe.debug = False - mode = "train" - - # Set up rasterization configuration for the batch - raster_settings_batch = [] - batched_cuda_args = [] - for i, (viewpoint_camera, strategy) in enumerate(zip(batched_viewpoint_cameras, batched_strategies)): - ########## [START] Prepare CUDA Rasterization Settings ########## - cuda_args = get_cuda_args(strategy, mode) - batched_cuda_args.append(cuda_args) - tanfovx = math.tan(viewpoint_camera.FoVx * 0.5) - tanfovy = math.tan(viewpoint_camera.FoVy * 0.5) - - raster_settings = GaussianRasterizationSettings( - image_height=int(batched_viewpoint_cameras[0].image_height), - image_width=int(batched_viewpoint_cameras[0].image_width), - tanfovx=tanfovx, - tanfovy=tanfovy, - bg=bg_color, - scale_modifier=scaling_modifier, - viewmatrix=viewpoint_camera.world_view_transform, - projmatrix=viewpoint_camera.full_proj_transform, - sh_degree=pc.active_sh_degree, - campos=viewpoint_camera.camera_center, - prefiltered=False, - debug=pipe.debug - ) - raster_settings_batch.append(raster_settings) - - # Create the GaussianRasterizer for the batch - rasterizer = GaussianRasterizerBatches(raster_settings_batch=raster_settings_batch) - - # Preprocess the Gaussians for the entire batch - batched_means2D, batched_rgb, batched_conic_opacity, batched_radii, batched_depths = rasterizer.preprocess_gaussians( - means3D=means3D, - scales=scales, - rotations=rotations, - shs=shs, - opacities=opacity, - batched_cuda_args=batched_cuda_args[0] #TODO: look into sending list of cuda_args/strategies - ) - end_time = time.time() - preprocess_time = end_time - start_time - print(f"Time taken by test_batched_gaussian_rasterizer_batch_processing: {preprocess_time:.4f} seconds") - - # TODO: make the below work - # if mode == "train": - # batched_means2D.retain_grad() - - - # Perform assertions on the preprocessed data - - assert batched_means2D.shape == (num_batches, num_gaussians, 2) - assert batched_rgb.shape == (num_batches, num_gaussians, 3) - assert batched_conic_opacity.shape == (num_batches, num_gaussians,4) - assert batched_radii.shape == (num_batches, num_gaussians) - assert batched_depths.shape == (num_batches, num_gaussians) - - batched_screenspace_params = [] - for i in range(num_batches): - means2D = batched_means2D[i] - rgb = batched_rgb[i] - conic_opacity = batched_conic_opacity[i] - radii = batched_radii[i] - depths = batched_depths[i] - - screenspace_params = [means2D, rgb, conic_opacity, radii, depths] - batched_screenspace_params.append(screenspace_params) - - return batched_means2D, batched_radii, batched_screenspace_params, batched_conic_opacity,batched_rgb,batched_depths - - -def compare_tensors(tensor1, tensor2): - if tensor1.shape != tensor2.shape: - print("Tensors have different shapes:") - print("Tensor 1 shape:", tensor1.shape) - print("Tensor 2 shape:", tensor2.shape) - return False - - equality_matrix = torch.eq(tensor1, tensor2) - if torch.all(equality_matrix): - return True - else: - print("Tensors have non-matching values.") - non_matching_indices = torch.where(equality_matrix == False) - for idx in zip(*non_matching_indices[:5]): - value1 = tensor1[idx].item() - value2 = tensor2[idx].item() - print(f"Non-matching values at index {idx}: {value1} != {value2}") - return False - -if __name__ == "__main__": - batched_means2D, batched_radii, batched_screenspace_params,batched_conic_opacity,batched_rgb,batched_depths = test_batched_gaussian_rasterizer() - batched_means2D_batch_processed, batched_radii_batch_processed, batched_screenspace_params_batch_processed,batched_conic_opacity_batch_processed,batched_rgb_batch_processed,batched_depths_batch_processed = test_batched_gaussian_rasterizer_batch_processing() - - assert compare_tensors(batched_means2D, batched_means2D_batch_processed) - assert compare_tensors(batched_radii, batched_radii_batch_processed) - assert compare_tensors(batched_conic_opacity, batched_conic_opacity_batch_processed) - - assert compare_tensors(batched_rgb, batched_rgb_batch_processed) - assert compare_tensors(batched_depths, batched_depths_batch_processed) - assert len(batched_screenspace_params) == len(batched_screenspace_params_batch_processed) - - From 21ee225e3cae5ece3c4f8909847e8d94f6358140 Mon Sep 17 00:00:00 2001 From: Sandeep Menon Date: Sun, 28 Apr 2024 20:03:14 -0400 Subject: [PATCH 29/34] renamed idx to point_idx and view_idx to result_idx in backward --- cuda_rasterizer/backward.cu | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/cuda_rasterizer/backward.cu b/cuda_rasterizer/backward.cu index 7121689..1a870d5 100644 --- a/cuda_rasterizer/backward.cu +++ b/cuda_rasterizer/backward.cu @@ -17,21 +17,21 @@ namespace cg = cooperative_groups; // Backward pass for conversion of spherical harmonics to RGB for // each Gaussian. -__device__ void computeColorFromSH(int idx, int view_idx, int deg, int max_coeffs, const glm::vec3* means, glm::vec3 campos, const float* shs, const bool* clamped, const glm::vec3* dL_dcolor, glm::vec3* dL_dmeans, glm::vec3* dL_dshs) +__device__ void computeColorFromSH(int point_idx, int result_idx, int deg, int max_coeffs, const glm::vec3* means, glm::vec3 campos, const float* shs, const bool* clamped, const glm::vec3* dL_dcolor, glm::vec3* dL_dmeans, glm::vec3* dL_dshs) { // Compute intermediate values, as it is done during forward - glm::vec3 pos = means[idx]; + glm::vec3 pos = means[point_idx]; glm::vec3 dir_orig = pos - campos; glm::vec3 dir = dir_orig / glm::length(dir_orig); - glm::vec3* sh = ((glm::vec3*)shs) + idx * max_coeffs; + glm::vec3* sh = ((glm::vec3*)shs) + point_idx * max_coeffs; // Use PyTorch rule for clamping: if clamping was applied, // gradient becomes 0. - glm::vec3 dL_dRGB = dL_dcolor[idx]; - dL_dRGB.x *= clamped[3 * view_idx + 0] ? 0 : 1; - dL_dRGB.y *= clamped[3 * view_idx + 1] ? 0 : 1; - dL_dRGB.z *= clamped[3 * view_idx + 2] ? 0 : 1; + glm::vec3 dL_dRGB = dL_dcolor[point_idx]; + dL_dRGB.x *= clamped[3 * result_idx + 0] ? 0 : 1; + dL_dRGB.y *= clamped[3 * result_idx + 1] ? 0 : 1; + dL_dRGB.z *= clamped[3 * result_idx + 2] ? 0 : 1; glm::vec3 dRGBdx(0, 0, 0); glm::vec3 dRGBdy(0, 0, 0); @@ -41,7 +41,7 @@ __device__ void computeColorFromSH(int idx, int view_idx, int deg, int max_coeff float z = dir.z; // Target location for this Gaussian to write SH gradients to - glm::vec3* dL_dsh = dL_dshs + idx * max_coeffs; + glm::vec3* dL_dsh = dL_dshs + point_idx * max_coeffs; // No tricks here, just high school-level calculus. float dRGBdsh0 = SH_C0; @@ -135,7 +135,7 @@ __device__ void computeColorFromSH(int idx, int view_idx, int deg, int max_coeff // Gradients of loss w.r.t. Gaussian means, but only the portion // that is caused because the mean affects the view-dependent color. // Additional mean gradient is accumulated in below methods. - dL_dmeans[idx] += glm::vec3(dL_dmean.x, dL_dmean.y, dL_dmean.z); + dL_dmeans[point_idx] += glm::vec3(dL_dmean.x, dL_dmean.y, dL_dmean.z); } // Backward version of INVERSE 2D covariance matrix computation From 363b4eedff49e453749cd0fed2136f2cf521c0b7 Mon Sep 17 00:00:00 2001 From: Sandeep Menon Date: Wed, 8 May 2024 00:30:35 -0400 Subject: [PATCH 30/34] moved from python time to torch record --- tests/rasterization_preprocess_test.py | 59 +++++++++++++++++--------- 1 file changed, 39 insertions(+), 20 deletions(-) diff --git a/tests/rasterization_preprocess_test.py b/tests/rasterization_preprocess_test.py index 171d1b1..5eebefa 100644 --- a/tests/rasterization_preprocess_test.py +++ b/tests/rasterization_preprocess_test.py @@ -1,5 +1,4 @@ import math -import time import pytest import torch @@ -10,8 +9,8 @@ GaussianRasterizerBatches, ) -num_gaussians = 10000 -num_batches = 32 +num_gaussians = 1000000 +num_batches = 64 SH_ACTIVE_DEGREE = 3 @@ -127,8 +126,10 @@ def run_batched_gaussian_rasterizer(setup_data): batched_depths = [] batched_rgb = [] - start_time = time.time() - + start_event = torch.cuda.Event(enable_timing=True) + end_event = torch.cuda.Event(enable_timing=True) + torch.cuda.synchronize() + start_event.record() for i, (viewpoint_camera, strategy) in enumerate(zip(batched_viewpoint_cameras, batched_strategies)): ########## [START] Prepare CUDA Rasterization Settings ########## cuda_args = get_cuda_args(strategy, mode) @@ -168,9 +169,10 @@ def run_batched_gaussian_rasterizer(setup_data): batched_conic_opacity.append(conic_opacity) batched_depths.append(depths) - end_time = time.time() - preprocess_time = end_time - start_time - print(f"Time taken by run_batched_gaussian_rasterizer: {preprocess_time:.4f} seconds") + end_event.record() + torch.cuda.synchronize() + elapsed_time_ms = start_event.elapsed_time(end_event) + print(f"Time taken by test_batched_gaussian_rasterizer: {elapsed_time_ms:.4f} ms") batched_means2D = torch.stack(batched_means2D, dim=0) batched_radii = torch.stack(batched_radii, dim=0) @@ -179,12 +181,18 @@ def run_batched_gaussian_rasterizer(setup_data): batched_depths = torch.stack(batched_depths, dim=0) zero_grad(means3D, scales, rotations, shs, opacity) - start_backward = time.time() + start_backward_event = torch.cuda.Event(enable_timing=True) + end_backward_event = torch.cuda.Event(enable_timing=True) + torch.cuda.synchronize() + start_backward_event.record() + loss = compute_dummy_loss(means3D, scales, rotations, shs, opacity) loss.backward() - end_backward = time.time() - preproc_back = end_backward - start_backward - print(f"Time taken by run_batched_gaussian_rasterizer BACKWARD: {preproc_back:.4f} seconds") + + end_backward_event.record() + torch.cuda.synchronize() + backward_time_ms = start_backward_event.elapsed_time(end_backward_event) + print(f"Time taken by run_batched_gaussian_rasterizer BACKWARD: {backward_time_ms:.4f} ms") assert means3D.grad is not None, "Means3D gradient is None." assert scales.grad is not None, "Scales gradient is None." @@ -224,7 +232,10 @@ def run_batched_gaussian_rasterizer_batch_processing(setup_data): ) = setup_data # Set up the input data - start_time = time.time() + start_event = torch.cuda.Event(enable_timing=True) + end_event = torch.cuda.Event(enable_timing=True) + torch.cuda.synchronize() + start_event.record() # Set up rasterization configuration for the batch raster_settings_batch = [] @@ -270,9 +281,10 @@ def run_batched_gaussian_rasterizer_batch_processing(setup_data): opacities=opacity, batched_cuda_args=batched_cuda_args[0], # TODO: look into sending list of cuda_args/strategies ) - end_time = time.time() - preprocess_time = end_time - start_time - print(f"Time taken by run_batched_gaussian_rasterizer_batch_processing: {preprocess_time:.4f} seconds") + end_event.record() + torch.cuda.synchronize() + elapsed_time_ms = start_event.elapsed_time(end_event) + print(f"Time taken by test_batched_gaussian_rasterizer: {elapsed_time_ms:.4f} ms") # Perform assertions on the preprocessed data @@ -294,12 +306,19 @@ def run_batched_gaussian_rasterizer_batch_processing(setup_data): batched_screenspace_params.append(screenspace_params) zero_grad(means3D, scales, rotations, shs, opacity) - start_backward = time.time() + + start_backward_event = torch.cuda.Event(enable_timing=True) + end_backward_event = torch.cuda.Event(enable_timing=True) + torch.cuda.synchronize() + start_backward_event.record() + loss = compute_dummy_loss(means3D, scales, rotations, shs, opacity) loss.backward() - end_backward = time.time() - preproc_back = end_backward - start_backward - print(f"Time taken by run_batched_gaussian_rasterizer_batch_processing BACKWARD: {preproc_back:.4f} seconds") + + end_backward_event.record() + torch.cuda.synchronize() + backward_time_ms = start_backward_event.elapsed_time(end_backward_event) + print(f"Time taken by run_batched_gaussian_rasterizer_batch_processing BACKWARD: {backward_time_ms:.4f} ms") assert means3D.grad is not None, "Means3D gradient is None." assert scales.grad is not None, "Scales gradient is None." From 91d158224f94cb24c3d870f500e9524ab782b07e Mon Sep 17 00:00:00 2001 From: Sandeep Menon Date: Wed, 8 May 2024 00:30:53 -0400 Subject: [PATCH 31/34] fixed num_points in preprocessForwardBatches --- cuda_rasterizer/rasterizer_impl.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cuda_rasterizer/rasterizer_impl.cu b/cuda_rasterizer/rasterizer_impl.cu index 2ff28ba..c740222 100644 --- a/cuda_rasterizer/rasterizer_impl.cu +++ b/cuda_rasterizer/rasterizer_impl.cu @@ -472,8 +472,8 @@ int CudaRasterizer::Rasterizer::preprocessForwardBatches( // In sep_rendering==True case, we will compute tiles_touched in the renderForward. // TODO: remove it later by modifying FORWARD::preprocess when we deprecate sep_rendering==False case uint32_t* tiles_touched_temp_buffer; - CHECK_CUDA(cudaMalloc(&tiles_touched_temp_buffer, P * sizeof(uint32_t)), debug); - CHECK_CUDA(cudaMemset(tiles_touched_temp_buffer, 0, P * sizeof(uint32_t)), debug); + CHECK_CUDA(cudaMalloc(&tiles_touched_temp_buffer, num_viewpoints * P * sizeof(uint32_t)), debug); + CHECK_CUDA(cudaMemset(tiles_touched_temp_buffer, 0, num_viewpoints * P * sizeof(uint32_t)), debug); timer.start("10 preprocess"); // Run preprocessing per-Gaussian (transformation, bounding, conversion of SHs to RGB) From ee767da5d5e7c422ca42d5c26ad7ca57febbf7e6 Mon Sep 17 00:00:00 2001 From: Sandeep Menon Date: Sat, 11 May 2024 10:00:30 -0400 Subject: [PATCH 32/34] Refactor test function names for clarity and consistency --- tests/rasterization_preprocess_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/rasterization_preprocess_test.py b/tests/rasterization_preprocess_test.py index 5eebefa..9d98bfd 100644 --- a/tests/rasterization_preprocess_test.py +++ b/tests/rasterization_preprocess_test.py @@ -172,7 +172,7 @@ def run_batched_gaussian_rasterizer(setup_data): end_event.record() torch.cuda.synchronize() elapsed_time_ms = start_event.elapsed_time(end_event) - print(f"Time taken by test_batched_gaussian_rasterizer: {elapsed_time_ms:.4f} ms") + print(f"Time taken by run_batched_gaussian_rasterizer: {elapsed_time_ms:.4f} ms") batched_means2D = torch.stack(batched_means2D, dim=0) batched_radii = torch.stack(batched_radii, dim=0) @@ -284,7 +284,7 @@ def run_batched_gaussian_rasterizer_batch_processing(setup_data): end_event.record() torch.cuda.synchronize() elapsed_time_ms = start_event.elapsed_time(end_event) - print(f"Time taken by test_batched_gaussian_rasterizer: {elapsed_time_ms:.4f} ms") + print(f"Time taken by run_batched_gaussian_rasterizer_batch_processing: {elapsed_time_ms:.4f} ms") # Perform assertions on the preprocessed data From 2e7f032ef8b8935177fac6a9a5bd2786189fb5c4 Mon Sep 17 00:00:00 2001 From: Sandeep Menon Date: Sat, 11 May 2024 11:02:15 -0400 Subject: [PATCH 33/34] fixed but in printing only first 5 non matching indices --- tests/rasterization_preprocess_test.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/rasterization_preprocess_test.py b/tests/rasterization_preprocess_test.py index 9d98bfd..0cf9414 100644 --- a/tests/rasterization_preprocess_test.py +++ b/tests/rasterization_preprocess_test.py @@ -359,7 +359,10 @@ def compare_tensors(tensor1, tensor2): else: print("Tensors have non-matching values.") non_matching_indices = torch.where(equality_matrix == False) - for idx in zip(*non_matching_indices[:5]): + num_non_matching = non_matching_indices[0].shape[0] + max_indices_to_print = min(5, num_non_matching) + for i in range(max_indices_to_print): + idx = tuple(index[i] for index in non_matching_indices) value1 = tensor1[idx].item() value2 = tensor2[idx].item() print(f"Non-matching values at index {idx}: {value1} != {value2}") From e8edb865befa8fded6a85d5ce4aef55b540bf84f Mon Sep 17 00:00:00 2001 From: Sandeep Menon Date: Sat, 11 May 2024 12:28:37 -0400 Subject: [PATCH 34/34] fixed backward bug of backward kernel not getting executed --- cuda_rasterizer/backward.cu | 11 +++++------ diff_gaussian_rasterization/__init__.py | 3 +-- tests/rasterization_preprocess_test.py | 8 ++++---- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/cuda_rasterizer/backward.cu b/cuda_rasterizer/backward.cu index 1a870d5..d0be2f3 100644 --- a/cuda_rasterizer/backward.cu +++ b/cuda_rasterizer/backward.cu @@ -41,9 +41,9 @@ __device__ void computeColorFromSH(int point_idx, int result_idx, int deg, int m float z = dir.z; // Target location for this Gaussian to write SH gradients to - glm::vec3* dL_dsh = dL_dshs + point_idx * max_coeffs; + glm::vec3 *dL_dsh = dL_dshs + result_idx * max_coeffs; - // No tricks here, just high school-level calculus. + // No tricks here, just high school-level calculus. float dRGBdsh0 = SH_C0; dL_dsh[0] = dRGBdsh0 * dL_dRGB; if (deg > 0) @@ -55,7 +55,7 @@ __device__ void computeColorFromSH(int point_idx, int result_idx, int deg, int m dL_dsh[2] = dRGBdsh2 * dL_dRGB; dL_dsh[3] = dRGBdsh3 * dL_dRGB; - dRGBdx = -SH_C1 * sh[3]; + dRGBdx = -SH_C1 * sh[3]; dRGBdy = -SH_C1 * sh[1]; dRGBdz = SH_C1 * sh[2]; @@ -75,7 +75,7 @@ __device__ void computeColorFromSH(int point_idx, int result_idx, int deg, int m dL_dsh[7] = dRGBdsh7 * dL_dRGB; dL_dsh[8] = dRGBdsh8 * dL_dRGB; - dRGBdx += SH_C2[0] * y * sh[4] + SH_C2[2] * 2.f * -x * sh[6] + SH_C2[3] * z * sh[7] + SH_C2[4] * 2.f * x * sh[8]; + dRGBdx += SH_C2[0] * y * sh[4] + SH_C2[2] * 2.f * -x * sh[6] + SH_C2[3] * z * sh[7] + SH_C2[4] * 2.f * x * sh[8]; dRGBdy += SH_C2[0] * x * sh[4] + SH_C2[1] * z * sh[5] + SH_C2[2] * 2.f * -y * sh[6] + SH_C2[4] * 2.f * -y * sh[8]; dRGBdz += SH_C2[1] * y * sh[5] + SH_C2[2] * 2.f * 2.f * z * sh[6] + SH_C2[3] * x * sh[7]; @@ -96,7 +96,7 @@ __device__ void computeColorFromSH(int point_idx, int result_idx, int deg, int m dL_dsh[14] = dRGBdsh14 * dL_dRGB; dL_dsh[15] = dRGBdsh15 * dL_dRGB; - dRGBdx += ( + dRGBdx += ( SH_C3[0] * sh[9] * 3.f * 2.f * xy + SH_C3[1] * sh[10] * yz + SH_C3[2] * sh[11] * -2.f * xy + @@ -563,7 +563,6 @@ __global__ void preprocessCUDABatched( auto point_idx = blockIdx.x * blockDim.x + threadIdx.x; auto viewpoint_idx = blockIdx.y; if (viewpoint_idx >= num_viewpoints || point_idx >= P) return; - return; auto idx = viewpoint_idx * P + point_idx; if (!(radii[idx] > 0)) diff --git a/diff_gaussian_rasterization/__init__.py b/diff_gaussian_rasterization/__init__.py index 1426142..318ee3c 100644 --- a/diff_gaussian_rasterization/__init__.py +++ b/diff_gaussian_rasterization/__init__.py @@ -129,8 +129,7 @@ def backward(ctx, grad_means2D, grad_rgb, grad_conic_opacity, grad_radii, grad_d # grad_means2D is (P, 2) now. Need to pad it to (P, 3) because preprocess_gaussians_backward's cuda implementation. grad_means2D_pad = torch.zeros_like(grad_means2D[..., :1], dtype = grad_means2D.dtype, device=grad_means2D.device) - grad_means2D = torch.cat((grad_means2D, grad_means2D_pad), dim = 1).contiguous() - + grad_means2D = torch.cat((grad_means2D, grad_means2D_pad), dim = -1).contiguous() # Restructure args as C++ method expects them args = (radii, cov3D, diff --git a/tests/rasterization_preprocess_test.py b/tests/rasterization_preprocess_test.py index 0cf9414..7ddaac9 100644 --- a/tests/rasterization_preprocess_test.py +++ b/tests/rasterization_preprocess_test.py @@ -67,8 +67,8 @@ def setup_data(): ) -def compute_dummy_loss(means3D, scales, rotations, shs, opacity): - losses = [(tensor - torch.ones_like(tensor)).pow(2).mean() for tensor in [means3D, scales, rotations, shs, opacity]] +def compute_dummy_loss(batched_means2D, batched_rgb, batched_conic_opacity): + losses = [(tensor - torch.ones_like(tensor)).pow(2).mean() for tensor in [batched_means2D, batched_conic_opacity, batched_rgb]] loss = sum(losses) return loss @@ -186,7 +186,7 @@ def run_batched_gaussian_rasterizer(setup_data): torch.cuda.synchronize() start_backward_event.record() - loss = compute_dummy_loss(means3D, scales, rotations, shs, opacity) + loss = compute_dummy_loss(batched_means2D, batched_rgb, batched_conic_opacity) loss.backward() end_backward_event.record() @@ -312,7 +312,7 @@ def run_batched_gaussian_rasterizer_batch_processing(setup_data): torch.cuda.synchronize() start_backward_event.record() - loss = compute_dummy_loss(means3D, scales, rotations, shs, opacity) + loss = compute_dummy_loss(batched_means2D, batched_rgb, batched_conic_opacity) loss.backward() end_backward_event.record()