From cdc3fe2ef2fbb4d9fbabfba67c66a779f18d48b8 Mon Sep 17 00:00:00 2001 From: lilkeet Date: Sat, 22 Feb 2025 21:48:18 -0600 Subject: [PATCH 1/5] new destroy hook form for cuda tensor pointers --- src/arraymancer/tensor/backend/cuda.nim | 2 +- src/arraymancer/tensor/data_structure.nim | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/arraymancer/tensor/backend/cuda.nim b/src/arraymancer/tensor/backend/cuda.nim index 23e6a73c..f90f9a1b 100644 --- a/src/arraymancer/tensor/backend/cuda.nim +++ b/src/arraymancer/tensor/backend/cuda.nim @@ -33,7 +33,7 @@ proc cudaMalloc*[T](size: Natural): ptr UncheckedArray[T] {.noSideEffect, inline proc newCudaStorage*[T: SomeFloat](length: int): CudaStorage[T] {.noSideEffect.}= result.Flen = length - new(result.Fref_tracking, deallocCuda) + new result.Fref_tracking result.Fdata = cast[ptr UncheckedArray[T]](cudaMalloc[T](result.Flen)) result.Fref_tracking.value = result.Fdata diff --git a/src/arraymancer/tensor/data_structure.nim b/src/arraymancer/tensor/data_structure.nim index 5d7166cf..a6d6d75a 100644 --- a/src/arraymancer/tensor/data_structure.nim +++ b/src/arraymancer/tensor/data_structure.nim @@ -79,12 +79,12 @@ type AnyTensor*[T] = Tensor[T] or CudaTensor[T] or ClTensor[T] - -proc deallocCuda*[T](p: CudaTensorRefTracker[T]) {.noSideEffect.}= +proc `=destroy`*[T](p: CudaTensorRefTrackerObj[T]) {.noSideEffect.}= if not p.value.isNil: check cudaFree(p.value) + # ############### # Field accessors # ############### From 2064d7e5ea7efe5df5a8d367bd86ba55b639ba6c Mon Sep 17 00:00:00 2001 From: lilkeet Date: Sat, 22 Feb 2025 21:52:47 -0600 Subject: [PATCH 2/5] new form for hook for unified memory as well --- src/arraymancer/tensor/backend/cuda.nim | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/arraymancer/tensor/backend/cuda.nim b/src/arraymancer/tensor/backend/cuda.nim index f90f9a1b..87952618 100644 --- a/src/arraymancer/tensor/backend/cuda.nim +++ b/src/arraymancer/tensor/backend/cuda.nim @@ -88,7 +88,7 @@ type len*: cint # Number of elements allocated in memory -proc deallocCuda*(p: CudaLayoutArray) {.noSideEffect.}= +proc `=destroy`*(p: CudaLayoutArrayObj) {.noSideEffect.}= if not p.value.isNil: check cudaFree(p.value) @@ -104,8 +104,8 @@ proc layoutOnDevice*[T:SomeFloat](t: CudaTensor[T]): CudaTensorLayout[T] {.noSid result.data = t.get_data_ptr result.len = t.size.cint - new result.shape, deallocCuda - new result.strides, deallocCuda + new result.shape + new result.strides result.shape.value = cudaMalloc[cint](MAXRANK) result.strides.value = cudaMalloc[cint](MAXRANK) From 5f5e75b38787ce74d7eb0ead24940eede8afa35a Mon Sep 17 00:00:00 2001 From: lilkeet Date: Sat, 22 Feb 2025 22:13:14 -0600 Subject: [PATCH 3/5] remove bad form of error throwing on destructors. worst case we'll leak. --- src/arraymancer/tensor/backend/cuda.nim | 2 +- src/arraymancer/tensor/data_structure.nim | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/arraymancer/tensor/backend/cuda.nim b/src/arraymancer/tensor/backend/cuda.nim index 87952618..9c08e477 100644 --- a/src/arraymancer/tensor/backend/cuda.nim +++ b/src/arraymancer/tensor/backend/cuda.nim @@ -90,7 +90,7 @@ type proc `=destroy`*(p: CudaLayoutArrayObj) {.noSideEffect.}= if not p.value.isNil: - check cudaFree(p.value) + discard cudaFree(p.value) proc layoutOnDevice*[T:SomeFloat](t: CudaTensor[T]): CudaTensorLayout[T] {.noSideEffect.}= ## Store a CudaTensor shape, strides, etc information on the GPU diff --git a/src/arraymancer/tensor/data_structure.nim b/src/arraymancer/tensor/data_structure.nim index a6d6d75a..8cb33651 100644 --- a/src/arraymancer/tensor/data_structure.nim +++ b/src/arraymancer/tensor/data_structure.nim @@ -16,7 +16,7 @@ import ../laser/dynamic_stack_arrays, ../laser/tensor/datatypes, nimblas, - nimcuda/cuda12_5/[cuda_runtime_api, check], + nimcuda/cuda12_5/[cuda_runtime_api], # Standard library std/[complex] @@ -81,7 +81,7 @@ type proc `=destroy`*[T](p: CudaTensorRefTrackerObj[T]) {.noSideEffect.}= if not p.value.isNil: - check cudaFree(p.value) + discard cudaFree(p.value) From 30580cb4aedb7d4a3a8511ffb3ac00c01c29bc91 Mon Sep 17 00:00:00 2001 From: lilkeet Date: Sat, 22 Feb 2025 22:22:17 -0600 Subject: [PATCH 4/5] nim 1.x.x compat --- src/arraymancer/tensor/backend/cuda.nim | 13 ++++++++++--- src/arraymancer/tensor/data_structure.nim | 11 ++++++++--- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/src/arraymancer/tensor/backend/cuda.nim b/src/arraymancer/tensor/backend/cuda.nim index 9c08e477..4101fb03 100644 --- a/src/arraymancer/tensor/backend/cuda.nim +++ b/src/arraymancer/tensor/backend/cuda.nim @@ -88,9 +88,16 @@ type len*: cint # Number of elements allocated in memory -proc `=destroy`*(p: CudaLayoutArrayObj) {.noSideEffect.}= - if not p.value.isNil: - discard cudaFree(p.value) +when NimMajor == 1: + proc `=destroy`*(p: var CudaLayoutArrayObj) {.noSideEffect.}= + if not p.value.isNil: + discard cudaFree(p.value) +else: + proc `=destroy`*(p: CudaLayoutArrayObj) {.noSideEffect.}= + if not p.value.isNil: + discard cudaFree(p.value) + + proc layoutOnDevice*[T:SomeFloat](t: CudaTensor[T]): CudaTensorLayout[T] {.noSideEffect.}= ## Store a CudaTensor shape, strides, etc information on the GPU diff --git a/src/arraymancer/tensor/data_structure.nim b/src/arraymancer/tensor/data_structure.nim index 8cb33651..47c62d20 100644 --- a/src/arraymancer/tensor/data_structure.nim +++ b/src/arraymancer/tensor/data_structure.nim @@ -79,9 +79,14 @@ type AnyTensor*[T] = Tensor[T] or CudaTensor[T] or ClTensor[T] -proc `=destroy`*[T](p: CudaTensorRefTrackerObj[T]) {.noSideEffect.}= - if not p.value.isNil: - discard cudaFree(p.value) +when NimMajor == 1: + proc `=destroy`*[T](p: var CudaTensorRefTrackerObj[T]) {.noSideEffect.}= + if not p.value.isNil: + discard cudaFree(p.value) +else: + proc `=destroy`*[T](p: CudaTensorRefTrackerObj[T]) {.noSideEffect.}= + if not p.value.isNil: + discard cudaFree(p.value) From 2539c2c867dea53261a70bcc73cb292a6918f45a Mon Sep 17 00:00:00 2001 From: lilkeet Date: Sun, 2 Mar 2025 20:37:11 -0600 Subject: [PATCH 5/5] move destructor hooks to match #676 --- src/arraymancer/tensor/data_structure.nim | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/src/arraymancer/tensor/data_structure.nim b/src/arraymancer/tensor/data_structure.nim index 5f5e71b3..7a282a92 100644 --- a/src/arraymancer/tensor/data_structure.nim +++ b/src/arraymancer/tensor/data_structure.nim @@ -57,9 +57,14 @@ when defined(cuda): offset*: int storage*: CudaStorage[T] - proc deallocCuda*[T](p: CudaTensorRefTracker[T]) {.noSideEffect.}= - if not p.value.isNil: - check cudaFree(p.value) + when NimMajor == 1: + proc `=destroy`*[T](p: var CudaTensorRefTrackerObj[T]) {.noSideEffect.}= + if not p.value.isNil: + discard cudaFree(p.value) + else: + proc `=destroy`*[T](p: CudaTensorRefTrackerObj[T]) {.noSideEffect.}= + if not p.value.isNil: + discard cudaFree(p.value) when defined(opencl): type @@ -96,15 +101,6 @@ else: type GpuTensor[T] = AnyTensor[T] and not Tensor[T] -when NimMajor == 1: - proc `=destroy`*[T](p: var CudaTensorRefTrackerObj[T]) {.noSideEffect.}= - if not p.value.isNil: - discard cudaFree(p.value) -else: - proc `=destroy`*[T](p: CudaTensorRefTrackerObj[T]) {.noSideEffect.}= - if not p.value.isNil: - discard cudaFree(p.value) - # ###############