Add fold and unfold #444
@@ -0,0 +1,137 @@

""" | ||
unfold(x, kernel_size; stride = 1, pad = 0, dilation = 0, flipped = false) | ||
|
||
Places sliding windows of x into a container tensor of size (num_windows, window_size, batchsize). | ||
The window size is determined by the prod(spatial dims of kernel)*input_channels. | ||
The number of sliding windows will match those of convolution (conv) with the same kernel_size and arguments. | ||
Uses NNlib.im2col! as backend. | ||
""" | ||
function unfold(x::AbstractArray{T, N}, kernel_size::NTuple{K}; stride = 1, pad = 0, dilation = 1, flipped = false) where {T, K, N}
    stride = expand(Val(N - 2), stride)
    padding = expand(Val(N - 2), pad)
    dilation = expand(Val(N - 2), dilation)
    cdims = DenseConvDims(size(x), kernel_size; stride, padding, dilation, flipkernel=flipped)
    return unfold(x, cdims)
end
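
# Illustrative usage sketch (not part of the original patch; `kernel_size` is the full
# weight-size tuple, as in size(w) for conv):
#
#   x = reshape(Float32.(1:18), 3, 3, 2, 1)   # 3×3 spatial, 2 channels, batch of 1
#   y = unfold(x, (2, 2, 2, 1))               # 2×2 kernel over the 2 input channels
#   size(y)                                   # (4, 8, 1): 4 windows of 2*2*2 = 8 values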

"""
    fold(y, output_size, kernel_size; stride = 1, pad = 0, dilation = 1, flipped = false)

Accumulates sliding windows from the output of `unfold` into a container tensor of size
`output_size`. An inverse of `unfold` can be obtained by using `fold` and accounting for
the scaling introduced by overlapping windows. For example:

```jldoctest
julia> kernel_size, pad = (3, 3, 1, 1), 1;

julia> x = reshape(1:64, 8, 8, 1, 1) |> collect;

julia> y = unfold(x, kernel_size; pad=pad);

julia> size(y)
(64, 9, 1)

julia> z = fold(y, size(x), kernel_size; pad=pad);

julia> d = fold(unfold(ones(eltype(x), size(x)...), kernel_size; pad=pad), size(x), kernel_size; pad=pad)
8×8×1×1 Array{Int64, 4}:
[:, :, 1, 1] =
 4  6  6  6  6  6  6  4
 6  9  9  9  9  9  9  6
 6  9  9  9  9  9  9  6
 6  9  9  9  9  9  9  6
 6  9  9  9  9  9  9  6
 6  9  9  9  9  9  9  6
 6  9  9  9  9  9  9  6
 4  6  6  6  6  6  6  4

julia> x == z ./ d
true
```

Uses `NNlib.col2im!` as its backend.
"""
function fold(x::AbstractArray{T, 3}, output_size::NTuple{N}, kernel_size::NTuple{K}; stride = 1, pad = 0, dilation = 1, flipped = false) where {T, K, N}
    stride = expand(Val(N - 2), stride)
    padding = expand(Val(N - 2), pad)
    dilation = expand(Val(N - 2), dilation)
    cdims = DenseConvDims(output_size, kernel_size; stride, padding, dilation, flipkernel=flipped)
    return fold(x, output_size, cdims)
end

# im2col_dims returns (numblocks, blocksize, threadnum), where the thread dimension is used
# as thread-local workspace for multithreaded conv. Ultimately, we want to replace
# threadnum with batchsize.
unfold_dims(cdims::DenseConvDims) = im2col_dims(cdims)[1:2]
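
# For instance (illustrative): with an 8×8×1×1 input, a 3×3 kernel, and pad=1,
# im2col_dims gives (64, 9, nthreads), so unfold_dims returns (64, 9), matching
# size(y)[1:2] in the docstring example above.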

# auto-allocating versions
function unfold(x::AbstractArray{T, N}, cdims::DenseConvDims) where {T, N}
    y = similar(x, unfold_dims(cdims)..., size(x, N))  # (numblocks, blocksize, batchsize)
    return unfold!(y, x, cdims)
end

function fold(y::AbstractArray{T, 3}, output_size::NTuple, cdims::DenseConvDims) where {T}
    x = similar(y, output_size)
    return fold!(x, y, cdims)
end

# in-place versions for inputs with fewer than 5 dimensions (N < 5): promote to 5 dims
# by inserting singleton spatial dimensions, then dispatch to the 5-dimensional methods
function unfold!(y::AbstractArray{yT, 3}, x::AbstractArray{xT, N}, cdims::DenseConvDims) where {yT, xT, N}
    unfold!(
        y,
        insert_singleton_spatial_dimension(x, 5-N),
        insert_singleton_spatial_dimension(cdims, 5-N),
    )
    return y
end

function fold!(x::AbstractArray{xT, N}, y::AbstractArray{yT, 3}, cdims::DenseConvDims) where {yT, xT, N}
    fold!(
        insert_singleton_spatial_dimension(x, 5-N),
        y,
        insert_singleton_spatial_dimension(cdims, 5-N),
    )
    return x
end
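
# Sketch of the promotion (illustrative only; insert_singleton_spatial_dimension is an
# NNlib internal):
#
#   x3 = rand(Float32, 8, 3, 2)        # (spatial, channels, batch): one spatial dim
#   # promoting by 5 - 3 = 2 yields an array equivalent to
#   x5 = reshape(x3, 8, 1, 1, 3, 2)    # three spatial dims, two of them singletons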

# 5-dimension in-place versions
function unfold!(y::AbstractArray{yT, 3}, x::AbstractArray{xT, 5}, cdims::DenseConvDims) where {yT, xT}
    @threads for batch_idx in 1:size(x, 5)
        y_slice = view(y, :, :, batch_idx)
        im2col!(y_slice, view(x, :, :, :, :, batch_idx), cdims)
    end
    return y
end

function fold!(x::AbstractArray{xT, 5}, y::AbstractArray{yT, 3}, cdims::DenseConvDims) where {xT, yT}
    @threads for batch_idx in 1:size(x, 5)
        y_slice = view(y, :, :, batch_idx)
        col2im!(view(x, :, :, :, :, batch_idx), y_slice, cdims)
    end
    return x
end
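
# Illustration of the accumulation semantics (not part of the patch): overlapping
# windows are summed, which is why the docstring example divides by a divisor tensor.
#
#   x = ones(Float32, 4, 1, 1)
#   cdims = DenseConvDims(size(x), (2, 1, 1))
#   y = unfold(x, cdims)           # size (3, 2, 1): three length-2 windows of ones
#   z = fold(y, size(x), cdims)    # z[:, 1, 1] == [1, 2, 2, 1]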

# reverse diff rules
function rrule(::typeof(unfold), x, cdims::DenseConvDims; kw...)
    function unfold_pullback(Δ)
        return (
            NoTangent(),
            fold(unthunk(Δ), size(x), cdims; kw...),
            NoTangent(),
        )
    end
    return unfold(x, cdims; kw...), unfold_pullback
end

function rrule(::typeof(fold), x, output_size, cdims::DenseConvDims; kw...)
    function fold_pullback(Δ)
        return (
            NoTangent(),
            unfold(unthunk(Δ), cdims; kw...),
            NoTangent(),
            NoTangent(),
        )
    end
    return fold(x, output_size, cdims; kw...), fold_pullback
end
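
# Quick gradient sanity check (hypothetical snippet, assuming Zygote picks up these
# rrules via ChainRulesCore):
#
#   using Zygote
#   x = rand(Float32, 8, 8, 3, 2)
#   cdims = DenseConvDims(size(x), (3, 3, 3, 3); padding=1)
#   g, = Zygote.gradient(x -> sum(unfold(x, cdims)), x)
#   size(g) == size(x)   # the pullback folds the cotangent back to the input shape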

@@ -0,0 +1,38 @@
using NNlib, Test

# `rng` and `gradtest` are assumed to be provided by the surrounding test harness,
# as in the rest of the NNlib test suite.
@testset "unfold wrapper" begin
    x = rand(rng, 16, 16, 3, 10)
    w = rand(rng, 5, 5, 3, 2)
    @test size(unfold(x, size(w))) == (144, 75, 10)
    @test size(unfold(x, size(w); pad=2)) == (256, 75, 10)
    @test size(unfold(x, size(w); stride=2)) == (36, 75, 10)
    @test size(unfold(x, size(w); dilation=2)) == (64, 75, 10)
end
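
# The expected shapes above follow from the usual convolution output arithmetic
# (illustrative helper, not part of the test file):
#
#   nwin(n, k; stride=1, pad=0, dilation=1) = fld(n + 2pad - dilation*(k - 1) - 1, stride) + 1
#   nwin(16, 5)^2              # 144 windows; window size is 5 * 5 * 3 = 75
#   nwin(16, 5; pad=2)^2       # 256
#   nwin(16, 5; stride=2)^2    # 36
#   nwin(16, 5; dilation=2)^2  # 64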

@testset "Inverses: spatial_rank=$spatial_rank" for spatial_rank in (1, 2, 3)
    x = rand(rng, repeat([8], spatial_rank)..., 3, 2)
    w = rand(rng, repeat([3], spatial_rank)..., 3, 3)
    cdims = DenseConvDims(x, w; padding=1)
    y = unfold(x, cdims)
    z = fold(y, size(x), cdims)
    divisor = fold(unfold(ones(eltype(x), size(x)...), cdims), size(x), cdims)
    @test isapprox(z ./ divisor, x, rtol=1.0e-7)

    # introduce stride
    cdims = DenseConvDims(x, w; padding=1, stride=2)
    y = unfold(x, cdims)
    z = fold(y, size(x), cdims)
    divisor = fold(unfold(ones(eltype(x), size(x)...), cdims), size(x), cdims)
    @test isapprox(z ./ divisor, x, rtol=1.0e-7)
end

@testset "AutoDiff: spatial_rank=$spatial_rank" for spatial_rank in (1, 2, 3)
    x = rand(rng, repeat([5], spatial_rank)..., 3, 2)
    w = rand(rng, repeat([3], spatial_rank)..., 3, 3)
    cdims = DenseConvDims(x, w)
    gradtest(x -> sum(unfold(x, cdims)), x)

    y = unfold(x, cdims)
    gradtest(y -> sum(fold(y, size(x), cdims)), y)

Review comment (suggested change): Should save a lambda and test a little more at the same time.

Reply: I think FiniteDifferences is causing an error in the suggested form. I can pass the finite-differences test by passing the function with only the input array as its argument, and I can pass the CRC rrule test by calling it separately. Looking at test/conv.jl, it seems to be playing a similar game with gradtest. The change below passes both the finite-differences and rrule tests.
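A sketch of testing the rrule directly (hypothetical, assuming ChainRulesTestUtils; not necessarily the exact change that was applied):

```julia
using ChainRulesTestUtils, ChainRulesCore

# cdims and output_size are non-differentiable, so mark them with explicit NoTangent
test_rrule(unfold, x, cdims ⊢ NoTangent())
test_rrule(fold, y, size(x) ⊢ NoTangent(), cdims ⊢ NoTangent())
```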
end

Review comment: I'm a little worried these names may be too common to export. scatter collided with every plotting library... It's not working for me right now, but https://juliahub.com may be able to tell us.

Reply: https://juliahub.com/ui/Search?q=fold&type=symbols&t=function&u=define

Reply: Possible name confusion with Base too. Given these functions are somewhat domain-specific, I agree it would be better to keep them unexported.

Reply: No problem, makes sense. That JuliaHub tool is very useful, thanks for showing it.