diff --git a/Project.toml b/Project.toml
index 197e50d..3bc3ad4 100755
--- a/Project.toml
+++ b/Project.toml
@@ -1,16 +1,14 @@
 name = "ClusterDepth"
 uuid = "c8d8bbfa-f476-4995-adff-2987f04015d1"
 authors = ["Benedikt V. Ehinger", "Maanik Marathe"]
-version = "0.2.0"
+version = "0.2.1"
 
 [deps]
-Images = "916415d5-f1e6-5110-898d-aaa5f9f070e0"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 
 [compat]
-Images = "0.25"
 Random = "1"
 SparseArrays = "1"
 StatsBase = "0.33, 0.34"
diff --git a/docs/make.jl b/docs/make.jl
index 67b682e..7385e2f 100755
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -2,7 +2,7 @@ using ClusterDepth
 using Documenter
 using Glob
 using Literate
-DocMeta.setdocmeta!(ClusterDepth, :DocTestSetup, :(using ClusterDepth); recursive=true)
+DocMeta.setdocmeta!(ClusterDepth, :DocTestSetup, :(using ClusterDepth); recursive = true)
 
 GENERATED = joinpath(@__DIR__, "src")
 
@@ -13,17 +13,17 @@ for subfolder ∈ ["explanations", "HowTo", "tutorials", "reference"]
 end
 
 makedocs(;
-    modules=[ClusterDepth],
-    authors="Benedikt V. Ehinger, Maanik Marathe",
-    repo="https://github.com/s-ccs/ClusterDepth.jl/blob/{commit}{path}#{line}",
-    sitename="ClusterDepth.jl",
-    format=Documenter.HTML(;
-        prettyurls=get(ENV, "CI", "false") == "true",
-        canonical="https://s-ccs.github.io/ClusterDepth.jl",
-        edit_link="main",
-        assets=String[],
+    modules = [ClusterDepth],
+    authors = "Benedikt V. Ehinger, Maanik Marathe",
+    repo = "https://github.com/s-ccs/ClusterDepth.jl/blob/{commit}{path}#{line}",
+    sitename = "ClusterDepth.jl",
+    format = Documenter.HTML(;
+        prettyurls = get(ENV, "CI", "false") == "true",
+        canonical = "https://s-ccs.github.io/ClusterDepth.jl",
+        edit_link = "main",
+        assets = String[],
     ),
-    pages=[
+    pages = [
         "Home" => "index.md",
         "Tutorials" => [
             "An EEG Example" => "tutorials/eeg.md",
@@ -36,7 +36,4 @@ makedocs(;
     ],
 )
 
-deploydocs(;
-    repo="github.com/s-ccs/ClusterDepth.jl",
-    devbranch="main",
-)
+deploydocs(; repo = "github.com/s-ccs/ClusterDepth.jl", devbranch = "main")
diff --git a/docs/src/reference/type1.jl b/docs/src/reference/type1.jl
index 8322e17..44c872a 100755
--- a/docs/src/reference/type1.jl
+++ b/docs/src/reference/type1.jl
@@ -15,7 +15,11 @@ using DataFrames
 # ## Setup Simulation
 # Let's set up a simulation using UnfoldSim.jl. We simulate a simple 1x2 design with 20 subjects
 n_subjects = 20
-design = MultiSubjectDesign(n_subjects=n_subjects, n_items=2, items_between=Dict(:condition => ["small", "large"]))
+design = MultiSubjectDesign(
+    n_subjects=n_subjects,
+    n_items=2,
+    items_between=Dict(:condition => ["small", "large"]),
+)
 first(generate_events(design), 5)
 
 #
@@ -33,11 +37,19 @@ signal = MixedModelComponent(;
 # Let's move the actual simulation into a function, so we can call it many times.
 # Note that we use [`RedNoise`](https://unfoldtoolbox.github.io/UnfoldSim.jl/dev/literate/reference/noisetypes/), which has lots of autocorrelation between timepoints. Nice!
 
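+# As a quick sanity check (an illustrative aside, not needed for the simulation itself):
+# the cluster-forming threshold `τ` we pass to `clusterdepth` below is simply the
+# one-sided 95% quantile of the t-distribution with `n_subjects - 1` degrees of freedom.
+quantile(TDist(n_subjects - 1), 0.95) # ≈ 1.73 for 20 subjects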
 function run_fun(r)
-    data, events = simulate(MersenneTwister(r), design, signal, UniformOnset(; offset=5, width=4), RedNoise(noiselevel=1); return_epoched=true)
+    data, events = simulate(
+        MersenneTwister(r),
+        design,
+        signal,
+        UniformOnset(; offset=5, width=4),
+        RedNoise(noiselevel=1);
+        return_epoched=true,
+    )
     data = reshape(data, size(data, 1), :)
     data = data[:, events.condition.=="small"] .- data[:, events.condition.=="large"]
-    return data, clusterdepth(data'; τ=quantile(TDist(n_subjects - 1), 0.95), nperm=1000)
+    return data,
+    clusterdepth(data'; τ=quantile(TDist(n_subjects - 1), 0.95), nperm=1000)
 end;
 
 # ## Understanding the simulation
@@ -45,7 +57,12 @@ end;
 data, pval = run_fun(5)
 conditionSmall = data[:, 1:2:end]
 conditionLarge = data[:, 2:2:end]
-pval_uncorrected = 1 .- cdf.(TDist(n_subjects - 1), abs.(ClusterDepth.studentt(conditionSmall .- conditionLarge)))
+pval_uncorrected =
+    1 .-
+    cdf.(
+        TDist(n_subjects - 1),
+        abs.(ClusterDepth.studentt(conditionSmall .- conditionLarge)),
+    )
 sig = pval_uncorrected .<= 0.025;
 
 # For the uncorrected p-values based on the t-distribution, we get a type1 error over "time":
@@ -67,8 +84,8 @@ sig = allowmissing(sig)
 sig[sig.==0] .= missing
 @show sum(skipmissing(sig))
 lines!(sig, color=:gray, linewidth=4)
-lines!(ax, mean(conditionSmall, dims=2)[:, 1], solid_color=:red)
-lines!(ax, mean(conditionLarge, dims=2)[:, 1], solid_color=:blue)
+lines!(ax, mean(conditionSmall, dims=2)[:, 1], color=:red)
+lines!(ax, mean(conditionLarge, dims=2)[:, 1], color=:blue)
 hist!(Axis(f[3, 1], title="uncorrected pvalues"), pval_uncorrected, bins=0:0.01:1.1)
 hist!(Axis(f[3, 2], title="clusterdepth corrected pvalues"), pval, bins=0:0.01:1.1)
 
@@ -81,12 +98,13 @@ res = fill(NaN, reps, 2)
 Threads.@threads for r = 1:reps
     data, pvals = run_fun(r)
     res[r, 1] = mean(pvals .<= 0.05)
-    res[r, 2] = mean(abs.(ClusterDepth.studentt(data)) .>= quantile(TDist(n_subjects - 1), 0.975))
+    res[r, 2] =
+        mean(abs.(ClusterDepth.studentt(data)) .>= quantile(TDist(n_subjects - 1), 0.975))
 end;
 
 # Finally, let's calculate the percentage of simulations where we find a significant effect somewhere
 mean(res .> 0, dims=1) |> x -> (:clusterdepth => x[1], :uncorrected => x[2])
-# Nice, correction seems to work in principle :) Clusterdepth is not exactly 5%, but with more repetitions we should get there (e.g. with 5000 repetitions, we get 0.051%).
+# Nice, correction seems to work in principle :) Clusterdepth is not necessarily exactly 5%, but with more repetitions we should get there (e.g. with 5000 repetitions, we got 0.051).
 
 # !!! info
-#     if you look closely, the `:uncorrected` value (around 60%) is not as bad as the 99% promised in the introduction. This is due to the correlation between the tests introduced by the noise. Indeed, a good exercise is to repeat everything, but put `RedNoise` to `WhiteNoise`
\ No newline at end of file
+#     If you look closely, the `:uncorrected` value (can be around 60%) is not as bad as the 99% promised in the introduction. This is due to the correlation between the tests introduced by the noise. Indeed, a good exercise is to repeat everything, but change `RedNoise` to `WhiteNoise`.
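+#
+# Where does that "99%" figure come from? A rough back-of-the-envelope sketch, assuming
+# ~100 *independent* tests (an assumption the autocorrelated `RedNoise` violates):
+1 - 0.95^100 # ≈ 0.994, i.e. at least one false positive is almost guaranteed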
diff --git a/docs/src/reference/type1_troendle.jl b/docs/src/reference/type1_troendle.jl
index ecf80b4..4ef8fc5 100755
--- a/docs/src/reference/type1_troendle.jl
+++ b/docs/src/reference/type1_troendle.jl
@@ -50,23 +52,22 @@ end;
 res = any(pvals_all[:, :, :] .<= 0.05, dims=3)[:, :, 1]
 mean(res .> 0, dims=1) |> x -> (:troendle => x[1], :uncorrected => x[2])
 
-# Nice. Troendle fits perfectly and the uncorrected is pretty close to what we calculated above!
+# Nice. Troendle controls the type-1 error: we stay below 0.05. The uncorrected simulations are close to what we calculated above!
 
 # Finally we end this with a short figure, to get a better idea of what this data looks like, and a histogram of the p-values
 f = Figure()
-ax = f[1, 1] = Axis(f)
+ax = f[1, 1] = Axis(f, title="t-values")
 lines!(ax, abs.(ClusterDepth.studentt(data)))
-heatmap!(Axis(f[2, 1]), data)
-series!(Axis(f[2, 2]), data[:, 1:7]')
-h1 = scatter!(Axis(f[1, 2]; yscale=log10), pvals, label="troendle")
+heatmap!(Axis(f[1, 2], title="heatmap of data"), data)
+series!(Axis(f[2, 2], title="data: subset of left plot"), data[:, 1:7]')
+h1 = scatter!(Axis(f[2, 1]; yscale=log10, title="troendle pvals"), pvals, label="troendle")
 hlines!([0.05, 0.01])
-hist!(Axis(f[3, 1]), pvals_all[:, 1, :][:], bins=0:0.01:1.1)
-hist!(Axis(f[3, 2]), pvals_all[:, 2, :][:], bins=0:0.01:1.1)
+hist!(Axis(f[3, 1], title="troendle corrected"), pvals_all[:, 1, :][:], bins=0:0.01:1.1)
+hist!(Axis(f[3, 2], title="uncorrected"), pvals_all[:, 2, :][:], bins=0:0.01:1.1)
 f
-
diff --git a/docs/src/tutorials/demo.jl b/docs/src/tutorials/demo.jl
index d2ae1a1..636de95 100755
--- a/docs/src/tutorials/demo.jl
+++ b/docs/src/tutorials/demo.jl
@@ -3,34 +3,34 @@
 using Random
 using CairoMakie
 
-n_t =40 # timepoints
+n_t = 40 # timepoints
 n_sub = 50
 n_perm = 5000
 snr = 0.5 # signal to noise
 
 ## add a signal to the middle
-signal = vcat(zeros(n_t÷4), sin.(range(0,π,length=n_t÷2)), zeros(n_t÷4))
+signal = vcat(zeros(n_t ÷ 4), sin.(range(0, π, length = n_t ÷ 2)), zeros(n_t ÷ 4))
 
 ## same signal for all subs
-signal = repeat(signal,1,n_sub)
+signal = repeat(signal, 1, n_sub)
 
 ## add noise
-data = randn(MersenneTwister(123),n_t,n_sub).+ snr .* signal
+data = randn(MersenneTwister(123), n_t, n_sub) .+ snr .* signal
 
 ## by default assumes τ=2.3 (~alpha=0.05) and a one-sample t-test
 @time pvals = clusterdepth(data);
 
 f = Figure()
-ax = f[1,1] = Axis(f)
+ax = f[1, 1] = Axis(f)
 
 lines!(abs.(ClusterDepth.studentt(data)))
-h1 = scatter(f[1,2],pvals;axis=(;yscale=log10),label="troendle")
+h1 = scatter(f[1, 2], pvals; axis = (; yscale = log10), label = "troendle")
 
-pvals2 = clusterdepth(data;pval_type=:naive)
-h2 = scatter!(1.2:40.2,pvals2,color="red",label="naive")
+pvals2 = clusterdepth(data; pval_type = :naive)
+h2 = scatter!(1.2:40.2, pvals2, color = "red", label = "naive")
 
 #hlines!(([0.05]))
 axislegend()
-f
\ No newline at end of file
+f
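+
+## for reproducible permutations, an RNG, threshold and permutation count can also be
+## passed explicitly (a usage sketch following the keyword arguments documented in src/cluster.jl)
+pvals3 = clusterdepth(MersenneTwister(1), data; τ = 2.3, nperm = n_perm);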
diff --git a/docs/src/tutorials/eeg-multichannel.jl b/docs/src/tutorials/eeg-multichannel.jl
index 675ff9a..6fdfc6b 100755
--- a/docs/src/tutorials/eeg-multichannel.jl
+++ b/docs/src/tutorials/eeg-multichannel.jl
@@ -6,16 +6,18 @@ using Unfold
 using UnfoldMakie
 using Statistics
 
-# ## How to use clusterDepth multiple comparison correction on multichannel data
+# # How to use the ClusterDepth multiple comparison correction on multichannel data
 
-# This tutorial is adapted from the first EEG example and uses the HArtMuT NYhead model (https://github.com/harmening/HArtMuT) to simulate multiple channels.
+# This tutorial is adapted from the single-channel EEG example and complements it with the HArtMuT NYhead model (https://github.com/harmening/HArtMuT) to simulate multiple channels.
 # First set up the EEG simulation as before, with one subject and 40 trials:
-design = SingleSubjectDesign(conditions=Dict(:condition => ["car", "face"])) |> x -> RepeatDesign(x, 40);
+design =
+    SingleSubjectDesign(conditions=Dict(:condition => ["car", "face"])) |>
+    x -> RepeatDesign(x, 40);
 
 p1 = LinearModelComponent(;
     basis=p100(; sfreq=250),
     formula=@formula(0 ~ 1),
-    β=[1.0]
+    β=[1.0],
 );
 
 n170 = LinearModelComponent(;
@@ -39,7 +41,10 @@ src_coords = [
 ];
 
 headmodel_HArtMuT = headmodel()
-get_closest = coord -> UnfoldSim.closest_src(coord, headmodel_HArtMuT.cortical["pos"]) |> pi -> magnitude(headmodel_HArtMuT; type="perpendicular")[:, pi]
+get_closest =
+    coord ->
+        UnfoldSim.closest_src(coord, headmodel_HArtMuT.cortical["pos"]) |>
+        pi -> magnitude(headmodel_HArtMuT; type="perpendicular")[:, pi]
 
 p1_l = p1 |> c -> MultichannelComponent(c, get_closest([-20, -78, -10]))
 p1_r = p1 |> c -> MultichannelComponent(c, get_closest([20, -78, -10]))
@@ -47,38 +52,55 @@ n170_r = n170 |> c -> MultichannelComponent(c, get_closest([50, -40, -25]))
 p300_do = p300 |> c -> MultichannelComponent(c, get_closest([0, -50, -40]))
 p300_up = p300 |> c -> MultichannelComponent(c, get_closest([0, 5, 20]))
 
-data, events = simulate(MersenneTwister(1), design, [p1_l, p1_r, n170_r, p300_do, p300_up],
+data, events = simulate(
+    MersenneTwister(1),
+    design,
+    [p1_l, p1_r, n170_r, p300_do, p300_up],
     UniformOnset(; offset=0.5 * 250, width=100),
-    RedNoise(noiselevel=1); return_epoched=true);
+    RedNoise(noiselevel=1);
+    return_epoched=true,
+);
 
 # ## Plotting
 # This is what the data looks like - one channel with all trials, and the trial-average of all channels:
 f = Figure()
 Axis(f[1, 1], title="Single channel, all trials", xlabel="time", ylabel="y")
-series!(data[1, :, :]', solid_color=:black)
+series!(data[1, :, :]', solid_color=(:black, 0.1))
 lines!(mean(data[1, :, :], dims=2)[:, 1], color=:red)
-    hlines!([0], color=:gray)
+
 Axis(f[2, 1], title="All channels, average over trials", xlabel="time", ylabel="y")
-series!(mean(data, dims=3)[:, :, 1], solid_color=:black)
+series!(mean(data, dims=3)[:, :, 1], solid_color=(:black, 0.1))
 hlines!([0], color=:gray)
 f
 
 # And some topoplots:
-positions = [Point2f(p[1] + 0.5, p[2] + 0.5) for p in to_positions(headmodel_HArtMuT.electrodes["pos"]')]
+positions = [
+    Point2f(p[1] + 0.5, p[2] + 0.5) for
+    p in to_positions(headmodel_HArtMuT.electrodes["pos"]')
+]
 
-df = UnfoldMakie.eeg_matrix_to_dataframe(mean(data, dims=3)[:, :, 1], string.(1:length(positions)));
+df = UnfoldMakie.eeg_array_to_dataframe(
+    mean(data, dims=3)[:, :, 1],
+    string.(1:length(positions)),
+);
 Δbin = 20 # 20 samples / bin
-plot_topoplotseries(df, Δbin; positions=positions, visual=(; enlarge=1, label_scatter=false))
+plot_topoplotseries(
+    df,
+    bin_width=Δbin,
+    positions=positions,
+    visual=(; enlarge=1, label_scatter=false),
+)
 
 # ## ClusterDepth
 # Now that the simulation is done, let's try out ClusterDepth and plot our results.
 # Note that this is a simple test of "activity" vs. 0
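+# `data` is a 3D array (channel × time × trial); `clusterdepth` accepts this shape
+# directly (compare the 3D test in test/cluster.jl). A quick shape check:
+size(data)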
-pvals = clusterdepth(data; τ=1.6, nperm=100);
-fig, ax, hm = heatmap(transpose(pvals))
+pvals = clusterdepth(data; τ=1.6, nperm=200);
+fig, ax, hm = heatmap(transpose(pvals), colorscale=log10)
 ax.title = "pvals";
 ax.xlabel = "time";
 ax.ylabel = "channel";
 Colorbar(fig[:, end+1], hm);
 fig
diff --git a/docs/src/tutorials/eeg.jl b/docs/src/tutorials/eeg.jl
index eab8748..970b062 100755
--- a/docs/src/tutorials/eeg.jl
+++ b/docs/src/tutorials/eeg.jl
@@ -8,17 +8,23 @@ using DataFrames
 using Unfold
 using UnfoldMakie
 
-# ## How to use clusterDepth multiple comparison correction
+# # How to use the ClusterDepth multiple comparison correction
+
 # !!! info
 #     This tutorial focuses on single-channel data. For multichannel data, see the tutorial "Further EEG Example".
-
-# Let's setup an EEG simulation using UnfoldSim.jl. We simulate a simple 1x2 design with 20 subjects, each with 40 trials
+# ## Simulating test-data
+# Let's set up an EEG simulation using UnfoldSim.jl. We simulate a one-factor (1x2) design with 20 subjects, each with 40 trials
 n_subjects = 20
-design = MultiSubjectDesign(n_subjects=n_subjects, n_items=40, items_between=Dict(:condition => ["car", "face"]))
-first(generate_events(design), 5)
-
-# next we define a ground-truth signal + relation to events/design with Wilkinson Formulas
-# let's simulate a P100, a N170 and a P300 - but an effect only on the N170
+design = MultiSubjectDesign(
+    n_subjects=n_subjects,
+    n_items=40,
+    items_between=Dict(:condition => ["car", "face"]),
+)
+first(generate_events(design), 3)
+
+# Next we define a ground-truth signal based on a Linear Mixed Model.
+#
+# We simulate a **P100**, a **N170** and a **P300** - but an effect only on the **N170**
 p1 = MixedModelComponent(;
     basis=UnfoldSim.p100(; sfreq=250),
     formula=@formula(0 ~ 1 + (1 | subject)),
@@ -39,17 +45,29 @@ p300 = MixedModelComponent(;
     σs=Dict(:subject => [1, 1.0]), # but a random slope for condition
 );
 
-## Start the simulation
-data, events = simulate(MersenneTwister(1), design, [p1, n170, p300], UniformOnset(; offset=500, width=100), RedNoise(noiselevel=1); return_epoched=true)
+data, events = simulate(
+    MersenneTwister(1),
+    design,
+    [p1, n170, p300],
+    UniformOnset(; offset=500, width=100),
+    RedNoise(noiselevel=1);
+    return_epoched=true,
+)
 times = range(0, stop=size(data, 1) / 250, length=size(data, 1));
 
-# let's fit an Unfold Model for each subject
+# ## Fit a model to each subject
+# We now have data, but no multiple comparison problem yet - we have to fit one **analysis** regression model to each time-point of each subject. In doing so, we perform 113 tests and would (without multiple testing correction) expect 5-6 samples to be significant, even if there were no true effect (but there is one ;)).
 # !!! note
 #     In principle, we do not need Unfold here - we could simply calculate (subject-wise) means of the conditions, and their time-resolved difference. Using Unfold.jl here simply generalizes it to more complex designs, e.g. with continuous predictors etc.
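+#
+# As a minimal sketch of that simpler alternative (illustrative only; `subject_diffs` is
+# a hypothetical name and the result is not used below):
+subject_diffs = map(
+    (d, ev) ->
+        mean(d[:, ev.condition.=="face"], dims=2) .-
+        mean(d[:, ev.condition.=="car"], dims=2),
+    eachslice(data; dims=3),
+    groupby(events, :subject),
+);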
-models = map((d, ev) -> (fit(UnfoldModel, @formula(0 ~ 1 + condition), DataFrame(ev), d, times), ev.subject[1]),
+models = map(
+    (d, ev) -> (
+        fit(UnfoldModel, @formula(0 ~ 1 + condition), DataFrame(ev), d, times),
+        ev.subject[1],
+    ),
     eachslice(data; dims=3),
-    groupby(events, :subject))
+    groupby(events, :subject),
+);
 
 # now we can inspect the data easily, and extract the face-effect
@@ -57,17 +75,31 @@ function add_subject!(df, s)
     df[!, :subject] .= s
     return df
 end
-allEffects = map((x) -> (effects(Dict(:condition => ["car", "face"]), x[1]), x[2]) |> (x) -> add_subject!(x[1], x[2]), models) |> e -> reduce(vcat, e)
+allEffects =
+    map(
+        (x) ->
+            (effects(Dict(:condition => ["car", "face"]), x[1]), x[2]) |>
+            (x) -> add_subject!(x[1], x[2]),
+        models,
+    ) |> e -> reduce(vcat, e)
 plot_erp(allEffects; mapping=(color=:condition, group=:subject))
-
-# extract the face-coefficient from the linear model
-allCoefs = map(m -> (coeftable(m[1]), m[2]) |> (x) -> add_subject!(x[1], x[2]), models) |> e -> reduce(vcat, e)
+# Every line is from one subject, the color indicates our two conditions.
+#
+# It is easier to see potential differences if we plot the difference directly:
+#
+# First we extract all coefficients into a nice, tidy DataFrame
+allCoefs =
+    map(m -> (coeftable(m[1]), m[2]) |> (x) -> add_subject!(x[1], x[2]), models) |>
+    e -> reduce(vcat, e)
 plot_erp(allCoefs; mapping=(group=:subject, col=:coefname))
-
-# let's unstack the tidy-coef table into a matrix and put it to clusterdepth for clusterpermutation testing
+# This plot now shows the intercept (given our contrast coding, the `condition: "car"` ERP) and the difference curve.
+#
+# Next we unstack the tidy-coef table into a matrix and feed it to `clusterdepth` for cluster-permutation testing
 faceCoefs = allCoefs |> x -> subset(x, :coefname => x -> x .== "condition: face")
-erpMatrix = unstack(faceCoefs, :subject, :time, :estimate) |> x -> Matrix(x[:, 2:end])' |> collect
+erpMatrix =
+    unstack(faceCoefs, :subject, :time, :estimate) |> x -> Matrix(x[:, 2:end])' |> collect
 summary(erpMatrix)
 
 # ## Clusterdepth
@@ -78,15 +110,19 @@ pvals = clusterdepth(erpMatrix; τ=quantile(TDist(n_subjects - 1), 0.95), nperm=
 
 # Some plotting, and we add the identified cluster
 # first calculate the ERP
-faceERP = groupby(faceCoefs, [:time, :coefname]) |>
-          x -> combine(x, :estimate => mean => :estimate,
-    :estimate => std => :stderror);
+faceERP =
+    groupby(faceCoefs, [:time, :coefname]) |>
+    x -> combine(
+        x,
+        :estimate => mean => :estimate,
+        :estimate => (x -> std(x) / sqrt(length(x))) => :stderror,
+    );
 
 # put the pvalues into a nicer format
-pvalDF = ClusterDepth.cluster(pvals .<= 0.05) |> x -> DataFrame(:from => x[1] ./ 250, :to => (x[1] .+ x[2]) ./ 250, :coefname => "condition: face")
-plot_erp(faceERP; stderror=true, pvalue=pvalDF)
-
-# Looks good to me! We identified the cluster :-)
-
-# old unused code to use extra=(;pvalue=pvalDF) in the plotting function, but didnt work.
-
+pvalDF =
+    ClusterDepth.cluster(pvals .<= 0.05) |>
+    x -> DataFrame(
+        :from => x[1] ./ 250,
+        :to => (x[1] .+ x[2]) ./ 250,
+        :coefname => "condition: face",
+    )
+plot_erp(faceERP; stderror=true, significance=pvalDF)
+hlines!([0])
+current_figure()
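+#
+# For a rough comparison (an illustrative aside): a Bonferroni correction over the 113
+# tests would require a per-sample threshold of
+0.05 / 113 # ≈ 0.00044, considerably more conservative than clusterdepth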
+# Looks good! We identified the cluster :-)
\ No newline at end of file
diff --git a/src/ClusterDepth.jl b/src/ClusterDepth.jl
index 8418123..f56dd1d 100755
--- a/src/ClusterDepth.jl
+++ b/src/ClusterDepth.jl
@@ -1,7 +1,7 @@
 module ClusterDepth
 
 using Random
-using Images
+#using ImageMorphology
 using SparseArrays
 #using ExtendableSparse
 using StatsBase
diff --git a/src/cluster.jl b/src/cluster.jl
index 006ca1f..10a335f 100755
--- a/src/cluster.jl
+++ b/src/cluster.jl
@@ -24,14 +25,14 @@ clusterdepth(data::AbstractArray, args...; kwargs...) =
 function clusterdepth(
     rng,
     data::AbstractArray;
-    τ = 2.3,
-    stat_type = :onesample_ttest,
-    perm_type = :sign,
-    side_type = :abs,
-    nperm = 5000,
-    pval_type = :troendle,
-    (statfun!) = nothing,
-    statfun = nothing,
+    τ=2.3,
+    stat_type=:onesample_ttest,
+    perm_type=:sign,
+    side_type=:abs,
+    nperm=5000,
+    pval_type=:troendle,
+    (statfun!)=nothing,
+    statfun=nothing,
 )
     if stat_type == :onesample_ttest
         statfun! = studentt!
@@ -57,13 +58,13 @@
         data,
         permfun,
         τ;
-        nₚ = nperm,
-        (statfun!) = statfun!,
-        statfun = statfun,
-        sidefun = sidefun,
+        nₚ=nperm,
+        (statfun!)=statfun!,
+        statfun=statfun,
+        sidefun=sidefun,
     )
 
-    return pvals(statfun(data), cdmTuple, τ; type = pval_type)
+    return pvals(statfun(data), cdmTuple, τ; type=pval_type)
 end
@@ -74,10 +75,10 @@ function perm_clusterdepths_both(
     data,
     permfun,
     τ;
-    statfun = nothing,
-    (statfun!) = nothing,
-    nₚ = 1000,
-    sidefun = nothing,
+    statfun=nothing,
+    (statfun!)=nothing,
+    nₚ=1000,
+    sidefun=nothing,
 )
     @assert !(isnothing(statfun) && isnothing(statfun!)) "either statfun or statfun! has to be defined"
@@ -186,7 +187,7 @@ function calc_clusterdepth(d0, τ)
     end
 
     maxL = 1 + maximum(len) # go up only to max-depth
-
+    # @debug maxL
     valCol_head = Vector{Float64}(undef, maxL)
     valCol_tail = Vector{Float64}(undef, maxL)
@@ -217,25 +218,38 @@
 if the first and last cluster start on the first/last sample, we don't know their real depth
 
 Input is assumed to be a thresholded Array with only 0/1
 """
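+# Intended behavior (cf. test/cluster.jl): for
+# [4.0, 0.0, 10.0, 0.0, 3.0, 4.0, 0, 4.0, 4.0, 0.0, 0.0, 5.0] .> 0.9
+# this returns start = [3, 5, 8] and len (cluster depth) = [0, 1, 1];
+# the border clusters touching the first and the last sample are dropped.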
-function cluster(data)
-    label = label_components(data)
-    K = maximum(label)
-    start = fill(0, K)
-    stop = fill(0, K)
-    for k = 1:K
-        #length[k] = sum(label.==k)
-        start[k] = findfirst(==(k), label)
-        stop[k] = findlast(==(k), label)
+function cluster(data::BitVector)
+    start = Int[]
+    stop = Int[]
+
+    # single pass over the thresholded data: record where runs of 1s begin and end
+    state = false
+    for i in eachindex(data)
+        if data[i]
+            if !state
+                push!(start, i)
+                state = true
+            end
+        else
+            if state
+                push!(stop, i - 1)
+                state = false
+            end
+        end
     end
-    len = stop .- start
+
     # if the first and last cluster start on the first/last sample, we don't know their real depth
-    if length(start) > 0 && start[end] + len[end] == length(data)
+    # a cluster still open at the last sample has no recorded stop, so drop it
+    if length(start) == length(stop) + 1
         start = start[1:end-1]
-        len = len[1:end-1]
     end
+    len = stop .- start
+
     if length(start) > 0 && start[1] == 1
         start = start[2:end]
         len = len[2:end]
diff --git a/test/cluster.jl b/test/cluster.jl
index ec291fa..3dae67d 100755
--- a/test/cluster.jl
+++ b/test/cluster.jl
@@ -2,12 +2,20 @@
     s, l = ClusterDepth.cluster(
        [4.0, 0.0, 10.0, 0.0, 3.0, 4.0, 0, 4.0, 4.0, 0.0, 0.0, 5.0] .> 0.9,
     )
+
     @test s == [3, 5, 8]
     @test l == [0, 1, 1]
 
     s, l = ClusterDepth.cluster([0.0, 0.0, 0.0, 0.0] .> 0.9)
     @test s == []
     @test l == []
+
+    # a trailing cluster that is still open at the last sample is dropped
+    s, l = ClusterDepth.cluster(
+        [4.0, 0.0, 10.0, 0.0, 3.0, 4.0, 0, 4.0, 4.0] .> 0.9,
+    )
+    @test s == [3, 5]
+    @test l == [0, 1]
 end
 
 @testset "Tests for 2D data" begin
@@ -17,5 +25,5 @@ end
 
 @testset "Tests for 3D data" begin
     data = randn(StableRNG(1), 3, 20, 5)
-    @show ClusterDepth.clusterdepth(data; τ = 0.4, nperm = 5)
+    @show ClusterDepth.clusterdepth(data; τ=0.4, nperm=5)
 end
diff --git a/test/pvals.jl b/test/pvals.jl
index 65c897e..ad7655d 100755
--- a/test/pvals.jl
+++ b/test/pvals.jl
@@ -22,7 +22,7 @@
         [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0],
         cdm,
         0.1,
-        type = :naive,
+        type=:naive,
     )
 
     @test p[7] ≈ 1 / 1001
@@ -55,6 +55,8 @@
     p = ClusterDepth.pvals([0.0, 1.0, 2.0, 3.0, 4.0, 5.1, 2.0, 0], (cdm, cdm), 0.9)
     @test all((p .> 0.05) .== [1, 1, 1, 0, 0, 0, 1, 1])
 
+    # get the observed clusters feeding into the p-values above
+    start, len = ClusterDepth.cluster([0.0, 1.0, 2.0, 3.0, 4.0, 5.1, 2.0, 0] .> 0.9)
+    @test start == [2]
+    @test len == [5]
+
     p = ClusterDepth.pvals([0.0, 1.0, 2.0, 3.0, 4.0, 2.0, 1.0, 0.0, 6], (cdm, cdm), 0.9)
     @test all((p .> 0.05) .== [1, 1, 1, 0, 0, 1, 1, 1, 1])