From 6345f44541e65f8c09af65af4c9cdf4fcbdd0f05 Mon Sep 17 00:00:00 2001 From: PharmCat <13901158+PharmCat@users.noreply.github.com> Date: Fri, 21 Oct 2022 01:26:44 +0300 Subject: [PATCH] Add Examples in doc, change Project.toml, add tast and #43 (#43) * Update Project.toml * Update CI.yml * doc example * remove strict --- .github/workflows/CI.yml | 4 +- Project.toml | 33 +++-- docs/Project.toml | 20 +-- docs/make.jl | 8 +- docs/src/lib/examples.md | 300 +++++++++++++++++++++++++++++++++++++++ src/categorical.jl | 2 +- src/design.jl | 52 +++---- src/kl_exchange.jl | 13 +- test/kl_exchange.jl | 2 +- test/paley.jl | 50 +++++++ test/runtests.jl | 2 +- 11 files changed, 421 insertions(+), 65 deletions(-) create mode 100644 docs/src/lib/examples.md create mode 100644 test/paley.jl diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 23548fd..0a2c358 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -10,7 +10,7 @@ jobs: fail-fast: false matrix: version: - - '1.6.2' + - '1.6.7' - '1' # automatically expands to the latest stable 1.x release of Julia - 'nightly' os: @@ -52,7 +52,7 @@ jobs: - uses: actions/checkout@v2 - uses: julia-actions/setup-julia@v1 with: - version: '1.6.2' + version: '1.6.7' - uses: julia-actions/julia-buildpkg@v1 - run: | julia --project=docs -e ' diff --git a/Project.toml b/Project.toml index 0293ae9..9e95d1a 100644 --- a/Project.toml +++ b/Project.toml @@ -1,33 +1,36 @@ name = "ExperimentalDesign" uuid = "4babbea4-9e7d-11e9-116f-e1ada04bd296" authors = ["Pedro Bruel "] -version = "0.4.0" +version = "0.4.1" [deps] Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" -Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" -GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a" LatinHypercubeSampling = "a5e1c1ea-c99a-51d3-a14d-a9a37257b02d" LinearAlgebra = 
"37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" Primes = "27ebfcd6-29c5-5fa9-bf4b-fb8fc14df3ae" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" -StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d" -Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [compat] -Combinatorics = "1.0" -DataFrames = "~1.2.0" -Distributions = "~0.25.11" -DocStringExtensions = "~0.8.5" -Documenter = "~0.27" -GLM = "~1.5" +Combinatorics = "1" +DataFrames = "1.2, 1.3, 1.4" +Distributions = "0.25" +DocStringExtensions = "0.8, 0.9" LatinHypercubeSampling = "1.8" -Primes = "~0.5" -StatsBase = "~0.33.8" -StatsModels = "~0.6.23" -julia = "~1.6" +Primes = "0.5" +StatsModels = "0.6" +julia = "1.8" + +[extras] +DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" +Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a" +StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[targets] +test = ["DataFrames", "StatsBase", "Test", "GLM", "Documenter"] diff --git a/docs/Project.toml b/docs/Project.toml index 65400da..11d2f3e 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -11,14 +11,16 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd" [compat] -DataFrames = "~1.2.0" -Distributions = "~0.25.11" -DocStringExtensions = "~0.8.5" -Documenter = "~0.27" -GLM = "~1.5" -Primes = "~0.5" -StatsBase = "~0.33.8" -StatsModels = "~0.6.23" -julia = "~1.6" +DataFrames = "1" +Distributions = "0.25" +DocStringExtensions = "0.8" +Documenter = "0.27" +GLM = "1.5, 1.6, 1.7" +Primes = "0.5" +StatsBase = "0.33" +StatsModels = "0.6" +StatsPlots = "0.14, 0.15" +julia = "1.6" diff --git a/docs/make.jl b/docs/make.jl index f66109b..ee3239b 100644 --- 
a/docs/make.jl +++ b/docs/make.jl @@ -1,4 +1,4 @@ -using Documenter, ExperimentalDesign +using Documenter, ExperimentalDesign, StatsModels, GLM, DataFrames, Distributions, Random, StatsPlots random_seed = 443591 @@ -13,8 +13,7 @@ end DocMeta.setdocmeta!(ExperimentalDesign, :DocTestSetup, - :(using ExperimentalDesign, Distributions, - Random, StatsModels, DataFrames; + :(using ExperimentalDesign, StatsModels, GLM, DataFrames, Distributions, Random, StatsPlots; Random.seed!($random_seed);); recursive = true) @@ -28,10 +27,11 @@ makedocs( sitename = "ExperimentalDesign.jl", authors = "Pedro Bruel and contributors.", linkcheck = !("skiplinks" in ARGS), - strict = !("strict=false" in ARGS), + #strict = !("strict=false" in ARGS), doctest = ("doctest=only" in ARGS) ? :only : true, pages = [ "Home" => "index.md", + "Examples" => "lib/examples.md", "Library" => Any[ "Public" => "lib/public.md", "Internals" => "lib/internals.md" diff --git a/docs/src/lib/examples.md b/docs/src/lib/examples.md new file mode 100644 index 0000000..4590954 --- /dev/null +++ b/docs/src/lib/examples.md @@ -0,0 +1,300 @@ +### Optimal Design with KL-Exchange + +#### Generating Random Designs (Uniform) + +```@example edexample +using ExperimentalDesign, StatsModels, GLM, DataFrames, Distributions, Random, StatsPlots + +design_distribution = DesignDistribution((size = Uniform(23, 32), weight = Uniform(0, 100))) +``` + +```@example edexample + +rand(design_distribution, 3) +``` + +```@example edexample + +design = rand(design_distribution, 400) + +p = @df design.matrix scatter(:size, + :weight, + size = (600, 600), + xlabel = "size", + ylabel = "weight", + xlim = [23.0, 32.0], + ylim = [0.0, 100.0], + legend = false, + title = "Uniformly Sampled Design") + +png(p, "plot1.png") +nothing +``` + +![](plot1.png) + +#### Generating Experiments for a Linear Hypothesis + +```@example edexample +design = rand(design_distribution, 400); + +f = @formula 0 ~ size + weight + +optimal_design = OptimalDesign(design, 
f, 10) + +p = @df optimal_design.matrix scatter(:size, + :weight, + size = (600, 600), + xlabel = "size", + ylabel = "weight", + xlim = [23.0, 32.0], + ylim = [0.0, 100.0], + legend = false, + title = "Design for y = size + weight") +png(p, "plot2.png") +nothing +``` + +![](plot2.png) + +#### Generating Experiments for Other Terms + + +```@example edexample + + +design = rand(design_distribution, 400); +f = @formula 0 ~ size + weight + size ^ 2 + (1 / weight) + +optimal_design = OptimalDesign(design, f, 20) + +p = @df optimal_design.matrix scatter(:size, + :weight, + size = (600, 600), + xlabel = "size", + ylabel = "weight", + xlim = [23.0, 32.0], + ylim = [0.0, 100.0], + legend = false, + title = "Design for y = size + weight + (size ^ 2) + (1 / weight)") +png(p, "plot3.png") +nothing +``` + +![](plot3.png) + +```@example edexample + +design = rand(design_distribution, 800); +f = @formula 0 ~ size + weight + size ^ 2 +optimal_design = OptimalDesign(design, f, 10) + +p = @df optimal_design.matrix scatter(:size, + :weight, + size = (600, 600), + xlabel = "size", + ylabel = "weight", + legend = false, + title = "Design for y = size + weight + (size ^ 2)") + +png(p, "plot4.png") +nothing +``` + +![](plot4.png) + +#### Designs with Categorical Factors + +```@example edexample +design_distribution = DesignDistribution((f1 = DiscreteUniform(0, 5), + f2 = CategoricalFactor(["cf", "cg", "ca"]))) + +``` + +```@example edexample +design = rand(design_distribution, 300); +f = @formula 0 ~ f1 + f1 ^ 2 + f2 + +optimal_design = OptimalDesign(design, f, 10) +``` + +```@example edexample +p = @df optimal_design.matrix scatter(:f1, + :f2, + size = (600, 600), + xlabel = "f1", + ylabel = "f2", + legend = false, + title = "Optimal Design for y = f1 + f2") +png(p, "plot5.png") +nothing +``` + +![](plot5.png) + + +### Screening with Plackett-Burman Designs + +#### Generating Plackett-Burman Designs + +A Plackett-Burman design is an orthogonal design matrix for factors $f_1,\dots,f_N$. 
Factors are encoded by high and low values, which can be mapped to the interval $[-1, 1]$. For designs in this package, the design matrix is a `DataFrame` from the [DataFrames package](https://dataframes.juliadata.org/stable/). For example, let's create a Plackett-Burman design for 6 factors: + +```@example edexample +design = PlackettBurman(6) +design.matrix +``` + +Note that it is not possible to construct exact Plackett-Burman designs for all numbers of factors. In the example above, we needed a seventh extra "dummy" column to construct the design for six factors. + +Using the `PlackettBurman` constructor enables quick construction of minimal screening designs for scenarios where we ignore interactions. We can access the underlying formula, which is a `Term` object from the [StatsModels package](https://juliastats.org/StatsModels.jl/stable/): + + +```@example edexample +println(design.formula) +``` + +Notice we ignore interactions and include the dummy factor in the model. Strong main effects attributed to dummy factors may indicate important interactions. + +We can obtain a tuple with the names of dummy factors: + + +```@example edexample +design.dummy_factors +``` + +We can also get the main factors tuple: + +```@example edexample +design.factors +``` + +You can check other constructors on [the docs](https://phrb.github.io/ExperimentalDesign.jl/dev/lib/public/#ExperimentalDesign.PlackettBurman-Tuple{Int64}). 
+ +#### Computing Main Effects + +Suppose that the response variable on the experiments specified in our screening design is computed by: + +$$ +y = 1.2 + (2.3f_1) + (-3.4f_2) + (7.12f_3) + (-0.03f_4) + (1.1f_5) + (-0.5f_6) + \varepsilon +$$ + +The coefficients we want to estimate are: + +| Intercept | factor1 | factor2 | factor3 | factor4 | factor5 | factor6 | +|---|---|---|---|---|---|---| +| 1.2 | 2.3 | -3.4 | 7.12 | -0.03 | 1.1 | -0.5 | + +The corresponding Julia function is: + +```@example edexample +function y(x) + return (1.2) + + (2.3 * x[1]) + + (-3.4 * x[2]) + + (7.12 * x[3]) + + (-0.03 * x[4]) + + (1.1 * x[5]) + + (-0.5 * x[6]) + + (1.1 * randn()) +end +``` + +We can compute the response column for our design using the cell below. Recall that the default is to call the response column `:response`. We are going to set the seeds each time we run `y(x)`, so we analyse the same results. Play with different seeds to observe variability of estimates. + +```@example edexample +Random.seed!(192938) + +design.matrix[!, :response] = y.(eachrow(design.matrix[:, collect(design.factors)])) +design.matrix +``` + +Now, we use the `lm` function from the [GLM package](https://juliastats.org/GLM.jl/stable/) to fit a linear model using the design's matrix and formula: + +```@example edexample +lm(term(:response) ~ design.formula.rhs, design.matrix) +``` + +The table below shows the coefficients estimated by the linear model fit using the Plackett-Burman Design. The purpose of a screening design is not to estimate the actual coefficients, but instead to compute factor main effects. Note that standard errors are the same for every factor estimate. This happens because the design is orthogonal. 
+ +| | Intercept | factor1 | factor2 | factor3 | factor4 | factor5 | factor6 | dummy1 | +|---|---|---|---|---|---|---|---|---| +| Original | 1.2 | 2.3 | -3.4 | 7.12 | -0.03 | 1.1 | -0.5 | $-$ | +| Plackett-Burman Main Effects | $-$ | 2.66359 | -2.80249 | 7.71644 | -0.0497774 | 0.612681 | -0.112675 | -0.437176 | + +We can use the coefficient magnitudes to infer that factor 3 probably has a strong main effect, and that factor 6 has not. Our dummy column had a relatively small coefficient estimate, so we could attempt to ignore interactions on subsequent experiments. + +#### Fitting a Linear Model + +We can also try to fit a linear model on our design data in order to estimate coefficients. We would need to drop the dummy column and add the intercept term: + +```@example edexample +lm(term(:response) ~ sum(term.(design.factors)), design.matrix) +``` + +Our table so far looks like this: + +| | Intercept | factor1 | factor2 | factor3 | factor4 | factor5 | factor6 | dummy1 | +|---|---|---|---|---|---|---|---|---| +| Original | 1.2 | 2.3 | -3.4 | 7.12 | -0.03 | 1.1 | -0.5 | $-$ | +| Plackett-Burman Main Effects | $-$ | 2.66359 | -2.80249 | 7.71644 | -0.0497774 | 0.612681 | -0.112675 | -0.437176 | +| Plackett-Burman Estimate | 0.940183 | 2.66359 | -2.80249 | 7.71644 | -0.0497774 | 0.612681 | -0.112675 | $-$ | + +Notice that, since the standard errors are the same for all factors, factors with stronger main effects are better estimated. Notice that, despite the "good" coefficient estimates, the confidence intervals are really large. + +This is a biased comparison where the screening design "works" for coefficient estimation as well, but we would rather use fractional factorial or optimal designs to estimate the coefficients of factors with strong effects. Screening should be used to compute main effects and identifying which factors to test next. 
+ +#### Generating Random Designs + +We can also compare the coefficients produced by the same linear model fit, but using a random design. For more information, check [the docs](https://phrb.github.io/ExperimentalDesign.jl/dev/lib/public/#ExperimentalDesign.RandomDesign-Tuple{NamedTuple}). + +```@example edexample +Random.seed!(8418172) + +design_distribution = DesignDistribution(DiscreteNonParametric([-1, 1], [0.5, 0.5]), 6) +random_design = rand(design_distribution, 8) + +random_design.matrix[!, :response] = y.(eachrow(random_design.matrix[:, :])) +random_design.matrix +``` + +```@example edexample +lm(term(:response) ~ random_design.formula.rhs, random_design.matrix) +``` + +Now, our table looks like this: + +| | Intercept | factor1 | factor2 | factor3 | factor4 | factor5 | factor6 | dummy1 | +|---|---|---|---|---|---|---|---|---| +| Original | 1.2 | 2.3 | -3.4 | 7.12 | -0.03 | 1.1 | -0.5 | $-$ | +| Plackett-Burman Main Effects | $-$ | 2.66359 | -2.80249 | 7.71644 | -0.0497774 | 0.612681 | -0.112675 | -0.437176 | +| Plackett-Burman Estimate | 0.940183 | 2.66359 | -2.80249 | 7.71644 | -0.0497774 | 0.612681 | -0.112675 | $-$ | +| Single Random Design Estimate | 0.761531 | 1.67467 | -3.05027 | 7.72484 | 0.141204 | 1.71071 | -0.558869 | $-$ | + +The estimates produced using random designs will have larger confidence intervals, and therefore increased variability. The Plackett-Burman design is fixed, but can be randomised. The variability of main effects estimates using screening designs will depend on measurement or model error. + +#### Generating Full Factorial Designs + +In this toy example, it is possible to generate all the possible combinations of six binary factors and compute the response. Although it costs 64 experiments, the linear model fit for the full factorial design should produce the best coefficient estimates. + +The simplest full factorial design constructor receives an array of possible factor levels. 
For more, check [the docs](https://phrb.github.io/ExperimentalDesign.jl/dev/lib/public/#ExperimentalDesign.FullFactorial-Tuple{NamedTuple,StatsModels.FormulaTerm}). + +```@example edexample +Random.seed!(2989476) + +factorial_design = FullFactorial(fill([-1, 1], 6)) +factorial_design.matrix[!, :response] = y.(eachrow(factorial_design.matrix[:, :])) + +lm(term(:response) ~ factorial_design.formula.rhs, factorial_design.matrix) +``` + +The confidence intervals for this fit are much smaller. Since we have all information on all factors and this is a balanced design, the standard error is the same for all estimates. Here's the complete table: + +| | Intercept | factor1 | factor2 | factor3 | factor4 | factor5 | factor6 | dummy1 | +|---|---|---|---|---|---|---|---|---| +| Original | 1.2 | 2.3 | -3.4 | 7.12 | -0.03 | 1.1 | -0.5 | $-$ | +| Plackett-Burman Main Effects | $-$ | 2.66359 | -2.80249 | 7.71644 | -0.0497774 | 0.612681 | -0.112675 | -0.437176 | +| Plackett-Burman Estimate | 0.940183 | 2.66359 | -2.80249 | 7.71644 | -0.0497774 | 0.612681 | -0.112675 | $-$ | +| Single Random Design Estimate | 0.600392 | 2.2371 | -2.56857 | 8.05743 | 0.140622 | 0.907918 | -0.600354 | $-$ | +| Full Factorial Estimate | 1.13095 | 2.23668 | -3.4775 | 6.95531 | -0.160546 | 0.975471 | -0.357748 | $-$ | + +Full factorial designs may be too expensive in actual applications. Fractional factorial designs or optimal designs can be used to decrease costs while still providing good estimates. Screening designs are extremely cheap, and can help determine which factors can potentially be dropped on more expensive and precise designs. 
diff --git a/src/categorical.jl b/src/categorical.jl index 03e7673..13ace1a 100644 --- a/src/categorical.jl +++ b/src/categorical.jl @@ -18,7 +18,7 @@ $(TYPEDSIGNATURES) julia> a = CategoricalFactor([:a, :b, 2, 1.0]) CategoricalFactor( values: Any[:a, :b, 2, 1.0] -distribution: Distributions.DiscreteUniform(a=1, b=4) +distribution: DiscreteUniform(a=1, b=4) ) ``` diff --git a/src/design.jl b/src/design.jl index e8b133c..941157a 100644 --- a/src/design.jl +++ b/src/design.jl @@ -153,8 +153,8 @@ Factors: (:factor1, :factor2, :factor3) Formula: y ~ -1 + factor1 + factor2 + factor3 + factor1 & factor1 + factor2 & factor2 + factor3 & factor3 + factor1 & factor2 + factor1 & factor3 + factor2 & factor3 Design Matrix: 15×3 DataFrame - Row │ factor1 factor2 factor3 - │ Float64 Float64 Float64 + Row │ factor1 factor2 factor3 + │ Float64 Float64 Float64 ─────┼─────────────────────────── 1 │ -1.0 -1.0 0.0 2 │ 1.0 -1.0 0.0 @@ -207,8 +207,8 @@ Factors: (:factor1, :factor2, :factor3) Formula: y ~ -1 + factor1 & factor1 + factor2 & factor2 + factor3 & factor3 + factor1 & factor2 + factor1 & factor3 + factor2 & factor3 + factor1 + factor2 + factor3 Design Matrix: 15×3 DataFrame - Row │ factor1 factor2 factor3 - │ Float64 Float64 Float64 + Row │ factor1 factor2 factor3 + │ Float64 Float64 Float64 ─────┼─────────────────────────── 1 │ -1.0 -1.0 0.0 2 │ 1.0 -1.0 0.0 @@ -535,8 +535,8 @@ Factors: (a = [-1, 1], b = [-1, 1], c = [-1, 1]) Formula: y ~ a + b + c + a & b + a & c + b & c + a & b & c Design Matrix: 8×7 DataFrame - Row │ a b c a_b a_c b_c a_b_c - │ Int64 Int64 Int64 Int64 Int64 Int64 Int64 + Row │ a b c a_b a_c b_c a_b_c + │ Int64 Int64 Int64 Int64 Int64 Int64 Int64 ─────┼───────────────────────────────────────────────── 1 │ -1 -1 -1 1 1 1 -1 2 │ 1 -1 -1 -1 -1 1 1 @@ -618,9 +618,9 @@ julia> DesignDistribution((f1 = Uniform(2, 3), f2 = DiscreteUniform(-1, 5), f3 = DesignDistribution Formula: 0 ~ f1 + f2 + f3 Factor Distributions: -f1: Distributions.Uniform{Float64}(a=2.0, b=3.0) 
-f2: Distributions.DiscreteUniform(a=-1, b=5) -f3: Distributions.Uniform{Float64}(a=5.0, b=10.0) +f1: Uniform{Float64}(a=2.0, b=3.0) +f2: DiscreteUniform(a=-1, b=5) +f3: Uniform{Float64}(a=5.0, b=10.0) ``` """ @@ -636,9 +636,9 @@ julia> DesignDistribution((Uniform(2, 3), DiscreteUniform(-1, 5), Uniform(5, 10) DesignDistribution Formula: 0 ~ factor1 + factor2 + factor3 Factor Distributions: -factor1: Distributions.Uniform{Float64}(a=2.0, b=3.0) -factor2: Distributions.DiscreteUniform(a=-1, b=5) -factor3: Distributions.Uniform{Float64}(a=5.0, b=10.0) +factor1: Uniform{Float64}(a=2.0, b=3.0) +factor2: DiscreteUniform(a=-1, b=5) +factor3: Uniform{Float64}(a=5.0, b=10.0) ``` """ @@ -654,9 +654,9 @@ julia> DesignDistribution([Uniform(2, 3), DiscreteUniform(-1, 5), Uniform(5, 10) DesignDistribution Formula: 0 ~ factor1 + factor2 + factor3 Factor Distributions: -factor1: Distributions.Uniform{Float64}(a=2.0, b=3.0) -factor2: Distributions.DiscreteUniform(a=-1, b=5) -factor3: Distributions.Uniform{Float64}(a=5.0, b=10.0) +factor1: Uniform{Float64}(a=2.0, b=3.0) +factor2: DiscreteUniform(a=-1, b=5) +factor3: Uniform{Float64}(a=5.0, b=10.0) ``` """ @@ -672,12 +672,12 @@ julia> DesignDistribution(DiscreteNonParametric([-1, 1], [0.5, 0.5]), 6) DesignDistribution Formula: 0 ~ factor1 + factor2 + factor3 + factor4 + factor5 + factor6 Factor Distributions: -factor1: Distributions.DiscreteNonParametric{Int64, Float64, Vector{Int64}, Vector{Float64}}(support=[-1, 1], p=[0.5, 0.5]) -factor2: Distributions.DiscreteNonParametric{Int64, Float64, Vector{Int64}, Vector{Float64}}(support=[-1, 1], p=[0.5, 0.5]) -factor3: Distributions.DiscreteNonParametric{Int64, Float64, Vector{Int64}, Vector{Float64}}(support=[-1, 1], p=[0.5, 0.5]) -factor4: Distributions.DiscreteNonParametric{Int64, Float64, Vector{Int64}, Vector{Float64}}(support=[-1, 1], p=[0.5, 0.5]) -factor5: Distributions.DiscreteNonParametric{Int64, Float64, Vector{Int64}, Vector{Float64}}(support=[-1, 1], p=[0.5, 0.5]) -factor6: 
Distributions.DiscreteNonParametric{Int64, Float64, Vector{Int64}, Vector{Float64}}(support=[-1, 1], p=[0.5, 0.5]) +factor1: DiscreteNonParametric{Int64, Float64, Vector{Int64}, Vector{Float64}}(support=[-1, 1], p=[0.5, 0.5]) +factor2: DiscreteNonParametric{Int64, Float64, Vector{Int64}, Vector{Float64}}(support=[-1, 1], p=[0.5, 0.5]) +factor3: DiscreteNonParametric{Int64, Float64, Vector{Int64}, Vector{Float64}}(support=[-1, 1], p=[0.5, 0.5]) +factor4: DiscreteNonParametric{Int64, Float64, Vector{Int64}, Vector{Float64}}(support=[-1, 1], p=[0.5, 0.5]) +factor5: DiscreteNonParametric{Int64, Float64, Vector{Int64}, Vector{Float64}}(support=[-1, 1], p=[0.5, 0.5]) +factor6: DiscreteNonParametric{Int64, Float64, Vector{Int64}, Vector{Float64}}(support=[-1, 1], p=[0.5, 0.5]) ``` """ @@ -693,7 +693,7 @@ $(TYPEDSIGNATURES) julia> rand(DesignDistribution((f1 = Uniform(2, 3), f2 = DiscreteUniform(-1, 5), f3 = Uniform(5, 10))), 12) ExperimentalDesign.RandomDesign Dimension: (12, 3) -Factors: (f1 = Distributions.Uniform{Float64}(a=2.0, b=3.0), f2 = Distributions.DiscreteUniform(a=-1, b=5), f3 = Distributions.Uniform{Float64}(a=5.0, b=10.0)) +Factors: (f1 = Uniform{Float64}(a=2.0, b=3.0), f2 = DiscreteUniform(a=-1, b=5), f3 = Uniform{Float64}(a=5.0, b=10.0)) Formula: 0 ~ f1 + f2 + f3 Design Matrix: 12×3 DataFrame @@ -881,9 +881,9 @@ julia> design_distribution = DesignDistribution((f1 = Uniform(2, 3), f2 = Discre DesignDistribution Formula: 0 ~ f1 + f2 + f3 Factor Distributions: -f1: Distributions.Uniform{Float64}(a=2.0, b=3.0) -f2: Distributions.DiscreteUniform(a=-1, b=5) -f3: Distributions.Uniform{Float64}(a=5.0, b=10.0) +f1: Uniform{Float64}(a=2.0, b=3.0) +f2: DiscreteUniform(a=-1, b=5) +f3: Uniform{Float64}(a=5.0, b=10.0) julia> design = rand(design_distribution, 400); @@ -892,7 +892,7 @@ julia> f = @formula 0 ~ f1 + f2 + f3 + f2 ^ 2; julia> OptimalDesign(design, f, 10) OptimalDesign Dimension: (10, 3) -Factors: (f1 = Distributions.Uniform{Float64}(a=2.0, b=3.0), f2 = 
Distributions.DiscreteUniform(a=-1, b=5), f3 = Distributions.Uniform{Float64}(a=5.0, b=10.0)) +Factors: (f1 = Uniform{Float64}(a=2.0, b=3.0), f2 = DiscreteUniform(a=-1, b=5), f3 = Uniform{Float64}(a=5.0, b=10.0)) Formula: 0 ~ f1 + f2 + f3 + :(f2 ^ 2) Selected Candidate Rows: [244, 49, 375, 43, 369, 44, 16, 346, 175, 205] Optimality Criteria: Dict(:D => 2.3940431912232483) diff --git a/src/kl_exchange.jl b/src/kl_exchange.jl index 4d33903..2ea3195 100644 --- a/src/kl_exchange.jl +++ b/src/kl_exchange.jl @@ -3,10 +3,11 @@ using LinearAlgebra, StatsModels, ExperimentalDesign, Distributions, DataFrames """ $(TYPEDSIGNATURES) -```jldoctest -julia> 1 + 1 -2 -``` +Criterion of D-optimality, which seeks to minimize ``|(X^T · X) − I*tol| / N^{1/b}``, +or equivalently maximize the determinant of the information matrix ``X^T · X`` +of the design. This criterion results in maximizing the differential +Shannon information content of the parameter estimates. + """ function d_criterion(model_matrix; tolerance = 1e-9) @@ -70,7 +71,7 @@ julia> n_candidates = 3000; julia> n_factors = 30; -julia> design_generator = DesignDistribution(Distributions.Uniform(0, 1), n_factors); +julia> design_generator = DesignDistribution(Uniform(0, 1), n_factors); julia> candidates = rand(design_generator, n_candidates); @@ -214,7 +215,7 @@ function test() n = 50 println("Allocating memory") - design_generator = DesignDistribution(Distributions.Uniform(0, 1), n) + design_generator = DesignDistribution(Uniform(0, 1), n) @time candidates = rand(design_generator, c) println("Design created, calling exchange") diff --git a/test/kl_exchange.jl b/test/kl_exchange.jl index bca77c5..527f658 100644 --- a/test/kl_exchange.jl +++ b/test/kl_exchange.jl @@ -19,7 +19,7 @@ Random.seed!(random_seed) n_candidates = 3000 n_factors = 30; - design_generator = DesignDistribution(Distributions.Uniform(0, 1), n_factors) + design_generator = DesignDistribution(Uniform(0, 1), n_factors) candidates = rand(design_generator, 
n_candidates) selected_rows = kl_exchange(candidates.formula, candidates.matrix, diff --git a/test/paley.jl b/test/paley.jl new file mode 100644 index 0000000..0d7b3b9 --- /dev/null +++ b/test/paley.jl @@ -0,0 +1,50 @@ +@testset "paley" begin + M = paley(Matrix{Int}(undef, 8, 8)) + + Mt = [-1 1 1 -1 1 1 1 -1 + 1 1 -1 1 1 1 -1 -1 + 1 -1 1 1 1 -1 -1 1 +-1 1 1 1 -1 -1 1 1 + 1 1 1 -1 -1 1 1 -1 + 1 1 -1 -1 1 1 -1 1 + 1 -1 -1 1 1 -1 1 1 +-1 -1 1 1 -1 1 1 1] + + @test M == Mt + + M2 = plackettburman(4) + + Mt2 = [1 1 1 1 1 1 1 + -1 1 -1 1 1 -1 -1 + 1 -1 1 1 -1 -1 -1 + -1 1 1 -1 -1 -1 1 + 1 1 -1 -1 -1 1 -1 + 1 -1 -1 -1 1 -1 1 + -1 -1 -1 1 -1 1 1 + -1 -1 1 -1 1 1 -1] + + @test M2 == Mt2 +end + + +@testset "boxbehnken" begin + M = boxbehnken(3) + + Mt = [ -1.0 -1.0 0.0 + 1.0 -1.0 0.0 + -1.0 1.0 0.0 + 1.0 1.0 0.0 + -1.0 0.0 -1.0 + 1.0 0.0 -1.0 + -1.0 0.0 1.0 + 1.0 0.0 1.0 + 0.0 -1.0 -1.0 + 0.0 1.0 -1.0 + 0.0 -1.0 1.0 + 0.0 1.0 1.0 + 0.0 0.0 0.0 + 0.0 0.0 0.0 + 0.0 0.0 0.0] + @test M == Mt + +end diff --git a/test/runtests.jl b/test/runtests.jl index 8749ad9..b8399c5 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -6,7 +6,7 @@ using Test, StatsModels # tests = ["kl_exchange.jl", "doctests.jl"] -tests = ["kl_exchange.jl"] +tests = ["kl_exchange.jl", "paley.jl"] @testset "ExperimentalDesign" begin for test in tests