Skip to content
New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

Add functionality to compare estimations #39

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 71 additions & 0 deletions estimage/statops/compare.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import numpy as np
import scipy as sp


# \int_{-\inf}^{\inf}l(x)\int_{x}^{\inf}g(t)\, \textup{d}t\, \textup{d}x
# where l is the function that we think should be lower, and g the greater
def _integrate(dom, one, two):
res = 0
for i, x in enumerate(dom):
toadd = two[i] * 0.5
toadd += sum(two[i + 1:])
toadd *= one[i]
res += toadd
return res


def _integrate_estimate_both_degenerate(one, two):
if one.expected == two.expected:
return 0.5
if one.expected < two.expected:
return 1
return 0


def _integrate_estimate_second_degenerate(one, two):
threshold = two.expected
rv = one._get_rv()
return rv.cdf(threshold)


def _integrate_estimate_first_degenerate(one, two):
threshold = one.expected
rv = two._get_rv()
return 1 - rv.cdf(threshold)


INTEGRATION_PRECISION = 0.001


# \int_{-\inf}^{\inf}l(x)\int_{x}^{\inf}g(t)\, \textup{d}t\, \textup{d}x is in fact
# \int_{-\inf}^{\inf}l(x)\, \textup{d}x - \int_{-\inf}^{\inf}l(x)G(x)\, \textup{d}x =
# = 1 - \int_{-\inf}^{\inf}l(x)G(x)\, \textup{d}x where
# G(x) is distribution function of the g randvar
def _integrate_estimate(one, two):
# unlike upper bound case, PDF and CDF are zero below their respective lower bound
effective_lower_bound = max(one.source.optimistic, two.source.optimistic)
safe_upper_bound = max(one.source.pessimistic, two.source.pessimistic)
lower_rv = one._get_rv()
upper_rv = two._get_rv()
def integrand(x): return lower_rv.pdf(x) * upper_rv.cdf(x)
result = sp.integrate.quad(integrand, effective_lower_bound, safe_upper_bound, epsabs=INTEGRATION_PRECISION)
return 1 - result[0]


def is_lower(dom, one, two):
dom = np.array(dom, dtype=float)
one = np.array(one, dtype=float)
one /= one.sum()
two = np.array(two, dtype=float)
two /= two.sum()
return _integrate(dom, one, two)


def estimate_is_lower(one, two):
if one.sigma == 0 and two.sigma == 0:
return _integrate_estimate_both_degenerate(one, two)
if two.sigma == 0:
return _integrate_estimate_second_degenerate(one, two)
if one.sigma == 0:
return _integrate_estimate_first_degenerate(one, two)
return _integrate_estimate(one, two)
76 changes: 76 additions & 0 deletions tests/test_statops_compare.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import estimage.statops.compare as tm
import estimage.data as data

import pytest


def montecarlo_is_lower_test(one, two, samples=1000):
rvs_one, rvs_two = [est._get_rv().rvs(samples) for est in (one, two)]
return sum(rvs_one < rvs_two) / samples


def integ_approx(x):
return pytest.approx(x, abs=tm.INTEGRATION_PRECISION)


def test_compare_trivial():
assert tm.is_lower([0], [1], [1]) == 0.5
degenerate_estimate = data.Estimate(1, 0)
assert tm.estimate_is_lower(degenerate_estimate, degenerate_estimate) == 0.5


def test_compare_equal():
assert tm.is_lower([0, 1], [1, 1], [1, 1]) == 0.5
assert tm.is_lower([0, 1, 2], [1, 1, 1], [1, 1, 1]) == 0.5
assert tm.is_lower([0, 1, 2, 3], [1, 1, 1, 1], [1, 1, 1, 1]) == 0.5
assert tm.is_lower([0, 1, 2], [0, 1, 0], [1, 1, 1]) == 0.5
simple_estimate = data.Estimate.from_triple(1, 0, 2)
assert tm.estimate_is_lower(simple_estimate, simple_estimate) == 0.5
concrete_estimate = data.Estimate.from_triple(2, 1, 3)
assert tm.estimate_is_lower(concrete_estimate, concrete_estimate) == 0.5
assert montecarlo_is_lower_test(concrete_estimate, concrete_estimate) == pytest.approx(0.5, abs=0.05)


def test_compare_clear():
assert tm.is_lower([0, 1], [0, 1], [1, 0]) == 0
assert tm.is_lower([0, 1], [1, 0], [0, 1]) == 1
assert tm.estimate_is_lower(data.Estimate(1, 0), data.Estimate(0, 0)) == 0
assert tm.estimate_is_lower(data.Estimate(0, 0), data.Estimate(1, 0)) == 1
assert tm.is_lower([0, 1, 2, 3], [1, 1, 0, 0], [0, 0, 1, 1]) == 1
assert tm.is_lower([0, 1, 2, 3], [0, 0, 1, 1], [1, 1, 0, 0]) == 0
low_estimate = data.Estimate.from_triple(1, 0, 3)
high_estimate = data.Estimate.from_triple(4, 3, 6)
assert tm.estimate_is_lower(low_estimate, high_estimate) == integ_approx(1)
assert tm.estimate_is_lower(high_estimate, low_estimate) == integ_approx(0)


def test_compare_overlapping():
low_estimate = data.Estimate.from_triple(1, 0, 3)
test_estimate = data.Estimate(1, 0)
assert 0 < tm.estimate_is_lower(low_estimate, test_estimate) < 0.5
assert 0.5 < tm.estimate_is_lower(test_estimate, low_estimate) < 1
almost_low_estimate = data.Estimate.from_triple(1.1, 0.1, 3.1)
assert 0.5 < tm.estimate_is_lower(low_estimate, almost_low_estimate) < 0.6
low_end_estimate = data.Estimate.from_triple(0.5, 0, 1)
assert 0.1 < tm.estimate_is_lower(low_estimate, low_end_estimate) < 0.2
rate = tm.estimate_is_lower(low_end_estimate, low_estimate)
assert 0.8 < rate < 0.9
assert montecarlo_is_lower_test(low_end_estimate, low_estimate) == pytest.approx(rate, abs=0.05)


def test_compare_general():
one = data.Estimate.from_triple(2, 1, 3)
two = data.Estimate.from_triple(1.5, -2, 4)
rate = tm.estimate_is_lower(one, two)
assert montecarlo_is_lower_test(one, two, 4000) == pytest.approx(rate, abs=0.01)
one = data.Estimate.from_triple(2.5, 1, 3)
two = data.Estimate.from_triple(1.5, 1, 4)
rate = tm.estimate_is_lower(one, two)
assert montecarlo_is_lower_test(one, two) == pytest.approx(rate, abs=0.05)


def test_compare_weighted():
assert 0.99 < tm.is_lower([0, 1, 2, 3], [1, 1, 0, 0], [0, 0.01, 1, 1]) < 1

# TODO: Comparison of estimates
# TODO: %-complete
Loading