Skip to content

Commit

Permalink
pedigrees and tests
Browse files Browse the repository at this point in the history
  • Loading branch information
hannesbecher committed Oct 3, 2024
1 parent cf0d463 commit 2c1b195
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 2 deletions.
30 changes: 28 additions & 2 deletions msprime/pedigrees.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,7 +412,9 @@ def sim_pedigree_backward(
# Because we don't have overlapping generations we can add the ancestors
# to the pedigree once the previous generation has been produced
for time in range(0, end_time):
population = np.arange(N_function(time), dtype=np.int32)
# get current population size from function, add 1 to time to match behaviour of
# sim_pedigree_forward
population = np.arange(N_function(time + 1), dtype=np.int32)
parents = rng.choice(population, (len(ancestors), 2))
unique_parents = np.unique(parents)
parent_ids = np.searchsorted(unique_parents, parents).astype(np.int32)
Expand Down Expand Up @@ -440,7 +442,6 @@ def sim_pedigree_forward(
# To make the semantics compatible with dtwf, the end_time means the
# *end* of generation end_time
for time in reversed(range(end_time + 1)):
# N = p_size(time) # This could be derived from the Demography
N = N_function(time) # This could be derived from the Demography
# NB this is *with* replacement, so 1 / N chance of selfing
parents = rng.choice(population, (N, 2))
Expand All @@ -457,6 +458,31 @@ def sim_pedigree(
end_time=None,
direction="forward",
):
"""
Simulates a pedigree
Arguments:
population_size (int or function): The population size at each generation.
If a function, it should take a single argument, the generation number,
and return the population size at that generation.
num_samples (int): The number of individuals at generation 0
If None, the population size at generation 0 is used.
sequence_length (float): The length of the sequence to simulate.
If None, the sequence length is set to -1.
random_seed (int): The random seed to use for the simulation.
end_time (int): The number of generations to simulate.
direction (str): The direction of time to simulate. Either "forward" or
"backward".
Returns:
A TableCollection containing the simulated pedigree.
Examples:
>>> sim_pedigree(population_size=10, end_time=5, direction="forward")
>>> N_function = lambda t: [10,20,100,1000,2][t]
>>> sim_pedigree(population_size=N_function, end_time=4, direction="backward")
"""

# Internal utility for generating pedigree data. This function is not
# part of the public API and subject to arbitrary changes/removal
# in the future.
Expand Down
36 changes: 36 additions & 0 deletions tests/test_pedigree.py
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,42 @@ def test_valid_pedigree(self):
tc.individuals.append(row)
tc.tree_sequence() # creating tree sequence should succeed

@pytest.mark.parametrize("direction", ["backward", "forward"])
def test_equal_N_Nfunc(self, direction):
seed = np.random.randint(1e6)
p1 = pedigrees.sim_pedigree(
num_samples=10,
population_size=10,
end_time=20,
random_seed=seed,
direction=direction,
)
p2 = pedigrees.sim_pedigree(
num_samples=10,
population_size=lambda _: 10,
end_time=20,
random_seed=seed,
direction=direction,
)
assert p1 == p2

@pytest.mark.parametrize("direction", ["backward", "forward"])
def test_pop_collapse(self, direction):
seed = np.random.randint(1e6)
collapse_time = 10
ped = pedigrees.sim_pedigree(
num_samples=100,
population_size=lambda t: 100 if t < collapse_time else 1,
end_time=20,
random_seed=seed,
direction=direction,
)
times_old_nodes = ped.nodes.time[ped.nodes.time > collapse_time - 1]
_, node_counts = np.unique(times_old_nodes, return_counts=True)
# if we have a collapse down to one (diploid) individual, there should be only
# two nodes at each time point after the collapse
assert all(node_counts == 2)


def join_pedigrees(tables_list):
"""
Expand Down

0 comments on commit 2c1b195

Please # to comment.