Skip to content

Commit b17940d

Browse files
Stuff for nexus
1 parent 58b876b commit b17940d

File tree

5 files changed

+307
-70
lines changed

5 files changed

+307
-70
lines changed

python/tests/test_fasta.py

+44-2
Original file line numberDiff line numberDiff line change
@@ -316,7 +316,49 @@ def test_unwrapped(self):
316316

317317
def test_missing_data(self):
318318
ts = missing_data_example()
319-
ref = "-" * int(ts.sequence_length)
320-
text = ts.as_fasta(reference_sequence=ref, wrap_width=0)
319+
ref = "A" * int(ts.sequence_length)
320+
text = ts.as_fasta(reference_sequence=ref)
321321
alignment_map = self.parse(text)
322322
assert get_alignment_map(ts, ref) == alignment_map
323+
324+
325+
class TestDendropyMissingData:
326+
"""
327+
Test that we detect missing data correctly in dendropy under
328+
various combinations of options.
329+
"""
330+
331+
# 2.00┊ 4 ┊
332+
# ┊ ┏━┻┓ ┊
333+
# 1.00┊ ┃ 3 ┊
334+
# ┊ ┃ ┏┻┓ ┊
335+
# 0.00┊ 0 1 2 5 ┊
336+
# 0 10
337+
# | |
338+
# pos 2 9
339+
# anc A T
340+
341+
def ts(self):
342+
ts = tskit.Tree.generate_balanced(3, span=10).tree_sequence
343+
tables = ts.dump_tables()
344+
tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)
345+
tables.sites.add_row(2, ancestral_state="A")
346+
tables.sites.add_row(9, ancestral_state="T")
347+
tables.mutations.add_row(site=0, node=0, derived_state="G")
348+
tables.mutations.add_row(site=1, node=3, derived_state="C")
349+
return tables.tree_sequence()
350+
351+
def test_defaults_A_ref(self):
352+
ts = self.ts()
353+
ref = "A" * int(ts.sequence_length)
354+
text = ts.as_fasta(reference_sequence=ref, missing_data_character="N")
355+
# print(text)
356+
d = dendropy.DnaCharacterMatrix.get(data=text, schema="fasta")
357+
assert d.sequence_size == 10
358+
# for k, v in d.items():
359+
# print(k, v)
360+
# TODO: finish this test by asserting on the parsed sequences (e.g. n0 and n5).
361+
# a0 = d["n0"]
362+
# a5 = d["n5"]
363+
# print(a0.character_type_at(0))
364+
# print(a5)

python/tests/test_genotypes.py

+104
Original file line numberDiff line numberDiff line change
@@ -1158,6 +1158,31 @@ def test_fasta_reference_sequence(self):
11581158
)
11591159
assert expected == self.ts().as_fasta(reference_sequence=ref)
11601160

1161+
def test_nexus_reference_sequence(self):
1162+
ref = "0123456789"
1163+
expected = textwrap.dedent(
1164+
"""\
1165+
#NEXUS
1166+
BEGIN TAXA;
1167+
DIMENSIONS NTAX=3;
1168+
TAXLABELS n0 n1 n2;
1169+
END;
1170+
BEGIN DATA;
1171+
DIMENSIONS NCHAR=10;
1172+
FORMAT datatype=dna missing=-;
1173+
MATRIX
1174+
n0 01G345678T
1175+
n1 01A345678C
1176+
n2 01A345678C
1177+
;
1178+
END;
1179+
BEGIN TREES;
1180+
TREE t0^10 = [&R] (n0:2,(n1:1,n2:1):1);
1181+
END;
1182+
"""
1183+
)
1184+
assert expected == self.ts().as_nexus(reference_sequence=ref)
1185+
11611186

11621187
class TestMissingDataExample:
11631188
# 2.00┊ 4 ┊
@@ -1303,6 +1328,85 @@ def test_fasta_impute_missing(self):
13031328
reference_sequence=ref, isolated_as_missing=False
13041329
)
13051330

1331+
# Note: the nexus tree output isn't compatible with our representation of
1332+
# missing data as trees with isolated roots (newick parsers won't accept
1333+
# this as valid input), so we set include_trees=False for these examples.
1334+
def test_nexus_reference_sequence(self):
1335+
ref = "0123456789"
1336+
expected = textwrap.dedent(
1337+
"""\
1338+
#NEXUS
1339+
BEGIN TAXA;
1340+
DIMENSIONS NTAX=4;
1341+
TAXLABELS n0 n1 n2 n5;
1342+
END;
1343+
BEGIN DATA;
1344+
DIMENSIONS NCHAR=10;
1345+
FORMAT datatype=dna missing=-;
1346+
MATRIX
1347+
n0 01G345678T
1348+
n1 01A345678C
1349+
n2 01A345678C
1350+
n5 01-345678-
1351+
END;
1352+
"""
1353+
)
1354+
assert expected == self.ts().as_nexus(
1355+
reference_sequence=ref, include_trees=False
1356+
)
1357+
1358+
def test_nexus_reference_sequence_missing_data_char(self):
1359+
ref = "0123456789"
1360+
expected = textwrap.dedent(
1361+
"""\
1362+
#NEXUS
1363+
BEGIN TAXA;
1364+
DIMENSIONS NTAX=4;
1365+
TAXLABELS n0 n1 n2 n5;
1366+
END;
1367+
BEGIN DATA;
1368+
DIMENSIONS NCHAR=10;
1369+
FORMAT datatype=dna missing=Q;
1370+
MATRIX
1371+
n0 01G345678T
1372+
n1 01A345678C
1373+
n2 01A345678C
1374+
n5 01Q345678Q
1375+
END;
1376+
"""
1377+
)
1378+
assert expected == self.ts().as_nexus(
1379+
reference_sequence=ref,
1380+
missing_data_character="Q",
1381+
include_trees=False,
1382+
)
1383+
1384+
def test_nexus_impute_missing(self):
1385+
ref = "0123456789"
1386+
expected = textwrap.dedent(
1387+
"""\
1388+
#NEXUS
1389+
BEGIN TAXA;
1390+
DIMENSIONS NTAX=4;
1391+
TAXLABELS n0 n1 n2 n5;
1392+
END;
1393+
BEGIN DATA;
1394+
DIMENSIONS NCHAR=10;
1395+
FORMAT datatype=dna missing=-;
1396+
MATRIX
1397+
n0 01G345678T
1398+
n1 01A345678C
1399+
n2 01A345678C
1400+
n5 01A345678T
1401+
END;
1402+
"""
1403+
)
1404+
assert expected == self.ts().as_nexus(
1405+
reference_sequence=ref,
1406+
isolated_as_missing=False,
1407+
include_trees=False,
1408+
)
1409+
13061410

13071411
class TestAlignmentsErrors:
13081412
@tests.cached_example

0 commit comments

Comments
 (0)