@@ -1169,7 +1169,7 @@ def test_nexus_reference_sequence(self):
1169
1169
END;
1170
1170
BEGIN DATA;
1171
1171
DIMENSIONS NCHAR=10;
1172
- FORMAT DATATYPE=DNA MISSING=? ;
1172
+ FORMAT DATATYPE=DNA;
1173
1173
MATRIX
1174
1174
n0 01G345678T
1175
1175
n1 01A345678C
@@ -1232,6 +1232,13 @@ def test_alignments_default(self):
1232
1232
assert A [2 ] == "NNANNNNNNC"
1233
1233
assert A [3 ] == "NNNNNNNNNN"
1234
1234
1235
+ def test_alignments_fails (self ):
1236
+ # https://github.com/tskit-dev/tskit/issues/1896
1237
+ ref = "N" * 10
1238
+ with pytest .raises (ValueError , match = "1896" ):
1239
+ next (self .ts ().alignments (reference_sequence = ref ))
1240
+
1241
+ @pytest .mark .skip ("Missing data in alignments: #1896" )
1235
1242
def test_alignments_impute_missing (self ):
1236
1243
ref = "N" * 10
1237
1244
A = list (
@@ -1256,16 +1263,18 @@ def test_alignments_missing_char_ref(self):
1256
1263
assert A [0 ] == "NNGNNNNNNT"
1257
1264
assert A [1 ] == "NNANNNNNNC"
1258
1265
assert A [2 ] == "NNANNNNNNC"
1259
- assert A [3 ] == "NNzNNNNNNz "
1266
+ assert A [3 ] == "zzzzzzzzzz "
1260
1267
1268
+ @pytest .mark .skip ("Missing data in alignments: #1896" )
1261
1269
def test_alignments_reference_sequence (self ):
1262
1270
ref = "0123456789"
1263
1271
A = list (self .ts ().alignments (reference_sequence = ref ))
1264
1272
assert A [0 ] == "01G345678T"
1265
1273
assert A [1 ] == "01A345678C"
1266
1274
assert A [2 ] == "01A345678C"
1267
- assert A [3 ] == "01N345678N "
1275
+ assert A [3 ] == "NNNNNNNNNN "
1268
1276
1277
+ @pytest .mark .skip ("Missing data in alignments: #1896" )
1269
1278
def test_alignments_reference_sequence_missing_data_char (self ):
1270
1279
ref = "0123456789"
1271
1280
A = list (
@@ -1274,8 +1283,9 @@ def test_alignments_reference_sequence_missing_data_char(self):
1274
1283
assert A [0 ] == "01G345678T"
1275
1284
assert A [1 ] == "01A345678C"
1276
1285
assert A [2 ] == "01A345678C"
1277
- assert A [3 ] == "01Q345678Q "
1286
+ assert A [3 ] == "QQQQQQQQQQ "
1278
1287
1288
+ @pytest .mark .skip ("Missing data in alignments: #1896" )
1279
1289
def test_fasta_reference_sequence (self ):
1280
1290
ref = "0123456789"
1281
1291
expected = textwrap .dedent (
@@ -1287,11 +1297,12 @@ def test_fasta_reference_sequence(self):
1287
1297
>n2
1288
1298
01A345678C
1289
1299
>n5
1290
- 01N345678N
1300
+ NNNNNNNNNN
1291
1301
"""
1292
1302
)
1293
1303
assert expected == self .ts ().as_fasta (reference_sequence = ref )
1294
1304
1305
+ @pytest .mark .skip ("Missing data in alignments: #1896" )
1295
1306
def test_fasta_reference_sequence_missing_data_char (self ):
1296
1307
ref = "0123456789"
1297
1308
expected = textwrap .dedent (
@@ -1303,13 +1314,14 @@ def test_fasta_reference_sequence_missing_data_char(self):
1303
1314
>n2
1304
1315
01A345678C
1305
1316
>n5
1306
- 01Q345678Q
1317
+ QQQQQQQQQQ
1307
1318
"""
1308
1319
)
1309
1320
assert expected == self .ts ().as_fasta (
1310
1321
reference_sequence = ref , missing_data_character = "Q"
1311
1322
)
1312
1323
1324
+ @pytest .mark .skip ("Missing data in alignments: #1896" )
1313
1325
def test_fasta_impute_missing (self ):
1314
1326
ref = "N" * 10
1315
1327
expected = textwrap .dedent (
@@ -1331,6 +1343,7 @@ def test_fasta_impute_missing(self):
1331
1343
# Note: the nexus tree output isn't compatible with our representation of
1332
1344
# missing data as trees with isolated roots (newick parsers won't accept
1333
1345
# this as valid input), so we set include_trees=False for these examples.
1346
+ @pytest .mark .skip ("Missing data in alignments: #1896" )
1334
1347
def test_nexus_reference_sequence (self ):
1335
1348
ref = "0123456789"
1336
1349
expected = textwrap .dedent (
@@ -1347,7 +1360,7 @@ def test_nexus_reference_sequence(self):
1347
1360
n0 01G345678T
1348
1361
n1 01A345678C
1349
1362
n2 01A345678C
1350
- n5 01?345678 ?
1363
+ n5 ????????? ?
1351
1364
;
1352
1365
END;
1353
1366
"""
@@ -1356,6 +1369,7 @@ def test_nexus_reference_sequence(self):
1356
1369
reference_sequence = ref , include_trees = False
1357
1370
)
1358
1371
1372
+ @pytest .mark .skip ("Missing data in alignments: #1896" )
1359
1373
def test_nexus_reference_sequence_missing_data_char (self ):
1360
1374
ref = "0123456789"
1361
1375
expected = textwrap .dedent (
@@ -1372,7 +1386,7 @@ def test_nexus_reference_sequence_missing_data_char(self):
1372
1386
n0 01G345678T
1373
1387
n1 01A345678C
1374
1388
n2 01A345678C
1375
- n5 01Q345678Q
1389
+ n5 QQQQQQQQQQ
1376
1390
;
1377
1391
END;
1378
1392
"""
@@ -1383,6 +1397,7 @@ def test_nexus_reference_sequence_missing_data_char(self):
1383
1397
include_trees = False ,
1384
1398
)
1385
1399
1400
+ @pytest .mark .skip ("Missing data in alignments: #1896" )
1386
1401
def test_nexus_impute_missing (self ):
1387
1402
ref = "0123456789"
1388
1403
expected = textwrap .dedent (
@@ -1441,6 +1456,7 @@ def test_non_ascii_references(self, ref):
1441
1456
with pytest .raises (UnicodeEncodeError ):
1442
1457
list (ts .alignments (reference_sequence = ref ))
1443
1458
1459
+ @pytest .mark .skip ("Missing data in alignments: #1896" )
1444
1460
@pytest .mark .parametrize ("missing_data_char" , ["À" , "┃" , "α" ])
1445
1461
def test_non_ascii_missing_data_char (self , missing_data_char ):
1446
1462
ts = self .simplest_ts ()
@@ -1470,14 +1486,18 @@ def test_defaults(self, ts):
1470
1486
@pytest .mark .parametrize ("ts" , get_example_discrete_genome_tree_sequences ())
1471
1487
def test_reference_sequence (self , ts ):
1472
1488
ref = tskit .random_nucleotides (ts .sequence_length , seed = 1234 )
1473
- A = list (ts .alignments (reference_sequence = ref ))
1474
- assert len (A ) == ts .num_samples
1475
- H = list (ts .haplotypes ())
1476
- pos = ts .tables .sites .position .astype (int )
1477
- for a , h in map (np .array , zip (A , H )):
1478
- last = 0
1479
- for j , x in enumerate (pos ):
1480
- assert a [last :x ] == ref [last :x ]
1481
- assert a [x ] == h [j ]
1482
- last = x + 1
1483
- assert a [last :] == ref [last :]
1489
+ if any (tree .num_roots > 1 for tree in ts .trees ()):
1490
+ with pytest .raises (ValueError , match = "1896" ):
1491
+ list (ts .alignments (reference_sequence = ref ))
1492
+ else :
1493
+ A = list (ts .alignments (reference_sequence = ref ))
1494
+ assert len (A ) == ts .num_samples
1495
+ H = list (ts .haplotypes ())
1496
+ pos = ts .tables .sites .position .astype (int )
1497
+ for a , h in map (np .array , zip (A , H )):
1498
+ last = 0
1499
+ for j , x in enumerate (pos ):
1500
+ assert a [last :x ] == ref [last :x ]
1501
+ assert a [x ] == h [j ]
1502
+ last = x + 1
1503
+ assert a [last :] == ref [last :]
0 commit comments