-
Notifications
You must be signed in to change notification settings - Fork 0
/
hevProject.glue
61 lines (39 loc) · 2.38 KB
/
hevProject.glue
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# Build script for the "hev" GLUE project.
# The steps below are order-dependent: later commands use data created by earlier ones.
# Remove any project named "hev" and create it afresh
# (presumably so that re-running this script rebuilds the project from scratch).
delete project hev
create project hev "A GLUE project for Hepatitis E virus (HEV)" --minVersion 0.1.151
# Schema extensions are run here, before the "project hev" command below.
# NOTE(review): keep this ordering -- presumably the extensions must be applied outside the project context.
run file glue/hevSchemaExtensions.glue
# Select project "hev"; the remaining commands in this file are issued after this point.
project hev
# Project settings and module definitions live in separate script files.
run file glue/hevProjectSettings.glue
run file glue/hevModules.glue
# Import the sequence sources: the HEV sequences (ncbi-hev) and the outgroup (ncbi-outgroup).
import source sources/ncbi-hev
import source sources/ncbi-outgroup
# Import the unconstrained alignment as AL_UNCONSTRAINED.
# The alignment is based on Smith et al. 2016, but has also been modified to:
# 1. Replace internal NN's with --'s for KJ873911, in order to make the BLAST FASTA importer work properly.
# 2. Add a row for sequence RR_L08816 -- this sequence is present in order to provide feature locations.
# 3. Add a row for sequence OO_AM943647 -- this sequence is present in order to provide an outgroup.
# 4. Add the missing subtype for JQ013791 (3ra), KJ013415 (3ra) and EU723513 (3f).
module unconstrainedAlignmentImporter import AL_UNCONSTRAINED -f alignments/SmithAlignment_23_1_2016_modified.fna
# Run the feature and reference-sequence scripts (their contents are in separate files).
run file glue/hevFeatures.glue
run file glue/hevReferences.glue
# Run the "populate" command of the GenBank XML populator module
# (presumably fills sequence fields such as the gb_* fields used below -- confirm against the module config).
module hevGenbankXmlPopulator populate
# Missing data for some full genome NCBI sequences, supplied from a tabular file.
module hevDataMissingImporter populate --fileName tabular/AhmedAlnamrotyFullGenomeMissing.txt
# Run the "populate" command of the collection-bounds populator module.
module hevCollectionBoundsPopulator populate
# Identify full genome sequences: any sequence with gb_length >= 6500 is flagged full_genome = true.
multi-set field sequence -w "gb_length >= 6500" full_genome true
# Every sequence whose full_genome is still null is then set to false.
# This must run after the command above, since it relies on the flag already being set for long sequences.
multi-set field sequence -w "full_genome = null" full_genome false
# If there's no evidence an ncbi-hev sequence is a lab construct (gb_lab_construct is null), record it as not one.
multi-set field sequence -w "source.name = 'ncbi-hev' and gb_lab_construct = null" gb_lab_construct false
# Update gb_country_short based on gb_country_iso.
run file glue/hevCountryUpdate.glue
# Populate genotyping data for the ncbi-hev sequences from two tabular files:
# the Smith full-length genotypes, then the EPA genotyping results.
module hevSmithGenotypingPopulator populate --whereClause "source.name = 'ncbi-hev'" -f tabular/Smith_full_length_genotypes.txt
module hevEpaGenotypingResultsPopulator populate --whereClause "source.name = 'ncbi-hev'" -f tabular/ncbiHevGenotypes.txt
# For ncbi-hev sequences, copy the EPA results into the general genotype / subtype fields.
# NOTE(review): assumes the argument order is <from field> <to field> -- confirm against the GLUE command docs.
multi-copy field sequence -w "source.name = 'ncbi-hev'" epa_genotype genotype
multi-copy field sequence -w "source.name = 'ncbi-hev'" epa_subtype subtype
# Run the alignment tree script (presumably defines AL_MASTER, which is used below -- confirm).
run file glue/hevAlignmentTree.glue
# Record feature presence.
# The "module" line below is paired with the "exit" that follows the record command.
module hevFeaturePresenceRecorder
# Record presence of whole_genome and its descendent features for AL_MASTER (recursively),
# restricted to members whose sequence comes from the ncbi-hev source.
record feature-presence AL_MASTER --recursive --whereClause "sequence.source.name = 'ncbi-hev'" --featureName whole_genome --descendentFeatures
# Leave the hevFeaturePresenceRecorder module context.
exit
# Import the phylogeny, then run validation as the final step of the build.
run file glue/importPhylogeny.glue
validate