From 0d7bd46265123791cd8adf03dc8a79cea8e03cea Mon Sep 17 00:00:00 2001
From: Philippe Veber
Date: Fri, 1 Jun 2018 14:46:47 +0200
Subject: [PATCH] bioinfo/Ensembl: fetch cdna files

---
(* Reviewer notes. This is free-form commentary placed between the ---
   marker and the diffstat; it is not part of the change itself.

   1. lib/bioinfo/ensembl.ml gains cdna, which takes the labelled
      arguments ~release and ~species, formats a URL of the form
      ftp://ftp.ensembl.org/pub/release-RELEASE/fasta/SPECIES/cdna/
      CAPITALIZED_SPECIES.LABEL.cdna.all.fa.gz
      and passes that URL to Unix_tools.wget.
   2. SPECIES is string_of_species species, CAPITALIZED_SPECIES is the
      same string passed through String.capitalize, and LABEL is
      lab_label_of_genome applied to ucsc_reference_genome ~release ~species.
   3. The gtf code shown as hunk context wraps wget in gunzip; cdna does
      not, and the .mli declares its result as fasta gz workflow.
   4. NOTE(review): this patch was restored from a copy collapsed onto one
      line. Line breaks and the indentation of context and added lines are
      reconstructed; confirm them against the tree before applying. *)

 lib/bioinfo/ensembl.ml  | 8 ++++++++
 lib/bioinfo/ensembl.mli | 2 ++
 2 files changed, 10 insertions(+)

diff --git a/lib/bioinfo/ensembl.ml b/lib/bioinfo/ensembl.ml
index d8f01632..da0df038 100644
--- a/lib/bioinfo/ensembl.ml
+++ b/lib/bioinfo/ensembl.ml
@@ -63,3 +63,11 @@ let gtf ?(chr_name = `ensembl) ~release ~species =
     | `ucsc -> ucsc_chr_names_gtf
   in
   f @@ Unix_tools.(gunzip (wget url))
+
+let cdna ~release ~species =
+  let url = sprintf "ftp://ftp.ensembl.org/pub/release-%d/fasta/%s/cdna/%s.%s.cdna.all.fa.gz"
+      release (string_of_species species)
+      (String.capitalize (string_of_species species))
+      (lab_label_of_genome (ucsc_reference_genome ~release ~species))
+  in
+  Unix_tools.wget url
diff --git a/lib/bioinfo/ensembl.mli b/lib/bioinfo/ensembl.mli
index 31cb7d1f..1e28fb05 100644
--- a/lib/bioinfo/ensembl.mli
+++ b/lib/bioinfo/ensembl.mli
@@ -10,3 +10,5 @@ val ucsc_reference_genome : release:int -> species:species -> Ucsc_gb.genome
 
 val gff : ?chr_name : [`ensembl | `ucsc] -> release:int -> species:species -> gff workflow
 val gtf : ?chr_name : [`ensembl | `ucsc] -> release:int -> species:species -> gff workflow
+
+val cdna : release:int -> species:species -> fasta gz workflow