-
Notifications
You must be signed in to change notification settings - Fork 0
4. Prepare reference files
Amin edited this page Mar 20, 2017
·
11 revisions
Walkthrough
# Download the GRCh38 primary-assembly FASTA from Ensembl release 86,
# then decompress it in place (gunzip replaces the .gz with the .fa file).
genome_gz=Homo_sapiens.GRCh38.dna_sm.primary_assembly.fa.gz
wget "ftp://ftp.ensembl.org/pub/release-86/fasta/homo_sapiens/dna/${genome_gz}"
gunzip "${genome_gz}"
# Collect the chr22 reference.
# Every line containing ">" (a FASTA header) re-evaluates the flag: it is set
# to 1 only when the line starts with ">22 " (trailing space included, so a
# name such as ">220" does not match). Lines are printed while the flag is 1,
# i.e. the chr22 header and the sequence lines that follow it.
awk '
  />/ { keep = (substr($0, 1, 4) == ">22 ") ? 1 : 0 }
  keep == 1
' Homo_sapiens.GRCh38.dna_sm.primary_assembly.fa > chr22.fa
# Add spike-ins: fetch and unpack the ERCC92 archive, then append ERCC92.fa
# to the chr22 reference to build the combined FASTA.
ercc_zip=ERCC92.zip
wget "https://tools.thermofisher.com/content/sfs/manuals/${ercc_zip}"
unzip "${ercc_zip}"
cat chr22.fa ERCC92.fa > chr22_with_ERCC92.fa
# Get annotation files: download the Ensembl release-86 GTF for GRCh38
# and decompress it in place.
annotation_gz=Homo_sapiens.GRCh38.86.gtf.gz
wget "ftp://ftp.ensembl.org/pub/release-86/gtf/homo_sapiens/${annotation_gz}"
gunzip "${annotation_gz}"
# Keep only the chromosome 22 records of the annotation.
# The input is the release-86 GTF that was downloaded and decompressed above
# (the previous command read a non-existent "87" file). The first
# tab-separated column must equal "22" exactly, so header lines and any
# sequence name that merely starts with "22" are excluded.
# NOTE(review): the output keeps its original "87" file name in case later
# steps refer to it — confirm, and rename to "86" if nothing depends on it.
awk -F'\t' '$1 == "22"' Homo_sapiens.GRCh38.86.gtf > Homo_sapiens.GRCh38.87_chr22.gtf