-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathSnakefile
138 lines (118 loc) · 6 KB
/
Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
#######################
# CNV calling pipeline :URL: https://rdcu.be/c4Nse
# Copyright (C) 2023 Ashish Kumar Singh - St.Olavs hospital - NTNU - Norway
########################
configfile: "config.yaml"
import pandas as pd
import io
SAMPLESHEET = config['SAMPLESHEET']
HOME = config['HOME']
BATCH = BATCHFOLDER.split("-")[0]
SCRIPTS = config['SCRIPTS']
NGSCNVCODE = config['NGSCNVCODE']
CNV_static_pooling = config['CNV_static_pooling']
DATABASES = config['DATABASES']
EXPORT = config['EXPORT']
rule sambamba_coverage:
input:
"results/samples/{sample}/bam/{sample}_sorted_dedup_RG_Baserecal.bam"
output:
o1="results/coverage_report/region_base_coverage/{sample}_region_base_coverage"
shell:
"""
samtools flagstat {input} > results/coverage_report/flagstat/{wildcards.sample}_mapping_summary.txt
{SCRIPTS}/sambamba depth region -F 'not duplicate and not failed_quality_control' -T 10 -T 15 -T 20 -T 50 -T 100 -L {TARGETPANEL} -o results/coverage_report/region_mean_coverage/{wildcards.sample}_region_mean_coverage {input}
{SCRIPTS}/sambamba depth base -F 'not duplicate and not failed_quality_control' -L {TARGETPANEL} -o {output.o1} {input}
"""
rule coverage_stats:
input:
expand("results/coverage_report/region_base_coverage/{sample}_region_base_coverage",sample=samples)
output:
touch("coverage_stats_done")
params:
len(samples)
run:
if len(input)==len(samples):
shell("python {SCRIPTS}/coverage_stats.py {HOME} {BATCHFOLDER} {TARGETPANEL} {COVTHRESHOLD} {GAPCOVTHRESHOLD} {UTRREGIONS}")
shell("cp {HOME}/{BATCHFOLDER}/results/coverage_report/sample_summary.txt {HOME}/{BATCHFOLDER}/results/CNV_analysis/sample_summary.txt")
shell("{SCRIPTS}/makeNucleotideCoverage.sh results/coverage_report/nucleotide_coverage.txt")
rule copy_number_variation_analysis_step1:
input:
i1="coverage_stats_done"
output:
o1=touch("results/samples/{sample}/status/ind_sliding_done")
shell:
"""
cd results/CNV_analysis
Rscript {SCRIPTS}/NGS_CNV_code_hg37/step2_R_code_for_SlidingWindow_MeanDepth_calculation.r {wildcards.sample}.Coverage {wildcards.sample}.Sliding_window {DATABASES}/panels/TRSW75_skip10_annotated_V3Panel
"""
rule sliding_window:
input:
i1=expand("results/samples/{sample}/status/ind_sliding_done",sample=samples)
output:
touch("results/CNV_analysis/sliding_done")
params:
len(samples)
run:
if len(samples)==len(input):
shell("{SCRIPTS}/NGS_CNV_code_hg37/step3_sliding_window.sh {HOME} {BATCHFOLDER} {SCRIPTS} {DATABASES} {params} results/coverage_report/sample_summary.txt")
rule cnv_individual_plots:
input:
i2="results/CNV_analysis/sliding_done"
output:
touch("results/samples/{sample}/status/cnv_individual_plots_done")
shell:
"""
##step6: plotting the individual sample TRSW_logCNR file for each gene
# here for each individual sample there will be a directory, and all the genes plotted for that sample will be store in that specific directory
cd results/CNV_analysis
mkdir -p {wildcards.sample}
Rscript {SCRIPTS}/NGS_CNV_code_hg37/step6_R_code_for_plotting_sample_for_each_individual_gene_runwise_pooling.r {wildcards.sample} {HOME}/{BATCHFOLDER}/results/CNV_analysis/{wildcards.sample} {DATABASES}/panels/Target_with_gene_info_V3panel_collapsed.bed {DATABASES}/panels/TRSW75_skip10_annotated_V3Panel_with_flag_position_marking
"""
rule cnv_static_pooling_select_pool:
input:
i1=expand("results/samples/{sample}/status/cnv_individual_plots_done",sample=samples)
output:
touch("CNV_static_pooling_done")
params:
len(samples)
run:
if len(input)==len(samples):
shell("cp results/CNV_analysis/*.Sliding_window results/CNV_static_pooling/;cd results/CNV_static_pooling;cut -f 1,3 {HOME}/{BATCHFOLDER}/results/coverage_report/sample_summary.txt|grep -v 'Total' > out.sample_mean;Rscript {SCRIPTS}/NGS_CNV_code_hg37/code_for_running_CNV_analysis_static_pooling/step2_R_code_for_pool_selection.r out.sample_mean out.sample_with_selected_pool {CNV_static_pooling}/Info_Table_of_Pools;rm -f out.sample_mean")
rule cnv_static_pooling_and_plots:
input:
#i1="results/CNV_analysis/{sample}.Sliding_window",
i2="CNV_static_pooling_done"
output:
touch("results/samples/{sample}/status/cnv_static_pooling_done")
#touch("results/cnv_static_pooling_done")
shell:
"""
{SCRIPTS}/NGS_CNV_code_hg37/code_for_running_CNV_analysis_static_pooling/step1_code_for_running_CNV_analysis_with_STATIC_pooling.sh {HOME} {BATCHFOLDER} {SCRIPTS} {DATABASES} {CNV_static_pooling} {wildcards.sample}.Sliding_window
"""
#Quality control: calculation of %deviation from pool coverage
rule cnv_static_pooling_quality:
input:
i1=expand("results/samples/{sample}/status/cnv_static_pooling_done",sample=samples)
output:
o1="results/CNV_static_pooling/out.static_pooling_OUALITY.csv"
run:
if len(input)==len(samples):
shell("Rscript {SCRIPTS}/NGS_CNV_code_hg37/code_for_running_CNV_analysis_static_pooling/step5_R_code_for_quality_control_in_STATIC_pooling.r results/CNV_static_pooling/out.sample_with_selected_pool {output.o1}")
rule cnv_combining_all_plots_pooling:
input:
i1="results/CNV_static_pooling/out.static_pooling_OUALITY.csv",
output:
touch("results/genes/{gene}_copy_number_variant_analysis_done")
shell:
"""
{SCRIPTS}/NGS_CNV_code_hg37/step_FINAL_code_for_combining_all_runwise_and_static_pooling_plots_from_all_samples_for_each_gene.sh {HOME} {BATCHFOLDER} {DATABASES} {wildcards.gene}
"""
rule cnv_check_all_done:
input:
expand("results/genes/{gene}_copy_number_variant_analysis_done",gene=genes)
output:
touch("results/genes_copy_number_variant_analysis_done")
run:
if len(input)==len(genes):
"ok"