-
Notifications
You must be signed in to change notification settings - Fork 3
/
run_rcl.sh
executable file
·117 lines (104 loc) · 3.65 KB
/
run_rcl.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#!/bin/bash
# Run RCL.
# Input: PATH to data files, list of BAM file replicates BFILE1, BFILE2, ...
# Assumes: PATH/chrList.txt, PATH/chr*/BFILE1.covBga.txt, PATH/chr*/BFILE2.covBga.txt, PATH/bigInputs.txt created by preprocessing.bash
# Output:
# PATH/rcl.ckpt fitted model
# PATH/rcl.bed candidate peaks and scores
# Default settings. Each one can be overridden by the command-line flag
# shown next to it.
chr_list="chrList.txt"         # -c  file listing reference sequences
path="."                       # -d  directory holding the data files
epoch=25                       # -e  number of training epochs
debug=""                       # -g  becomes "--debug" when requested
batch=256                      # -h  batch size
candidate_bed="bigInputs.txt"  # -i  BED file of regions to score
ref_prefix="chr"               # -r  reference sequence name prefix
save=0                         # -s  1 = keep intermediate files
overwrite=0                    # -w  1 = regenerate existing outputs
ext=".covBga.txt"              # -x  coverage file extension
# Print the usage message to stdout and terminate the script with status 1.
# Reads the current values of chr_list, path, epoch, batch, candidate_bed,
# ref_prefix and ext so that each DEFAULT shown reflects the live setting.
helpFunction()
{
  printf '\n'
  printf 'Usage: %s [-d PATH] -b "BAM_FILE1 BAM_FILE2[ BAM_FILE3...]"\n' "$0"
  # The format is reused for every argument: one tab-indented line each.
  printf '\t%s\n' \
    "-b STR list of preprocessed replicate BAM files, surrounded by double quotes." \
    " BAM files should have already been preprocessed by preprocessing.bash." \
    " Example (from tutorial): \"MCF7_chr10_rep1.bam MCF7_chr10_rep2.bam\"" \
    "-c STR File with reference sequences (chromosomes), one per line, on which to call peaks (DEFAULT: $chr_list)." \
    "-d STR path to Data files (DEFAULT: $path)." \
    "-e INT number of Epochs (DEFAULT: $epoch)." \
    "-g turn on debuGging output (DEFAULT: no)." \
    "-h INT batcH size (DEFAULT: $batch)." \
    "-i STR BED file (4-column: chr start end name) containing input regions to score (DEFAULT: $candidate_bed)." \
    "-r STR Reference sequence name prefix (DEFAULT: $ref_prefix)." \
    "-s Save output files (DEFAULT: no)." \
    "-w overWrite existing files (DEFAULT: no)." \
    "-x STR eXtension for coverage files (DEFAULT: $ext)." \
    " Chromosome j coverage file for replicate m assumed to be" \
    " PATH/chrj/BASEm.covBga.txt, where BASEm is BAM_FILEm (-b) without .bam extension."
  exit 1 # Exit script after printing help
}
# Parse the command line. Every recognised flag overrides one setting;
# anything else that getopts hands back (reported as '?') prints the usage
# text via helpFunction. Note that -h is the batch size here, not "help".
while getopts '?gswb:c:d:e:h:i:r:x:' opt; do
  case "$opt" in
    # Switches without an argument.
    g) debug="--debug" ;;
    s) save=1 ;;
    w) overwrite=1 ;;
    # Options that carry a value.
    b) fname="$OPTARG" ;;
    c) chr_list="$OPTARG" ;;
    d) path="$OPTARG" ;;
    e) epoch="$OPTARG" ;;
    h) batch="$OPTARG" ;;
    i) candidate_bed="$OPTARG" ;;
    r) ref_prefix="$OPTARG" ;;
    x) ext="$OPTARG" ;;
    # Unknown option or missing argument.
    *) helpFunction ;;
  esac
done
# Drop one trailing slash so that later "$path"/name joins do not double it.
path="${path%/}"
printf 'Number of epoch %s\n' "$epoch"
# Both the data path and the BAM list (-b) must be non-empty; if either is
# missing, report it and show the usage text (helpFunction exits the script).
if [[ -z "$path" || -z "$fname" ]]; then
  printf '%s\n' "Some or all required parameters are empty"
  helpFunction
fi
# Concatenate one replicate's per-chromosome coverage files into one file.
# Arguments: $1 - replicate base name (BAM file name without ".bam")
#            $2 - output file; removed first, then rebuilt by appending
# Globals read: path, ref_prefix, ext, chr_list
# Sequences named exactly "X" or "Y" are left out, and blank lines in the
# list are ignored.
build_replicate_file() {
  local rep_name=$1 out_file=$2 chr
  rm -f -- "$out_file"
  # -r keeps backslashes literal; the "|| [[ -n ... ]]" part still processes
  # a final line that has no trailing newline instead of silently dropping it.
  while read -r chr || [[ -n "$chr" ]]; do
    if [[ -z "$chr" || "$chr" == "X" || "$chr" == "Y" ]]; then
      continue
    fi
    cat -- "$path/$ref_prefix$chr/$rep_name$ext" >> "$out_file"
  done < "$path/$chr_list"
}

# Replicate base names: the -b list with every ".bam" occurrence removed.
rep_names=${fname//.bam/}
nreps=0
# $rep_names is deliberately unquoted: it is a space-separated list.
for rep_name in $rep_names; do
  nreps=$((nreps + 1))
  rep_file="$path/rep$nreps.txt"
  # Rebuild rep<N>.txt when -w was given or the file is missing/empty.
  if [[ "$overwrite" -eq 1 || ! -s "$rep_file" ]]; then
    build_replicate_file "$rep_name" "$rep_file"
  fi
done
# Train the model unless a non-empty checkpoint already exists and -w was
# not given. All output of main.py (stdout and stderr) goes to out.err.
if [[ "$overwrite" -eq 1 || ! -s "$path/rcl.ckpt" ]]; then
  echo "Number of reps ${nreps}, start training"
  # $debug stays unquoted on purpose: when empty it must vanish entirely
  # rather than being passed as an empty argument.
  if ! python main.py $debug --epochs "$epoch" --batch_size "$batch" \
    --datapath "$path" --n_rep "$nreps" --modelpath "$path/rcl.ckpt" \
    &> "$path/out.err"; then
    # Stop here: continuing would score with a missing or stale checkpoint.
    echo "Training failed; see \"$path/out.err\" for details" >&2
    exit 1
  fi
  echo "Finish training, start writing results (if your data is large, please give more memory)"
else
  echo "Using existing \"$path/rcl.ckpt\" file"
fi
# Score the candidate regions unless a non-empty rcl.bed already exists and
# -w was not given. rcl_score.py is run once per entry in the chromosome
# list (every listed sequence, including X and Y); afterwards all files
# matching rcl_*bed in the data directory are concatenated into rcl.bed.
if [[ "$overwrite" -eq 1 || ! -s "$path/rcl.bed" ]]; then
  # -r keeps backslashes literal; the "|| [[ -n ... ]]" part still processes
  # a final line that has no trailing newline instead of silently dropping it.
  while read -r chr || [[ -n "$chr" ]]; do
    # Ignore blank lines in the list.
    if [[ -z "$chr" ]]; then
      continue
    fi
    # $debug and $rep_names stay unquoted on purpose: the former must vanish
    # when empty, the latter is a space-separated list of replicate names.
    python rcl_score.py $debug --model "$path/rcl.ckpt" \
      --dpath "$path/$ref_prefix$chr" --names $rep_names \
      --preprocess_region "$path/$candidate_bed" --id "$chr" --prefix "$path" \
      || echo "Warning: scoring failed for $ref_prefix$chr" >&2
  done < "$path/$chr_list"
  cat "$path"/rcl_*bed > "$path"/rcl.bed
else
  echo "Using existing \"$path/rcl.bed\" file"
fi
# Unless -s asked to keep them, delete the intermediate files: the
# rcl_*bed score files, the training log, and one rep<N>.txt for each
# whitespace-separated entry of the -b list.
if [ "$save" -eq 0 ]; then
  rm -f "$path"/rcl_*bed "$path"/out.err
  nreps=0
  for bam in $fname; do
    nreps=$((nreps + 1))
    rm -f "$path"/rep"$nreps".txt
  done
fi
echo "Finished!"