Skip to content

Commit 2237ad7

Browse files
committed Dec 2, 2024
Added variable checks related to UMIs
1 parent a10702b commit 2237ad7

File tree

6 files changed

+42
-42
lines changed

6 files changed

+42
-42
lines changed
 

‎DATA_CLEANING/WORKFLOW/DataCleaning_PairedEnd.smk

+1-5
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,6 @@ if (len(user_demult_dir) == 0):
2424
else:
2525
performDemultiplexing = False
2626

27-
if config["UMI"]:
28-
extractUMI = True
29-
else:
30-
extractUMI = False
3127

3228
### Raw fastq files path and base names
3329
fastq_R1_raw_base = fastq_R2_raw_base = raw_data_dir = ""
@@ -54,7 +50,7 @@ if performDemultiplexing:
5450
demult_dir_input = demult_dir
5551
else:
5652
demult_dir = user_demult_dir
57-
if extractUMI:
53+
if config["UMI"]:
5854
demult_dir_output = outputs_directory+"/DEMULT_UMI"
5955
demult_dir_input = demult_dir_output
6056
else:

‎DATA_CLEANING/WORKFLOW/DataCleaning_SingleEnd.smk

+1-5
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,6 @@ if (len(user_demult_dir) == 0):
2222
else:
2323
performDemultiplexing = False
2424

25-
if config["UMI"]:
26-
extractUMI = True
27-
else:
28-
extractUMI = False
2925

3026
### Raw fastq files path and base names
3127
fastq_raw_base = raw_data_dir = ""
@@ -51,7 +47,7 @@ if performDemultiplexing:
5147
demult_dir_input = demult_dir
5248
else:
5349
demult_dir = user_demult_dir
54-
if extractUMI:
50+
if config["UMI"]:
5551
demult_dir_output = outputs_directory+"/DEMULT_UMI"
5652
demult_dir_input = demult_dir_output
5753
else:

‎READ_MAPPING/WORKFLOW/ReadMapping.smk

+1-6
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ trim_dirs = list(config["TRIM_DIRS"].split(" "))
2121
if (len(config["TRIM_DIRS"]) == 0):
2222
trim_dirs = [working_directory+"/WORKFLOWS_OUTPUTS/DATA_CLEANING/DEMULT_TRIM"]
2323

24-
2524
if config["REMOVE_DUP_MARKDUPLICATES"]:
2625
rm_dup = "TRUE"
2726
else:
@@ -52,10 +51,6 @@ if (len(existing_bed) == 0 and len(config["BED_MIN_MEAN_COV"]) == 0):
5251
else:
5352
count_reads_zones = True
5453

55-
if config["CREATE_SUB_BAMS"]:
56-
create_sub_bams = True
57-
else:
58-
create_sub_bams = False
5954

6055
mapper = config["MAPPER"]
6156

@@ -610,7 +605,7 @@ rule Write_Summary:
610605
subbams_reports_dir+"/multiQC_ReadMapping_SubBams_Report.html",
611606
mapping_dir+"/subbams_list.txt",
612607
mapping_dir+"/reference_chr_size.txt" ],
613-
[ True, True, True, count_reads_zones, create_sub_bams, create_sub_bams, create_sub_bams, create_sub_bams, create_sub_bams, create_sub_bams ])
608+
[ True, True, True, count_reads_zones, config["CREATE_SUB_BAMS"], config["CREATE_SUB_BAMS"], config["CREATE_SUB_BAMS"], config["CREATE_SUB_BAMS"], config["CREATE_SUB_BAMS"], config["CREATE_SUB_BAMS"] ])
614609
output:
615610
temp(mapping_dir+"/summary.sentinel")
616611
params:

‎launcher_files/launcher_DataCleaningCheck.sh

+9
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,15 @@ if [[ ! -z $BARCODE_FILE ]] ; then
262262
fi
263263
fi
264264

265+
# UMI
266+
UMI=$(grep "^UMI:" $CONFIG | sed 's/#.*$//' | cut -d ' ' -f2 | sed 's/"//g')
267+
if [[ "$UMI" != "TRUE" && "$UMI" != "True" && "$UMI" != "true" && "$UMI" != "FALSE" && "$UMI" != "False" && "$UMI" != "false" ]] ; then
268+
echo -e "\nERROR: The UMI variable is incorrect in your config file (${CONFIG}). Please set it to TRUE or FALSE."
269+
echo "As a reminder:"
270+
echo "UMI: Whether or not UMI sequences should be extracted from reads. Set to TRUE if UMIs were incorporated during library construction. This option is currently only supported for demultiplexed data. [TRUE or FALSE]"
271+
echo -e "\nExiting.\n"
272+
exit 1
273+
fi
265274

266275
# TRIMMING_QUAL and TRIMMING_MIN_LENGTH
267276
TRIMMING_QUAL=$(grep "^TRIMMING_QUAL:" $CONFIG | sed 's/#.*$//' | cut -d ' ' -f2 | sed 's/"//g')

‎launcher_files/launcher_ReadMappingCheck.sh

+28-8
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,9 @@ if [[ -z "$PAIRED_END" ]] ; then
3636
echo "PAIRED_END: set to TRUE in case of paired end data (R1 + R2), to FALSE in case of single end data."
3737
echo -e "\nExiting.\n"
3838
exit 1
39-
elif [[ "$PAIRED_END" == "TRUE" || "$PAIRED_END" == "True" || "$PAIRED_END" == "true" || "$PAIRED_END" == "T" ]] ; then
39+
elif [[ "$PAIRED_END" == "TRUE" || "$PAIRED_END" == "True" || "$PAIRED_END" == "true" ]] ; then
4040
echo -e "\nINFO: Paired end data is expected (PAIRED_END set to TRUE)\n"
41-
elif [[ "$PAIRED_END" == "FALSE" || "$PAIRED_END" == "False" || "$PAIRED_END" == "false" || "$PAIRED_END" == "F" ]] ; then
41+
elif [[ "$PAIRED_END" == "FALSE" || "$PAIRED_END" == "False" || "$PAIRED_END" == "false" ]] ; then
4242
echo -e "\nINFO: Single end data is expected (PAIRED_END set to FALSE)\n"
4343
else
4444
echo -e "\nERROR: The PAIRED_END variable is incorrect in your config file (${CONFIG}). Please set it to TRUE or FALSE."
@@ -69,7 +69,7 @@ for TRIM_DIR in $TRIM_DIRS ; do
6969
echo -e "\nExiting.\n"
7070
exit 1
7171
fi
72-
if [[ "$PAIRED_END" == "TRUE" || "$PAIRED_END" == "True" || "$PAIRED_END" == "true" || "$PAIRED_END" == "T" ]] ; then
72+
if [[ "$PAIRED_END" == "TRUE" || "$PAIRED_END" == "True" || "$PAIRED_END" == "true" ]] ; then
7373
nb_fastq_R1_obs=$(ls ${TRIM_DIR}/*.R1.fastq.gz 2>/dev/null | wc -l)
7474
nb_fastq_R2_obs=$(ls ${TRIM_DIR}/*.R2.fastq.gz 2>/dev/null | wc -l)
7575
if [[ $nb_fastq_R1_obs -eq 0 || $nb_fastq_R2_obs -eq 0 ]] ; then
@@ -94,7 +94,7 @@ for TRIM_DIR in $TRIM_DIRS ; do
9494
done
9595
fi
9696
fi
97-
if [[ "$PAIRED_END" == "FALSE" || "$PAIRED_END" == "False" || "$PAIRED_END" == "false" || "$PAIRED_END" == "F" ]] ; then
97+
if [[ "$PAIRED_END" == "FALSE" || "$PAIRED_END" == "False" || "$PAIRED_END" == "false" ]] ; then
9898
nb_fastq=$(ls ${TRIM_DIR}/*.fastq.gz 2>/dev/null | wc -l)
9999
nb_fastq_R1_obs=$(ls ${TRIM_DIR}/*R1*fastq.gz 2>/dev/null | wc -l)
100100
nb_fastq_R2_obs=$(ls ${TRIM_DIR}/*R2*fastq.gz 2>/dev/null | wc -l)
@@ -158,14 +158,14 @@ if [[ -z "$CREATE_SUB_BAMS" ]] ; then
158158
echo "CREATE_SUB_BAMS: set to TRUE in case you provided a bed file AND want to extract the reads mapping onto the specified regions, to FALSE otherwise. If set to TRUE, the extracted reads will be stored in new bams, and a new reference (matching the bams) containing only the bed regions will be created."
159159
echo -e "\nExiting.\n"
160160
exit 1
161-
elif [[ "$CREATE_SUB_BAMS" != "TRUE" && "$CREATE_SUB_BAMS" != "True" && "$CREATE_SUB_BAMS" != "true" && "$CREATE_SUB_BAMS" != "T" && "$CREATE_SUB_BAMS" != "FALSE" && "$CREATE_SUB_BAMS" != "False" && "$CREATE_SUB_BAMS" != "false" && "$CREATE_SUB_BAMS" != "F" ]] ; then
161+
elif [[ "$CREATE_SUB_BAMS" != "TRUE" && "$CREATE_SUB_BAMS" != "True" && "$CREATE_SUB_BAMS" != "true" && "$CREATE_SUB_BAMS" != "FALSE" && "$CREATE_SUB_BAMS" != "False" && "$CREATE_SUB_BAMS" != "false" ]] ; then
162162
echo -e "\nERROR: The CREATE_SUB_BAMS variable is incorrect in your config file (${CONFIG}). Please set it to TRUE or FALSE."
163163
echo "As a reminder:"
164164
echo "CREATE_SUB_BAMS: set to TRUE in case you provided a bed file AND want to extract the reads mapping onto the specified regions, to FALSE otherwise. If set to TRUE, the extracted reads will be stored in new bams, and a new reference (matching the bams) containing only the bed regions will be created."
165165
echo -e "\nExiting.\n"
166166
exit 1
167167
fi
168-
if [[ "$CREATE_SUB_BAMS" == "TRUE" || "$CREATE_SUB_BAMS" == "True" || "$CREATE_SUB_BAMS" == "true" || "$CREATE_SUB_BAMS" == "T" ]] ; then
168+
if [[ "$CREATE_SUB_BAMS" == "TRUE" || "$CREATE_SUB_BAMS" == "True" || "$CREATE_SUB_BAMS" == "true" ]] ; then
169169
if [[ -z "$BED" && (-z "$BED_MIN_MEAN_COV" || -z "$BED_MIN_DIST" || -z "$BED_MIN_LENGTH") ]] ; then
170170
echo -e "\nERROR: CREATE_SUB_BAMS was set to TRUE but neither the bed file nor the parameters to automatically create it were provided in your config file (${CONFIG}). Please either provide a bed file, or values for BED_MIN_MEAN_COV, BED_MIN_DIST and BED_MIN_LENGTH, or set CREATE_SUB_BAMS to FALSE."
171171
echo -e "\nExiting.\n"
@@ -182,6 +182,7 @@ fi
182182
## Mapping parameters ##
183183
MAPPER=$(grep "^MAPPER:" $CONFIG | sed 's/#.*$//' | cut -d ' ' -f2 | sed 's/"//g')
184184
REMOVE_DUP_MARKDUPLICATES=$(grep "^REMOVE_DUP_MARKDUPLICATES:" $CONFIG | sed 's/#.*$//' | cut -d ' ' -f2 | sed 's/"//g')
185+
REMOVE_DUP_UMI=$(grep "^REMOVE_DUP_UMI:" $CONFIG | sed 's/#.*$//' | cut -d ' ' -f2 | sed 's/"//g')
185186

186187
# **MAPPER**
187188
if [[ -z "$MAPPER" ]] ; then
@@ -200,10 +201,29 @@ if [[ "$MAPPER" != "bwa-mem2_mem" && "$MAPPER" != "bwa_mem" && "$MAPPER" != "bow
200201
fi
201202

202203
# **REMOVE_DUP_MARKDUPLICATES**
203-
if [[ "$REMOVE_DUP_MARKDUPLICATES" != "TRUE" && "$REMOVE_DUP_MARKDUPLICATES" != "True" && "$REMOVE_DUP_MARKDUPLICATES" != "true" && "$REMOVE_DUP_MARKDUPLICATES" != "T" && "$REMOVE_DUP_MARKDUPLICATES" != "FALSE" && "$REMOVE_DUP_MARKDUPLICATES" != "False" && "$REMOVE_DUP_MARKDUPLICATES" != "false" && "$REMOVE_DUP_MARKDUPLICATES" != "F" ]] ; then
204+
if [[ "$REMOVE_DUP_MARKDUPLICATES" != "TRUE" && "$REMOVE_DUP_MARKDUPLICATES" != "True" && "$REMOVE_DUP_MARKDUPLICATES" != "true" && "$REMOVE_DUP_MARKDUPLICATES" != "FALSE" && "$REMOVE_DUP_MARKDUPLICATES" != "False" && "$REMOVE_DUP_MARKDUPLICATES" != "false" ]] ; then
204205
echo -e "\nERROR: The REMOVE_DUP_MARKDUPLICATES variable is incorrect in your config file (${CONFIG}). Please set it to TRUE or FALSE."
205206
echo "As a reminder:"
206-
echo "REMOVE_DUP_MARKDUPLICATES: set to TRUE to remove duplicates after mapping (picard MarkDuplicates -REMOVE_DUPLICATES TRUE), to FALSE otherwise."
207+
echo "REMOVE_DUP_MARKDUPLICATES: Whether or not to remove duplicates with 'picard MarkDuplicates' after the mapping step. [TRUE or FALSE]"
208+
echo -e "\nExiting.\n"
209+
exit 1
210+
fi
211+
212+
213+
# **REMOVE_DUP_UMI**
214+
if [[ "$REMOVE_DUP_UMI" != "TRUE" && "$REMOVE_DUP_UMI" != "True" && "$REMOVE_DUP_UMI" != "true" && "$REMOVE_DUP_UMI" != "FALSE" && "$REMOVE_DUP_UMI" != "False" && "$REMOVE_DUP_UMI" != "false" ]] ; then
215+
echo -e "\nERROR: The REMOVE_DUP_UMI variable is incorrect in your config file (${CONFIG}). Please set it to TRUE or FALSE."
216+
echo "As a reminder:"
217+
echo "REMOVE_DUP_UMI: Whether or not to remove duplicates with 'umi_tools dedup' after the mapping step. [TRUE or FALSE]"
218+
echo -e "\nExiting.\n"
219+
exit 1
220+
fi
221+
222+
if [[ "$REMOVE_DUP_UMI" == "TRUE" || "$REMOVE_DUP_UMI" == "True" || "$REMOVE_DUP_UMI" == "true" ]] && [[ "$REMOVE_DUP_MARKDUPLICATES" == "TRUE" || "$REMOVE_DUP_MARKDUPLICATES" == "True" || "$REMOVE_DUP_MARKDUPLICATES" == "true" ]] ; then
223+
echo -e "\nERROR: You cannot set both REMOVE_DUP_MARKDUPLICATES and REMOVE_DUP_UMI variables to TRUE in your config file (${CONFIG})."
224+
echo "As a reminder:"
225+
echo "REMOVE_DUP_MARKDUPLICATES: Whether or not to remove duplicates with 'picard MarkDuplicates' after the mapping step. If set to TRUE, set REMOVE_DUP_UMI to FALSE. If both REMOVE_DUP_MARKDUPLICATES and REMOVE_DUP_UMI are set to FALSE, duplicates will be marked with 'picard MarkDuplicates'. Setting both REMOVE_DUP_MARKDUPLICATES and REMOVE_DUP_UMI to TRUE will raise an error. [TRUE or FALSE]"
226+
echo "REMOVE_DUP_UMI: Whether or not to remove duplicates with 'umi_tools dedup' after the mapping step. This option requires prior extraction of UMI sequences from the reads using 'umi_tools extract' (this can be done with the DataCleaning workflow by setting UMI: TRUE). If set to TRUE, set REMOVE_DUP_MARKDUPLICATES to FALSE. If both REMOVE_DUP_UMI and REMOVE_DUP_MARKDUPLICATES are set to FALSE, duplicates will be marked with 'picard MarkDuplicates'. Setting both REMOVE_DUP_MARKDUPLICATES and REMOVE_DUP_UMI to TRUE will raise an error. [TRUE or FALSE]"
207227
echo -e "\nExiting.\n"
208228
exit 1
209229
fi

‎launcher_files/launcher_allWorkflowsCheck.sh

+2-18
Original file line numberDiff line numberDiff line change
@@ -85,22 +85,6 @@ else
8585
fi
8686

8787

88-
#workflow_profiles_folder="${workflow_path}/PROFILES"
89-
#if [[ -d ${workflow_profiles_folder} ]] ; then
90-
# nb_carriage_returns=$(grep -c $'\r' ${workflow_profiles_folder}/SGE/config.yaml)
91-
# if [[ "$nb_carriage_returns" -gt 0 ]] ; then
92-
# echo "Removing windows carriage returns in ${workflow_profiles_folder}/SGE/config.yaml..."
93-
# sed -i 's/\r$//g' ${workflow_profiles_folder}/SGE/config.yaml
94-
# sed -i 's/\r/\n/g' ${workflow_profiles_folder}/SGE/config.yaml
95-
# fi
96-
# nb_carriage_returns=$(grep -c $'\r' ${workflow_profiles_folder}/SLURM/config.yaml)
97-
# if [[ "$nb_carriage_returns" -gt 0 ]] ; then
98-
# echo "Removing windows carriage returns in ${workflow_profiles_folder}/SLURM/config.yaml..."
99-
# sed -i 's/\r$//g' ${workflow_profiles_folder}/SLURM/config.yaml
100-
# sed -i 's/\r/\n/g' ${workflow_profiles_folder}/SLURM/config.yaml
101-
# fi
102-
#fi
103-
10488
### 2/ CONFIG_FILE ###
10589

10690
CONFIG=$(absolutePath $CONFIG)
@@ -159,9 +143,9 @@ WORKFLOW_SMK="${WORKFLOW}.smk"
159143
# if DataCleaning: is it paired or single
160144
if [[ "$WORKFLOW" = "DataCleaning" ]] ; then
161145
PAIRED_END=$(grep "^PAIRED_END" $CONFIG | cut -f2 -d ' ')
162-
if [[ "$PAIRED_END" == "TRUE" || "$PAIRED_END" == "True" || "$PAIRED_END" == "true" || "$PAIRED_END" == "T" ]] ; then
146+
if [[ "$PAIRED_END" == "TRUE" || "$PAIRED_END" == "True" || "$PAIRED_END" == "true" ]] ; then
163147
WORKFLOW_SMK="${WORKFLOW}_PairedEnd.smk"
164-
elif [[ "$PAIRED_END" == "FALSE" || "$PAIRED_END" == "False" || "$PAIRED_END" == "false" || "$PAIRED_END" == "F" ]] ; then
148+
elif [[ "$PAIRED_END" == "FALSE" || "$PAIRED_END" == "False" || "$PAIRED_END" == "false" ]] ; then
165149
WORKFLOW_SMK="${WORKFLOW}_SingleEnd.smk"
166150
else
167151
echo -e "\nERROR: The PAIRED_END variable is either missing or incorrect in your config file (${CONFIG}). Please set it to TRUE or FALSE."

0 commit comments

Comments
 (0)