From 16e2f4fb939c216ff0b50d05098bee6420b7054e Mon Sep 17 00:00:00 2001 From: Mateusz Staniak Date: Wed, 31 Jul 2024 15:29:59 +0200 Subject: [PATCH 1/3] added support for PD 3.1 column naming --- R/clean_ProteomeDiscoverer.R | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/R/clean_ProteomeDiscoverer.R b/R/clean_ProteomeDiscoverer.R index eb67665b..6d74e204 100644 --- a/R/clean_ProteomeDiscoverer.R +++ b/R/clean_ProteomeDiscoverer.R @@ -36,19 +36,20 @@ protein_id_column = .standardizeColnames(protein_id_column) sequence_column = .standardizeColnames(sequence_column) quantification_column = .standardizeColnames(quantification_column) + run_column = ifelse(grepl("FileID", colnames(pd_input)), "FileID", "SpectrumFile") if (remove_shared & is.element("XProteins", colnames(pd_input))) { pd_input = pd_input[XProteins == "1", ] } pd_cols = c(protein_id_column, sequence_column, - "Modifications", "Charge", "SpectrumFile", quantification_column) + "Modifications", "Charge", run_column, quantification_column) if (any(is.element(colnames(pd_input), "Fraction"))) { pd_cols = c(pd_cols, "Fraction") } pd_input = pd_input[, pd_cols, with = FALSE] data.table::setnames( pd_input, - c(protein_id_column, sequence_column, "SpectrumFile", + c(protein_id_column, sequence_column, run_column, quantification_column, "Charge"), c("ProteinName", "PeptideSequence", "Run", "Intensity", "PrecursorCharge"), @@ -96,17 +97,18 @@ } channels = .getChannelColumns(colnames(pd_input), intensity_columns_regexp) + run_column = ifelse(grepl("FileID", colnames(pd_input)), "FileID", "SpectrumFile") .validatePDTMTInputColumns(pd_input, protein_id_column, num_proteins, channels) pd_cols = intersect(c(protein_id_column, num_proteins, "AnnotatedSequence", "Charge", "PrecursorCharge", "IonsScore", - "SpectrumFile", "QuanInfo", + run_column, "QuanInfo", "IsolationInterference", channels), colnames(pd_input)) pd_input = pd_input[, pd_cols, with = FALSE] data.table::setnames(pd_input, c(protein_id_column, num_proteins, "AnnotatedSequence", - "SpectrumFile", "Charge"), + run_column, "Charge"), c("ProteinName", "numProtein", "PeptideSequence", "Run", "PrecursorCharge"), skip_absent = TRUE) From 7cd97bfcd94c4e77fdd50ff08520cf40b7d7d06a Mon Sep 17 00:00:00 2001 From: Mateusz Staniak Date: Wed, 31 Jul 2024 15:34:01 +0200 Subject: [PATCH 2/3] updated support for PD 3.1 column names --- R/clean_ProteomeDiscoverer.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/clean_ProteomeDiscoverer.R b/R/clean_ProteomeDiscoverer.R index 6d74e204..82a140cd 100644 --- a/R/clean_ProteomeDiscoverer.R +++ b/R/clean_ProteomeDiscoverer.R @@ -98,7 +98,7 @@ channels = .getChannelColumns(colnames(pd_input), intensity_columns_regexp) run_column = ifelse(grepl("FileID", colnames(pd_input)), "FileID", "SpectrumFile") - .validatePDTMTInputColumns(pd_input, protein_id_column, num_proteins, channels) + .validatePDTMTInputColumns(pd_input, protein_id_column, num_proteins, run_column, channels) pd_cols = intersect(c(protein_id_column, num_proteins, "AnnotatedSequence", "Charge", "PrecursorCharge", "IonsScore", @@ -139,14 +139,16 @@ #' @param pd_input data.frame input #' @param protein_id_column column name for protein passed from user #' @param num_proteins_column column name for number of protein groups passed from user +#' @param run_column column name for Run ID, depends on PD version #' @param channels list of column names for channels .validatePDTMTInputColumns = function(pd_input, protein_id_column, num_proteins_column, + run_column, channels ) { required_columns = c(protein_id_column, num_proteins_column, "AnnotatedSequence", - "SpectrumFile") + run_column) missing_columns = setdiff(required_columns, colnames(pd_input)) if (length(missing_columns) > 0) { msg = paste("The following columns are missing from the input data:", From 59db56fa37bbf887888d2aeaf755af41f61c4fda Mon Sep 17 00:00:00 2001 From: Mateusz Staniak Date: Wed, 31 Jul 2024 15:51:33 +0200 Subject: [PATCH 3/3] fixed a mistake in handling alternative Run column names for PD --- R/clean_ProteomeDiscoverer.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/clean_ProteomeDiscoverer.R b/R/clean_ProteomeDiscoverer.R index 82a140cd..265ba7a2 100644 --- a/R/clean_ProteomeDiscoverer.R +++ b/R/clean_ProteomeDiscoverer.R @@ -36,7 +36,7 @@ protein_id_column = .standardizeColnames(protein_id_column) sequence_column = .standardizeColnames(sequence_column) quantification_column = .standardizeColnames(quantification_column) - run_column = ifelse(grepl("FileID", colnames(pd_input)), "FileID", "SpectrumFile") + run_column = ifelse(any(grepl("FileID", colnames(pd_input))), "FileID", "SpectrumFile") if (remove_shared & is.element("XProteins", colnames(pd_input))) { pd_input = pd_input[XProteins == "1", ] @@ -97,7 +97,7 @@ } channels = .getChannelColumns(colnames(pd_input), intensity_columns_regexp) - run_column = ifelse(grepl("FileID", colnames(pd_input)), "FileID", "SpectrumFile") + run_column = ifelse(any(grepl("FileID", colnames(pd_input))), "FileID", "SpectrumFile") .validatePDTMTInputColumns(pd_input, protein_id_column, num_proteins, run_column, channels) pd_cols = intersect(c(protein_id_column, num_proteins, "AnnotatedSequence",