Skip to content

Commit

Permalink
Refactoring RT matching (#76)
Browse files Browse the repository at this point in the history
  • Loading branch information
Adafede committed Aug 15, 2023
1 parent c4da046 commit d6fcd42
Show file tree
Hide file tree
Showing 53 changed files with 1,117 additions and 249 deletions.
6 changes: 4 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: timaR
Title: Taxonomically Informed Metabolite Annotation
Version: 2.8.2
Version: 2.9.0
Authors@R: c(
person("Adriano", "Rutz", , "rutz@imsb.biol.ethz.ch", role = c("aut", "cre"),
comment = c(ORCID = "0000-0003-0443-9902")),
Expand All @@ -18,7 +18,7 @@ Imports:
crayon (>= 1.5.2),
docopt (>= 0.7.1),
dplyr (>= 1.1.2),
httr (>= 1.4.6),
httr (>= 1.4.7),
igraph (>= 1.5.1),
jsonlite (>= 1.8.7),
MsBackendMgf (>= 1.8.0),
Expand Down Expand Up @@ -93,6 +93,7 @@ Collate:
'extract_spectra.R'
'fake_annotations_columns.R'
'fake_sop_columns.R'
'filter_annotations.R'
'get_file.R'
'get_example_sirius.R'
'get_gnps_tables.R'
Expand All @@ -117,6 +118,7 @@ Collate:
'prepare_features_edges.R'
'prepare_features_tables.R'
'prepare_libraries_adducts.R'
'prepare_libraries_rt.R'
'prepare_libraries_sop_closed.R'
'prepare_libraries_sop_ecmdb.R'
'prepare_libraries_sop_lotus.R'
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ export(export_spectra_2)
export(extract_spectra)
export(fake_annotations_columns)
export(fake_sop_columns)
export(filter_annotations)
export(get_example_sirius)
export(get_file)
export(get_gnps_tables)
Expand All @@ -50,6 +51,7 @@ export(prepare_features_components)
export(prepare_features_edges)
export(prepare_features_tables)
export(prepare_libraries_adducts)
export(prepare_libraries_rt)
export(prepare_libraries_sop_closed)
export(prepare_libraries_sop_ecmdb)
export(prepare_libraries_sop_lotus)
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# timaR

# timaR 2.9.0

* Refactored RT matching (#76)

# timaR 2.8.2

* Change from pbmclapply to pblapply
Expand Down
11 changes: 3 additions & 8 deletions R/annotate_masses.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ utils::globalVariables(
"delta_min",
"Distance",
"error_mz",
"error_rt",
"exact_mass",
"feature_id",
"feature_id_dest",
Expand Down Expand Up @@ -157,8 +156,8 @@ annotate_masses <-
20
)
stopifnot(
"Your rt tolerance must be lower or equal to 0.1" = tolerance_rt <=
0.1
"Your rt tolerance must be lower or equal to 0.05" = tolerance_rt <=
0.05
)

paths <<- parse_yaml_paths()
Expand Down Expand Up @@ -465,16 +464,14 @@ annotate_masses <-
)
) |>
dplyr::mutate(
error_mz = adduct_mass - mz_1,
error_rt = NA_real_
error_mz = adduct_mass - mz_1
) |>
tidytable::select(
feature_id,
rt,
mz,
score_input,
error_mz,
error_rt,
exact_mass,
adduct,
adduct_mass,
Expand All @@ -497,7 +494,6 @@ annotate_masses <-
score_input,
library,
error_mz,
error_rt,
exact_mass
) |>
tidyft::filter(!is.na(exact_mass))
Expand Down Expand Up @@ -786,7 +782,6 @@ annotate_masses <-
tidytable::distinct(
feature_id,
error_mz,
error_rt,
structure_name,
structure_inchikey_2D,
structure_smiles_2D,
Expand Down
17 changes: 0 additions & 17 deletions R/annotate_spectra.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ utils::globalVariables(
"paths",
"precursorMz",
"presence_ratio",
"rtime",
"score",
"SLAW_ID",
"structure_inchikey_2D",
Expand Down Expand Up @@ -90,7 +89,6 @@ annotate_spectra <- function(input = params$files$spectral$raw,
df_empty <- data.frame(
feature_id = NA,
error_mz = NA,
error_rt = NA,
structure_name = NA,
structure_inchikey_2D = NA,
structure_smiles_2D = NA,
Expand Down Expand Up @@ -129,15 +127,11 @@ annotate_spectra <- function(input = params$files$spectral$raw,

query_precursors <- spectra@backend@spectraData$precursorMz
query_spectra <- spectra@backend@peaksData
query_rts <- spectra@backend@spectraData$rtime
## TODO find a way to have consistency in spectrum IDs
query_ids <- spectra@backend@spectraData$acquisitionNum
if (is.null(query_ids)) {
query_ids <- spectra@backend@spectraData$spectrum_id
}
if (is.null(query_rts)) {
query_rts <- rep(NA_real_, length(spectra))
}

if (approx == FALSE) {
log_debug("Reducing library size...")
Expand Down Expand Up @@ -195,7 +189,6 @@ annotate_spectra <- function(input = params$files$spectral$raw,
spectral_lib,
query_ids,
query_spectra,
query_rts,
lib_id,
minimal,
maximal,
Expand Down Expand Up @@ -229,7 +222,6 @@ annotate_spectra <- function(input = params$files$spectral$raw,
list(
"feature_id" = query_ids[[spectrum]],
"precursorMz" = precursor,
"rtime" = query_rts[[spectrum]],
"target_id" = lib_id[indices][[index]],
"score" = as.numeric(score),
"count_peaks_matched" = NA_integer_,
Expand Down Expand Up @@ -257,7 +249,6 @@ annotate_spectra <- function(input = params$files$spectral$raw,
spectral_lib = lib_spectra,
query_ids = query_ids,
query_spectra = query_spectra,
query_rts = query_rts,
lib_id = lib_id,
minimal = minimal,
maximal = maximal
Expand Down Expand Up @@ -295,10 +286,6 @@ annotate_spectra <- function(input = params$files$spectral$raw,
if (is.null(lib_smiles2D)) {
lib_smiles2D <- rep(NA_character_, length(spectral_library))
}
lib_rts <- spectral_library@backend@spectraData$rtime
if (is.null(lib_rts)) {
lib_rts <- rep(NA_real_, length(spectral_library))
}
lib_name <- spectral_library@backend@spectraData$name
if (is.null(lib_name)) {
lib_name <- rep(NA_character_, length(spectral_library))
Expand All @@ -322,7 +309,6 @@ annotate_spectra <- function(input = params$files$spectral$raw,
"target_inchikey_2D" = lib_inchikey2D,
"target_smiles" = lib_smiles,
"target_smiles_2D" = lib_smiles2D,
"target_rtime" = lib_rts,
"target_name" = lib_name,
"target_formula" = lib_mf,
"target_exactmass" = lib_mass,
Expand All @@ -336,8 +322,6 @@ annotate_spectra <- function(input = params$files$spectral$raw,
df_final <- df_final |>
tidytable::rowwise() |>
dplyr::mutate(
## Working in minutes
error_rt = (target_rtime - rtime) / 60,
error_mz = target_precursorMz - precursorMz,
structure_inchikey_2D = ifelse(
test = is.na(target_inchikey_2D),
Expand All @@ -357,7 +341,6 @@ annotate_spectra <- function(input = params$files$spectral$raw,
c(
"feature_id",
"error_mz",
"error_rt",
"structure_name" = "target_name",
"structure_inchikey_2D",
"structure_smiles_2D",
Expand Down
98 changes: 98 additions & 0 deletions R/filter_annotations.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
utils::globalVariables(c("params"))

#' @title Filter annotations
#'
#' @description This function filters initial annotations.
#'
#' @param annotations Prepared annotations file
#' @param features Prepared features file
#' @param rts Prepared retention time library
#' @param output Output file
#' @param tolerance_rt Tolerance to filter retention time
#' @param parameters Params
#'
#' @return NULL
#'
#' @export
#'
#' @examples NULL
filter_annotations <-
function(annotations = params$files$annotations$prepared,
features = params$files$features$prepared,
rts = params$files$libraries$temporal$prepared,
output = params$files$annotations$filtered,
tolerance_rt = params$ms$tolerances$rt$minutes,
parameters = params) {
stopifnot(
"Annotations file(s) do(es) not exist" =
rep(TRUE, length(annotations)) ==
lapply(X = annotations, file.exists)
)
stopifnot(
"Retention time file(s) do(es) not exist" =
rep(TRUE, length(rts)) ==
lapply(X = rts, file.exists)
)
stopifnot("Your features file does not exist." = file.exists(features))

paths <<- parse_yaml_paths()
params <<- parameters

log_debug(x = "... features")
features_table <- tidytable::fread(
file = features,
colClasses = "character",
na.strings = c("", "NA")
)
log_debug(x = "... annotations")
annotation_table <- lapply(
X = annotations,
FUN = tidytable::fread,
colClasses = "character",
na.strings = c("", "NA")
) |>
tidytable::bind_rows()
log_debug(x = "... retention times")
rt_table <- lapply(
X = rts,
FUN = tidytable::fread,
colClasses = "character",
na.strings = c("", "NA")
) |>
tidytable::bind_rows() |>
tidytable::rename(rt_target = rt)

log_debug(
"Filtering annotations outside of",
tolerance_rt * 3,
"minutes tolerance"
)
features_annotated_table <- features_table |>
tidytable::left_join(annotation_table) |>
tidytable::left_join(rt_table) |>
data.frame() |>
dplyr::mutate(error_rt = as.numeric(rt) - as.numeric(rt_target)) |>
dplyr::arrange(abs(error_rt)) |>
tidytable::tidytable() |>
tidytable::distinct(-error_rt, -rt_target,
.keep_all = TRUE
) |>
data.frame() |>
## TODO adapt for types and improve the * 3
dplyr::filter(abs(error_rt) <= abs(tolerance_rt * 3) |
is.na(error_rt)) |>
tidytable::tidytable() |>
tidytable::select(-rt_target, -type)

## in case some features had a single filtered annotation
final_table <- features_table |>
tidytable::left_join(features_annotated_table)

export_params(step = "filter_annotations")
export_output(
x = final_table,
file = output[[1]]
)

return(output[[1]])
}
19 changes: 13 additions & 6 deletions R/parse_cli_params.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ parse_cli_params <- function() {
params$files$annotations$raw$sirius <-
as.character(arguments$fil_ann_raw_sir)
}
if (!is.null(arguments$fil_ann_fil)) {
params$files$annotations$filtered <- as.character(arguments$fil_ann_fil)
}
if (!is.null(arguments$fil_ann_pre)) {
params$files$annotations$prepared <- as.character(arguments$fil_ann_pre)
}
Expand Down Expand Up @@ -199,12 +202,13 @@ parse_cli_params <- function() {
if (!is.null(arguments$names_mgf_po)) {
params$names$mgf$polarity <- as.character(arguments$names_mgf_po)
}
if (!is.null(arguments$names_mgf_pc)) {
params$names$mgf$precursor_charge <- as.character(arguments$names_mgf_pc)
}
if (!is.null(arguments$names_mgf_pm)) {
params$names$mgf$precursor_mz <- as.character(arguments$names_mgf_pm)
}
# if (!is.null(arguments$names_mgf_pc)) {
# params$names$mgf$precursor_charge <-
# as.character(arguments$names_mgf_pc)
# }
# if (!is.null(arguments$names_mgf_pm)) {
# params$names$mgf$precursor_mz <- as.character(arguments$names_mgf_pm)
# }
if (!is.null(arguments$names_mgf_sm)) {
params$names$mgf$smiles <- as.character(arguments$names_mgf_sm)
}
Expand Down Expand Up @@ -270,6 +274,9 @@ parse_cli_params <- function() {
if (!is.null(arguments$too_tax_che)) {
params$tools$taxonomies$chemical <- as.character(arguments$too_tax_che)
}
if (!is.null(arguments$units_rt)) {
params$units$rt <- as.character(arguments$units_rt)
}
if (!is.null(arguments$wei_glo_bio)) {
params$weights$global$biological <- as.numeric(arguments$wei_glo_bio)
}
Expand Down
5 changes: 1 addition & 4 deletions R/prepare_annotations_gnps.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ utils::globalVariables(
"count_peaks_explained",
"count_peaks_matched",
"error_mz",
"error_rt",
"ExactMass",
"feature_id",
"INCHI",
Expand Down Expand Up @@ -122,13 +121,11 @@ prepare_annotations_gnps <-
dplyr::mutate(
error_mz = as.numeric(MZErrorPPM) *
1E-6 *
as.numeric(Precursor_MZ),
error_rt = NA
as.numeric(Precursor_MZ)
) |>
tidytable::select(
feature_id = `#Scan#`,
error_mz = MassDiff,
error_rt,
library = LibraryName,
structure_name = Compound_Name,
score_input = MQScore,
Expand Down
2 changes: 0 additions & 2 deletions R/prepare_annotations_sirius.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ utils::globalVariables(
"count_peaks_matched",
"CSI:FingerIDScore",
"error_mz",
"error_rt",
"explainedIntensity",
"feature_id",
"id",
Expand Down Expand Up @@ -251,7 +250,6 @@ prepare_annotations_sirius <-
tidytable::left_join(canopus_npc_prepared) |>
tidytable::distinct() |>
tidyft::mutate(
error_rt = NA,
structure_taxonomy_classyfire_chemontid = NA,
structure_taxonomy_classyfire_01kingdom = NA,
## mirror spectral match
Expand Down
2 changes: 0 additions & 2 deletions R/prepare_annotations_spectra.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ utils::globalVariables(
"count_peaks_explained",
"count_peaks_matched",
"error_mz",
"error_rt",
"feature_id",
"params",
"score",
Expand Down Expand Up @@ -109,7 +108,6 @@ prepare_annotations_spectra <-
tidytable::distinct(
feature_id,
error_mz,
error_rt,
structure_name,
structure_inchikey_2D,
structure_smiles_2D,
Expand Down
Loading

0 comments on commit d6fcd42

Please # to comment.