diff --git a/DESCRIPTION b/DESCRIPTION index b7f8e0c02..202ee3d7e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: timaR Title: Taxonomically Informed Metabolite Annotation -Version: 2.8.2 +Version: 2.9.0 Authors@R: c( person("Adriano", "Rutz", , "rutz@imsb.biol.ethz.ch", role = c("aut", "cre"), comment = c(ORCID = "0000-0003-0443-9902")), @@ -18,7 +18,7 @@ Imports: crayon (>= 1.5.2), docopt (>= 0.7.1), dplyr (>= 1.1.2), - httr (>= 1.4.6), + httr (>= 1.4.7), igraph (>= 1.5.1), jsonlite (>= 1.8.7), MsBackendMgf (>= 1.8.0), @@ -93,6 +93,7 @@ Collate: 'extract_spectra.R' 'fake_annotations_columns.R' 'fake_sop_columns.R' + 'filter_annotations.R' 'get_file.R' 'get_example_sirius.R' 'get_gnps_tables.R' @@ -117,6 +118,7 @@ Collate: 'prepare_features_edges.R' 'prepare_features_tables.R' 'prepare_libraries_adducts.R' + 'prepare_libraries_rt.R' 'prepare_libraries_sop_closed.R' 'prepare_libraries_sop_ecmdb.R' 'prepare_libraries_sop_lotus.R' diff --git a/NAMESPACE b/NAMESPACE index 133d2c56f..f317d7686 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -25,6 +25,7 @@ export(export_spectra_2) export(extract_spectra) export(fake_annotations_columns) export(fake_sop_columns) +export(filter_annotations) export(get_example_sirius) export(get_file) export(get_gnps_tables) @@ -50,6 +51,7 @@ export(prepare_features_components) export(prepare_features_edges) export(prepare_features_tables) export(prepare_libraries_adducts) +export(prepare_libraries_rt) export(prepare_libraries_sop_closed) export(prepare_libraries_sop_ecmdb) export(prepare_libraries_sop_lotus) diff --git a/NEWS.md b/NEWS.md index 03bc297a0..d7ab1f0fd 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,9 @@ # timaR +# timaR 2.9.0 + +* Refactored RT matching (#76) + # timaR 2.8.2 * Change from pbmclapply to pblapply diff --git a/R/annotate_masses.R b/R/annotate_masses.R index 94d2ac9d9..84fcb130e 100644 --- a/R/annotate_masses.R +++ b/R/annotate_masses.R @@ -7,7 +7,6 @@ utils::globalVariables( "delta_min", "Distance", 
"error_mz", - "error_rt", "exact_mass", "feature_id", "feature_id_dest", @@ -157,8 +156,8 @@ annotate_masses <- 20 ) stopifnot( - "Your rt tolerance must be lower or equal to 0.1" = tolerance_rt <= - 0.1 + "Your rt tolerance must be lower or equal to 0.05" = tolerance_rt <= + 0.05 ) paths <<- parse_yaml_paths() @@ -465,8 +464,7 @@ annotate_masses <- ) ) |> dplyr::mutate( - error_mz = adduct_mass - mz_1, - error_rt = NA_real_ + error_mz = adduct_mass - mz_1 ) |> tidytable::select( feature_id, @@ -474,7 +472,6 @@ annotate_masses <- mz, score_input, error_mz, - error_rt, exact_mass, adduct, adduct_mass, @@ -497,7 +494,6 @@ annotate_masses <- score_input, library, error_mz, - error_rt, exact_mass ) |> tidyft::filter(!is.na(exact_mass)) @@ -786,7 +782,6 @@ annotate_masses <- tidytable::distinct( feature_id, error_mz, - error_rt, structure_name, structure_inchikey_2D, structure_smiles_2D, diff --git a/R/annotate_spectra.R b/R/annotate_spectra.R index a3309cb96..660731990 100644 --- a/R/annotate_spectra.R +++ b/R/annotate_spectra.R @@ -7,7 +7,6 @@ utils::globalVariables( "paths", "precursorMz", "presence_ratio", - "rtime", "score", "SLAW_ID", "structure_inchikey_2D", @@ -90,7 +89,6 @@ annotate_spectra <- function(input = params$files$spectral$raw, df_empty <- data.frame( feature_id = NA, error_mz = NA, - error_rt = NA, structure_name = NA, structure_inchikey_2D = NA, structure_smiles_2D = NA, @@ -129,15 +127,11 @@ annotate_spectra <- function(input = params$files$spectral$raw, query_precursors <- spectra@backend@spectraData$precursorMz query_spectra <- spectra@backend@peaksData - query_rts <- spectra@backend@spectraData$rtime ## TODO find a way to have consistency in spectrum IDs query_ids <- spectra@backend@spectraData$acquisitionNum if (is.null(query_ids)) { query_ids <- spectra@backend@spectraData$spectrum_id } - if (is.null(query_rts)) { - query_rts <- rep(NA_real_, length(spectra)) - } if (approx == FALSE) { log_debug("Reducing library size...") @@ -195,7 +189,6 @@ 
annotate_spectra <- function(input = params$files$spectral$raw, spectral_lib, query_ids, query_spectra, - query_rts, lib_id, minimal, maximal, @@ -229,7 +222,6 @@ annotate_spectra <- function(input = params$files$spectral$raw, list( "feature_id" = query_ids[[spectrum]], "precursorMz" = precursor, - "rtime" = query_rts[[spectrum]], "target_id" = lib_id[indices][[index]], "score" = as.numeric(score), "count_peaks_matched" = NA_integer_, @@ -257,7 +249,6 @@ annotate_spectra <- function(input = params$files$spectral$raw, spectral_lib = lib_spectra, query_ids = query_ids, query_spectra = query_spectra, - query_rts = query_rts, lib_id = lib_id, minimal = minimal, maximal = maximal @@ -295,10 +286,6 @@ annotate_spectra <- function(input = params$files$spectral$raw, if (is.null(lib_smiles2D)) { lib_smiles2D <- rep(NA_character_, length(spectral_library)) } - lib_rts <- spectral_library@backend@spectraData$rtime - if (is.null(lib_rts)) { - lib_rts <- rep(NA_real_, length(spectral_library)) - } lib_name <- spectral_library@backend@spectraData$name if (is.null(lib_name)) { lib_name <- rep(NA_character_, length(spectral_library)) @@ -322,7 +309,6 @@ annotate_spectra <- function(input = params$files$spectral$raw, "target_inchikey_2D" = lib_inchikey2D, "target_smiles" = lib_smiles, "target_smiles_2D" = lib_smiles2D, - "target_rtime" = lib_rts, "target_name" = lib_name, "target_formula" = lib_mf, "target_exactmass" = lib_mass, @@ -336,8 +322,6 @@ annotate_spectra <- function(input = params$files$spectral$raw, df_final <- df_final |> tidytable::rowwise() |> dplyr::mutate( - ## Working in minutes - error_rt = (target_rtime - rtime) / 60, error_mz = target_precursorMz - precursorMz, structure_inchikey_2D = ifelse( test = is.na(target_inchikey_2D), @@ -357,7 +341,6 @@ annotate_spectra <- function(input = params$files$spectral$raw, c( "feature_id", "error_mz", - "error_rt", "structure_name" = "target_name", "structure_inchikey_2D", "structure_smiles_2D", diff --git 
a/R/filter_annotations.R b/R/filter_annotations.R
new file mode 100644
index 000000000..848ec0dbe
--- /dev/null
+++ b/R/filter_annotations.R
@@ -0,0 +1,117 @@
+utils::globalVariables(
+  c(
+    "error_rt",
+    "params",
+    "paths",
+    "rt",
+    "rt_target",
+    "type"
+  )
+)
+
+#' @title Filter annotations
+#'
+#' @description This function filters initial annotations.
+#'    Annotations whose library retention time deviates from the one of
+#'    their feature by more than three times the tolerance are discarded.
+#'    Annotations without library retention time are kept.
+#'
+#' @param annotations Prepared annotations file
+#' @param features Prepared features file
+#' @param rts Prepared retention time library
+#' @param output Output file
+#' @param tolerance_rt Tolerance to filter retention time (in minutes)
+#' @param parameters Params
+#'
+#' @return The path to the filtered annotations file
+#'
+#' @export
+#'
+#' @examples NULL
+filter_annotations <-
+  function(annotations = params$files$annotations$prepared,
+           features = params$files$features$prepared,
+           rts = params$files$libraries$temporal$prepared,
+           output = params$files$annotations$filtered,
+           tolerance_rt = params$ms$tolerances$rt$minutes,
+           parameters = params) {
+    ## `unlist()` and `as.character()` allow lists as well as vectors of paths
+    stopifnot(
+      "Annotations file(s) do(es) not exist" =
+        all(file.exists(as.character(unlist(annotations))))
+    )
+    stopifnot(
+      "Retention time file(s) do(es) not exist" =
+        all(file.exists(as.character(unlist(rts))))
+    )
+    stopifnot("Your features file does not exist." = file.exists(features))
+    stopifnot(
+      "Your rt tolerance must be a single number" =
+        is.numeric(tolerance_rt) &&
+          length(tolerance_rt) == 1 &&
+          !is.na(tolerance_rt)
+    )
+
+    paths <<- parse_yaml_paths()
+    params <<- parameters
+
+    ## All columns are read as character
+    read_tables <- function(files) {
+      lapply(
+        X = files,
+        FUN = tidytable::fread,
+        colClasses = "character",
+        na.strings = c("", "NA")
+      ) |>
+        tidytable::bind_rows()
+    }
+
+    log_debug(x = "... features")
+    features_table <- tidytable::fread(
+      file = features,
+      colClasses = "character",
+      na.strings = c("", "NA")
+    )
+    log_debug(x = "... annotations")
+    annotation_table <- read_tables(files = annotations)
+    log_debug(x = "... retention times")
+    rt_table <- read_tables(files = rts) |>
+      tidytable::rename(rt_target = rt)
+
+    ## TODO adapt for types and improve the * 3
+    rt_window <- tolerance_rt * 3
+    log_debug(
+      "Filtering annotations outside of",
+      rt_window,
+      "minutes tolerance"
+    )
+    features_annotated_table <- features_table |>
+      tidytable::left_join(annotation_table) |>
+      tidytable::left_join(rt_table) |>
+      data.frame() |>
+      dplyr::mutate(error_rt = as.numeric(rt) - as.numeric(rt_target)) |>
+      ## Sorting first makes `distinct()` keep the smallest absolute error
+      dplyr::arrange(abs(error_rt)) |>
+      tidytable::tidytable() |>
+      tidytable::distinct(-error_rt, -rt_target,
+        .keep_all = TRUE
+      ) |>
+      data.frame() |>
+      ## Annotations without retention time error (NA) are kept
+      dplyr::filter(abs(error_rt) <= abs(rt_window) |
+        is.na(error_rt)) |>
+      tidytable::tidytable() |>
+      tidytable::select(-rt_target, -type)
+
+    ## in case some features had a single filtered annotation
+    final_table <- features_table |>
+      tidytable::left_join(features_annotated_table)
+
+    export_params(step = "filter_annotations")
+    export_output(
+      x = final_table,
+      file = output[[1]]
+    )
+
+    return(output[[1]])
+  }
diff --git a/R/parse_cli_params.R b/R/parse_cli_params.R index 8027ef68d..a18e9d2a1 100644 --- a/R/parse_cli_params.R +++ b/R/parse_cli_params.R @@ -59,6 +59,9 @@ parse_cli_params <- function() { params$files$annotations$raw$sirius <- as.character(arguments$fil_ann_raw_sir) } + if (!is.null(arguments$fil_ann_fil)) { + params$files$annotations$filtered <- as.character(arguments$fil_ann_fil) + } if (!is.null(arguments$fil_ann_pre)) { params$files$annotations$prepared <- as.character(arguments$fil_ann_pre) } @@ -199,12 +202,13 @@ parse_cli_params <- function() { if (!is.null(arguments$names_mgf_po)) { params$names$mgf$polarity <- as.character(arguments$names_mgf_po) } - if (!is.null(arguments$names_mgf_pc)) { - params$names$mgf$precursor_charge <- as.character(arguments$names_mgf_pc) - } - if (!is.null(arguments$names_mgf_pm)) { - 
params$names$mgf$precursor_mz <- as.character(arguments$names_mgf_pm) - } + # if (!is.null(arguments$names_mgf_pc)) { + # params$names$mgf$precursor_charge <- + # as.character(arguments$names_mgf_pc) + # } + # if (!is.null(arguments$names_mgf_pm)) { + # params$names$mgf$precursor_mz <- as.character(arguments$names_mgf_pm) + # } if (!is.null(arguments$names_mgf_sm)) { params$names$mgf$smiles <- as.character(arguments$names_mgf_sm) } @@ -270,6 +274,9 @@ parse_cli_params <- function() { if (!is.null(arguments$too_tax_che)) { params$tools$taxonomies$chemical <- as.character(arguments$too_tax_che) } + if (!is.null(arguments$units_rt)) { + params$units$rt <- as.character(arguments$units_rt) + } if (!is.null(arguments$wei_glo_bio)) { params$weights$global$biological <- as.numeric(arguments$wei_glo_bio) } diff --git a/R/prepare_annotations_gnps.R b/R/prepare_annotations_gnps.R index b167d339e..24274f4fd 100644 --- a/R/prepare_annotations_gnps.R +++ b/R/prepare_annotations_gnps.R @@ -5,7 +5,6 @@ utils::globalVariables( "count_peaks_explained", "count_peaks_matched", "error_mz", - "error_rt", "ExactMass", "feature_id", "INCHI", @@ -122,13 +121,11 @@ prepare_annotations_gnps <- dplyr::mutate( error_mz = as.numeric(MZErrorPPM) * 1E-6 * - as.numeric(Precursor_MZ), - error_rt = NA + as.numeric(Precursor_MZ) ) |> tidytable::select( feature_id = `#Scan#`, error_mz = MassDiff, - error_rt, library = LibraryName, structure_name = Compound_Name, score_input = MQScore, diff --git a/R/prepare_annotations_sirius.R b/R/prepare_annotations_sirius.R index f4cbb3187..721dce8e0 100644 --- a/R/prepare_annotations_sirius.R +++ b/R/prepare_annotations_sirius.R @@ -5,7 +5,6 @@ utils::globalVariables( "count_peaks_matched", "CSI:FingerIDScore", "error_mz", - "error_rt", "explainedIntensity", "feature_id", "id", @@ -251,7 +250,6 @@ prepare_annotations_sirius <- tidytable::left_join(canopus_npc_prepared) |> tidytable::distinct() |> tidyft::mutate( - error_rt = NA, 
structure_taxonomy_classyfire_chemontid = NA, structure_taxonomy_classyfire_01kingdom = NA, ## mirror spectral match diff --git a/R/prepare_annotations_spectra.R b/R/prepare_annotations_spectra.R index 947d936a6..b59af5397 100644 --- a/R/prepare_annotations_spectra.R +++ b/R/prepare_annotations_spectra.R @@ -3,7 +3,6 @@ utils::globalVariables( "count_peaks_explained", "count_peaks_matched", "error_mz", - "error_rt", "feature_id", "params", "score", @@ -109,7 +108,6 @@ prepare_annotations_spectra <- tidytable::distinct( feature_id, error_mz, - error_rt, structure_name, structure_inchikey_2D, structure_smiles_2D,
diff --git a/R/prepare_libraries_rt.R b/R/prepare_libraries_rt.R
new file mode 100644
index 000000000..5d3a8c00e
--- /dev/null
+++ b/R/prepare_libraries_rt.R
@@ -0,0 +1,320 @@
+utils::globalVariables(
+  c(
+    "inchikey",
+    "inchikey.x",
+    "inchikey.y",
+    "params",
+    "paths",
+    "rt",
+    "smiles",
+    "structure_inchikey",
+    "structure_smiles",
+    "type"
+  )
+)
+
+#' @title Prepare libraries of retention times
+#'
+#' @description This function prepares retention times libraries
+#'    to be used for later
+#'
+#' @details Retention times are read from spectral libraries and/or
+#'    tabular files and converted to minutes when given in seconds.
+#'    Structures are linked to an InChIKey using the one provided,
+#'    the one of the keys library (matched on SMILES) or, as last resort,
+#'    an online conversion of the SMILES.
+#'    Only the 2D part of the InChIKey is exported.
+#'
+#' @param mgf_exp MGF containing experimental retention times
+#' @param mgf_is MGF containing in silico predicted retention times
+#' @param temp_exp File containing experimental retention times
+#' @param temp_is File containing in silico predicted retention times
+#' @param output Output file
+#' @param library Library containing the keys
+#' @param col_ik Name of the InChIKey in mgf
+#' @param col_rt Name of the retention time in mgf
+#' @param col_sm Name of the SMILES in mgf
+#' @param name_inchikey Name of the InChIKey in file
+#' @param name_rt Name of the retention time in file
+#' @param name_smiles Name of the SMILES in file
+#' @param unit_rt Unit of the retention time. Must be "seconds" or "minutes"
+#' @param parameters Params
+#'
+#' @return The path to the prepared retention time library
+#'
+#' @export
+#'
+#' @examples NULL
+prepare_libraries_rt <-
+  function(mgf_exp = params$files$libraries$spectral$exp,
+           mgf_is = params$files$libraries$spectral$is,
+           temp_exp = params$files$libraries$temporal$exp,
+           temp_is = params$files$libraries$temporal$is,
+           output = params$files$libraries$temporal$prepared,
+           library = paths$data$interim$libraries$sop$merged$keys,
+           col_ik = params$names$mgf$inchikey,
+           col_rt = params$names$mgf$retention_time,
+           col_sm = params$names$mgf$smiles,
+           name_inchikey = params$names$inchikey,
+           name_rt = params$names$rt,
+           name_smiles = params$names$smiles,
+           unit_rt = params$units$rt,
+           parameters = params) {
+    stopifnot("Your library file does not exist." = file.exists(library))
+
+    params <<- parameters
+
+    ## default transforms from `Spectra`
+    ## (`isTRUE()` as the names can be NULL when not set in the parameters)
+    if (isTRUE(col_rt == "RTINSECONDS")) {
+      col_rt <- "rtime"
+    }
+    if (isTRUE(col_sm == "SMILES")) {
+      col_sm <- "smiles"
+    }
+
+    ## Empty inputs (e.g. `[]` in the parameters) are considered missing
+    drop_empty <- function(x) {
+      x <- x[lengths(x) > 0]
+      if (length(x) == 0) {
+        return(NULL)
+      }
+      return(x)
+    }
+    mgf_exp <- drop_empty(mgf_exp)
+    mgf_is <- drop_empty(mgf_is)
+    temp_exp <- drop_empty(temp_exp)
+    temp_is <- drop_empty(temp_is)
+
+    rts_from_mgf <-
+      function(mgf) {
+        log_debug("Importing spectra ...")
+        spectra <- mgf |>
+          lapply(FUN = import_spectra)
+        log_debug("Extracting retention times...")
+        ## Looping over the imported libraries (instead of accessing `neg`
+        ## and `pos`) also works when a single polarity is available
+        rts <- spectra |>
+          lapply(
+            FUN = function(x) {
+              x@backend@spectraData |>
+                data.frame() |>
+                tidytable::tidytable()
+            }
+          ) |>
+          tidytable::bind_rows() |>
+          tidytable::select(tidytable::any_of(c(
+            rt = col_rt,
+            inchikey = col_ik,
+            smiles = col_sm
+          )))
+        return(rts)
+      }
+    rts_from_tab <-
+      function(tab) {
+        log_debug("Importing file ...")
+        rts <- tab |>
+          lapply(FUN = tidytable::fread) |>
+          tidytable::bind_rows() |>
+          tidytable::select(tidytable::any_of(
+            c(
+              rt = name_rt,
+              inchikey = name_inchikey,
+              smiles = name_smiles
+            )
+          ))
+        return(rts)
+      }
+    polish_df <-
+      function(df,
+               type = "experimental",
+               unit = unit_rt) {
+        if (!"rt" %in% colnames(df)) {
+          stop(
+            "No retention time column found, check the names in your params.",
+            call. = FALSE
+          )
+        }
+        in_seconds <- isTRUE(unit == "seconds")
+        df_polished <- df |>
+          data.frame() |>
+          dplyr::mutate(
+            type = type,
+            rt = if (in_seconds) {
+              rt / 60
+            } else {
+              rt
+            }
+          ) |>
+          ## The empty row ensures `inchikey` and `smiles` columns exist,
+          ## it is removed right after as its `rt` is missing
+          dplyr::bind_rows(data.frame(
+            inchikey = NA_character_,
+            smiles = NA_character_
+          )) |>
+          dplyr::filter(!is.na(as.numeric(rt))) |>
+          dplyr::distinct() |>
+          tidytable::tidytable()
+        return(df_polished)
+      }
+    complete_df <- function(df, library = keys) {
+      log_debug(
+        "There are",
+        nrow(df |>
+          dplyr::filter(is.na(inchikey))),
+        "entries without InChIKey.",
+        "We would recommend you adding them but will try completing."
+      )
+      log_debug("Completing with existing metadata...")
+      df_completed_smiles <- df |>
+        tidytable::inner_join(library, by = c("smiles" = "smiles"))
+      ## The provided InChIKey is named as in `df_completed_smiles`
+      ## so that it is not lost when the structure is not in the library
+      df_empty_smiles <- df |>
+        tidytable::anti_join(library, by = c("smiles" = "smiles")) |>
+        tidytable::rename(inchikey.x = inchikey)
+      ## Only structures lacking an InChIKey need to be queried
+      to_query <- is.na(df_empty_smiles$inchikey.x) &
+        !is.na(df_empty_smiles$smiles)
+      smiles_to_query <- unique(df_empty_smiles$smiles[to_query])
+      log_debug(
+        "There are still",
+        sum(is.na(df_empty_smiles$inchikey.x)),
+        "entries without InChIKey.",
+        "We will query them on the fly, this might take some time."
+      )
+      ## TODO change with a small dependency
+      get_inchikey <- function(smiles, toolkit = "rdkit") {
+        url <- paste0(
+          "https://api.naturalproducts.net/latest/convert/inchikey?smiles=",
+          ## SMILES contain reserved characters (`#`, `+`, `=`, ...)
+          utils::URLencode(smiles, reserved = TRUE),
+          "&toolkit=",
+          toolkit
+        )
+        result <- tryCatch(
+          expr = jsonlite::fromJSON(txt = url),
+          error = function(e) {
+            return(NA_character_)
+          }
+        )
+        ## Anything else than a single string is considered as not found
+        if (is.character(result) && length(result) == 1) {
+          return(result)
+        }
+        return(NA_character_)
+      }
+      inchikey_queried <- pbapply::pblapply(
+        X = smiles_to_query,
+        FUN = get_inchikey
+      ) |>
+        as.character()
+      df_empty_smiles <- df_empty_smiles |>
+        tidytable::left_join(
+          tidytable::tidytable(
+            smiles = smiles_to_query,
+            inchikey = inchikey_queried
+          ),
+          by = c("smiles" = "smiles")
+        )
+
+      df_completed <- df_completed_smiles |>
+        tidytable::bind_rows(df_empty_smiles) |>
+        tidyft::mutate_vars(
+          is.character,
+          .func = function(x) {
+            tidytable::na_if(x, "NA")
+          }
+        )
+      df_completed <- df_completed |>
+        data.frame() |>
+        dplyr::mutate(
+          structure_smiles = smiles,
+          structure_inchikey = tidytable::coalesce(
+            inchikey,
+            inchikey.x,
+            inchikey.y
+          )
+        ) |>
+        tidytable::tidytable() |>
+        tidytable::select(
+          rt,
+          structure_smiles,
+          structure_inchikey,
+          type
+        ) |>
+        tidytable::distinct()
+      log_debug(
+        "There were still",
+        nrow(df_completed |>
+          dplyr::filter(is.na(
+            structure_inchikey
+          ))),
+        "entries for which no InChIKey could be found in the end."
+      )
+      return(df_completed)
+    }
+
+    keys <- library |>
+      tidytable::fread(select = c(
+        "structure_inchikey",
+        "structure_smiles"
+      )) |>
+      tidytable::distinct() |>
+      tidytable::select(
+        inchikey = structure_inchikey,
+        smiles = structure_smiles
+      ) |>
+      ## Missing SMILES cannot be used to link structures
+      tidytable::filter(!is.na(smiles))
+
+    empty_df <- tidytable::tidytable(
+      rt = NA_real_,
+      structure_smiles = NA_character_,
+      structure_inchikey = NA_character_,
+      type = NA_character_
+    )
+
+    ## Each input is optional, missing ones give a table of missing values
+    ## which is discarded below
+    get_rts <- function(input, reader, type) {
+      if (is.null(input)) {
+        return(empty_df)
+      }
+      rts <- input |>
+        reader() |>
+        polish_df(type = type) |>
+        complete_df()
+      return(rts)
+    }
+
+    rts <- dplyr::bind_rows(
+      get_rts(input = mgf_exp, reader = rts_from_mgf, type = "experimental"),
+      get_rts(input = temp_exp, reader = rts_from_tab, type = "experimental"),
+      get_rts(input = mgf_is, reader = rts_from_mgf, type = "predicted"),
+      get_rts(input = temp_is, reader = rts_from_tab, type = "predicted")
+    ) |>
+      dplyr::filter(!is.na(as.numeric(rt))) |>
+      dplyr::filter(!is.na((structure_inchikey))) |>
+      dplyr::select(-structure_smiles) |>
+      dplyr::distinct() |>
+      dplyr::mutate(structure_inchikey_2D = gsub(
+        pattern = "-.*",
+        replacement = "",
+        x = structure_inchikey
+      )) |>
+      ## TODO REMINDER FOR NOW
+      dplyr::select(-structure_inchikey)
+
+    if (nrow(rts) == 0) {
+      log_debug("No retention time library found, returning an empty table.")
+      rts <- tidytable::tidytable(
+        rt = NA_real_,
+        structure_inchikey_2D = NA_character_,
+        type = NA_character_
+      )
+    }
+    export_params(step = "prepare_libraries_rt")
+    export_output(x = rts, file = output)
+    return(output)
+  }
diff --git a/R/select_annotations_columns.R b/R/select_annotations_columns.R index 8060ace66..127d1bfaf 100644 --- a/R/select_annotations_columns.R +++ b/R/select_annotations_columns.R @@ -59,7 +59,6 
@@ select_annotations_columns <- function(df, tidytable::select( feature_id, error_mz, - error_rt, structure_name, # structure_inchikey, structure_inchikey_2D, diff --git a/R/weight_annotations.R b/R/weight_annotations.R index ecda52e53..5b348bbfd 100644 --- a/R/weight_annotations.R +++ b/R/weight_annotations.R @@ -8,7 +8,7 @@ utils::globalVariables( #' @title Weight annotations #' -#' @description This function weights initial annotations. +#' @description This function weights annotations. #' #' @include clean_bio.R #' @include clean_chemo.R @@ -25,7 +25,6 @@ utils::globalVariables( #' @param annotations Prepared annotations file #' @param components Prepared components file #' @param edges Prepared edges file -#' @param features Prepared features file #' @param taxa Prepared taxed features file #' @param output Output file #' @param candidates_initial Number of initial candidates to keep @@ -112,7 +111,7 @@ weight_annotations <- merged$ structures$ dd_ddd, - annotations = params$files$annotations$prepared, + annotations = params$files$annotations$filtered, components = params$ files$ networks$ @@ -120,7 +119,6 @@ weight_annotations <- components$ prepared, edges = params$files$networks$spectral$edges$prepared, - features = params$files$features$prepared, taxa = params$files$taxa$prepared, output = params$files$annotations$processed, candidates_initial = params$annotations$candidates$initial, @@ -175,19 +173,6 @@ weight_annotations <- params <<- parameters log_debug(x = "... files ...") - log_debug(x = "... features") - features_table <- tidytable::fread( - file = features, - colClasses = "character", - na.strings = c("", "NA") - ) - log_debug(x = "... components") - components_table <- tidytable::fread( - file = components, - colClasses = "character", - na.strings = c("", "NA") - ) - log_debug(x = "... annotations") annotation_table <- lapply( X = annotations, @@ -197,21 +182,28 @@ weight_annotations <- ) |> tidytable::bind_rows() - log_debug(x = "... 
metadata_table_biological_annotation") - taxed_features_table <- tidytable::fread( - file = taxa, + log_debug(x = "... components") + components_table <- tidytable::fread( + file = components, colClasses = "character", na.strings = c("", "NA") ) - log_debug(x = "... edges table") + log_debug(x = "... edges") edges_table <- tidytable::fread( file = edges, colClasses = "character", na.strings = c("", "NA") ) - log_debug(x = "... structure-organism pairs table") + log_debug(x = "... taxa") + taxed_features_table <- tidytable::fread( + file = taxa, + colClasses = "character", + na.strings = c("", "NA") + ) + + log_debug(x = "... structure-organism pairs") structure_organism_pairs_table <- tidytable::fread( file = library, @@ -229,6 +221,10 @@ weight_annotations <- na.strings = c("", "NA") )) + log_debug(x = "... features") + features_table <- annotation_table |> + tidytable::distinct(feature_id, rt, mz) + if (ms1_only == TRUE) { annotation_table <- annotation_table |> tidyft::filter(score_input == 0) diff --git a/codemeta.json b/codemeta.json index 5f1aba279..e4107ed34 100644 --- a/codemeta.json +++ b/codemeta.json @@ -8,7 +8,7 @@ "codeRepository": "https://github.com/taxonomicallyinformedannotation/tima-r", "issueTracker": "https://github.com/taxonomicallyinformedannotation/tima-r/issues", "license": "https://spdx.org/licenses/GPL-3.0", - "version": "2.8.2", + "version": "2.9.0", "programmingLanguage": { "@type": "ComputerLanguage", "name": "R", @@ -346,7 +346,7 @@ "@type": "SoftwareApplication", "identifier": "httr", "name": "httr", - "version": ">= 1.4.6", + "version": ">= 1.4.7", "provider": { "@id": "https://cran.r-project.org", "@type": "Organization", @@ -545,7 +545,7 @@ "taxonomicdistance", "specializedmetabolome" ], - "fileSize": "2595.064KB", + "fileSize": "2665.504KB", "citation": [ { "@type": "ScholarlyArticle", diff --git a/inst/app/R/save_input.R b/inst/app/R/save_input.R index 58478c038..37d4df48f 100644 --- a/inst/app/R/save_input.R +++ 
b/inst/app/R/save_input.R @@ -157,13 +157,6 @@ save_input <- function(input) { dalton$ ms2 <- shiny::isolate(input$ms_tol_mas_dal_ms2) - yamls_params$ - annotate_spectra$ - ms$ - tolerances$ - rt$ - minutes <- - ms_tol_rt_min yamls_params$ create_edges_spectra$ @@ -231,15 +224,16 @@ save_input <- function(input) { dalton$ ms2 <- shiny::isolate(input$ms_tol_mas_dal_ms2) + yamls_params$create_edges_spectra$names$source <- names_source + yamls_params$create_edges_spectra$names$target <- names_target + yamls_params$ - create_edges_spectra$ + filter_annotations$ ms$ tolerances$ rt$ minutes <- ms_tol_rt_min - yamls_params$create_edges_spectra$names$source <- names_source - yamls_params$create_edges_spectra$names$target <- names_target yamls_params$prepare_features_edges$names$source <- names_source yamls_params$prepare_features_edges$names$target <- names_target @@ -263,6 +257,9 @@ save_input <- function(input) { yamls_params$prepare_features_tables$names$rt <- shiny::isolate(input$names_rt) + yamls_params$prepare_libraries_rt$names$rt <- + shiny::isolate(input$names_rt_2) + yamls_params$ prepare_libraries_sop_merged$ organisms$ diff --git a/inst/app/ui.R b/inst/app/ui.R index 1e0544f19..82b5f0cb6 100644 --- a/inst/app/ui.R +++ b/inst/app/ui.R @@ -618,7 +618,7 @@ ui <- fluidPage( ), textInput( inputId = "names_rt", - label = "Name of \"retention time\" variable in the input", + label = "Name of \"retention time\" variable in the feature table", value = "row retention time" ) |> shinyhelper::helper( @@ -630,6 +630,17 @@ ui <- fluidPage( "Assumed to be in minutes." ) ), + textInput( + inputId = "names_rt_2", + label = "Name of \"retention time\" variable in the rt library", + value = "rt" + ) |> + shinyhelper::helper( + type = "inline", + content = c( + "Name of the `retention time` column in your rt library file." 
+ ) + ), textInput( inputId = "names_source", label = "Name of \"source IDs\" variable in the input", diff --git a/inst/params/default/annotate_spectra.yaml b/inst/params/default/annotate_spectra.yaml index 82c524b15..a09455b7d 100644 --- a/inst/params/default/annotate_spectra.yaml +++ b/inst/params/default/annotate_spectra.yaml @@ -75,8 +75,3 @@ ms: dalton: #' Absolute mass tolerance for MS2 in Dalton. FLOAT ms2: 0.01 - - #' For retention time. - rt: - #' Retention time tolerance in minutes. FLOAT - minutes: 0.05 diff --git a/inst/params/default/filter_annotations.yaml b/inst/params/default/filter_annotations.yaml new file mode 100644 index 000000000..bb61b07b9 --- /dev/null +++ b/inst/params/default/filter_annotations.yaml @@ -0,0 +1,38 @@ +--- +#' Files. +files: + #' Annotations files. + annotations: + #' List of filtered annotations. STRING + filtered: + - data/interim/annotations/example_annotations_filtered.tsv.gz + + #' List of prepared annotations. STRING + prepared: + - data/interim/annotations/example_ms1_prepared.tsv.gz + - data/interim/annotations/example_gnps_prepared.tsv.gz + - data/interim/annotations/example_spectral_matches_prepared.tsv.gz + - data/interim/annotations/example_sirius_prepared.tsv.gz + # - data/interim/annotations/example_exp_rt_prepared.tsv.gz + + #' Features files. + features: + #' Prepared features file. STRING + prepared: data/interim/features/example_features.tsv.gz + + #' Libraries files. + libraries: + #' Temporal libraries. STRING + temporal: + #' List of prepared temporal libraries. STRING + prepared: + - data/interim/libraries/rt/prepared.tsv.gz + +#' MS related parameters. +ms: + #' Tolerances. + tolerances: + #' For retention time. + rt: + #' Retention time tolerance in minutes. 
FLOAT + minutes: 0.05 \ No newline at end of file diff --git a/inst/params/default/params.yaml b/inst/params/default/params.yaml index 3cc751af5..11bdf9987 100644 --- a/inst/params/default/params.yaml +++ b/inst/params/default/params.yaml @@ -35,26 +35,8 @@ annotations: #' Perform approximative matching without precursor matching? BOOLEAN approx: no - #' Similarity method to be used to compare spectra. - #' Currently "gnps", "navdist", "ndotproduct", - # "neuclidean", "nspectraangle" supported. STRING - method: gnps - #' Thresholds thresholds: - #' Condition to be used to retain candidates. - #' Must be "OR" or "AND". - #' Example: Minimum 6 peaks AND 0.2 similarity. STRING - condition: OR - - #' For peaks. - peaks: - #' Minimal shared peaks (absolute). INTEGER - absolute: 6 - - #' Minimal shared peaks (ratio). FLOAT - ratio: 0.2 - #' For similarity. FLOAT similarity: 0.2 @@ -75,6 +57,10 @@ files: #' Directory containing the sirius results. STRING sirius: data/interim/annotations/example_sirius/ + #' List of filtered annotations. STRING + filtered: + - data/interim/annotations/example_annotations_filtered.tsv.gz + #' List of prepared annotations. STRING prepared: - data/interim/annotations/example_gnps_prepared.tsv.gz @@ -176,6 +162,22 @@ files: pos: - data/interim/libraries/spectra/is/lotus_pos.rds + #' Temporal libraries. STRING + temporal: + #' Experimental libraries + exp: + #' List of experimental temporal libraries. STRING + - data/interim/libraries/rt/internal.tsv + + #' In silico libraries + is: + #' List of in silico temporal libraries. STRING + - data/interim/libraries/rt/predicted.tsv + + #' List of prepared temporal libraries. STRING + prepared: + - data/interim/libraries/rt/prepared.tsv.gz + #' Networks files. networks: #' Spectral networks files. @@ -333,6 +335,9 @@ names: #' Name of "filename" variable in the input. STRING filename: filename + #' Name of "InChIKey" variable in the input. 
STRING + inchikey: inchikey + #' Name of fields present in the MGF. mgf: #' Name of "collision energy" in MGF. STRING @@ -365,6 +370,9 @@ names: #' Name of "polarity" in MGF. STRING polarity: IONMODE + #' Name of "retention time" in MGF. STRING + retention_time: ~ + #' Name of "SMILES" in MGF. STRING smiles: SMILES @@ -389,6 +397,9 @@ names: #' Name of "retention time" variable in the input. STRING rt: rt + #' Name of "SMILES" variable in the input. STRING + smiles: smiles + #' Name of "source IDs" variable in the input. STRING source: CLUSTERID1 @@ -450,6 +461,11 @@ tools: #' Currently only "npc" or supported. STRING chemical: npc +#' Units of the different variables in the input files. +units: + #' Unit of the "retention time" variable in the input files. STRING + rt: minutes + #' Weights to apply to each part of the final score. weights: #' Global weights. diff --git a/inst/params/default/prepare_libraries_rt.yaml b/inst/params/default/prepare_libraries_rt.yaml new file mode 100644 index 000000000..d81db8ecf --- /dev/null +++ b/inst/params/default/prepare_libraries_rt.yaml @@ -0,0 +1,64 @@ +--- +#' Files. +files: + #' Libraries files. + libraries: + #' Spectral libraries. STRING + spectral: + #' Experimental libraries + exp: + #' List of negative experimental spectral libraries. STRING + neg: + - data/interim/libraries/spectra/exp/internal_neg.rds + + #' List of positive experimental spectral libraries. STRING + pos: + - data/interim/libraries/spectra/exp/internal_pos.rds + + #' In silico libraries + is: + #' List of negative in silico spectral libraries. STRING + neg: [] + + #' List of positive in silico spectral libraries. STRING + pos: [] + + #' Temporal libraries. STRING + temporal: + #' List of experimental temporal libraries. STRING + exp: [] + + #' List of in silico temporal libraries. STRING + is: [] + + #' List of prepared temporal libraries. 
STRING + prepared: + - data/interim/libraries/rt/prepared.tsv.gz + +#' Names of the different variables in the input files. +names: + #' Name of "InChIKey" variable in the input. STRING + inchikey: inchikey + + #' Name of fields present in the MGF. + mgf: + #' Name of "InChIKey" in MGF. STRING + inchikey: ~ + + #' Name of "retention time" in MGF. STRING + retention_time: RTINSECONDS + + #' Name of "SMILES" in MGF. STRING + smiles: SMILES + + #' Name of "retention time" variable in the input. STRING + rt: rt + + #' Name of "SMILES" variable in the input. STRING + smiles: smiles + +#' Units of the different variables in the input files. +units: + #' Unit of the "retention time" variable in the input files. + #' Must be "seconds" or "minutes". STRING + rt: seconds diff --git a/inst/params/default/prepare_libraries_sop_merged.yaml b/inst/params/default/prepare_libraries_sop_merged.yaml index 8e4f1d3bc..ee08e958b 100644 --- a/inst/params/default/prepare_libraries_sop_merged.yaml +++ b/inst/params/default/prepare_libraries_sop_merged.yaml @@ -11,9 +11,6 @@ files: - data/interim/libraries/sop/ecmdb_prepared.tsv.gz - data/interim/libraries/sop/lotus_prepared.tsv.gz - #' Merged structure organism pairs library. STRING - merged: data/interim/libraries/sop/library.tsv.gz - #' Organisms related parameters. organisms: #' Organisms filter related parameters. diff --git a/inst/params/default/weight_annotations.yaml b/inst/params/default/weight_annotations.yaml index 6aa9f0316..78b485de2 100644 --- a/inst/params/default/weight_annotations.yaml +++ b/inst/params/default/weight_annotations.yaml @@ -34,13 +34,9 @@ files: #' Annotations files. annotations: - #' List of prepared annotations. 
STRING - prepared: - - data/interim/annotations/example_ms1_prepared.tsv.gz - - data/interim/annotations/example_gnps_prepared.tsv.gz - - data/interim/annotations/example_spectral_matches_prepared.tsv.gz - - data/interim/annotations/example_sirius_prepared.tsv.gz - # - data/interim/annotations/example_exp_rt_prepared.tsv.gz + #' List of filtered annotations. STRING + filtered: + - data/interim/annotations/example_annotations_filtered.tsv.gz #' Final results file. STRING processed: annotations.tsv @@ -50,11 +46,6 @@ files: #' Merged structure organism pairs library. STRING merged: data/interim/libraries/keys.tsv.gz - #' Features files. - features: - #' Prepared features file. STRING - prepared: data/interim/features/example_features.tsv.gz - #' Networks files. networks: #' Spectral networks files. diff --git a/inst/paths.yaml b/inst/paths.yaml index d88ef637b..039bd6e64 100644 --- a/inst/paths.yaml +++ b/inst/paths.yaml @@ -119,6 +119,8 @@ params: components: inst/params/default/create_components.yaml edges: spectra: inst/params/default/create_edges_spectra.yaml + filter: + annotations: inst/params/default/filter_annotations.yaml prepare: features: components: inst/params/default/prepare_features_components.yaml @@ -126,6 +128,7 @@ params: tables: inst/params/default/prepare_features_tables.yaml libraries: adducts: inst/params/default/prepare_libraries_adducts.yaml + rt: inst/params/default/prepare_libraries_rt.yaml sop: closed: inst/params/default/prepare_libraries_sop_closed.yaml ecmdb: inst/params/default/prepare_libraries_sop_ecmdb.yaml @@ -150,6 +153,8 @@ params: components: inst/params/user/create_components.yaml edges: spectra: inst/params/user/create_edges_spectra.yaml + filter: + annotations: inst/params/user/filter_annotations.yaml prepare: features: components: inst/params/user/prepare_features_components.yaml @@ -157,6 +162,7 @@ params: tables: inst/params/user/prepare_features_tables.yaml libraries: adducts: 
inst/params/user/prepare_libraries_adducts.yaml + rt: inst/params/user/prepare_libraries_rt.yaml sop: closed: inst/params/user/prepare_libraries_sop_closed.yaml ecmdb: inst/params/user/prepare_libraries_sop_ecmdb.yaml @@ -209,4 +215,4 @@ urls: pattern: pos: isdb_pos.mgf neg: isdb_neg.mgf -version: 2.8.2 +version: 2.9.0 diff --git a/inst/pipelines/_targets.R b/inst/pipelines/_targets.R index 862b0cab7..e0feb4eed 100644 --- a/inst/pipelines/_targets.R +++ b/inst/pipelines/_targets.R @@ -89,6 +89,13 @@ list( par_def_cre_edg_spe <- paths$params$default$create$edges$spectra } ), + tar_file( + name = par_def_fil_ann, + command = { + par_def_fil_ann <- + paths$params$default$filter$annotations + } + ), tar_file( name = par_def_pre_ann_gnp, command = { @@ -135,6 +142,13 @@ list( paths$params$default$prepare$libraries$adducts } ), + tar_file( + name = par_def_pre_lib_rt, + command = { + par_def_pre_lib_rt <- + paths$params$default$prepare$libraries$rt + } + ), tar_file( name = par_def_pre_lib_sop_clo, command = { @@ -260,6 +274,22 @@ list( ) } ), + tar_file( + name = par_usr_fil_ann, + command = { + par_usr_fil_ann <- + prepare_params( + filename = par_fin_par$files$pattern, + features = par_fin_par$files$features$raw, + spectra = par_fin_par$files$spectral$raw, + gnps_job_id = par_fin_par$gnps$id, + ms_mode = par_fin_par$ms$polarity, + taxon = par_fin_par$organisms$taxon, + parameters = par_fin_par, + step = "filter_annotations" + ) + } + ), tar_file( name = par_usr_cre_edg_spe, command = { @@ -388,6 +418,22 @@ list( ) } ), + tar_file( + name = par_usr_pre_lib_rt, + command = { + par_usr_pre_lib_rt <- + prepare_params( + filename = par_fin_par$files$pattern, + features = par_fin_par$files$features$raw, + spectra = par_fin_par$files$spectral$raw, + gnps_job_id = par_fin_par$gnps$id, + ms_mode = par_fin_par$ms$polarity, + taxon = par_fin_par$organisms$taxon, + parameters = par_fin_par, + step = "prepare_libraries_rt" + ) + } + ), tar_file( name = par_usr_pre_lib_sop_clo, 
command = { @@ -560,6 +606,16 @@ list( ) } ), + tar_target( + name = par_fil_ann, + command = { + par_fil_ann <- + parse_yaml_params( + def = par_def_fil_ann, + usr = par_usr_fil_ann[1] + ) + } + ), tar_target( name = par_pre_ann_gnp, command = { @@ -630,6 +686,16 @@ list( ) } ), + tar_target( + name = par_pre_lib_rt, + command = { + par_pre_lib_rt <- + parse_yaml_params( + def = par_def_pre_lib_rt, + usr = par_usr_pre_lib_rt[1] + ) + } + ), tar_target( name = par_pre_lib_sop_clo, command = { @@ -1252,7 +1318,36 @@ list( ) ) ) - ) + ), + ## Retention times + list(tar_file( + name = lib_rt, + command = { + lib_rt <- prepare_libraries_rt( + ## TODO refactor to avoid "pos/neg" + mgf_exp = list( + "neg" = lib_spe_exp_int_pre_neg, + "pos" = lib_spe_exp_int_pre_pos + ), + mgf_is = list( + "neg" = lib_spe_is_lot_pre_neg, + "pos" = lib_spe_is_lot_pre_pos + ), + temp_exp = NULL, + temp_is = NULL, + output = par_pre_lib_rt$files$libraries$temporal$prepared, + library = lib_mer_key, + col_ik = par_pre_lib_rt$names$mgf$inchikey, + col_rt = par_pre_lib_rt$names$mgf$retention_time, + col_sm = par_pre_lib_rt$names$mgf$smiles, + name_inchikey = par_pre_lib_rt$names$inchikey, + name_rt = par_pre_lib_rt$names$rt, + name_smiles = par_pre_lib_rt$names$smiles, + unit_rt = par_pre_lib_rt$units$rt, + parameters = par_pre_lib_rt + ) + } + )) ), ## Annotations list( @@ -1599,18 +1694,49 @@ list( } ), tar_file( - name = ann_pre, + name = ann_fil, command = { - ann_pre <- weight_annotations( - library = lib_mer_key, - str_2d_3d = lib_mer_str_2d_3d, + ann_fil <- filter_annotations( annotations = list( ann_spe_is_pre, ann_ms1_pre_ann ), + features = fea_pre, + rts = lib_rt, + output = par_fil_ann$files$annotations$filtered, + tolerance_rt = par_fil_ann$ms$tolerances$rt$minutes, + parameters = par_fil_ann + ) + } + ), + tar_file( + name = ann_fil_crazy, + command = { + ann_fil_crazy <- filter_annotations( + annotations = list( + ann_spe_exp_gnp_pre, + ann_spe_exp_int_pre, + ann_spe_is_pre, + 
ann_sir_pre, + ann_ms1_pre_ann + ), + features = fea_pre, + rts = lib_rt, + output = par_fil_ann$files$annotations$filtered, + tolerance_rt = par_fil_ann$ms$tolerances$rt$minutes, + parameters = par_fil_ann + ) + } + ), + tar_file( + name = ann_pre, + command = { + ann_pre <- weight_annotations( + library = lib_mer_key, + str_2d_3d = lib_mer_str_2d_3d, + annotations = ann_fil, components = fea_com_pre, edges = fea_edg_pre, - features = fea_pre, taxa = tax_pre, output = par_wei_ann$files$annotations$processed, candidates_initial = par_wei_ann$annotations$candidates$initial, @@ -1658,16 +1784,9 @@ list( ann_pre_crazy <- weight_annotations( library = lib_mer_key, str_2d_3d = lib_mer_str_2d_3d, - annotations = list( - ann_spe_exp_gnp_pre, - ann_spe_exp_int_pre, - ann_spe_is_pre, - ann_sir_pre, - ann_ms1_pre_ann - ), + annotations = ann_fil_crazy, components = fea_com_pre, edges = fea_edg_pre, - features = fea_pre, taxa = tax_pre, output = par_wei_ann$files$annotations$processed, candidates_initial = par_wei_ann$annotations$candidates$initial, @@ -2062,9 +2181,9 @@ list( } ), tar_target( - name = def_ann_mas, + name = benchmark_def_ann_mas, command = { - def_ann_mas <- parse_yaml_params( + benchmark_def_ann_mas <- parse_yaml_params( def = par_def_ann_mas, usr = par_def_ann_mas ) @@ -2081,21 +2200,21 @@ list( "data/interim/benchmark/benchmark_ann_ms1_pos.tsv.gz", output_edges = "data/interim/benchmark/benchmark_edges_ms1_pos.tsv.gz", - name_source = def_ann_mas$names$source, - name_target = def_ann_mas$names$target, + name_source = benchmark_def_ann_mas$names$source, + name_target = benchmark_def_ann_mas$names$target, str_2d_3d = lib_mer_str_2d_3d, str_met = lib_mer_str_met, str_nam = lib_mer_str_nam, str_tax_cla = lib_mer_str_tax_cla, str_tax_npc = lib_mer_str_tax_npc, name = lib_add["pos"], - adducts_list = def_ann_mas$ms$adducts, + adducts_list = benchmark_def_ann_mas$ms$adducts, adducts_masses_list = dic_add, neutral_losses_list = dic_neu_los, ms_mode = "pos", - 
tolerance_ppm = def_ann_mas$ms$tolerances$mass$ppm$ms1, - tolerance_rt = def_ann_mas$ms$tolerances$rt$minutes, - parameters = def_ann_mas + tolerance_ppm = benchmark_def_ann_mas$ms$tolerances$mass$ppm$ms1, + tolerance_rt = benchmark_def_ann_mas$ms$tolerances$rt$minutes, + parameters = benchmark_def_ann_mas ) } ), @@ -2110,28 +2229,28 @@ list( "data/interim/benchmark/benchmark_ann_ms1_neg.tsv.gz", output_edges = "data/interim/benchmark/benchmark_edges_ms1_neg.tsv.gz", - name_source = def_ann_mas$names$source, - name_target = def_ann_mas$names$target, + name_source = benchmark_def_ann_mas$names$source, + name_target = benchmark_def_ann_mas$names$target, str_2d_3d = lib_mer_str_2d_3d, str_met = lib_mer_str_met, str_nam = lib_mer_str_nam, str_tax_cla = lib_mer_str_tax_cla, str_tax_npc = lib_mer_str_tax_npc, name = lib_add["neg"], - adducts_list = def_ann_mas$ms$adducts, + adducts_list = benchmark_def_ann_mas$ms$adducts, adducts_masses_list = dic_add, neutral_losses_list = dic_neu_los, ms_mode = "neg", - tolerance_ppm = def_ann_mas$ms$tolerances$mass$ppm$ms1, - tolerance_rt = def_ann_mas$ms$tolerances$rt$minutes, - parameters = def_ann_mas + tolerance_ppm = benchmark_def_ann_mas$ms$tolerances$mass$ppm$ms1, + tolerance_rt = benchmark_def_ann_mas$ms$tolerances$rt$minutes, + parameters = benchmark_def_ann_mas ) } ), tar_target( - name = def_cre_edg_spe, + name = benchmark_def_cre_edg_spe, command = { - def_cre_edg_spe <- parse_yaml_params( + benchmark_def_cre_edg_spe <- parse_yaml_params( def = par_def_cre_edg_spe, usr = par_def_cre_edg_spe ) @@ -2143,17 +2262,17 @@ list( benchmark_edg_spe_pos <- create_edges_spectra( input = benchmark_pre_mgf_pos, output = "data/interim/benchmark/benchmark_edges_spe_pos.tsv.gz", - name_source = def_cre_edg_spe$names$source, - name_target = def_cre_edg_spe$names$target, - threshold = def_cre_edg_spe$ + name_source = benchmark_def_cre_edg_spe$names$source, + name_target = benchmark_def_cre_edg_spe$names$target, + threshold = 
benchmark_def_cre_edg_spe$ annotations$ ms2$ thresholds$ similarity, - ppm = def_cre_edg_spe$ms$tolerances$mass$ppm$ms2, - dalton = def_cre_edg_spe$ms$tolerances$mass$dalton$ms2, + ppm = benchmark_def_cre_edg_spe$ms$tolerances$mass$ppm$ms2, + dalton = benchmark_def_cre_edg_spe$ms$tolerances$mass$dalton$ms2, qutoff = 0, - parameters = def_cre_edg_spe + parameters = benchmark_def_cre_edg_spe ) } ), @@ -2163,24 +2282,24 @@ list( benchmark_edg_spe_neg <- create_edges_spectra( input = benchmark_pre_mgf_neg, output = "data/interim/benchmark/benchmark_edges_spe_neg.tsv.gz", - name_source = def_cre_edg_spe$names$source, - name_target = def_cre_edg_spe$names$target, - threshold = def_cre_edg_spe$ + name_source = benchmark_def_cre_edg_spe$names$source, + name_target = benchmark_def_cre_edg_spe$names$target, + threshold = benchmark_def_cre_edg_spe$ annotations$ ms2$ thresholds$ similarity, - ppm = def_cre_edg_spe$ms$tolerances$mass$ppm$ms2, - dalton = def_cre_edg_spe$ms$tolerances$mass$dalton$ms2, + ppm = benchmark_def_cre_edg_spe$ms$tolerances$mass$ppm$ms2, + dalton = benchmark_def_cre_edg_spe$ms$tolerances$mass$dalton$ms2, qutoff = 0, - parameters = def_cre_edg_spe + parameters = benchmark_def_cre_edg_spe ) } ), tar_target( - name = def_pre_fea_edg, + name = benchmark_def_pre_fea_edg, command = { - def_cre_edg_spe <- parse_yaml_params( + benchmark_def_pre_fea_edg <- parse_yaml_params( def = par_def_pre_fea_edg, usr = par_def_pre_fea_edg ) @@ -2192,9 +2311,9 @@ list( benchmark_edg_pre_pos <- prepare_features_edges( input = list(benchmark_ann_ms1_pre_pos[[2]], benchmark_edg_spe_pos), output = "data/interim/benchmark/benchmark_edges_pos.tsv.gz", - name_source = def_pre_fea_edg$names$source, - name_target = def_pre_fea_edg$names$target, - parameters = def_pre_fea_edg + name_source = benchmark_def_pre_fea_edg$names$source, + name_target = benchmark_def_pre_fea_edg$names$target, + parameters = benchmark_def_pre_fea_edg ) } ), @@ -2204,16 +2323,16 @@ list( benchmark_edg_pre_neg <- 
prepare_features_edges( input = list(benchmark_ann_ms1_pre_neg[[2]], benchmark_edg_spe_neg), output = "data/interim/benchmark/benchmark_edges_neg.tsv.gz", - name_source = def_pre_fea_edg$names$source, - name_target = def_pre_fea_edg$names$target, - parameters = def_pre_fea_edg + name_source = benchmark_def_pre_fea_edg$names$source, + name_target = benchmark_def_pre_fea_edg$names$target, + parameters = benchmark_def_pre_fea_edg ) } ), tar_target( - name = def_cre_com, + name = benchmark_def_cre_edg_com, command = { - def_cre_edg_spe <- parse_yaml_params( + benchmark_def_cre_edg_com <- parse_yaml_params( def = par_def_cre_com, usr = par_def_cre_com ) @@ -2225,7 +2344,7 @@ list( benchmark_com_pos <- create_components( input = benchmark_edg_pre_pos, output = "data/interim/benchmark/benchmark_components_pos.tsv.gz", - parameters = def_cre_com + parameters = benchmark_def_cre_edg_com ) } ), @@ -2235,14 +2354,14 @@ list( benchmark_com_neg <- create_components( input = benchmark_edg_pre_neg, output = "data/interim/benchmark/benchmark_components_neg.tsv.gz", - parameters = def_cre_com + parameters = benchmark_def_cre_edg_com ) } ), tar_target( - name = def_pre_fea_com, + name = benchmark_def_pre_fea_com, command = { - def_cre_edg_spe <- parse_yaml_params( + benchmark_def_pre_fea_com <- parse_yaml_params( def = par_def_pre_fea_com, usr = par_def_pre_fea_com ) @@ -2259,7 +2378,7 @@ list( spectral$ components$ prepared, - parameters = def_pre_fea_com + parameters = benchmark_def_pre_fea_com ) } ), @@ -2274,14 +2393,14 @@ list( spectral$ components$ prepared, - parameters = def_pre_fea_com + parameters = benchmark_def_pre_fea_com ) } ), tar_target( - name = def_ann_spe, + name = benchmark_def_ann_spe, command = { - def_cre_edg_spe <- parse_yaml_params( + benchmark_def_ann_spe <- parse_yaml_params( def = par_def_ann_spe, usr = par_def_ann_spe ) @@ -2295,12 +2414,13 @@ list( library = lib_spe_is_lot_pre_pos, polarity = "pos", output = 
"data/interim/benchmark/benchmark_ann_spe_pos.tsv.gz", - threshold = def_ann_spe$annotations$ms2$thresholds$similarity, - ppm = def_ann_spe$ms$tolerances$mass$ppm$ms2, - dalton = def_ann_spe$ms$tolerances$mass$dalton$ms2, + threshold = + benchmark_def_ann_spe$annotations$ms2$thresholds$similarity, + ppm = benchmark_def_ann_spe$ms$tolerances$mass$ppm$ms2, + dalton = benchmark_def_ann_spe$ms$tolerances$mass$dalton$ms2, qutoff = 0, - approx = def_ann_spe$annotations$ms2$approx, - parameters = def_ann_spe + approx = benchmark_def_ann_spe$annotations$ms2$approx, + parameters = benchmark_def_ann_spe ) } ), @@ -2312,19 +2432,20 @@ list( library = lib_spe_is_lot_pre_neg, polarity = "neg", output = "data/interim/benchmark/benchmark_ann_spe_neg.tsv.gz", - threshold = def_ann_spe$annotations$ms2$thresholds$similarity, - ppm = def_ann_spe$ms$tolerances$mass$ppm$ms2, - dalton = def_ann_spe$ms$tolerances$mass$dalton$ms2, + threshold = + benchmark_def_ann_spe$annotations$ms2$thresholds$similarity, + ppm = benchmark_def_ann_spe$ms$tolerances$mass$ppm$ms2, + dalton = benchmark_def_ann_spe$ms$tolerances$mass$dalton$ms2, qutoff = 0, - approx = def_ann_spe$annotations$ms2$approx, - parameters = def_ann_spe + approx = benchmark_def_ann_spe$annotations$ms2$approx, + parameters = benchmark_def_ann_spe ) } ), tar_target( - name = def_pre_ann_spe, + name = benchmark_def_pre_ann_spe, command = { - def_cre_edg_spe <- parse_yaml_params( + benchmark_def_pre_ann_spe <- parse_yaml_params( def = par_def_pre_ann_spe, usr = par_def_pre_ann_spe ) @@ -2343,7 +2464,7 @@ list( str_nam = lib_mer_str_nam, str_tax_cla = lib_mer_str_tax_cla, str_tax_npc = lib_mer_str_tax_npc, - parameters = def_pre_ann_spe + parameters = benchmark_def_pre_ann_spe ) } ), @@ -2360,14 +2481,14 @@ list( str_nam = lib_mer_str_nam, str_tax_cla = lib_mer_str_tax_cla, str_tax_npc = lib_mer_str_tax_npc, - parameters = def_pre_ann_spe + parameters = benchmark_def_pre_ann_spe ) } ), tar_target( - name = def_wei_ann, + name = 
benchmark_def_wei_ann, command = { - def_cre_edg_spe <- parse_yaml_params( + benchmark_def_wei_ann <- parse_yaml_params( def = par_def_wei_ann, usr = par_def_wei_ann ) @@ -2381,35 +2502,57 @@ list( str_2d_3d = lib_mer_str_2d_3d, candidates_initial = 500, candidates_final = 500, - score_biological_domain = def_wei_ann$weights$biological$domain, - score_biological_kingdom = def_wei_ann$weights$biological$kingdom, - score_biological_phylum = def_wei_ann$weights$biological$phylum, - score_biological_class = def_wei_ann$weights$biological$class, - score_biological_order = def_wei_ann$weights$biological$order, - score_biological_infraorder = def_wei_ann$weights$biological$infraorder, - score_biological_family = def_wei_ann$weights$biological$family, - score_biological_subfamily = def_wei_ann$weights$biological$subfamily, - score_biological_tribe = def_wei_ann$weights$biological$tribe, - score_biological_subtribe = def_wei_ann$weights$biological$subtribe, - score_biological_genus = def_wei_ann$weights$biological$genus, - score_biological_subgenus = def_wei_ann$weights$biological$subgenus, - score_biological_species = def_wei_ann$weights$biological$species, - score_biological_subspecies = def_wei_ann$weights$biological$subspecies, - score_biological_variety = def_wei_ann$weights$biological$variety, - score_chemical_cla_kingdom = def_wei_ann$weights$chemical$cla$kingdom, + score_biological_domain = + benchmark_def_wei_ann$weights$biological$domain, + score_biological_kingdom = + benchmark_def_wei_ann$weights$biological$kingdom, + score_biological_phylum = + benchmark_def_wei_ann$weights$biological$phylum, + score_biological_class = + benchmark_def_wei_ann$weights$biological$class, + score_biological_order = + benchmark_def_wei_ann$weights$biological$order, + score_biological_infraorder = + benchmark_def_wei_ann$weights$biological$infraorder, + score_biological_family = + benchmark_def_wei_ann$weights$biological$family, + score_biological_subfamily = + 
benchmark_def_wei_ann$weights$biological$subfamily, + score_biological_tribe = + benchmark_def_wei_ann$weights$biological$tribe, + score_biological_subtribe = + benchmark_def_wei_ann$weights$biological$subtribe, + score_biological_genus = + benchmark_def_wei_ann$weights$biological$genus, + score_biological_subgenus = + benchmark_def_wei_ann$weights$biological$subgenus, + score_biological_species = + benchmark_def_wei_ann$weights$biological$species, + score_biological_subspecies = + benchmark_def_wei_ann$weights$biological$subspecies, + score_biological_variety = + benchmark_def_wei_ann$weights$biological$variety, + score_chemical_cla_kingdom = + benchmark_def_wei_ann$weights$chemical$cla$kingdom, score_chemical_cla_superclass = - def_wei_ann$weights$chemical$cla$superclass, - score_chemical_cla_class = def_wei_ann$weights$chemical$cla$class, - score_chemical_cla_parent = def_wei_ann$weights$chemical$cla$parent, - score_chemical_npc_pathway = def_wei_ann$weights$chemical$npc$pathway, + benchmark_def_wei_ann$weights$chemical$cla$superclass, + score_chemical_cla_class = + benchmark_def_wei_ann$weights$chemical$cla$class, + score_chemical_cla_parent = + benchmark_def_wei_ann$weights$chemical$cla$parent, + score_chemical_npc_pathway = + benchmark_def_wei_ann$weights$chemical$npc$pathway, score_chemical_npc_superclass = - def_wei_ann$weights$chemical$npc$superclass, - score_chemical_npc_class = def_wei_ann$weights$chemical$npc$class, - minimal_ms1_bio = def_wei_ann$annotations$ms1$thresholds$biological, - minimal_ms1_chemo = def_wei_ann$annotations$ms1$thresholds$chemical, - summarise = def_wei_ann$options$summarise, - pattern = def_wei_ann$files$pattern, - force = def_wei_ann$options$force, + benchmark_def_wei_ann$weights$chemical$npc$superclass, + score_chemical_npc_class = + benchmark_def_wei_ann$weights$chemical$npc$class, + minimal_ms1_bio = + benchmark_def_wei_ann$annotations$ms1$thresholds$biological, + minimal_ms1_chemo = + 
benchmark_def_wei_ann$annotations$ms1$thresholds$chemical, + summarise = benchmark_def_wei_ann$options$summarise, + pattern = benchmark_def_wei_ann$files$pattern, + force = benchmark_def_wei_ann$options$force, - parameters = def_wei_ann + parameters = benchmark_def_wei_ann ) } @@ -2420,7 +2563,6 @@ list( benchmark_files_pos <- list( components = benchmark_com_pre_pos, edges = benchmark_edg_pre_pos, - features = benchmark_pre_meta_pos, taxa = benchmark_taxed_pos ) } @@ -2431,7 +2573,6 @@ list( benchmark_files_pos <- list( components = benchmark_com_pre_neg, edges = benchmark_edg_pre_neg, - features = benchmark_pre_meta_neg, taxa = benchmark_taxed_neg ) } diff --git a/inst/scripts/docopt/annotate_masses.txt b/inst/scripts/docopt/annotate_masses.txt index 3febd2e2f..35ef838df 100644 --- a/inst/scripts/docopt/annotate_masses.txt +++ b/inst/scripts/docopt/annotate_masses.txt @@ -1,7 +1,7 @@ You can use this script with the following example: Rscript inst/scripts/annotate_masses.R -Usage: +Usage: annotate_masses.R [--fil-ann-pre=] [--fil-lib-sop-mer=] [--ms-add-neg=] [--ms-add-pos=] [--ms-int-thr-ms1=] [--ms-pol=] [--ms-tol-mas-ppm-ms1=] [--ms-tol-mas-dal-ms1=] [--ms-tol-rt-min=] [--force=] Arguments: diff --git a/inst/scripts/docopt/annotate_spectra.txt b/inst/scripts/docopt/annotate_spectra.txt index 976a93bd9..d26a6132d 100644 --- a/inst/scripts/docopt/annotate_spectra.txt +++ b/inst/scripts/docopt/annotate_spectra.txt @@ -1,7 +1,7 @@ You can use this script with the following example: Rscript inst/scripts/annotate_spectra.R --fil-spe-raw data/source/examples/spectra.mgf --fil-lib-spe-pos data/interim/libraries/spectra/is/lotus_pos.sqlite --fil-ann-raw-spe data/interim/annotations/example_isdb.tsv.gz --ms-tol-mas-dal-ms2 0.02 --ms-tol-mas-ppm-ms2 10 --ann-ms2-thr-sim 0.2 --ms-int-thr-ms2 0 --ann-ms2-app false -Usage: +Usage: annotate_spectra.R [--ann-ms2-app=] [--ann-ms2-thr-sim=] [--fil-ann-raw-spe=] [--fil-lib-spe-neg=] [--fil-lib-spe-pos=] [--fil-spe-raw=] [--ms-int-thr-ms2=] [--ms-pol=]
[--ms-tol-mas-ppm-ms2=] [--ms-tol-mas-dal-ms2=] [--ms-tol-rt-min=] Arguments: diff --git a/inst/scripts/docopt/create_components.txt b/inst/scripts/docopt/create_components.txt index 3b55beef5..a3210f088 100644 --- a/inst/scripts/docopt/create_components.txt +++ b/inst/scripts/docopt/create_components.txt @@ -1,7 +1,7 @@ You can use this script with the following example: Rscript inst/scripts/create_components.R --fil-net-spe-edg-pro data/interim/features/example_edges.tsv.gz --fil-net-spe-com-raw data/interim/features/example_components.tsv.gz -Usage: +Usage: create_components.R [--fil-net-spe-edg-pro=] [--fil-net-spe-com-raw=] Arguments: diff --git a/inst/scripts/docopt/create_edges_spectra.txt b/inst/scripts/docopt/create_edges_spectra.txt index 072608b2b..b126aed6f 100644 --- a/inst/scripts/docopt/create_edges_spectra.txt +++ b/inst/scripts/docopt/create_edges_spectra.txt @@ -1,7 +1,7 @@ You can use this script with the following example: Rscript inst/scripts/create_edges_spectra.R --fil-spe-raw data/source/examples/spectra.mgf --fil-net-spe-edg-raw data/interim/features/example_edges_spectra.tsv.gz --ms-tol-mas-dal-ms2 0.02 --ms-tol-mas-ppm-ms2 10 --ann-ms2-thr-sim 0.2 --ms-int-thr-ms2 0 -Usage: +Usage: create_edges_spectra.R [--ann-ms2-thr-sim=] [--fil-net-spe-edg-raw=] [--fil-spe-raw=] [--ms-int-thr-ms2=] [--ms-tol-mas-ppm-ms2=] [--ms-tol-mas-dal-ms2=] [--ms-tol-rt-min=] Arguments: diff --git a/inst/scripts/docopt/filter_annotations.txt b/inst/scripts/docopt/filter_annotations.txt new file mode 100644 index 000000000..aeb4ae572 --- /dev/null +++ b/inst/scripts/docopt/filter_annotations.txt @@ -0,0 +1,17 @@ +You can use this script with the following example: + Rscript inst/scripts/filter_annotations.R + +Usage: + filter_annotations.R [--fil-ann-fil=] [--fil-ann-pre=] [--fil-fea-pre=] [--fil-lib-tem-pre=] [--ms-tol-rt-min=] + +Arguments: + fil-ann-fil Filtered annotation file. STRING + fil-ann-pre List of prepared annotation file. 
STRING + fil-fea-pre Prepared features file. STRING + fil-lib-tem-pre List of prepared temporal libraries. STRING + + ms-tol-rt-min Retention time tolerance in minutes. FLOAT + +Options: + -h --help Shows this screen. + -v --version Shows version. \ No newline at end of file diff --git a/inst/scripts/docopt/prepare_annotations_gnps.txt b/inst/scripts/docopt/prepare_annotations_gnps.txt index 4df305e9b..80bf3f8b5 100644 --- a/inst/scripts/docopt/prepare_annotations_gnps.txt +++ b/inst/scripts/docopt/prepare_annotations_gnps.txt @@ -1,7 +1,7 @@ You can use this script with the following example: Rscript inst/scripts/prepare_annotations_gnps.R --fil-ann-raw-spe data/interim/annotations/example_gnps.tsv --fil-ann-pre data/interim/annotations/example_gnps_prepared.tsv.gz -Usage: +Usage: prepare_annotations_gnps.R [--=fil-ann-pre] [--gnps-id=] [--gnps-workflow=] Arguments: diff --git a/inst/scripts/docopt/prepare_annotations_sirius.txt b/inst/scripts/docopt/prepare_annotations_sirius.txt index 343c083f4..d9c62a76b 100644 --- a/inst/scripts/docopt/prepare_annotations_sirius.txt +++ b/inst/scripts/docopt/prepare_annotations_sirius.txt @@ -1,7 +1,7 @@ You can use this script with the following example: Rscript inst/scripts/prepare_annotations_sirius.R --fil-ann-raw-sir data/interim/annotations/example_sirius/ --fil-ann-pre data/interim/annotations/example_sirius_prepared.tsv.gz -Usage: +Usage: prepare_annotations_sirius.R [--fil-ann-raw-sir=] [--fil-ann-pre=] Arguments: diff --git a/inst/scripts/docopt/prepare_annotations_spectra.txt b/inst/scripts/docopt/prepare_annotations_spectra.txt index ba7b67115..95b8e0751 100644 --- a/inst/scripts/docopt/prepare_annotations_spectra.txt +++ b/inst/scripts/docopt/prepare_annotations_spectra.txt @@ -1,7 +1,7 @@ You can use this script with the following example: Rscript inst/scripts/prepare_annotations_spectra.R --fil-ann-raw-spe data/interim/annotations/example_isdb.tsv.gz --fil-ann-pre 
data/interim/annotations/example_isdb_prepared.tsv.gz -Usage: +Usage: prepare_annotations_spectra.R [--fil-ann-raw-spe=] [--fil-ann-pre=] Arguments: diff --git a/inst/scripts/docopt/prepare_features_components.txt b/inst/scripts/docopt/prepare_features_components.txt index 0172eaadb..ac9e411b4 100644 --- a/inst/scripts/docopt/prepare_features_components.txt +++ b/inst/scripts/docopt/prepare_features_components.txt @@ -1,6 +1,6 @@ You can use this script with the following example: Rscript inst/scripts/prepare_features_components.R --fil-ann-pre data/interim/annotations/example_gnps_prepared.tsv.gz --fil-ann-pre data/interim/annotations/example_isdb_prepared.tsv.gz --fil-ann-pre data/interim/annotations/example_sirius_prepared.tsv.gz --fil-ann-fil data/interim/annotations/example_filled.tsv.gz -Usage: +Usage: prepare_features_components.R [--fil-ann-pre=] [--fil-ann-fil=] [--fil-net-spe-com-raw=] [--too-net-spe-com=] Arguments: diff --git a/inst/scripts/docopt/prepare_features_edges.txt b/inst/scripts/docopt/prepare_features_edges.txt index 60123510a..c6226f802 100644 --- a/inst/scripts/docopt/prepare_features_edges.txt +++ b/inst/scripts/docopt/prepare_features_edges.txt @@ -1,7 +1,7 @@ You can use this script with the following example: Rscript inst/scripts/prepare_features_edges.R --fil-net-spe-edg-pro data/interim/features/example_edges.tsv.gz -Usage: +Usage: prepare_features_edges.R [--fil-net-spe-edg-raw=] [--fil-net-spe-edg-pro=] [--names-source=] [--names-target=] Arguments: diff --git a/inst/scripts/docopt/prepare_features_tables.txt b/inst/scripts/docopt/prepare_features_tables.txt index 5885dd93f..3af2ce110 100644 --- a/inst/scripts/docopt/prepare_features_tables.txt +++ b/inst/scripts/docopt/prepare_features_tables.txt @@ -1,7 +1,7 @@ You can use this script with the following example: Rscript inst/scripts/prepare_features_tables.R --fil-fea-raw data/source/example_features.csv --fil-fea-pre data/interim/features/example_features.tsv.gz --names-features 
'row ID' --names-precursor 'row m/z' --names-rt 'row retention time' -Usage: +Usage: prepare_features_tables.R [--fil-fea-raw=] [--fil-fea-pre=] [--names-features=] [--names-precursor=] [--names-rt=] Arguments: diff --git a/inst/scripts/docopt/prepare_libraries_adducts.txt b/inst/scripts/docopt/prepare_libraries_adducts.txt index 7cc91bf4b..766d8ed06 100644 --- a/inst/scripts/docopt/prepare_libraries_adducts.txt +++ b/inst/scripts/docopt/prepare_libraries_adducts.txt @@ -1,7 +1,7 @@ You can use this script with the following example: Rscript inst/scripts/prepare_libraries_adducts.R --fil-lib-add-pro library -Usage: +Usage: prepare_libraries_adducts.R [--fil-lib-add-pro=] Arguments: diff --git a/inst/scripts/docopt/prepare_libraries_rt.txt b/inst/scripts/docopt/prepare_libraries_rt.txt new file mode 100644 index 000000000..cf74739b5 --- /dev/null +++ b/inst/scripts/docopt/prepare_libraries_rt.txt @@ -0,0 +1,25 @@ +You can use this script with the following example: +Rscript inst/scripts/prepare_libraries_rt.R --fil-lib-spe-neg data/interim/libraries/spectra/exp/internal_neg.sqlite --fil-lib-spe-pos data/interim/libraries/spectra/exp/internal_pos.sqlite --fil-lib-tem-exp data/source/libraries/rt/internal.tsv --fil-lib-tem-is data/source/libraries/rt/predicted.tsv --fil-lib-tem-pre data/interim/libraries/rt/prepared.tsv.gz + +Usage: + prepare_libraries_rt.R [--fil-lib-spe-neg=] [--fil-lib-spe-pos=] [--fil-lib-tem-exp=] [--fil-lib-tem-is=] [--fil-lib-tem-pre=] [--names-inchikey=] [--names-mgf-ik=] [--names-mgf-rt=] [--names-mgf-sm=] [--names-rt=] [--names-smiles=] [--units-rt=] + +Arguments: + fil-lib-spe-neg List of negative spectral libraries. STRING + fil-lib-spe-pos List of positive spectral libraries. STRING + fil-lib-tem-exp List of experimental temporal libraries. STRING + fil-lib-tem-is List of in silico temporal libraries. STRING + fil-lib-tem-pre List of prepared temporal libraries.
STRING + + names-inchikey Name of "InChIKey" variable in the input. STRING + names-mgf-ik Name of "InChIKey" in MGF. STRING + names-mgf-rt Name of "retention time" in MGF. STRING + names-mgf-sm Name of "SMILES" in MGF. STRING + names-rt Name of "retention time" variable in the input. STRING + names-smiles Name of "SMILES" variable in the input. STRING + + units-rt Unit of the "retention time" variable in the input. Must be "seconds" or "minutes". STRING + +Options: + -h --help Shows this screen. + -v --version Shows version. \ No newline at end of file diff --git a/inst/scripts/docopt/prepare_libraries_sop_closed.txt b/inst/scripts/docopt/prepare_libraries_sop_closed.txt index 11169616c..a456af84c 100644 --- a/inst/scripts/docopt/prepare_libraries_sop_closed.txt +++ b/inst/scripts/docopt/prepare_libraries_sop_closed.txt @@ -1,7 +1,7 @@ You can use this script with the following example: Rscript inst/scripts/prepare_libraries_sop_closed.R --fil-lib-sop-raw-clo ../lotus-processor/data/processed/220226_closed_metadata.csv.gz --fil-lib-sop-pre data/interim/libraries/sop/closed_prepared.tsv.gz -Usage: +Usage: prepare_libraries_sop_closed.R [--fil-lib-sop-raw-clo=] [--fil-lib-sop-pre=] Arguments: diff --git a/inst/scripts/docopt/prepare_libraries_sop_ecmdb.txt b/inst/scripts/docopt/prepare_libraries_sop_ecmdb.txt index 71c0432c2..661cca6e3 100644 --- a/inst/scripts/docopt/prepare_libraries_sop_ecmdb.txt +++ b/inst/scripts/docopt/prepare_libraries_sop_ecmdb.txt @@ -1,7 +1,7 @@ You can use this script with the following example: Rscript inst/scripts/prepare_libraries_sop_ecmdb.R --fil-lib-sop-raw-ecm data/source/libraries/sop/ecmdb.json.zip --fil-lib-sop-pre data/interim/libraries/sop/ecmdb_prepared.tsv.gz -Usage: +Usage: prepare_libraries_sop_ecmdb.R [--fil-lib-sop-raw-ecm=] [--fil-lib-sop-pre=] Arguments: diff --git a/inst/scripts/docopt/prepare_libraries_sop_lotus.txt b/inst/scripts/docopt/prepare_libraries_sop_lotus.txt index c6c159e7c..7a48fc351 100644 --- 
a/inst/scripts/docopt/prepare_libraries_sop_lotus.txt +++ b/inst/scripts/docopt/prepare_libraries_sop_lotus.txt @@ -1,7 +1,7 @@ You can use this script with the following example: Rscript inst/scripts/prepare_libraries_sop_lotus.R --fil-lib-sop-raw-ecm data/source/libraries/sop/lotus.csv.gz --fil-lib-sop-pro data/interim/libraries/sop/lotus_prepared.tsv.gz -Usage: +Usage: prepare_libraries_sop_ecmdb.R [--fil-lib-sop-raw-ecm=] [--fil-lib-sop-pro=] Arguments: diff --git a/inst/scripts/docopt/prepare_libraries_sop_merged.txt b/inst/scripts/docopt/prepare_libraries_sop_merged.txt index 9a60896fc..843102bc5 100644 --- a/inst/scripts/docopt/prepare_libraries_sop_merged.txt +++ b/inst/scripts/docopt/prepare_libraries_sop_merged.txt @@ -1,7 +1,7 @@ You can use this script with the following example: Rscript inst/scripts/prepare_libraries_sop_merged.R --fil-lib-sop-pro data/interim/libraries/sop/lotus_prepared.tsv.gz --fil-lib-sop-mer bitter_db.tsv.gz --org-fil-mod TRUE --org-fil-lev family --org-fil-val 'Simaroubaceae|Gentianaceae' -Usage: +Usage: prepare_libraries_sop_merged.R [--fil-lib-sop-pro=] [--fil-lib-sop-mer=] [--org-fil-mod=] [--org-fil-lev=] [--org-fil-val=] Arguments: diff --git a/inst/scripts/docopt/prepare_libraries_spectra.txt b/inst/scripts/docopt/prepare_libraries_spectra.txt index 5e390322d..a28ec7afd 100644 --- a/inst/scripts/docopt/prepare_libraries_spectra.txt +++ b/inst/scripts/docopt/prepare_libraries_spectra.txt @@ -1,7 +1,7 @@ You can use this script with the following example: Rscript inst/scripts/prepare_libraries_spectra.R --fil-lib-spe-raw data/source/libraries/spectra/exp/spectral_lib_mini_with_rt.mgf --fil-lib-spe-pos data/interim/libraries/spectra/exp/internal_pos.sqlite --ms-pol pos -Usage: +Usage: prepare_libraries_spectra.R [--fil-lib-spe-neg=] [--fil-lib-spe-pos=] [--fil-lib-spe-raw=] [--ms-pol=] [--names-mgf-ce=] [--names-mgf-ci=] [--names-mgf-em=] [--names-mgf-in=] [--names-mgf-io=] [--names-mgf-ik=] [--names-mgf-il=] [--names-mgf-mf=] 
[--names-mgf-na=] [--names-mgf-po=] [--names-mgf-sm=] [--names-mgf-sn=] [--names-mgf-si=] [--names-mgf-sp=] [--names-mgf-sy=] [--names-mgf-xl=] Arguments: diff --git a/inst/scripts/docopt/prepare_params.txt b/inst/scripts/docopt/prepare_params.txt index 0b2b03efb..a99710857 100644 --- a/inst/scripts/docopt/prepare_params.txt +++ b/inst/scripts/docopt/prepare_params.txt @@ -1,7 +1,7 @@ You can use this script with the following example: Rscript inst/scripts/prepare_params.R -Usage: +Usage: prepare_params.R [--fil-pat=] [--gnps-id=] [--gnps-workflow=] [--ms-pol=] [--org-tax=] Arguments: diff --git a/inst/scripts/docopt/prepare_taxa.txt b/inst/scripts/docopt/prepare_taxa.txt index 42da3ae42..771b25494 100644 --- a/inst/scripts/docopt/prepare_taxa.txt +++ b/inst/scripts/docopt/prepare_taxa.txt @@ -1,7 +1,7 @@ You can use this script with the following example: Rscript inst/scripts/prepare_taxa.R --fil-tax-pro data/interim/taxa/example_taxed.tsv.gz --org-can 1 --names-extension TRUE --names-features "row ID" --names-filename filename --names-taxon ATTRIBUTE_species -Usage: +Usage: prepare_taxa.R [--fil-fea-raw=] [--fil-tax-raw=] [--fil-tax-pro=] [--gnps-id=] [--names-extension=] [--names-features=] [--names-filename=] [--names-taxon=] [--org-can=] [--org-tax=] [--too-met=] Arguments: diff --git a/inst/scripts/docopt/weight_annotations.txt b/inst/scripts/docopt/weight_annotations.txt index a5d5823f1..0d2dee493 100644 --- a/inst/scripts/docopt/weight_annotations.txt +++ b/inst/scripts/docopt/weight_annotations.txt @@ -1,7 +1,7 @@ You can use this script with the following example: Rscript inst/scripts/weight_annotations.R -Usage: +Usage: weight_annotations.R [--ann-can-ini=] [--ann-can-fin=] [--ann-ms1only=] [--ann-ms1-thr-bio=] [--ann-ms1-thr-che=] [--ann-ms1-thr-con=] [--fil-ann-fil=] [--fil-ann-pro=] [--fil-lib-sop-mer=] [--fil-net-spe-edg-pro=] [--fil-tax-pro=] [--wei-glo-bio=] [--wei-glo-che=] [--wei-glo-spe=] [--wei-bio-01=] [--wei-bio-02=] [--wei-bio-03=] 
[--wei-bio-04=] [--wei-bio-05=] [--wei-bio-06=] [--wei-bio-07=] [--wei-bio-08=] [--wei-bio-09=] [--wei-bio-10=] [--wei-bio-11=] [--wei-bio-12=] [--wei-bio-13=] [--wei-bio-14=] [--wei-bio-15=] [--wei-che-11=] [--wei-che-12=] [--wei-che-13=] [--wei-che-14=] [--wei-che-21=] [--wei-che-22=] [--wei-che-23=] [--summarise=] [--force=] Arguments: @@ -12,6 +12,7 @@ Arguments: ann-ms1-thr-che Minimal chemical score to keep MS1 only annotation. FLOAT ann-ms1-thr-con Condition to be used to retain candidates. Must be "OR" or "AND". Example: Minimum 0.5 biological AND 0.5 chemical. STRING + fil-ann-fil Filtered annotation file. STRING fil-ann-pro Final results file. STRING fil-lib-sop-mer Merged structure organism pairs library. STRING fil-net-spe-edg-pro Prepared edges file. STRING diff --git a/inst/scripts/filter_annotations.R b/inst/scripts/filter_annotations.R new file mode 100644 index 000000000..c3b459ac8 --- /dev/null +++ b/inst/scripts/filter_annotations.R @@ -0,0 +1,21 @@ +start <- Sys.time() + +require( + package = "timaR", + quietly = TRUE +) + +log_debug( + "This script performs", + crayon::green("filters annotations"), + "based on", + crayon::blue("retention time matching") +) +log_debug("Authors: ", crayon::green("AR"), "\n") +log_debug("Contributors: ...") + +targets::tar_make(names = matches("^ann_fil")) + +end <- Sys.time() + +log_debug("Script finished in", crayon::green(format(end - start))) diff --git a/inst/scripts/prepare_libraries_rt.R b/inst/scripts/prepare_libraries_rt.R new file mode 100644 index 000000000..4cbb066af --- /dev/null +++ b/inst/scripts/prepare_libraries_rt.R @@ -0,0 +1,22 @@ +start <- Sys.time() + +require( + package = "timaR", + quietly = TRUE +) + +log_debug( + "This script", + crayon::green( + "prepares a library of retention times", + "from MGF or tabular data. 
\n" + ) +) +log_debug("Authors: ", crayon::green("AR"), "\n") +log_debug("Contributors: ...") + +targets::tar_make(names = matches("lib_rt")) + +end <- Sys.time() + +log_debug("Script finished in", crayon::green(format(end - start))) diff --git a/man/filter_annotations.Rd b/man/filter_annotations.Rd new file mode 100644 index 000000000..e61c62d72 --- /dev/null +++ b/man/filter_annotations.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/filter_annotations.R +\name{filter_annotations} +\alias{filter_annotations} +\title{Filter annotations} +\usage{ +filter_annotations( + annotations = params$files$annotations$prepared, + features = params$files$features$prepared, + rts = params$files$libraries$temporal$prepared, + output = params$files$annotations$filtered, + tolerance_rt = params$ms$tolerances$rt$minutes, + parameters = params +) +} +\arguments{ +\item{annotations}{Prepared annotations file} + +\item{features}{Prepared features file} + +\item{rts}{Prepared retention time library} + +\item{output}{Output file} + +\item{tolerance_rt}{Tolerance to filter retention time} + +\item{parameters}{Params} +} +\description{ +This function filters initial annotations. 
+} +\examples{ +NULL +} diff --git a/man/prepare_libraries_rt.Rd b/man/prepare_libraries_rt.Rd new file mode 100644 index 000000000..2553765bd --- /dev/null +++ b/man/prepare_libraries_rt.Rd @@ -0,0 +1,59 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/prepare_libraries_rt.R +\name{prepare_libraries_rt} +\alias{prepare_libraries_rt} +\title{Prepare libraries of retention times} +\usage{ +prepare_libraries_rt( + mgf_exp = params$files$libraries$spectral$exp, + mgf_is = params$files$libraries$spectral$is, + temp_exp = params$files$libraries$temporal$exp, + temp_is = params$files$libraries$temporal$is, + output = params$files$libraries$temporal$prepared, + library = paths$data$interim$libraries$sop$merged$keys, + col_ik = params$names$mgf$inchikey, + col_rt = params$names$mgf$retention_time, + col_sm = params$names$mgf$smiles, + name_inchikey = params$names$inchikey, + name_rt = params$names$rt, + name_smiles = params$names$smiles, + unit_rt = params$units$rt, + parameters = params +) +} +\arguments{ +\item{mgf_exp}{MGF containing experimental retention times} + +\item{mgf_is}{MGF containing in silico predicted retention times} + +\item{temp_exp}{File containing experimental retention times} + +\item{temp_is}{File containing in silico predicted retention times} + +\item{output}{Output file} + +\item{library}{Library containing the keys} + +\item{col_ik}{Name of the InChIKey in mgf} + +\item{col_rt}{Name of the retention time in mgf} + +\item{col_sm}{Name of the SMILES in mgf} + +\item{name_inchikey}{Name of the InChIKey in file} + +\item{name_rt}{Name of the retention time in file} + +\item{name_smiles}{Name of the SMILES in file} + +\item{unit_rt}{Unit of the retention time. 
Must be "seconds" or "minutes"} + +\item{parameters}{Params} +} +\description{ +This function prepares retention time libraries +to be used later for retention time-based annotation filtering +} +\examples{ +NULL +} diff --git a/man/weight_annotations.Rd b/man/weight_annotations.Rd index c565d040b..087af537e 100644 --- a/man/weight_annotations.Rd +++ b/man/weight_annotations.Rd @@ -8,10 +8,9 @@ weight_annotations( library = paths$data$interim$libraries$sop$merged$keys, org_tax_ott = paths$data$interim$libraries$sop$merged$organisms$taxonomies$ott, str_2d_3d = paths$data$interim$libraries$sop$merged$structures$dd_ddd, - annotations = params$files$annotations$prepared, + annotations = params$files$annotations$filtered, components = params$files$networks$spectral$components$prepared, edges = params$files$networks$spectral$edges$prepared, - features = params$files$features$prepared, taxa = params$files$taxa$prepared, output = params$files$annotations$processed, candidates_initial = params$annotations$candidates$initial, @@ -63,8 +62,6 @@ weight_annotations( \item{edges}{Prepared edges file} -\item{features}{Prepared features file} - \item{taxa}{Prepared taxed features file} \item{output}{Output file} @@ -160,7 +157,7 @@ match (should be lower than \verb{NPC class})} \item{parameters}{Params} } \description{ -This function weights initial annotations. +This function weights annotations. 
} \examples{ NULL diff --git a/tests/testthat/test_functions.R b/tests/testthat/test_functions.R index cf59d54d2..b6071920f 100644 --- a/tests/testthat/test_functions.R +++ b/tests/testthat/test_functions.R @@ -248,6 +248,11 @@ testthat::test_that("Whole process", { params <- get_params(step = step) prepare_libraries_adducts() + ### Retention time + step <- "prepare_libraries_rt" + params <- get_params(step = step) + prepare_libraries_rt() + ### Features step <- "prepare_features_tables" params <- get_params(step = step) @@ -354,6 +359,11 @@ testthat::test_that("Whole process", { output = "data/interim/benchmark/bench_test_out.tsv.gz" ) + ## Filter annotations + step <- "filter_annotations" + params <- get_params(step = step) + filter_annotations() + ## Perform TIMA step <- "weight_annotations" params <- get_params(step = step) @@ -428,8 +438,8 @@ testthat::test_that("Whole process", { arguments$names_mgf_mf <<- "x" arguments$names_mgf_na <<- "x" arguments$names_mgf_po <<- "x" - arguments$names_mgf_pc <<- "x" - arguments$names_mgf_pm <<- "x" + # arguments$names_mgf_pc <<- "x" + # arguments$names_mgf_pm <<- "x" arguments$names_mgf_sm <<- "x" arguments$names_mgf_sn <<- "x" arguments$names_mgf_si <<- "x" @@ -451,6 +461,7 @@ testthat::test_that("Whole process", { arguments$too_net_spe_edg <<- "x" arguments$too_tax_bio <<- "x" arguments$too_tax_che <<- "x" + arguments$units_rt <<- "x" arguments$wei_glo_bio <<- "x" arguments$wei_glo_che <<- "x" arguments$wei_glo_spe <<- "x" diff --git a/vignettes/II-inputs.Rmd b/vignettes/II-inputs.Rmd index 18691d4fe..2ea41c229 100644 --- a/vignettes/II-inputs.Rmd +++ b/vignettes/II-inputs.Rmd @@ -300,6 +300,34 @@ Before running the corresponding code, do not forget to modify `inst/params/user source(file = "inst/scripts/prepare_libraries_adducts.R") ``` +#### Retention times + +The next library you might want is a retention times library. +This library is **optional**. 
+As no standard LC method is shared (for now) among laboratories, this library +will be heavily laboratory-dependent. +It could also be a library of in silico predicted retention times. + +```{r rts-1, echo=FALSE, message=FALSE, warning=FALSE, out.width="100%"} +targets::tar_visnetwork( + names = starts_with("lib_rt"), + exclude = c( + contains("benchmark"), + contains("par_"), + contains("paths") + ), + targets_only = TRUE, + degree_from = 8 +) +``` + +We only use the previously generated library to fill up missing InChIKeys. +Before running the corresponding code, do not forget to modify `inst/params/user/prepare_libraries_rt.yaml`. + +```{r rts-2, results="hide", message=FALSE, warning=FALSE} +source(file = "inst/scripts/prepare_libraries_rt.R") +``` + #### Spectra Finally, you need a spectral library to perform MS^2^-based annotation. diff --git a/vignettes/III-formatting.Rmd b/vignettes/III-formatting.Rmd index 2338d87e1..87f9ea648 100644 --- a/vignettes/III-formatting.Rmd +++ b/vignettes/III-formatting.Rmd @@ -223,11 +223,24 @@ Before running the corresponding code, do not forget to modify `inst/params/user source(file = "inst/scripts/prepare_taxa.R") ``` +## Filter annotations (based on retention time) + +This step allows you to filter the annotations from all the tools used, +based on your own internal (experimental or predicted) retention time library. +It is optional. +If you do not have one, it will simply group the annotations of all tools. + +Before running the corresponding code, do not forget to modify `inst/params/user/filter_annotations.yaml`. + +```{r filter-annotations, results="hide", message=FALSE, warning=FALSE} +source(file = "inst/scripts/filter_annotations.R") +``` + You are almost there! See already all the steps accomplished! ```{r features-all, echo=FALSE, message=FALSE, warning=FALSE, out.width="100%"} targets::tar_visnetwork( - names = starts_with("fea"), + names = matches("^ann"), exclude = c( contains("benchmark"), contains("par_"),