Skip to content

Commit

Permalink
Merge pull request #160 from taxonomicallyinformedannotation/dev
Browse files Browse the repository at this point in the history
Exposing more parameters
  • Loading branch information
Adafede authored Jul 11, 2024
2 parents a8ccf52 + b333690 commit 86b29f7
Show file tree
Hide file tree
Showing 39 changed files with 1,239 additions and 460 deletions.
8 changes: 4 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: timaR
Title: Taxonomically Informed Metabolite Annotation
Version: 2.9.6
Version: 2.9.7
Authors@R: c(
person("Adriano", "Rutz", , "rutz@imsb.biol.ethz.ch", role = c("aut", "cre"),
comment = c(ORCID = "0000-0003-0443-9902")),
Expand All @@ -22,7 +22,7 @@ Imports:
dplyr (>= 1.1.4),
DT (>= 0.33),
fs (>= 1.6.4),
gt (>= 0.10.1),
gt (>= 0.11.0),
httr2 (>= 1.0.1),
igraph (>= 2.0.3),
installr (>= 0.23.4),
Expand Down Expand Up @@ -67,12 +67,13 @@ Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.2
Collate:
'round_reals.R'
'filter_nitrogen_rule.R'
'harmonize_adducts.R'
'dist_groups.R'
'decorate_masses.R'
'annotate_masses.R'
'import_spectra.R'
'annotate_spectra.R'
'benchmark_taxize_spectra.R'
'calculate_entropy.R'
'parse_adduct.R'
'calculate_mass_of_m.R'
Expand Down Expand Up @@ -142,7 +143,6 @@ Collate:
'prepare_params.R'
'prepare_taxa.R'
'replace_id.R'
'taxize_spectra_benchmark.R'
'weight_chemo.R'
'weight_bio.R'
'weight_annotations.R'
Expand Down
4 changes: 2 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

export(annotate_masses)
export(annotate_spectra)
export(benchmark_taxize_spectra)
export(calculate_entropy)
export(calculate_mass_of_m)
export(clean_bio)
Expand Down Expand Up @@ -30,14 +31,14 @@ export(fake_hmdb)
export(fake_lotus)
export(fake_sop_columns)
export(filter_annotations)
export(filter_nitrogen_rule)
export(get_example_sirius)
export(get_file)
export(get_gnps_tables)
export(get_last_version_from_zenodo)
export(get_massbank_spectra)
export(get_organism_taxonomy_ott)
export(get_params)
export(harmonize_adducts)
export(harmonize_names_sirius)
export(harmonize_spectra)
export(import_spectra)
Expand Down Expand Up @@ -78,7 +79,6 @@ export(select_sirius_columns_formulas)
export(select_sirius_columns_structures)
export(select_sop_columns)
export(split_tables_sop)
export(taxize_spectra_benchmark)
export(weight_annotations)
export(weight_bio)
export(weight_chemo)
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# timaR

# timaR 2.9.7

* Add the possibility to add internal libraries through the GUI (#159)
* Expose more parameters to the GUI (#159)
* Fix adducts and remove the nitrogen rule
* Update the benchmarking steps

# timaR 2.9.6

* Adding light-switch thanks to `pkgdown 2.1.0`.
Expand Down
43 changes: 13 additions & 30 deletions R/annotate_masses.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,10 @@
#'
#' @include decorate_masses.R
#' @include dist_groups.R
#' @include filter_nitrogen_rule.R
#' @include harmonize_adducts.R
#' @include round_reals.R
#'
#' @param features Table containing your previous annotation to complement
#' @param filter_nitro Filter according to Nitrogen rule. Boolean
#' @param output_annotations Output for mass based structural annotations
#' @param output_edges Output for mass based edges
#' @param name_source Name of the source features column
Expand Down Expand Up @@ -36,7 +35,6 @@
#' @examples NULL
annotate_masses <-
function(features = get_params(step = "annotate_masses")$files$features$prepared,

Check warning on line 37 in R/annotate_masses.R

View workflow job for this annotation

GitHub Actions / MegaLinter

file=/github/workspace/R/annotate_masses.R,line=37,col=81,[line_length_linter] Lines should not be more than 80 characters. This line is 83 characters.
filter_nitro = get_params(step = "annotate_masses")$options$nitrogen_rule,
output_annotations = get_params(step = "annotate_masses")$files$annotations$prepared$structural$ms1,

Check warning on line 38 in R/annotate_masses.R

View workflow job for this annotation

GitHub Actions / MegaLinter

file=/github/workspace/R/annotate_masses.R,line=38,col=81,[line_length_linter] Lines should not be more than 80 characters. This line is 111 characters.
output_edges = get_params(step = "annotate_masses")$files$networks$spectral$edges$raw,

Check warning on line 39 in R/annotate_masses.R

View workflow job for this annotation

GitHub Actions / MegaLinter

file=/github/workspace/R/annotate_masses.R,line=39,col=81,[line_length_linter] Lines should not be more than 80 characters. This line is 97 characters.
name_source = get_params(step = "annotate_masses")$names$source,
Expand All @@ -53,7 +51,6 @@ annotate_masses <-
ms_mode = get_params(step = "annotate_masses")$ms$polarity,
tolerance_ppm = get_params(step = "annotate_masses")$ms$tolerances$mass$ppm$ms1,
tolerance_rt = get_params(step = "annotate_masses")$ms$tolerances$rt$minutes) {
# TODO not using MS1 threshold
stopifnot("Your ppm tolerance must be <= 20" = tolerance_ppm <= 20)
stopifnot("Your rt tolerance must be <= 0.05" = tolerance_rt <= 0.05)

Expand All @@ -72,7 +69,6 @@ annotate_masses <-
clusters <- clusters_list$neg
}

## slim it
structure_organism_pairs_table <-
tidytable::fread(
file = library,
Expand Down Expand Up @@ -144,25 +140,20 @@ annotate_masses <-
"+2Na" = "+Na2", # mzmine
"+3K" = "+K3", # cliqueMS
"+3Na" = "+Na3", # cliqueMS
"+Acetate" = "C2H3O2", # mzmine
"+Acetate" = "+C2H3O2", # mzmine
"+ACN" = "+C2H3N", # mzmine
"+FA" = "CHO2", # mzmine
"+HAc" = "C2H4O2", # mzmine
"+HFA" = "CH2O2", # mzmine
"+CH3COO" = "+C2H3O2", # GNPS
"+FA" = "+CHO2", # mzmine
"+HAc" = "+C2H4O2", # mzmine
"+Hac" = "+C2H4O2", # GNPS
"+HFA" = "+CH2O2", # mzmine
"+IsoProp" = "+C3H8O", # mzmine
"+MeOH" = "+CH4O", # mzmine
"+NH4" = "+H4N" # mzmine
"+NH4" = "+H4N", # mzmine
"[M+CH3COO]-/[M-CH3]-" = "[M+CH3COO]-" # weird MassBank
)
log_debug("Trying to harmonize adducts definitions...")
features_table <- features_table |>
tidytable::mutate(
adduct = stringi::stri_replace_all_fixed(
str = adduct,
pattern = names(adducts_translations),
replacement = adducts_translations,
vectorize_all = FALSE
)
)
harmonize_adducts()
}

df_fea_min <- features_table |>
Expand Down Expand Up @@ -210,7 +201,7 @@ annotate_masses <-
delta_max = (mz_dest + (1E-6 * tolerance_ppm * (mz + mz_dest) / 2) - mz)
)

rm(df_rt_tol)
rm(df_rt_tol, features_table)

adducts_table <- adducts |>
tidytable::tidytable() |>
Expand Down Expand Up @@ -372,8 +363,6 @@ annotate_masses <-
))
rm(df_adducted, df_nl_min)

# ISSUE see #141 dictionary of adducts (example 2H2O in mzmine)

df_addlossed_min <- df_addlossed |>
tidytable::mutate_rowwise(mass = calculate_mass_of_m(adduct_string = adduct, mz = mz))

Expand Down Expand Up @@ -480,7 +469,6 @@ annotate_masses <-
tidytable::distinct()
rm(adducts_table_multi)


log_debug("joining within given rt tolerance \n")
df_multi_nl <- df_multi |>
dplyr::inner_join(
Expand Down Expand Up @@ -537,14 +525,9 @@ annotate_masses <-

rm(df_annotated_1, df_annotated_2, df_str_unique)

df_annotated_filtered <- df_annotated_final |>
filter_nitrogen_rule(features_table = features_table, filter_nitro = filter_nitro)

rm(df_annotated_final, features_table)

log_debug("adding chemical classification")
df_final <- tidytable::left_join(
df_annotated_filtered,
df_annotated_final,
structure_organism_pairs_table |>
tidytable::distinct(
candidate_structure_inchikey_no_stereo = structure_inchikey_no_stereo,
Expand All @@ -568,7 +551,7 @@ annotate_masses <-
)) |>
tidytable::mutate(candidate_adduct = candidate_library) |>
tidytable::mutate(candidate_library = "TIMA MS1")
rm(structure_organism_pairs_table, df_annotated_filtered)
rm(structure_organism_pairs_table, df_annotated_final)

df_final |>
decorate_masses()
Expand Down
3 changes: 3 additions & 0 deletions R/annotate_spectra.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#' @details It takes two files as input.
#' A query file that will be matched against a library file.
#'
#' @include harmonize_adducts.R
#' @include import_spectra.R
#'
#' @param input Query file containing spectra. Currently an '.mgf' file
Expand Down Expand Up @@ -318,6 +319,8 @@ annotate_spectra <- function(input = get_params(step = "annotate_spectra")$files
"target_xlogp" = lib_xlogp,
"target_precursorMz" = lib_precursors
)
df_meta <- df_meta |>
harmonize_adducts(adducts_colname = "target_adduct")
rm(lib_precursors)
df_final <- df_final |>
tidytable::left_join(df_meta) |>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
#' @export
#'
#' @examples NULL
taxize_spectra_benchmark <-
benchmark_taxize_spectra <-
function(input,
keys,
org_tax_ott,
Expand Down
24 changes: 14 additions & 10 deletions R/calculate_mass_of_m.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,18 @@
#' @examples NULL
calculate_mass_of_m <- function(adduct_string, mz, electron_mass = 5.485799E-4) {
  # Derive the mass of M from an observed `mz` and an adduct string, using the
  # named components returned by parse_adduct(): "n_mer", "n_iso",
  # "los_add_clu", "n_charges" and "charge".
  parsed_adduct <- parse_adduct(adduct_string)

  # The guard must run BEFORE the formula: when every parsed component is 0,
  # the division by "n_mer" below would be 0 / 0 (NaN). Return 0 instead.
  # NOTE(review): an all-zero vector presumably means parse_adduct() could not
  # interpret the adduct string -- confirm against parse_adduct().
  if (all(parsed_adduct == 0)) {
    return(0)
  }

  # Single-bracket subsetting is kept on purpose (except for the leading
  # "n_charges" factor, as before) so that the names carried by the result
  # stay exactly as callers have seen them so far.
  charged_mass <- parsed_adduct["n_charges"][[1]] *
    (mz + parsed_adduct["n_iso"])

  # Electron mass correction, scaled by the number and sign of charges.
  electrons_mass <- parsed_adduct["n_charges"] *
    parsed_adduct["charge"] *
    electron_mass

  (charged_mass - parsed_adduct["los_add_clu"] - electrons_mass) /
    parsed_adduct["n_mer"]
}
2 changes: 2 additions & 0 deletions R/cleanup_spectra.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,7 @@ cleanup_spectra <- function(spectra) {
spectra <- spectra |>
Spectra::combineSpectra(f = spectra$FEATURE_ID)
}
spectra <- spectra |>
Spectra::filterEmptySpectra()
return(spectra)
}
9 changes: 1 addition & 8 deletions R/filter_annotations.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,8 @@
#'
#' @description This function filters initial annotations.
#'
#' @include filter_nitrogen_rule.R
#'
#' @param annotations Prepared annotations file
#' @param features Prepared features file
#' @param filter_nitro Filter according to Nitrogen rule. Boolean
#' @param rts Prepared retention time library
#' @param output Output file
#' @param tolerance_rt Tolerance to filter retention time
Expand All @@ -19,7 +16,6 @@
filter_annotations <-
function(annotations = get_params(step = "filter_annotations")$files$annotations$prepared$structural,
features = get_params(step = "filter_annotations")$files$features$prepared,
filter_nitro = get_params(step = "filter_annotations")$options$nitrogen_rule,
rts = get_params(step = "filter_annotations")$files$libraries$temporal$prepared,
output = get_params(step = "filter_annotations")$files$annotations$filtered,
tolerance_rt = get_params(step = "filter_annotations")$ms$tolerances$rt$minutes) {
Expand Down Expand Up @@ -96,12 +92,9 @@ filter_annotations <-
"Candidates were removed based on retention time."
)

features_annotated_table_3 <- features_annotated_table_2 |>
filter_nitrogen_rule(features_table = features_table, filter_nitro = filter_nitro)

## in case some features had a single filtered annotation
final_table <- features_table |>
tidytable::left_join(features_annotated_table_3)
tidytable::left_join(features_annotated_table_2)

rm(
features_table,
Expand Down
Loading

0 comments on commit 86b29f7

Please # to comment.