Merge pull request #160 from taxonomicallyinformedannotation/dev

Exposing more parameters
taxonomicallyinformedannotation · Jul 11, 2024 · 86b29f7 · 86b29f7
2 parents a8ccf52 + b333690
commit 86b29f7
Show file tree

Hide file tree

Showing 39 changed files with 1,239 additions and 460 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: timaR
 Title: Taxonomically Informed Metabolite Annotation
-Version: 2.9.6
+Version: 2.9.7
 Authors@R: c(
     person("Adriano", "Rutz", , "rutz@imsb.biol.ethz.ch", role = c("aut", "cre"),
            comment = c(ORCID = "0000-0003-0443-9902")),
@@ -22,7 +22,7 @@ Imports:
     dplyr (>= 1.1.4),
     DT (>= 0.33),
     fs (>= 1.6.4),
-    gt (>= 0.10.1),
+    gt (>= 0.11.0),
     httr2 (>= 1.0.1),
     igraph (>= 2.0.3),
     installr (>= 0.23.4),
@@ -67,12 +67,13 @@ Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.3.2
 Collate:
     'round_reals.R'
-    'filter_nitrogen_rule.R'
+    'harmonize_adducts.R'
     'dist_groups.R'
     'decorate_masses.R'
     'annotate_masses.R'
     'import_spectra.R'
     'annotate_spectra.R'
+    'benchmark_taxize_spectra.R'
     'calculate_entropy.R'
     'parse_adduct.R'
     'calculate_mass_of_m.R'
@@ -142,7 +143,6 @@ Collate:
     'prepare_params.R'
     'prepare_taxa.R'
     'replace_id.R'
-    'taxize_spectra_benchmark.R'
     'weight_chemo.R'
     'weight_bio.R'
     'weight_annotations.R'

diff --git a/NAMESPACE b/NAMESPACE
@@ -2,6 +2,7 @@
 
 export(annotate_masses)
 export(annotate_spectra)
+export(benchmark_taxize_spectra)
 export(calculate_entropy)
 export(calculate_mass_of_m)
 export(clean_bio)
@@ -30,14 +31,14 @@ export(fake_hmdb)
 export(fake_lotus)
 export(fake_sop_columns)
 export(filter_annotations)
-export(filter_nitrogen_rule)
 export(get_example_sirius)
 export(get_file)
 export(get_gnps_tables)
 export(get_last_version_from_zenodo)
 export(get_massbank_spectra)
 export(get_organism_taxonomy_ott)
 export(get_params)
+export(harmonize_adducts)
 export(harmonize_names_sirius)
 export(harmonize_spectra)
 export(import_spectra)
@@ -78,7 +79,6 @@ export(select_sirius_columns_formulas)
 export(select_sirius_columns_structures)
 export(select_sop_columns)
 export(split_tables_sop)
-export(taxize_spectra_benchmark)
 export(weight_annotations)
 export(weight_bio)
 export(weight_chemo)
diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,12 @@
 # timaR
 
+# timaR 2.9.7
+
+* Adding possibility to add internal libraries through the GUI (#159)
+* Expose more parameters to the GUI (#159)
+* Fix adducts and remove nitrogen rule
+* Updated benchmarking steps
+
 # timaR 2.9.6
 
 * Adding light-switch thanks to `pkgdown 2.1.0`.

diff --git a/R/annotate_masses.R b/R/annotate_masses.R
@@ -4,11 +4,10 @@
 #'
 #' @include decorate_masses.R
 #' @include dist_groups.R
-#' @include filter_nitrogen_rule.R
+#' @include harmonize_adducts.R
 #' @include round_reals.R
 #'
 #' @param features Table containing your previous annotation to complement
-#' @param filter_nitro Filter according to Nitrogen rule. Boolean
 #' @param output_annotations Output for mass based structural annotations
 #' @param output_edges Output for mass based edges
 #' @param name_source Name of the source features column
@@ -36,7 +35,6 @@
 #' @examples NULL
 annotate_masses <-
   function(features = get_params(step = "annotate_masses")$files$features$prepared,
-           filter_nitro = get_params(step = "annotate_masses")$options$nitrogen_rule,
            output_annotations = get_params(step = "annotate_masses")$files$annotations$prepared$structural$ms1,
            output_edges = get_params(step = "annotate_masses")$files$networks$spectral$edges$raw,
            name_source = get_params(step = "annotate_masses")$names$source,
@@ -53,7 +51,6 @@ annotate_masses <-
            ms_mode = get_params(step = "annotate_masses")$ms$polarity,
            tolerance_ppm = get_params(step = "annotate_masses")$ms$tolerances$mass$ppm$ms1,
            tolerance_rt = get_params(step = "annotate_masses")$ms$tolerances$rt$minutes) {
-    # TODO not using MS1 threshold
     stopifnot("Your ppm tolerance must be <= 20" = tolerance_ppm <= 20)
     stopifnot("Your rt tolerance must be <= 0.05" = tolerance_rt <= 0.05)
 
@@ -72,7 +69,6 @@ annotate_masses <-
       clusters <- clusters_list$neg
     }
 
-    ## slim it
     structure_organism_pairs_table <-
       tidytable::fread(
         file = library,
@@ -144,25 +140,20 @@ annotate_masses <-
           "+2Na" = "+Na2", # mzmine
           "+3K" = "+K3", # cliqueMS
           "+3Na" = "+Na3", # cliqueMS
-          "+Acetate" = "C2H3O2", # mzmine
+          "+Acetate" = "+C2H3O2", # mzmine
           "+ACN" = "+C2H3N", # mzmine
-          "+FA" = "CHO2", # mzmine
-          "+HAc" = "C2H4O2", # mzmine
-          "+HFA" = "CH2O2", # mzmine
+          "+CH3COO" = "+C2H3O2", # GNPS
+          "+FA" = "+CHO2", # mzmine
+          "+HAc" = "+C2H4O2", # mzmine
+          "+Hac" = "+C2H4O2", # GNPS
+          "+HFA" = "+CH2O2", # mzmine
           "+IsoProp" = "+C3H8O", # mzmine
           "+MeOH" = "+CH4O", # mzmine
-          "+NH4" = "+H4N" # mzmine
+          "+NH4" = "+H4N", # mzmine
+          "[M+CH3COO]-/[M-CH3]-" = "[M+CH3COO]-" # weird MassBank
         )
-      log_debug("Trying to harmonize adducts definitions...")
       features_table <- features_table |>
-        tidytable::mutate(
-          adduct = stringi::stri_replace_all_fixed(
-            str = adduct,
-            pattern = names(adducts_translations),
-            replacement = adducts_translations,
-            vectorize_all = FALSE
-          )
-        )
+        harmonize_adducts()
     }
 
     df_fea_min <- features_table |>
@@ -210,7 +201,7 @@ annotate_masses <-
         delta_max = (mz_dest + (1E-6 * tolerance_ppm * (mz + mz_dest) / 2) - mz)
       )
 
-    rm(df_rt_tol)
+    rm(df_rt_tol, features_table)
 
     adducts_table <- adducts |>
       tidytable::tidytable() |>
@@ -372,8 +363,6 @@ annotate_masses <-
       ))
     rm(df_adducted, df_nl_min)
 
-    # ISSUE see #141 dictionary of adducts (example 2H2O in mzmine)
-
     df_addlossed_min <- df_addlossed |>
       tidytable::mutate_rowwise(mass = calculate_mass_of_m(adduct_string = adduct, mz = mz))
 
@@ -480,7 +469,6 @@ annotate_masses <-
       tidytable::distinct()
     rm(adducts_table_multi)
 
-
     log_debug("joining within given rt tolerance \n")
     df_multi_nl <- df_multi |>
       dplyr::inner_join(
@@ -537,14 +525,9 @@ annotate_masses <-
 
     rm(df_annotated_1, df_annotated_2, df_str_unique)
 
-    df_annotated_filtered <- df_annotated_final |>
-      filter_nitrogen_rule(features_table = features_table, filter_nitro = filter_nitro)
-
-    rm(df_annotated_final, features_table)
-
     log_debug("adding chemical classification")
     df_final <- tidytable::left_join(
-      df_annotated_filtered,
+      df_annotated_final,
       structure_organism_pairs_table |>
         tidytable::distinct(
           candidate_structure_inchikey_no_stereo = structure_inchikey_no_stereo,
@@ -568,7 +551,7 @@ annotate_masses <-
       )) |>
       tidytable::mutate(candidate_adduct = candidate_library) |>
       tidytable::mutate(candidate_library = "TIMA MS1")
-    rm(structure_organism_pairs_table, df_annotated_filtered)
+    rm(structure_organism_pairs_table, df_annotated_final)
 
     df_final |>
       decorate_masses()

diff --git a/R/annotate_spectra.R b/R/annotate_spectra.R
@@ -5,6 +5,7 @@
 #' @details It takes two files as input.
 #'    A query file that will be matched against a library file.
 #'
+#' @include harmonize_adducts.R
 #' @include import_spectra.R
 #'
 #' @param input Query file containing spectra. Currently an '.mgf' file
@@ -318,6 +319,8 @@ annotate_spectra <- function(input = get_params(step = "annotate_spectra")$files
         "target_xlogp" = lib_xlogp,
         "target_precursorMz" = lib_precursors
       )
+      df_meta <- df_meta |>
+        harmonize_adducts(adducts_colname = "target_adduct")
       rm(lib_precursors)
       df_final <- df_final |>
         tidytable::left_join(df_meta) |>

diff --git a/R/taxize_spectra_benchmark.R b/R/benchmark_taxize_spectra.R
@@ -14,7 +14,7 @@
 #' @export
 #'
 #' @examples NULL
-taxize_spectra_benchmark <-
+benchmark_taxize_spectra <-
   function(input,
            keys,
            org_tax_ott,

diff --git a/R/calculate_mass_of_m.R b/R/calculate_mass_of_m.R
@@ -15,14 +15,18 @@
 #' @examples NULL
 calculate_mass_of_m <- function(adduct_string, mz, electron_mass = 5.485799E-4) {
   parsed_adduct <- parse_adduct(adduct_string)
-  return(((
-    parsed_adduct["n_charges"])[[1]] *
-    (mz +
-      parsed_adduct["n_iso"]) -
-    parsed_adduct["los_add_clu"] -
-    (parsed_adduct["n_charges"] *
-      parsed_adduct["charge"] *
-      electron_mass)
-  ) /
-    parsed_adduct["n_mer"])
+  if (all(parsed_adduct == 0)) {
+    return(0)
+  } else {
+    return(((
+      (parsed_adduct["n_charges"])[[1]] *
+        (mz +
+          parsed_adduct["n_iso"]) -
+        parsed_adduct["los_add_clu"] -
+        (parsed_adduct["n_charges"] *
+          parsed_adduct["charge"] *
+          electron_mass)
+    ) /
+      parsed_adduct["n_mer"]))
+  }
 }
diff --git a/R/cleanup_spectra.R b/R/cleanup_spectra.R
@@ -21,5 +21,7 @@ cleanup_spectra <- function(spectra) {
     spectra <- spectra |>
       Spectra::combineSpectra(f = spectra$FEATURE_ID)
   }
+  spectra <- spectra |>
+    Spectra::filterEmptySpectra()
   return(spectra)
 }
diff --git a/R/filter_annotations.R b/R/filter_annotations.R
@@ -2,11 +2,8 @@
 #'
 #' @description This function filters initial annotations.
 #'
-#' @include filter_nitrogen_rule.R
-#'
 #' @param annotations Prepared annotations file
 #' @param features Prepared features file
-#' @param filter_nitro Filter according to Nitrogen rule. Boolean
 #' @param rts Prepared retention time library
 #' @param output Output file
 #' @param tolerance_rt Tolerance to filter retention time
@@ -19,7 +16,6 @@
 filter_annotations <-
   function(annotations = get_params(step = "filter_annotations")$files$annotations$prepared$structural,
            features = get_params(step = "filter_annotations")$files$features$prepared,
-           filter_nitro = get_params(step = "filter_annotations")$options$nitrogen_rule,
            rts = get_params(step = "filter_annotations")$files$libraries$temporal$prepared,
            output = get_params(step = "filter_annotations")$files$annotations$filtered,
            tolerance_rt = get_params(step = "filter_annotations")$ms$tolerances$rt$minutes) {
@@ -96,12 +92,9 @@ filter_annotations <-
       "Candidates were removed based on retention time."
     )
 
-    features_annotated_table_3 <- features_annotated_table_2 |>
-      filter_nitrogen_rule(features_table = features_table, filter_nitro = filter_nitro)
-
     ## in case some features had a single filtered annotation
     final_table <- features_table |>
-      tidytable::left_join(features_annotated_table_3)
+      tidytable::left_join(features_annotated_table_2)
 
     rm(
       features_table,