-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcompile_metrics.R
50 lines (50 loc) · 2.68 KB
/
compile_metrics.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# -------------------------------------------------------------------------------------------------
# Copyright (c) 2024, DHS.
#
# This file is part of MixDeR and is licensed under the BSD license: see LICENSE.
#
# This software was prepared for the Department of Homeland Security (DHS) by the Battelle National
# Biodefense Institute, LLC (BNBI) as part of contract HSHQDC-15-C-00064 to manage and operate the
# National Biodefense Analysis and Countermeasures Center (NBACC), a Federally Funded Research and
# Development Center.
# -------------------------------------------------------------------------------------------------
#' @title Compiling metrics for the range of allele 1 and allele 2 probablity thresholds
#'
#' @param x Data frame of allele probabilities and genotype calls
#' @param ref Data frame of reference genotypes
#' @param minimum_snps Minimum number of SNPs required
#' @param A1min Minimum value of allele 1 probability threshold
#' @param A1max Maximum value of allele 1 probability threshold
#' @param A2min Minimum value of allele 2 probability threshold
#' @param A2max Maximum value of allele 2 probability threshold
#' @param filter_missing TRUE/FALSE to filter SNPs with missing allele 2 values
#'
#' @return List of data frames compiled using the min/max values of the allele probability thresholds and the minimum number of SNPs
#' @export
#'
#' @importFrom stats setNames
compile_metrics = function(x, ref, minimum_snps, A1min, A1max, A2min, A2max, filter_missing) {
final_table = setNames(data.frame(matrix(ncol=8, nrow=0)), c("A1_Cutoff", "A2_Cutoff", "Total_SNPs", "N_noref", "SNPs_Tested", "N_Geno_Correct", "Genotype_Accuracy", "Heterozygosity"))
final_table_min = setNames(data.frame(matrix(ncol=8, nrow=0)), c("A1_Cutoff", "A2_Cutoff", "Total_SNPs", "N_noref", "SNPs_Tested", "N_Geno_Correct", "Genotype_Accuracy", "Heterozygosity"))
if (A1max < A1min | A2max < A2min) {
message("Allele ranges not entered correctly. Please re-run!")
stop()
}
for (t in seq(A2min, A2max, 0.01)) {
message(paste0("Allele Probability Threshold: ", t, "<br/>"))
for (j in seq(A1min, A1max, 0.01)) {
#message(paste0("Allele 1 threshold: ", j, "<br/>"))
new_row = calc_metrics(x, j, t, ref, minimum_snps, filter_missing)
if (!is.null(new_row)) {
final_table = rbind(final_table, new_row)
}
}
new_row_min = calc_metrics(x, "Min", t, ref, minimum_snps, filter_missing)
final_table_min = rbind(final_table_min, new_row_min)
}
sort_final_table = final_table %>%
arrange(desc(.data$Genotype_Accuracy))
sort_final_table_min = final_table_min %>%
arrange(desc(.data$Genotype_Accuracy))
return(list(sort_final_table, sort_final_table_min))
}