From 2f14599d4c13d4064a57bc6331fa7b20c28d324f Mon Sep 17 00:00:00 2001
From: Kdreval <k.dreval@gmail.com>
Date: Fri, 17 Sep 2021 16:38:45 -0700
Subject: [PATCH 01/16] introduce review_hotspots functionality

---
 R/utilities.R | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/R/utilities.R b/R/utilities.R
index bd2ddc45..1d81c448 100644
--- a/R/utilities.R
+++ b/R/utilities.R
@@ -538,6 +538,52 @@ annotate_hotspots = function(mutation_maf,recurrence_min = 5,analysis_base=c("FL
   return(hot_ssms)
 }
 
+#' Annotate MAF-like data frome with a hot_spot column indicating recurrent mutations
+#'
+#' @param annotated_maf A data frame in MAF format that has hotspots annotated using function annotate_hotspots().
+#' @param genes_of_interest List of genes for hotspot review. Currently only FOXO1, MYD88, and CREBBP are supported.
+#' @param genome_build Reference genome build for the coordinates in the mAF file. Currently only variations of hg19 genome build are supported.
+#'
+#' @return The same data frame with reviewed column "hot_spot"
+#' @export
+#' @import dplyr
+#'
+#' @examples
+#' hot_ssms = review_hotspots(annotate_hotspots(get_coding_ssm()), genes_of_interest=c("CREBBP"))
+
+review_hotspots = function(annotated_maf, genes_of_interest=c("FOXO1", "MYD88", "CREBBP"), genome_build="hg19"){
+
+  # check genome build because CREBBP coordinates are hg19-based
+  if (!genome_build %in% c("hg19", "grch37", "hs37d5", "GRCh37")){
+    stop("Currently only variations of hg19 genome build are supported.")
+  }
+
+  # check that at least one of the currently supported genes are present
+  if (sum(c("FOXO1", "MYD88", "CREBBP") %in% genes_of_interest)<1){
+      stop("Currently only FOXO1, MYD88, and CREBBP are supported. Please specify one of these genes.")
+  }
+
+  # notify user that there is limited number of genes currently supported
+  if (sum(c("FOXO1", "MYD88", "CREBBP") %in% genes_of_interest)>1 & length(genes_of_interest) > 1 ){
+      print("Currently only FOXO1, MYD88, and CREBBP are supported. By default only these genes from the supplied list will be reviewed.")
+  }
+
+  if("FOXO1" %in% genes_of_interest){
+      annotated_maf <- annotated_maf %>%
+          dplyr::mutate(hot_spot=ifelse(Hugo_Symbol=="FOXO1" & HGVSp_Short == "p.M1?", "TRUE" , hot_spot))
+  }
+  if("CREBBP" %in% genes_of_interest){
+      annotated_maf <- annotated_maf %>%
+          dplyr::mutate(hot_spot=ifelse(Hugo_Symbol=="CREBBP" & Start_Position > 3785000 & End_Position < 3791000 & Variant_Classification == "Missense_Mutation", "TRUE" , hot_spot))
+  }
+  if("MYD88" %in% genes_of_interest){
+      annotated_maf <- annotated_maf %>%
+          dplyr::mutate(Hugo_Symbol=="MYD88" & HGVSp_Short %in% c("p.L273P", "p.L265P"), "TRUE" , hot_spot)
+  }
+  return(annotated_maf)
+}
+
+
 #' Make a UCSC-ready custom track file from SV data
 #
 #' @param sv_bedpe A bedpe formatted data frame of SVs

From c420d147b77557d800d79965abdc3e9e4a8c7e38 Mon Sep 17 00:00:00 2001
From: Kdreval <k.dreval@gmail.com>
Date: Fri, 17 Sep 2021 16:49:35 -0700
Subject: [PATCH 02/16] add hg38 support for review_hotspots

---
 R/utilities.R | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/R/utilities.R b/R/utilities.R
index 1d81c448..40f6d368 100644
--- a/R/utilities.R
+++ b/R/utilities.R
@@ -553,9 +553,18 @@ annotate_hotspots = function(mutation_maf,recurrence_min = 5,analysis_base=c("FL
 
 review_hotspots = function(annotated_maf, genes_of_interest=c("FOXO1", "MYD88", "CREBBP"), genome_build="hg19"){
 
-  # check genome build because CREBBP coordinates are hg19-based
-  if (!genome_build %in% c("hg19", "grch37", "hs37d5", "GRCh37")){
-    stop("Currently only variations of hg19 genome build are supported.")
+  # check genome build because CREBBP coordinates are hg19-based or hg38-based
+  coordinates <- list()
+  if (genome_build %in% c("hg19", "grch37", "hs37d5", "GRCh37")){
+    coordinates$start <- 3785000
+    coordinates$end <- 3791000
+    print(coordinates)
+  }else if(genome_build %in% c("hg38", "grch38", "GRCh38")){
+    coordinates$start <- 3734999
+    coordinates$end <- 3740999
+    print(coordinates)
+  }else{
+    stop("The genome build specified is not currently supported. Please provide MAF file in one of the following cordinates: hg19, grch37, hs37d5, GRCh37, hg38, grch38, or GRCh38")
   }
 
   # check that at least one of the currently supported genes are present
@@ -574,7 +583,7 @@ review_hotspots = function(annotated_maf, genes_of_interest=c("FOXO1", "MYD88",
   }
   if("CREBBP" %in% genes_of_interest){
       annotated_maf <- annotated_maf %>%
-          dplyr::mutate(hot_spot=ifelse(Hugo_Symbol=="CREBBP" & Start_Position > 3785000 & End_Position < 3791000 & Variant_Classification == "Missense_Mutation", "TRUE" , hot_spot))
+          dplyr::mutate(hot_spot=ifelse(Hugo_Symbol=="CREBBP" & Start_Position > coordinates$start & End_Position < coordinates$end & Variant_Classification == "Missense_Mutation", "TRUE" , hot_spot))
   }
   if("MYD88" %in% genes_of_interest){
       annotated_maf <- annotated_maf %>%

From e059c30a12996e58eb393bd9482e5807cb9d2047 Mon Sep 17 00:00:00 2001
From: Kdreval <k.dreval@gmail.com>
Date: Fri, 17 Sep 2021 16:51:01 -0700
Subject: [PATCH 03/16] add documentation to review_hotspots

---
 R/utilities.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/utilities.R b/R/utilities.R
index 40f6d368..c0aae096 100644
--- a/R/utilities.R
+++ b/R/utilities.R
@@ -542,7 +542,7 @@ annotate_hotspots = function(mutation_maf,recurrence_min = 5,analysis_base=c("FL
 #'
 #' @param annotated_maf A data frame in MAF format that has hotspots annotated using function annotate_hotspots().
 #' @param genes_of_interest List of genes for hotspot review. Currently only FOXO1, MYD88, and CREBBP are supported.
-#' @param genome_build Reference genome build for the coordinates in the mAF file. Currently only variations of hg19 genome build are supported.
+#' @param genome_build Reference genome build for the coordinates in the MAF file. The default is hg19 genome build.
 #'
 #' @return The same data frame with reviewed column "hot_spot"
 #' @export

From df19258c437ce0ea768ca5533144302bc5ea7985 Mon Sep 17 00:00:00 2001
From: Kdreval <k.dreval@gmail.com>
Date: Fri, 24 Sep 2021 10:51:11 -0700
Subject: [PATCH 04/16] initial update for get_coding_ssm_status

---
 R/utilities.R | 62 ++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 56 insertions(+), 6 deletions(-)

diff --git a/R/utilities.R b/R/utilities.R
index c0aae096..f12c84c3 100644
--- a/R/utilities.R
+++ b/R/utilities.R
@@ -4,6 +4,10 @@
 #' @param gene_symbols
 #' @param these_samples_metadata
 #' @param from_flatfile
+#' @param include_hotspots Logical parameter indicating whether hotspots object should also be tabulated. Default is TRUE.
+#' @param from_flatfile Integer value indicating minimal recurrence level
+#' @param review_hotspots Logical parameter indicating whether hotspots object should be reviewed to include functionally relevant mutations or rare lymphoma-related genes. Default is TRUE.
+#' @param ... Other parameters accepted by the review_hotspots() function
 #'
 #' @return
 #' @export
@@ -11,7 +15,14 @@
 #' @examples
 #' coding_tabulated_df = get_coding_ssm_status(gene_symbols=c("MYC","KMT2D"))
 #' coding_tabulated_df = get_coding_ssm_status() #all lymphoma genes from bundled NHL gene list
-get_coding_ssm_status = function(gene_symbols,these_samples_metadata,from_flatfile=TRUE){
+get_coding_ssm_status = function(gene_symbols,
+                                  these_samples_metadata,
+                                  from_flatfile=TRUE,
+                                  include_hotspots=TRUE,
+                                  recurrence_min = 5,
+                                  review_hotspots=TRUE,
+                                  genes_of_interest = c("FOXO1", "MYD88", "CREBBP"),
+                                  genome_build = "hg19"){
   if(missing(gene_symbols)){
     message("defaulting to all lymphoma genes")
     gene_symbols = pull(lymphoma_genes,Gene)
@@ -20,7 +31,10 @@ get_coding_ssm_status = function(gene_symbols,these_samples_metadata,from_flatfi
     these_samples_metadata = get_gambl_metadata()
   }
 
-  coding = get_coding_ssm(from_flatfile=from_flatfile) %>%
+  # call it once so the object can be reused if user wants to annotate hotspots
+  coding_ssm = get_coding_ssm(from_flatfile=from_flatfile)
+
+  coding = coding_ssm %>%
     dplyr::filter(Hugo_Symbol %in% gene_symbols &
                     Variant_Classification != "Synonymous") %>%
     dplyr::select(Tumor_Sample_Barcode,Hugo_Symbol) %>%
@@ -33,6 +47,44 @@ get_coding_ssm_status = function(gene_symbols,these_samples_metadata,from_flatfi
   #complete(wide_coding,fill=list("sample_id"=samples_table$sample_id))
   all_tabulated = left_join(samples_table,wide_coding)
   all_tabulated = all_tabulated %>% replace(is.na(.), 0)
+
+  # include hotspots if user chooses to do so
+  if(include_hotspots){
+    # first annotate
+    annotated = annotate_hotspots(coding_ssm, recurrence_min = recurrence_min)
+    # review for the supported genes
+    if(review_hotspots){
+      annotated = review_hotspots(annotated, genes_of_interest = genes_of_interest, genome_build = genome_build)
+    }
+    hotspots = annotated %>%
+              dplyr::filter(Hugo_Symbol %in% gene_symbols) %>%
+              dplyr::select(Tumor_Sample_Barcode,Hugo_Symbol, hot_spot) %>%
+              dplyr::rename("sample_id"="Tumor_Sample_Barcode","gene"="Hugo_Symbol") %>%
+              dplyr::mutate(gene=paste0(gene, "HOTSPOT")) %>%
+              unique() %>%
+              dplyr::mutate(mutated=ifelse(hot_spot=="TRUE", 1, 0)) %>%
+              replace(is.na(.), 0) %>%
+              dplyr::filter(mutated==1) %>%
+              dplyr::select(-hot_spot)
+
+    # long to wide hotspots, samples are tabulated with 0 if no hotspot is detected
+    wide_hotspots = pivot_wider(hotspots,names_from = "gene",
+                          values_from="mutated",values_fill = 0)
+    # join with the ssm object
+    all_tabulated = left_join(all_tabulated,wide_hotspots)
+    all_tabulated = all_tabulated %>% replace(is.na(.), 0)
+    # make SSM and hotspots non-redundant by giving priority to hotspot feature and setting SSM to 0
+    for (hotspot_site in colnames(wide_hotspots)[grepl("HOTSPOT", colnames(wide_hotspots))]){
+          this_gene = gsub("HOTSPOT", "", hotspot_site)
+          redundant_features = all_tabulated %>% dplyr::select(starts_with(this_gene))
+          # if not both the gene and the hotspot are present, go to the next iteration
+          if(ncol(redundant_features)!=2) next
+          # if both gene and it's hotspot are in the matrix, give priority to hotspot feature
+          all_tabulated[(all_tabulated[,this_gene]>0 & all_tabulated[,paste0(this_gene, "HOTSPOT")]==1),][,c(this_gene, paste0(this_gene, "HOTSPOT"))][,this_gene] = 0
+    }
+
+  }
+
   return(all_tabulated)
 }
 
@@ -558,11 +610,9 @@ review_hotspots = function(annotated_maf, genes_of_interest=c("FOXO1", "MYD88",
   if (genome_build %in% c("hg19", "grch37", "hs37d5", "GRCh37")){
     coordinates$start <- 3785000
     coordinates$end <- 3791000
-    print(coordinates)
   }else if(genome_build %in% c("hg38", "grch38", "GRCh38")){
     coordinates$start <- 3734999
     coordinates$end <- 3740999
-    print(coordinates)
   }else{
     stop("The genome build specified is not currently supported. Please provide MAF file in one of the following cordinates: hg19, grch37, hs37d5, GRCh37, hg38, grch38, or GRCh38")
   }
@@ -573,7 +623,7 @@ review_hotspots = function(annotated_maf, genes_of_interest=c("FOXO1", "MYD88",
   }
 
   # notify user that there is limited number of genes currently supported
-  if (sum(c("FOXO1", "MYD88", "CREBBP") %in% genes_of_interest)>1 & length(genes_of_interest) > 1 ){
+  if (sum(c("FOXO1", "MYD88", "CREBBP") %in% genes_of_interest)>1 & length(genes_of_interest) > 3 ){
       print("Currently only FOXO1, MYD88, and CREBBP are supported. By default only these genes from the supplied list will be reviewed.")
   }
 
@@ -587,7 +637,7 @@ review_hotspots = function(annotated_maf, genes_of_interest=c("FOXO1", "MYD88",
   }
   if("MYD88" %in% genes_of_interest){
       annotated_maf <- annotated_maf %>%
-          dplyr::mutate(Hugo_Symbol=="MYD88" & HGVSp_Short %in% c("p.L273P", "p.L265P"), "TRUE" , hot_spot)
+          dplyr::mutate(hot_spot=ifelse(Hugo_Symbol=="MYD88" & HGVSp_Short %in% c("p.L273P", "p.L265P"), "TRUE" , hot_spot))
   }
   return(annotated_maf)
 }

From fd8caef6537212b250bbeeeccc0c2653b95362d7 Mon Sep 17 00:00:00 2001
From: Kdreval <k.dreval@gmail.com>
Date: Fri, 24 Sep 2021 10:53:51 -0700
Subject: [PATCH 05/16] small cleanup

---
 R/utilities.R | 1 -
 1 file changed, 1 deletion(-)

diff --git a/R/utilities.R b/R/utilities.R
index f12c84c3..f6e7507b 100644
--- a/R/utilities.R
+++ b/R/utilities.R
@@ -63,7 +63,6 @@ get_coding_ssm_status = function(gene_symbols,
               dplyr::mutate(gene=paste0(gene, "HOTSPOT")) %>%
               unique() %>%
               dplyr::mutate(mutated=ifelse(hot_spot=="TRUE", 1, 0)) %>%
-              replace(is.na(.), 0) %>%
               dplyr::filter(mutated==1) %>%
               dplyr::select(-hot_spot)
 

From 39aa6de9a89891f6af4f25045c2e74687fb03ebb Mon Sep 17 00:00:00 2001
From: Kdreval <k.dreval@gmail.com>
Date: Fri, 24 Sep 2021 13:11:18 -0700
Subject: [PATCH 06/16] add documentation for the new functionality

---
 NAMESPACE                    |  1 +
 R/utilities.R                |  6 +++---
 man/get_coding_ssm_status.Rd | 19 +++++++++++++++++--
 man/review_hotspots.Rd       | 28 ++++++++++++++++++++++++++++
 4 files changed, 49 insertions(+), 5 deletions(-)
 create mode 100644 man/review_hotspots.Rd

diff --git a/NAMESPACE b/NAMESPACE
index dc26ddf9..b4f5e2ea 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -58,6 +58,7 @@ export(process_all_manta_bedpe)
 export(referesh_metadata_tables)
 export(refresh_full_table)
 export(region_to_chunks)
+export(review_hotspots)
 export(sanitize_maf_data)
 export(setup_fusions)
 export(setup_study)
diff --git a/R/utilities.R b/R/utilities.R
index f6e7507b..904d49f7 100644
--- a/R/utilities.R
+++ b/R/utilities.R
@@ -1,9 +1,9 @@
 
 #' Tabulate mutation status for non-silent SSMs for a set of genes
 #'
-#' @param gene_symbols
-#' @param these_samples_metadata
-#' @param from_flatfile
+#' @param gene_symbols List of gene symbols for which the mutation status will be tabulated. If not provided, lymphoma genes will be returned by default.
+#' @param these_samples_metadata The matedata for samples of interest to be included in the returned matrix. Only the column "sample_id" is required. If not provided, the matrix is tabulated for all available samples as default.
+#' @param from_flatfile Optional argument whether to use database or flat file to retrieve mutations.
 #' @param include_hotspots Logical parameter indicating whether hotspots object should also be tabulated. Default is TRUE.
 #' @param from_flatfile Integer value indicating minimal recurrence level
 #' @param review_hotspots Logical parameter indicating whether hotspots object should be reviewed to include functionally relevant mutations or rare lymphoma-related genes. Default is TRUE.
diff --git a/man/get_coding_ssm_status.Rd b/man/get_coding_ssm_status.Rd
index 30f4d7c5..bda38b7f 100644
--- a/man/get_coding_ssm_status.Rd
+++ b/man/get_coding_ssm_status.Rd
@@ -7,11 +7,26 @@
 get_coding_ssm_status(
   gene_symbols,
   these_samples_metadata,
-  from_flatfile = TRUE
+  from_flatfile = TRUE,
+  include_hotspots = TRUE,
+  recurrence_min = 5,
+  review_hotspots = TRUE,
+  genes_of_interest = c("FOXO1", "MYD88", "CREBBP"),
+  genome_build = "hg19"
 )
 }
 \arguments{
-\item{from_flatfile}{}
+\item{gene_symbols}{List of gene symbols for which the mutation status will be tabulated. If not provided, lymphoma genes will be returned by default.}
+
+\item{these_samples_metadata}{The matedata for samples of interest to be included in the returned matrix. Only the column "sample_id" is required. If not provided, the matrix is tabulated for all available samples as default.}
+
+\item{from_flatfile}{Integer value indicating minimal recurrence level}
+
+\item{include_hotspots}{Logical parameter indicating whether hotspots object should also be tabulated. Default is TRUE.}
+
+\item{review_hotspots}{Logical parameter indicating whether hotspots object should be reviewed to include functionally relevant mutations or rare lymphoma-related genes. Default is TRUE.}
+
+\item{...}{Other parameters accepted by the review_hotspots() function}
 }
 \value{
 
diff --git a/man/review_hotspots.Rd b/man/review_hotspots.Rd
new file mode 100644
index 00000000..695907ae
--- /dev/null
+++ b/man/review_hotspots.Rd
@@ -0,0 +1,28 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utilities.R
+\name{review_hotspots}
+\alias{review_hotspots}
+\title{Annotate MAF-like data frome with a hot_spot column indicating recurrent mutations}
+\usage{
+review_hotspots(
+  annotated_maf,
+  genes_of_interest = c("FOXO1", "MYD88", "CREBBP"),
+  genome_build = "hg19"
+)
+}
+\arguments{
+\item{annotated_maf}{A data frame in MAF format that has hotspots annotated using function annotate_hotspots().}
+
+\item{genes_of_interest}{List of genes for hotspot review. Currently only FOXO1, MYD88, and CREBBP are supported.}
+
+\item{genome_build}{Reference genome build for the coordinates in the MAF file. The default is hg19 genome build.}
+}
+\value{
+The same data frame with reviewed column "hot_spot"
+}
+\description{
+Annotate MAF-like data frome with a hot_spot column indicating recurrent mutations
+}
+\examples{
+hot_ssms = review_hotspots(annotate_hotspots(get_coding_ssm()), genes_of_interest=c("CREBBP"))
+}

From 155749e99ba67411f56f534c929e384d0bd599a3 Mon Sep 17 00:00:00 2001
From: Kdreval <k.dreval@gmail.com>
Date: Fri, 24 Sep 2021 13:55:28 -0700
Subject: [PATCH 07/16] new function overused in clustering workflow

---
 R/utilities.R | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/R/utilities.R b/R/utilities.R
index 904d49f7..a90b6868 100644
--- a/R/utilities.R
+++ b/R/utilities.R
@@ -1630,3 +1630,47 @@ FtestCNV <- function(gistic_lesions, metadata, comparison, fdr.method="fdr", fdr
   return(OUTPUT)
   message("Done!")
 }
+
+
+
+#' Using GISTIC2.0 outputs, perform Fisher's exact test to compare CNV frequencies between 2 groups
+#'
+#' @param incoming_matrix A matrix or data frame that should be filled.
+#' @param list_of_samples Vector specifying all desired samples to be present in the resulting matrix.
+#' @param fill_in_values Value that will be used to fill in the matrix.
+#' @param normalize_order Logical parameter specifying whether sample order should be according to the supplied list. Default is TRUE.
+#' @param samples_in_rows Logical argument indicating whether samples are in rows or columns. Default assumes samples are in rows and columns are features.
+#'
+#' @return a data frame with maintained orientation (rows and columns) where samples from the supplied list are present and reordered according to the specified order
+#' @export
+#'
+#' @examples
+#' partial_matrix = get_coding_ssm_status(these_samples_metadata = (get_gambl_metadata(case_set = "BL--DLBCL") %>% filter(pairing_status=="unmatched")), include_hotspots = FALSE)
+#' complete_matrix = complete_missing_from_matrix(partial_matrix, get_gambl_metadata() %>% pull(sample_id))
+complete_missing_from_matrix = function(incoming_matrix,
+                                        list_of_samples,
+                                        fill_in_values = 0,
+                                        normalize_order=TRUE,
+                                        samples_in_rows=TRUE){
+
+  if(!samples_in_rows){
+    incoming_matrix = as.data.frame(incoming_matrix) %>% t()
+  }
+
+  matrix_with_all_samples <- rbind(incoming_matrix,
+        matrix(fill_in_values:fill_in_values,# populate matrix with all 0
+               length(setdiff(list_of_samples, rownames(incoming_matrix))), # how many rows
+               ncol(incoming_matrix), # how many columns
+               dimnames = list(setdiff(list_of_samples, rownames(incoming_matrix)), # name rows with sample IDs
+                               colnames(incoming_matrix))) %>% # name columns with gene names
+          as.data.frame(.))
+  if(normalize_order){
+    matrix_with_all_samples = matrix_with_all_samples[ order(match(rownames(matrix_with_all_samples), list_of_samples)),]
+  }
+
+  if(!samples_in_rows){
+    matrix_with_all_samples = as.data.frame(matrix_with_all_samples) %>% t()
+  }
+
+  return(matrix_with_all_samples)
+}

From 6f7f75909f802b0878f891375a40b89db608b6ad Mon Sep 17 00:00:00 2001
From: Kdreval <k.dreval@gmail.com>
Date: Fri, 24 Sep 2021 13:58:46 -0700
Subject: [PATCH 08/16] add documentation

---
 NAMESPACE                           |  1 +
 R/utilities.R                       |  2 +-
 man/complete_missing_from_matrix.Rd | 35 +++++++++++++++++++++++++++++
 3 files changed, 37 insertions(+), 1 deletion(-)
 create mode 100644 man/complete_missing_from_matrix.Rd

diff --git a/NAMESPACE b/NAMESPACE
index b4f5e2ea..08bc6615 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -20,6 +20,7 @@ export(collate_nfkbiz_results)
 export(collate_results)
 export(collate_sbs_results)
 export(collate_sv_results)
+export(complete_missing_from_matrix)
 export(copy_number_vaf_plot)
 export(fetch_output_files)
 export(finalize_study)
diff --git a/R/utilities.R b/R/utilities.R
index a90b6868..d179bc3b 100644
--- a/R/utilities.R
+++ b/R/utilities.R
@@ -1633,7 +1633,7 @@ FtestCNV <- function(gistic_lesions, metadata, comparison, fdr.method="fdr", fdr
 
 
 
-#' Using GISTIC2.0 outputs, perform Fisher's exact test to compare CNV frequencies between 2 groups
+#' If some samples are missing from the matrix, add them with filled in 0 as value and normalize their ordering for consistency
 #'
 #' @param incoming_matrix A matrix or data frame that should be filled.
 #' @param list_of_samples Vector specifying all desired samples to be present in the resulting matrix.
diff --git a/man/complete_missing_from_matrix.Rd b/man/complete_missing_from_matrix.Rd
new file mode 100644
index 00000000..6a1689d7
--- /dev/null
+++ b/man/complete_missing_from_matrix.Rd
@@ -0,0 +1,35 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utilities.R
+\name{complete_missing_from_matrix}
+\alias{complete_missing_from_matrix}
+\title{If some samples are missing from the matrix, add them with filled in 0 as value and normalize their ordering for consistency}
+\usage{
+complete_missing_from_matrix(
+  incoming_matrix,
+  list_of_samples,
+  fill_in_values = 0,
+  normalize_order = TRUE,
+  samples_in_rows = TRUE
+)
+}
+\arguments{
+\item{incoming_matrix}{A matrix or data frame that should be filled.}
+
+\item{list_of_samples}{Vector specifying all desired samples to be present in the resulting matrix.}
+
+\item{fill_in_values}{Value that will be used to fill in the matrix.}
+
+\item{normalize_order}{Logical parameter specifying whether sample order should be according to the supplied list. Default is TRUE.}
+
+\item{samples_in_rows}{Logical argument indicating whether samples are in rows or columns. Default assumes samples are in rows and columns are features.}
+}
+\value{
+a data frame with maintained orientation (rows and columns) where samples from the supplied list are present and reordered according to the specified order
+}
+\description{
+If some samples are missing from the matrix, add them with filled in 0 as value and normalize their ordering for consistency
+}
+\examples{
+partial_matrix = get_coding_ssm_status(these_samples_metadata = (get_gambl_metadata(case_set = "BL--DLBCL") \%>\% filter(pairing_status=="unmatched")), include_hotspots = FALSE)
+complete_matrix = complete_missing_from_matrix(partial_matrix, get_gambl_metadata() \%>\% pull(sample_id))
+}

From 4e167e7e7f796d9fe5077e6d0143115b52fa0726 Mon Sep 17 00:00:00 2001
From: Kdreval <k.dreval@gmail.com>
Date: Fri, 24 Sep 2021 14:05:53 -0700
Subject: [PATCH 09/16] in-line documentation and some checks for required
 arguments

---
 R/utilities.R | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/R/utilities.R b/R/utilities.R
index d179bc3b..16a9f73e 100644
--- a/R/utilities.R
+++ b/R/utilities.R
@@ -1635,8 +1635,8 @@ FtestCNV <- function(gistic_lesions, metadata, comparison, fdr.method="fdr", fdr
 
 #' If some samples are missing from the matrix, add them with filled in 0 as value and normalize their ordering for consistency
 #'
-#' @param incoming_matrix A matrix or data frame that should be filled.
-#' @param list_of_samples Vector specifying all desired samples to be present in the resulting matrix.
+#' @param incoming_matrix A matrix or data frame that should be filled. Required parameter.
+#' @param list_of_samples Vector specifying all desired samples to be present in the resulting matrix. Required parameter.
 #' @param fill_in_values Value that will be used to fill in the matrix.
 #' @param normalize_order Logical parameter specifying whether sample order should be according to the supplied list. Default is TRUE.
 #' @param samples_in_rows Logical argument indicating whether samples are in rows or columns. Default assumes samples are in rows and columns are features.
@@ -1653,21 +1653,34 @@ complete_missing_from_matrix = function(incoming_matrix,
                                         normalize_order=TRUE,
                                         samples_in_rows=TRUE){
 
+  # check for required arguments
+  if (missing(incoming_matrix)){
+      stop("Please provide initial matrix to fill.")
+  }
+
+  if (missing(list_of_samples)){
+      stop("Please provide list of samples to complete the matrix and normalize order.")
+  }
+
+  # is samples are in columns, transpose the matrix so code below is generalizable
   if(!samples_in_rows){
     incoming_matrix = as.data.frame(incoming_matrix) %>% t()
   }
 
   matrix_with_all_samples <- rbind(incoming_matrix,
-        matrix(fill_in_values:fill_in_values,# populate matrix with all 0
+        matrix(fill_in_values:fill_in_values, # populate matrix with all 0
                length(setdiff(list_of_samples, rownames(incoming_matrix))), # how many rows
                ncol(incoming_matrix), # how many columns
                dimnames = list(setdiff(list_of_samples, rownames(incoming_matrix)), # name rows with sample IDs
                                colnames(incoming_matrix))) %>% # name columns with gene names
           as.data.frame(.))
+
+  # this is very helpful in clustering
   if(normalize_order){
     matrix_with_all_samples = matrix_with_all_samples[ order(match(rownames(matrix_with_all_samples), list_of_samples)),]
   }
 
+  # transpose matrix back to the initial format supplied by user (samples in columns)
   if(!samples_in_rows){
     matrix_with_all_samples = as.data.frame(matrix_with_all_samples) %>% t()
   }

From 06f977556aedeebec0d333e8076c7541ad790e37 Mon Sep 17 00:00:00 2001
From: Kdreval <k.dreval@gmail.com>
Date: Fri, 24 Sep 2021 14:35:25 -0700
Subject: [PATCH 10/16] handle numeric columns specified in metadata for
 prettyOncoplot

---
 R/viz.R | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/R/viz.R b/R/viz.R
index 9ae75fd9..9e681992 100644
--- a/R/viz.R
+++ b/R/viz.R
@@ -363,6 +363,12 @@ prettyOncoplot = function(maftools_obj,
     }
   }
 
+  if(!missing(numericMetadataColumns)){
+    message(paste0("The column(s) ", numericMetadataColumns, " specified both in metadata and numeric metadata. Plotting as numeric values..."))
+    metadataColumns = metadataColumns[!metadataColumns %in% numericMetadataColumns]
+  }
+
+
   if(missing(onco_matrix_path)){
     onco_matrix_path="onco_matrix.txt"
   }

From 7d5d49b6bbfd97f0acb58c18c3b9ece0c7d84050 Mon Sep 17 00:00:00 2001
From: Kdreval <k.dreval@gmail.com>
Date: Thu, 7 Oct 2021 22:43:09 -0700
Subject: [PATCH 11/16] add ggplot theme

---
 NAMESPACE                           |  1 +
 R/viz.R                             | 47 +++++++++++++++++++++++++++++
 man/complete_missing_from_matrix.Rd |  4 +--
 man/theme_Morons.Rd                 | 33 ++++++++++++++++++++
 4 files changed, 83 insertions(+), 2 deletions(-)
 create mode 100644 man/theme_Morons.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 08bc6615..6782872c 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -65,6 +65,7 @@ export(setup_fusions)
 export(setup_study)
 export(sv_to_bedpe_file)
 export(sv_to_custom_track)
+export(theme_Morons)
 export(tidy_gene_expression)
 export(trim_scale_expression)
 import(ComplexHeatmap)
diff --git a/R/viz.R b/R/viz.R
index 9e681992..079e5b24 100644
--- a/R/viz.R
+++ b/R/viz.R
@@ -1255,3 +1255,50 @@ prettyChromoplot = function(scores,
     geom_hline(yintercept = 0, size=7) +
     geom_text(aes(label = Chromosome, x = xses, y = 0), size = 4, color="white")
 }
+
+#' Define function for consistent plot theme
+#'
+#' @param base_size Size of the font on the plot. Defaults to 14
+#' @param base_family Font family to be used on the plot. Defaults to Arial. Always use cairo device when saving the resulting plot!
+#' @param my_legend_position Where to draw the legend? Defaults to the bottom of the plot
+#' @param my_legend_direction Which direction to draw the legend? Defaults to horizontal
+#'
+#'
+#' @return nothing
+#' @export
+#' @import ggplot2
+#'
+#' @examples
+#' ggplot(mpg, aes(displ, hwy, colour = class)) +
+#' geom_point() +
+#' theme_Morons
+
+theme_Morons <- function(base_size=14,
+                        base_family="Arial",
+                        my_legend_position="bottom",
+                        my_legend_direction = "horizontal") {
+  library(ggthemes)
+  (theme_foundation(base_size=base_size, base_family=base_family)
+    + theme(plot.title = element_text(face = "bold",
+                                      size = rel(1.2), hjust = 0.5),
+            text = element_text(colour = "black"),
+            panel.background = element_rect(colour = NA),
+            plot.background = element_rect(colour = NA),
+            panel.border = element_rect(colour = NA),
+            axis.title = element_text(face = "bold",size = rel(1.2)),
+            axis.title.y = element_text(angle=90,vjust =2),
+            axis.title.x = element_text(vjust = -0.2),
+            axis.text = element_text(size = base_size, family=base_family),
+            axis.line = element_line(colour="black", size = rel(0.8)),
+            axis.ticks = element_line(),
+            panel.grid.major = element_line(colour="#f0f0f0"),
+            panel.grid.minor = element_blank(),
+            legend.key = element_rect(colour = NA),
+            legend.position = my_legend_position,
+            legend.direction = my_legend_direction,
+            legend.title = element_text(face="italic"),
+            strip.background = element_rect(
+              color="black", fill="white", size=1, linetype="solid"),
+            strip.text = element_text(face="bold")
+    ))
+}
\ No newline at end of file
diff --git a/man/complete_missing_from_matrix.Rd b/man/complete_missing_from_matrix.Rd
index 6a1689d7..466e3f9b 100644
--- a/man/complete_missing_from_matrix.Rd
+++ b/man/complete_missing_from_matrix.Rd
@@ -13,9 +13,9 @@ complete_missing_from_matrix(
 )
 }
 \arguments{
-\item{incoming_matrix}{A matrix or data frame that should be filled.}
+\item{incoming_matrix}{A matrix or data frame that should be filled. Required parameter.}
 
-\item{list_of_samples}{Vector specifying all desired samples to be present in the resulting matrix.}
+\item{list_of_samples}{Vector specifying all desired samples to be present in the resulting matrix. Required parameter.}
 
 \item{fill_in_values}{Value that will be used to fill in the matrix.}
 
diff --git a/man/theme_Morons.Rd b/man/theme_Morons.Rd
new file mode 100644
index 00000000..5665279f
--- /dev/null
+++ b/man/theme_Morons.Rd
@@ -0,0 +1,33 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/viz.R
+\name{theme_Morons}
+\alias{theme_Morons}
+\title{Define function for consistent plot theme}
+\usage{
+theme_Morons(
+  base_size = 14,
+  base_family = "Arial",
+  my_legend_position = "bottom",
+  my_legend_direction = "horizontal"
+)
+}
+\arguments{
+\item{base_size}{Size of the font on the plot. Defaults to 14}
+
+\item{base_family}{Font family to be used on the plot. Defaults to Arial. Always use cairo device when saving the resulting plot!}
+
+\item{my_legend_position}{Where to draw the legend? Defaults to the bottom of the plot}
+
+\item{my_legend_direction}{Which direction to draw the legend? Defaults to horizontal}
+}
+\value{
+nothing
+}
+\description{
+Define function for consistent plot theme
+}
+\examples{
+ggplot(mpg, aes(displ, hwy, colour = class)) + 
+geom_point() +
+theme_Morons
+}

From 8ab9c64e43f285dce0888628e5dfd99a13d9feb6 Mon Sep 17 00:00:00 2001
From: Kdreval <k.dreval@gmail.com>
Date: Thu, 7 Oct 2021 22:53:52 -0700
Subject: [PATCH 12/16] silly bug fix

---
 R/viz.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/viz.R b/R/viz.R
index 079e5b24..a9c04979 100644
--- a/R/viz.R
+++ b/R/viz.R
@@ -1271,7 +1271,7 @@ prettyChromoplot = function(scores,
 #' @examples
 #' ggplot(mpg, aes(displ, hwy, colour = class)) +
 #' geom_point() +
-#' theme_Morons
+#' theme_Morons()
 
 theme_Morons <- function(base_size=14,
                         base_family="Arial",

From 957e7ad791e650b081b945d79b591641845c755d Mon Sep 17 00:00:00 2001
From: Kdreval <k.dreval@gmail.com>
Date: Thu, 14 Oct 2021 09:38:02 -0700
Subject: [PATCH 13/16] add indexed flatfile support for strelka2

---
 R/database.R  | 38 ++++++++++++++++++++++++++++++--------
 R/utilities.R | 20 +++++++++++++++-----
 config.yml    |  1 +
 3 files changed, 46 insertions(+), 13 deletions(-)

diff --git a/R/database.R b/R/database.R
index 386d3bb3..b7c72ea1 100644
--- a/R/database.R
+++ b/R/database.R
@@ -726,7 +726,7 @@ get_ssm_by_gene = function(gene_symbol,coding_only=FALSE,rename_splice_region=TR
 #' regions_bed = grch37_ashm_regions %>% mutate(name=paste(gene,region,sep="_"))
 #' ashm_maf=get_ssm_by_regions(regions_bed=regions_bed,streamlined=TRUE,use_name_column=use_name_column)
 
-get_ssm_by_regions = function(regions_list,regions_bed,streamlined=FALSE,maf_data=maf_data,use_name_column=FALSE,from_indexed_flatfile=FALSE){
+get_ssm_by_regions = function(regions_list,regions_bed,streamlined=FALSE,maf_data=maf_data,use_name_column=FALSE,from_indexed_flatfile=FALSE, mode="slms-3"){
   bed2region=function(x){
     paste0(x[1],":",as.numeric(x[2]),"-",as.numeric(x[3]))
   }
@@ -738,9 +738,9 @@ get_ssm_by_regions = function(regions_list,regions_bed,streamlined=FALSE,maf_dat
     }
   }
   if(missing(maf_data)){
-    region_mafs = lapply(regions,function(x){get_ssm_by_region(region=x,streamlined = streamlined,from_indexed_flatfile = from_indexed_flatfile)})
+    region_mafs = lapply(regions,function(x){get_ssm_by_region(region=x,streamlined = streamlined,from_indexed_flatfile = from_indexed_flatfile, mode=mode)})
   }else{
-    region_mafs = lapply(regions,function(x){get_ssm_by_region(region=x,streamlined = streamlined,maf_data=maf_data,from_indexed_flatfile = from_indexed_flatfile)})
+    region_mafs = lapply(regions,function(x){get_ssm_by_region(region=x,streamlined = streamlined,maf_data=maf_data,from_indexed_flatfile = from_indexed_flatfile, mode=mode)})
   }
   if(!use_name_column){
     rn = regions
@@ -788,19 +788,32 @@ get_ssm_by_regions = function(regions_list,regions_bed,streamlined=FALSE,maf_dat
 #' my_mutations=get_ssm_by_region(chromosome="8",qstart=128723128,qend=128774067)
 get_ssm_by_region = function(chromosome,qstart,qend,
                              region="",basic_columns=TRUE,streamlined=FALSE,maf_data,
-                             from_indexed_flatfile=FALSE){
+                             from_indexed_flatfile=FALSE,
+                             mode="slms-3"){
   tabix_bin = "/home/rmorin/miniconda3/bin/tabix"
   table_name = config::get("results_tables")$ssm
   db=config::get("database_name")
   if(from_indexed_flatfile){
     base_path = config::get("project_base")
     #test if we have permissions for the full gambl + icgc merge
-    maf_partial_path = config::get("results_filatfiles")$ssm$all$full
+    if(mode=="slms-3"){
+      maf_partial_path = config::get("results_filatfiles")$ssm$all$full
+    }else if (mode=="strelka2"){
+      maf_partial_path = config::get("results_filatfiles")$ssm$gambl$strelka2
+    }else{
+      stop("You requested results from indexed flatfile. The mode should be set to either slms-3 (default) or strelka2. Please specify one of these modes.")
+    }
     maf_path = paste0(base_path,maf_partial_path)
     maf_permissions = file.access(maf_path,4)
     if(maf_permissions == -1){
       #currently this will only return non-ICGC results
-      maf_partial_path = config::get("results_filatfiles")$ssm$gambl$full
+      if(mode=="slms-3"){
+        maf_partial_path = config::get("results_filatfiles")$ssm$gambl$full
+      }else if (mode=="strelka2"){
+        maf_partial_path = config::get("results_filatfiles")$ssm$gambl$strelka2
+      }else{
+        stop("You requested results from indexed flatfile. The mode should be set to either slms-3 (default) or strelka2. Please specify one of these modes.")
+      }
       base_path = config::get("project_base")
       #default is non-ICGC
       maf_path = paste0(base_path,maf_partial_path)
@@ -829,10 +842,19 @@ get_ssm_by_region = function(chromosome,qstart,qend,
     if(from_indexed_flatfile){
       streamlined = TRUE
       muts = system(paste(tabix_bin,maf_path,region),intern=TRUE)
-      if(length(muts)>0){
+      if(length(muts)>1){
         muts_region = readr::read_tsv(muts,col_names=c("Chromosome","Start_Position",
                                                   "End_Position","Tumor_Sample_Barcode"))
-      }else{
+      # a single tabix hit is parsed by hand below: read_tsv treats a length-1 string without a newline as a file name
+      }else if (length(muts)==1){
+        region_with_one_row <- stringr::str_split(muts, "\t", n=4)
+
+        muts_region = data.frame(Chromosome=unlist(region_with_one_row)[1],
+           Start_Position=as.numeric(unlist(region_with_one_row)[2]),
+           End_Position=as.numeric(unlist(region_with_one_row)[3]),
+           Tumor_Sample_Barcode=unlist(region_with_one_row)[4],
+           stringsAsFactors=FALSE)
+      } else {
         muts_region = data.frame(Chromosome=character(),
                                  Start_Position=character(),
                                  End_Position=character(),
diff --git a/R/utilities.R b/R/utilities.R
index 16a9f73e..ef6d8b59 100644
--- a/R/utilities.R
+++ b/R/utilities.R
@@ -149,7 +149,9 @@ get_mutation_frequency_bin_matrix = function(regions,
                                   show_gene_colours=FALSE,
                                   legend_row=3,
                                   legend_col=3,
-                                  legend_direction="horizontal"){
+                                  legend_direction="horizontal",
+                                  from_indexed_flatfile=FALSE,
+                                  mode="slms-3"){
 
     if(missing(regions)){
       if(missing(regions_df)){
@@ -163,7 +165,8 @@ get_mutation_frequency_bin_matrix = function(regions,
     this_region=x,drop_unmutated = TRUE,
     slide_by = slide_by,plot_type="none",window_size=window_size,
     min_count_per_bin=min_count_per_bin,return_count = TRUE,
-    metadata = these_samples_metadata)})
+    metadata = these_samples_metadata,
+    from_indexed_flatfile=from_indexed_flatfile, mode=mode)})
 
   all= do.call("rbind",dfs)
   #add a fake bin for one gene and make every patient not mutated in it (to fill gaps)
@@ -326,7 +329,9 @@ calc_mutation_frequency_sliding_windows =
            min_count_per_bin=3,
            return_count = FALSE,
            drop_unmutated=FALSE,
-           classification_column="lymphgen"){
+           classification_column="lymphgen",
+           from_indexed_flatfile=FALSE,
+           mode="slms-3"){
 
 
   max_region = 1000000
@@ -357,11 +362,16 @@ calc_mutation_frequency_sliding_windows =
   windows.dt = as.data.table(windows)
 
 
-  region_ssm = GAMBLR::get_ssm_by_region(region=this_region,streamlined = TRUE) %>%
+  region_ssm = GAMBLR::get_ssm_by_region(region=this_region,streamlined = TRUE, from_indexed_flatfile=from_indexed_flatfile, mode=mode) %>%
     dplyr::rename(c("start"="Start_Position","sample_id"="Tumor_Sample_Barcode")) %>%
     mutate(mutated=1)
 
-  region.dt = mutate(region_ssm,end=start+1) %>% as.data.table()
+  region.dt = region_ssm %>%
+    dplyr::mutate(start=as.numeric(as.character(start)),
+                  end=start+1,
+                  end=as.numeric(as.character(end))) %>%
+    dplyr::relocate(start, .before=end) %>%
+    as.data.table()
   setkey(windows.dt,start,end)
   setkey(region.dt,start,end)
 
diff --git a/config.yml b/config.yml
index 032f46bb..88fd2d71 100644
--- a/config.yml
+++ b/config.yml
@@ -40,6 +40,7 @@ default:
             gambl:
                 full: "gambl/slms-3_vcf2maf_current/level_3/final_merged_grch37.maf"
                 cds: "gambl/slms-3_vcf2maf_current/level_3/final_merged_grch37.CDS.maf"
+                strelka2: "gambl/strelka-1.1_vcf2maf-1.2/level_3/final_merged_grch37.maf"
 
     results_directories:
         manta: "manta_current/99-outputs/bedpe/"

From a858f182c192440b314626e32f4a9ebff614e13e Mon Sep 17 00:00:00 2001
From: Kdreval <k.dreval@gmail.com>
Date: Thu, 14 Oct 2021 14:58:21 -0700
Subject: [PATCH 14/16] add support for icgc in strelka flatfiles

---
 R/database.R | 2 +-
 config.yml   | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/database.R b/R/database.R
index b7c72ea1..83490273 100644
--- a/R/database.R
+++ b/R/database.R
@@ -799,7 +799,7 @@ get_ssm_by_region = function(chromosome,qstart,qend,
     if(mode=="slms-3"){
       maf_partial_path = config::get("results_filatfiles")$ssm$all$full
     }else if (mode=="strelka2"){
-      maf_partial_path = config::get("results_filatfiles")$ssm$gambl$strelka2
+      maf_partial_path = config::get("results_filatfiles")$ssm$all$strelka2
     }else{
       stop("You requested results from indexed flatfile. The mode should be set to either slms-3 (default) or strelka2. Please specify one of these modes.")
     }
diff --git a/config.yml b/config.yml
index 88fd2d71..46bcccf5 100644
--- a/config.yml
+++ b/config.yml
@@ -37,6 +37,7 @@ default:
             all:
                 full: "icgc_dart/slms-3_vcf2maf_current/level_3/final_merged_grch37.maf"
                 cds: "icgc_dart/slms-3_vcf2maf_current/level_3/final_merged_grch37.CDS.maf"
+                strelka2: "icgc_dart/strelka-1.1_vcf2maf-1.2/level_3/final_merged_grch37.maf"
             gambl:
                 full: "gambl/slms-3_vcf2maf_current/level_3/final_merged_grch37.maf"
                 cds: "gambl/slms-3_vcf2maf_current/level_3/final_merged_grch37.CDS.maf"

From c0fc00f4e600fe469ff498b617198f66424611a7 Mon Sep 17 00:00:00 2001
From: Kdreval <k.dreval@gmail.com>
Date: Thu, 14 Oct 2021 21:31:04 -0700
Subject: [PATCH 15/16] add documentation to new features

---
 R/database.R                                   | 3 +++
 R/utilities.R                                  | 4 ++++
 man/calc_mutation_frequency_sliding_windows.Rd | 8 +++++++-
 man/get_mutation_frequency_bin_matrix.Rd       | 8 +++++++-
 man/get_ssm_by_region.Rd                       | 5 ++++-
 man/get_ssm_by_regions.Rd                      | 7 ++++++-
 man/theme_Morons.Rd                            | 4 ++--
 7 files changed, 33 insertions(+), 6 deletions(-)

diff --git a/R/database.R b/R/database.R
index 83490273..b4fa6164 100644
--- a/R/database.R
+++ b/R/database.R
@@ -717,6 +717,8 @@ get_ssm_by_gene = function(gene_symbol,coding_only=FALSE,rename_splice_region=TR
 #' @param regions_bed Better yet, provide a bed file with the coordinates you want to retrieve
 #' @param streamlined Return a basic rather than full MAF format
 #' @param use_name_column If your bed-format data frame has a name column (must be named "name") these can be used to name your regions
+#' @param from_indexed_flatfile Set to TRUE to avoid using the database and instead rely on flatfiles (only works for streamlined data, not full MAF details)
+#' @param mode Only works with indexed flatfiles. Accepts 2 options of "slms-3" and "strelka2" to indicate which variant caller to use. Default is "slms-3".
 #'
 #' @return
 #' @export
@@ -776,6 +778,7 @@ get_ssm_by_regions = function(regions_list,regions_bed,streamlined=FALSE,maf_dat
 #' @param region Region formatted like chrX:1234-5678 instead of specifying chromosome, start and end separately
 #' @param basic_columns Set to TRUE to override the default behaviour of returning only the first 45 columns of MAF data
 #' @param from_indexed_flatfile Set to TRUE to avoid using the database and instead rely on flatfiles (only works for streamlined data, not full MAF details)
+#' @param mode Only works with indexed flatfiles. Accepts 2 options of "slms-3" and "strelka2" to indicate which variant caller to use. Default is "slms-3".
 #'
 #' @return A data frame containing all the MAF data columns (one row per mutation)
 #' @export
diff --git a/R/utilities.R b/R/utilities.R
index ef6d8b59..06f3895a 100644
--- a/R/utilities.R
+++ b/R/utilities.R
@@ -123,6 +123,8 @@ trim_scale_expression <- function(x){
 #' @param legend_row Fiddle with these to widen or narrow your legend
 #' @param legend_col Fiddle with these to widen or narrow your legend
 #' @param legend_col Accepts one of "horizontal" (default) or "vertical" to indicate in which direction the legend will be drawn
+#' @param from_indexed_flatfile Set to TRUE to avoid using the database and instead rely on flatfiles (only works for streamlined data, not full MAF details)
+#' @param mode Only works with indexed flatfiles. Accepts 2 options of "slms-3" and "strelka2" to indicate which variant caller to use. Default is "slms-3".
 #'
 #'
 #' @return
@@ -313,6 +315,8 @@ get_mutation_frequency_bin_matrix = function(regions,
 #' @param min_count_per_bin
 #' @param return_count
 #' @param drop_unmutated This may not currently work properly.
+#' @param from_indexed_flatfile Set to TRUE to avoid using the database and instead rely on flatfiles (only works for streamlined data, not full MAF details)
+#' @param mode Only works with indexed flatfiles. Accepts 2 options of "slms-3" and "strelka2" to indicate which variant caller to use. Default is "slms-3".
 #'
 #' @return
 #' @export
diff --git a/man/calc_mutation_frequency_sliding_windows.Rd b/man/calc_mutation_frequency_sliding_windows.Rd
index b8ae36e7..661b6fcb 100644
--- a/man/calc_mutation_frequency_sliding_windows.Rd
+++ b/man/calc_mutation_frequency_sliding_windows.Rd
@@ -17,7 +17,9 @@ calc_mutation_frequency_sliding_windows(
   min_count_per_bin = 3,
   return_count = FALSE,
   drop_unmutated = FALSE,
-  classification_column = "lymphgen"
+  classification_column = "lymphgen",
+  from_indexed_flatfile = FALSE,
+  mode = "slms-3"
 )
 }
 \arguments{
@@ -26,6 +28,10 @@ calc_mutation_frequency_sliding_windows(
 \item{drop_unmutated}{This may not currently work properly.}
 
 \item{classification_column}{Only used for plotting}
+
+\item{from_indexed_flatfile}{Set to TRUE to avoid using the database and instead rely on flatfiles (only works for streamlined data, not full MAF details)}
+
+\item{mode}{Only works with indexed flatfiles. Accepts 2 options of "slms-3" and "strelka2" to indicate which variant caller to use. Default is "slms-3".}
 }
 \value{
 
diff --git a/man/get_mutation_frequency_bin_matrix.Rd b/man/get_mutation_frequency_bin_matrix.Rd
index 2cb6c9de..576d6ac4 100644
--- a/man/get_mutation_frequency_bin_matrix.Rd
+++ b/man/get_mutation_frequency_bin_matrix.Rd
@@ -25,7 +25,9 @@ get_mutation_frequency_bin_matrix(
   show_gene_colours = FALSE,
   legend_row = 3,
   legend_col = 3,
-  legend_direction = "horizontal"
+  legend_direction = "horizontal",
+  from_indexed_flatfile = FALSE,
+  mode = "slms-3"
 )
 }
 \arguments{
@@ -57,6 +59,10 @@ get_mutation_frequency_bin_matrix(
 
 \item{legend_col}{Accepts one of "horizontal" (default) or "vertical" to indicate in which direction the legend will be drawn}
 
+\item{from_indexed_flatfile}{Set to TRUE to avoid using the database and instead rely on flatfiles (only works for streamlined data, not full MAF details)}
+
+\item{mode}{Only works with indexed flatfiles. Accepts 2 options of "slms-3" and "strelka2" to indicate which variant caller to use. Default is "slms-3".}
+
 \item{region_df}{Data frame of regions with four columns (chrom,start,end,gene_name)}
 }
 \value{
diff --git a/man/get_ssm_by_region.Rd b/man/get_ssm_by_region.Rd
index 5a7a19bc..b7421903 100644
--- a/man/get_ssm_by_region.Rd
+++ b/man/get_ssm_by_region.Rd
@@ -12,7 +12,8 @@ get_ssm_by_region(
   basic_columns = TRUE,
   streamlined = FALSE,
   maf_data,
-  from_indexed_flatfile = FALSE
+  from_indexed_flatfile = FALSE,
+  mode = "slms-3"
 )
 }
 \arguments{
@@ -27,6 +28,8 @@ get_ssm_by_region(
 \item{basic_columns}{Set to TRUE to override the default behaviour of returning only the first 45 columns of MAF data}
 
 \item{from_indexed_flatfile}{Set to TRUE to avoid using the database and instead rely on flatfiles (only works for streamlined data, not full MAF details)}
+
+\item{mode}{Only works with indexed flatfiles. Accepts 2 options of "slms-3" and "strelka2" to indicate which variant caller to use. Default is "slms-3".}
 }
 \value{
 A data frame containing all the MAF data columns (one row per mutation)
diff --git a/man/get_ssm_by_regions.Rd b/man/get_ssm_by_regions.Rd
index 89837c4e..f3596975 100644
--- a/man/get_ssm_by_regions.Rd
+++ b/man/get_ssm_by_regions.Rd
@@ -10,7 +10,8 @@ get_ssm_by_regions(
   streamlined = FALSE,
   maf_data = maf_data,
   use_name_column = FALSE,
-  from_indexed_flatfile = FALSE
+  from_indexed_flatfile = FALSE,
+  mode = "slms-3"
 )
 }
 \arguments{
@@ -21,6 +22,10 @@ get_ssm_by_regions(
 \item{streamlined}{Return a basic rather than full MAF format}
 
 \item{use_name_column}{If your bed-format data frame has a name column (must be named "name") these can be used to name your regions}
+
+\item{from_indexed_flatfile}{Set to TRUE to avoid using the database and instead rely on flatfiles (only works for streamlined data, not full MAF details)}
+
+\item{mode}{Only works with indexed flatfiles. Accepts 2 options of "slms-3" and "strelka2" to indicate which variant caller to use. Default is "slms-3".}
 }
 \value{
 
diff --git a/man/theme_Morons.Rd b/man/theme_Morons.Rd
index 5665279f..8015d608 100644
--- a/man/theme_Morons.Rd
+++ b/man/theme_Morons.Rd
@@ -27,7 +27,7 @@ nothing
 Define function for consistent plot theme
 }
 \examples{
-ggplot(mpg, aes(displ, hwy, colour = class)) + 
+ggplot(mpg, aes(displ, hwy, colour = class)) +
 geom_point() +
-theme_Morons
+theme_Morons()
 }

From 545206dc0b400032041f4c4fce2da64211ff646a Mon Sep 17 00:00:00 2001
From: Kdreval <k.dreval@gmail.com>
Date: Thu, 14 Oct 2021 21:33:14 -0700
Subject: [PATCH 16/16] add new line character

---
 R/viz.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/viz.R b/R/viz.R
index a9c04979..b643fb12 100644
--- a/R/viz.R
+++ b/R/viz.R
@@ -1301,4 +1301,4 @@ theme_Morons <- function(base_size=14,
               color="black", fill="white", size=1, linetype="solid"),
             strip.text = element_text(face="bold")
     ))
-}
\ No newline at end of file
+}