From 2f14599d4c13d4064a57bc6331fa7b20c28d324f Mon Sep 17 00:00:00 2001 From: Kdreval Date: Fri, 17 Sep 2021 16:38:45 -0700 Subject: [PATCH 01/16] introduce review_hotspots functionality --- R/utilities.R | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/R/utilities.R b/R/utilities.R index bd2ddc45..1d81c448 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -538,6 +538,52 @@ annotate_hotspots = function(mutation_maf,recurrence_min = 5,analysis_base=c("FL return(hot_ssms) } +#' Annotate MAF-like data frome with a hot_spot column indicating recurrent mutations +#' +#' @param annotated_maf A data frame in MAF format that has hotspots annotated using function annotate_hotspots(). +#' @param genes_of_interest List of genes for hotspot review. Currently only FOXO1, MYD88, and CREBBP are supported. +#' @param genome_build Reference genome build for the coordinates in the mAF file. Currently only variations of hg19 genome build are supported. +#' +#' @return The same data frame with reviewed column "hot_spot" +#' @export +#' @import dplyr +#' +#' @examples +#' hot_ssms = review_hotspots(annotate_hotspots(get_coding_ssm()), genes_of_interest=c("CREBBP")) + +review_hotspots = function(annotated_maf, genes_of_interest=c("FOXO1", "MYD88", "CREBBP"), genome_build="hg19"){ + + # check genome build because CREBBP coordinates are hg19-based + if (!genome_build %in% c("hg19", "grch37", "hs37d5", "GRCh37")){ + stop("Currently only variations of hg19 genome build are supported.") + } + + # check that at least one of the currently supported genes are present + if (sum(c("FOXO1", "MYD88", "CREBBP") %in% genes_of_interest)<1){ + stop("Currently only FOXO1, MYD88, and CREBBP are supported. Please specify one of these genes.") + } + + # notify user that there is limited number of genes currently supported + if (sum(c("FOXO1", "MYD88", "CREBBP") %in% genes_of_interest)>1 & length(genes_of_interest) > 1 ){ + print("Currently only FOXO1, MYD88, and CREBBP are supported. By default only these genes from the supplied list will be reviewed.") + } + + if("FOXO1" %in% genes_of_interest){ + annotated_maf <- annotated_maf %>% + dplyr::mutate(hot_spot=ifelse(Hugo_Symbol=="FOXO1" & HGVSp_Short == "p.M1?", "TRUE" , hot_spot)) + } + if("CREBBP" %in% genes_of_interest){ + annotated_maf <- annotated_maf %>% + dplyr::mutate(hot_spot=ifelse(Hugo_Symbol=="CREBBP" & Start_Position > 3785000 & End_Position < 3791000 & Variant_Classification == "Missense_Mutation", "TRUE" , hot_spot)) + } + if("MYD88" %in% genes_of_interest){ + annotated_maf <- annotated_maf %>% + dplyr::mutate(Hugo_Symbol=="MYD88" & HGVSp_Short %in% c("p.L273P", "p.L265P"), "TRUE" , hot_spot) + } + return(annotated_maf) +} + + #' Make a UCSC-ready custom track file from SV data # #' @param sv_bedpe A bedpe formatted data frame of SVs From c420d147b77557d800d79965abdc3e9e4a8c7e38 Mon Sep 17 00:00:00 2001 From: Kdreval Date: Fri, 17 Sep 2021 16:49:35 -0700 Subject: [PATCH 02/16] add hg38 support for review_hotspots --- R/utilities.R | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index 1d81c448..40f6d368 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -553,9 +553,18 @@ annotate_hotspots = function(mutation_maf,recurrence_min = 5,analysis_base=c("FL review_hotspots = function(annotated_maf, genes_of_interest=c("FOXO1", "MYD88", "CREBBP"), genome_build="hg19"){ - # check genome build because CREBBP coordinates are hg19-based - if (!genome_build %in% c("hg19", "grch37", "hs37d5", "GRCh37")){ - stop("Currently only variations of hg19 genome build are supported.") + # check genome build because CREBBP coordinates are hg19-based or hg38-based + coordinates <- list() + if (genome_build %in% c("hg19", "grch37", "hs37d5", "GRCh37")){ + coordinates$start <- 3785000 + coordinates$end <- 3791000 + print(coordinates) + }else if(genome_build %in% c("hg38", "grch38", "GRCh38")){ + coordinates$start <- 3734999 + coordinates$end <- 3740999 + print(coordinates) + }else{ + stop("The genome build specified is not currently supported. Please provide MAF file in one of the following cordinates: hg19, grch37, hs37d5, GRCh37, hg38, grch38, or GRCh38") } # check that at least one of the currently supported genes are present @@ -574,7 +583,7 @@ review_hotspots = function(annotated_maf, genes_of_interest=c("FOXO1", "MYD88", } if("CREBBP" %in% genes_of_interest){ annotated_maf <- annotated_maf %>% - dplyr::mutate(hot_spot=ifelse(Hugo_Symbol=="CREBBP" & Start_Position > 3785000 & End_Position < 3791000 & Variant_Classification == "Missense_Mutation", "TRUE" , hot_spot)) + dplyr::mutate(hot_spot=ifelse(Hugo_Symbol=="CREBBP" & Start_Position > coordinates$start & End_Position < coordinates$end & Variant_Classification == "Missense_Mutation", "TRUE" , hot_spot)) } if("MYD88" %in% genes_of_interest){ annotated_maf <- annotated_maf %>% From e059c30a12996e58eb393bd9482e5807cb9d2047 Mon Sep 17 00:00:00 2001 From: Kdreval Date: Fri, 17 Sep 2021 16:51:01 -0700 Subject: [PATCH 03/16] add documentation to review_hotspots --- R/utilities.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/utilities.R b/R/utilities.R index 40f6d368..c0aae096 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -542,7 +542,7 @@ annotate_hotspots = function(mutation_maf,recurrence_min = 5,analysis_base=c("FL #' #' @param annotated_maf A data frame in MAF format that has hotspots annotated using function annotate_hotspots(). #' @param genes_of_interest List of genes for hotspot review. Currently only FOXO1, MYD88, and CREBBP are supported. -#' @param genome_build Reference genome build for the coordinates in the mAF file. Currently only variations of hg19 genome build are supported. +#' @param genome_build Reference genome build for the coordinates in the MAF file. The default is hg19 genome build. #' #' @return The same data frame with reviewed column "hot_spot" #' @export From df19258c437ce0ea768ca5533144302bc5ea7985 Mon Sep 17 00:00:00 2001 From: Kdreval Date: Fri, 24 Sep 2021 10:51:11 -0700 Subject: [PATCH 04/16] initial update for get_coding_ssm_status --- R/utilities.R | 62 ++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 56 insertions(+), 6 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index c0aae096..f12c84c3 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -4,6 +4,10 @@ #' @param gene_symbols #' @param these_samples_metadata #' @param from_flatfile +#' @param include_hotspots Logical parameter indicating whether hotspots object should also be tabulated. Default is TRUE. +#' @param from_flatfile Integer value indicating minimal recurrence level +#' @param review_hotspots Logical parameter indicating whether hotspots object should be reviewed to include functionally relevant mutations or rare lymphoma-related genes. Default is TRUE. +#' @param ... Other parameters accepted by the review_hotspots() function #' #' @return #' @export @@ -11,7 +15,14 @@ #' @examples #' coding_tabulated_df = get_coding_ssm_status(gene_symbols=c("MYC","KMT2D")) #' coding_tabulated_df = get_coding_ssm_status() #all lymphoma genes from bundled NHL gene list -get_coding_ssm_status = function(gene_symbols,these_samples_metadata,from_flatfile=TRUE){ +get_coding_ssm_status = function(gene_symbols, + these_samples_metadata, + from_flatfile=TRUE, + include_hotspots=TRUE, + recurrence_min = 5, + review_hotspots=TRUE, + genes_of_interest = c("FOXO1", "MYD88", "CREBBP"), + genome_build = "hg19"){ if(missing(gene_symbols)){ message("defaulting to all lymphoma genes") gene_symbols = pull(lymphoma_genes,Gene) @@ -20,7 +31,10 @@ get_coding_ssm_status = function(gene_symbols,these_samples_metadata,from_flatfi these_samples_metadata = get_gambl_metadata() } - coding = get_coding_ssm(from_flatfile=from_flatfile) %>% + # call it once so the object can be reused if user wants to annotate hotspots + coding_ssm = get_coding_ssm(from_flatfile=from_flatfile) + + coding = coding_ssm %>% dplyr::filter(Hugo_Symbol %in% gene_symbols & Variant_Classification != "Synonymous") %>% dplyr::select(Tumor_Sample_Barcode,Hugo_Symbol) %>% @@ -33,6 +47,44 @@ get_coding_ssm_status = function(gene_symbols,these_samples_metadata,from_flatfi #complete(wide_coding,fill=list("sample_id"=samples_table$sample_id)) all_tabulated = left_join(samples_table,wide_coding) all_tabulated = all_tabulated %>% replace(is.na(.), 0) + + # include hotspots if user chooses to do so + if(include_hotspots){ + # first annotate + annotated = annotate_hotspots(coding_ssm, recurrence_min = recurrence_min) + # review for the supported genes + if(review_hotspots){ + annotated = review_hotspots(annotated, genes_of_interest = genes_of_interest, genome_build = genome_build) + } + hotspots = annotated %>% + dplyr::filter(Hugo_Symbol %in% gene_symbols) %>% + dplyr::select(Tumor_Sample_Barcode,Hugo_Symbol, hot_spot) %>% + dplyr::rename("sample_id"="Tumor_Sample_Barcode","gene"="Hugo_Symbol") %>% + dplyr::mutate(gene=paste0(gene, "HOTSPOT")) %>% + unique() %>% + dplyr::mutate(mutated=ifelse(hot_spot=="TRUE", 1, 0)) %>% + replace(is.na(.), 0) %>% + dplyr::filter(mutated==1) %>% + dplyr::select(-hot_spot) + + # long to wide hotspots, samples are tabulated with 0 if no hotspot is detected + wide_hotspots = pivot_wider(hotspots,names_from = "gene", + values_from="mutated",values_fill = 0) + # join with the ssm object + all_tabulated = left_join(all_tabulated,wide_hotspots) + all_tabulated = all_tabulated %>% replace(is.na(.), 0) + # make SSM and hotspots non-redundant by giving priority to hotspot feature and setting SSM to 0 + for (hotspot_site in colnames(wide_hotspots)[grepl("HOTSPOT", colnames(wide_hotspots))]){ + this_gene = gsub("HOTSPOT", "", hotspot_site) + redundant_features = all_tabulated %>% dplyr::select(starts_with(this_gene)) + # if not both the gene and the hotspot are present, go to the next iteration + if(ncol(redundant_features)!=2) next + # if both gene and it's hotspot are in the matrix, give priority to hotspot feature + all_tabulated[(all_tabulated[,this_gene]>0 & all_tabulated[,paste0(this_gene, "HOTSPOT")]==1),][,c(this_gene, paste0(this_gene, "HOTSPOT"))][,this_gene] = 0 + } + + } + return(all_tabulated) } @@ -558,11 +610,9 @@ review_hotspots = function(annotated_maf, genes_of_interest=c("FOXO1", "MYD88", if (genome_build %in% c("hg19", "grch37", "hs37d5", "GRCh37")){ coordinates$start <- 3785000 coordinates$end <- 3791000 - print(coordinates) }else if(genome_build %in% c("hg38", "grch38", "GRCh38")){ coordinates$start <- 3734999 coordinates$end <- 3740999 - print(coordinates) }else{ stop("The genome build specified is not currently supported. Please provide MAF file in one of the following cordinates: hg19, grch37, hs37d5, GRCh37, hg38, grch38, or GRCh38") } @@ -573,7 +623,7 @@ review_hotspots = function(annotated_maf, genes_of_interest=c("FOXO1", "MYD88", } # notify user that there is limited number of genes currently supported - if (sum(c("FOXO1", "MYD88", "CREBBP") %in% genes_of_interest)>1 & length(genes_of_interest) > 1 ){ + if (sum(c("FOXO1", "MYD88", "CREBBP") %in% genes_of_interest)>1 & length(genes_of_interest) > 3 ){ print("Currently only FOXO1, MYD88, and CREBBP are supported. By default only these genes from the supplied list will be reviewed.") } @@ -587,7 +637,7 @@ review_hotspots = function(annotated_maf, genes_of_interest=c("FOXO1", "MYD88", } if("MYD88" %in% genes_of_interest){ annotated_maf <- annotated_maf %>% - dplyr::mutate(Hugo_Symbol=="MYD88" & HGVSp_Short %in% c("p.L273P", "p.L265P"), "TRUE" , hot_spot) + dplyr::mutate(hot_spot=ifelse(Hugo_Symbol=="MYD88" & HGVSp_Short %in% c("p.L273P", "p.L265P"), "TRUE" , hot_spot)) } return(annotated_maf) } From fd8caef6537212b250bbeeeccc0c2653b95362d7 Mon Sep 17 00:00:00 2001 From: Kdreval Date: Fri, 24 Sep 2021 10:53:51 -0700 Subject: [PATCH 05/16] small cleanup --- R/utilities.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/utilities.R b/R/utilities.R index f12c84c3..f6e7507b 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -63,7 +63,6 @@ get_coding_ssm_status = function(gene_symbols, dplyr::mutate(gene=paste0(gene, "HOTSPOT")) %>% unique() %>% dplyr::mutate(mutated=ifelse(hot_spot=="TRUE", 1, 0)) %>% - replace(is.na(.), 0) %>% dplyr::filter(mutated==1) %>% dplyr::select(-hot_spot) From 39aa6de9a89891f6af4f25045c2e74687fb03ebb Mon Sep 17 00:00:00 2001 From: Kdreval Date: Fri, 24 Sep 2021 13:11:18 -0700 Subject: [PATCH 06/16] add documentaion for the new functionality --- NAMESPACE | 1 + R/utilities.R | 6 +++--- man/get_coding_ssm_status.Rd | 19 +++++++++++++++++-- man/review_hotspots.Rd | 28 ++++++++++++++++++++++++++++ 4 files changed, 49 insertions(+), 5 deletions(-) create mode 100644 man/review_hotspots.Rd diff --git a/NAMESPACE b/NAMESPACE index dc26ddf9..b4f5e2ea 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -58,6 +58,7 @@ export(process_all_manta_bedpe) export(referesh_metadata_tables) export(refresh_full_table) export(region_to_chunks) +export(review_hotspots) export(sanitize_maf_data) export(setup_fusions) export(setup_study) diff --git a/R/utilities.R b/R/utilities.R index f6e7507b..904d49f7 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -1,9 +1,9 @@ #' Tabulate mutation status for non-silent SSMs for a set of genes #' -#' @param gene_symbols -#' @param these_samples_metadata -#' @param from_flatfile +#' @param gene_symbols List of gene symbols for which the mutation status will be tabulated. If not provided, lymphoma genes will be returned by default. +#' @param these_samples_metadata The matedata for samples of interest to be included in the returned matrix. Only the column "sample_id" is required. If not provided, the matrix is tabulated for all available samples as default. +#' @param from_flatfile Optional argument whether to use database or flat file to retrieve mutations. #' @param include_hotspots Logical parameter indicating whether hotspots object should also be tabulated. Default is TRUE. #' @param from_flatfile Integer value indicating minimal recurrence level #' @param review_hotspots Logical parameter indicating whether hotspots object should be reviewed to include functionally relevant mutations or rare lymphoma-related genes. Default is TRUE. diff --git a/man/get_coding_ssm_status.Rd b/man/get_coding_ssm_status.Rd index 30f4d7c5..bda38b7f 100644 --- a/man/get_coding_ssm_status.Rd +++ b/man/get_coding_ssm_status.Rd @@ -7,11 +7,26 @@ get_coding_ssm_status( gene_symbols, these_samples_metadata, - from_flatfile = TRUE + from_flatfile = TRUE, + include_hotspots = TRUE, + recurrence_min = 5, + review_hotspots = TRUE, + genes_of_interest = c("FOXO1", "MYD88", "CREBBP"), + genome_build = "hg19" ) } \arguments{ -\item{from_flatfile}{} +\item{gene_symbols}{List of gene symbols for which the mutation status will be tabulated. If not provided, lymphoma genes will be returned by default.} + +\item{these_samples_metadata}{The matedata for samples of interest to be included in the returned matrix. Only the column "sample_id" is required. If not provided, the matrix is tabulated for all available samples as default.} + +\item{from_flatfile}{Integer value indicating minimal recurrence level} + +\item{include_hotspots}{Logical parameter indicating whether hotspots object should also be tabulated. Default is TRUE.} + +\item{review_hotspots}{Logical parameter indicating whether hotspots object should be reviewed to include functionally relevant mutations or rare lymphoma-related genes. Default is TRUE.} + +\item{...}{Other parameters accepted by the review_hotspots() function} } \value{ diff --git a/man/review_hotspots.Rd b/man/review_hotspots.Rd new file mode 100644 index 00000000..695907ae --- /dev/null +++ b/man/review_hotspots.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utilities.R +\name{review_hotspots} +\alias{review_hotspots} +\title{Annotate MAF-like data frome with a hot_spot column indicating recurrent mutations} +\usage{ +review_hotspots( + annotated_maf, + genes_of_interest = c("FOXO1", "MYD88", "CREBBP"), + genome_build = "hg19" +) +} +\arguments{ +\item{annotated_maf}{A data frame in MAF format that has hotspots annotated using function annotate_hotspots().} + +\item{genes_of_interest}{List of genes for hotspot review. Currently only FOXO1, MYD88, and CREBBP are supported.} + +\item{genome_build}{Reference genome build for the coordinates in the MAF file. The default is hg19 genome build.} +} +\value{ +The same data frame with reviewed column "hot_spot" +} +\description{ +Annotate MAF-like data frome with a hot_spot column indicating recurrent mutations +} +\examples{ +hot_ssms = review_hotspots(annotate_hotspots(get_coding_ssm()), genes_of_interest=c("CREBBP")) +} From 155749e99ba67411f56f534c929e384d0bd599a3 Mon Sep 17 00:00:00 2001 From: Kdreval Date: Fri, 24 Sep 2021 13:55:28 -0700 Subject: [PATCH 07/16] new function overused in clustering workflow --- R/utilities.R | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/R/utilities.R b/R/utilities.R index 904d49f7..a90b6868 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -1630,3 +1630,47 @@ FtestCNV <- function(gistic_lesions, metadata, comparison, fdr.method="fdr", fdr return(OUTPUT) message("Done!") } + + + +#' Using GISTIC2.0 outputs, perform Fisher's exact test to compare CNV frequencies between 2 groups +#' +#' @param incoming_matrix A matrix or data frame that should be filled. +#' @param list_of_samples Vector specifying all desired samples to be present in the resulting matrix. +#' @param fill_in_values Value that will be used to fill in the matrix. +#' @param normalize_order Logical parameter specifying whether sample order should be according to the supplied list. Default is TRUE. +#' @param samples_in_rows Logical argument indicating whether samples are in rows or columns. Default assumes samples are in rows and columns are features. +#' +#' @return a data frame with maintained orientation (rows and columns) where samples from the supplied list are present and reordered according to the specified order +#' @export +#' +#' @examples +#' partial_matrix = get_coding_ssm_status(these_samples_metadata = (get_gambl_metadata(case_set = "BL--DLBCL") %>% filter(pairing_status=="unmatched")), include_hotspots = FALSE) +#' complete_matrix = complete_missing_from_matrix(partial_matrix, get_gambl_metadata() %>% pull(sample_id)) +complete_missing_from_matrix = function(incoming_matrix, + list_of_samples, + fill_in_values = 0, + normalize_order=TRUE, + samples_in_rows=TRUE){ + + if(!samples_in_rows){ + incoming_matrix = as.data.frame(incoming_matrix) %>% t() + } + + matrix_with_all_samples <- rbind(incoming_matrix, + matrix(fill_in_values:fill_in_values,# populate matrix with all 0 + length(setdiff(list_of_samples, rownames(incoming_matrix))), # how many rows + ncol(incoming_matrix), # how many columns + dimnames = list(setdiff(list_of_samples, rownames(incoming_matrix)), # name rows with sample IDs + colnames(incoming_matrix))) %>% # name columns with gene names + as.data.frame(.)) + if(normalize_order){ + matrix_with_all_samples = matrix_with_all_samples[ order(match(rownames(matrix_with_all_samples), list_of_samples)),] + } + + if(!samples_in_rows){ + matrix_with_all_samples = as.data.frame(matrix_with_all_samples) %>% t() + } + + return(matrix_with_all_samples) +} From 6f7f75909f802b0878f891375a40b89db608b6ad Mon Sep 17 00:00:00 2001 From: Kdreval Date: Fri, 24 Sep 2021 13:58:46 -0700 Subject: [PATCH 08/16] add documentation --- NAMESPACE | 1 + R/utilities.R | 2 +- man/complete_missing_from_matrix.Rd | 35 +++++++++++++++++++++++++++++ 3 files changed, 37 insertions(+), 1 deletion(-) create mode 100644 man/complete_missing_from_matrix.Rd diff --git a/NAMESPACE b/NAMESPACE index b4f5e2ea..08bc6615 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -20,6 +20,7 @@ export(collate_nfkbiz_results) export(collate_results) export(collate_sbs_results) export(collate_sv_results) +export(complete_missing_from_matrix) export(copy_number_vaf_plot) export(fetch_output_files) export(finalize_study) diff --git a/R/utilities.R b/R/utilities.R index a90b6868..d179bc3b 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -1633,7 +1633,7 @@ FtestCNV <- function(gistic_lesions, metadata, comparison, fdr.method="fdr", fdr -#' Using GISTIC2.0 outputs, perform Fisher's exact test to compare CNV frequencies between 2 groups +#' If some samples are missing from the matrix, add them with filled in 0 as value and normalize their ordering for consistency #' #' @param incoming_matrix A matrix or data frame that should be filled. #' @param list_of_samples Vector specifying all desired samples to be present in the resulting matrix. diff --git a/man/complete_missing_from_matrix.Rd b/man/complete_missing_from_matrix.Rd new file mode 100644 index 00000000..6a1689d7 --- /dev/null +++ b/man/complete_missing_from_matrix.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utilities.R +\name{complete_missing_from_matrix} +\alias{complete_missing_from_matrix} +\title{If some samples are missing from the matrix, add them with filled in 0 as value and normalize their ordering for consistency} +\usage{ +complete_missing_from_matrix( + incoming_matrix, + list_of_samples, + fill_in_values = 0, + normalize_order = TRUE, + samples_in_rows = TRUE +) +} +\arguments{ +\item{incoming_matrix}{A matrix or data frame that should be filled.} + +\item{list_of_samples}{Vector specifying all desired samples to be present in the resulting matrix.} + +\item{fill_in_values}{Value that will be used to fill in the matrix.} + +\item{normalize_order}{Logical parameter specifying whether sample order should be according to the supplied list. Default is TRUE.} + +\item{samples_in_rows}{Logical argument indicating whether samples are in rows or columns. Default assumes samples are in rows and columns are features.} +} +\value{ +a data frame with maintained orientation (rows and columns) where samples from the supplied list are present and reordered according to the specified order +} +\description{ +If some samples are missing from the matrix, add them with filled in 0 as value and normalize their ordering for consistency +} +\examples{ +partial_matrix = get_coding_ssm_status(these_samples_metadata = (get_gambl_metadata(case_set = "BL--DLBCL") \%>\% filter(pairing_status=="unmatched")), include_hotspots = FALSE) +complete_matrix = complete_missing_from_matrix(partial_matrix, get_gambl_metadata() \%>\% pull(sample_id)) +} From 4e167e7e7f796d9fe5077e6d0143115b52fa0726 Mon Sep 17 00:00:00 2001 From: Kdreval Date: Fri, 24 Sep 2021 14:05:53 -0700 Subject: [PATCH 09/16] in-line documentation and some checks for required arguments --- R/utilities.R | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index d179bc3b..16a9f73e 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -1635,8 +1635,8 @@ FtestCNV <- function(gistic_lesions, metadata, comparison, fdr.method="fdr", fdr #' If some samples are missing from the matrix, add them with filled in 0 as value and normalize their ordering for consistency #' -#' @param incoming_matrix A matrix or data frame that should be filled. -#' @param list_of_samples Vector specifying all desired samples to be present in the resulting matrix. +#' @param incoming_matrix A matrix or data frame that should be filled. Required parameter. +#' @param list_of_samples Vector specifying all desired samples to be present in the resulting matrix. Required parameter. #' @param fill_in_values Value that will be used to fill in the matrix. #' @param normalize_order Logical parameter specifying whether sample order should be according to the supplied list. Default is TRUE. #' @param samples_in_rows Logical argument indicating whether samples are in rows or columns. Default assumes samples are in rows and columns are features. @@ -1653,21 +1653,34 @@ complete_missing_from_matrix = function(incoming_matrix, normalize_order=TRUE, samples_in_rows=TRUE){ + # check for required arguments + if (missing(incoming_matrix)){ + stop("Please provide initial matrix to fill.") + } + + if (missing(list_of_samples)){ + stop("Please provide list of samples to complete the matrix and normalize order.") + } + + # is samples are in columns, transpose the matrix so code below is generalizable if(!samples_in_rows){ incoming_matrix = as.data.frame(incoming_matrix) %>% t() } matrix_with_all_samples <- rbind(incoming_matrix, - matrix(fill_in_values:fill_in_values,# populate matrix with all 0 + matrix(fill_in_values:fill_in_values, # populate matrix with all 0 length(setdiff(list_of_samples, rownames(incoming_matrix))), # how many rows ncol(incoming_matrix), # how many columns dimnames = list(setdiff(list_of_samples, rownames(incoming_matrix)), # name rows with sample IDs colnames(incoming_matrix))) %>% # name columns with gene names as.data.frame(.)) + + # this is very helpful in clustering if(normalize_order){ matrix_with_all_samples = matrix_with_all_samples[ order(match(rownames(matrix_with_all_samples), list_of_samples)),] } + # transpose matrix back to the initial format supplied by user (samples in columns) if(!samples_in_rows){ matrix_with_all_samples = as.data.frame(matrix_with_all_samples) %>% t() } From 06f977556aedeebec0d333e8076c7541ad790e37 Mon Sep 17 00:00:00 2001 From: Kdreval Date: Fri, 24 Sep 2021 14:35:25 -0700 Subject: [PATCH 10/16] handle numeric columns specified in metadata for prettyOncoplot --- R/viz.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/R/viz.R b/R/viz.R index 9ae75fd9..9e681992 100644 --- a/R/viz.R +++ b/R/viz.R @@ -363,6 +363,12 @@ prettyOncoplot = function(maftools_obj, } } + if(!missing(numericMetadataColumns)){ + message(paste0("The column(s) ", numericMetadataColumns, " specified both in metadata and numeric metadata. Plotting as numeric values...")) + metadataColumns = metadataColumns[!metadataColumns %in% numericMetadataColumns] + } + + if(missing(onco_matrix_path)){ onco_matrix_path="onco_matrix.txt" } From 7d5d49b6bbfd97f0acb58c18c3b9ece0c7d84050 Mon Sep 17 00:00:00 2001 From: Kdreval Date: Thu, 7 Oct 2021 22:43:09 -0700 Subject: [PATCH 11/16] add ggplot theme --- NAMESPACE | 1 + R/viz.R | 47 +++++++++++++++++++++++++++++ man/complete_missing_from_matrix.Rd | 4 +-- man/theme_Morons.Rd | 33 ++++++++++++++++++++ 4 files changed, 83 insertions(+), 2 deletions(-) create mode 100644 man/theme_Morons.Rd diff --git a/NAMESPACE b/NAMESPACE index 08bc6615..6782872c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -65,6 +65,7 @@ export(setup_fusions) export(setup_study) export(sv_to_bedpe_file) export(sv_to_custom_track) +export(theme_Morons) export(tidy_gene_expression) export(trim_scale_expression) import(ComplexHeatmap) diff --git a/R/viz.R b/R/viz.R index 9e681992..079e5b24 100644 --- a/R/viz.R +++ b/R/viz.R @@ -1255,3 +1255,50 @@ prettyChromoplot = function(scores, geom_hline(yintercept = 0, size=7) + geom_text(aes(label = Chromosome, x = xses, y = 0), size = 4, color="white") } + +#' Define function for consistent plot theme +#' +#' @param base_size Size of the font on the plot. Defaults to 14 +#' @param base_family Font family to be used on the plot. Defaults to Arial. Always use cairo device when saving the resulting plot! +#' @param my_legend_position Where to draw the legend? Defaults to the bottom of the plot +#' @param my_legend_direction Which direction to draw the legend? Defaults to horizontal +#' +#' +#' @return nothing +#' @export +#' @import ggplot2 +#' +#' @examples +#' ggplot(mpg, aes(displ, hwy, colour = class)) + +#' geom_point() + +#' theme_Morons + +theme_Morons <- function(base_size=14, + base_family="Arial", + my_legend_position="bottom", + my_legend_direction = "horizontal") { + library(ggthemes) + (theme_foundation(base_size=base_size, base_family=base_family) + + theme(plot.title = element_text(face = "bold", + size = rel(1.2), hjust = 0.5), + text = element_text(colour = "black"), + panel.background = element_rect(colour = NA), + plot.background = element_rect(colour = NA), + panel.border = element_rect(colour = NA), + axis.title = element_text(face = "bold",size = rel(1.2)), + axis.title.y = element_text(angle=90,vjust =2), + axis.title.x = element_text(vjust = -0.2), + axis.text = element_text(size = base_size, family=base_family), + axis.line = element_line(colour="black", size = rel(0.8)), + axis.ticks = element_line(), + panel.grid.major = element_line(colour="#f0f0f0"), + panel.grid.minor = element_blank(), + legend.key = element_rect(colour = NA), + legend.position = my_legend_position, + legend.direction = my_legend_direction, + legend.title = element_text(face="italic"), + strip.background = element_rect( + color="black", fill="white", size=1, linetype="solid"), + strip.text = element_text(face="bold") + )) +} \ No newline at end of file diff --git a/man/complete_missing_from_matrix.Rd b/man/complete_missing_from_matrix.Rd index 6a1689d7..466e3f9b 100644 --- a/man/complete_missing_from_matrix.Rd +++ b/man/complete_missing_from_matrix.Rd @@ -13,9 +13,9 @@ complete_missing_from_matrix( ) } \arguments{ -\item{incoming_matrix}{A matrix or data frame that should be filled.} +\item{incoming_matrix}{A matrix or data frame that should be filled. Required parameter.} -\item{list_of_samples}{Vector specifying all desired samples to be present in the resulting matrix.} +\item{list_of_samples}{Vector specifying all desired samples to be present in the resulting matrix. Required parameter.} \item{fill_in_values}{Value that will be used to fill in the matrix.} diff --git a/man/theme_Morons.Rd b/man/theme_Morons.Rd new file mode 100644 index 00000000..5665279f --- /dev/null +++ b/man/theme_Morons.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/viz.R +\name{theme_Morons} +\alias{theme_Morons} +\title{Define function for consistent plot theme} +\usage{ +theme_Morons( + base_size = 14, + base_family = "Arial", + my_legend_position = "bottom", + my_legend_direction = "horizontal" +) +} +\arguments{ +\item{base_size}{Size of the font on the plot. Defaults to 14} + +\item{base_family}{Font family to be used on the plot. Defaults to Arial. Always use cairo device when saving the resulting plot!} + +\item{my_legend_position}{Where to draw the legend? Defaults to the bottom of the plot} + +\item{my_legend_direction}{Which direction to draw the legend? Defaults to horizontal} +} +\value{ +nothing +} +\description{ +Define function for consistent plot theme +} +\examples{ +ggplot(mpg, aes(displ, hwy, colour = class)) + +geom_point() + +theme_Morons +} From 8ab9c64e43f285dce0888628e5dfd99a13d9feb6 Mon Sep 17 00:00:00 2001 From: Kdreval Date: Thu, 7 Oct 2021 22:53:52 -0700 Subject: [PATCH 12/16] silly bug fix --- R/viz.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/viz.R b/R/viz.R index 079e5b24..a9c04979 100644 --- a/R/viz.R +++ b/R/viz.R @@ -1271,7 +1271,7 @@ prettyChromoplot = function(scores, #' @examples #' ggplot(mpg, aes(displ, hwy, colour = class)) + #' geom_point() + -#' theme_Morons +#' theme_Morons() theme_Morons <- function(base_size=14, base_family="Arial", From 957e7ad791e650b081b945d79b591641845c755d Mon Sep 17 00:00:00 2001 From: Kdreval Date: Thu, 14 Oct 2021 09:38:02 -0700 Subject: [PATCH 13/16] add indexed flatfile support for strelka2 --- R/database.R | 38 ++++++++++++++++++++++++++++++-------- R/utilities.R | 20 +++++++++++++++----- config.yml | 1 + 3 files changed, 46 insertions(+), 13 deletions(-) diff --git a/R/database.R b/R/database.R index 386d3bb3..b7c72ea1 100644 --- a/R/database.R +++ b/R/database.R @@ -726,7 +726,7 @@ get_ssm_by_gene = function(gene_symbol,coding_only=FALSE,rename_splice_region=TR #' regions_bed = grch37_ashm_regions %>% mutate(name=paste(gene,region,sep="_")) #' ashm_maf=get_ssm_by_regions(regions_bed=regions_bed,streamlined=TRUE,use_name_column=use_name_column) -get_ssm_by_regions = function(regions_list,regions_bed,streamlined=FALSE,maf_data=maf_data,use_name_column=FALSE,from_indexed_flatfile=FALSE){ +get_ssm_by_regions = function(regions_list,regions_bed,streamlined=FALSE,maf_data=maf_data,use_name_column=FALSE,from_indexed_flatfile=FALSE, mode="slms-3"){ bed2region=function(x){ paste0(x[1],":",as.numeric(x[2]),"-",as.numeric(x[3])) } @@ -738,9 +738,9 @@ get_ssm_by_regions = function(regions_list,regions_bed,streamlined=FALSE,maf_dat } } if(missing(maf_data)){ - region_mafs = lapply(regions,function(x){get_ssm_by_region(region=x,streamlined = streamlined,from_indexed_flatfile = from_indexed_flatfile)}) + region_mafs = lapply(regions,function(x){get_ssm_by_region(region=x,streamlined = streamlined,from_indexed_flatfile = from_indexed_flatfile, mode=mode)}) }else{ - region_mafs = lapply(regions,function(x){get_ssm_by_region(region=x,streamlined = streamlined,maf_data=maf_data,from_indexed_flatfile = from_indexed_flatfile)}) + region_mafs = lapply(regions,function(x){get_ssm_by_region(region=x,streamlined = streamlined,maf_data=maf_data,from_indexed_flatfile = from_indexed_flatfile, mode=mode)}) } if(!use_name_column){ rn = regions @@ -788,19 +788,32 @@ get_ssm_by_regions = function(regions_list,regions_bed,streamlined=FALSE,maf_dat #' my_mutations=get_ssm_by_region(chromosome="8",qstart=128723128,qend=128774067) get_ssm_by_region = function(chromosome,qstart,qend, region="",basic_columns=TRUE,streamlined=FALSE,maf_data, - from_indexed_flatfile=FALSE){ + from_indexed_flatfile=FALSE, + mode="slms-3"){ tabix_bin = "/home/rmorin/miniconda3/bin/tabix" table_name = config::get("results_tables")$ssm db=config::get("database_name") if(from_indexed_flatfile){ base_path = config::get("project_base") #test if we have permissions for the full gambl + icgc merge - maf_partial_path = config::get("results_filatfiles")$ssm$all$full + if(mode=="slms-3"){ + maf_partial_path = config::get("results_filatfiles")$ssm$all$full + }else if (mode=="strelka2"){ + maf_partial_path = config::get("results_filatfiles")$ssm$gambl$strelka2 + }else{ + stop("You requested results from indexed flatfile. The mode should be set to either slms-3 (default) or strelka2. Please specify one of these modes.") + } maf_path = paste0(base_path,maf_partial_path) maf_permissions = file.access(maf_path,4) if(maf_permissions == -1){ #currently this will only return non-ICGC results - maf_partial_path = config::get("results_filatfiles")$ssm$gambl$full + if(mode=="slms-3"){ + maf_partial_path = config::get("results_filatfiles")$ssm$gambl$full + }else if (mode=="strelka2"){ + maf_partial_path = config::get("results_filatfiles")$ssm$gambl$strelka2 + }else{ + stop("You requested results from indexed flatfile. The mode should be set to either slms-3 (default) or strelka2. Please specify one of these modes.") + } base_path = config::get("project_base") #default is non-ICGC maf_path = paste0(base_path,maf_partial_path) @@ -829,10 +842,19 @@ get_ssm_by_region = function(chromosome,qstart,qend, if(from_indexed_flatfile){ streamlined = TRUE muts = system(paste(tabix_bin,maf_path,region),intern=TRUE) - if(length(muts)>0){ + if(length(muts)>1){ muts_region = readr::read_tsv(muts,col_names=c("Chromosome","Start_Position", "End_Position","Tumor_Sample_Barcode")) - }else{ + # this is necessary because when only one row is returned, read_tsv thinks it is a file name + }else if (length(muts)==1){ + region_with_one_row <- stringr::str_split(muts, "\t", n=4) + + muts_region = data.frame(Chromosome=unlist(region_with_one_row)[1], + Start_Position=as.numeric(unlist(region_with_one_row)[2]), + End_Position=as.numeric(unlist(region_with_one_row)[3]), + Tumor_Sample_Barcode=unlist(region_with_one_row)[4], + stringsAsFactors=FALSE) + } else { muts_region = data.frame(Chromosome=character(), Start_Position=character(), End_Position=character(), diff --git a/R/utilities.R b/R/utilities.R index 16a9f73e..ef6d8b59 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -149,7 +149,9 @@ get_mutation_frequency_bin_matrix = function(regions, show_gene_colours=FALSE, legend_row=3, legend_col=3, - legend_direction="horizontal"){ + legend_direction="horizontal", + from_indexed_flatfile=FALSE, + mode="slms-3"){ if(missing(regions)){ if(missing(regions_df)){ @@ -163,7 +165,8 @@ get_mutation_frequency_bin_matrix = function(regions, this_region=x,drop_unmutated = TRUE, slide_by = slide_by,plot_type="none",window_size=window_size, min_count_per_bin=min_count_per_bin,return_count = TRUE, - metadata = these_samples_metadata)}) + metadata = these_samples_metadata, + from_indexed_flatfile=from_indexed_flatfile, mode=mode)}) all= do.call("rbind",dfs) #add a fake bin for one gene and make every patient not mutated in it (to fill gaps) @@ -326,7 +329,9 @@ calc_mutation_frequency_sliding_windows = min_count_per_bin=3, return_count = FALSE, drop_unmutated=FALSE, - classification_column="lymphgen"){ + classification_column="lymphgen", + from_indexed_flatfile=FALSE, + mode="slms-3"){ max_region = 1000000 @@ -357,11 +362,16 @@ calc_mutation_frequency_sliding_windows = windows.dt = as.data.table(windows) - region_ssm = GAMBLR::get_ssm_by_region(region=this_region,streamlined = TRUE) %>% + region_ssm = GAMBLR::get_ssm_by_region(region=this_region,streamlined = TRUE, from_indexed_flatfile=from_indexed_flatfile, mode=mode) %>% dplyr::rename(c("start"="Start_Position","sample_id"="Tumor_Sample_Barcode")) %>% mutate(mutated=1) - region.dt = mutate(region_ssm,end=start+1) %>% as.data.table() + region.dt = region_ssm %>% + dplyr::mutate(start=as.numeric(as.character(start)), + end=start+1, + end=as.numeric(as.character(end))) %>% + dplyr::relocate(start, .before=end) %>% + as.data.table() setkey(windows.dt,start,end) setkey(region.dt,start,end) diff --git a/config.yml b/config.yml index 032f46bb..88fd2d71 100644 --- a/config.yml +++ b/config.yml @@ -40,6 +40,7 @@ default: gambl: full: "gambl/slms-3_vcf2maf_current/level_3/final_merged_grch37.maf" cds: "gambl/slms-3_vcf2maf_current/level_3/final_merged_grch37.CDS.maf" + strelka2: "gambl/strelka-1.1_vcf2maf-1.2/level_3/final_merged_grch37.maf" results_directories: manta: "manta_current/99-outputs/bedpe/" From a858f182c192440b314626e32f4a9ebff614e13e Mon Sep 17 00:00:00 2001 From: Kdreval Date: Thu, 14 Oct 2021 14:58:21 -0700 Subject: [PATCH 14/16] add support for icgc in strelka flatfiles --- R/database.R | 2 +- config.yml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/R/database.R b/R/database.R index b7c72ea1..83490273 100644 --- a/R/database.R +++ b/R/database.R @@ -799,7 +799,7 @@ get_ssm_by_region = function(chromosome,qstart,qend, if(mode=="slms-3"){ maf_partial_path = config::get("results_filatfiles")$ssm$all$full }else if (mode=="strelka2"){ - maf_partial_path = config::get("results_filatfiles")$ssm$gambl$strelka2 + maf_partial_path = config::get("results_filatfiles")$ssm$all$strelka2 }else{ stop("You requested results from indexed flatfile. The mode should be set to either slms-3 (default) or strelka2. Please specify one of these modes.") } diff --git a/config.yml b/config.yml index 88fd2d71..46bcccf5 100644 --- a/config.yml +++ b/config.yml @@ -37,6 +37,7 @@ default: all: full: "icgc_dart/slms-3_vcf2maf_current/level_3/final_merged_grch37.maf" cds: "icgc_dart/slms-3_vcf2maf_current/level_3/final_merged_grch37.CDS.maf" + strelka2: "icgc_dart/strelka-1.1_vcf2maf-1.2/level_3/final_merged_grch37.maf" gambl: full: "gambl/slms-3_vcf2maf_current/level_3/final_merged_grch37.maf" cds: "gambl/slms-3_vcf2maf_current/level_3/final_merged_grch37.CDS.maf" From c0fc00f4e600fe469ff498b617198f66424611a7 Mon Sep 17 00:00:00 2001 From: Kdreval Date: Thu, 14 Oct 2021 21:31:04 -0700 Subject: [PATCH 15/16] add documentation to new features --- R/database.R | 3 +++ R/utilities.R | 4 ++++ man/calc_mutation_frequency_sliding_windows.Rd | 8 +++++++- man/get_mutation_frequency_bin_matrix.Rd | 8 +++++++- man/get_ssm_by_region.Rd | 5 ++++- man/get_ssm_by_regions.Rd | 7 ++++++- man/theme_Morons.Rd | 4 ++-- 7 files changed, 33 insertions(+), 6 deletions(-) diff --git a/R/database.R b/R/database.R index 83490273..b4fa6164 100644 --- a/R/database.R +++ b/R/database.R @@ -717,6 +717,8 @@ get_ssm_by_gene = function(gene_symbol,coding_only=FALSE,rename_splice_region=TR #' @param regions_bed Better yet, provide a bed file with the coordinates you want to retrieve #' @param streamlined Return a basic rather than full MAF format #' @param use_name_column If your bed-format data frame has a name column (must be named "name") these can be used to name your regions +#' @param from_indexed_flatfile Set to TRUE to avoid using the database and instead rely on flatfiles (only works for streamlined data, not full MAF details) +#' @param mode Only works with indexed flatfiles. Accepts 2 options of "slms-3" and "strelka2" to indicate which variant caller to use. Default is "slms-3". #' #' @return #' @export @@ -776,6 +778,7 @@ get_ssm_by_regions = function(regions_list,regions_bed,streamlined=FALSE,maf_dat #' @param region Region formatted like chrX:1234-5678 instead of specifying chromosome, start and end separately #' @param basic_columns Set to TRUE to override the default behaviour of returning only the first 45 columns of MAF data #' @param from_indexed_flatfile Set to TRUE to avoid using the database and instead rely on flatfiles (only works for streamlined data, not full MAF details) +#' @param mode Only works with indexed flatfiles. Accepts 2 options of "slms-3" and "strelka2" to indicate which variant caller to use. Default is "slms-3". #' #' @return A data frame containing all the MAF data columns (one row per mutation) #' @export diff --git a/R/utilities.R b/R/utilities.R index ef6d8b59..06f3895a 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -123,6 +123,8 @@ trim_scale_expression <- function(x){ #' @param legend_row Fiddle with these to widen or narrow your legend #' @param legend_col Fiddle with these to widen or narrow your legend #' @param legend_col Accepts one of "horizontal" (default) or "vertical" to indicate in which direction the legend will be drawn +#' @param from_indexed_flatfile Set to TRUE to avoid using the database and instead rely on flatfiles (only works for streamlined data, not full MAF details) +#' @param mode Only works with indexed flatfiles. Accepts 2 options of "slms-3" and "strelka2" to indicate which variant caller to use. Default is "slms-3". #' #' #' @return @@ -313,6 +315,8 @@ get_mutation_frequency_bin_matrix = function(regions, #' @param min_count_per_bin #' @param return_count #' @param drop_unmutated This may not currently work properly. +#' @param from_indexed_flatfile Set to TRUE to avoid using the database and instead rely on flatfiles (only works for streamlined data, not full MAF details) +#' @param mode Only works with indexed flatfiles. Accepts 2 options of "slms-3" and "strelka2" to indicate which variant caller to use. Default is "slms-3". #' #' @return #' @export diff --git a/man/calc_mutation_frequency_sliding_windows.Rd b/man/calc_mutation_frequency_sliding_windows.Rd index b8ae36e7..661b6fcb 100644 --- a/man/calc_mutation_frequency_sliding_windows.Rd +++ b/man/calc_mutation_frequency_sliding_windows.Rd @@ -17,7 +17,9 @@ calc_mutation_frequency_sliding_windows( min_count_per_bin = 3, return_count = FALSE, drop_unmutated = FALSE, - classification_column = "lymphgen" + classification_column = "lymphgen", + from_indexed_flatfile = FALSE, + mode = "slms-3" ) } \arguments{ @@ -26,6 +28,10 @@ calc_mutation_frequency_sliding_windows( \item{drop_unmutated}{This may not currently work properly.} \item{classification_column}{Only used for plotting} + +\item{from_indexed_flatfile}{Set to TRUE to avoid using the database and instead rely on flatfiles (only works for streamlined data, not full MAF details)} + +\item{mode}{Only works with indexed flatfiles. Accepts 2 options of "slms-3" and "strelka2" to indicate which variant caller to use. Default is "slms-3".} } \value{ diff --git a/man/get_mutation_frequency_bin_matrix.Rd b/man/get_mutation_frequency_bin_matrix.Rd index 2cb6c9de..576d6ac4 100644 --- a/man/get_mutation_frequency_bin_matrix.Rd +++ b/man/get_mutation_frequency_bin_matrix.Rd @@ -25,7 +25,9 @@ get_mutation_frequency_bin_matrix( show_gene_colours = FALSE, legend_row = 3, legend_col = 3, - legend_direction = "horizontal" + legend_direction = "horizontal", + from_indexed_flatfile = FALSE, + mode = "slms-3" ) } \arguments{ @@ -57,6 +59,10 @@ get_mutation_frequency_bin_matrix( \item{legend_col}{Accepts one of "horizontal" (default) or "vertical" to indicate in which direction the legend will be drawn} +\item{from_indexed_flatfile}{Set to TRUE to avoid using the database and instead rely on flatfiles (only works for streamlined data, not full MAF details)} + +\item{mode}{Only works with indexed flatfiles. Accepts 2 options of "slms-3" and "strelka2" to indicate which variant caller to use. Default is "slms-3".} + \item{region_df}{Data frame of regions with four columns (chrom,start,end,gene_name)} } \value{ diff --git a/man/get_ssm_by_region.Rd b/man/get_ssm_by_region.Rd index 5a7a19bc..b7421903 100644 --- a/man/get_ssm_by_region.Rd +++ b/man/get_ssm_by_region.Rd @@ -12,7 +12,8 @@ get_ssm_by_region( basic_columns = TRUE, streamlined = FALSE, maf_data, - from_indexed_flatfile = FALSE + from_indexed_flatfile = FALSE, + mode = "slms-3" ) } \arguments{ @@ -27,6 +28,8 @@ get_ssm_by_region( \item{basic_columns}{Set to TRUE to override the default behaviour of returning only the first 45 columns of MAF data} \item{from_indexed_flatfile}{Set to TRUE to avoid using the database and instead rely on flatfiles (only works for streamlined data, not full MAF details)} + +\item{mode}{Only works with indexed flatfiles. Accepts 2 options of "slms-3" and "strelka2" to indicate which variant caller to use. Default is "slms-3".} } \value{ A data frame containing all the MAF data columns (one row per mutation) diff --git a/man/get_ssm_by_regions.Rd b/man/get_ssm_by_regions.Rd index 89837c4e..f3596975 100644 --- a/man/get_ssm_by_regions.Rd +++ b/man/get_ssm_by_regions.Rd @@ -10,7 +10,8 @@ get_ssm_by_regions( streamlined = FALSE, maf_data = maf_data, use_name_column = FALSE, - from_indexed_flatfile = FALSE + from_indexed_flatfile = FALSE, + mode = "slms-3" ) } \arguments{ @@ -21,6 +22,10 @@ get_ssm_by_regions( \item{streamlined}{Return a basic rather than full MAF format} \item{use_name_column}{If your bed-format data frame has a name column (must be named "name") these can be used to name your regions} + +\item{from_indexed_flatfile}{Set to TRUE to avoid using the database and instead rely on flatfiles (only works for streamlined data, not full MAF details)} + +\item{mode}{Only works with indexed flatfiles. Accepts 2 options of "slms-3" and "strelka2" to indicate which variant caller to use. Default is "slms-3".} } \value{ diff --git a/man/theme_Morons.Rd b/man/theme_Morons.Rd index 5665279f..8015d608 100644 --- a/man/theme_Morons.Rd +++ b/man/theme_Morons.Rd @@ -27,7 +27,7 @@ nothing Define function for consistent plot theme } \examples{ -ggplot(mpg, aes(displ, hwy, colour = class)) + +ggplot(mpg, aes(displ, hwy, colour = class)) + geom_point() + -theme_Morons +theme_Morons() } From 545206dc0b400032041f4c4fce2da64211ff646a Mon Sep 17 00:00:00 2001 From: Kdreval Date: Thu, 14 Oct 2021 21:33:14 -0700 Subject: [PATCH 16/16] add new line character --- R/viz.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/viz.R b/R/viz.R index a9c04979..b643fb12 100644 --- a/R/viz.R +++ b/R/viz.R @@ -1301,4 +1301,4 @@ theme_Morons <- function(base_size=14, color="black", fill="white", size=1, linetype="solid"), strip.text = element_text(face="bold") )) -} \ No newline at end of file +}