Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update to handle capture data #60

Merged
merged 21 commits into from
Mar 22, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 16 additions & 27 deletions R/portal.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ require("dbplyr")
require("tidyverse")
require("data.table")

#functions for creating a cBioportal instance using GAMBL data
#some global variables that we will probably change later
# Define functions for creating a cBioportal instance using GAMBL data
# Set some global variables that we will probably change later
gambl_db = "gambl_test"
gambl_maf = "maf_slms3_hg19"
gambl_icgc_maf = "maf_slms3_hg19_icgc"
Expand Down Expand Up @@ -33,12 +33,13 @@ setup_fusions = function(short_name="GAMBL",
caselist_fusion = paste0(out_dir,"case_lists/cases_fusion.txt")

#determine what table to query and what restrictions to use for the MAF data
#TODO: fix this once we have the ICGC SV data in the database

if(include_icgc_data){
maf_table = gambl_icgc_maf
}else{
maf_table = gambl_maf
}
#NOTES. Leave here until the code is robust
#obligatory file for fusions
#cancer_study_identifier: test_gambl
#genetic_alteration_type: FUSION
Expand All @@ -61,7 +62,7 @@ setup_fusions = function(short_name="GAMBL",
)
cat(meta_fusion_content,file=meta_fusions)

#get SV breakpoints and annotate them
#now get SV breakpoints and annotate them

unannotated_sv = get_manta_sv() #no filters

Expand All @@ -71,7 +72,7 @@ setup_fusions = function(short_name="GAMBL",
fusion_samples = pull(annotated_sv,tumour_sample_id) %>% unique()


#deal with any cases not in metadata
#deal with any cases not represented in the metadata
fusions_df = data.frame(Hugo_Symbol=annotated_sv$gene,
Entrez_Gene_Id=annotated_sv$entrez,
Center = "BCGSC",
Expand All @@ -85,31 +86,15 @@ setup_fusions = function(short_name="GAMBL",

fusions_df = distinct(fusions_df,Tumor_Sample_Barcode,Fusion,.keep_all = TRUE)

#hnrnph1_chr = "5"
#hnrnph1_start = 179046257
#hnrnph1_end = 179046427
#mafdat_chrom = filter(mafdat_full,Chromosome == hnrnph1_chr)
#h1.maf = filter(mafdat_chrom, Start_Position > hnrnph1_start & Start_Position < hnrnph1_end)
#h1.muts = h1.maf$Tumor_Sample_Barcode
#hnrnph1_entrez = "3187"
#h1.mut.df = data.frame(Hugo_Symbol = "HNRNPH1",
# Entrez_Gene_Id = hnrnph1_entrez,
# Center = "BCGSC",
# Tumor_Sample_Barcode = h1.muts,
# Fusion = "HNRNPH1-E5",
# DNA_support = "yes",
# RNA_support="no",
# Method = "SLMS-3",
# Frame = "in-frame")


#determine what table to query and what restrictions to use for the MAF data
# determine what table to query and what restrictions to use for the MAF data
# We should eventually fix how this is done to allow more flexibility (e.g. using GAMBL sample sets?)
if(include_icgc_data){
maf_table = gambl_icgc_maf
}else{
maf_table = gambl_maf
}


nfkbiz_entrez = 64332
nfkbiz_utr_ssm = get_ssm_by_gene(table=maf_table,gene_symbol = "NFKBIZ") %>%
dplyr::filter(Variant_Classification == "3'UTR") %>% pull(Tumor_Sample_Barcode) %>% unique()
Expand All @@ -123,11 +108,15 @@ setup_fusions = function(short_name="GAMBL",
RNA_support="no",
Method = "SLMS-3",
Frame = "in-frame")
#get any SV breakpoints that are in the 3'UTR of NFKBIZ

# get any SV breakpoints that are in the 3'UTR of NFKBIZ, which aren't annotated by the function called above
# additional custom code for other SVs could be added here but we'd need to get rid of hard-coding and generalize it
# easiest fix would be a BED file defining each region of interest
nfkbiz_utr_region = "chr3:101,578,185-101,579,902"


nfkbiz.svs= get_manta_sv(region=nfkbiz_utr_region) %>% pull(tumour_sample_id) %>% unique()
nfkbiz.svs= get_manta_sv(region=nfkbiz_utr_region) %>%
pull(tumour_sample_id) %>% unique()


nfkbiz.sv.df = data.frame(Hugo_Symbol = "NFKBIZ",
Expand All @@ -139,7 +128,7 @@ setup_fusions = function(short_name="GAMBL",
RNA_support="no",
Method = "Manta",
Frame = "in-frame")

# combine the NFKBIZ SVs with the rest
all_fusions = rbind(fusions_df,nfkbiz.sv.df,nfkbiz.mut.df)

fusion.cases= as.character(unique(all_fusions$Tumor_Sample_Barcode))
Expand Down
11 changes: 6 additions & 5 deletions R/viz.R
Original file line number Diff line number Diff line change
Expand Up @@ -871,6 +871,7 @@ prettyOncoplot = function(maftools_obj,
mat_origin = om$oncoMatrix
tsbs = levels(maftools:::getSampleSummary(x = maftools_obj)[,Tumor_Sample_Barcode])
print(paste("numcases:",length(tsbs)))
print(paste("numgenes:",length(mat_origin[,1])))
if(!removeNonMutated){
tsb.include = matrix(data = 0, nrow = nrow(mat_origin),
ncol = length(tsbs[!tsbs %in% colnames(mat_origin)]))
Expand Down Expand Up @@ -1231,7 +1232,7 @@ prettyOncoplot = function(maftools_obj,
}
heatmap_legend_param = list(title = "Alterations",
at = c("RNA", "3'UTR" , "Nonsense_Mutation", "Splice_Site","Splice_Region", "Nonstop_Mutation", "Translation_Start_Site",
"In_Frame_Ins", "In_Frame_Del", "Frame_Shift_Ins", "Frame_Shift_Del", "Multi_Hit", "Missense_Mutation", "hot_spot"),
"In_Frame_Ins", "In_Frame_Del", "Frame_Shift_Ins", "Frame_Shift_Del", "Multi_Hit", "Missense_Mutation", "hot_spot"),
labels = c("RNA", "3'UTR", "Nonsense Mutation", "Splice Site","Splice Region", "Nonstop Mutation", "Translation Start Site",
"In Frame Insertion", "In Frame Deletion", "Frame Shift Insertion", "Frame Shift Deletion",
"Multi Hit", "Missense Mutation", "Hotspot"),
Expand Down Expand Up @@ -2087,7 +2088,7 @@ splendidHeatmap = function(this_matrix,
leftStackedWidth=4,
metadataBarFontsize=5,
groupNames = NULL){

comparison_groups <- unique(these_samples_metadata[,splitColumnName])

if(!is.null(splitColumnName) & (splitColumnName %in% metadataColumns)){
Expand Down Expand Up @@ -2138,7 +2139,7 @@ splendidHeatmap = function(this_matrix,

# extract most important features, while taking the feature with highest weight for a particular cluster if it was seen before for other cluster with lower weight
FEATURES <- w[,1] %>%
as.data.frame() %>%
as.data.frame() %>%
`rownames<-`(rownames(w)) %>%
dplyr::arrange(desc(.)) %>%
head(., max_number_of_features_per_group) %>%
Expand Down Expand Up @@ -2247,12 +2248,12 @@ splendidHeatmap = function(this_matrix,
column_to_rownames(., var="Tumor_Sample_Barcode") %>%
dplyr::arrange(!!!syms(metadataColumns), desc(!!!syms(numericMetadataColumns))) %>%
dplyr::select(FEATURES$Feature))

used_for_ordering <- colnames(used_for_ordering_df)

# left annotation: stacked feature weights
ha = rowAnnotation(`feature abundance` = anno_barplot(m, gp = gpar(fill = my_palette[1:length(comparison_groups)+1]),
bar_width = 1, width = unit(leftStackedWidth, "cm"),
bar_width = 1, width = unit(leftStackedWidth, "cm"),
axis_param = list(side = legend_position, labels_rot = 0)))

# bottom annotation: tracks indicating metadata
Expand Down