Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add cohort colours and fix MZL #93

Merged
merged 7 commits into from
May 26, 2022
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions R/database.R
Original file line number Diff line number Diff line change
Expand Up @@ -709,17 +709,17 @@ add_prps_result = function(incoming_metadata){
#' icgc_metadata = add_icgc_metadata(incoming_metadata = my_meta)
#'
add_icgc_metadata = function(incoming_metadata){

#add trio metadata too!
trio_meta = "/projects/rmorin/projects/gambl-repos/gambl-rmorin/data/metadata/private_metadata/2021-04-30-DLBC_LSARP_Trios_with_metadata.tsv"
icgc_publ = suppressMessages(suppressWarnings(read_csv("/projects/rmorin/projects/gambl-repos/gambl-rmorin/data/metadata/raw_metadata/MALY_DE_tableS1.csv")))
repo_base = config::get("repo_base")
icgc_publ_file = paste0(repo_base,"data/metadata/raw_metadata/MALY_DE_tableS1.csv")
icgc_publ = suppressMessages(suppressWarnings(read_csv(icgc_publ_file)))
icgc_publ = icgc_publ[,c(1:20)]
#fix commas as decimals
icgc_publ = mutate(icgc_publ, purity = str_replace(purity, ",", "."))
icgc_publ = mutate(icgc_publ, sex = str_to_upper(sex))

icgc_raw = suppressMessages(read_tsv("/projects/rmorin/projects/gambl-repos/gambl-rmorin/data/metadata/raw_metadata/ICGC_MALY_seq_md.tsv"))


icgc_raw_path = paste0(repo_base,"data/metadata/raw_metadata/ICGC_MALY_seq_md.tsv")
icgc_raw = suppressMessages(read_tsv(icgc_raw_path))

icgc_raw = icgc_raw %>%
dplyr::select(-compression, -bam_available, -read_length, -time_point, -unix_group, -ffpe_or_frozen, -link_name) %>%
dplyr::filter(tissue_status %in% c("tumor", "tumour"))
Expand Down
18 changes: 13 additions & 5 deletions R/utilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -834,7 +834,8 @@ collate_results = function(sample_table,
output_file = config::get("table_flatfiles")$derived
output_base = config::get("repo_base")
output_file = paste0(output_base, output_file)
sample_table = read_tsv(output_file)
output_file = glue::glue(output_file)
sample_table = read_tsv(output_file) %>% dplyr::filter(sample_id %in% sample_table$sample_id)
}else{
message("Slow option: not using cached result. I suggest from_cache = TRUE whenever possible")
#edit this function and add a new function to load any additional results into the main summary table
Expand All @@ -850,6 +851,7 @@ collate_results = function(sample_table,
}
if(write_to_file){
output_file = config::get("table_flatfiles")$derived
output_file = glue(output_file)
output_base = config::get("repo_base")
output_file = paste0(output_base, output_file)
write_tsv(sample_table, file = output_file)
Expand Down Expand Up @@ -1855,7 +1857,8 @@ get_gambl_colours = function(classification = "all",
"SCBC"="#8c9c90",
"UNSPECIFIED"="#cfba7c",
"OTHER"="#cfba7c",
"MZL"="#065A7F"
"MZL"="#065A7F",
"SMZL"="#065A7F"
)
all_colours[["coo"]] = c(
"ABC" = "#05ACEF",
Expand All @@ -1871,9 +1874,14 @@ get_gambl_colours = function(classification = "all",
"DHITsigPos" = "#D62828",
"NA" = "#ACADAF"
)
all_colours[["cohort"]] = c("Chapuy"="#8B0000",
"Arthur"= "#8845A8",
"Schmitz"= "#2C72B2")
all_colours[["cohort"]] = c("Chapuy"="#8B0000","Chapuy, 2018"="#8B0000",
"Arthur"= "#8845A8","Arthur, 2018"= "#8845A8",
"Schmitz"= "#2C72B2","Schmitz, 2018"= "#2C72B2",
"Reddy" = "#E561C3","Reddy, 2017" = "#E561C3",
"Morin"= "#8DB753", "Morin, 2013"= "#8DB753",
"Kridel"= "#4686B7", "Kridel, 2016"= "#4686B7",
"ICGC"="#E09C3B","ICGC, 2018"="#E09C3B",
"Grande"="#e90c8b", "Grande, 2019"="#e90c8b")

all_colours[["indels"]] = c("DEL" = "#53B1FC", "INS" = "#FC9C6D")

Expand Down
65 changes: 5 additions & 60 deletions config.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
remote:
project_base: "/Users/rmorin/gambl_results/"
repo_base: "/Users/rmorin/git/gambl/"

default:
project_base: "/projects/nhl_meta_analysis_scratch/gambl/results_local/"
repo_base: "/projects/rmorin/projects/gambl-repos/gambl-rmorin/"
Expand Down Expand Up @@ -91,7 +95,7 @@ default:
biopsies: "data/metadata/gambl_biopsy_metadata.tsv"
samples: "data/metadata/gambl_samples_available.tsv"
outcomes: "data/metadata/gambl_all_outcomes.tsv"
derived: "data/metadata/gambl_sample_results.tsv"
derived: "data/metadata/gambl_{seq_type_filter}_results.tsv"

unmatched_normal_ids:
gambl:
Expand Down Expand Up @@ -120,62 +124,3 @@ default:
hg38-nci: "04-24937N-Schmitz"
hg38: "BLGSP-71-06-00286-99A-01D"
hg19-clc: "PA011-G"

testing:
project_base: "/projects/nhl_meta_analysis_scratch/gambl/results_local/"
repo_base: "/projects/rmorin/projects/gambl-repos/gambl-rmorin/"
derived_and_curated: "icgc_dart/derived_and_curated_metadata/"
database_name: "gambl_07_2021"
results_staging:
manta: "manta_current/level_3/01-gamblr_preprocess/"
results_merged:
tidy_expression_file: "/projects/rmorin/projects/gambl-repos/gambl-rmorin/results/icgc_dart/DESeq2-0.0_salmon-1.0/mrna--gambl-icgc-all/vst-matrix-Hugo_Symbol_tidy.tsv"
ex_matrix_file: "/projects/rmorin/projects/gambl-repos/gambl-rmorin/results/icgc_dart/DESeq2-0.0_salmon-1.0/mrna--gambl-icgc-all/vst-matrix-Hugo_Symbol.tsv.gz"

#results from every combination of each of these will be included in the database
unix_groups: "gambl,icgc_dart"
genome_builds: "grch37,hg38,hs37d5"
#the genome build to use for all coordinate-based results in the database
canonical_genome_build: "grch37"
analyses:
matched:
copy_number: "battenberg"
ssm: "slms-3"
sv: "manta"
unmatched:
copy_number: "controlfreec"
ssm: "slms-3"
sv: "manta"

#tables that likely need to be populated sequentially or outside of R due to their size
results_tables:
ssm: "maf_slms3_hg19_icgc"
copy_number: "seg_battenberg_hg19"
sv: "bedpe_manta_hg19"
copy_number_unmatched: "seg_controlfreec_hg19"

results_filatfiles:
sv: "icgc_dart/cbioportal-1.0/01-filtered-manta/genome-grch37/all_merged_sv_info.tsv"
ssm:
all:
full: "icgc_dart/slms_3-1.0_vcf2maf-1.2/level_3/final_merged_grch37.maf"
cds: "icgc_dart/slms_3-1.0_vcf2maf-1.2/level_3/final_merged_grch37.CDS.maf"
gambl:
full: "gambl/slms_3-1.0_vcf2maf-1.2/level_3/final_merged_grch37.maf"
cds: "gambl/slms_3-1.0_vcf2maf-1.2/level_3/final_merged_grch37.CDS.maf"

results_directories:
manta: "manta_current/99-outputs/bedpe/"


tables:
biopsies: "biopsy_metadata"
samples: "sample_metadata"
outcomes: "outcome_metadata"
derived: "derived_data"

table_flatfiles:
biopsies: "data/metadata/gambl_biopsy_metadata.tsv"
samples: "data/metadata/gambl_samples_available.tsv"
outcomes: "data/metadata/gambl_all_outcomes.tsv"
derived: "data/metadata/gambl_sample_results.tsv"
15 changes: 15 additions & 0 deletions test_remote.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@

# Manual check that GAMBLR can be used with the "remote" config profile
# against a local clone of the gambl repo.
setwd("~/git/gambl/") # set this path to point to your local clone of the gambl repo
# In your config.yml file, also set repo_base under the "remote" config (at the top) to point to this path.
library(GAMBLR)
# Make subsequent config::get() calls read from the "remote" profile instead of "default".
Sys.setenv(R_CONFIG_ACTIVE= "remote")


# Confirm the active profile: this should return the project_base set under "remote", e.g.
config::get("project_base")
#[1] "/Users/rmorin/gambl_results/"

# Presumably loads the GAMBL sample metadata; the function is defined elsewhere in GAMBLR.
all_meta = get_gambl_metadata()

# Read the collated results from the cached flat file rather than rebuilding them.
collated = collate_results(from_cache=TRUE)

# Same cached read with seq_type_filter set to "capture".
# NOTE(review): seq_type_filter appears to select which cached results file is read — confirm against collate_results.
collated = collate_results(from_cache=TRUE,seq_type_filter="capture")