Skip to content

Commit

Permalink
Merge pull request #30 from taylor-lab/master
Browse files Browse the repository at this point in the history
Merge taylor-lab/facets-suite:master into mskcc/facets-suite:Rpackagev2
  • Loading branch information
ckandoth authored Feb 11, 2020
2 parents 95333a7 + 4cbde6d commit 1c1fdf6
Show file tree
Hide file tree
Showing 13 changed files with 301 additions and 107 deletions.
9 changes: 2 additions & 7 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
Package: facetsSuite
Type: Package
Title: Functions to run and parse output from FACETS
Version: 1.0
Version: 2.0.2
Authors@R: person(given = "Philip",
family = "Jonsson",
role = c("aut", "cre"),
email = "philip.jonsson@gmail.com")
URL: https://github.com/mskcc/facetsSuite
URL: https://github.com/mskcc/facets-suite
Description: This package provides functions that wrap the FACETS package for allele-specific copy-number analysis as well as functions to perform post-hoc analyses thereof.
License: MIT + file LICENSE
RoxygenNote: 6.1.1
Expand All @@ -15,8 +15,6 @@ Encoding: UTF-8
Depends:
R (>= 2.10)
Imports:
facets (>= 0.5.2),
pctGCdata (>= 0.3.0),
data.table (>= 1.11.8),
dplyr (>= 0.8.3),
plyr (>= 1.8.4),
Expand All @@ -28,9 +26,6 @@ Imports:
scales (>= 1.0.0),
argparse (>= 1.1.1),
tibble (>= 2.1.3)
Remotes:
mskcc/facets,
veseshan/pctGCdata
NeedsCompilation: no
Packaged: 2019-02-07 16:38:38 UTC; jonssonp
Suggests:
Expand Down
5 changes: 3 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ export(calculate_loh)
export(calculate_lst)
export(calculate_ntai)
export(ccf_annotate_maf)
export(ccf_annotate_maf_legacy)
export(cf_plot)
export(check_fit)
export(closeup_plot)
Expand All @@ -17,11 +18,11 @@ export(icn_plot)
export(plot_segmentation)
export(read_snp_matrix)
export(run_facets)
export(create_legacy_output)
export(load_facets_output)
export(valor_plot)
import(data.table)
import(facets)
import(ggplot2)
import(pctGCdata)
importFrom(data.table,":=")
importFrom(data.table,foverlaps)
importFrom(data.table,setDT)
Expand Down
43 changes: 39 additions & 4 deletions R/ccf-annotate-maf.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,20 +32,22 @@ ccf_annotate_maf = function(maf,
# Find segments overlapping mutation loci
data.table::setDT(maf, key = c('Chromosome', 'Start_Position', 'End_Position'))
original_cols = names(maf)
maf <- maf[, Chromosome:=as.character(Chromosome)]

segs$chrom[segs$chrom == 23] = 'X'
if (algorithm == 'em') {
segs$tcn = segs$tcn.em
segs$lcn = segs$lcn.em
segs$cf = segs$cf.em
}
segs = segs[, c('chrom', 'start', 'end', 'tcn', 'lcn')]
segs = segs[, c('chrom', 'start', 'end', 'tcn', 'lcn', 'cf')]
data.table::setDT(segs, key = c('chrom', 'start', 'end'))

maf = data.table::foverlaps(maf, segs,
by.x = c('Chromosome', 'Start_Position', 'End_Position'),
by.y = c('chrom', 'start', 'end'),
type = 'within', mult = 'first', nomatch = NA)
maf = maf[, c(original_cols, 'tcn', 'lcn'), with = FALSE]
maf = maf[, c(original_cols, 'tcn', 'lcn', 'cf'), with = FALSE]

# Calculate CCFs
maf[, `:=` (
Expand All @@ -59,13 +61,46 @@ ccf_annotate_maf = function(maf,
maf[, c('ccf_Mcopies', 'ccf_Mcopies_lower', 'ccf_Mcopies_upper', 'ccf_Mcopies_prob95', 'ccf_Mcopies_prob90') :=
estimate_ccf(purity, tcn, tcn - lcn, t_alt_count, t_depth), by = seq_len(nrow(maf))]
maf[, c('ccf_1copy', 'ccf_1copy_lower', 'ccf_1copy_upper', 'ccf_1copy_prob95', 'ccf_1copy_prob90') :=
estimate_ccf(purity, tcn, tcn - lcn, t_alt_count, t_depth), by = seq_len(nrow(maf))]
estimate_ccf(purity, tcn, 1, t_alt_count, t_depth), by = seq_len(nrow(maf))]
maf[, c('ccf_expected_copies', 'ccf_expected_copies_lower', 'ccf_expected_copies_upper',
'ccf_expected_copies_prob95', 'ccf_expected_copies_prob90') :=
estimate_ccf(purity, tcn, tcn - lcn, t_alt_count, t_depth), by = seq_len(nrow(maf))]
estimate_ccf(purity, tcn, expected_alt_copies, t_alt_count, t_depth), by = seq_len(nrow(maf))]
as.data.frame(maf)
}

#' Estimate CCFs of somatic mutations from cncf.txt file.
#'
#' Based on FACETS data, infer cancer-cell fraction (CCF) for somatic mutations in a sample.
#'
#' @param maf Input MAF file.
#' @param cncf_txt_file .cncf.txt file created with legacy output of facets-suite.
#' @param purity Sample purity estimate.
#' @param algorithm Choice between FACETS \code{em} and \code{cncf} algorithm.
#'
#' @importFrom data.table setDT foverlaps :=
#' @importFrom stats dbinom
#'
#' @return MAF file annotated with clonality estimates for each mutation, where the following column prefixes are used:
#' \itemize{
#' \item{\code{ccf_Mcopies*}:} {Inferred CCF if mutation is on the major allele.}
#' \item{\code{ccf_1copy*}:} {Inferred CCF if mutation exists in one copy.}
#' \item{\code{ccf_expected_copies*}:} {Inferred CCF if mutation exists in number of copies expected from observed VAF and local ploidy.}
#' }
#'
#' @export
ccf_annotate_maf_legacy <- function(maf,
cncf_txt_file,
purity,
algorithm = c('em', 'cncf')) {
segs <-
fread(cncf_txt_file) %>%
select(chrom, seg, num.mark, nhet, cnlr.median,
mafR, segclust, cnlr.median.clust, mafR.clust,
start = loc.start, end = loc.end, cf.em, tcn.em, lcn.em, cf, tcn, lcn)

ccf_annotate_maf(maf, segs, purity, algorithm)
}

# Estimate most likely CCF given observed VAF, purity and local ploidy
# Based on PMID 25877892
estimate_ccf = function(purity,
Expand Down
24 changes: 15 additions & 9 deletions R/check-fit.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
#'
#' @return A list object with the following items:
#' \itemize{
#' \item{\code{diplogr_flag}:} {Boolean indicating extreme dipLogR value.}
#' \item{\code{n_alternative_diplogr}:} {Number of alternative dipLogR values.}
#' \item{\code{dipLogR_flag}:} {Boolean indicating extreme dipLogR value.}
#' \item{\code{n_alternative_dipLogR}:} {Number of alternative dipLogR values.}
#' \item{\code{n_dip_bal_segs}, \code{frac_dip_bal_segs}:} {Number of balanced segments at dipLogR and the fraction of genome they represent.}
#' \item{\code{n_dip_imbal_segs}, \code{frac_dip_imbal_segs}:} {Number of imbalanced segments at dipLogR and the fraction of genome they represent.}
#' \item{\code{n_amp}:} {Number of segments at total copy number >= 10.}
Expand Down Expand Up @@ -51,7 +51,7 @@ check_fit = function(facets_output,
# Set variables
segs = as.data.table(facets_output$segs)
snps = facets_output$snps
diplogr = facets_output$diplogr
dipLogR = facets_output$dipLogR
alballogr = as.numeric(facets_output$alballogr[, 1])
purity = facets_output$purity
mafr_thresh = facets_output$mafr_thresh
Expand All @@ -74,15 +74,15 @@ check_fit = function(facets_output,
auto_segs = segs[chrom < 23]

# Check for extrema dipLogR values
diplogr_flag = abs(facets_output$diplogr) > 1
dipLogR_flag = abs(facets_output$dipLogR) > 1

# Check for alternative dipLogR values
n_alt_diplogr = length(setdiff(alballogr, diplogr))
n_alt_dipLogR = length(setdiff(alballogr, dipLogR))

# Check for balance/imbalance of copy-number at segments at dipLogR
# cnlr.median.clust == dipLogR for purity runs, for others, find the closest one
cnlr_clusts = unique(segs$cnlr.median.clust)
cnlr_clust_value = cnlr_clusts[which.min(abs(cnlr_clusts - diplogr))]
cnlr_clust_value = cnlr_clusts[which.min(abs(cnlr_clusts - dipLogR))]

dip_bal_segs = auto_segs[cnlr.median.clust == cnlr_clust_value & mcn == lcn, ]
n_dip_bal_segs = nrow(dip_bal_segs)
Expand All @@ -95,9 +95,13 @@ check_fit = function(facets_output,
# Number of high-level amplifications and homozygous deletions
# Clonal homdels, how much of the genome do they represent
n_amps = nrow(segs[tcn >= 10])
n_homdels = nrow(segs[tcn == 0])
homdels = auto_segs[tcn == 0]
n_homdels = nrow(homdels)

clonal_homdels = auto_segs[tcn == 0 & clonal == TRUE]
n_homdels_clonal = nrow(clonal_homdels)

frac_homdels = sum(homdels$length)/sum(auto_segs$length)
frac_homdels_clonal = sum(clonal_homdels$length)/sum(auto_segs$length)

# Count number of unique copy-number states and total number of segments
Expand Down Expand Up @@ -167,14 +171,16 @@ check_fit = function(facets_output,
# n: denotes number
# frac: denotes fraction of assesses genome
output = list(
diplogr_flag = diplogr_flag,
n_alternative_diplogr = n_alt_diplogr,
dipLogR_flag = dipLogR_flag,
n_alternative_dipLogR = n_alt_dipLogR,
wgd = wgd,
n_dip_bal_segs = n_dip_bal_segs,
frac_dip_bal_segs = frac_dip_bal_segs,
n_dip_imbal_segs = n_dip_imbal_segs,
frac_dip_imbal_segs = frac_dip_imbal_segs,
n_amps = n_amps,
n_homdels = n_homdels,
frac_homdels = frac_homdels,
n_homdels_clonal = n_homdels_clonal,
frac_homdels_clonal = frac_homdels_clonal,
n_cn_states = n_cn_states,
Expand Down
17 changes: 17 additions & 0 deletions R/copy-number-scores.R
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,23 @@ calculate_ntai = function(segs,
chrom_seg
})

if (nrow(segs) == 0) {
return(
list(
ntelomeric_ai = NA, # telomeric AI
telomeric_ai_mean_size = NA,
ninterstitial_ai = NA, # interstitial AI
interstitial_ai_mean_size = NA,
ncentromeric_ai = NA, # chromosomal AI
ntelomeric_loh = NA, # telomeric LOH
telomeric_loh_mean_size = NA,
ninterstitial_loh = NA, # interstitial LOH
interstitial_loh_mean_size = NA,
ncentromeric_loh = NA # chromosomal LOH
)
)
}

# Add a column to call AI
# Codes for telomeric/interstitial/whole chromosome:
# 0 = no AI
Expand Down
6 changes: 3 additions & 3 deletions R/format-igv-seg.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
#' @export
format_igv_seg = function(facets_output, sample_id, normalize = TRUE) {

if (!all(c('snps', 'segs', 'diplogr') %in% names(facets_output))) {
stop(paste0('Input is missing segs, snps or diplogr ojbect.'), call. = FALSE)
if (!all(c('snps', 'segs', 'dipLogR') %in% names(facets_output))) {
stop(paste0('Input is missing segs, snps or dipLogR ojbect.'), call. = FALSE)
}

seg = group_by(facets_output$snps, chrom, seg) %>%
Expand All @@ -26,6 +26,6 @@ format_igv_seg = function(facets_output, sample_id, normalize = TRUE) {
mutate(ID = sample_id) %>%
select(ID, chrom, loc.start, loc.end, num.mark, seg.mean)

if (normalize) { seg = mutate(seg, seg.mean = seg.mean - facets_output$diplogr) }
if (normalize) { seg = mutate(seg, seg.mean = seg.mean - facets_output$dipLogR) }
data.frame(seg)
}
7 changes: 4 additions & 3 deletions R/gene-level-changes.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,9 @@ gene_level_changes = function(facets_output,
# Set variables
segs = facets_output$segs
snps = as.data.table(facets_output$snps)
diplogr = facets_output$diplogr
dipLogR = facets_output$dipLogR
purity = facets_output$purity
ploidy = facets_output$ploidy

# Get WGD status
fcna_output = calculate_fraction_cna(segs, ploidy, genome, algorithm)
Expand Down Expand Up @@ -87,8 +88,8 @@ gene_level_changes = function(facets_output,
genes_all[, cn_state := ifelse(!cn_state %in% copy_number_states$call, 'INDETERMINATE', cn_state)]

# Test on cnlr against baseline
# cn0_diplogr = unique(segs$cnlr.median.clust)[which.min(abs(unique(segs$cnlr.median.clust)-diplogr))]
# cn0_segs = segs[cnlr.median.clust == cn0_diplogr]
# cn0_dipLogR = unique(segs$cnlr.median.clust)[which.min(abs(unique(segs$cnlr.median.clust)-dipLogR))]
# cn0_segs = segs[cnlr.median.clust == cn0_dipLogR]
# cn0_snps = snps[segclust %in% cn0_segs$segclust]
# cn0_snps = cn0_snps[between(cnlr, quantile(cn0_snps$cnlr, .25), quantile(cn0_snps$cnlr, .75))] # remove noise

Expand Down
18 changes: 9 additions & 9 deletions R/plot-facets.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#' @param plotX If \code{TRUE}, includes chromosome X.
#' @param genome Genome build.
#' @param highlight_gene Highlight gene(s), provide gene symbol or mapped positions (internally).
#' @param adjust_diplogr Normalize by sample dipLogR.
#' @param adjust_dipLogR Normalize by sample dipLogR.
#' @param method When available, choose between plotting solution from \code{em} or \code{cncf} algorithm.
#' @param subset_snps Subset the SNP profile to reduce weight of plotting, supply a factor by which to reduce or \code{TRUE} for default.
#' @param plot_chroms Chromosomes to plot when using \code{closeup_plot}.
Expand All @@ -37,15 +37,15 @@ cnlr_plot = function(facets_data,
plotX = FALSE,
genome = c('hg19', 'hg18', 'hg38'),
highlight_gene = NULL,
adjust_diplogr = TRUE,
adjust_dipLogR = TRUE,
subset_snps = NULL,
return_object = FALSE) {

genome = match.arg(genome, c('hg19', 'hg18', 'hg38'), several.ok = FALSE)

snps = facets_data$snps
segs = facets_data$segs
diplogr = facets_data$diplogr
dipLogR = facets_data$dipLogR
if (!plotX) {
snps = subset(snps, chrom < 23)
segs = subset(segs, chrom < 23)
Expand All @@ -68,11 +68,11 @@ cnlr_plot = function(facets_data,
if (ymin > -3) ymin = -3
if (ymax < 3) ymax = 3

if (adjust_diplogr) {
snps$cnlr = snps$cnlr - diplogr
my_starts$cnlr_median = my_starts$cnlr_median - diplogr
my_ends$cnlr_median = my_ends$cnlr_median - diplogr
diplogr = Inf
if (adjust_dipLogR) {
snps$cnlr = snps$cnlr - dipLogR
my_starts$cnlr_median = my_starts$cnlr_median - dipLogR
my_ends$cnlr_median = my_ends$cnlr_median - dipLogR
dipLogR = Inf
}

if (!is.null(subset_snps)) {
Expand Down Expand Up @@ -138,7 +138,7 @@ valor_plot = function(facets_data,

snps = facets_data$snps
segs = facets_data$segs
diplogr = facets_data$diplogr
dipLogR = facets_data$dipLogR
if (!plotX) {
snps = subset(snps, chrom < 23)
segs = subset(segs, chrom < 23)
Expand Down
Loading

0 comments on commit 1c1fdf6

Please # to comment.