-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdownsample.R
25 lines (21 loc) · 1007 Bytes
/
downsample.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
library(dplyr)
###################
## Downsample
###################
## Repeatedly downsample a CDR3 repertoire to a fixed library size by sampling
## sequences with replacement, with probability proportional to read count.
##
## data: An input data frame. Must contain the following columns:
##   CDR3.amino.acid.sequence, Read.count. Each row is a unique CDR3 amino
##   acid sequence.
## B: Number of downsampled experiments (replicates). B = 0 returns an empty
##   list.
## target_library_size: Total number of reads drawn in each replicate.
## seed: A random seed. Replicate b is drawn right after set.seed(seed + b - 1),
##   so results are reproducible for a given seed.
##
## Returns a list of length B. Each element is a data frame with the columns
## CDR3.amino.acid.sequence (character) and Read.count (integer number of
## draws), one row per sequence tabulated by table() on the sampled reads.
##
## Note: set.seed() is called for every replicate, so the caller's global RNG
## state is overwritten by this function.
downsample <- function(data, B = 100, target_library_size, seed = 12345) {
  sequences <- data$CDR3.amino.acid.sequence
  read_counts <- data$Read.count
  if (is.null(sequences) || is.null(read_counts)) {
    stop(
      "`data` must contain columns CDR3.amino.acid.sequence and Read.count",
      call. = FALSE
    )
  }

  # Loop-invariant: each sequence is drawn with probability proportional to
  # its share of the total read count.
  draw_prob <- read_counts / sum(read_counts)

  # seq_len() rather than 1:B, so that B = 0 yields an empty list instead of
  # iterating over c(1, 0).
  lapply(seq_len(B), function(b) {
    set.seed(seed + b - 1)
    counts <- table(sample(sequences, target_library_size,
                           prob = draw_prob, replace = TRUE))
    # Build the result directly from the table: names are the sequences,
    # values are the number of times each was drawn.
    data.frame(
      CDR3.amino.acid.sequence = as.character(names(counts)),
      Read.count = as.integer(counts),
      stringsAsFactors = FALSE
    )
  })
}