-
Notifications
You must be signed in to change notification settings - Fork 0
/
Window_average.Rscript
48 lines (39 loc) · 1.86 KB
/
Window_average.Rscript
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
Calculate_mean_non_overlapping_window<-function(data,chr,window, nVariant){
window_mean <- data.frame(Chr = chr, start = 0, end = 0, nvariant = 0, window_mean = -1 )[FALSE,]
cntr <- 1
tmp<-data[which(data$chr==chr),]
tmp_end <- tmp$pos[nrow(tmp)]
window_range <- c(1,seq(window,tmp_end,by=window))
for(n in window_range){
if(cntr ==1){
now_window <- tmp[(tmp$pos >= 1 & tmp$pos < window),]
prev_pos <- window
if(nrow(now_window) >= nVariant){
window_mean[nrow(window_mean) + 1, ] <- c(chr, 0, window, nrow(now_window), mean(now_window$value))
}else{
window_mean[nrow(window_mean) + 1, ] <- c(chr, 0, window, nrow(now_window), NA)
}
}else{
now_window <- tmp[(tmp$pos >= prev_pos & tmp$pos < prev_pos + window),]
if(nrow(now_window) >= nVariant){
window_mean[nrow(window_mean) + 1, ] <- c(chr, prev_pos, prev_pos + window , nrow(now_window), mean(now_window$value))
}else{
window_mean[nrow(window_mean) + 1, ] <- c(chr, prev_pos, prev_pos + window , nrow(now_window), NA)
}
prev_pos <- prev_pos + window
}
cntr <- cntr + 1
}
# Replacing last query coordinate
window_mean[nrow(window_mean), 'end'] <- tail(tmp$pos, 1)
return(window_mean)
}
df <- data.frame(chr = rep(1,100), pos = sort(round(runif(100,min = 1,max = 10000))), value = (runif(100,min = 0,max = 1)))
df <- rbind.data.frame(df, data.frame(chr = rep(2,100), pos = sort(round(runif(100,min = 1,max = 10000))), value = (runif(100,min = 0,max = 1))))
window <- 1000
nVariant <- 10
# Loop through your dataframe by chromosome
# The following will create a list
my_window <- lapply(unique(df$chr), function(x) { Calculate_mean_non_overlapping_window(data = df, chr = x, window = window, nVariant = nVariant)})
# You can convert your list to data.frame
final_output <- do.call(rbind.data.frame, my_window)